#!/usr/bin/python3

import argparse
import ast
import atexit
import glob
import inspect
import itertools
import os
import os.path
import pathlib
import re
import shutil
import stat
import subprocess
import sys
import types

sys.path.insert(0, (  os.path.dirname(os.path.abspath(__file__))
                    + "/../lib/charliecloud"))
import charliecloud as ch


## Globals ##

# Command line arguments. None until main() stores the result of
# ap.parse_args() here.
cli = None

# Image state object. None until main() stores a State instance here.
state = None


## Imports not in standard library ##

# See charliecloud.py for the messy import of this.
lark = ch.lark


## Constants ##

# Absolute path of the directory containing this source file.
CH_BIN = os.path.dirname(os.path.abspath(
           inspect.getframeinfo(inspect.currentframe()).filename))
# Path of the ch-run executable, looked for in the same directory as this
# file; main() records a missing dependency if it does not exist.
CH_RUN = CH_BIN + "/ch-run"

# Initial ARG variables. State.reset() copies these, dropping entries whose
# value is None (i.e., proxy variables not set in the caller's environment).
ARG_DEFAULTS = { "HTTP_PROXY": os.environ.get("HTTP_PROXY"),
                 "HTTPS_PROXY": os.environ.get("HTTPS_PROXY"),
                 "FTP_PROXY": os.environ.get("FTP_PROXY"),
                 "NO_PROXY": os.environ.get("NO_PROXY"),
                 "http_proxy": os.environ.get("http_proxy"),
                 "https_proxy": os.environ.get("https_proxy"),
                 "ftp_proxy": os.environ.get("ftp_proxy"),
                 "no_proxy": os.environ.get("no_proxy"),
                 "PATH": "/ch/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
                 # GNU tar, when it thinks it's running as root, tries to
                 # chown(2) and chgrp(2) files to whatever's in the tarball.
                 "TAR_OPTIONS": "--no-same-owner" }

# Initial ENV variables; State.reset() copies these the same way. Currently
# empty.
ENV_DEFAULTS = { }

# Lark grammar for the subset of Dockerfile syntax handled here: CMD, COPY,
# ARG, ENV, FROM, RUN, WORKDIR, and comment lines. main() hands it to
# lark.Lark with the Earley parser. Rule names are significant: Main_Loop
# looks up a class named "I_" + rule name for each subtree it visits.
#
# NOTE(review): from_ has no _WS before from_alias (nor between "AS" and ID)
# and only LINE_CONTINUE is %ignore'd, so "FROM x AS y" looks like it would
# not parse -- confirm.
GRAMMAR = r"""
?start: ( instruction | _COMMENT )+

?instruction: _WS? ( cmd | copy | arg | env | from_ | run | workdir )

cmd: "CMD"i _WS LINE _NEWLINES

copy: "COPY"i ( _WS copy_chown )? ( copy_shell ) _NEWLINES
copy_chown: "--chown" "=" /[^ \t\n]+/
copy_shell: _WS WORD ( _WS WORD )+

arg: "ARG"i _WS ( arg_bare | arg_equals ) _NEWLINES
arg_bare: WORD
arg_equals: WORD "=" ( WORD | STRING_QUOTED )

env: "ENV"i _WS ( env_space | env_equalses ) _NEWLINES
env_space: WORD _WS LINE
env_equalses: env_equals ( _WS env_equals )*
env_equals: WORD "=" ( WORD | STRING_QUOTED )

from_: "FROM"i _WS ID [ from_tag | from_digest ] [ from_alias ] _NEWLINES
from_tag: ":" ID
from_digest: "@" HEXID
from_alias: "AS"i ID

run: "RUN"i _WS ( run_exec | run_shell ) _NEWLINES
run_exec.2: _string_list
run_shell: LINE

workdir: "WORKDIR"i _WS LINE _NEWLINES

ID: /[A-Za-z0-9_.\/-]+/
HEXID: /[a-fA-F0-9]+/
LINE: ( LINE_CONTINUE | /[^\n]/ )+
WORD: /[^ \t\n=]/+

_string_list: "[" _WS? STRING_QUOTED ( "," _WS? STRING_QUOTED )* _WS? "]"

LINE_CONTINUE: "\\\n"
%ignore LINE_CONTINUE

_COMMENT: _WS? /#[^\n]*/ _NEWLINES
_NEWLINES: _WS? "\n"+
_WS: /[ \t]/+

%import common.ESCAPED_STRING -> STRING_QUOTED
"""


## Main ##

def main():
   """Parse the command line, parse the Dockerfile, and execute it.

   Sets the module globals cli (parsed arguments) and state (a State
   instance). Exits with status 1 if called with no arguments, if no image
   tag was given and none can be inferred, if a --build-arg has no value and
   is not in the environment, or if some --build-arg is never consumed by an
   ARG instruction. Exits with status 0 after parsing if --parse-only.
   """

   atexit.register(color_reset, sys.stdout, sys.stderr)

   if (not os.path.exists(CH_RUN)):
      ch.depfails.append(("missing", CH_RUN))

   global cli
   ap = argparse.ArgumentParser(
      formatter_class=argparse.RawDescriptionHelpFormatter,
      description="Build an image from a Dockerfile; completely unprivileged.",
      epilog="""\
environment variables:
  CH_GROW_STORAGE       default for --storage
""")
   ap.add_argument("--build-arg", action="append", default=None,
                   metavar="KEY=VALUE",
                   help="set build-time variables")
   ap.add_argument("--dependencies", action=ch.CLI_Dependencies,
                   help="print any missing dependencies and exit")
   ap.add_argument("-f", "--file", metavar="DOCKERFILE",
                   help="Dockerfile to use (default: CONTEXT/Dockerfile)")
   ap.add_argument("-n", "--dry-run", action="store_true",
                   help="don't execute instructions")
   ap.add_argument("--no-cache", action="store_true",
                   help="ignored (layer caching not yet supported)")
   ap.add_argument("--parse-only", action="store_true",
                   help="stop after parsing the Dockerfile")
   ap.add_argument("--print-storage", action=CLI_Print_Storage,
                   help="print the state and images directory, then exit")
   ap.add_argument("-s", "--storage", metavar="DIR",
                   default=os.environ.get("CH_GROW_STORAGE",
                                          "/var/tmp/ch-grow"),
                   help="internal builder storage (default: /var/tmp/ch-grow)")
   ap.add_argument("-t", "--tag", metavar="TAG",
                   help="name of image to create (default: inferred)")
   ap.add_argument("-v", "--verbose", action="count", default=0,
                   help="print extra chatter (can be repeated)")
   ap.add_argument("--version", action=ch.CLI_Version,
                   help="print version and exit")
   ap.add_argument("context", metavar="CONTEXT",
                   help="context directory")

   if (len(sys.argv) < 2):
      ap.print_help(file=sys.stderr)
      sys.exit(1)

   cli = ap.parse_args()
   ch.verbose = cli.verbose
   if (cli.file is None):
      cli.file = cli.context + "/Dockerfile"
   if (cli.tag is None):
      # Infer the tag from the Dockerfile's extension ("Dockerfile.foo") or,
      # failing that, from the name of its containing directory.
      m = re.search(r"(([^/]+)/)?Dockerfile(\.(.+))?$",
                    os.path.abspath(cli.file))
      if (m is not None):
         if m.group(4):    # extension
            cli.tag = m.group(4)
         elif m.group(2):  # containing directory
            cli.tag = m.group(2)
   if (cli.tag is None):
      # Inference failed (e.g. "-f build.df"); without this check the
      # membership test below would raise TypeError on None.
      FATAL("can't infer image tag from Dockerfile path: %s; use --tag"
            % cli.file)
   if (":" not in cli.tag):
      cli.tag += ":latest"
   def build_arg_get(arg):
      # Split on the first "=" only, so values may themselves contain "=".
      (key, sep, value) = arg.partition("=")
      if (sep):
         return (key, value)
      # No "=": take the value from the environment.
      value = os.getenv(key)
      if (value is None):
         FATAL("--build-arg: %s: no value and not in environment" % key)
      return (key, value)
   if (cli.build_arg is None):
      cli.build_arg = list()
   cli.build_arg = dict( build_arg_get(i) for i in cli.build_arg )

   ch.dependencies_check()

   INFO("growing: %s" % dir_image(cli.tag))
   DEBUG(cli)

   global state
   state = State()

   parser = lark.Lark(GRAMMAR, parser="earley", propagate_positions=True)
   # Close the Dockerfile promptly instead of leaking the handle.
   with open(cli.file, "rt") as fp:
      text = fp.read()
   tree = parser.parse(text)

   DEBUG(tree.pretty())
   if (cli.parse_only):
      sys.exit(0)

   Main_Loop().visit(tree)

   # Arg instructions delete the build arguments they consume; anything left
   # was never declared with ARG in the Dockerfile.
   if (len(cli.build_arg) != 0):
      FATAL("--build-arg: not consumed: " + " ".join(cli.build_arg.keys()))


class Main_Loop(lark.Visitor):
   """Visitor that executes the instruction corresponding to each subtree.

   For a subtree whose rule name is X, the module-level name "I_X" is looked
   up; if present, it is called with the subtree, the result is printed with
   INFO(), and its execute() method is called. Subtrees with no such name
   are skipped.
   """

   def __default__(self, tree):
      try:
         factory = globals()["I_" + tree.data]
      except KeyError:
         return  # no instruction class for this rule
      instruction = factory(tree)
      INFO(instruction)
      instruction.execute()


## Instruction classes ##

class Instruction(object):
   """Base class for a parsed instruction.

   Holds the parse subtree and its line number. Subclasses override str_()
   to describe themselves and execute_() to do their work.
   """

   def __init__(self, tree):
      self.tree = tree
      self.lineno = tree.meta.line

   def __str__(self):
      # The keyword shown is the second underscore-separated field of the
      # class name, upper-cased (e.g. "I_copy_shell" gives "COPY").
      keyword = type(self).__name__.split("_")[1].upper()
      return "%3s %s %s" % (self.lineno, keyword, self.str_())

   def child(self, childname, type_):
      """Return the first immediate terminal of type type_ in the first
      subtree yielded by self.tree.find_data(childname), or None if there is
      no such subtree or no such terminal."""
      subtree = next(self.tree.find_data(childname), None)
      if (subtree is None):
         return None  # no child subtree named childname
      return terminal(subtree, type_)

   def execute(self):
      """Call execute_() unless this is a dry run."""
      if (cli.dry_run):
         return
      self.execute_()

   def execute_(self):
      """Do the instruction's work; the base implementation does nothing."""
      return None

   def str_(self):
      """Return the instruction's description for __str__()."""
      return "(unimplemented)"

   def terminal(self, type_, index=0):
      """Return the value of the index-th immediate child terminal of type
      type_, or None if not found."""
      return terminal(self.tree, type_, index)

   def terminals(self, type_):
      """Yield the values of all immediate child terminals of type type_."""
      return terminals(self.tree, type_)


class I_cmd(Instruction):
   """CMD instruction. Nothing is overridden, so it uses the inherited
   placeholder str_() and no-op execute_()."""


class Copy(Instruction):
   """Base class for COPY instructions.

   Subclasses must set self.srcs (list of source paths, which may be globs,
   relative to the context directory) and self.dst (destination path in the
   image; relative paths are taken relative to the current WORKDIR).
   """

   def str_(self):
      return "%s -> %s" % (self.srcs, repr(self.dst))

   def execute_(self):
      # The Dockerfile spec for COPY is complex and messy. This implementation
      # is not conforming but hopefully comes close. Known nonconformance:
      #
      # 1. We use the Python glob semantics instead of the Go ones.
      #
      # 2. With "docker build" COPY seems to follow cp(1) semantics if the
      #    destination is an existing directory: sources are copied to within
      #    that directory regardless of whether it has trailing slash. This
      #    contrasts with the spec's implication that this only happens if a
      #    trailing slash is present; otherwise, the destination is replaced.
      #
      # 3. Spec does not say what to do with symlinks. If the source is a
      #    symlink to a file, we copy the link target to a regular file;
      #    otherwise, we copy symlinks themselves.
      srcs = list(itertools.chain.from_iterable(
                     glob.glob(cli.context + "/" + i) for i in self.srcs))
      if (len(srcs) == 0):
         # Fail loudly rather than silently copying nothing.
         FATAL("can't COPY: no sources found: %s" % " ".join(self.srcs))
      dst = dir_image(cli.tag) + "/"
      if (not self.dst.startswith("/")):
         dst += state.workdir + "/"
      dst += self.dst
      if (dst.endswith("/")):
         # Trailing slash means "destination is a directory": create it. Note
         # dst already starts with the image directory, so it must not be
         # prefixed with it a second time.
         dst = dst[:-1]
         mkdirs(dst)
      for src in srcs:
         cp(src, dst)


class I_copy_shell(Copy):
   """COPY in shell form: every WORD terminal is a path; the last one is the
   destination and the others are sources."""

   def __init__(self, *args):
      super().__init__(*args)
      words = []
      for word in self.terminals("WORD"):
         # Substitute build variables in each path.
         words.append(variables_sub(word, state.env_build))
      (self.srcs, self.dst) = (words[:-1], words[-1])

class Arg(Instruction):
   """Base class for ARG instructions.

   The value comes from --build-arg if the key was given there (and the key
   is then removed from cli.build_arg to mark it consumed); otherwise from
   the subclass's value_default(). A value of None means "no value".
   """

   def __init__(self, *args):
      super().__init__(*args)
      self.key = self.terminal("WORD", 0)
      if (self.key in cli.build_arg):
         # Fetch and consume the command line value in one step.
         self.value = cli.build_arg.pop(self.key)
      else:
         self.value = self.value_default()
      if (self.value is not None):
         self.value = variables_sub(self.value, state.env_build)

   def str_(self):
      if (self.value is not None):
         return "%s='%s'" % (self.key, self.value)
      return self.key

   def execute_(self):
      # Only ARGs that have a value are recorded in the build state.
      if (self.value is None):
         return
      state.arg[self.key] = self.value

class I_arg_bare(Arg):
   """ARG with a name only; there is no default value."""

   def value_default(self):
      """Return None: a bare ARG has no default."""
      return None

class I_arg_equals(Arg):
   """ARG of the form name=default."""

   def value_default(self):
      """Return the default: the second WORD terminal if there is one,
      otherwise the unescaped STRING_QUOTED terminal."""
      bare = self.terminal("WORD", 1)
      if (bare is not None):
         return bare
      return unescape(self.terminal("STRING_QUOTED"))

class Env(Instruction):
   """Base class for ENV instructions; subclasses set self.key and
   self.value."""

   def str_(self):
      return "%s='%s'" % (self.key, self.value)

   def execute_(self):
      # Record the variable, then rewrite the image's ch/environment file
      # from scratch with every ENV variable set so far, one per line.
      state.env[self.key] = self.value
      path = "%s/ch/environment" % dir_image(cli.tag)
      with open(path, "wt") as fp:
         for (name, value) in state.env.items():
            fp.write("%s=%s" % (name, value))
            fp.write("\n")


class I_env_equals(Env):
   """One key=value pair of an ENV instruction in equals form."""

   def __init__(self, *args):
      super().__init__(*args)
      self.key = self.terminal("WORD", 0)
      # The value is the second WORD if there is one; otherwise it was
      # quoted and needs unescaping.
      raw = self.terminal("WORD", 1)
      if (raw is None):
         raw = unescape(self.terminal("STRING_QUOTED"))
      self.value = variables_sub(raw, state.env_build)


class I_env_space(Env):
   """ENV instruction in space form: the key, whitespace, then the rest of
   the line as the value."""

   def __init__(self, *args):
      super().__init__(*args)
      self.key = self.terminal("WORD")
      line = self.terminal("LINE")
      # unescape() needs a quoted literal, so wrap values that do not
      # already start with a quote.
      quoted = line if line.startswith('"') else '"' + line + '"'
      self.value = variables_sub(unescape(quoted), state.env_build)


class I_from_(Instruction):
   """FROM instruction: make the base image the starting point of the image
   being built, pulling the base first if it is not already in storage."""

   def __init__(self, *args):
      super().__init__(*args)
      self.name = self.terminal("ID")
      self.alias = self.child("from_alias", "ID")
      self.tag = self.child("from_tag", "ID")
      self.digest = self.child("from_digest", "HEXID")
      # Neither tag nor digest given: default the tag.
      if (self.tag is None and self.digest is None):
         self.tag = "latest"

   @property
   def fullname(self):
      """Base image reference: name:tag, else name@digest, else name."""
      if (self.tag is not None):
         return "%s:%s" % (self.name, self.tag)
      if (self.digest is not None):
         return "%s@%s" % (self.name, self.digest)
      return self.name

   def base_copy(self):
      """Replace the output image directory with a copy of the base image
      directory, copying symlinks as symlinks."""
      DEBUG("copying: %s -> %s" % (self.fullname, cli.tag))
      target = dir_image(cli.tag)
      rmtree(target)
      shutil.copytree(dir_image(self.fullname), target, symlinks=True)

   def base_pull(self):
      """Download and flatten the base image into its image directory, then
      apply image_fixup() to it."""
      unpack_dir = dir_image(self.fullname)
      img = ch.Image(ch.Image_Ref(self.fullname), dir_downloads(),
                     unpack_dir, "")
      img.download()
      img.flatten()
      image_fixup(unpack_dir)

   def execute_(self):
      base = self.fullname
      if (cli.tag == base):
         FATAL("output image name same as FROM: %s" % base)
      mkdirs(dir_images())
      if (not os.path.isdir(dir_image(base))):
         DEBUG("image not found, pulling: %s" % base)
         self.base_pull()
      self.base_copy()
      # A new base image starts with fresh build state.
      state.reset()

   def str_(self):
      if (self.alias):
         suffix = "AS %s" % self.alias
      else:
         suffix = ""
      return "%s %s" % (self.fullname, suffix)


class Run(Instruction):
   """Base class for RUN instructions; subclasses set self.cmd to the
   argument list to run."""

   def execute_(self):
      rootfs = dir_image(cli.tag)
      # Make sure these two files exist in the image before running.
      for mount_point in ("/etc/resolv.conf", "/etc/hosts"):
         pathlib.Path(rootfs + mount_point).touch(exist_ok=True)
      # Run the command with ch-run in the image, starting in the current
      # WORKDIR, with the build environment (ARG plus ENV variables).
      launcher = [CH_BIN + "/ch-run", "-w", "--no-home", "--no-passwd",
                  "--cd", state.workdir, "--uid=0", "--gid=0",
                  rootfs, "--"]
      cmd(launcher + self.cmd, env=state.env_build)

   def str_(self):
      return str(self.cmd)


class I_run_exec(Run):
   """RUN in exec form: a bracketed list of quoted strings used directly as
   the argument list."""

   def __init__(self, *args):
      super().__init__(*args)
      argv = []
      for quoted in self.terminals("STRING_QUOTED"):
         # Unescape each literal, then substitute build variables.
         argv.append(variables_sub(unescape(quoted), state.env_build))
      self.cmd = argv


class I_run_shell(Run):
   """RUN in shell form: the rest of the line is passed to /bin/sh -c."""

   def __init__(self, *args):
      super().__init__(*args)
      # FIXME: Can't figure out how to remove continuations at parse time.
      line = self.terminal("LINE")
      script = line.replace("\\\n", "")
      self.cmd = ["/bin/sh", "-c", script]


class I_workdir(Instruction):
   """WORKDIR instruction: change the working directory used by later
   instructions, creating the directory in the image if needed."""

   def __init__(self, *args):
      super().__init__(*args)
      self.path = variables_sub(self.terminal("LINE"), state.env_build)

   def str_(self):
      return self.path

   def execute_(self):
      # Change directory first so that a relative path is resolved against
      # the previous working directory; state.workdir is then the absolute
      # in-image path to create. (Appending self.path directly to the image
      # directory is wrong for relative paths.)
      state.chdir(self.path)
      mkdirs(dir_image(cli.tag) + state.workdir)


## Supporting classes ##

class CLI_Print_Storage(ch.CLI_Action_Exit):
   """Command line action for --print-storage: print the storage directory
   held in the argument namespace, then exit with status 0."""

   def __call__(self, _, namespace, *args, **kwargs):
      storage = namespace.storage
      print(storage)
      raise SystemExit(0)

class State(object):
   """Mutable build state.

   Attributes:
     workdir  current working directory within the image
     arg      dictionary of ARG variables
     env      dictionary of ENV variables
   """

   def __init__(self):
      self.reset()

   @property
   def env_build(self):
      """New dictionary of ARG and ENV variables combined; ENV wins when a
      name is in both."""
      return { **self.arg, **self.env }

   def chdir(self, path):
      """Change the working directory to path; a relative path is taken
      relative to the current working directory."""
      # os.path.join() discards the first argument when path is absolute
      # and otherwise adds a separator only if needed, so a relative path
      # from "/" gives "/foo" rather than "//foo".
      self.workdir = os.path.join(self.workdir, path)

   def reset(self):
      """Return to the initial state: working directory "/" and the default
      ARG and ENV variables, omitting those whose default is None."""
      self.workdir = "/"
      self.arg = { k: v for (k, v) in ARG_DEFAULTS.items() if v is not None }
      self.env = { k: v for (k, v) in ENV_DEFAULTS.items() if v is not None }


## Supporting functions ##

def DEBUG(*args, **kwargs):
   """Print to stderr like print(), flushed and wrapped in color code "36m",
   but only if cli.verbose is set."""
   if (not cli.verbose):
      return
   color("36m", sys.stderr)
   print(*args, flush=True, file=sys.stderr, **kwargs)
   color_reset(sys.stderr)

def ERROR(*args, **kwargs):
   """Print to stderr like print(), flushed and wrapped in color code
   "31m"."""
   stream = sys.stderr
   color("31m", stream)
   print(*args, flush=True, file=stream, **kwargs)
   color_reset(stream)

def FATAL(*args, **kwargs):
   """Report an error with ERROR(), then exit with status 1."""
   ERROR(*args, **kwargs)
   raise SystemExit(1)

def INFO(*args, **kwargs):
   """Print like print(), always flushing the output."""
   print(*args, flush=True, **kwargs)

def cmd(args, env=None):
   """Run the command args (a list) with environment env and no standard
   input; exit via FATAL() if its return code is nonzero. Standard output is
   switched to color code "33m" while the command runs."""
   DEBUG("environment: %s" % env)
   DEBUG("executing: %s" % args)
   color("33m", sys.stdout)
   result = subprocess.run(args, env=env, stdin=subprocess.DEVNULL)
   color_reset(sys.stdout)
   status = result.returncode
   if (status != 0):
      FATAL("%s failed with return code %d" % (args[0], status))

def color(color, fp):
   """Write the escape sequence ESC "[" followed by color (e.g. "31m") to
   fp and flush it, but only if fp is a terminal."""
   if (not fp.isatty()):
      return
   sequence = "\033[" + color
   print(sequence, end="", flush=True, file=fp)

def color_reset(*fps):
   """Send color code "0m" to each of the given file objects."""
   for stream in fps:
      color("0m", stream)

def cp(src, dst):
   """Copy src to dst.

   A non-directory src is copied with shutil.copy2(). A directory src is
   copied recursively with symlinks=False: to dst itself, or, if dst is an
   existing directory, to an entry inside it named after src.
   """
   DEBUG("copying: %s -> %s" % (src, dst))
   if (not os.path.isdir(src)):
      shutil.copy2(src, dst)
      return
   target = dst
   if (os.path.isdir(dst)):  # FIXME: excessive stat(2) with many sources?
      target = dst + "/" + os.path.basename(src)
   shutil.copytree(src, target, symlinks=False)

def dir_downloads():
   """Return the download cache directory within the storage directory."""
   return "/".join((cli.storage, "dlcache"))

def dir_images():
   """Return the directory within the storage directory that holds all
   images."""
   return "/".join((cli.storage, "img"))

def dir_image(image):
   """Return the directory for the image named image. Slashes in the name
   are replaced with "%" so it is a single path component."""
   flat_name = image.replace("/", "%")
   return "%s/%s" % (dir_images(), flat_name)

def file_ensure_exists(path):
   """Create an empty file at path if nothing is there; an existing file's
   content is left alone."""
   # Append mode creates a missing file without truncating an existing one.
   open(path, "a").close()

def file_write(path, content, mode=None):
   """Replace the content of the file at path with the string content,
   creating it if needed. If mode is given, also set the file's permission
   bits to it."""
   with open(path, "wt") as out:
      out.write(content)
      if (mode is None):
         return
      os.chmod(out.fileno(), mode)

def image_fixup(path):
   """Prepare the image directory at path for building: add the ch metadata
   directory, make sure some files exist, write minimal /etc/passwd and
   /etc/group, and add command overrides in ch/bin."""
   DEBUG("fixing up image: %s" % path)
   # Metadata directory.
   mkdirs("%s/ch/bin" % path)
   file_ensure_exists("%s/ch/environment" % path)
   # Mount points.
   for mount_point in ("%s/etc/hosts", "%s/etc/resolv.conf"):
      file_ensure_exists(mount_point % path)
   # /etc/{passwd,group}
   passwd = """\
root:x:0:0:root:/root:/bin/sh
nobody:x:65534:65534:nobody:/:/bin/false
"""
   group = """\
root:x:0:
nogroup:x:65534:
"""
   file_write("%s/etc/passwd" % path, passwd)
   file_write("%s/etc/group" % path, group)
   # Kludges to work around expectations of real root, not UID 0 in a
   # unprivileged user namespace. See also the default environment.
   #
   # Debian "apt" and friends want to chown(1), chgrp(1), etc. in various
   # ways, so point those commands at /bin/true.
   for link in ("%s/ch/bin/chown",
                "%s/ch/bin/chgrp",
                "%s/ch/bin/dpkg-statoverride"):
      symlink("/bin/true", link % path)

def mkdirs(path):
   """Create directory path and any missing parents; it is not an error if
   the directory already exists."""
   message = "ensuring directory: " + path
   DEBUG(message)
   os.makedirs(path, exist_ok=True)

def rmtree(path):
   """Recursively delete path if it is a directory; otherwise do nothing."""
   if (not os.path.isdir(path)):
      return
   DEBUG("deleting directory: " + path)
   shutil.rmtree(path)

def symlink(target, source):
   """Create a symlink at source pointing to target.

   If source already exists, that is fine provided it is a symlink that
   already points to target; anything else is a fatal error.
   """
   try:
      os.symlink(target, source)
      return
   except FileExistsError:
      pass
   # Something is already at source; accept it only if it is the same link.
   if (not os.path.islink(source)):
      FATAL("can't symlink: source exists and isn't a symlink: %s"
            % source)
   existing = os.readlink(source)
   if (existing != target):
      FATAL("can't symlink: %s exists; want target %s but existing is %s"
            % (source, target, existing))

def terminal(tree, type_, index=0):
   """Return the value of the index-th (counting from zero) immediate child
   terminal of tree with type type_, or None if there are not that many."""
   remaining = index
   for value in terminals(tree, type_):
      if (remaining == 0):
         return value
      remaining -= 1
   return None

def terminals(tree, type_):
   """Yield, in order, the value of each immediate child of tree that is a
   token of type type_. Child subtrees are skipped, not searched."""
   for node in tree.children:
      if (not isinstance(node, lark.lexer.Token)):
         continue
      if (node.type == type_):
         yield node.value

def variables_sub(s, variables):
   """Return s with variable references replaced by their values.

   A reference is $NAME or ${NAME}, where NAME is a letter or underscore
   followed by letters, digits, or underscores, and the "$" is not preceded
   by a backslash. variables maps names to string values.

   The Dockerfile spec does not say what to do when substituting a variable
   that is not set. We leave those references unchanged. This is probably
   wrong (the shell substitutes the empty string).

   The string is scanned once, so: the whole name must match ("$FOOBAR" is
   not a reference to FOO); values are inserted literally, including any
   backslashes; and a "$" inside a substituted value is not itself expanded.
   """
   # FIXME: This should go in the grammar rather than being a regex kludge.
   def replace(m):
      # Group 1 is the braced form, group 2 the bare form.
      name = m.group(1) or m.group(2)
      # Unset variable: keep the reference text as it was.
      return variables.get(name, m.group(0))
   # A function replacement is used so the value is not interpreted as a
   # regex replacement template.
   return re.sub(r"(?<!\\)\$(?:\{([A-Za-z_][A-Za-z0-9_]*)\}"
                 r"|([A-Za-z_][A-Za-z0-9_]*))", replace, s)

def unescape(sl):
   """Return the string denoted by the double-quoted string literal sl.

   Exits via FATAL() if sl is not wrapped in double quotes, cannot be
   parsed, or does not evaluate to a single string.
   """
   # FIXME: This is also ugly and should go in the grammar.
   #
   # The Dockerfile spec does not precisely define string escaping, but I'm
   # guessing it's the Go rules. You will note that we are using Python rules.
   # This is wrong but close enough for now (see also gripe in previous
   # paragraph).
   #
   # The length check rejects a lone '"', which both starts and ends with a
   # quote.
   if (len(sl) < 2 or not (sl.startswith('"') and sl.endswith('"'))):
      FATAL("string literal not quoted")
   try:
      value = ast.literal_eval(sl)
   except (SyntaxError, ValueError) as x:
      # E.g. an unescaped quote in the middle of the literal.
      FATAL("can't parse string literal: %s: %s" % (sl, x))
   # Something like '"a", "b"' passes the quote check but is a tuple.
   if (not isinstance(value, str)):
      FATAL("not a single string literal: %s" % sl)
   return value


## Bootstrap ##

# Run main() only when this file is executed as a script, not when it is
# imported as a module.
if (__name__ == "__main__"):
   main()
