#!/usr/bin/env python

import glob
import hashlib
import json
import os
import re
import shutil
import sys
import tempfile
import traceback

try:
    import gevent.subprocess as subprocess
except:
    import subprocess

# Runtime switches, each controlled by an environment variable.

# BUILDCACHE_VERBOSE=1 enables progress messages such as cache hits.
verbose = os.environ.get('BUILDCACHE_VERBOSE') == '1'
# BUILDCACHE_DEBUG=1 enables detailed dumps (context, footprint, file lists).
debug = os.environ.get('BUILDCACHE_DEBUG') == '1'
# Child processes are traced (strace -ff) by default.  Previously this flag
# was True on both code paths, so the variable could never switch it off;
# now any value other than '1' disables it while unset keeps the default.
follow_fork = os.environ.get('BUILDCACHE_FOLLOW_FORK', '1') == '1'

# Patterns applied to single lines of strace output.  A bare "." stands in
# for each literal "(" or ")" so the parentheses need no escaping.  Raw
# strings keep the backslash escapes intact for the regex engine.
# NOTE(review): only open(...) lines are recognised; if the traced programs
# issue openat() instead, those calls will not be seen -- confirm on the
# target platform.

# Flag strings that mark an open() as read-only.
readonly_re = re.compile(r'.*O_RDONLY.*')
# open("path", FLAGS[, mode]) = fd
#   group 1: path, group 2: flags, group 3: optional ", mode", group 4: fd.
# Failed calls ("= -1 ...") do not match because the result must be digits.
open_re = re.compile(r'open."([^"]+)", ([A-Za-z_\|]+)(, \d+)?. += (\d+).*')
# write(fd, ...): group 1 is the file descriptor.
write_re = re.compile(r'write.(\d+).*')
# mmap(addr, length, prot, flags, fd, offset).  The address/offset classes
# previously used the range "A-z", which also admitted "[", "\", "]", "^",
# "_" and "`"; "A-Z" is what was meant.
mmap_re = re.compile(r'mmap.([0x0-9a-zA-Z]+), (\d+), ([A-Z_|]+), ([A-Z_|]+), (-?\d+), ([0x0-9a-zA-Z]+).*')

# Regular expressions (applied with re.match, i.e. anchored at the start of
# the path) for files that are never part of the footprint or the cache.
patterns_to_skip = [
    # system directories
    '/dev/.*',
    '/etc/.*',
    '/lib/.*',
    '/lib64/.*',
    '/proc/.*',
    '/run/.*',
    '/sys/.*',
    '/tmp/.*',
    '/usr/.*',
    '/var/.*',
    # network file system
    '/afs/csail.mit.edu/.*',
    # any path with a component that starts with a dot
    r'.*/\..*',
    '/afs/.*',
]

# Environment variables that are left out of the recorded build context.
# Each entry is a regular expression applied with re.match, so it is anchored
# at the start of the name only: 'USER' also excludes e.g. 'USERNAME'.
environment_variables_to_skip = [
    '.*EMACS.*',
    '.*SESSION_COOKIE.*',
    'BUILDCACHE.*',
    'COLORTERM',
    'COLUMNS',
    'DBUS.*',
    'DISPLAY',
    'GNOME.*',
    'GPG_AGENT_INFO',
    'LESSCLOSE',
    'LESSOPEN',
    'LOGNAME',
    'LS_COLORS',
    'MAIL',
    'MAKEFLAGS',
    'MAKELEVEL',
    'MFLAGS',
    'NXSESSIONID',
    'OLDPWD',
    'SESSION_MANAGER',
    'SHADOW_XAUTHORITY',
    'SHLVL',
    'SSH.*',
    'STY',
    'TERM',
    'TERMCAP',
    'TMUX.*',
    'USER',
    'WINDOW',
    'XAUTHORITY',
    # Was the glob 'XDG*', which as a regex means "XD" followed by any number
    # of "G" and therefore excluded every variable starting with "XD".
    'XDG.*',
]

environment_re_to_skip = [re.compile(p) for p in environment_variables_to_skip]

# Compiled skip patterns consulted by filterFiles(); starts empty and is
# rebuilt from patterns_to_skip later in the script.
re_to_skip = []

# State accumulated by scanStraceFiles().
# path -> '|'-joined flag strings of every read-only open() seen for it
filesRead = {}
# path -> open() flags of the descriptor first seen in a write() to it
filesWritten = {}
# file descriptor (kept as a string) -> path of the most recent open()
fdFile = {}
# file descriptor (kept as a string) -> flags of the most recent open()
fdFlags = {}

## python 2.6 does not have subprocess.check_output
def check_output(cmd):
    """Run cmd (an argument list) and return what it wrote to stdout.

    The exit status is not inspected and stderr is not captured.
    """
    child = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    # communicate() yields (stdout, stderr); only stdout was piped.
    return child.communicate()[0]

def scanStraceFiles(tmpdir):
    """Parse every strace log in tmpdir and record which files were touched.

    Results accumulate in the module-level dictionaries:
      fdFile / fdFlags -- path and flags of the latest open() per descriptor
      filesRead        -- paths opened with O_RDONLY, mapped to their flags
      filesWritten     -- paths whose descriptor later appears in a write()

    Relative paths are made absolute against this process's current working
    directory.  The descriptor tables are shared across all log files.
    """
    for stracefile in glob.glob('%s/*' % tmpdir):
        # "with" closes each log deterministically; the handle used to be
        # left open until garbage collection.
        with open(stracefile, 'r') as log:
            for line in log:
                m = open_re.match(line)
                if m:
                    fname = os.path.abspath(m.group(1))
                    flags = m.group(2)

                    # The descriptor is always the last group that matched,
                    # whether or not the optional mode group is present.
                    fd = m.group(m.lastindex)
                    fdFile[fd] = fname
                    fdFlags[fd] = flags

                    if readonly_re.match(flags):
                        if fname in filesRead:
                            filesRead[fname] += '|' + flags
                        else:
                            filesRead[fname] = flags
                    # Otherwise it might be a write, but that is only
                    # recorded once a write() on the descriptor is seen.
                    continue

                wm = write_re.match(line)
                if not wm:
                    continue
                fd = wm.group(1)
                # Descriptors 0-2 are stdin/stdout/stderr, not build outputs.
                if int(fd) <= 2:
                    continue
                # A write to a descriptor with no recorded open() cannot be
                # attributed to a file.
                if fd not in fdFile:
                    continue
                fname = fdFile[fd]
                if fname not in filesWritten:
                    filesWritten[fname] = fdFlags[fd]

def writeContextFile():
    m = hashlib.sha1('PWD=%s %s' % (os.getcwd(), ' '.join(sys.argv)))
    sha1 = m.hexdigest()

    cache_dir = buildcache_cachedir(sha1)

    filename = os.path.join(cache_dir, 'buildcache.context.json')
    if not os.path.exists(os.path.dirname(filename)):
        os.makedirs(os.path.dirname(filename))
    env = {}
    for k in os.environ:
        if len([p for p in environment_re_to_skip if p.match(k)]):
            continue
        env[k] = os.environ[k]
    context = {'argv': sys.argv,
               'env': env }
    if debug:
        print 'context', context
    if os.path.isfile(filename):
        os.rename(filename, filename + '.old')
    f = open(filename, 'w+')
    jsonstr = json.dumps(context, sort_keys=True, indent=4, separators=(',', ': '))
    f.write(jsonstr)
    f.write('\n')
    f.close()
    if debug:
        print 'SHA1(PWD=%s %s)' % (os.getcwd(), ' '.join(sys.argv))
        print '    %s' % sha1
    return filename, cache_dir

def updateFilesWrittenFile(cache_dir, filesWritten):
    """Write the manifest of cached output files into cache_dir.

    The manifest 'buildcache.fileswritten.json' holds a JSON object with
      'filesWritten' -- the entries of filesWritten that still exist on disk
      'cwd'          -- the base directory for the paths, always '/'
    The directory is created if it does not exist.
    """
    filename = os.path.join(cache_dir, 'buildcache.fileswritten.json')
    directory = os.path.dirname(filename)
    if not os.path.exists(directory):
        os.makedirs(directory)
    info = {'filesWritten': [f for f in filesWritten if os.path.exists(f)],
            'cwd': '/'}
    # "with" closes the manifest even if the write fails part-way.
    with open(filename, 'w+') as manifest:
        manifest.write(json.dumps(info))

def readFilesWrittenFile(cache_dir):
    """Load the manifest 'buildcache.fileswritten.json' from cache_dir.

    Returns the decoded JSON content, or an empty list when the manifest
    does not exist.  The check is made on the manifest file itself; testing
    only its directory raised IOError for an entry that had no manifest.
    """
    filename = os.path.join(cache_dir, 'buildcache.fileswritten.json')
    if not os.path.isfile(filename):
        return []
    with open(filename, 'r') as manifest:
        return json.loads(manifest.read())

def writeFilesToCache(cache_dir, filesWritten):
    cwd = '/'
    for f in filesWritten:
        if not os.path.exists(f):
            if debug:
                print 'skipping non-existent file', f
            continue
        relname = os.path.relpath(f, cwd)
        cname = os.path.join(cache_dir, relname)
        if debug:
            print 'caching', f
            print '     to', cname 
        cdir = os.path.dirname(cname)
        if not os.path.isdir(cdir):
            os.makedirs(cdir)
        shutil.copy2(f, cname)

def readFilesFromCache(cache_dir):
    info = readFilesWrittenFile(cache_dir)
    filesWritten = info['filesWritten']
    cwd = info['cwd']
    for f in filesWritten:
        relname = os.path.relpath(f, cwd)
        cname = os.path.join(cache_dir, relname)
        if not os.path.exists(cname):
            print 'cached file does not exist', cname
            continue
        if debug:
            print 'restoring', f
        fdir = os.path.dirname(f)
        if not os.path.isdir(fdir):
            os.makedirs(fdir)
        try:
            shutil.copy2(cname, f)
        except:
            print 'Exception:', sys.exc_info()
            print 'Restoring cname %s to %s' % (cname, f)
            print traceback.format_exc()
        
def filterFiles(files):
    """Return the sorted entries of files that are regular files to keep.

    An entry is dropped when it is not an existing regular file or when any
    pattern in re_to_skip matches it from the start of the path.
    """
    kept = [path for path in files
            if os.path.isfile(path)
            and not any(pattern.match(path) for pattern in re_to_skip)]
    kept.sort()
    return kept


def buildcache_cachedir(key):
    """Return the cache directory for key under $BUILDCACHE_CACHEDIR.

    Prints a notice and returns None when the variable is not set.
    """
    base = os.environ.get('BUILDCACHE_CACHEDIR')
    if base is None:
        # A parenthesised single argument prints identically under the
        # Python 2 print statement.
        print('no BUILDCACHE_CACHEDIR')
        return None
    return os.path.join(base, key)
def buildcache_outputdir():
    """Return $BUILDCACHE_OUTPUTDIR, or the current directory when unset."""
    if 'BUILDCACHE_OUTPUTDIR' in os.environ:
        return os.environ['BUILDCACHE_OUTPUTDIR']
    return os.getcwd()

def update_md5sum(md5sum):
    """Store md5sum (text in `md5sum` output format) in the cache entry.

    Writes 'buildcache.md5sum' inside the module-level cache_dir, creating
    the directory if needed.  Does nothing when cache_dir is not set.
    """
    if not cache_dir:
        return
    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)
    # "with" closes the file even if the write fails.
    with open(os.path.join(cache_dir, 'buildcache.md5sum'), 'w+') as f:
        f.write(md5sum)

def check_md5sum():
    if not cache_dir:
        if verbose: print 'no cache_dir'
        return 22
    if not os.path.exists(cache_dir):
        if verbose: print 'no cache entry', cache_dir
        return 23
    p = subprocess.Popen(['md5sum', '-c', os.path.join(cache_dir, 'buildcache.md5sum')], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    result = p.communicate()[0]
    if debug:
        print result
    elif verbose:
        for line in result.split('\n'):
            #print line
            if not line.endswith(' OK'):
                #print line.replace('FAILED', 'was changed')
                sys.stderr.write("%s\n" % line.replace('FAILED', 'was changed'))
    if p.returncode != 0:
        if os.path.isfile(context_file_name + '.old'):
            sys.stderr.write('%s\n' % check_output(['diff', '-u', context_file_name, (context_file_name + '.old')]))
    return p.returncode
    

# Private scratch directory that receives the strace logs.  mkdtemp() creates
# a fresh directory readable only by this user, replacing the predictable
# /tmp/buildcache-<uid>-<pid> name that was removed and recreated if it
# already existed.
tmpdir = tempfile.mkdtemp(prefix='buildcache-%d-%d-' % (os.getuid(), os.getpid()))

# Files under the traced tool's own installation prefix are not tracked.
# The prefix is the directory holding the executable, or its parent when
# that directory is named "bin".
processpath = os.path.dirname(check_output(['which', sys.argv[1]]))
if processpath.endswith('/bin'):
    # Stripping "/bin" from a tool in the top-level /bin leaves "/", which
    # would exclude every absolute path; keep the bin directory in that case.
    processpath = os.path.dirname(processpath) if processpath != '/bin' else processpath
# An empty result (tool not found by `which`) or "/" would produce a pattern
# that matches everything, so no pattern is added for those.
if processpath not in ('', '/'):
    # re.escape keeps regex metacharacters in the path (e.g. "+") literal.
    patterns_to_skip.append(os.path.join(re.escape(processpath), '.*'))

re_to_skip = [re.compile(pattern) for pattern in patterns_to_skip]

output_dir = buildcache_outputdir()

(context_file_name, cache_dir) = writeContextFile()

if check_md5sum() == 0:
    if verbose: print '%s: using cached result' % sys.argv[0]
    readFilesFromCache(cache_dir)
    sys.exit(0)

# Trace only the open and write system calls, logging into the scratch
# directory; -ff additionally traces child processes, one log per process.
strace_cmd = ['strace', '-e', 'open,write', '-o', '%s/strace' % tmpdir]
if follow_fork:
    strace_cmd.append('-ff')
# Without a cache entry there is nothing to record, so the command is run
# directly instead of under strace.
args = (strace_cmd if cache_dir else []) + sys.argv[1:]
strace = subprocess.Popen(args, bufsize=-1)
strace.communicate()
returncode = strace.returncode

if returncode == 0 and cache_dir:
    try:
        scanStraceFiles(tmpdir)
        output_dir = os.path.abspath(buildcache_outputdir())
        if debug:
            print 'footprint: ================================================================'
            for f in filterFiles(filesRead):
                print f
        if debug:
            print 'files to cache: ==========================================================='
            for f in filterFiles(filesWritten):
                print f
            print '==========================================================================='
        footprint = [context_file_name]
        for uf in filterFiles(filesRead):
            if uf in filesWritten:
                continue
            # if os.path.abspath(uf).startswith(output_dir):
            #     continue
            footprint.append(uf)
        md5sum = check_output(['md5sum'] + footprint)
        if debug:
            print md5sum
        update_md5sum(md5sum)
        filtered_files_written = [context_file_name] + filterFiles(filesWritten)
        updateFilesWrittenFile(cache_dir, filtered_files_written)
        writeFilesToCache(cache_dir, filtered_files_written)
        check_md5sum()

    except:
        print "Unexpected error:", sys.exc_info()
        print traceback.format_exc()

    shutil.rmtree(tmpdir)

sys.exit(returncode)
