#!
#
# $Header: /u/drspeech/repos/quicknet2/qngentrn.in,v 1.16 2014/05/06 02:06:47 davidj Exp $
#
# qngentrn - train an MLP layer-by-layer

import sys
import string
import re
import time
import os
import argparse
import subprocess
import socket

version = "v3_50pre4"   # The version of this program
progname = "qngentrn"           # The name of this program
qnmultitrn = "./qnmultitrn"     # Default trainer binary (overridden by --qnmultitrn)
qnmungewts = "./qnmungewts"     # Default weight-munging binary (overridden by --qnmungewts)
maxepoch = 999999               # Bigger than any epoch

logfile = None                  # Log stream; opened after argument parsing

# Per-generation statistics collected while training, one entry per
# hidden layer grown; used for the final summary table.
gen_arch = [ ]                  # Architecture string, e.g. "153x200x56"
gen_epochs = [ ]                # Number of epochs trained
gen_best_cv_percent = [ ]       # Best cross-validation accuracy (percent)
gen_best_cv_epoch = [ ]         # Epoch of the best CV accuracy
gen_best_train_percent = [ ]    # Best training accuracy (percent)
gen_best_train_epoch = [ ]      # Epoch of the best training accuracy
gen_mcups = [ ]                 # Speed in MCUPS (million connection updates/sec)
gen_mcps = [ ]                  # Speed in MCPS (million connections/sec)
gen_secs = [ ]                  # Wall-clock seconds spent on the generation

def type_uint_list(s):
    """argparse type checker: a comma-separated list of unsigned ints.

    Returns the stripped string (callers split it again later); raises
    argparse.ArgumentTypeError if any element is not a non-negative int.
    """
    # The original called s.strip() and discarded the result (str.strip
    # returns a new string); keep the stripped value.
    s = s.strip()
    for ss in s.split(','):
        try:
            i = int(ss)
        except ValueError:
            raise argparse.ArgumentTypeError("invalid unsigned int list: '%s'" % s)
        if i < 0:
            raise argparse.ArgumentTypeError("invalid unsigned int list: '%s'" % s)
    return s

def type_float_list(s):
    """argparse type checker: a comma-separated list of floats.

    Returns the stripped string (callers split it again later); raises
    argparse.ArgumentTypeError if any element is not a valid float.
    """
    # The original called s.strip() and discarded the result (str.strip
    # returns a new string); keep the stripped value.
    s = s.strip()
    for ss in s.split(','):
        try:
            float(ss)
        except ValueError:
            raise argparse.ArgumentTypeError("invalid float list: '%s'" % s)
    return s

def type_uint(s):
    """argparse type checker: a single unsigned (non-negative) integer."""
    bad = argparse.ArgumentTypeError("invalid unsigned int: '%s'" % s)
    try:
        value = int(s)
    except ValueError:
        raise bad
    if value < 0:
        raise bad
    return value

def type_bool(s):
    """argparse type checker: strings beginning 1/t/y (any case) are True."""
    return re.match(r'[1ty]', s, re.IGNORECASE) is not None

def qnlog(s):
    # Write a progname-tagged status line to the global log stream and
    # flush immediately so progress is visible while child jobs run.
    logfile.write("%s: %s\n" % (progname, s))
    logfile.flush()

def qndie(s):
    # Report a fatal error on stderr and terminate with non-zero status.
    sys.stderr.write("%s: ERROR - %s.\n" % (progname, s))
    sys.exit(1)

def qnlaunch(cmd, prefix, outvars = None):
    """Run a child command, relaying its stdout to the log.

    cmd     - command line to execute (split on whitespace).
    prefix  - tag prepended to each relayed output line.
    outvars - optional dict; stdout lines of the form
              "QNFACT_<KEY> <value>" are captured into it as
              outvars[KEY] = value instead of being logged.

    Returns the child's exit status; a Ctrl-C aborts the program.
    """
    qnlog("launching %s: %s" % (prefix, cmd))
    proc = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE)
    try:
        while True:
            line = proc.stdout.readline()
            if not line:        # EOF - child closed its stdout
                break
            m = re.match(r'^QNFACT_([^ ]+) (.+)\n$', line)
            if m is not None:
                # Machine-readable stat lines are captured, never logged.
                if outvars is not None:
                    (key, value) = m.groups()
                    outvars[key] = value
            else:
                logfile.write(prefix + ": " + line)
                logfile.flush()
        rc = proc.wait()
    except KeyboardInterrupt:
        qndie("program interrupted")
    return rc

# Do a full weight file template expansion for commands that do not do
# it themselves
def qnfulltemplate(s, gen, epoch):
    """Fully expand weight-file template escapes, in a single pass.

    %g - generation number     %e - epoch number
    %p - process ID            %t - current date/time (YYYYmmdd-HH:MM)
    %h - short hostname        %% - a literal percent sign

    The old chained-re.sub version handled "%%" last, so an escaped
    sequence such as "%%g" was wrongly expanded to "%<gen>"; a single
    pass with a replacement function protects escapes correctly.
    """
    subs = {
        'g': str(gen),
        'e': str(epoch),
        'p': str(os.getpid()),
        't': time.strftime("%Y%m%d-%H:%M"),
        'h': re.sub(r'\..*$', "", socket.gethostname()),  # strip domain
        '%': '%',
    }
    return re.sub(r'%([gepth%])', lambda m: subs[m.group(1)], s)

# Do a partial weight file template expansion for commands that also
# do their own expansion
def qnparttemplate(s, gen):
    """Expand only the %g (generation) and %p (pid) template escapes,
    leaving all other escapes for the child program to expand itself."""
    for escape, value in (('%g', str(gen)), ('%p', str(os.getpid()))):
        s = re.sub(escape, value, s)
    return s


# Build the command-line parser.  Option names deliberately mirror the
# corresponding qnmultitrn options so values can be forwarded verbatim.
parser = argparse.ArgumentParser(prog=progname,description="QuickNet layerwise MLP training script")

parser.add_argument("-V", "--version", action="version", version="%s %s"%(progname, version), help="Display version and exit")
parser.add_argument("-v", "--verbose", type=type_bool, metavar="BOOL", help="Generate extra status messages")
# ...
parser.add_argument("-d", "--debug", type=int, default=0, help="Level of diagnostic output")
parser.add_argument("--log_file", metavar="FILE", default="-", help="Output log file")


# Input feature stream (ftr1) options.
parser.add_argument("--ftr1_file",metavar="FILE", help="Input feature file")
parser.add_argument("--ftr1_format", choices=['pfile','pre','lna','onlftr','srifile','srilist','htklist','htkscp'], help="Input feature file format")
parser.add_argument("--ftr1_width", type=type_uint, metavar="WIDTH", help="Input feature file width")
parser.add_argument("--ftr1_norm_mode", choices=['file'], help="Normalization mode - currently only \"file\" is supported")
parser.add_argument("--ftr1_norm_file", metavar="FILE", help="Normalization file for input feature file")
parser.add_argument("--ftr1_ftr_start", type=type_uint, default=None, metavar="INT", help="Index of first feature from ftr1_file")
# (help-string typo fixed: "feature sfrom" -> "features from")
parser.add_argument("--ftr1_ftr_count", type=type_uint, default=None, metavar="INT", help="Number of features from ftr1_file")
parser.add_argument("--ftr1_delta_order", type=type_uint, metavar="INT", help="Order of derivatives added to ftr1_file")
parser.add_argument("--ftr1_delta_win", type=type_uint, metavar="INT", help="Window size for ftr1_file derivative calculation")
parser.add_argument("--ftr1_window_offset", type=int, metavar="INT", help="Offset of window on ftr1_file in frames")
parser.add_argument("--ftr1_window_len", type=type_uint, metavar="INT", help="Length of window on ftr1_file in frames")

# Target label stream options.
parser.add_argument("--hardtarget_file", metavar="FILE", help="Target label file")
parser.add_argument("--hardtarget_format", choices=['pfile','pre','ilab'], help="Target label file format")
parser.add_argument("--hardtarget_window_offset", type=type_uint, metavar="INT", help="Offset of label in hardtarget_file window in frames")
parser.add_argument("--hardtarget_lastlab_reject", type=type_bool, metavar="BOOL", help="Last label value indicates no-train frames?")

parser.add_argument("--window_extent", metavar="INT", help="Extent of all windows in frames")

# Training/cross-validation selection and cache options.
parser.add_argument("--train_sent_range", metavar="RANGE", help="Training sentence indices in QN_Range format")
parser.add_argument("--cv_sent_range", metavar="RANGE", help="Cross validation sentence indices in QN_Range format")
parser.add_argument("--train_cache_frames", type=type_uint, metavar="INT", help="Number of training frames in cache")
parser.add_argument("--train_cache_seed", type=int, metavar="INT", help="Training presentation randomization seed")

# Random weight initialization ranges (comma-separated, per-layer lists).
parser.add_argument("--init_random_bias_min", type=type_float_list, metavar="FLOAT", help="Minimum random bias", default="-4.1")
parser.add_argument("--init_random_bias_max", type=type_float_list, metavar="FLOAT", help="Maximum random bias", default="-3.9")
parser.add_argument("--init_random_weight_min", type=type_float_list, metavar="FLOAT", help="Minimum random weight", default="-0.1")
parser.add_argument("--init_random_weight_max", type=type_float_list, metavar="FLOAT", help="Maximum random weight", default="0.1")
parser.add_argument("--init_random_seed", type=int, metavar="INT", help="")

# Weight file options; %h/%p/%g etc. are expanded by qnfulltemplate/
# qnparttemplate (or by the child program itself for log_weight_file).
parser.add_argument("--log_weight_file", metavar="FILE", default="qngentrn-%h-%p.weights", help="Intra-generation log weight file")
parser.add_argument("--log2_weight_file", metavar="FILE", default="qngentrn-%h-%p-pre%g.weights", help="Pre-gen log weight file")
parser.add_argument("--log3_weight_file", metavar="FILE", default="qngentrn-%h-%p-post%g.weights", help="Post-gen log weight file")
parser.add_argument("--log_weight_format", default="matlab", choices=['matlab'], help="Log weight file format")
parser.add_argument("--out_weight_file", metavar="FILE", default="out.weights", help="Output weight file")
parser.add_argument("--out_weight_format", default="matlab", choices=['matlab'], help="Output weight file format")

# Learning rate schedule for the final (full) net.
parser.add_argument("--learnrate_schedule", default='newbob', choices=['newbob','list','smoothdecay'], help="Learning rate schedule type")
parser.add_argument("--learnrate_vals", default='0.008', type=type_float_list, metavar="FLOAT", help="Learning rate(s)")
parser.add_argument("--learnrate_scale", default='0.5', metavar="SCALE", help="Scale factor for successive learning rates")
parser.add_argument("--learnrate_epochs", default='9999', metavar="EPOCHS", help="Maximum number of epochs")

# Optional overrides for the intermediate (non-final) generations.
parser.add_argument("--gen_learnrate_schedule", choices=['newbob','list','smoothdecay'], help="Learning rate schedule type for non-final nets")
parser.add_argument("--gen_learnrate_vals", type=type_float_list, metavar="FLOAT", help="Learning rate(s) for non-final nets")
parser.add_argument("--gen_learnrate_scale", metavar="SCALE", help="Scale factor for successive learning rates for non-final nets")
parser.add_argument("--gen_learnrate_epochs", metavar="EPOCHS", help="Maximum number of epochs for non-final nets")

# Network architecture (sizes are mandatory).
parser.add_argument("--mlp_input_size", "-I", required=True, type=type_uint, metavar="INT", help="Size of MLP input layer")
parser.add_argument("--mlp_hidden_size", "-H", type=type_uint_list, required=True, metavar="INT[,INT]...", help="Size of MLP hidden layers (comma-separated list)")
parser.add_argument("--mlp_output_size", "-O", required=True, type=type_uint, metavar="INT", help="Size of MLP output layer")
parser.add_argument("--mlp_bunch_size", type=type_uint, metavar="INT", help="Size of mini-batch for training")
parser.add_argument("--mlp_hidden_type", choices=['sigmoid', "tanh", "relu"], help="Non-linearity in MLP hidden layers")
parser.add_argument("--mlp_output_type", choices=['sigmoid', 'sigmoidx', 'softmax', 'tanh'], help="Non-linearity in MLP output layer")
parser.add_argument("--mlp_threads", type=type_uint, metavar="INT", help="Number of CPU threads to use")

# Implementation selection flags.
parser.add_argument("--use_blas", type=type_bool, metavar="BOOL",help="Use BLAS libraries?")
parser.add_argument("--use_pp", type=type_bool, metavar="BOOL", help="Use internal high-performance libraries?")
parser.add_argument("--use_fe", type=type_bool, metavar="BOOL", help="Use fast exponenet for sigmoid/softmax/tanh etc?")
parser.add_argument("--use_cuda", type=type_bool, metavar="BOOL", help="Use CUDA GPU hardware?")

# Paths of the helper programs this script drives.
parser.add_argument("--qnmultitrn", metavar="PROGRAM", default="qnmultitrn", help="version of qnmultitrn to use")
parser.add_argument("--qnmungewts", metavar="PROGRAM", default="qnmungewts", help="version of qnmungewts to use")

# QuickNet-style command lines allow bare "option=value" arguments;
# rewrite any such argument as "--option=value" so argparse accepts it.
argv = []
argv.append(sys.argv[0])
for i in range (1, len(sys.argv)):
    arg = re.sub(r'^([a-zA-Z0-9].*=.*)$', r'--\1', sys.argv[i])
    argv.append(arg)

args = parser.parse_args(argv[1:])

# Open the log stream: "-" means stdout, anything else is a file path.
logfile = None
if args.log_file == "-":
    logfile = sys.stdout
else:
    try:
        logfile = open(args.log_file, "w")
    except (IOError, OSError):
        # A bare "except:" here would also trap SystemExit/KeyboardInterrupt.
        qndie("failed to open log_file %s" % args.log_file)


## TODO
## Check for too many hidden layers
## Check for length of init_weight_min etc. lists

# Base command lines for the helper programs; options are appended below.
mungecmd = args.qnmungewts
traincmd = args.qnmultitrn
genargs = ""                    # extra trainer args for non-final generations
lastargs = ""                   # extra trainer args for the final generation

start_time = time.time()
# Keep only the first line of sys.version - the rest is build detail.
# (A stray no-op "sys.version" expression statement was removed here.)
python_version = ((sys.version).splitlines()[0]).strip()

# Log the program banner, then log every supplied option and forward
# the relevant ones onto the qnmultitrn/qnmungewts command lines.
qnlog("Program version: %s %s." % (progname, version))
qnlog("Program start: %s." % time.ctime(start_time))
qnlog("Host: %s." % socket.gethostname())
qnlog("Python: %s." % (sys.executable))
qnlog("Python version: %s." % python_version)
qnlog("Command line arguments:")
# Always ask the trainer for QNFACT_* stat lines (parsed by qnlaunch).
traincmd += " machine_readable=true"
if args.verbose is not None:
    qnlog("  verbose=%s" % args.verbose)
    traincmd += " verbose=%s" % args.verbose
    mungecmd += " verbose=true"            # Even with verbose, not much output
if args.debug is not None:
    qnlog("  debug=%i" % args.debug)
    traincmd += " debug=%i" % args.debug
    mungecmd += " debug=%i" % args.debug
if args.log_file is not None:
    qnlog("  log_file=%s" % args.log_file)
    traincmd += " log_file=%s" % args.log_file
if args.ftr1_file is not None:
    qnlog("  ftr1_file=%s" % args.ftr1_file)
    traincmd += " ftr1_file=%s" % args.ftr1_file
if args.ftr1_width is not None:
    qnlog("  ftr1_width=%s" % args.ftr1_width)
    traincmd += " ftr1_width=%i" % args.ftr1_width
if args.ftr1_norm_mode is not None:
    qnlog("  ftr1_norm_mode=%s" % args.ftr1_norm_mode)
    traincmd += " ftr1_norm_mode=%s" % args.ftr1_norm_mode
if args.ftr1_norm_file is not None:
    qnlog("  ftr1_norm_file=%s" % args.ftr1_norm_file)
    traincmd += " ftr1_norm_file=%s" % args.ftr1_norm_file
if args.ftr1_ftr_start is not None:
    qnlog("  ftr1_ftr_start=%s" % args.ftr1_ftr_start)
    traincmd += " ftr1_ftr_start=%s" % args.ftr1_ftr_start
if args.ftr1_ftr_count is not None:
    qnlog("  ftr1_ftr_count=%s" % args.ftr1_ftr_count)
    traincmd += " ftr1_ftr_count=%s" % args.ftr1_ftr_count
if args.ftr1_delta_order is not None:
    qnlog("  ftr1_delta_order=%s" % args.ftr1_delta_order)
    traincmd += " ftr1_delta_order=%s" % args.ftr1_delta_order
if args.ftr1_delta_win is not None:
    qnlog("  ftr1_delta_win=%s" % args.ftr1_delta_win)
    traincmd += " ftr1_delta_win=%s" % args.ftr1_delta_win
if args.ftr1_window_offset is not None:
    qnlog("  ftr1_window_offset=%s" % args.ftr1_window_offset)
    traincmd += " ftr1_window_offset=%s" % args.ftr1_window_offset
if args.ftr1_window_len is not None:
    qnlog("  ftr1_window_len=%s" % args.ftr1_window_len)
    traincmd += " ftr1_window_len=%s" % args.ftr1_window_len
if args.hardtarget_file is not None:
    qnlog("  hardtarget_file=%s" % args.hardtarget_file)
    traincmd += " hardtarget_file=%s" % args.hardtarget_file
if args.hardtarget_format is not None:
    qnlog("  hardtarget_format=%s" % args.hardtarget_format)
    traincmd += " hardtarget_format=%s" % args.hardtarget_format
if args.hardtarget_window_offset is not None:
    # (log-message typo fixed: "window_ofsset" -> "window_offset")
    qnlog("  hardtarget_window_offset=%s" % args.hardtarget_window_offset)
    traincmd += " hardtarget_window_offset=%s" % args.hardtarget_window_offset
if args.hardtarget_lastlab_reject is not None:
    qnlog("  hardtarget_lastlab_reject=%s" % args.hardtarget_lastlab_reject)
    traincmd += " hardtarget_lastlab_reject=%s" % args.hardtarget_lastlab_reject

if args.window_extent is not None:
    qnlog("  window_extent=%s" % args.window_extent)
    traincmd += " window_extent=%s" % args.window_extent

if args.train_sent_range is not None:
    qnlog("  train_sent_range=%s" % args.train_sent_range)
    traincmd += " train_sent_range=%s" % args.train_sent_range
if args.cv_sent_range is not None:
    qnlog("  cv_sent_range=%s" % args.cv_sent_range)
    traincmd += " cv_sent_range=%s" % args.cv_sent_range
if args.train_cache_frames is not None:
    qnlog("  train_cache_frames=%s" % args.train_cache_frames)
    traincmd += " train_cache_frames=%s" % args.train_cache_frames
if args.train_cache_seed is not None:
    qnlog("  train_cache_seed=%s" % args.train_cache_seed)
    traincmd += " train_cache_seed=%s" % args.train_cache_seed

# init_random_seed is only logged here; it is forwarded to mungecmd later.
if args.init_random_seed is not None:
    qnlog("  init_random_seed=%s" % args.init_random_seed)

# Weight-file options.  The *_weight_file templates are expanded by this
# script (qnfulltemplate/qnparttemplate) before being passed along; the
# log weight format doubles as the init format and munge output format.
if args.log_weight_file is not None:
    qnlog("  log_weight_file=%s" % args.log_weight_file)
if args.log2_weight_file is not None:
    qnlog("  log2_weight_file=%s" % args.log2_weight_file)
if args.log3_weight_file is not None:
    qnlog("  log3_weight_file=%s" % args.log3_weight_file)
if args.log_weight_format is not None:
    qnlog("  log_weight_format=%s" % args.log_weight_format)
    traincmd += " log_weight_format=%s" % args.log_weight_format
    traincmd += " init_weight_format=%s" % args.log_weight_format
    mungecmd += " out_format=%s" % args.log_weight_format
if args.out_weight_file is not None:
    qnlog("  out_weight_file=%s" % args.out_weight_file)
if args.out_weight_format is not None:
    qnlog("  out_weight_format=%s" % args.out_weight_format)
# Learning-rate settings for the final full net (appended as lastargs).
if args.learnrate_schedule is not None:
    qnlog("  learnrate_schedule=%s" % args.learnrate_schedule)
    lastargs += " learnrate_schedule=%s" % args.learnrate_schedule
if args.learnrate_vals is not None:
    qnlog("  learnrate_vals=%s" % args.learnrate_vals)
    lastargs += " learnrate_vals=%s" % args.learnrate_vals
if args.learnrate_scale is not None:
    qnlog("  learnrate_scale=%s" % args.learnrate_scale)
    lastargs += " learnrate_scale=%s" % args.learnrate_scale
if args.learnrate_epochs is not None:
    qnlog("  learnrate_epochs=%s" % args.learnrate_epochs)
    lastargs += " learnrate_epochs=%s" % args.learnrate_epochs

# Non-final generations (genargs) fall back to the final-net values
# when the corresponding gen_learnrate_* option was not given.
if args.gen_learnrate_schedule is not None:
    qnlog("  gen_learnrate_schedule=%s" % args.gen_learnrate_schedule)
    genargs += " learnrate_schedule=%s" % args.gen_learnrate_schedule
else:
    genargs += " learnrate_schedule=%s" % args.learnrate_schedule
if args.gen_learnrate_vals is not None:
    qnlog("  gen_learnrate_vals=%s" % args.gen_learnrate_vals)
    genargs += " learnrate_vals=%s" % args.gen_learnrate_vals
else:
    genargs += " learnrate_vals=%s" % args.learnrate_vals
if args.gen_learnrate_scale is not None:
    qnlog("  gen_learnrate_scale=%s" % args.gen_learnrate_scale)
    genargs += " learnrate_scale=%s" % args.gen_learnrate_scale
else:
    genargs += " learnrate_scale=%s" % args.learnrate_scale
if args.gen_learnrate_epochs is not None:
    qnlog("  gen_learnrate_epochs=%s" % args.gen_learnrate_epochs)
    genargs += " learnrate_epochs=%s" % args.gen_learnrate_epochs
else:
    genargs += " learnrate_epochs=%s" % args.learnrate_epochs

# mlp size params are mandatory
qnlog("  mlp_input_size=%s" % args.mlp_input_size)
mlp_input_size = int(args.mlp_input_size)

# The comma-separated hidden-size list determines how many generations
# (one grown hidden layer each) the script will run.
qnlog("  mlp_hidden_size=%s" % args.mlp_hidden_size)
mlp_hidden_size = list(args.mlp_hidden_size.split(","))
mlp_hidden_layers = len(mlp_hidden_size)
mlp_total_layers = mlp_hidden_layers + 2
if (mlp_total_layers<3) or (mlp_total_layers>9):
    qndie("total number of layers is %i, must be between 3 and 9"
           % mlp_total_layers)

qnlog("  mlp_output_size=%s" % args.mlp_output_size)
mlp_output_size = int(args.mlp_output_size)

if args.mlp_hidden_type is not None:
    traincmd += " mlp_hidden_type=%s" % args.mlp_hidden_type
    qnlog("  mlp_hidden_type=%s" % args.mlp_hidden_type)
if args.mlp_output_type is not None:
    traincmd += " mlp_output_type=%s" % args.mlp_output_type
    qnlog("  mlp_output_type=%s" % args.mlp_output_type)
if args.mlp_bunch_size is not None:
    traincmd += " mlp_bunch_size=%i" % args.mlp_bunch_size
    qnlog("  mlp_bunch_size=%s" % args.mlp_bunch_size)
if args.mlp_threads is not None:
    traincmd += " mlp_threads=%i" % args.mlp_threads
    qnlog("  mlp_threads=%s" % args.mlp_threads)

if args.init_random_seed is not None:
    mungecmd += " init_random_seed=%i" % args.init_random_seed
# Need to handle weight and bias stuff after we know size of net
init_random_bias_min = []
init_random_bias_max = []
init_random_weight_min = []
init_random_weight_max = []
if args.init_random_bias_min is not None:
    qnlog("  init_random_bias_min=%s" % args.init_random_bias_min)
    init_random_bias_min = list(args.init_random_bias_min.split(","))
if args.init_random_bias_max is not None:
    qnlog("  init_random_bias_max=%s" % args.init_random_bias_max)
    init_random_bias_max = list(args.init_random_bias_max.split(","))
if args.init_random_weight_min is not None:
    qnlog("  init_random_weight_min=%s" % args.init_random_weight_min)
    init_random_weight_min = list(args.init_random_weight_min.split(","))
if args.init_random_weight_max is not None:
    qnlog("  init_random_weight_max=%s" % args.init_random_weight_max)
    init_random_weight_max = list(args.init_random_weight_max.split(","))
# min/max lists must pair up, and their length must be 0 (use defaults),
# 1 (same everywhere), 2, or exactly one per weight section (layers-1).
if len(init_random_bias_min) != len(init_random_bias_max):
    qndie("number of init_random_bias_min values must be same as init_random_bias_max")
if len(init_random_weight_min) != len(init_random_weight_max):
    qndie("number of init_random_weight_min values must be same as init_random_weight_max")
lb = len(init_random_bias_min)
if (lb==0) or (lb==1) or (lb==2) or (lb==mlp_total_layers-1):
   pass
else:
    qndie("number of init_random_bias_min/max values must be 0, 1, 2 or one less than the total number of layers")
lw = len(init_random_weight_min)
if (lw==0) or (lw==1) or (lw==2) or (lw==mlp_total_layers-1):
   pass
else:
    qndie("number of init_random_weight_min/max values must be 0, 1, 2 or one less than the total number of layers")


# Forward implementation-selection flags (booleans forwarded as 0/1).
if args.use_blas is not None:
    traincmd += " use_blas=%i" % args.use_blas
    qnlog("  use_blas=%s" % args.use_blas)
if args.use_pp is not None:
    traincmd += " use_pp=%i" % args.use_pp
    qnlog("  use_pp=%s" % args.use_pp)
if args.use_fe is not None:
    traincmd += " use_fe=%i" % args.use_fe
    qnlog("  use_fe=%s" % args.use_fe)
if args.use_cuda is not None:
    traincmd += " use_cuda=%i" % args.use_cuda
    qnlog("  use_cuda=%s" % args.use_cuda)

if args.qnmultitrn is not None:
    qnlog("  qnmultitrn=%s" % args.qnmultitrn)
if args.qnmungewts is not None:
    qnlog("  qnmungewts=%s" % args.qnmungewts)



# Generation-loop state: one generation per hidden layer.
gens = mlp_hidden_layers
gen = 0
epoch = 0
cur_hidden_size = []            # hidden sizes of the net grown so far


### For all of the generations.  Each generation grows the net by one
### hidden layer (qnmungewts) and then trains the result (qnmultitrn).
### (Mixed tab/space indentation in the original body - a TabError
### under Python 3 - has been normalized to spaces; logic unchanged.)
for gen in range(1, gens+1):

    ### Some layers are special: the first has no previous weights to
    ### grow from; the last uses the final-net learning schedule and
    ### writes the user-visible output weight file.
    first_layer = (gen == 1)
    last_layer = (gen == mlp_hidden_layers)

    ### Between-gen weight munging
    cur_hidden_size.append(mlp_hidden_size[gen-1])
    cur_mlp_size = [mlp_input_size] + cur_hidden_size + [mlp_output_size]
    cur_mlp_size_str = [str(n) for n in cur_mlp_size]
    if not first_layer:
        in_weight_file = out_weight_file    # previous generation's output
        in_layers = gen
    out_weight_file = qnfulltemplate(args.log2_weight_file, gen, epoch)
    prefix = "grow%s-%s" % (str(gen), "x".join(cur_mlp_size_str))

    initcmd = ""
    # The number of bias/weights min/maxes is either 1, 2 or one less
    # than the total number of layers for the current generation
    bl = len(init_random_bias_min)
    if bl > 0:
        if bl > 2:
            bl = gen + 1
        initcmd += " init_random_bias_min=%s" % (",".join(init_random_bias_min[0:bl]))
        initcmd += " init_random_bias_max=%s" % (",".join(init_random_bias_max[0:bl]))
    wl = len(init_random_weight_min)
    if wl > 0:
        if wl > 2:
            wl = gen + 1
        initcmd += " init_random_weight_min=%s" % (",".join(init_random_weight_min[0:wl]))
        initcmd += " init_random_weight_max=%s" % (",".join(init_random_weight_max[0:wl]))

    cmd = mungecmd + initcmd
    if not first_layer:
        cmd += " in1_file=%s" % (in_weight_file)
        cmd += " in1_layers=%s" % (in_layers)
    cmd += " out_size=%s" % (",".join(cur_mlp_size_str))
    cmd += " out_file=%s" % (out_weight_file)
    if qnlaunch(cmd, prefix) != 0:
        qndie("generation %i weight file growth failed" % gen)

    ### Actual training
    init_weight_file = out_weight_file
    log_weight_file = qnparttemplate(args.log_weight_file, gen)
    if last_layer:
        out_weight_file = args.out_weight_file
        cmd = traincmd + lastargs
    else:
        out_weight_file = qnfulltemplate(args.log3_weight_file, gen, epoch)
        cmd = traincmd + genargs
    prefix = "bp%s-%s" % (str(gen), "x".join(cur_mlp_size_str))

    cmd += " init_weight_file=%s" % (init_weight_file)
    cmd += " log_weight_file=%s" % (log_weight_file)
    cmd += " out_weight_file=%s" % (out_weight_file)
    cmd += " mlp_size=%s" % (",".join(cur_mlp_size_str))
    outvars = {}
    sub_start = time.time()
    if qnlaunch(cmd, prefix, outvars) != 0:
        qndie("generation %i training failed" % gen)
    sub_end = time.time()
    sub_time = sub_end - sub_start
    # Record the machine-readable stats the trainer reported (QNFACT_*
    # lines captured by qnlaunch) for the end-of-run summary table.
    gen_arch.append((outvars.get('ARCH', None)).replace(' ', ''))
    gen_epochs.append(outvars.get('EPOCHS', None))
    gen_mcps.append(outvars.get('MCPS', None))
    gen_mcups.append(outvars.get('MCUPS', None))
    gen_best_cv_epoch.append(outvars.get('BEST_CV_EPOCH', None))
    gen_best_cv_percent.append(outvars.get('BEST_CV_PERCENT', None))
    gen_best_train_epoch.append(outvars.get('BEST_TRAIN_EPOCH', None))
    gen_best_train_percent.append(outvars.get('BEST_TRAIN_PERCENT', None))
    gen_secs.append(sub_time)

# Print the per-generation summary table.
qnlog("%-29s%7s %7s %5s %11s %6s" % ("ARCH", "TRAIN", "CV  ", "EPOCH", "GCPS/GCUPS", "HRS"))
for gen in range(0, gens):
    arch = gen_arch[gen]
    epochs = gen_epochs[gen]
    # Trainer reports MCPS/MCUPS; display as GCPS/GCUPS.
    gcps = float(gen_mcps[gen])/1000.0
    gcups =float(gen_mcups[gen])/1000.0
    best_cv_epoch = gen_best_cv_epoch[gen]
    best_cv_percent = gen_best_cv_percent[gen]
    best_train_epoch = gen_best_train_epoch[gen]
    best_train_percent = gen_best_train_percent[gen]
    # Values from qnlaunch's outvars are strings, so "/" joins directly.
    epstat = best_cv_epoch + "/" + epochs
    speedstat = "%.1f/%.1f" % (gcps, gcups)
    hours=float(gen_secs[gen])/3600
    qnlog("%-29s%6.2f%% %6.2f%% %5s %11s %6.2f" % (arch,
                                                 float(best_train_percent),
                                                 float(best_cv_percent),
                                                 epstat,
                                                 speedstat,
                                                 hours)
)
# best_cv_* still hold the final generation's values after the loop.
if (best_cv_epoch!=None and best_cv_percent!=None):
    qnlog("Best full net CV accuracy: %s%% correct in epoch %s." % (best_cv_percent, best_cv_epoch))
end_time = time.time()
qnlog("Program stop: %s." % time.ctime(end_time))
total_secs = int(end_time - start_time)
# Floor division keeps the breakdown integral under Python 3's true
# division as well as Python 2's integer division.
hours = total_secs // 3600
mins = (total_secs // 60) % 60
secs = total_secs % 60


qnlog("Program time: %d secs (%d hours, %d mins, %d secs)." % (total_secs, hours, mins, secs))

## TO DO


#################################################
## some args we probably want to add later
#################################################
# ckpt_weight_file = "ckpt-%h-%t.weights";
# ckpt_weight_format = "matlab";
# ckpt_hours = 0;
# mlp_lrmultiplier
# softtarget_file = "";
# softtarget_format = "pfile";
# softtarget_width = 0;
# softtarget_window_offset = 0;
# ftr1_norm_am = QN_DFLT_NORM_AM;
# ftr1_norm_av = QN_DFLT_NORM_AV;

