#!/usr/bin/Rscript --silent

args <- commandArgs(TRUE)

USAGE_STRING = "============= ReMix Testing Script   v. June 28, 2014 ========================
Usage:

$ printscore-admix.R <DISTPATH> <CONDSTRING> <reference> <input> <nvec> <cond-pop>

 	Arguments:
 1 DISTPATH  path to the directory containing readmix.r
 2 Condition string - free or equal, the second for equal weights
 3 Reference file - a CSV file with mean population admixture
 4 Input fie
 5 nvec
 6 Initial populations file
 
 If the directory contains a file with name (your_reference_file_name).sdev , 
 it will be used too.
"

NES_OPTS <- 6

if(length(args) < NES_OPTS){
	cat(USAGE_STRING)
	stop()
}

# Argument parsing
optarg <- 1
DISTPATH = args[[optarg]]
optarg <- optarg + 1

CONDSTRING = args[[optarg]]
optarg <- optarg + 1
# Reference file : a csv file with (mean) admixture proportions of known populations
REFERENCE_FILE = args[[optarg]] # 'Harappa1.csv'
optarg <- optarg + 1
#  Additional file: errors for reference:
REF_SD_FILE = paste(REFERENCE_FILE, ".sdev", sep="")

# Infile: file that has the test sample as its second row (the first row is the header naming the ancestral populations)
INFILE = args[[optarg]]
optarg <- optarg + 1
# Populations to begin with

N_REP = as.numeric(args[[optarg]])
optarg <- optarg + 1

keep.init = TRUE
PRESENT_POPS_FILE = args[[optarg]]
optarg <- optarg + 1
CONDITIONAL = TRUE
	
present_pops = read.table(PRESENT_POPS_FILE, stringsAsFactors = FALSE)

if ( ncol(present_pops) != 1 ){
	stop("Error in the populations file: there should be one population per row \n")
}
present_pops = present_pops[[1]]  # convert to vector

comments = grep("^#", present_pops)
if( length(comments) > 0 ){
	present_pops = present_pops[-comments]
}

keep.init = rep(TRUE, length(present_pops) )

tentative = grep("^[?]", present_pops)
if (length(tentative) > 0){
	present_pops[tentative] = gsub( "^[?]", "", present_pops[tentative])
	keep.init[tentative] = FALSE
}

library("lpSolve")
source(paste(DISTPATH, "/R/readmix_deep.r", sep = ""))

R = read.csv(REFERENCE_FILE, row.names = 1)
R <- columns_sanity_check(R)
R <- prep_input(R)
K <- dim(R)[2]
# Reference population errors (sdev)
Rsd = NULL
if ( file.exists(REF_SD_FILE) ) {
	Rsd = read.csv(REF_SD_FILE, row.names=1)
	Rsd = as.matrix(Rsd)
 	if ( !all(dim(Rsd) == dim(R)) ) {
		cat("Reference stdev table size (", dim(Rsd),  ")does not match that of reference means", dim(R), ".\n")
		stop()
 	}
} else message("Reference stdev table not found")
 
test = read.csv(INFILE, row.names = 1) 

t <- as.matrix(test[1, , drop = FALSE])
colnames(t) <- colnames(test)
rownames(t) <- rownames(test)

Rcurr <- as.matrix(R[present_pops, ,drop=FALSE])
if (CONDSTRING == "free") {
	objfunc <- lp_cheb_approx_readmix(Rcurr, t)
	cat(objfunc$maxerr, "\n")
	if ( N_REP == 2 ) {
		tcurr <- t
		coefs <- c()
		for (i in present_pops) {
			r = R[i,]
			new_coef = get_coef(r,tcurr)
			coefs <- c(coefs, new_coef)
			tcurr = tcurr - new_coef * r
		}
		cat(max(abs(tcurr)), "\n")
		if ( length(args) > NES_OPTS ) {
			cat("Coefs:")
			print(coefs)
		}
	} else if ( N_REP == 3 ) {
#		cat(1-cor(t(t), pred(objfunc$solution, Rcurr)), "\n")
#		cat(1-max(apply(Rcurr, 1, FUN=function(x) cor(t(t), x))), "\n")
		cat(1-min(apply(Rcurr, 1, FUN=function(x) cor(t(t), x))), "\n")
	} else if ( N_REP == 4 ) {
#		cat(1-cor(t(t), pred(objfunc$solution, Rcurr)), "\n")
		cat(1-min(dist(Rcurr)), "\n")
#		cat(1-min(apply(Rcurr, 1, FUN=function(x) cor(t(t), x))), "\n")
	}
} else if (CONDSTRING == "equal") {
	objfunc <- lp_cheb_approx_readmix(Rcurr, t)
	cat(objfunc$maxerr, "\n")
	coefs <- rep(1 / length(present_pops), length(present_pops))
	tcurr = pred(coefs, Rcurr) - pred(objfunc$solution, Rcurr)
	cat(100*sqrt(sum(tcurr^2)/K), "\n")
#	cat(max(abs(tcurr)), "\n")
#		if ( length(args) > NES_OPTS ) {
#			cat("Coefs:")
#			print(coefs)
#		}
} else {
	message("Unknown condstring:", CONDSTRING)
}

