#!/usr/bin/perl

# script gcpreg

use strict;                             # use strict c-style declarations
use File::Basename;                     # funcs to parse pathnames

# Current version here:

my $version = "3.3.2";

# Global variables

# the following are command line options

my $chan_def;
my $channel;
my $tc_def;
my $tc;
my $app_met_def;
my $app_met;
my $app_met_suff;
my $gcppath;
my $gcpregfile;
my $makefigs;
my $term;
my @terms;
my $mode;
my $gpregrcfile;
my $verbose_level;
my $work_dir;
my $out_dir;
my $embryo_name;
my @gcps;
my $target;
my $exclude_file;
my $GreatMaxVal;
my $mGreatMaxVal;

# program globals start here

my $wavex_command="wavex";
my $splapp_command="splapp";
my $splextr_command="splextr";
my $regpat_command="regpat";

my $HAVE_GUI="yes";
my $jreg_command="gcpreg-extr";

my $varenv;

#my $gnuplot_command="gnuplot ";
my $gnuplot_command="gnuplot -persist";

my $infile;                             # full name of original data file
my $base;                               # filename of original data file
my $dir;                                # directory where original data file
                                        # is located

my $command_line;

my $xstart;
my $xend;
my $ystart;
my $yend;
my $ngenes;

# Main starts here ########################################################

$chan_def = 0;
$tc_def = 0;
$app_met_def = 0;
$makefigs = 0;
$term = "fig";
$mode = "auto";
$target = "reg";
$verbose_level = 0;
$work_dir = ".";
$out_dir = ".";
$GreatMaxVal = 255;
$xstart = 0;
$xend = 100;
$ystart = 45;
$yend = 55;
$ngenes = 1;

$command_line=join(' ',@ARGV);

while (@ARGV > 0) {
  $_ = $ARGV[0];

  if ($_ eq '--spl') {
    $app_met_def = 1;
    $app_met = "spl";
    $app_met_suff = "spl";
  } elsif ($_ eq '--frdwt') {
    $app_met_def = 1;
    $app_met = "frdwt";
    $app_met_suff = "frdwt";
  } elsif ($_ eq '--frdwt2') {
    $app_met_def = 1;
    $app_met = "frdwt2";
    $app_met_suff = "frdwt";
  } elsif ($_ eq '-A') {
    shift;
    $xstart = $ARGV[0];
  } elsif ($_ eq '-B') {
    shift;
    $xend = $ARGV[0];
  } elsif ($_ eq '-C') {
    shift;
    $ystart = $ARGV[0];
  } elsif ($_ eq '-D') {
    shift;
    $yend = $ARGV[0];
  } elsif ($_ eq '-Z') {
    shift;
    $ngenes = $ARGV[0];
  } elsif ($_ eq '-c') {
    shift;
    $chan_def = 1;
    $channel = $ARGV[0];
  } elsif ($_ eq '-d') {
    shift;
    $ENV{GCPPATH} = $ARGV[0];
  } elsif ($_ eq '-g') {
    shift;
    $makefigs = 1;
    $term = $ARGV[0];
  } elsif ($_ eq '-h') {
    print_help_msg();
    exit(0);
  } elsif ($_ eq '-m') {
    shift;
    $mode = $ARGV[0];
  } elsif ($_ eq '-n') {
    shift;
    $target = $ARGV[0];
  } elsif ($_ eq '-o') {
    shift;
    $out_dir = $ARGV[0];
  } elsif ($_ eq '-s') {
    shift;
    $ENV{GCPREGRCFILE} = $ARGV[0];
  } elsif ($_ eq '-t') {
    shift;
    $tc_def = 1;
    $tc = $ARGV[0];
  } elsif ($_ eq '-G') {
    shift;
    $GreatMaxVal = $ARGV[0];
  } elsif ($_ eq '-v') {
    print STDERR "This is gcpreg v.$version.\n";
    exit(0);
  } elsif ($_ eq '-V') {
    shift;
    if ($ARGV[0] =~ /^\d$/) {
      $verbose_level = $ARGV[0];
    } else {
      unshift @ARGV, $_;
      $verbose_level = 1;
    }
  } elsif ($_ eq '-w') {
    shift;
    $work_dir = $ARGV[0];
  } else {
    $infile  = $ARGV[0];                    # assign arguments to variables
    $infile =~ s{ ^ ~ ( [^/]* ) }           # substitute tilde so that open can
                { $1                        # understand it
                  ? (getpwnam($1))[7]
		  : ( $ENV{HOME} || $ENV{LOGDIR}
                       || (getpwuid($>))[7]
                     )
	    }ex;
    if ( $mode =~/^semiauto$/ ) {
      @gcps = @ARGV[1..$#ARGV];
    }
    last;
  }
  shift;
}

die "gcpreg: approximation is undefined (--spl or --frdwt) !\n" if ( !$app_met_def );
die "gcpreg: channel is undefined (-c) !\n" if ( !$chan_def );
die "gcpreg: timeclass is undefined (-t) !\n" if ( !$tc_def );
die "gcpreg: $infile not found!\n" if ( ! -e $infile );

$base = basename($infile);              # parse path name
$dir  = dirname($infile);

if ( $base =~/procd.asc$/ ) { ($embryo_name, $_) = split /procd.asc/ , $base ; }
if ( $base =~/.proc.d.asc$/ ) { ($embryo_name, $_) = split /.proc.d.asc/ , $base ;}
if ( $base =~/.norm$/ ) { ($embryo_name, $_) = split /.norm/ , $base ;}
if ( $base =~/.nrm$/ ) { ($embryo_name, $_) = split /.nrm/ , $base ;}
#print $embryo_name;

#if ( substr($base,-4) )

#$_ = $base;
#($embryo_name, $_) = split /\./ ;

die "<error>\n Undefined environment variable GCPPATH\n</error>\n" if ( !defined($ENV{GCPPATH}));
die "<error>\n Undefined environment variable GCPREGRCFILE\n</error>\n" if ( !defined($ENV{GCPREGRCFILE}));

$mGreatMaxVal = 0.2 * $GreatMaxVal;

$gcppath = $ENV{GCPPATH};
$gcpregfile = $ENV{GCPREGRCFILE};

printf("#$infile#\n");
$exclude_file = $out_dir . "/" . $embryo_name . ".exclude";
if ( -e $exclude_file ) {
	unlink($exclude_file);
}

if ( ( $target =~/^reg$/ ) || ( $target =~/^gcp$/ ) ) {

	if ( $app_met =~ /^frdwt$/ ) { do_wavex(); }

	if ( $app_met =~ /^frdwt2$/ ) { do_wavex2(); }

	if ( $app_met =~ /^spl$/ ) { do_splextr(); }

	if ( $HAVE_GUI =~ /^yes$/ && $mode =~/^interactive$/ ) {
		do_gui();
	} elsif ( $mode =~/^interactive$/ ) {
		die "<error>\n GUI for interactive mode is not installed!\n</error>\n"
	}

	if ( -e $exclude_file ) {
		die "Registration canceled!\n";
	}
} elsif ( $target =~/^ren$/ ) {
	printf("Going for registration with previously extracted GCPs!\n");
}

if ( ( $target =~/^reg$/ ) || ( $target =~/^ren$/ ) ) {
	do_regpat();
} elsif ( $target =~/^gcp$/ ) {
	printf("GCP's extracted, no registration performed!\n");
}

if ( $makefigs == 1 ) { 
	@terms = split /\,/,$term;
	foreach ( @terms ) {
		$term = $_;
		makefigures(); 
	}
}

if ( $target =~/^reg$/ ) {
	system("rm -f $work_dir/$embryo_name.txt");
}

if ( $verbose_level < 2 ) {
	system("rm -f $work_dir/$embryo_name.coef");
	system("rm -f $work_dir/$embryo_name.spl");
	system("rm -f $work_dir/$embryo_name.frdwt");
}
if ( $verbose_level < 1 ) {
	system("rm -f $out_dir/$embryo_name.gcp");
	system("rm -f $work_dir/$embryo_name.rg");
	system("rm -f $work_dir/$embryo_name.txt");
}

if ( ( $target =~/^reg$/ ) || ( $target =~/^ren$/ ) ) {
	print_tag();
}

#--subroutines-----------------

sub  do_gui()
{
	my $options;
	my $command;
	$options = " -c:$channel -o:$out_dir:$app_met_suff:$GreatMaxVal ";
	$command = $jreg_command . $options . $infile;
	system($command);
	if ( $? != 0 ) {
		exit(0);
	}
}

sub do_wavex()
{
	my $options;
	my $command;
	my $gcp;
	$options = " -o 1 -c $channel -t $tc -m $mode -V 2 -w $work_dir -d $out_dir -A $xstart -B $xend -C $ystart -D $yend ";
	$command = $wavex_command . $options . $infile;
	 if ( $mode =~/^semiauto$/ ) {
		for $gcp (@gcps) {
			$command = $command . " " . $gcp;
		}
	}
	system($command);
	if ( $? != 256 ) {
		exit(0);
	}
}

sub do_wavex2()
{
	my $options;
	my $command;
	my $gcp;
	$options = " -o 2 -c $channel -t $tc -m $mode -V 2 -w $work_dir -d $out_dir -A $xstart -B $xend -C $ystart -D $yend ";
	$command = $wavex_command . $options . $infile;
	 if ( $mode =~/^semiauto$/ ) {
		for $gcp (@gcps) {
			$command = $command . " " . $gcp;
		}
	}
	system($command);
	if ( $? != 256 ) {
		exit(0);
	}
}

sub do_splextr()
{
	my $options;
	my $command;
	my $gcp;
	my $continue_on_null_det;
	my $gcp_file;
	my @AP_coords;
	my @data_int;
	my $num_max;
	my $maximum;
	my $kounter;
	my $num_min;
	my $i;
	my $int_file;
	$options = " -c $channel -t $tc -V 2 -w $work_dir  -d $out_dir -A $xstart -B $xend -C $ystart -D $yend ";
	$command = $splapp_command . $options . $infile;
	system($command);
	$continue_on_null_det = 0;
	if ( $HAVE_GUI =~ /^yes$/ && $mode =~/^interactive$/) {
		$continue_on_null_det = 1;
	}
	if ( $? != 256 ) {
		if ( $continue_on_null_det == 0 ) {
			exit(0);
		} else {
			$int_file = "$gcppath/int.$tc";
			open(IN,"<$int_file") || die "can't read refernce points!";
			@data_int = <IN>;
			close(IN);
			@AP_coords = split /\s+/, $data_int[0];
			$gcp_file = "$out_dir/$embryo_name\.gcp";
			open(T,">$gcp_file");
			select(T);
			for ($i=0; $i<=$#AP_coords; $i++) {
				printf("%5.2f ",$AP_coords[$i]);
			}
			printf("\n");
			$num_max = 1;
			$num_min = 1;
			$maximum = 1;
			$kounter = 0;
			for ($i=0; $i<=$#AP_coords; $i++) {
				if ( $maximum ) {
					printf("%5.2f ", 0.4 * $GreatMaxVal);
					$maximum = 0;
					$num_max++;
				} else {
					printf("%5.2f ", 0.04 * $GreatMaxVal);
					$maximum = 1;
					$num_min++;
				}
			}
			close(T);
		}
	} else {
		$options = " -c $channel -t $tc -m $mode -V 2 -w $work_dir  -d $out_dir -A $xstart -B $xend -C $ystart -D $yend ";
		$command = $splextr_command . $options . $infile;
		if ( $mode =~/^semiauto$/ ) {
			for $gcp (@gcps) {
				$command = $command . " " . $gcp;
			}
		}
		system($command);
		if ( $? != 256 ) {
			exit(0);
		}
	}
}

sub do_regpat()
{
  my $options;
  my $command;

  $options = " -c $channel -t $tc -V 2 -w $work_dir -d $out_dir -A $xstart -B $xend -C $ystart -D $yend ";

  $command = $regpat_command . $options . $infile;

  system($command);

    if ( $? != 256 ) {
	exit(0);
    }

}

sub makefigures()
{
	my $script_file="$work_dir/tempscript";
	my $script;
	if ( $target =~/^reg$/ ) {
		open(SCRIPT,">$script_file");
		select(SCRIPT);
		$script = print_script();
		print SCRIPT $script;
		close(SCRIPT);
		system("$gnuplot_command $script_file");
		system("rm -f $script_file");
}
	if ( $verbose_level > 0 ) {
		if ( $app_met =~ /^frdwt$/ ) {
			open(SCRIPT,">$script_file");
			select(SCRIPT);
			$script = print_script_frdwt();
			print SCRIPT $script;
			close(SCRIPT);
		} elsif ( $app_met =~ /^spl$/ ) {
			open(SCRIPT,">$script_file");
			select(SCRIPT);
			$script = print_script_spl();
			print SCRIPT $script;
			close(SCRIPT);
		}
		system("$gnuplot_command $script_file");
		system("rm -f $script_file");
		open(SCRIPT,">$script_file");
		select(SCRIPT);
		$script = print_script_gcp();
		print SCRIPT $script;
		close(SCRIPT);
		system("$gnuplot_command $script_file");
		system("rm -f $script_file");
		system("rm -f $work_dir/gcptemp");
	}
}

sub print_script {
	my $script;
	my $script_output;
	my $pattern_file;
	my $int_file;
	if ( $app_met =~ /^frdwt$/ ) {
		$int_file = "$gcppath/w$tc\.e.100";
	}
	if ( $app_met =~ /^spl$/ ) {
		$int_file = "$gcppath/s$tc\.e.100";
	}
	$pattern_file = "$work_dir/$embryo_name\.rg";
	if ( $term !~ /^X11$/ ) {
		($script_output = <<EOOUT ) =~ s/^\s+//gm;
set output "$out_dir/$embryo_name\.$term"
EOOUT
	}
	($script = <<EOSCRIPT ) =~ s/^\s+//gm;
set term $term
set title "$embryo_name: Superimposed"
set xlabel "Percent Embryo Length"
set ylabel "Intensity"
set nokey
set xrange [0:100]
set yrange [0:$GreatMaxVal]
plot "$pattern_file" using 2:3 with lines lt 1 lw 2, "$int_file" using 2:3 with lines lt 2 lw 2
EOSCRIPT
	return $script_output . $script;
}

sub print_script_frdwt {
  my $script;
  my $script_output;
  my $frdwt_file;

  $frdwt_file = "$work_dir/$embryo_name\.frdwt";

  if ( $term !~ /^X11$/ ) {

    ($script_output = <<EOOUT ) =~ s/^\s+//gm;
       set output "$work_dir/$embryo_name\.frdwt\.$term"
EOOUT

  }

($script = <<EOSCRIPT ) =~ s/^\s+//gm;
set term $term
set title "$embryo_name: FRDWT"
set xlabel "Percent Embryo Length"
set ylabel "Intensity"
set nokey
set zeroaxis
set xrange [0:100]
set yrange [-$mGreatMaxVal:$GreatMaxVal]
plot "$frdwt_file" using 2:3 with lines lt 1 lw 2,"$frdwt_file" using 2:4 with lines lt 2 lw 2, "$frdwt_file" using 2:5 with lines lt 3 lw 2
EOSCRIPT

  return $script_output . $script;
}


sub print_script_spl {
  my $script;
  my $script_output;
  my $spl_file;

  $spl_file = "$work_dir/$embryo_name\.spl";

  if ( $term !~ /^X11$/ ) {

    ($script_output = <<EOOUT ) =~ s/^\s+//gm;
       set output "$work_dir/$embryo_name\.spl\.$term"
EOOUT

  }

($script = <<EOSCRIPT ) =~ s/^\s+//gm;
set term $term
set title "$embryo_name: SPL"
set xlabel "Percent Embryo Length"
set ylabel "Intensity"
set nokey
set zeroaxis
set xrange [0:100]
set yrange [0:$GreatMaxVal]
plot "$spl_file" using 2:3 with lines lt 1 lw 2,"$spl_file" using 2:4 with lines lt 2 lw 2
EOSCRIPT

  return $script_output . $script;
}

sub print_script_gcp {
  my $script;
  my $script_output;
  my $script_labels;
  my $pattern_file;
  my $gcp_file;
  my @data_gcp;
  my @AP_coords;
  my @prot_concs;
  my $i;
  my $gcp_temp_file="$work_dir/gcptemp";

  my @labels;
  my $num_max;
  my $num_min;
  my $maximum;
  my $kounter;
  my $ly;

  if ( $app_met =~ /^frdwt$/ ) {
    $pattern_file = "$work_dir/$embryo_name\.frdwt";
  }

  if ( $app_met =~ /^spl$/ ) {
    $pattern_file = "$work_dir/$embryo_name\.spl";
  }

  $gcp_file = "$out_dir/$embryo_name\.gcp";
  open(IN,"<$gcp_file") || die "can't read gcp!";
  @data_gcp = <IN>;
  close(IN);

  @AP_coords = split /\s+/, $data_gcp[0];
  @prot_concs = split /\s+/, $data_gcp[1];

  open(T,">$gcp_temp_file");
  select(T);

  $num_max = 1;
  $num_min = 1;
  $maximum = 1;
  $kounter = 0;

  for ($i=1; $i<=$#AP_coords; $i++) {
    if ( $prot_concs[$i] != 0.0 ) {
      printf("%9.3f %9.3f\n", $AP_coords[$i], $prot_concs[$i]);
      if ( $maximum ) {
        $ly = $prot_concs[$i]+15;
        $labels[$kounter] = " \"$num_max max\" at $AP_coords[$i],$ly center";
      } else {
        $ly = $prot_concs[$i]-10;
        $labels[$kounter] = " \"$num_min min\" at $AP_coords[$i],$ly center";
      }
      $kounter++;
    }
    if ( $maximum ) {
      $maximum = 0;
      $num_max++;
    } else {
      $maximum = 1;
      $num_min++;
    }
  }

  close(T);

  if ( $term !~ /^X11$/ ) {

    ($script_output = <<EOOUT ) =~ s/^\s+//gm;
       set output "$out_dir/$embryo_name\.gcp\.$term"
EOOUT

  }

$script_labels = "";
for ( $i=0; $i<=$#labels; $i++) {
  $script_labels .= sprintf "set label %s\n",$labels[$i];
}

($script = <<EOSCRIPT ) =~ s/^\s+//gm;
set term $term
set title "$embryo_name: gcps"
set xlabel "Percent Embryo Length"
set ylabel "Intensity"
set nokey
set xrange [0:100]
set yrange [0:$GreatMaxVal]
plot "$pattern_file" using 2:3 with lines lt 1 lw 2, "$gcp_temp_file" using 1:2 with points pt 2 ps 3
EOSCRIPT

  return $script_labels . $script_output . $script;
}

sub print_tag {
	my $filename;
	my @content;

	$filename="$out_dir/$embryo_name\.reg";
	open(IN,"<$filename") || die "can't open $filename!";
	@content = <IN>;
	close(IN);

	open(OUT,">$filename") || die "can't open $filename!";
	select(OUT);
	printf("<gcpreg>\nversion $version.\n");
	printf("%s\n",$command_line);
	printf("</gcpreg>\n");
	print @content;
	close(OUT);
}

#   print_help_msg: prints help message for -h option
#   -------------------------------------------------

sub print_help_msg {

    print STDERR <<EOF

USAGE: gcpreg   --spl | --frdwt -c <channel> -t <timeclass> [-g <terminal>] [-h]
                 [-m <mode>] [-o <out_dir>] [-s <parameters file>] [-v] [-V[<level>]]
                <filename> [1max=<coord>] [1min=<coord>] ... [7max=<coord>]

ARGUMENTS:
  --spl
  --frdwt
  -c <channel>                   number of a channel containing the eve pattern.
                                 Can be 1, 2 or 3; This option should be always present
                                 on the command line.
  -t <timeclass>                 Temporal class the embryo belongs to: 2, 3, ... , 8.
                                 This option should be always present on the command line.
  <filename>                     Datafile to process;
OPTIONS:
  -d <gcppath>
  -g <terminal>                  Enable this option if you want to check the quality of results
                                 visually. Depending on verbose level this option plots graphs
                                 from all resulting files using gnuplot. Consequently, terminal
                                 can be any that gnuplot understands with `set term` command.
                                 Remember that if <terminal> is `X11` then graphs will appear
                                 on the screen immediately.
  -h                             Print help message and exit;
  -m <mode>                      Can be auto(default), or semiauto, allowing for a user to define
                                 some or all GCP. Mode interactive can be called only if the Java
				 GUI is installed.
  -o <out_dir>			 *.gcp files and figures about them;
  -s <parameters file>           Name of the parameters fiel to use instead of the default
                                 - gcpreg.rc. gcpreg.rc is defined by the environment virable
                                 \$GCPREGRCFILE that should be set.
  -v                             Print version and exit;
  -V[<level>]                    Be verbose: save (don't delete) intermediate information.

                                 Level 1 is default and is set if option is given without any
                                         argument. Info:
					 <filename>.rg
					 Contains 1D 10% strip after registration. Used to
					 check registration quality together with int.<tc>
					 in graphical mode.
					 <filename.gcp>
					 GCP's

                                 Level 2 gives everything from Level1 and adds
                                 	 <filename>.spl or <filename>.frdwt.

  -w <work_dir>			 *.spl, *.frdwt, *.rg files and figures about them;

  <GCP>=<coord>                  semiauto mode only! <GCP> can be 1min, 2min, 3min, 4min, 5min,
                                 6min, or 1max, 2max, 3max, 4max, 5max, 6max, 7max.

DESCRIPTION:

The program is designed to register one-dimensional eve
expression patterns belonging to one and the same temporal class.
These patterns cannot be directly superimposed because of small
individual differences among the population. Finding of the
coordinate transformations such that expression domains of the
gene product in different embryos would be superimposed defines
the registration problem.

The registration is implemented by means of the point mapping
technique. It is based on the extraction of ``ground control
points'' (GCP), which are a small number of characteristic
features in each pattern, and application of a coordinate
transformation to make the patterns coincide as closely as
possible. There are two options to extract discrete characteristic
features from gene expression patterns: by means of quadratic
spline approximation or wavelet decomposition.

INPUT FILES:

  gcpreg.rc                      contains parameters controlling program behavior. Defined by the
                                 environment variable \$GCPREGRCFILE.

  \$GCPPATH/int.<timeclass>      where timeclass can be 2,3, ..., 8. Files with standard
                                 coordinates.

  \$GCPPATH/s<timeclass>.e.100   files containing eve integrated patterns. (Will be fetched from
                                 FlyEx in the future).

OUTPUT FILES:

 <filename>.reg                  The registered pattern (x - coordinates are transformed). The
                                 header contains information from gcpreg enclosed in tags:
                                 <gcpreg> and <\gcpreg>.

 <filename>.rg                   Only in Verbose mode (level 1 and any higher). Contains 1D 10%
                                 strip after registration. Used to check registration quality
                                 together with int.<tc> in graphical mode.

 <filename>.gcp                  Only in Verbose mode (level 1 and any higher).

 <filename>.spl                  Only in Verbose mode (level 2). Contains spline approximation
                                 and 1D 10% strip before registration. Used to check approximation
                                 quality in graphical mode.

 <filename>.frdwt                Only in Verbose mode (level 2). Contains wavelet approximation
                                 and 1D 10% strip before registration. Used to check approximation
                                 quality in graphical mode.

Please report bugs to <kozlov\@spbcas.ru>. Thank you!

EOF

}
