#!/usr/bin/perl

# script rgdata

use strict;                             # use strict c-style declarations
use File::Basename;                     # funcs to parse pathnames
use POSIX qw(tmpnam);                   # for temp files

# Current version here:

my $version = "3.3.2";

# Global variables

# the following are command line options

my $tc_def;
my $tc;
my $app_met_def;
my $app_met;
my $reference_dir;
my $reference_dir_defined;
my $verbose_level;
my $work_dir;
my $output_dir;
my $st_dir;
my $result_dir;
my $dataset_id;
my @embrlist;
my @filelist;
my @channellist;
my @dirlist;
my @missinglist;
my $makefigs;
my $term;
my $mode;
my $gnuplot_command;
my $gcppath;
my $embryo_name;
my $GreatMaxVal;

# program globals start here

my $gcpreg_command="gcpreg";
my $dregpat_command="dregpat";
my $gnuplot_command="gnuplot -persist";

my $varenv;

my $extrfile;

my $infile;                             # full name of original data file
my $base;                               # filename of original data file
my $dir;                                # directory where original data file
                                        # is located

my $command_line;

# Main starts here ########################################################

$tc_def = 0;
$makefigs = 0;
$term = "fig";
$mode = "auto";
$verbose_level = 0;
$work_dir = ".";
$output_dir = ".";
$result_dir = ".";
$reference_dir_defined = 0;
$dataset_id = substr(tmpnam(),-5);
$GreatMaxVal = 255;

$command_line=join(' ',@ARGV);

while (@ARGV > 0) {
  $_ = $ARGV[0];

  if ($_ eq '-d') {
    shift;
    $output_dir = $ARGV[0];
  } elsif ($_ eq '-g') {
    shift;
    $makefigs = 1;
    $term = $ARGV[0];
  } elsif ($_ eq '-h') {
    print_help_msg();
    exit(0);
  } elsif ($_ eq '-i') {
    shift;
    $dataset_id = $ARGV[0];
  } elsif ($_ eq '-r') {
    shift;
    $result_dir = $ARGV[0];
  } elsif ($_ eq '-s') {
    shift;
    $ENV{EXTRGCP} = $ARGV[0];
  } elsif ($_ eq '-t') {
    shift;
    $tc_def = 1;
    $tc = $ARGV[0];
  } elsif ($_ eq '-G') {
    shift;
    $GreatMaxVal = $ARGV[0];
  } elsif ($_ eq '-u') {
    shift;
    $reference_dir = $ARGV[0];
    $reference_dir_defined = 1;
  } elsif ($_ eq '-v') {
    print STDERR "This is rgdata v.$version.\n";
    exit(0);
  } elsif ($_ eq '-V') {
    shift;
    if ($ARGV[0] =~ /^\d$/) {
      $verbose_level = $ARGV[0];
    } else {
      unshift @ARGV, $_;
      $verbose_level = 1;
    }
  } elsif ($_ eq '-w') {
    shift;
    $work_dir = $ARGV[0];
  } else {
    $infile  = $ARGV[0];                    # substitute tilde so that open can understand it
    $infile =~ s{ ^ ~ ( [^/]* ) }
                { $1 ? (getpwnam($1))[7] : ( $ENV{HOME} || $ENV{LOGDIR} || (getpwuid($>))[7] ) }ex;
    last;
  }
  shift;
}

die "rgdata: timeclass is undefined (-t) !\n" if ( !$tc_def );
die "rgdata: $infile not found!\n" if ( ! -e $infile );

$base = basename($infile);              # parse path name
$dir  = dirname($infile);

#print $embryo_name;

#if ( substr($base,-4) )

#$_ = $base;
#($embryo_name, $_) = split /\./ ;

die "<error>\n Undefined environment variable EXTRGCP\n</error>\n" if ( !defined($ENV{EXTRGCP}));

$st_dir = $ENV{EXTRGCP};

die "\n<error>\n$st_dir is not the full path!\n<error>" if ( ! ($st_dir =~ /^\//) );

makelists();

if ( $#missinglist >= 0) {
	write_missing();
	die "<error>\nrun missing.$dataset_id.sh\n</error>\n"
}

write_extr();

do_dregpat();

if ( $makefigs == 1 ) { makefigures(); }

if ( $verbose_level == 0 ) { clean_buffers(); }

print_tag();
make_mask_int();

#--subroutines-----------------

sub do_dregpat {
	my $command;
	if ( $reference_dir_defined == 0 ) {
		$command = $dregpat_command . " " . " -t " . $tc . " -r " . $result_dir . " -i  " . $dataset_id . " -d " . $output_dir . " -V 2 " . $extrfile;
	} else {
		$command = $dregpat_command . " " . " -t " . $tc . " -r " . $result_dir . " -i  " . $dataset_id . " -d " . $output_dir . " -V 2 " . "-u " . $reference_dir . " " . $extrfile;
	}
	system($command);

    if ( $? != 256 ) {
	exit(0);
    }

}


sub clean_buffers {
	system("rm -f $output_dir/*.rg $output_dir/*.txt")
}

sub makelists() {
	my @data;
	my @paths;
	my $entry;
	my $file;
	my $channel;
	my $directory;
	my $path;
	my $basedir;
	my $kounter;
	my $missing_kounter;
	my $gcpfile;
	my $exclude_file;
	my $flag;
	my $embr_name;

	open( IN, $infile ) || die("rgdata: couldn't open $infile!\n");
	@data = <IN>;
	close( IN );

	$missing_kounter = 0;;
	$kounter = 0;
	foreach $entry ( @data ) {
		( $directory , $channel ) = split / /, $entry;
		die "\n<error>$directory is not the full path!\n<error>" if ( ! ($directory =~ /^\//) );
		@paths = split /\//, $directory;
		$basedir = $paths[$#paths];
		opendir(DIR, $directory) || die("rgdata: couldn't open $directory!\n");;
		foreach $file ( readdir(DIR) ) {
			$flag = 0;
			if ( $file =~/procd.asc$/ ) {
				($embr_name, $_) = split /procd.asc/, basename($file);
				$flag = 1;
			} elsif ( $file =~/.proc.d.asc$/ ) {
				($embr_name, $_) = split /.proc.d.asc/, basename($file);
				$flag = 1;
			} elsif ( $file =~/.norm$/ ) {
				($embr_name, $_) = split /.norm/, basename($file);
				$flag = 1;
			}
			if ( $flag ) {
				$exclude_file = $st_dir . "/" . $embr_name . ".exclude";
				if ( ! -e $exclude_file ) {
					$embrlist[$kounter] = $embr_name;
					$filelist[$kounter] = $directory . "/" . $file;
					$dirlist[$kounter] = $basedir;
					$channellist[$kounter] = $channel;
					$gcpfile = $st_dir . "/" . $embrlist[$kounter] . ".gcp";
					if ( ! -e $gcpfile ) {
						printf("missing %s\n",$gcpfile);
						$missinglist[$missing_kounter] = $kounter;
						$missing_kounter++;
					}
					$kounter++;
				} else {
					unlink($exclude_file);
				}
			}
		}
		closedir(DIR);
	}
}

sub write_missing() {
	my $outfile;
	my $index;
	my $script;

	$outfile = $output_dir . "/missing\.$dataset_id\.sh";

	system("rm -f $outfile");

	open(SCRIPT,">$outfile");
	select(SCRIPT);

#	printf("#!/bin/sh\n\n");
#	printf("# Automatically generated - don't modify! \n");
#	printf("# This is rgdata v.$version. \n");
#	printf("# Usage : missing.sh [--frdwt | --spl]\n");


($script = <<EOSCRIPT ) =~ s/^\s+//gm;
#!/bin/sh
# Automatically generated - don't modify!
# This is rgdata v.$version.
# Usage : missing.sh [--frdwt | --spl]
usage() {
  echo "usage: missing.sh [--frdwt | --spl]"
  cat <<EOF
EOF
}
options=""
defaults=" -V 2 -m auto -g X11 -o $st_dir "
set -e
while [ \$# -gt 0 ]
do
  case \$1 in
    -h | --help | -?)
      usage
      exit 0
      ;;
    --frdwt)
      frdwt=yes
      options=" --frdwt "
      ;;
    --spl)
      spl=yes
      options=" --spl "
      ;;
    *)
      echo "*** unknown command line option (try '-h')" 1>&2
      exit 1
      ;;
  esac
  shift
done

EOSCRIPT

print $script;

	for ( $index = 0; $index <=$#missinglist; $index++ ) {
		printf("gcpreg \$defaults \$options -c %d -t %d -G %d %s\n",$channellist[$missinglist[$index]], $tc, $GreatMaxVal, $filelist[$missinglist[$index]]);
	}

	close(SCRIPT);
	system("chmod +x $outfile");
}

sub write_extr() {
	my $i;
	my $j;
	my @infield;

	$extrfile = $output_dir . "/extr\.$tc\.$dataset_id";

	system("rm -f $extrfile");
	open(EXTR,">$extrfile");
	select(EXTR);

	for ( $i = 0; $i <= $#filelist; $i++ ) {
		if ( ($makefigs == 1) && ($term !~ /^X11$/) ) {
			system("mkdir -p -m ug+wrx $output_dir/fig/$dirlist[$i]/T$tc");
		}
		printf("%d. %s",$i+1,$filelist[$i]);
		printf("\n%s %d",$dirlist[$i],$channellist[$i]);
		printf("\n%s",$embrlist[$i]);
		printf("\n");
		open(IN,"$st_dir/$embrlist[$i]\.gcp")  || die("rgdata: couldn't open $st_dir/$embrlist[$i]\.gcp!\n");
		@infield = <IN>;
		close(IN);
		for ( $j = 0; $j <= $#infield; $j++ ) {
			printf("%s",$infield[$j]);
		}
		printf("\n");
	}
	close(EXTR);
}

sub makefigures()
{
  my $script_file="tempscript";
  my $script;
	my $i;

	for ( $i=0; $i<=$#filelist; $i++ ) {

		open(SCRIPT,">$script_file");
		select(SCRIPT);

		$script = print_script_gcp($i);
		print SCRIPT $script;

		close(SCRIPT);
		system("$gnuplot_command $script_file");

		system("rm -f $script_file");
		system("rm -f gcptemp");
	}

}


sub print_script_gcp {
	my $index=$_[0];
  my $script;
  my $script_output;
  my $script_labels;
  my $pattern_file;
  my $gcp_file;
  my @data_gcp;
  my @AP_coords;
  my @prot_concs;
  my $i;
  my $gcp_temp_file="gcptemp";

  my @labels;
  my $num_max;
  my $num_min;
  my $maximum;
  my $kounter;
  my $ly;

  $pattern_file = "$output_dir/$embrlist[$index]\.rg";

  $gcp_file = "$output_dir/int\.tc$tc\.$dataset_id";
  open(IN,"<$gcp_file") || die "can't read gcp!";
  @data_gcp = <IN>;
  close(IN);

  @AP_coords = split /\s+/, $data_gcp[0];
  @prot_concs = split /\s+/, $data_gcp[1];

  open(T,">$gcp_temp_file");
  select(T);

  $num_max = 1;
  $num_min = 1;
  $maximum = 1;
  $kounter = 0;

  for ($i=1; $i<=$#AP_coords; $i++) {
    if ( $prot_concs[$i] != 0.0 ) {
      printf("%5.2f %8.2f\n",$AP_coords[$i],$prot_concs[$i]);
      if ( $maximum ) {
        $ly = $prot_concs[$i]+15;
        $labels[$kounter] = " \"$num_max max\" at $AP_coords[$i],$ly center";
      } else {
        $ly = $prot_concs[$i]-10;
        $labels[$kounter] = " \"$num_min min\" at $AP_coords[$i],$ly center";
      }
      $kounter++;
    }
    if ( $maximum ) {
      $maximum = 0;
      $num_max++;
    } else {
      $maximum = 1;
      $num_min++;
    }
  }

  close(T);

  if ( $term !~ /^X11$/ ) {

    ($script_output = <<EOOUT ) =~ s/^\s+//gm;
       set output "$output_dir/fig/$dirlist[$index]/T$tc/$embrlist[$index]\.$term"
EOOUT

  }

$script_labels = "";
for ( $i=0; $i<=$#labels; $i++) {
  $script_labels .= sprintf "set label %s\n",$labels[$i];
}

($script = <<EOSCRIPT ) =~ s/^\s+//gm;
set term $term
set title "$embrlist[$index]: gcps"
set xlabel "Percent Embryo Length"
set ylabel "Intensity"
set nokey
set xrange [0:100]
set yrange [0:$GreatMaxVal]
plot "$pattern_file" using 2:3 with lines lt 1 lw 2, "$gcp_temp_file" using 1:2 with points pt 2 ps 3, "$gcp_temp_file" using 1:2 with impulses
EOSCRIPT

  return $script_labels . $script_output . $script;
}

sub make_mask_int {
  my $gcp_file;
  my @data_gcp;
  my @AP_coords;
  my @prot_concs;
	my $i;

  $gcp_file = "$output_dir/int\.tc$tc\.$dataset_id";
  open(IN,"<$gcp_file") || die "can't read gcp!";
  @data_gcp = <IN>;
  close(IN);

  @AP_coords = split /\s+/, $data_gcp[0];
  @prot_concs = split /\s+/, $data_gcp[1];

  open(T,">$gcp_file");
  select(T);

	print $data_gcp[0];
	for ($i=1;$i<=$#AP_coords;$i++) {
		if ( $AP_coords[$i] == 0 ) {
			printf("%9d",0);
		} else {
			printf("%9d",1);
		}
	}
	printf("\n");
  close(T);
}

sub print_tag {
	my $filename;
	my @content;
	my $file;
	my $i;

	for($i=0;$i<=$#filelist;$i++) {
		$filename="$result_dir/$dirlist[$i]/T$tc/$embrlist[$i]\.reg";
		open(IN,"<$filename") || die "can't open $filename!";
		@content = <IN>;
		close(IN);

		open(OUT,">$filename") || die "can't open $filename!";
		select(OUT);
		printf("<rgdata>\nversion $version.\n");
		printf("%s\n",$command_line);
		printf("</rgdata>\n");
		print @content;
		close(OUT);
	}
}


#   print_help_msg: prints help message for -h option
#   -------------------------------------------------

sub print_help_msg {

    print STDERR <<EOF

USAGE: rgdata   -t <timeclass> [-h] [-v] [-V[<level>]] <filename>

ARGUMENTS:
  -t <timeclass>                 Temporal class the embryo belongs to: 2, 3, ... , 8.
                                 This option should be always present on the command line.
  <filename>                     Datafile to process;
OPTIONS:
  -h                             Print help message and exit;
  -v                             Print version and exit;
  -V		                 Be verbose:create and save intermediate information.

DESCRIPTION:

Please report bugs to <kozlov\@spbcas.ru>. Thank you!

EOF

}

