#! /usr/bin/perl -w

use warnings FATAL => 'all';
use strict;

#use Carp::Always;
#$SIG{__DIE__} = sub { Carp::confess( @_ ) };
use File::stat;
use File::Copy;
use Fcntl ':mode';
use File::Spec;
use Getopt::Std;
use Fcntl ':mode';
use File::Temp ':mktemp';
use Data::Dumper;

my %pwds = ();
my %ppids = ();
my %commandnames = ();
my %seen_stat = ();
my %seen_open = ();
my %unfinished = ();
my %fds = ();

my $debug = 0;
my $print_cmdlist = 0;

my %opened_files = ();
my %stated_files = ();
my @stated_dirs = ();
my @stated_alldirs = ();

my %block2file;
my %file2blocks;
my %file2stat;
my %small_files;
my %stat_files;
my %stated_dirs;
my %stated_alldirs;
my %file2size;
my $startofxsession = 800000;
my $verbose = 0;
my $warning = 0;
my $filesintimeorder = undef;


# make_abs (PWD, ARG) makes ARG an absolute pathname if it isn't already
# with PWD being the current dir (i.e. the one to which ARG is relative to)
# Additionally to returning a perl canonical path it also strips
# the trailing part of the path if ARG is '..'.  This makes the output
# nice in case of a chain of chdir(sub)/chdir(..) calls, but in the case
# of softlinked dirs it makes a difference.
sub make_abs_slow($$)
{
    my ($pwd, $f) = @_;
    if ($f eq File::Spec->updir()) {
      ($f = $pwd) =~ s,/[^/]+/*$,,;
      $f = '/' if (!$f);
    } elsif (!File::Spec->file_name_is_absolute( $f)) {
      $f = File::Spec->catfile($pwd, $f);
    }
    return File::Spec->canonpath($f);
}

sub make_abs($$)
{
    my ($pwd, $f) = @_;
    return undef unless ($f && $pwd);
    return $f if ($f =~ m,^/,);
    if ($f eq '..') {
	$pwd =~ s,/[^/]+/*$,,;
	$pwd = '/' unless $pwd;
	return $pwd;
    }
    return "$pwd/$f";
}

my %allfiles = ();
my $filecounter = 0;
my %filelists = ();
my $clockjump = 0;

sub add_file($$$$)
{
  my ($time, $process, $command, $file) = @_;

  #print "add_file $time $process $command $file\n";
  if (! defined $filelists{$process} )
  {
    $filelists{$process} = { starttime => $time, unique => {} };
  }

  # reset times before clock jump
  if ( $filelists{$process}->{starttime} < $clockjump )
  {
     $filelists{$process}->{starttime} = $time;
  }

  my $delta =  $time - $filelists{$process}->{starttime};
  if ($delta > 19900) {
      return;
  }

  if (! defined $allfiles{$command . $file} )
  {
     $allfiles{$command . $file} = $filecounter;
     if ( $command eq "open" ) {
       die if (defined $opened_files{$file});
       # the open is just the first action
       $opened_files{$file} = [ $filecounter ];
	
       if ($file eq "/etc/X11/xdm/Xsession") {
		$startofxsession = $time;
       }
     } elsif ( $command eq "stat" ) {
       # only one action
       die if (defined $stated_files{$file});
       $stated_files{$file} = $filecounter;
     }
     #print "all $filecounter $command $file\n";
     $filecounter++;
     if ($filesintimeorder) {
	print $filesintimeorder $file . "\n";
     }
  }

  my $fid = $allfiles{$command . $file};
  $allfiles{$command . $fid} = $file;
  $filelists{$process}->{unique}->{$fid} = $time;
  #print STDERR "$process $command $file $delta $fid\n";
}

my $xsession = 0;

sub add_command($$$$)
{
  my ($time, $process, $command, $file) = @_;
  #print STDERR "add $time $process $command $file\n";
  return if ($file =~ m,^/proc/, || $file =~ m,^/sys/, || $file =~ m,^/dev/, || $file =~ m,^/tmp/, );

  add_file($time, $process, $command, $file);
}

sub parse_command($$$$$) {

  my $time = shift;
  my $command = shift;
  my $pid = shift;
  my $syscall = shift;
  my $args = shift;
  
  if ($syscall eq "fork" ) {
    my $npid = $args->[0];
    if ($npid < 0) {
      # We might see error returns in the systap that are associated with
      # syscall restarting (see e.g. bnc #551635).  Ignore those.
      return 0;
    }
    if (defined $ppids{$npid}) {
      print STDERR "prepare_preload warning: Too many forks, PIDs got reused, we're confused " if ($warning);
      print STDERR "at $time $command $pid fork $npid\n" if ($warning);
      return 1;
    }
    $ppids{$npid} = $pid;
    $pwds{$npid} = $pwds{$pid};
    return 0;
  }

  #print STDERR "$time-$command-$pid-$syscall-", join(',', @$args), "\n";;
  
  my $fname = undef;
  if (!defined $pwds{$pid}) {
    $pwds{$pid} = "/";
  }

  # we need to make sure we get the first command and not later execve
  if (!defined $commandnames{$pid} && $syscall eq "open")
    {
      $commandnames{$pid} = $command;
    }

  if ($syscall eq "chdir") {
    $pwds{$pid} = make_abs($pwds{$pid}, $args->[0]);
    return 0;
  } elsif ($syscall eq 'fchdir') {
    my $arg = $fds{$pid}->{$args->[0]};
    $pwds{$pid} = $arg;
    return 0;
  } elsif ($syscall eq "open") {
    $fname = make_abs($pwds{$pid}, $args->[1]);
    my $result = $args->[0];
    $fds{$pid}->{$result} = $fname;
    #print STDERR "FD $result of $pid now $fname\n";
  } else {
    $fname = make_abs($pwds{$pid}, $args->[0]);
  }

  return if (!$fname || $fname =~ m,^/proc/, || $fname =~ m,^/sys/, || $fname =~ m,^/dev/,);
  return if (grep(/^$syscall$/, ("mkdir", "chown32", "chown", "unlink", "chmod", "mknod", "statfs", "rename", "rmdir", "link", "getcwd", "creat","close", "fork")));
  return if ($command eq "preload");

  if ($syscall eq 'open' || $syscall eq 'failedopen') {
    add_command($time, $pid, "stat", $fname);
  } elsif ($syscall eq 'execve') {
    add_command($time, $pid, "open", $fname);
    add_command($time, $pid, "stat", $fname);
  } elsif ($syscall eq 'read') {
    #print STDERR "READ read $fds{$pid}->{$args->[0]}\n";
    if (defined $fds{$pid}->{$args->[0]}) {
      add_command($time, $pid, "open", $fds{$pid}->{$args->[0]});
    }
  } elsif (grep(/^$syscall$/, ("stat", "stat64", "access", "readlink", "lstat", "lstat64"))) {
    #print STDERR $fname . "\n";
    add_command($time, $pid, "stat", $fname);
  } elsif ($syscall eq "fstat") {
    if (defined $fds{$pid}->{$args->[0]}) {
      my $file = $fds{$pid}->{$args->[0]};
      add_command($time, $pid, "stat", $file);
    }
  } elsif ($syscall eq "openat") {
    my $dir = undef;
    if ($args->[1] eq "AT_FDCWD") {
      $dir = make_abs($pwds{$pid}, ".");
    } else {
      $dir = make_abs($pwds{$pid}, $fds{$pid}->{$args->[2]});
    }
    my $file = make_abs($dir, $args->[2]);
    $fds{$pid}->{$args->[0]} = $file;
    add_command($time, $pid, "open", $file) if $file;
  }
  return 0;
}

sub get_process_tree($);

sub get_process_tree($)
{
  my $pid = shift;
  if (!$pid || !defined $commandnames{$pid}) { return "" };
  if (!defined $ppids{$pid}) { return $commandnames{$pid}; }
  my $parent = get_process_tree($ppids{$pid}) || '';
  if ($parent =~ m,/$commandnames{$pid}$, || $parent eq $commandnames{$pid} || !$commandnames{$pid}) { return $parent; }
  return $parent . "/" . $commandnames{$pid};
}

sub merge_filelists($$)
{
  my $init = shift;
  my $pid = shift;

  if (!defined $filelists{$init}) {
    $filelists{$init} = $filelists{$pid};
  } else {
    # starttime does no longer matter
    while (my ($id,$time) = each %{$filelists{$pid}->{unique}}) {
      $filelists{$init}->{unique}->{$id} |= $time;
      if ($time < $filelists{$init}->{unique}->{$id}) {
 	$filelists{$init}->{unique}->{$id} = $time;
      }
    }
  }
  delete $filelists{$pid};
  #print $init, Dumper($filelists{$init});
}

sub read_strace
{
  my $lasttime = 0;
  while ( <STDIN> ) {
    chomp;
    my $line = $_;
    my @split = split(/ /, $line);
    next if (@split < 4);

    my $time = shift @split;
    $time =~ s,\.,,;
    $time = int($time);
    my $pid = shift @split;
    my $command = shift @split;
    my $syscall = shift @split;
    my $args = [];
    my $rest = shift @split;
    while (defined $rest) { push(@$args, $rest); $rest = shift @split; }
    
    if ($lasttime != 0 && abs($time - $lasttime) > 20000)
      {
	print STDERR "clock jump: " . abs($time-$lasttime) / 1000 . "\n" if ($warning);
	# reset times before clock jump
	foreach my $process (keys %filelists)
	  {
	    $filelists{$process}->{starttime} = $time;
	  }
      }
    $lasttime = $time;
    if (parse_command($time, $command, $pid, $syscall, $args)) {
      last;
    }
  }
  my @pids = keys %filelists;
  foreach my $pid (@pids) {
    my $ptree = get_process_tree($pid);
    if ($ptree =~ m/bootchart/) {
      delete $filelists{$pid};
      next;
    }
    if ($ptree =~ m,startpar, || $ptree =~ m,boot.startprelo, ) {
      if ($ptree =~ m,startpar/([^/]*), ) {
	merge_filelists($1, $pid);
      } else {
	merge_filelists("startpar", $pid);
      }
      next;
    }
    if ($ptree =~ m,khelper,) {
      merge_filelists("boot.udev", $pid);
      next;
    }
    delete $filelists{$pid};
  }
  $filelists{"xsession"} = { unique => {} };
  while (my ($id, $time) = each %{$filelists{"earlyxdm"}->{unique}}) {
     if ($time > $startofxsession) {
	$filelists{"xsession"}->{unique}->{$id} = $time;
	# this is safe (see perldof -f each)
	delete $filelists{"earlyxdm"}->{unique}->{$id};
     } 
  }
}

# Invariant: No fragments overlap (except the pseudo fragment on block 0,
# for holes).  Hence we can represent each fragment just by its starting
# block, and don't check for overlap.
sub add_blocks ($$$$)
{
  my ($file, $startblock, $stride, $file_ofs) = @_;
  push (@{$file2blocks{$file}}, [$startblock, $stride, $file_ofs]);
  if (exists $block2file{$startblock}) {
    # This can happen with soft links.  We don't ignore softlinks
    # and instead look at the block of the file linked to
    #print "Block $startblock already seen (in $file vs. $block2file{$startblock}->[1]).\n";
  } else {
    $block2file{$startblock} = [$stride, $file, $file_ofs];
  }
}

sub gather_layout
{
  foreach my $f (keys %opened_files) {
    if (-e $f) {
      my $st = stat ($f);
      $file2stat{$f} = $st;
      # save us from filenames containing shell commands
      if ($warning) {
	  open (I, "-|", "/sbin/print-bmap", "-v", $f);
      } else {
	  open (I, "-|", "/sbin/print-bmap", $f);
      }
      my $blocks = <I>;
      close (I);
      if ($blocks) {
	chomp $blocks;
	my @blocks = split (/ +/, $blocks);
	my $num_holes = 0;
	my $file_ofs = 0;
	foreach my $block (@blocks) {
	  print "$f $block\n" if ($verbose);
	  if ($block eq 'e') {
	    $stat_files{$f} = 1;
	  } elsif ($block eq 't') {
	    if (exists $file2blocks{$f}) {
	      printf STDERR "Huh?  $f has tail _and_ normal blocks.\n" if ($warning);
	    } else {
	      $small_files{$f} = 1;
	    }
	    $num_holes = 0;
	  } elsif ($block eq 'h') {
	    # We handle holes by accounting them to the next fragment
	    # As holes are cheap to read, it's most efficient to include
	    # them in a normal read() call, instead of doing them out-of-order
	    $num_holes++;
	  } else {
	    my ($start_block, $stride) = split (/\+/, $block);
	    add_blocks ($f, $start_block, $stride + $num_holes, $file_ofs);
	    $file_ofs += $stride + $num_holes;
	    $num_holes = 0;
	  }
	}
	$file2size{$f} = $file_ofs;
      }
    } else {
      # We want to only stat non-existent files, as this will create
      # a dentry pointing to nothing
      $stat_files{$f} = 1;
    }
  }
  foreach my $f (keys %stated_files) {
    $stat_files{$f} = 1;
    if (-e $f) {
      my $st = stat($f);
      $file2stat{$f} = $st;
    }
  }
}

my %file2state = ();
my @current_stack = ();
my ($last_file, $last_ofs, $last_len, $last_idx) = ('', 0, 0, -1);

my @commands = [];
my %order2cmd = ();
my %cmd2order = ();
my $command_index = 0;

sub add_ordered_command($$)
{
  my ($cid, $cmd) = @_;
  #print STDERR "$cid $cmd\n";
  $commands[$command_index] = $cmd;
  $order2cmd{$command_index} = $cid;
  $cmd2order{$cid} = $command_index;
  $command_index++;
}

sub flush_queue
{
  if ($verbose) {
    print "read $last_idx, fileofs $last_ofs, stride $last_len\n" if ($last_file);
  } else {
    if ($last_file)
      {
	add_ordered_command($filecounter, "R $last_idx $last_ofs $last_len");
	push(@{$opened_files{$last_file}}, $filecounter);
	$filecounter++;
      }
  }
  ($last_file, $last_ofs, $last_len, $last_idx) = ('', 0, 0, -1);
}

sub do_read ($$$$)
{
  my ($file, $idx, $file_ofs, $stride) = @_;
  # Try to merge with last read, but not over file borders
  if ($file eq $last_file && $last_ofs + $last_len == $file_ofs) {
    $last_len += $stride;
  } else {
    flush_queue();
    $last_file = $file;
    $last_ofs = $file_ofs;
    $last_len = $stride;
    $last_idx = $idx;
  }
}

sub order_commands
{
  # First we want all small and stated files sorted by their
  # inode.  file2stat contains the subset of stat and small files, which
  # exist.
  my @ordered = keys %file2stat;
  @ordered = sort { $file2stat{$a}->dev <=> $file2stat{$b}->dev
  		    || $file2stat{$a}->ino <=> $file2stat{$b}->ino } @ordered;
  foreach my $f (@ordered) {
    #print STDERR "$f: ". $file2stat{$f}->dev . ":" . $file2stat{$f}->ino . "\n";
    if (exists $small_files{$f}) {
      # W == read Whole file
      add_ordered_command(@{$opened_files{$f}}[0], "W $f");
    } elsif (exists $stated_alldirs{$f}) {
      print "D $f\n";  # D == stat all dir entries, recursive
    } elsif (exists $stated_dirs{$f}) {
      print "d $f\n";  # d == stat all dir entries, nonrecursive
    } else {
      # Must be a simple stat
      add_ordered_command($stated_files{$f}, "S $f");
    }
  }
  # Stat the non-existent files
  # TODO sort these stats too, based on the inode of the containing
  # directory
  add_ordered_command($stated_files{$_}, "S $_") foreach sort grep (!exists $file2stat{$_}, keys %stat_files);
  # There are no small files which don't have a file2stat entry.

  # Now do the big files
  foreach my $block (sort {$a <=> $b} keys %block2file) {
    my ($stride, $file, $file_ofs) = @{$block2file{$block}};
    if (!exists $file2state{$file}) {
      push @current_stack, $file;
      add_ordered_command(@{$opened_files{$file}}[0], "O $file $#current_stack");
      $file2state{$file} = [$#current_stack, 0, 0];
    }
    my ($idx, $cur_ofs, $cur_len) = @{$file2state{$file}};
    do_read ($file, $idx, $file_ofs, $stride);
    $file2state{$file}->[1] += $stride;
    if ($file2state{$file}->[1] == $file2size{$file}) {
      flush_queue();
      add_ordered_command($filecounter, "C $file2state{$file}->[0]");
      push(@{$opened_files{$file}}, $filecounter++);
      if ($#current_stack == $file2state{$file}->[0]) {
	pop @current_stack;
      } else {
	$current_stack[$file2state{$file}->[0]] = '';
      }
    }
    #print "$block+".$block2file{$block}->[0]." ".$block2file{$block}->[1]."\@$block2file{$block}->[2] size=$file2size{$file}\n";
  }
  flush_queue();
}

sub print_commands
{
  print "n " . scalar(@commands) . "\n";
  my $index = 0;
  foreach my $cmd (@commands)
    {
      print "$cmd\n";
      $index++;
    }
  print "P " . scalar(keys %filelists) . "\n";
  foreach my $process (keys %filelists)
    {
      my @pcommands;

      foreach my $cid (keys %{$filelists{$process}->{unique}})
	{
	  if (defined $allfiles{"open$cid"} ) {
	    my $file = $allfiles{"open$cid"};
	    foreach my $cid2 (@{$opened_files{$file}})
	      {
		next if (! defined $cmd2order{$cid2} );
		my $cidx = $cmd2order{$cid2};
		push (@pcommands, $cidx);
	      }
	    next;
	  }
	  if (! defined $cmd2order{$cid} )
	    {
	      # e.g. not existant files don't need to be opened
	      next;
	    }
	  my $cidx = $cmd2order{$cid};
	  push (@pcommands, $cidx);
	}
      @pcommands = sort {$a <=> $b} @pcommands;
      print "p $process " . scalar(@pcommands) . "\n";
      foreach my $cidx (@pcommands)
	{
	  print "$cidx # " . $commands[$cidx] . "\n";
	}
    }
}

our ($opt_c, $opt_s, $opt_p, $opt_d, $opt_h, $opt_l, $opt_v, $opt_W);
getopts ('Wvcspdhl:');

if ($opt_h) {
  print "prepare_preload [-cspdh] < input\n";
  print "  -c input is a cmdfile, like in /etc/preload.d/*\n";
  print "  -s input is a strace dump (default)\n";
  print "  -p output is a cmdfile (makes sense only with strace input)\n";
  print "  -d print some debugging info\n";
  print "  -h print this help\n";
  print "  -l <file> dump a list of files (in order)\n";
  print "  -v be a bit verbose\n";
  print "  -W give warnings about strange input\n";
  print "By default this program produces output commands consumable by the\n";
  print "preload program.\n";
  exit 0;
}

$print_cmdlist = $opt_p;
$debug = $opt_d;
$verbose = 1 if ($opt_v);
$warning = 1 if ($opt_W);

if ($opt_l) {
	open($filesintimeorder, ">$opt_l") || die "can not open $opt_l";
}

read_strace;
if (!$opt_p && $< != 0) {
  die "You must be root to get the layout of the disk";
}

gather_layout;
order_commands;
print_commands;

#print "open $_\n" foreach keys %opened_files;
#print "stat $_\n" foreach @stated_files;

