#! /usr/bin/perl -w
#
# ioana -- an I/O analyzer
#
# Copyright (c) 2006,2007, jw@suse.de, Novell Inc.
# Distribute under the terms of GPLv2
#
# 2006-06-04, jw V0.05 -- lsof() impl.
# 2006-06-05, jw V0.06 -- strace framework done. no parser yet.
# 2006-06-08, jw V0.08 -- syscall dispatcher started. open, fstat64, old_mmap...
# 2006-07-05, jw V0.11 -- added code for stealth_writable, bytes_r, bytes_w, add_dependency().
#                         Now keeping copies of cwd and rtd in all resources, so that there
#                         is no confusion when these change later.
# 2006-07-07, jw V0.13 -- global inode table initializend in lsof()
# 2006-07-16, jw V0.16 -- store_inode() does {fs}{inode|path|obj};
# 2006-07-16, jw V0.16 -- ana_write done. is_system_path() added.
# 2006-07-17, jw V0.17 -- exec_seen improved, ana_read fixed, write_dumpfile added.
# 2006-07-17, jw V0.18 -- make test added. pipe,getcwd done. fcntl,fork,exec started.
# 2006-07-17, jw V0.19 -- syscall continuation done. fork,exec,dup done. 
#                         full_pathname() fixed. ana_unlink started.
# 2006-08-03, jw V0.20 -- unlink done. 
#                         my $a if 0; usage fixed in store_inode. Aaaargh.
# 2006-08-04, jw V0.21 -- ana_close needs debugging.
# 2006-08-16, jw V0.22 -- new type {res}{load} introduced to rescue close() and unmap().
#                         To be honored by mmap(PROT_WRITE) and write().
# 2006-08-17, jw V0.23 -- first usable report generated. 
#                         exec-write dependedcies are missing from behind pipes.
# 2007-02-02, jw V0.24 -- clone not implemented but used by make. Added option -x.
# 2007-02-03, jw V0.25 -- Usage reworked: added -l; changed -o -O into -d -f; -t, -x dummies.
# 2007-02-04, jw V0.26 -- Added -l filtering via -i, -o, -t and friends. -x dummy.
# 2007-02-04, jw V0.27 -- -x and -X done. Heavy debugging. ana_clone partially done,
#                         will bail out when it gets tricky.
#                         Pipes have no inode, but we reference objects by inode in {res}{load};
#                         remember_loaded() exploads when closing a pipe.
# 2007-02-05, jw V0.28 -- removed malfunctioning debug code. 
#                         Added __DIE__ hook to killall and print linenumber..
# 2007-02-05, jw V0.29 -- fixed tokenize_mkarrays, debugging lost dependencies during pipe reads.
# 2007-02-06, jw V0.30 -- pipes are now found. We must not list them as system files,
#                         because a chain 'user >> system >>' user is only printed with -a. 
#                         Is that good? excessive debugging removed. 
#                         Die hook now prints strace ring buffer and caller chain.
#                         test/Makefile now excercises pipes. unlinkat done.
# 2007-02-06, jw V0.31 -- flatten_array_refs, free_obj_if_unlinked_unused, do_free_obj,
#                         usecount_obj, ana_rename done. ana_symlink missing.
# 2007-05-26, jw V0.32 -- check_consitency() added. 
#                         We have objs that neither provide nor require.
# 2007-05-27, jw V0.33 -- -e added. exec_seen(), remember_loaded() enhanced for -e.
#                         exec_seen() now also discards {res}{load} unless -e.
#                         I personally miss the Makefile dependencies, but this is correct.
#                         {features} added to dumpfile, including version number checking.
# 2007-05-27, jw V0.34 -- started do_trunc0(), unfinished.
# 2007-05-29, jw V0.35 -- merged: ana_symlink added untested. ana_fstatat started
#                         added at- testsuite.
# 2007-05-29, jw V0.36 -- ana_chdir fixed.
# 2007-06-04, jw V0.37 -- do_trunc0 finished. untested.
# 2007-06-15, jw V0.38 -- did some trunc0 testing, looks good. test/Makefile enhanced. 
#                         do_dup2() fixed: disinherit FD_CLOEXEC.
#                         ./configure now dies with newfstatat() not impl.
# 2007-06-18, jw V0.39 -- ana_fstatat() done. %global zapped. exec_seen, store_inode without $pp.
#                         ana_symlink() done. resolve_symlinks() started. test-symlink added.
# 2007-06-19, jw V0.40 -- expand_symlinks() is now a universal canonification tool.
#                         canonical_path() has issues and is no longer used.
# 2007-07-04, jw V0.41 -- renamed all {idx} to {oidx} to disabiguate with list-idx
#                         Used <name>#<oidx> as dependency keys to make them unique across name reuse.
# 2011-02-10, jw V0.42 -- -O offline mode for strace dumps added.
# 2011-02-14, jw V0.43 -- be more relaxed in offline mode, accept unknown filedescriptors in dup, close ...
# 2011-02-15, jw V0.44 -- ana_openat() ana_unlinkat() added, used by /bin/rm -rf
# 2011-02-16, jw V0.45 -- socket() I/O added. seen with /var/run/nscd/socket
#
#
# test like this:
#
# $ ./ioana.pl -d /tmp/cp.ioa cp Makefile /tmp/x
# $ cat /tmp/cp.ioa
# {
#   '/tmp/x' => [
#      { n => '/bin/cp', rtd => '/', cwd => '/home/jw/', exec => 1,
#        cmd => 'cp Makefile /tmp/x', pid=>1234, ppid=>1233 },
#      { n => '/lib/libc.so', rtd => '/', cwd => '/home/jw/', mmap => 123456,
#        cmd => 'cp Makefile /tmp/x', pid=>1234, ppid=>1233 },
#      { n => 'Makefile', rtd => '/', cwd => '/home/jw/', read => 123,
#        cmd => 'cp Makefile /tmp/x', pid=>1234, ppid=>1233 },
#               ],
# }
#
# dependencies in filesystem objects are tracked like this:
# $fs->{obj}{depends_on}{oidx} = { res => $readable_r, bytes => $n, type => mmap|fd };
#
# Then number of {bytes} here is a smallest maximum value:
# - several long writes after a read of 10 bytes contribute for 10.
# - several long reads followed by a write of 10 bytes contribute for 10.
# -  in reality the dependency may be less, or even 0, As we do not know if or how
#   the bytes written are influenced by the previous reads.
# - with type mmap: the metric is even less precise: we always list the mmap'ed size.
#
# Horror-Scenario:
# (This is an example, where identical path names can simultaneously point to
# different inodes:)
# A process creates a temporary directory using a fixed name, then chdirs there
# and unlinks the directory. Now it operates on a set of fixed filenames in
# there.  Another process does the same.  These processe may or may not see
# each others files, depending on their unlink and chdir timing.
#
# Do we need to support islands in the filesystem?
#
# The best we can do is this:
# Whenever we learn the inode_pair corresponding to a path, we store that information
# in a global table. As soon as we need to support islands, we introduce an additional
# per process mapping between paths and inode_pairs.
# lookup_inode() consults both, first the per process_table (if it exists) then the global table.
# What does our fetch_cwd_rtd() do, when called within islands?.
#
# strace optimization:
# strace -v -e verbose=fstat64
# prints all the syscalls that have structures with only a hex structure pointer,
# except for fstat64, which is exposed in full, including "st_dev=makedev(0,10), st_ino=7"
# other syscalls like read, write are abbreviated as usual ...

#
## res->{o}:  object id of this resource
## res->{i}:  st_ino inode number of the resource, if known.
## res->{D}:  st_dev device ID containing the resource
## res->{I}:  "{D}/{i}" constructed by inode_pair()
## res->{t}:  lower case are dummies, uppercase are REG, DIR, LNK, BLK, CHR, FIFO, SOCK
## res->{s}:  filesize of the resource
## res->{N}:  created by full_pathname() expanding rtd, cwd to of {n} and expand_symlinks()
#
## $pp global root of all 
## $pp->{p} processes {p} and 
## $pp->{fs}{obj}[] filesystem objects
## $pp->{fs}{inode}{} inode lookup cache
## $pp->{fs}{symlink}{} symlink lookup cache for expand_symlinks()
#
## res->{N} has only one name, whereas obj->{N} has multiple.
## obj is constructed by store_inode() from either a res, an inode_pair, or a ":special_name"
##  if it is constructed from a res, res->{o} = obj->{oidx}
## multiple res can point to the same obj. As multiple filedescriptors can be open to the same file, when hardlinked, 
## even by different names. do_ana_unlink() has some insights on the relationship between fd, path, res, obj.
## each fd has exactly one res; a res or fd is shortlived, compared to an obj; an obj may have multiple $pp->{fs}{path}{}
## an obj corresponds to an inode or vnode. It *knows* its inode number unless we are offline, or need to avoid stat().
#
## obj->{oidx}	index of this obj on $pp->{fs}{obj}[]
## obj->{N}:  	hash with keys of names for this resource
## obj->{del}{N}: hash of all names of this resource that have been unlinked.
## obj->{I}:	same as res->{I}, if known.
#




use strict;
use POSIX;
use Data::Dumper;
use Time::HiRes qw(gettimeofday time);
use IO::Handle;
use IO::File;

my $otrunc_warned  = 0;
my $version 	   = '0.45';
my $verbose	   = 1;
my $cmd_pid	   = undef;
my $dumpfile 	   = "dump.ioa";	
my $dumpfile_ex	   = "%p.%n.%t.ioa";
my $strace_s       = 256;
my $strace_verbose = 0;
my $avoid_block	   = 0;
my $show_all	   = 0;
my $do_examine	   = 0;
my $do_list	   = 0;
my $keep_running   = 0;
my $trace_to	   = 0;
my $trace_from	   = 0;
my $trace_name	   = undef;
my $use_rpm	   = 1;
my $rpm_cmd	   = '/bin/rpm -qf';
my $dump_proc	   = 1;		# include a process table in dumpfile.
my $show_canonical     = 0;
my $check_consitency   = 0;
my $depend_across_exec = 0;
my $offline = undef;	# have an strace dump, nothing more.
my $die_unknown = 0;	# die on unknown system-calls.

my %dep_verbose;	# dependecies already printed if verbose.

my %ign_syscall	= map { $_ => 1 } qw(
	access 
	alarm
	arch_prctl 
	brk 
	chmod
	chown32
	clock_gettime
	fadvise64 
	fchmod
	fchmodat
	fchown32
	futex
	getdents64 
	getegid 
	getegid32 
	geteuid 
	geteuid32 
	getgid 
	getgid32 
	getgroups
	getgroups32
	getpgrp
	getppid 
	getrlimit 
	getrusage 
	gettimeofday 
	getuid 
	getuid32 
	lchown32
	llistxattr 
	madvise 
	mkdir
	mount
	mprotect 
	poll
	readlink
	rt_sigaction 
	rt_sigprocmask 
	rt_sigreturn
	select
	setrlimit 
	set_robust_list
	set_thread_area 
	set_tid_address 
	sigaltstack
	sigreturn 
	statfs
	statfs64
	syscall_273 
	_sysctl 
	time
	umask 
	uname 
	utimensat
	wait4 
	waitpid 
);

exit usage() unless @ARGV;
while (defined (my $arg = shift))
  {
    if    ($arg !~ m{^-})		   { unshift @ARGV, $arg; last; }
    elsif ($arg =~ m{^(-h|--help|-\?)})    { exit usage(); }
                                           
    elsif ($arg =~ m{^--?d})		   { $dumpfile = shift; }
    elsif ($arg =~ m{^--?D})		   { $die_unknown = 1; }
    elsif ($arg =~ m{^--?p})		   { $cmd_pid = shift; }
    elsif ($arg =~ m{^--?q})		   { $verbose = 0; }
    elsif ($arg =~ m{^--?b})		   { $avoid_block = 1; }
    elsif ($arg =~ m{^--?a})		   { $show_all = 1; }
    elsif ($arg =~ m{^--?e})		   { $depend_across_exec = 1; }
    elsif ($arg =~ m{^--?s})		   { $strace_s = shift; }
    elsif ($arg =~ m{^--?S})		   { $strace_verbose = 1; }
    elsif ($arg =~ m{^--?O})		   { $offline = shift; }
    elsif ($arg =~ m{^--?r})	   	   { $use_rpm = 1 - $use_rpm; }
    elsif ($arg =~ m{^--?v})		   { $verbose++; }
    elsif ($arg =~ m{^--?c})		   { $show_canonical++; }
    elsif ($arg =~ m{^--?C})		   { $check_consitency++; }
    elsif ($arg =~ m{^--?k})		   { $keep_running++; }
    elsif ($arg =~ m{^--?l})		   { $do_list +=2;   $do_list-- if $arg =~ m{ls}; }
    elsif ($arg =~ m{^--?L})		   { $do_list +=3;   $do_list++ if $arg =~ m{LL}; }
    elsif ($arg =~ m{^--?x}i)		   { $do_examine++; $do_examine++ if $arg =~ m{X}; }
    elsif ($arg =~ m{^-(io|t|-trace)})	   { $trace_from=$trace_to=1; $trace_name = shift; }
    elsif ($arg =~ m{^-(i|-requires)})	   { $trace_from++; $trace_name = shift; }
    elsif ($arg =~ m{^-(o|-contrib)})      { $trace_to++;   $trace_name = shift; }
    elsif ($arg =~ m{^--provides})         { $trace_to++;   $trace_name = shift; }
    elsif ($arg =~ m{^-(t|-trace)})	   { $trace_from=$trace_to=1; $trace_name = shift; }
  }

# imply -l with -t unless -x given.
$do_list = 1 if !$do_list and !$do_examine and defined $trace_name;
$use_rpm = 0 if $offline;

exit usage("use either -p or specify a command to start, not both") if defined($ARGV[0]) and defined($cmd_pid);
exit usage("use either -O or specify a command to start, not both") if defined($ARGV[0]) and defined($offline);
exit usage("use either -l or -x, not both") if $do_list and $do_examine;
exit usage("option -l takes no parameters (@ARGV)") if defined($ARGV[0]) and $do_list;
exit usage() unless @ARGV or defined($cmd_pid) or $do_list or $do_examine or $check_consitency or $offline;

$SIG{__DIE__} = sub
{
  # first a simple die handler....
  my @a = ((caller 3)[1..3], '=>', (caller 2)[1..3], '=>', (caller 1)[1..3], '=>', (caller 0)[1..3]);
  print "__DIE__: (@a)\n";
  die @_;
};

if ($do_examine or $do_list or $check_consitency)
  {
    # imply a suffix.
    $dumpfile .= '.ioa' unless $dumpfile =~ m{\.\w+$};

    open(IN, "<", $dumpfile) or die "cannot read '$dumpfile': $!\n";
    my $text = join('', <IN>);
    close(IN);
    my ($dump, @more) = eval $text;
    die "cannot eval $dumpfile: $!: $@" if $@;
    warn "OOPS, excessive elements after first hash in $dumpfile.\n" if @more;
    warn "OOPS, no {obj} in $dumpfile.\n" unless $dump->{obj};
    my $dv = $dump->{features}{version}||'<unknown>';
    warn "OOPS, version mismatch: $dumpfile from $dv, this is $version.\n" if $dv ne $version;
    $dump->{dep} = find_dependencies($dump->{obj}, $show_all||$check_consitency);

    exit check_consitency($dump) if $check_consitency;
    exit do_list   ($dump, $do_list - 1) if $do_list;
    exit do_examine($dump, \@ARGV, $do_examine-1);
  }

my $pp;		# global root of all data.

$SIG{__DIE__} = sub
{
  # now an elaborated die handler....
  #
  # We first kill our processes with us, 
  # so that user actually notices that ioana has a problem.
  kill(1, keys %{$pp->{p}}) if $pp->{p};	## killall

  # caller really does the right thing here. hooray.
  # show where the damned thing dies.
  my @a = ((caller 3)[1..3], '=>', (caller 2)[1..3], '=>', (caller 1)[1..3], '=>', (caller 0)[1..3]);
  map { print "strace: $_\n" } @{$pp->{tbuf}};
  print "__DIE__: (@a)\n";
  die @_;
};

if (defined($cmd_pid))
  {
    $pp = attach_cmd($cmd_pid);
  }
elsif ($offline)
  {
    $pp->{strace_fd} = new IO::File $offline, "r";
    $pp->{tstamp} = (stat($pp->{strace_fd}))[10];
    $pp->{cmdname} = $offline;
    $pp->{cmdname} =~ s{^.*/}{};
    $pp->{cmdname} =~ s{\.[^\.]+$}{};
    $pp->{pid} = '0000';
    die "open($offline) failed: $!\n" unless $pp->{strace_fd};
  }
else
  {
    ## we fill in $pp as a sideeffect.
    start_cmd(@ARGV);
  }
$pp->{dumpfile} = expand_dumpfilename($pp, $dumpfile);

## now we read-loop at $pp->{strace_fd}
## and record what we learn.
##
while (defined (my $ev = tokenize($pp, $pp->{strace_fd}->getline)))
  {
    analyze($pp, $ev);
  }

warn "syscalls ignored: ", Dumper $pp->{ignored} if $verbose > 1;
write_dumpfile($pp);

sleep 1;	# our children will die with us.
exit 0;
################################################################

## return true if $path is not normally 
## worth to be listed as a dependency
##
## Do not add [:pipe/ here. It is not a real object, but needs to be part of 
## user dependency chains for find_dependencies() to do the right thing. 
##
sub is_system_path
{
  my ($path) = @_;

  die "path undefined" unless defined $path;

  return 1 if $path =~ m{^(
  	/usr/lib(|64)/(gcc|locale|gconv)/ |
	/usr/lib(|64)/(libc|libgcc||crt|libbfd)[^/]*\.(so|o|a) |
	/usr/bin/(ld|cc|make|gcc) |
	/usr/[^/]+-linux/bin/(as|ld)$ |
	/lib(|64)/(tls/|)(ld|lib)[^/]+\.(so|o|a) |
	/etc/(mtab|ld.so.cache|ld.so.conf) |
	/usr/share/locale/ |
	/usr/include/(asm|asm-\w+|linux|bits|sys|gnu)/ |
	/usr/include/(pwd|string|unistd|getopt|errno|stdio|stdlib|wchar|_G_config|libio|gconv|alloca|features|time|endian)\.h$ |
	/dev/pts/ |
	/proc/mem
    )}x;
  return 0;
}

sub usage
{
  my ($msg) = @_;

  print STDERR qq{$0 V$version usage:

ioana [options] cmd [args ...]
ioana [options] -O strace.log
ioana [options] -p pid 
ioana [options] -l
ioana [options] -x file ...


valid options are:
  -h                   Print this online help.

  -v                   Be more verbose. Default: $verbose;
  -q                   Be quiet.

  -O strace.log        Offline mode. Analyze only an 'strace -f' output file.

  -r                   Toggle use of '$rpm_cmd' to determine prerequisites. 
                       Effective when writing the dumpfile.
		       Output of -l and -L will be affected.

                       Default $use_rpm.
  		       
  -s <strlen>          String capture size for strace. Default $strace_s.
  -S		       Strace verbose debugging.

  -b                   Avoid potentially blocking syscalls like e.g. stat().
  -p pid               Analyze existing process-id pid.

  -d <dumpfile>.ioa    Where to dump output to.  (Or with -x, -l, -t: dumpfile
		       to read) May use %p for pid, %n for name, %t for
		       timestamp. (E.g.  "$dumpfile_ex")
                       Default "$dumpfile".

  -D                   Die on unknown system calls. Helps development and correctness.
                       Default: record and print them, but continue.

  -e                   Extended dependencies.
		       Resources accumulated before an exec() system call are
		       considered dependencies for output generated after the
		       exec() call.
		       Use -e for analysing process chains, where a child
		       depends on the commandline constructed by its parent.
		       Without -e dependencies only exist within the same
		       namespace.

  -ls                  List (from dumpfile) all filenames used or created.
  -l                   As -ls, but with dependencies. Dependiencies include
                       RPM-filenames if recorded in the dumpfile, see -r. 
  -L                   As -ls, but print as a formatted table: 
                       Index number, file name, size, used bytes.
  -LL                  As -L, with dependcencies.

  -x [file ...]        Print dependency chains from and to the named files.
                       Glob patterns or index numbers are accepted.
		       Not specifying a file defaults to '*', i.e. all files.
  -X [file ...]        As -x, but with more details like -L for every stage of
		       the chain.  Also includes the process name that first
		       opened a dependency.

  -io file             
  -t  file             With -ls -l, -L. -LL, -x or -X: limit output to
		       dependency chains leading from or to the named file. An
		       additional column distance is prepended to the output of
		       -ls, -l, -L, -LL.  -t implies -ls unless another -l or
		       -x style option is also given.

		       Glob patterns or index numbers are accepted instead of
		       file names, but glob patterns must have exactly one
		       match.

		       Use -x file1 -t file2, to check if there is any
		       dependency between the two files.

  -i         file
  --requires file      As -t, but output only chains coming from the named
                       file.  (Where file is input).
		       Example: '-x file1 --requires file2' checks, if 
		       file1 requires file2 and show the chain if so.
  -o            file
  --contributes file
  --provides    file   As -t, but output only chains leading to the named file.
                       (Where file is output).
		       Example: '-x file1 --contributes file2' checks, if 
		       file1 is a requirement of file2 and show the chain if so.
		       Example: '-ls --provides file2' lists the input files
		       required to provide file2.


  -a                   Show all files and dependencies, including system
                       libraries.

  -c                   Show all files as canonical path names.  Default: use
                       initial working directory to shorten names.

  -C                   Run consistency checks on dumpfile. Only recommended for
                       dumpfiles created with -e in effect.
                        

};
  print STDERR "\nERROR: $msg\n" if $msg;
  return 0;
}

sub start_cmd
{
  my ($cmd, @args) = @_;
  my %global = ();
  my $p = 
    { 
  	cmdname => $cmd, 
	argv => [ $cmd, @args],
	pid => getpid, 
	tstamp => time,
    };

  ## Check and record all open resources using /proc and-or lsof. 
  ## (at least record where stdin/stdout/stderr point to)
  ##
  lsof($p, 1);

  ## 
  ## Create a pipe and fork.
  ## (record the child pid as a preliminary pid.)

  my $rfd = new IO::Handle();
  my $wfd = new IO::Handle();
  pipe(\*$rfd, \*$wfd);

  if (my $pid = fork())
    {
      # parent
      close $wfd;
      $p->{pid} = $pid;
    }
  else
    {
      # child
      close $rfd;
      fcntl($wfd, F_SETFD, 0);		## set FD_CLOEXEC bit to 0;

      ## Execute 
      ##	strace -f -F -o /dev/fd/%d cmd args

      ##
      ## (for hard cases we could use 
      ## 	strace -f -F -o /dev/fd/%d perl -e 'exec @ARGV' cmd args
      ## and parse up to the second execve to retrieve additional info made
      ## available by additional -e commands or from the standard perl setup
      ## code. E.g. perl uses set_tid_address() which alway returns the pid.)

      my @cmd = (qw(/usr/bin/strace -f -F -s ), $strace_s, '-o', "/dev/fd/".$wfd->fileno, @{$p->{argv}});
      my $cmd = join ' ', map { /\s/ ? (/"/ ? qq{'$_'} : qq{"$_"}) : $_ } @cmd;
      print "$cmd\n" if $verbose > 1 or $strace_verbose;
      exec @cmd;
      die "exec('" . join(' ', @cmd) . "') failed: $!\n";
      exit 0;
    }

  ## expect the first line seen on the pipe to be of the format
  ## 	pid execve("...

  my $ring = { tbuf => [] };
  my $tok = tokenize($ring, $rfd->getline);
  ##
  ##
  ## parse pid and store it.
  ##
  ## Pack child pid and pipe fildescriptor and all recorded I/O connections 
  ## into a struct (ignore our own pipe)
  ## read from the strace output until we saw the first successful exec call.
  ## check if it is plausible, warn if not.
  ##
  ## Discard all resources that cannot survive an exec. e.g. mmaped files.
  ##

  die "no report - nothing to analyze\n" unless defined $tok;

  die "execve(...\nexpected, not $tok->{tok}[0]\n" unless $tok->{tok}[0] =~ m{^exec};
  $p->{pid} = $tok->{pid};
  die "exec failed ", Dumper $tok if $tok->{tok}[-1] ne '= 0';

  my $free = exec_seen($p, strace_unquote($tok->{tok}[2]));
  ## this initial exec is special.
  ## all objects free'd here, are no longer prerequisites.
  for my $id (@$free)
    {
      $global{fs}{obj}[$id]{prereq} = 0;
    }

  ## FIXME. this did not check for a *successful* exec.

  $pp->{p}         = { $p->{pid} => $p };
  $pp->{init}      = { proc => $p, cwd => $p->{res}{cwd}{n}, rtd => $p->{res}{rtd}{n} };
  $pp->{strace_fd} = $rfd;
  $pp->{ibuf}      = $ring->{tbuf};

  # $pp->{fs} is already populated through exec_seen, store_inode
  return $pp;
}

sub expand_dumpfilename
{
  my ($p, $dumpfile) = @_;
  return '-' if $dumpfile eq '-';

  my $tstamp = POSIX::strftime "%Y%m%d%H%M%S", localtime($p->{tstamp}||$p->{init}{proc}{tstamp});

  my $cmdname = $p->{cmdname} || $p->{init}{proc}{cmdname};
  my $pid     = $p->{pid}     || $p->{init}{proc}{pid};
  $cmdname =~ s{/}{_}g;
  $cmdname =~ s{^_+}{};

  $dumpfile =~ s{%p}{$pid}g;
  $dumpfile =~ s{%n}{$cmdname}g;
  $dumpfile =~ s{%t}{$tstamp}g;

  # imply a suffix.
  $dumpfile .= '.ioa' unless $dumpfile =~ m{\.\w+$};

  print "dumpfile: $dumpfile\n" if $verbose > 1;

# -f no longer exits, it is implicit.
#  die "dumpfile '$dumpfile' exists. (Try -f ?)\n" if -e $dumpfile and !$noclobber;
  return $dumpfile;
}

##
## retrieve information about open fildescriptors,
## and mmaped files, root dir, cwd.
## If a second parameter fd is given, only the entry for that
## particular filedescriptor number is updated.
##
## start_cmd calls us is_ioana=1, so that the text_segment is flagged {ioana}=1
## This flag helps exec_seen() to draw the line where dependency chains should begin.
##
sub lsof
{
  my ($p, $is_ioana, $fd) = @_;
  return if $offline;
  
  my $f = '0';		# NUL separated output.
  $f .= 'a';		# r=read, w=write, u=update
  $f .= 'd';		# device character code, (does not work?)
  $f .= 'D';		# filesystem-devnode in hex
  $f .= 'r';		# device major/minor in hex
  $f .= 'i';		# inode number
  $f .= 'f';		# file descriptor
  $f .= 'o';		# file offset (does not work?)
  $f .= 's';		# file size
  $f .= 't';		# file type: REG, CHR, DIR
  $f .= 'T';		# TCP/IP info
  $f .= 'S';		# stream module and device names
  $f .= 'n';		# name, comment or internet address, 
                        # (using \r\n but not \\, beware!)

  my $cmd = "env LC_ALL=C /usr/bin/lsof -F $f -p $p->{pid}";
  $cmd .= " -a -d $fd" if defined $fd;	# limit to a certain fd number.

  # -b is essential to avoid blocking in stat() calls on dead filesystems.
  # but it makes certain fields not available, sigh.
  # types CHR and DIR may show up as 'unknown', and
  # n-names may look like '/usr/bin/perl (stat: Resource temporarily unavailable)'
  # or '/suse/jw/src/perl/ioana-0.08 (wotan:/real-home/jw)'
  # some of the silly texts behind the names can be prevented with -w, but not all.
  # sigh.
  $cmd .= " -w -b" if $avoid_block;

  open IN, "$cmd 2>/dev/null|" or die "failed to run $cmd: $!\n";
  while (defined (my $line = <IN>))
    {
      my %h = $line =~ m{(\w)([^\0]*)\0}g;

      if (defined($h{n}) and $h{n} =~ m{^(.*?)\s+\((\S+:.*?)\)$})
        {
	  ## this did not happen on sles9. it happens on code10.
	  # fmema tREGD0x307i28484n/usr/bin/perl (stat: Resource temporarily unavailable)
	  # fmema tREGD0x0i0n[heap] (stat: Resource temporarily unavailable)
	  # fmema tREGD0x307i10926n/lib64/ld-2.3.91.so (stat: Resource temporarily unavailable)
	  # or:
	  # fmema tREGD0x0i0n[heap] (stat: No such file or directory)
	  #
	  # lsof appears to be immune to locale, but we still force LC_ALL=C make sure
	  # future releases still talk something we can parse.

	  $h{n} = $1;
	  my $rest = $2;
	  if ($verbose > 1)
	    {
	      warn "lsof: message '$rest' stripped from path name ($h{n})\n" unless 
		    $h{n} eq '[heap]' or 
		    $rest =~ m{^stat: Resource temporarily unavailable$};
	    }
	}

      next unless defined $h{f};	# ignore p line

      if ($h{f} =~ m{^(\d+)$})
        {
          $p->{res}{fd}{$1} = \%h;
	}
      elsif ($h{f} =~ m{^(txt|rtd|cwd)$})
        {
	  $p->{res}{$1} = \%h;
	}
      elsif ($h{f} eq 'mem')
        {
	  $p->{res}{mmap}{inode_pair(\%h,1)} = \%h;
	}
      else
        {
	  die Dumper "unknown f:", \%h;
	}
    }

  $p->{res}{txt}{ioana} = 1 if $is_ioana;

  close IN;

  for my $r (values %{$p->{res}{fd}}, values %{$p->{res}{mmap}}, $p->{res}{txt})
    {
      next if $r->{n} =~ m{^\[.*\]$} and $r->{i} == 0 and $r->{D} eq '0x0';	# skip [heap] and such.
      next if $r->{t} eq 'FIFO' and $r->{n} eq 'pipe';				# skip pipes
      next if $r->{ioana};						# skip ioana's own txt.

      ## we come here only with actual file system objects.
      store_cwd_rtd($p, $r);	# stores in resource $r
      store_inode($p, $r)->{prereq}++;	# stores in global process independant space $pp;
    }
  print "exiting lsof\n" if $verbose > 2;
}


=for commment
## provide either $i or $p.
## lookup_inode returns the inode and a list of paths associated with it.
## as fas as known. It returns undef, if the object is unknown.

sub lookup_inode
{
  my ($g, $res, $path) = @_;

  unless (defined $path)
    {
      my $in = inode_pair($res, 1);
      return [ $in, $g->{inode}{i}{$in} ] if defined $in;

      $path = full_pathname($res);
    }
  my $in = $g->{inode}{p}{$path};
  return undef unless defined $in;

  return [ $in, $g->{inode}{i}{$in} ];
}
=cut


## stores path, inode and maintains unique and persistent filesystem objects.
##
## store_inode stores the association between an inode_pair $i and a 
## global pathname $p in the inode table of $pp->{fs}.
## Instead of $i, $p it can also take a reference to a resource -- 
## in which case it constructs the values from the resource if possible.
## if $i, or $p were not supplied or could not be derived, nothing is done.
##
## {fs}{inode}{$i}{N} is a hash of global pathnames under which this object 
## is currently known.
##
sub store_inode
{
  my ($proc, $i, $p) = @_;
  my $res;
  if (ref $i)
    {
      $p = full_pathname($i) unless defined $p;
      $i = try_stat($proc, $res = $i);
      undef $i if $i =~ m{^:};	# avoid_block or offline stopped us or stat failed on a short lived object.
    }
  else
    {
#      $verbose += 2;
    }

  my $obj;  $obj  = $pp->{fs}{inode}{$i} if defined $i;
  # make update possible, (not used in ana_connect(), but hey...)
  $obj = $pp->{fs}{obj}[$res->{o}] if !defined($obj) and defined($res) and defined($res->{o});	
  my $obj2; $obj2 = $pp->{fs}{path}{$p}  if defined $p;
  if ($obj and $obj2 and $obj != $obj2)
    {
      warn Dumper $obj, $obj2, $pp->{fs}{inode}{$i}, $i;
      die "store_inode: divergence: $p -> $obj2->{I}, $i -> $obj->{N}" 
    }
  $obj ||= $obj2;

  die "store_inode: inconsistency: res->{o}=$res->{o} ($pp->{fs}{obj}[$res->{o}]{N}), oidx=$obj->{oidx} ($obj->{N})" 
    if $obj and $res and $res->{o} and $res->{o} != $obj->{oidx};
  
  ## object constructor. keep in sync with do_trunc0().
  unless ($obj)
    {
      $obj = 		## create a new one.
        { 
	  tstamp => time,
          pid => $proc->{pid}, 
          cmd => flatten_array_refs(@{$proc->{argv}})
	};
      if ($res)
	{
	  for my $k qw(t s)
	    {
	      $obj->{$k} = $res->{$k} if defined $res->{$k};
	    }
	}
      push @{$pp->{fs}{obj}}, $obj;
      $obj->{oidx} = $#{$pp->{fs}{obj}};
    }

  if (defined $res)
    {
      $res->{o} = $obj->{oidx};		## helps add_dependency to hit the obj table.
      $res->{I} = $i if defined $i and !defined($res->{I});	## helps remember_loaded
    }

  $obj->{N}{$p} = 1  if defined $p;	## inplace overwrite. Is that slower than testing?
  $obj->{I}     = $i if defined $i;	## inplace overwrite. Is that slower than testing?

  $pp->{fs}{inode}{$i} = $obj if defined $i and $i !~ m{^:};
  $pp->{fs}{path}{$p}  = $obj if defined $p;
  return $obj;
}

sub flatten_array_refs
{
  my @a;
  for my $a (@_)
    {
      push @a, (ref $a) ? '[' . flatten_array_refs(@$a) . ']' : $a;
    }
  return join ' ', @a;
}

# copy cwd and rtd from the process $p into the resource $r
sub store_cwd_rtd
{
  my ($p, $r) = @_;
  if ($p->{res}{rtd})
    {
      $r->{rtd} = { n => $p->{res}{rtd}{n}, D => $p->{res}{rtd}{D}, i => $p->{res}{rtd}{i} };
    }
  else
    {
      $r->{rtd} = { n => '/' };
    }
  if ($p->{res}{cwd})
    {
      $r->{cwd} = { n => $p->{res}{cwd}{n}, D => $p->{res}{cwd}{D}, i => $p->{res}{cwd}{i} };
    }
  else
    {
      $r->{cwd} = { n => '/' };
    }
}

# return cwd and rtd from the proc $p
sub fetch_cwd_rtd
{
  my ($p) = @_;
  my $r = {};
  store_cwd_rtd($p, $r);
  return (cwd => $r->{cwd}, rtd => $r->{rtd});
}

sub strace_unquote
{
  my ($str) = @_;

  my $r = '';
  my $state = '0';
  return [ $str ] if $str eq '...'; 	# -s shortened an array of doublequoted strings, e.g. in execve()
  while ($str =~ m{(.)}g)
    {
      my $ch = $1;
      if ($state ne '\\')
	{
	  if ($ch eq '"')
	    {
	      if ($state eq '"')
	        {
		  return [ $r, $1 ] if $str =~ m{\G(\.\.\.)};
	          return $r;
		}
	      $state = '"';
	    }
	  elsif ($ch eq '\\')
	    {
	      $state = '\\';
	    }
	  else
	    {
	      $r .= $ch;
	    }
	}
      else
        {
	  $r .= $ch;
	  $state = '"';
	}
    }

  die qq{strace_unquote('$str'): not terminated by '"'} if $state;
  die qq{strace_unquote('$str'): not a doubleqoted string};
}

sub exec_seen
{
  my ($p, $cmdname, $argv) = @_;

  my @unused_obj;
  if ($depend_across_exec and !defined($p->{res}{txt}{ioana}))
    {
      # we cannot have multiple text segments for a process.
      # FIXME: should we promote {res}{txt} to ARRAY, like {fd} and {mmap}?
      # Oh, remember_loaded() looks like a good solution, too.

      # But do not depend on the initial perl-process that runs ioana.
      # start_cmd() sets {ioana} on the initial perl text segment.
      remember_loaded($p, $p->{res}{txt}, 'exec') unless $p->{res}{txt}{ioana};
    }
  else
    {
      # mmaps never survive exec, program text neither.
      @unused_obj = grep { defined } map { $_->{o} } values %{$p->{res}{mmap}}, $p->{res}{txt}, $p->{res}{load};
      delete $p->{res}{mmap};

      # With tossing {load} here, 'make test' no longer lists Makefile as a dependency.
      # Surprising as it is, but this is correct!
      # It never occured to me that a Makefile is the same type of dependency as /usr/bin/cc is.
      # Both shall only be listed under -e.
      delete $p->{res}{load};
    }

  # we have a new text segment.
  $p->{res}{txt} = { n => $cmdname, t => 'REG', fetch_cwd_rtd($p) };

  # FIXME: Really set prereq here?
  # If this process only execs children without writing anything, its txt could only be 
  # a prereq under $depend_across_exec. Right?
  # eg. /usr/bin/cc 
  my $obj = store_inode($p, $p->{res}{txt});
  $obj->{prereq}++ if $depend_across_exec and !defined($p->{res}{txt}{ioana});

  $p->{res}{txt}{bytes_r} = $p->{res}{txt}{s};

  ## honour the close-on-exec fcntl.
  for my $fd (keys %{$p->{res}{fd}})
    {
      my $res = $p->{res}{fd}{$fd};
      if ($res->{fcntl} && $res->{fcntl}{FD_CLOEXEC})
        {
	  push @unused_obj, $res->{o} if $res->{o};
	  $p->{FD_CLOEXEC}{$fd} = $res->{fcntl}{FD_CLOEXEC};
          delete $p->{res}{fd}{$fd};
	}
      else
        {
	  unless ($depend_across_exec)
	    {
	      ## We reset the byte counters for read resources to 0 now. 
	      ## There is no dependency in this new namespace unless we read 
	      ## some more.
	      $res->{bytes_r} = 0 if defined $res->{bytes_r};
	    }
	}
    }

  free_obj_if_unlinked_unused(@unused_obj);

  $p->{cmdname} = $cmdname;
  $p->{argv} = $argv if defined $argv;
  return \@unused_obj;
}


# a simple tokenizer for strace output
#
# It returns
# { 
#   pid => 1234,
#   tok => [ 'write', '(', '1', '"8.pl\\n8.pl.orig\\naudio\\nbal.out\\nbin"...', '607', ')', '= 607' ],
# };
# also expect things like this:
# 'open', '(', '"/proc/mounts"', 'O_RDONLY|O_LARGEFILE', ')', '= 3'
# 'execve', '(', '"/bin/ls"', [ '"ls"', ], [ '/* 91 vars */' ], ')', '= 0' 
# 
# all ev elements starting with one of <[{-" need postprocessing!

sub tokenize
{
  my ($pp, $line) = @_;
  return undef unless defined $line;
  chomp $line;
  my $r = {};

  ## make life easy for the doublequote scanner
  $line =~ s{\\\\}{\\134}g;
  $line =~ s{\\"}{\\42}g;

  if ($pp->{ibuf})
    {
      push  @{$pp->{tbuf}}, $line;
      shift @{$pp->{tbuf}} if scalar @{$pp->{tbuf}} > 10;
    }

  unless ($line =~ s{^(\d+)\s+}{})
    {
      warn "garbage line: '$line'\n";
      return;
    }
  $r->{pid} = $1;
  # strace -tt -f prints this format:
  # 26880 19:05:42.974133 execve("/usr/bin/build...
  $r->{tstamp} = $1 if $line =~ s{(\d\d:\d\d:\d\d\.\d+)\s+}{};

  $pp->{strace_lnr}++;
  print "$pp->{strace_lnr}: $r->{pid}:$line\n" if $verbose > 1 or $strace_verbose;
  my @token;

  # odd things seen:
  # 9033  set_thread_area({entry_number:-1 ->  <unfinished ...>
  # }

  while ($line =~ m{\G
    ("[^"]*"(\.\.\.)?			# string with optional elipsis
    |/\*.*?\*/				# an inline comment
    |[\(\[\{]				# any opening parens
    |~\[				# negated brackets, rt_sigprocmask(SIG_BLOCK, ~[TRAP], ...
    |(\.\.\.)?[\)\]\}]			# any closing parens with optional elipsis
    |---\s+SIG.*			# a signal event
    |(&&|\|\||==|\-\>)			# operators used in waitpid and set_thread_area
    |[\w|\*|-]+				# bareword, multiplication or OR-expression
    |=\ .*				# entire return value
    |[=:]				# parameter value
    |<unfinished\ \.\.\.>		# begin of continuation
    |<\.\.\.\ \w+\ resumed>		# end of continuation
    )\s*(,\s*)?}gx)
    {
      push @token, $1;
    }

  unless ($token[-1] =~ m{^(= |<unfinished|---)})
    {
      die "tokenizer failed on $line: unknown ending '$token[-1]'\n";
    }

  $r->{tok} = \@token;
  return $r;
}

##
## we transform all groups within {} and [] into array objects.
## this yields a correct element count.
## parens () are not collapsed here, they are special.
##
## think this:
## [ 'set_thread_area', '(', '{', 'entry_number', ':', '-1', '->', '<unfinished ...>' ];
## we shall come here after reassembly, not before!
##
sub tokenize_mkarrays
{
  my (@a) = @_;

  my @r;
  my $a_idx = 0;
  my $in_array = 0;
  for my $i (0..$#a)
    {
      if ($a[$i] =~ m{^[\{\[]$})
        {
	  $a_idx = $i if ++$in_array == 1;
	}
      elsif ($a[$i] =~ m{^(\.\.\.)?[\]\}]$})
        {
	  my $elipsis = $1;
	  if ($in_array-- == 1)
	    {
	      my @aa = tokenize_mkarrays(@a[$a_idx+1..$i-1]);
	      push @aa, $elipsis if $elipsis;

	      ## lazy. We pack hashes as arrays here. 
	      ## this is sufficient for parameter counting.
	      @aa = ('{', @aa, '}') if $a[$a_idx] eq '{';
	      push @r, \@aa;
	    }
	}
      else
        {
	  push @r, $a[$i] if !$in_array;
	}
    }

  if ($in_array)
    {
      push @r, @a[$a_idx..$#a];
      warn "tokenize_mkarrays: unclose array starting at $a_idx found! " .Dumper \@r;
    }

  return @r;
}

## full_pathname constructs an absolute and canonical 
## path name for the resource $f,
## caches it in $f->{N} and returns it.
sub full_pathname
{
  my ($f) = @_;
  return $f->{N} if defined $f->{N};

  my $r = $f->{rtd}{n};
  my $n = $f->{n} || "<anon_fd=$f->{f}>";
  unless ($n =~ m{^/})
    {
      my $d = $f->{cwd}{n};
      $n = "$d/$n";
    }
  $n = "$r/$n";
  $n =~ s{/\./}{/}g;
  $n =~ s{/+}{/}g;

  # expand_symlinks also canonifies the name. Call always!
  my $cp = expand_symlinks($f->{ignore_symlinks} ? undef : $pp->{fs}{symlink}, $n);
  return $f->{N} = $cp;
}


# returns either a "maj/min" pair, or a ":full_pathname/<fd>" susbtitute
# or :fullname/txt, if it is the txt resource of $p.
sub try_stat
{
  my ($p, $f) = @_;

  my $in = inode_pair($f, 1);
  return $in if defined $in;

  my $n = full_pathname($f);
  unless ($offline or $avoid_block)
    {
      ## don't use lstat here.
      ## we vountarily follow symlinks.
      if (my @s = stat $n)
	{
	  $f->{D} = sprintf "0x%x", $s[0];
	  $f->{i} = $s[1];
	  $f->{s} = $s[7] if $s[7] > ($f->{s}||0);
	  if ($f->{t} eq 'open')
	    {
	      $f->{t} = 'REG'  if -f _;
	      $f->{t} = 'DIR'  if -d _;
#	      $f->{t} = 'LNK'  if -l _;
	      $f->{t} = 'BLK'  if -b _;
	      $f->{t} = 'CHR'  if -c _;
	      $f->{t} = 'FIFO' if -p _;
	      $f->{t} = 'SOCK' if -S _;
	    }
	  return inode_pair($f);
	}
    }
  my $suff = $f->{f};
  unless (defined $suff)
    {
      $suff = 'txt' if $f == $p->{res}{txt};
    }
  die "try_stat: need \$f->{f} or {res}{txt}: " . Dumper $f,$n,$p unless defined $suff;
  $n .= "/$suff" if defined $suff;
  return ":$n";
}

## returns device/inode in string representation.
sub inode_pair
{
  my ($h, $fail) = @_;
  return $h->{I} if defined $h->{I};

  if ($fail)
    {
      return undef unless defined $h->{D} and defined $h->{i};
    }

  my $d = $h->{D}||0;
  $d = hex $d if $d =~ m{^0x}i;
  $d .= "/" . ($h->{i}||0);
  return $h->{I} = $d;  
}

# enumerate_writables returns a list of resources
# which may be written by library code without using write() syscalls.
# Writable filedescriptors are not returned here, as we 
# handle them when a write() syscall actually happens.
sub enumerate_stealth_writables
{ 
  my ($p) = @_;
  return grep { $_->{stealth_writable} } values %{$p->{res}{mmap}};
}

sub res_obj_name
{
  my ($g, $res) = @_;
  return $res->{N} if defined $res->{N};
  my $obj = $res->{o};
  die "res_obj_name: no {o} in res= " . Dumper $res unless defined $obj;
  die "g->{fs}{obj}[$obj] not there " . Dumper $res unless defined $g->{fs}{obj}[$obj];
  return obj_name($g->{fs}{obj}[$obj]);
}

## an object with multiple names returns the name with the lowest sort.
sub obj_name
{
  my ($obj) = @_;
  die "obj undef" unless defined $obj;
  return ((sort keys %{$obj->{N}})[0])      if $obj->{N}      and scalar keys %{$obj->{N}};
  return ((sort keys %{$obj->{del}{N}})[0]) if $obj->{del}{N} and scalar keys %{$obj->{del}{N}};
  die "no name found: " . Dumper $obj unless $obj->{I};
  return "[$obj->{I}]";
}

sub add_dependency
{
  my ($g, $res_w, $res_r, $bytes, $type) = @_;

  my ($i_w, $i_r) = ($res_w->{o}, $res_r->{o});
  return if not defined($i_w) and ($res_w->{t}||'') eq 'CHR';	# character devices are save to ignore.
  return if not defined($i_r) and ($res_r->{t}||'') eq 'CHR';	# character devices are save to ignore.
  die "add_dependency: failed.", Dumper $res_w, $res_r, $bytes, $type unless defined $i_w and defined $i_r;

  my ($obj_w, $obj_r) = ($g->{fs}{obj}[$i_w], $g->{fs}{obj}[$i_r]);

  return if $i_w == $i_r;	## skip self dependency.

  my $total = $bytes;
  $total += $obj_w->{depends_on}{$i_r}{bytes} if $obj_w->{depends_on}{$i_r};

  ## Depend on no more bytes than what have been written.
  ## This catches repeated writes.
  $total = ($res_r->{bytes_r}||0) if $total > ($res_r->{bytes_r}||0);

  # We cannot rely on having $res_r->{s}.
  # we don't get it under $avoid_block,
  # try_stat() may also fail, if the file was removed meanwhile.

  ## For regular files keep their size as an upper limit of bytes read.
  $total = $res_r->{s} if $res_r->{t} ne 'FIFO' and defined $res_r->{s} and $total > $res_r->{s};
  return unless $total;


#  if ($res_r->{t} eq 'FIFO')
#    {
#      warn "add_dependency ($i_r -> $i_w) $total bytes: pipe seen at $type, " . Dumper $res_r, $res_w;
#    }

  my $res_r_name = res_obj_name($g, $res_r);
  my $res_w_name = res_obj_name($g, $res_w);

  die 'nameless r ' . Dumper $res_r, $obj_r unless $res_r_name;
  die 'nameless w ' . Dumper $res_w, $obj_w unless $res_w_name;

  my $l = ($verbose && is_system_path($res_r_name)) ? 2 : 1;
  if ($verbose > $l)
    {

      if (($dep_verbose{$res_w_name}{$res_r_name}||0) < $bytes)
        {
          print "$res_w_name depends on $res_r_name \t $bytes, $type\n";
	  $dep_verbose{$res_w_name}{$res_r_name} = $bytes;
	}
    }
  $obj_w->{depends_on}{$i_r} = 
    { 
      N => obj_name($obj_r), 
      type => $type, 
      pid => $obj_r->{pid}, 
      cmd => $obj_r->{cmd} 
    } 
    unless $obj_w->{depends_on}{$i_r};
  $obj_w->{depends_on}{$i_r}{bytes} += $total;
  $obj_w->{depends_on}{$i_r}{counter}++;
}
  
sub printable_name
{
  my ($this, $name) = @_;
  $name =~ s{^$this->{init}{cwd}/?}{} if $this->{init}{cwd} and !$show_canonical;
  return $name;
}

# all=1: include system dependencies, 
# {N}[] is used or if empty {del}{N}[] or {o} is used for names for the obj.
#
# We must not use path names as keys here. 
# Object <-> pathname mapping is nonunique.
# Since 0.41, we use "$name#$oid" as key.
# 
sub find_dependencies
{
  my ($obj, $all) = @_;

  my $dep;
  for my $o (@{$obj})
    {
      # wheed out the completly unused
      next if !$o->{used} and 0 == scalar keys %{$o->{depends_on}};

      my @n = keys %{$o->{N}};
      push @n, keys %{$o->{del}{N}} if $o->{del};
      push @n, "[$o->{I}]" if defined $o->{I} and 0 == scalar @n;
      push @n, "[$o->{del}{I}]" if defined $o->{del}{I} and 0 == scalar @n;
      die "find_dependencies: object has no names: ", Dumper $o unless scalar @n;

      for my $p (@n)
        {
	  next if !$all and is_system_path($p);

          my $n = "$p#$o->{oidx}";
          $dep->{$n}{oidx} = $o->{oidx};
          $dep->{$n}{name} = $p;
	  if (my $rpminfo = $o->{rpminfo})
	    { 
	      $dep->{$n}{init} = "$rpminfo->{$p}.rpm" if $rpminfo->{$p};
	    }
	  
	  if ($o->{del} and $o->{del}{N}{$p} and $o->{created} and !$o->{N}{$p})
	    {
	      # a file that we created and unlinked is volatile.

	      my %volatile = ( unlink => $o->{del}{N}{$p} );
	      $volatile{seen} = $o->{tstamp} if defined $o->{tstamp};
	      $volatile{created} = $o->{created} if defined $o->{created};
	      $dep->{$n}{volatile} = \%volatile;
	    }

	  for my $d (keys %{$o->{depends_on}})
	    {
	      # $d is object-idx
	      my $v = $o->{depends_on}{$d};
#	      push @{$dep->{$n}{dep}}, $d if $all or !is_system_path($v->{N});
	      $dep->{$n}{dep}{$d} = $v if $all or !is_system_path($v->{N});
	    }
	}
    }
  return $dep;
}

##  rpm=1: lookup rpm-names for prereqs.
sub write_dumpfile
{
  my ($g) = @_;

  printf "write_dumpfile: %d objects.%s\n", scalar @{$g->{fs}{obj}}, ($use_rpm ? " Running RPM-lookup. ": "") if $verbose;
  for my $o (@{$g->{fs}{obj}})
    {
      # we created it, it is still there, so it is important.
      $o->{used}++ if $o->{prereq} or ($o->{created} and scalar keys %{$o->{N}});

      # something depends on it, so it is important.
      my @dep = keys %{$o->{depends_on}};
      for my $i (@dep)
        {
	  $g->{fs}{obj}[$i]{used}++;
	}

      ## FIXME: 
      ## 'used' does not honor 'being depended on'. it just counts what this 
      ## file depends_on.
      ## Aeh, so what? Do we have any other dependancy tracking outside of {depends_on}?

      unless (defined $o->{s})
        {
	  ## oh, we never learned its size.
	  ## check if it is still around and cheat a little.
          for my $p (keys %{$o->{N}})
	    {
	      next if $p =~ m{^/proc/};
	      if (-f $p)
	        {
		  $o->{s} = - -s _;
		  printf "faked size of $p to %d\n", -$o->{s} if $verbose > 1;
		  last;
		}
	    }
	}

      if ($use_rpm and $o->{prereq} and !scalar(@dep) and !$o->{created})
        {
          ## Objects that were created or are known to have 
          ## dependencies (i.e. were written to) need not be 
	  ## checked for rpm here.
	  ## Objects that were never used as prerequisites likewise.

          my @n = keys %{$o->{N}};
          push @n, keys %{$o->{del}{N}} if $o->{del};
	  for my $p (sort @n)
	    {
	      next unless $o->{t} eq 'REG' or $o->{t} eq 'DIR';
	      next unless -e $p;

	      if ($p !~ m{^([^;`!^]+)$})
	        {
		  warn "rpm_lookup: $p is tainted.";
		  next;
		}
	      my $r = `$rpm_cmd $1`;
	      chomp $r;
	      next if $r =~ m{is not owned by any package$};

	      if ($r =~ m{\s})
	        {
		  warn "$rpm_cmd $p: $r\n";
		  next;
		}

	      print "$p installed from $r.rpm\n" if $verbose > 1;
	      $o->{rpminfo}{$p} = $r;
	    }
	}
    }

  local $Data::Dumper::Sortkeys = 1;
  local $Data::Dumper::Terse = 1;
  local $Data::Dumper::Indent = 1;
  local $Data::Dumper::Quotekeys = 0;

  my $dump = { obj => $g->{fs}{obj}, init => $g->{init} };

  if ($dump_proc)
    {
      print "write_dumpfile: dump_proc\n" if $verbose > 1;
      for my $p (values %{$g->{p}})
	{
	  ## housekeeping!
	  delete $p->{children}  unless scalar keys %{$p->{children}};
	  delete $p->{txt}       unless scalar keys %{$p->{txt}};
	  delete $p->{res}{fd}   unless scalar keys %{$p->{res}{fd}};
	  delete $p->{res}{mmap} unless scalar keys %{$p->{res}{mmap}};
	  delete $p->{res}       unless scalar keys %{$p->{res}};
	}
      $dump->{proc} = $g->{p};

      # avoid references via $VAR1.
      $dump->{init}{proc} = deepcopy($dump->{init}{proc}); 
    }

  $dump->{features}{use_rpm}            = 1 if $use_rpm;
  $dump->{features}{depend_across_exec} = 1 if $depend_across_exec;
  $dump->{features}{version} 		= $version;
  $dump->{features}{avoid_block}	= 1 if $avoid_block;
  $dump->{features}{offline}	        = $offline if $offline;

  if ($g->{dumpfile} eq '-')
    {
      print STDOUT Dumper $dump;
    }
  else
    {
      print "$g->{dumpfile} written.\n" if $verbose;
      open OUT, ">$g->{dumpfile}" or die "open($g->{dumpfile}) failed: $!";
      print OUT Dumper $dump;
      close OUT;
    }
}

## Taken from w3dcm.pl:
## Collapse /foo/../ BUG alert: this also hits /../../
##
## canonical_path adds $base in front of $path, to make it an 
## absolute path, if needed. 
## The usual // and . and  .. handling is also done here.
## We do not check if $base has a leading slash, but we provide one anyway.
## The return value is untainted!
##
## BUG ALERT:  /foo/../bar is miscollapsed when symlinks are tricky.
##
sub canonical_path
{
  my ($path, $base) = @_;

  $path = '' unless defined $path;
  $base = '' unless defined $base;

  # make relative paths absolute
  $path = "/$base/$path" unless $path =~ m{^/};

  # collapse /./ sequences first, so that /../ cannot match them
  $path =~ s{/(\./+)+}{/}g;

  # Add trailing / so that .. at the end is caught.
  $path = "/$path/";	

  # Collapse /foo/../ CAUTION: 
  # We use a negative look ahead assertion to matching /../../
  while ($path =~ s{/(?!\.\./)[^/]+/+\.\./}{/}g) {;}

  # collapse multiple slashes into one.
  $path =~ s{//+}{/}g;

  # remove all leading /.., so that we cannot escape the root.
  $path =~ s{^(/\.\.)+}{/};

  # remove trailing slash.
  $path =~ s{(.)/+$}{$1};

  # remove trailing dot.
  $path =~ s{/\.$}{/};

  # remove trailing slashes again...
  $path =~ s{(.)/+$}{$1};

  # collapse multiple slashes again...
  $path =~ s{//+}{/}g;

  return $1 if $path =~ m{^(.*)$}s;
}

##
## call this only with return values from canonical_path();
##
sub resolve_symlinks
{
  my ($name, $rec) = @_;
  return $name unless defined $pp->{fs}{symlink};

  $rec ||= 0;
  if ($rec > 20)
    {
      warn "$0: resolve_symlinks($name, $rec): too many levels of symbolic links.\n";
      return $name;
    }
  die "resolve_symlinks: not impl.", Dumper $pp->{fs}{symlink}, $name;
}

##
## expand_symlinks returns a canonified version if $path
## with all symlinks expanded recursivly.
##
## $path must be an absolute path, but need not be canonic.
## $lnk is a hash of all existing symlinks.
##
sub expand_symlinks
{
  my ($lnk, $path, $rec) = @_;
  $rec ||= 0;
  my @p = split '/', $path;
  shift @p;	# leading slash.
  my $r = '';
  for my $n (@p)
    {
      $r .= "/" . $n;
      $r =~ s{/+\.?$}{};		# remove trailing / and /.
      $r =~ s{/+[^/]+/\.\.$}{};		# collapse /foo/..

      my $s = $lnk->{$r};
      if (defined $s)
        {
          print "expand_symlinks: $r -> $s" if $verbose > 1;
	  unless ($s =~ m{^/})
	    {
	      (my $d = $r) =~ s{/[^/]+$}{/};

	      ## efficiency loss alert:
	      ## expand_symlinks below only needs to process $s,
	      ## but will process $d . $s, this could be more
	      ## efficient.

	      $s = $d . $s;
              print " (absolute: -> $s) " if $verbose > 1;
	    }
          
          if ($rec > 20)
            {
              warn "expand_symlinks: recursion depth $rec: $path\n";
	    }
	  else
            {
	      $r = expand_symlinks($lnk, $s, $rec + 1) if defined $s;
	    }

          print "-> $r\n" if $verbose > 1;
	}
    }
  return $r;
}

sub deepcopy
{
  my ($old, $x) = @_;
  die "deepcopy recursion limit 100 reached\n" if ++$x >= 100;

  return undef unless defined $old;
  my $ref = ref $old;
  return $old if $ref eq '';

  if ($ref eq 'HASH')
    {
      my %new;
      for my $k (keys %$old)
        {
	  $new{$k} = deepcopy($old->{$k}, $x);
	}
      return \%new;
    }
  if ($ref eq 'ARRAY')
    {
      my @new;
      for my $i (0..$#$old)
        {
	  $new[$i] = deepcopy($old->[$i], $x);
	}
      return \@new;
    }
  if ($ref eq 'SCALAR')
    {
      my $new = deepcopy($$old, $x);
      return \$new;
    }
  die "deepcopy type '$ref' not impl.\n";
}

#
# input: {dep} hash, 
# output array of used elements.
#  each such element has {name}, {oidx}, {dep} = [ "<name#oidx>", ...], ...
#
# FIXME:
# We must not use path names as keys in {dep}. 
# A file 'conftest.vals' is created and deleted often during a typical 
# configure run. A later 'conftest.vals' may well depend on an 
# earlier 'conftest.vals'
# and would thus create false and even cyclic dependencies.
#
# {dep}{<name#oidx>}{oidx} are object ids.
# {dep}{<name#oidx>}{name} are names
# {dep}{<name#oidx>}{dep}{<oidx>}{N} are names
#
sub build_list
{
  my ($dump) = @_;

  my $dep = $dump->{dep};
  my @l;
  for my $k (sort { $dep->{$a}{oidx} <=> $dep->{$b}{oidx} } keys %{$dep})
    {
      my $f = $dep->{$k}{name};
      next if -d $f;		## FIXME: should not be done here.
      my $l = { oidx => $dep->{$k}{oidx}, name => printable_name($dump, $f) };
      $l->{flags} = 'i' if $dump->{dep}{$k}{init};
      $l->{flags} .= 't' if $dump->{dep}{$k}{volatile};

      my $v = $dump->{dep}{$k}{dep};
      my %d;
      my $input = 0;
      if (ref $v eq 'HASH')
	{
	  for my $oidx (keys %$v)
	    {
	      my $o = $v->{$oidx};
	      $input += $o->{bytes} if $o->{bytes};
	      $d{printable_name($dump, $o->{N})."#$oidx"}++ if $o->{N};
	    }
	}
      elsif (defined $v)
        {
	  die "hashref of dependecies expected: ", Dumper $v;
	}

      $l->{input} = $input;
      $l->{dep} = [sort keys %d];
      push @{$l->{dep}}, "#$dump->{dep}{$k}{init}" if $dump->{dep}{$k}{init} and $use_rpm;

      my $o = $dump->{obj}[$l->{oidx}];
      $l->{tstamp} = $o->{tstamp}||0;
      $l->{cmd}    = $o->{cmd}||'';
      $l->{size}   = '-';
      $l->{input_sep}    = '/';
      if (defined $o->{s})
        {
	  if ($o->{s} < 0)
	    {
	      $l->{size} = -$o->{s};
	      $l->{input_sep} = '?';
	    }
	  else
	    {
	      $l->{size} = $o->{s};
	    }
          $l->{input}  = $l->{size} if $l->{size} < $l->{input};
	}
      $l->{input} = $l->{input_sep} = ' ' if !$l->{input};
      $l->{flags}  .= 'd' if $o->{t} eq 'DIR';
      $l->{flags}  .= substr($o->{t},0,1) if $o->{t} ne 'REG' and $o->{t} ne 'DIR';
      push @l, $l;
    }
  return \@l;
}

sub glob2re
{
  my ($pat) = @_;

  $pat = "^\Q$pat\E\$";		# quote everything and add anchors
  $pat =~ s{\\\*}{.*}g;		# * -> .*
  $pat =~ s{\\\?}{.}g;		# ? -> .
  $pat =~ s{^\^\.\*}{};		# remove front anchor?
  $pat =~ s{\.\*\$$}{};		# remove back anchor?
  return $pat;
}

## find_matches returns matching entries from array $all.
## It matches name as filename or as index number.
## FIXME: there is no way to force an index match if there 
## is a file with same numeric name.
##
sub find_matches
{
  my ($all, @names) = @_;
  
  my @r;

  for my $name (@names)
    {
      my $pat = glob2re($name);
      my @matches;

      ## first try: match by glob pattern.
      for my $i (@$all)
	{
	  push @matches, $i if $i->{name} =~ m{$pat};
	}

      ## second try: match without path components
      if (!scalar(@matches) and $name !~ m{^/})
	{
	  $pat =~ s{^\^}{(^|/)};
	  for my $i (@$all)
	    {
	      push @matches, $i if $i->{name} =~ m{$pat};
	    }
	}

      ## third try: match by index number.
      if (!scalar(@matches) and $name =~ m{^\d+$})
	{
	  for my $i (@$all)
	    {
	      push @matches, $i if $i->{oidx} == $name;
	    }
	}
      push @r, @matches;
    }
  return \@r;
}

##
## find_indices() takes build_list(find_dependencies()) as input and
## returns forward and backward dependency trees using only indices.
## an $idx->{by_name} mapping is returned as a third parameter.
##
## The indices returned here (%dep, %rdep) are build_list-idx, not object-idx.
## The third return value maps from object-idx to build_list-idx.
sub find_indices
{
  my ($all) = @_;

  my %dep;	## usual 'depends on', offsets in $all
  my %rdep;	## 'is used as a dependency by', offsets in $all
  my %i2i;	## by_oidx
  my %k2i;	## by_name#oidx
  for my $i (0 .. $#$all)
    {
      $i2i{$all->[$i]{oidx}} = $i;
      $k2i{$all->[$i]{name}.'#'.$all->[$i]{oidx}} = $i;
    }
  for my $i (0 .. $#$all)
    { 
      for my $d (@{$all->[$i]{dep}})
        {
	  next if $d =~ m{^#};	# rpm files are not in our $all-list.
	  my $j = $k2i{$d};
	  die Dumper $all, "$d not in \$k2i" unless defined $j;
	  push @{$dep{$i}}, $j;
	  push @{$rdep{$j}}, $i;
	}
    }
  return (\%dep, \%rdep, \%i2i);
}

sub find_target
{
  my ($all, $name) = @_;
  return undef unless defined $name;

  my $target = find_matches($all, $name);
  die "'$name' not found. (Try -l)\n" unless scalar @$target;
  if (scalar(@$target) > 1)
    {
      warn "Pattern '$name' is ambiguous:\n";
      for my $l (@$target)
        {
	  warn sprintf "%4d %s\n", $l->{oidx}, $l->{name};
	}
      die "Choose index-number or name.\n";
    }
  return $target->[0];
}

sub find_trace
{
  my ($all, $name) = @_;

  my $target = find_target($all, $name);
  my ($didx, $rdidx, $oidx2idx) = find_indices($all);

  $target->{d} = 0;

  my @a;
  if ($trace_to)
    {
      @a = ($oidx2idx->{$target->{oidx}});
      for (my $n = 1; ; $n++)
	{
	  my %a;
	  for my $i (@a)
	    {
	      for my $j (@{$didx->{$i}})
		{
		  $a{$j}++;
		  $all->[$j]{d} = $n;
		}
	    }
	  @a = keys %a;
	  last unless scalar @a;
	}
    }

  if ($trace_from)
    {
      @a = ($oidx2idx->{$target->{oidx}});
      for (my $n = -1; ; $n--)
	{
	  my %a;
	  for my $i (@a)
	    {
	      for my $j (@{$rdidx->{$i}})
		{
		  $a{$j}++;
		  $all->[$j]{d} = $n;
		}
	    }
	  @a = keys %a;
	  last unless scalar @a;
	}
    }

  return [grep { defined $_->{d} } @$all];
}

##
## mode = 0: just list names, one by one.
## mode = 1: list names, one by one; with dependencies.
## mode = 2: list table: index, name, size, used_bytes.
## mode = 3: list table: index, name, size, used_bytes; with dependencies.
##
sub do_list
{
  my ($dump, $mode) = @_;

  my $list = build_list($dump);
  
  if (defined $trace_name)
    {
      $list = find_trace($list, $trace_name) ;
      $list = [ sort { $b->{d} <=> $a->{d} || $a->{oidx} <=> $b->{oidx} } @$list ];
    }

  unless (scalar @$list)
    {
      print "do_list: empty list\n" if $verbose;
      return 1;
    }

  print "\n" if -t STDOUT;
  for my $l (@$list)
    {
      my $d = '';
      if (defined $trace_name)
	{
	  $d = "";
	  $d = sprintf " %2d> ", $l->{d}  if $l->{d} > 0;
	  $d = sprintf " %2d< ", -$l->{d} if $l->{d} < 0;
	}

      if (!$mode)
        {
	  print $l->{name};
	}
      elsif ($mode == 1)
        {
	  print "$d$l->{name}: ";
	  print "(temp) " if ($l->{flags}||'') =~ m{t};
	  print join ' ', @{$l->{dep}};
	}
      else
        {
	  printf "$d%-4d %3s %10s%s%-11s%s", $l->{oidx}, $l->{flags}||'-', 
	  	$l->{size}, $l->{input_sep}, $l->{input}, $l->{name};
	  print ": " . join ' ', @{$l->{dep}} if $mode > 2;
	}
      print "\n";
    }
  print "\n" if -t STDOUT;

  return 0;
}

sub check_consitency
{
  my ($dump) = @_;

  my $oops = 0;
  
  ## checks on 
  ## $dump->{dep}{$name} has the following fields:
  ## oidx: numerical object index this filename belongs to.
  ## init: rpm-package this filename came from (optional)
  ## dep: array of dependencies this filename depends on (optional)
  ## volatile: created/unlinked timestamps (optional)

  ## No filename may have init and dep.
  my %nodeps;
  for my $v (values %{$dump->{dep}})
    {
      my $n = $v->{name};
      $nodeps{$v->{oidx}} = $n unless defined $v->{dep};
      if (defined $v->{init} and defined $v->{dep})
        {
          print STDERR "$n#$v->{oidx}: from $v->{init} has dependencies.\n";
	  $oops++;
	}
    }

  ## all oidx must either have dependencies, or be part of other dependencies.
  ##
  ## OOPS. not true.
  ## Text segments are flagged prereq, but have not necesarrily written anything.
  ##
  for my $v (values %{$dump->{dep}})
    {
      for my $d (@{$v->{dep}})
        {
	  my $dd = $dump->{dep}{$d->{N}};
	  unless (defined $dd)
	    {
	      print STDERR "$v->{oidx}: unknown dependency $d->{N}\n";
	      $oops++;
	    }
	  delete $nodeps{$dd->{oidx}};
	}
    }

  for my $i (keys %nodeps)
    {
      printf STDERR "%4d: %s\t neither provides nor requires.\n", $i, $nodeps{$i};
      $oops++;
    }

  print STDERR "ok.\n" unless $oops;
  return 0;
}

##
## details=1:  include timestamps and processes involved.
## do_examine finds all (longest) chains ending or starting at 
## one of the @ARGV files. 
## If trace_name is given, only these chains are reported that
## have trace_name at the other end.

sub do_examine
{
  my ($dump, $argv, $details) = @_;
  $argv = ['*'] unless scalar @$argv;

  my $list = build_list($dump);
  my $args = find_matches($list, @$argv);
  my $target = find_target($list, $trace_name);
  my ($didx, $rdidx, $oidx2idx) = find_indices($list);

  my $target_idx = $target ? $oidx2idx->{$target->{oidx}} : undef;
  my @chains;
  for my $a (@$args)
    {
      if (!$target or $trace_to)
        {
	  print "trace_to\n" if $verbose > 2;
	  push @chains, walk_dep_tree($rdidx,  [$oidx2idx->{$a->{oidx}}], $target_idx, [{}]);
	}

      if (!$target or $trace_from)
        {
	  print "trace_from\n" if $verbose > 2;
	  push @chains, walk_dep_tree($didx, [$oidx2idx->{$a->{oidx}}], $target_idx, [{rev=>1}]);
	}
    }

  for my $c (@chains)
    {
      my ($d, @l) = @$c;
      $c->[0] = {%$d};		# unshare first element, we are going to modify it!
      $c->[0]{skip}++ if scalar @l < 2;
      next if $c->[0]{skip};

      if ($c->[0]{rev})
        {
          @l = reverse @l;
          $c = [{rev=>2}, @l];
	}
      $c->[0]{str} = ',' . join ',', @l, '';
    }

  for my $i (@chains)
    {
      next if $i->[0]{skip};
      for my $j (@chains)
         {
	   next if $i == $j or $j->[0]{skip};
	   if (index($i->[0]{str}, $j->[0]{str}) >= 0)
	     {
	       print "skip: $i, $j, index($i->[0]{str}, $j->[0]{str})\n" if $verbose > 2;
	       $j->[0]{skip}++ ;
	     }
	 }
    }

  return 1 unless scalar @chains;
  print "\n" if -t STDOUT;
  for my $c (@chains)
    {
      my ($d, @l) = @$c;
      next if $d->{skip};
      my $pre = $d->{rev} ? "\t" : "";
      if ($details)
        {
	  for my $i (0..$#l)
	    {
	      my $l = $list->[$l[$i]];
	      printf "%s%*.*s", $pre, 3*$i,3*$i, ">> ";
	      printf "[%d] %s %1s%s%s %-30s ", $l->{oidx}, $l->{flags}||'-', 
	  	$l->{size}, $l->{input_sep}, $l->{input}, $l->{name};
	      print "{$l->{cmd}}\n";
	    }
	}
      else
        {
          print $pre . join " >> ", map { $list->[$_]{name} } @l;
	}
      print "\n";
    }
  print "\n" if -t STDOUT and !$details;
  return 0;
}

my %inside;

#
# the numbers seen in walk_dep_tree are not object numbers. 
# They are indices in @$list.
#
sub walk_dep_tree
{
  my ($tree, $list, $stop, $path) = @_;
  print "enter list=(@$list) stop=$stop path=(@$path) tree=" . Dumper $tree if $verbose > 2;

  my @chains = ();
  for my $i (@$list)
    {
      die "dep cycle detected: (path, idx, dep) ", Dumper $path, $i, $tree->{$i} if $inside{$i};
      $inside{$i}++;
      if (defined $stop and $stop == $i)
        {
          push @chains, [ @$path, $i ];
	}
      elsif (defined $tree->{$i} and scalar @{$tree->{$i}})
        {
	  push @chains, walk_dep_tree($tree, $tree->{$i}, $stop, [ @$path, $i ]);
	}
      elsif (!defined $stop)
        {
          push @chains, [ @$path, $i ];
	}
      $inside{$i}--;
    }
  print "exit chains=".Dumper \@chains if $verbose > 2;
  return @chains;
}

###############################################################

## analyze takes a tokenized event (as parsed from strace output)
## and dispatches in the corresponding ana_* handler functions.
## The handler function return 0 on success.
##
## FIXME: anything we do not understand is fatal. E.g:
## - Resumed syscall need to be implemented here!
## - Signals and other line noise shall be ignored here.

sub analyze
{
  my ($pp, $ev) = @_;

  # fetch the process this event is for.
  my $p = $pp->{p}{$ev->{pid}};
  unless (defined $p)
    {
      if ($offline)
        {
	  # create dummy on the fly
	  $p = $pp->{p}{$ev->{pid}} = { offline => $offline, pid => $ev->{pid} };
	}
      else
        {
          die "analyze: process $ev->{pid} not there." 
	}
    }
  my $syscall = $ev->{tok}[0];
  my $retval_s = $ev->{tok}[-1];

  if ($p->{unfinished})
    {
      my $s1; $s1 = $1 if $syscall =~ m{^<\.\.\.\s+(\w+)\s+resumed>$};
      die "analyze: resume expected: ", Dumper $ev 
        unless defined $s1;
      die "analyze: resume mismatch: '$s1' was '$p->{unfinished}[0]'\n" 
        if $s1 ne $p->{unfinished}[0];

      $syscall = $s1;
      shift @{$ev->{tok}};
      pop @{$p->{unfinished}};
      $ev->{tok} = [ @{$p->{unfinished}}, @{$ev->{tok}} ];
      delete $p->{unfinished};	# done with
    }

  ## the usual case:
  ## syscall is a simple word, return value a signed number or question mark.
  if ($syscall =~ m{^\w+$} and $retval_s =~ m{^=\s+(0x[\da-f]+|-?\d+|\?)})
    {
      my $retval = $1;

      ## testing for typeglob, (should actually test for subroutine.)
      if (my $fn = $main::{"ana_$syscall"})	
        {
          print "analyze: ana_$syscall($ev->{tok}[2], ...) = $retval\n" if $verbose > 2; 

          $ev->{tok} = [ tokenize_mkarrays(@{$ev->{tok}}) ];

	  my $r = $fn->($p, $ev->{tok}, $retval);
	  ## what to do with $r here?
	}
      else
        {
	  unless ($ign_syscall{$syscall})
	    {
	      unless ($pp->{ignored}{$syscall})
		{
		  warn Dumper $ev->{tok}, "ana_$syscall() is undefined.";
		  print "Please either add '$syscall' to '\%ign_syscall' or implement a 'sub ana_$syscall'.\n";
		  print "Press Enter to continue (and ignore it now). Press Ctrl-C to abort.\n";
		  <STDIN> unless $keep_running;
		}
	    }
	  $pp->{ignored}{$syscall}++;
	}
    }
  elsif ($retval_s =~ m{^<unfinished})
    {
      die "analyze: second 'unfinished' without resume:", Dumper $p->{unfinished}, $ev
        if defined $p->{unfinished};
      $p->{unfinished} = $ev->{tok};
    }
  elsif ($retval_s =~ m{^\-\-\-\s+(\w+).*\s+\-\-\-$})
    {
      print "pid=$p->{pid} signal $1 seen.\n" if $verbose > 1;
    }
  else
    {
      die Dumper $ev, "strange syscall syntax";
    }
}

sub ana_fstat64 { ana_fstat(@_); }
sub ana_fstat
{
  my ($p, $t, $r) = @_;
  return if $r < 0;			# ignore unsucessful stat

  my $fd = $t->[2];
  unless ($p->{res}{fd}{$fd})
    {
      warn "fstat64(fd=$fd), open missed \n";
      $p->{res}{fd}{$fd} = { t => 'fstat', f => $fd };
    }
  $fd = $p->{res}{fd}{$fd};

  my $stat = join ',', @{$t->[3]};
  $stat =~ s{,=,}{=}g;	# '{,st_mode=S_IFREG|0644,st_size=55804,...,}' 
  $fd->{s} = $1 if $stat =~ m{st_size=(\d+)};
  my $type = $1 if $stat =~ m{st_mode=S_IF(\w+)};
  $type = 'FIFO' if $type eq 'IFO';	# argh, who defined these symbols?
  die Dumper $fd, $stat, "now '$type' in ana_fstat" if $fd->{t} ne $type and $fd->{t} !~ m{^(open|fstat)$};
  $fd->{t} = $type;
  warn Dumper $fd, $stat, "in ana_fstat" if $verbose > 2;
}

sub ana_lstat64   { ana_stat(@_) }
sub ana_stat64    { ana_stat(@_) }
sub ana_lstat     { ana_stat(@_) }
sub ana_stat
{
  my ($p, $t, $r) = @_;
  return if $r < 0;			# ignore unsucessful stat

  my $name = strace_unquote($t->[2]);
  return do_stat($p, $name, $t->[3]);
}

sub do_stat
{
  my ($p, $name, $t3) = @_;
  ## we cannot store anything, there is no resource open.
  ## but we can savely stat, because the application also just succeeded.
  my $stat = join ',', @$t3;
  $stat =~ s{,=,}{=}g;	# '{,st_mode=S_IFREG|0644,st_size=55804,...,}' 

  my $res = { n => $name, f => "stat" };	# f is here to assist try_stat() fails.
  $res->{s} = $1 if $stat =~ m{st_size=(\d+)};
  $res->{t} = $1 if $stat =~ m{st_mode=S_IF(\w+)};
  store_cwd_rtd($p, $res);	# a nice fake...
  store_inode($p, $res);

  return 0;
}

sub ana_old_mmap { ana_mmap(@_) }
sub ana_mmap2    { ana_mmap(@_) }
sub ana_mmap
{
  ## mmap', '(', 'NULL', 144821, 'PROT_READ', 'MAP_PRIVATE', '3', '0', ')', '= 0x2b2cbaa42000'
  
  my ($p, $t, $addr) = @_;
  return unless $addr =~ m{^0x};	# ignore unsucessful mmap.

  my $fd = $t->[6];
  return if $fd < 1;		# ignore anonymous mmaps (fd = -1).

  unless ($p->{res}{fd}{$fd})
    {
      warn "mmap(fd=$fd), open missed \n";
      $p->{res}{fd}{$fd} = { t => 'mmap', f => $fd, s => $t->[3]};
    }
  $fd = $p->{res}{fd}{$fd};
  $fd->{s} = $t->[3] if $t->[3] > $fd->{s};
  my $mmap = try_stat($p, $fd);
  while (exists $p->{res}{mmap}{$mmap})
    {
      $mmap .= "/";	## /lib64/libacl.so.1 is mapped multiple times. sigh.
    }

  my $mode = $t->[4];  
  $mode = 'READ_ONLY' if $t->[5] =~ m{MAP_PRIVATE} or $fd->{a} eq 'r';	# we never change this file!
  my $mm = $p->{res}{mmap}{$mmap} = { %$fd, addr => $addr, mode => $mode, bytes_r => $t->[3]};

  ## tell all stealth_writables that we arrived.
  for my $w (enumerate_stealth_writables($p))
    {
      ## may create self dependency. This is accepted, as we have to 
      ## guard against circular dependenies later anyway. 
      # if $mm ne $w;
      add_dependency($pp, $w, $mm, $t->[3], 'mmap-mmap');
    }

  ## if it is writable, we have no clue when it may be written to.
  ## may happen any time from now to unmap. Thus we mark us as stealth_writable.
  ## All mmap calls and read calls must add to {depends_on}{} of all {stealth_writable}s.

  if ($mm->{mode} =~ m{(WRITE|UPDATE)})
    {
      $mm->{stealth_writable}++;

      ## a writable mmap depends on the exec'ed program, (to start with)...
      add_dependency($pp, $mm, $p->{res}{txt}, $t->[3], 'exec-mmap');

      ## and on all loaded resources so far.
      for my $res_l (values %{$p->{res}{load}})
        {
	  my $t = defined($res_l->{was}) ? "$res_l->{was}-load-mmap" : 'load-mmap';
          add_dependency($pp, $mm, $res_l, $t->[3], $t);
	}
    }
}

sub ana_dup2 { ana_dup(@_) }
sub ana_dup
{
  my ($p, $t, $r) = @_;
  return if $r < 0;
  my $res = $p->{res}{fd}{$t->[2]};
  die "ana_dup: fd=$t->[2] unknown.\n" unless $res;
  do_dup2($p, $res, $r);
  return 0;
}

sub do_dup2
{
  my ($p, $res, $fd) = @_;

  if (exists $p->{res}{fd}{$fd})
    {
      # a careful programmer will not use dup2() or dup3() without closing  newfd
      # first, says the man page.
      warn "do_dup2: left over: ", Dumper $p->{res}{fd}{$fd} if $verbose > 2;

      # CAUTION: keep in sync with ana_close()
      my $close_res = $p->{res}{fd}{$fd};
      remember_loaded($p, $close_res, 'read') if $close_res->{bytes_r};
      free_obj_if_unlinked_unused($close_res->{o});
    }
  $p->{res}{fd}{$fd} = deepcopy($res);
  delete $p->{res}{fd}{$fd}{fcntl}{FD_CLOEXEC};	# see man dup.
  $p->{res}{fd}{$fd}{f} = $fd;
  return 0;
}

sub ana_pipe
{
  my ($p, $t, $r) = @_;
  return if $r < 0;
  my ($rfd, $wfd) = ($t->[2][0], $t->[2][1]);
  warn "rfd=$rfd left over: " . Dumper $p->{res}{fd}{$rfd} if $verbose > 1 && exists $p->{res}{fd}{$rfd};
  warn "wfd=$wfd left over: " . Dumper $p->{res}{fd}{$wfd} if $verbose > 1 && exists $p->{res}{fd}{$wfd};

  my $obj = store_inode($p, ":pipe/$p->{pid}/$rfd/$wfd");
  $p->{res}{fd}{$rfd} = { t => 'FIFO', n => '[pipe]', a => 'r', f => $rfd, o => $obj->{oidx} };
  $p->{res}{fd}{$wfd} = { t => 'FIFO', n => '[pipe]', a => 'w', f => $wfd, o => $obj->{oidx} };
  $obj->{t} = 'FIFO';
  return 0;
}

sub ana_socket
{
  my ($p, $t, $fd) = @_;
  #  0         1    2          3                                         4    5
  # 'socket', '(', 'PF_FILE', 'SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK', '0', ')',

  $p->{res}{fd}{$fd} = { fetch_cwd_rtd($p), t => 'socket', fcntl => { $t->[2] => 1 }, a => 'rw', f => $fd };
  $p->{res}{fd}{$fd}{fcntl}{FD_CLOEXEC} = 1 if $t->[3] =~ m{SOCK_CLOEXEC};

  # CAUTION
  # we cannot store the unconnected socket as a filesystem object and update the path later.
  # This easily leads to two objects, for the same pathname: just connect two sockets.
  # 
  # my $obj = store_inode($p, ":socket/$p->{pid}/$fd");
  # # allow store_inode() to re-recognize this obj later.
  # # re-recogniztion normally works via a path or an inode number. A socket has neither.
  # $p->{res}{fd}{$fd}{o} = $obj->{oidx};	
  # $obj->{t} = 'SOCK';
  # #die Dumper $obj, $p->{res}{fd}{$fd};
  return 0;
}

sub ana_connect
{
  my ($p, $t, $r) = @_;
  # 0          1    2    3    4      5
  # 'connect', '(', '3', [], '110', ')',
  # 
  #  0    1            2    3          4       5    6                         8    8      9
  # '{', 'sa_family', '=', 'AF_FILE', 'path', '=', '"/var/run/nscd/socket"', '}', '110', ')',
  #
  my $fd = $t->[2];
  my $t3 = $t->[3];
  my $res = $p->{res}{fd}{$fd};
  if ($t3->[1] eq 'sa_family' and $t3->[3] eq 'AF_FILE' and $t3->[4] eq 'path')
    {
      my $name = strace_unquote($t3->[6]);
      $res->{n} = $name;	# socket was without name up to now.
      my $obj = store_inode($p, $res);	# push the name into the obj, too.
      $obj->{t} = 'SOCK';
    }
  else
    {
      warn "connect($fd,@{$t3}): not impl., expected sa_family = AF_FILE, path = ...\n";
      die unless $keep_running;
    }
  return 0
}

sub ana_open
{
  my ($p, $t, $fd) = @_;
  return ana_openat($p, [$t->[0], $t->[1], undef, $t->[2], $t->[3], $t->[4]], $fd);
}


sub ana_openat
{
  my ($p, $t, $fd) = @_;
  return if $fd < 0;			# we ignore unsuccessful open()'s.

  warn Dumper $t, $fd, "in ana_open" if $verbose > 2;
  my $mode = '?';
  my $name = strace_unquote($t->[3]);

  #  0         1    2          3                           4                    5
  # 'openat', '(', 'AT_FDCWD','"/var/tmp/build-root/bin"','O_RDONLY|O_NOCTTY', ')'

  if (defined $t->[2])	# this is really openat(), not just open()
    {
      if (substr($name, 0, 1) ne '/')	# keep in sync with ana_unlinkat()
	{
	  if ($t->[2] eq 'AT_FDCWD')
	    {
	      $name = full_pathname({ fetch_cwd_rtd($p), n => $name });
	    }
	  else
	    {
	      my $fd = $p->{res}{fd}{$t->[2]};
	      die "ana_openat: '$t->[2]' fd not found." unless defined $fd;
	      $name = full_pathname($fd) . '/' . $name;
	    }
	}
    }

  $mode = 'r'  if $t->[4] =~ m{O_RDONLY};
  $mode = 'w'  if $t->[4] =~ m{O_WRONLY};
  $mode = 'rw' if $t->[4] =~ m{O_RDWR};
  warn "fd=$fd was left over: " . Dumper $p->{res}{fd}{$fd} if $verbose > 1 && exists $p->{res}{fd}{$fd};
  $name = "/proc/$p->{pid}/fd/$1" if $name =~ m{^/dev/fd/(\d+)$};
  $p->{res}{fd}{$fd} = { fetch_cwd_rtd($p), t => 'open', n => $name, a => $mode, f => $fd };

  ## This brings transitivity to our dependecies.
  my $obj = store_inode($p, $p->{res}{fd}{$fd});

  ## we need to record if we depend on preexisting files.
  ## if this is open for read or update, 
  ## and we have not created the file it is an import, creating a preexisting dependency.

  ## don't rely on the size returned by our stat, 
  ## the traced process may have already written some bytes when we check the size.
  ## so, also check the open mode.
  if ($t->[4] =~ m{O_TRUNC})
    {
      $obj = do_trunc0($obj);	## obj remains itself, if zero-size.
    }
  else
    {
      $obj->{prereq}++ if $obj->{s};
    }

  ## record, if we know that this is not a true (temporary at best) prerequisite.
  if ($t->[4] =~ m{O_CREAT})
    {
      $obj->{created} = $obj->{tstamp};

      ## FIXME: should check usecount_obj(), while ignoring $p->{res}{fd}{$fd}.
      ## if others have the file open, we missed an unlink or rename.
      ## do it now, before we collect any false dependencies.
      unless ($t->[4] =~ m{O_TRUNC})
        {
          ## Similar actions as for O_TRUNC ?
	  ## But don't do it twice if we had a O_CREAT|O_TRUNC combination.
          $obj = do_trunc0($obj);	## obj remains itself, if zero-size.
	}
      $obj->{s} = 0;	# certainly empty.
    }
  return 0;
}

sub ana_close
{
  my ($p, $t, $r) = @_;
  return if $r < 0;			# we ignore unsuccessful open()'s.
  my $res = $p->{res}{fd}{$t->[2]};
  if ($offline and not defined $res)
    {
      # missing filedescriptors are to be expected in offline mode.
      warn "pid=$p->{pid}:close($t->[2]): unknown fd\n" if $verbose > 1;
      return 0 
    }
  die Dumper $t, $p unless defined $res;	# oops, successful close and we did not see the resource?

  warn Dumper $res, $t, "in ana_close" if $verbose > 2;
#  warn "ana_close: " . Dumper $p->{pid}, $res if $res->{t} eq 'FIFO' and ($res->{bytes_r}||0) > 0;

  # if we have read from the file, move it into {res}{load} array.
  # CAUTION: keep in sync with do_dup2()

  remember_loaded($p, $res, 'read') if $res->{bytes_r};

  free_obj_if_unlinked_unused($res->{o});
  delete $p->{res}{fd}{$t->[2]};

  return 0;
}

sub remember_loaded
{
  my ($p, $res, $was) = @_;
  my $key = $res->{I};

  # Try hard to get a key...
  # we may also learn the inode number of an object through a differnt resource, so ask the object, if we can.
  $key = $res->{I} = $pp->{fs}{obj}[$res->{o}]{I} if !defined($key) and $res->{o} and $pp->{fs}{obj}[$res->{o}];

  # well, {load}{$key} is not really critical. 
  # We can also use the obj oidx, avoid conflicts by prefixing.
  $key = "o" . $res->{o} if !defined($key) and defined $res->{o};

  # Ouch, not even an object? that calls for an error now.
  # could also use ":$res->{N}" here, first let us see where such an error occurs. 
  $key ||= '';

  ## pipes have no inode. Take care.
  unless (length $key)
    {
      die "remember_loaded: inode not known", Dumper [caller], $res, $key, $pp->{fs}{obj}[$res->{o}];
    }

  if (exists $p->{res}{load}{$key})
    {
      $p->{res}{load}{$key}{bytes_r} += $res->{bytes_r};
    }
  else
    {
      my %r;

      # fullName, object-oidx, size, Inode, type, access-mode, mmap-addr bytes_read
      for my $k qw(N o s I t a addr bytes_r)
        {
	  $r{$k} = $res->{$k} if defined $res->{$k};
	}
      $r{was} = $was if defined $was;	# remember read, exec, mmap
      $p->{res}{load}{$key} = \%r;
    }
# warn "remember_loaded : ", Dumper $res, $p->{res}{load}{$key};
# <STDIN>;
}

sub ana_send { ana_write(@_) }	# $t->[2] matches, $r matches, nothing else matters.
sub ana_write
{
  my ($p, $t, $r) = @_;
  my $fd = $t->[2];
  return if $fd < 0 or $r <= 0;		# ignore bogus writes.
  my $res = $p->{res}{fd}{$fd};
  die "ana_write($fd) resource not found." unless defined $res;

  die "ana_write: /bin/cc " . Dumper $p, $t if $p->{res}{txt}{N} =~ m{/bin/cc};

  # we go through all open files, and check bytes_r to see if we have read
  # from them.  if so, we record the minimum of bytes_r there and bytes
  # written here as a dependency for this $fd; For all mmap resources, we
  # add the bytes written as a dependency, up to the mmapped size.

  my %typemap = (fd => 'read-write', mmap => 'mmap-write', load => 'load-write');

  for my $type (keys %typemap)
    {
      for my $src (values %{$p->{res}{$type}})
	{
	  my $t = $typemap{$type};
	  $t = $src->{was} . '-' . $t if defined $src->{was};
	  add_dependency($pp, $res, $src, $r, $t);
	}
    }
  add_dependency($pp, $res, $p->{res}{txt}, $r, 'exec-write') if $p->{res}{txt};

  # for the records, we also increment {res}{fd}{bytes_w}, as a global
  # estimate.

  $res->{bytes_w} += $r;

  # {s} must be added to. We have no way to know if the write really increased
  # the file, but we must assume so, to keep the {bytes} counters done by
  # add_dependency() happy.  We can verify {s} whenever we see stat() or
  # truncate() or tell() syscalls.

  $res->{s} += $r;
}

sub ana_recvmsg
{
  my ($p, $t, $r) = @_;
  my $fd = $t->[2];
  return if $fd < 0 or $r < 0;		# ignore bogus recvmsg
  my $len = 0;
  my $iov_count = 0;
  for my $i (0..$#{$t->[3]})
    {
      if ($t->[3][$i] eq 'msg_iov')
        {
	  # ..., 'msg_iov', '(', '2', ')', '=', [], ...
	  while (!ref($t->[3][$i])) { $i++ }
	  my @iov = @{$t->[3][$i]};
	  # @iov = ( [ '{', '"...."', '6', '}' ], [ '{', '"..."', '8', '}' ] );
	  for my $iov (@iov)
	    {
	      $len += $iov->[2];
	      $iov_count++;
	    }
	}
    }
  die "no msg_iov in recvmsg???" unless $iov_count;
  return ana_read($p, [ $t->[0], $t->[1], $fd ], $len);
}

sub ana_readv { ana_read(@_) }	# $t->[2] matches, $r matches, nothing else matters
sub ana_read
{
  my ($p, $t, $r) = @_;
  my $fd = $t->[2];
  return if $fd < 0 or $r <= 0;		# ignore bogus reads

#  warn Dumper $t, $p->{res}{fd}{$fd}, $r, "in ana_read" if $verbose > 2 or $p->{res}{fd}{$fd}{t} ne 'REG';

  ## this is the second most important call
  $p->{res}{fd}{$fd}{bytes_r} += $r;

  ## read calls must add to {depends_on}{} of all {stealth_writable}s.
  for my $w (enumerate_stealth_writables($p))
    {
      add_dependency($pp, $w, $p->{res}{fd}{$fd}, $r, 'read-mmap');
    }

  return 0;
}

sub ana_munmap
{
  my ($p, $t, $r) = @_;
  return unless $r == 0;

  # this too must add the resource to the load table, just like ana_close().
  my $mm;
  for my $m (keys %{$p->{res}{mmap}})
    {
      if ($p->{res}{mmap}{$m}{addr} eq $t->[2] and
          $p->{res}{mmap}{$m}{s}    <= $t->[3])
        {
	  ## FIXME: We should not call remember_loaded, if this was a write only mmap.
	  ## Currently we may thus depend on our own output.
	  remember_loaded($p, $p->{res}{mmap}{$m}, 'mmap');
	  free_obj_if_unlinked_unused($p->{res}{mmap}{$m}{o});
          delete $p->{res}{mmap}{$m};
	}
    }

  return 0;
}

## can be called with obj indices or obj references.
sub free_obj_if_unlinked_unused
{
  my (@obj) = @_;
  return unless defined $pp;	# be happy, all is free. happens when called from start_cmd();

  for my $n (@obj)
    {
      next if $offline and not defined $n;	# relax
      die Dumper \@obj unless defined $n;
      my $obj;
      if (ref $n)
        {
	  $obj = $n;
	}
      else
        {
	  die Dumper \@obj, $n, ": no such object" unless defined $pp->{fs}{obj}[$n];
	  $obj = $pp->{fs}{obj}[$n];
	}
      next if scalar keys %{$obj->{N}};		# it has names. nothing to do.
      next if usecount_obj($obj);		# processes still have it open.

      do_free_obj($obj);
    }
}

sub ana_symlink
{
  my ($p, $t, $r) = @_;
  return if $r;
  
  ## XXX should we ignore_symlinks here?
  my $n_new = full_pathname({ fetch_cwd_rtd($p), n => strace_unquote($t->[3]), ignore_symlinks => 0 });

  ## take the link target as is. Strange moves can happen before it gets dereferenced.
  $pp->{fs}{symlink}{$n_new} = strace_unquote($t->[2]);
  return 0;
}

sub ana_link
{
  my ($p, $t, $r) = @_;
  return if $r;

  # both, hardlinks and symlinks are relevant here.
  # add a new name to our inode table.
  # Really add. Zap it if it was there before.
  #
  # If oldpath does not refer to a known obj, we need to create one, 
  # so that in case of offline mode, we can remember that this 
  # is two names for the same thing. Without offline, the inode number would tell us.
  my $oldpath = $t->[2];
  my $newpath = $t->[3];
  warn Dumper $t;
  die unless $keep_running;
}

sub ana_newfstatat { ana_fstatat(@_) }
sub ana_fstatat64  { ana_fstatat(@_) }
sub ana_fstatat
{
  my ($p, $t, $r) = @_;
  return if $r;

  my $path = strace_unquote($t->[3]);
  if (substr($path, 0, 1) ne '/')
    {
      if ($t->[2] eq 'AT_FDCWD')
        {
          $path = full_pathname({ fetch_cwd_rtd($p), n => $path }) 
	}
      else
        {
          warn "ana_fstatat($t->[2], ...) not impl. " . Dumper $t;
	  die unless $keep_running;
	  return;
	}
    }
  return do_stat($p, $path, $t->[4]);
}


sub ana_unlinkat
{
  my ($p, $t, $r) = @_;
  return if $r;

  my $path = strace_unquote($t->[3]);
  if ($t->[4] eq 'AT_REMOVEDIR')
    {
      # so it is a directory, and we should feel like rmdir(). 
      # What's the difference anyway?
    }

  if (substr($path, 0, 1) ne '/')	# keep in sync with ana_openat()
    {
      if ($t->[2] eq 'AT_FDCWD')
        {
          $path = full_pathname({ fetch_cwd_rtd($p), n => $path }) 
	}
      else
        {
	  my $fd = $p->{res}{fd}{$t->[2]};
	  die "ana_unlinkat: '$t->[2]' fd not found." unless defined $fd;
	  $path = full_pathname($fd) . '/' . $path;
          # die "ana_unlinkat: '$t->[2]' not impl. " . Dumper $t, $fd, $path;
	}
    }
  return do_ana_unlink($path);
}

sub ana_oldumount { ana_umount(@_) }
sub ana_umount
{
  my ($p, $t, $r) = @_;
  return if $r;

  # if an object within this mountpoint is successfully used later, we know something has been mounted here again.
  # we have no record if it is the same filesystem again, or something different.
  # Better assume it is the same, to be save.
  warn "umount ignored. may cause false positives\n" if $verbose > 1;
  return undef;
}

sub ana_rmdir { ana_unlink(@_) }	# any difference??
sub ana_unlink
{
  my ($p, $t, $r) = @_;
  return if $r;
  
  my $name = strace_unquote($t->[2]);
  my $path = full_pathname({ fetch_cwd_rtd($p), n => $name });

  return do_ana_unlink($path);
}


# drop an old name from the inode table, if it is there.
# If the last name is dropped, and the last filedescriptor is closed and
# the last mmap is unmapped, we have a problem: the inode can be re-used.
# we need to flag the inode as old somehow and report the collission,
# as soon as we try to reuse it.
sub do_ana_unlink
{
  my ($path) = @_;

  delete $pp->{fs}{symlink}{$path};

  my $obj = $pp->{fs}{path}{$path};
  return unless defined $obj;		# we don't know that one. So who cares?
  die "ana_unlink: internal obj-ref-problem $path, ", Dumper $obj unless exists $obj->{N}{$path};

  ## keep in SYNC with do_trunc0()
  $obj->{del}{N}{$path} = time;		# memorize when the name was unlinked.
  delete $obj->{N}{$path};
  delete $pp->{fs}{path}{$path};
  return 1 if scalar keys %{$obj->{N}};			# still some names left, fine.

  ## prefer the expensive loop here. 
  ## correct refcounting *everywhere* is too complex tonight.
  ## We delete the name from any resource, so that it cannot point to a wrong object later.
  ## The lookup function res_obj_name() can resort to {I} or {o} in that case.
  return 1 if usecount_obj($obj, $path);

  do_free_obj($obj);
}

## do_free_obj() deletes it from the {inode} table, zaps its inode, but
## the object remains in the {obj} array.
sub do_free_obj
{
  my ($obj, $when) = @_;

#  die "do_free_obj $when" . Dumper $obj if $obj->{I};

  # no longer in in the filesystem, in mmap, nor opened; so free it.
  $obj->{inaccessible} = $when || time;
  return 1 unless defined $obj->{I};
  $obj->{del}{I} = $obj->{I};
  delete $pp->{fs}{inode}{$obj->{I}} if $pp->{fs}{inode}{$obj->{I}};
  delete $obj->{I};
  return 1;
}

##
## determines the number of open resources pointing
## to $obj. $del_name is optional and will be removed from
## all open resources. Useful during unlink.
## 
sub usecount_obj
{
  my ($obj, $del_name) = @_;

  my $count = 0;
  for my $proc (values %{$pp->{p}})
    {
      next unless $proc->{res};	# whyt does that happen? All should have {res}{txt}, no?

      for my $res ($proc->{res}{txt}, values %{$proc->{res}{fd}}, values %{$proc->{res}{mmap}})
        {
	  my $found = 0;
# print "checking $del_name, $obj->{oidx}, $obj->{I} \n", Dumper $res if $del_name;
          if (defined($res->{N}) and defined($del_name) and $res->{N} eq $del_name)
	    {
	      $found++;
	      delete $res->{N};
	    }
	  $found++ if $obj->{I} and $res->{I} and $obj->{I} eq $res->{I};
	  die Dumper $obj unless defined $obj->{oidx};
	  $found++ if $obj->{oidx} == ($res->{o}||-1);
	  $count++ if $found;
	}
    }
  return $count;
}


sub ana_rename
{
  my ($p, $t, $r) = @_;
  return if $r;

  # link and rename syscalls operate on paths only, here we need to rely on
  # names rather than inode numbers. sigh.

  my $n_f =   full_pathname({ fetch_cwd_rtd($p), n => strace_unquote($t->[2]) });
  my $n_t =   full_pathname({ fetch_cwd_rtd($p), n => strace_unquote($t->[3]) });
  my $o_f = $pp->{fs}{path}{$n_f};
  my $o_t = $pp->{fs}{path}{$n_t};

  # Drop an old name from the inode table, if it is there.
  do_ana_unlink($n_t);

  # update our symlink database, if applicable.
  if (defined $pp->{fs}{symlink}{$n_f})
    {
      $pp->{fs}{symlink}{$n_t} = $pp->{fs}{symlink}{$n_f};
      delete $pp->{fs}{symlink}{$n_f};
    }

  return if !defined($o_f) or (defined($o_t) and $o_t eq $o_f);	# trival cases, dismissed.

  # add a new name to our object. 
  delete $o_f->{N}{$n_f};
  $o_f->{N}{$n_t} = 1;

  # $o_f is now reachable under $n_t
  $pp->{fs}{path}{$n_t} = $o_f;
  delete $pp->{fs}{path}{$n_f};

}

sub ana__llseek 
{ 
  my ($p, $t, $r) = @_;
  return if $r;
  my $res = $p->{res}{fd}{$t->[2]};
  die "ana__llseek: fd=$t->[2] unknown.\n" unless $res;

  # learn a new minimum for {s}.
  # _llseek(3, 0, [0], SEEK_SET)      = 0
  my $off = $t->[4][0];
  $res->{s} = $off if ($res->{s}||-1) < $off;

  return 0;
}

sub ana_lseek { ana_seek(@_) }
sub ana_seek
{
  my ($p, $t, $r) = @_;
  return if $r < 0;
  my $res = $p->{res}{fd}{$t->[2]};
  die "ana_seek: fd=$t->[2] unknown.\n" unless $res;

  # learn a new minimum for {s}.
  $res->{s} = $r if ($res->{s}||-1) < $r;

  return 0;
}

sub ana_truncate
{
  my ($p, $t, $r) = @_;
  ## truncate to size 0 effectivly
  ## makes this object a new object.
  ## we implement this by dropping all dependencies from the object.
  #
  # or better reduce the bytes_r in all dependencies of the object to the 
  # to the new length of the object.
  # Hmm. We should duplicate the object and update all open resources 
  # to point to the new object, to get rid of bogus dependencies, right?

  # do_trunc0(); same as see open(O_TRUNC)

  die "truncate() not impl. " . Dumper $t, $r;
}

# truncate an object to size zero.
sub do_trunc0
{
  my ($obj) = @_;

  # we clone the object, only if it had a size.
  return $obj unless $obj->{s}||0;

  ## create a new object.
  ## object constructor. keep in sync with store_inode().
  my $obj2 = {};
  for my $k qw(t pid cmd I)
    {
      $obj2->{$k} = $obj->{$k} if defined $obj->{$k};
    }
  $obj2->{N} = { map { $_ => 1 } keys %{$obj->{N}} };
  $obj2->{tstamp} = time;
  push @{$pp->{fs}{obj}}, $obj2;
  $obj2->{oidx} = $#{$pp->{fs}{obj}};

  ## now make the obj ptr inaccessible for lookup.
  ## it may still have names. we treat them as deleted.
  for my $n (keys %{$obj->{N}})
    {
      ## keep in SYNC with do_ana_unlink()
      $obj->{del}{N}{$n} = time;		# memorize when the name was unlinked.
      delete $obj->{N}{$n};
      delete $pp->{fs}{path}{$n};
    }

  ## do_free_obj() deletes it from the {inode} table, zaps its inode, but
  ## the object remains in the {obj} array.
  my $I = $obj->{I};
  do_free_obj($obj, $obj2->{tstamp});

  ## similar to usecount_obj(), walk through all open references
  ## but update them to point to the new object.
  for my $proc (values %{$pp->{p}})
    {
      next unless $proc->{res};

      # we ignore {txt} and {load} resources, as we cannot read/write them anyway.
      for my $res (values %{$proc->{res}{fd}}, values %{$proc->{res}{mmap}})
        {
	  if (($res->{o}||-1) == $obj->{oidx} or
	      ($I and $res->{I} and $I eq $res->{I}))
	    {
	      warn "do_trunc0: res identified by {I}='$I'. Had no {o}. Is this sane?" unless defined $res->{o};

	      $res->{o} = $obj->{oidx};	# just to be sure....
	      my $what = ($res->{t} eq 'mmap') ? 'mmap' : 'read';
              remember_loaded($proc, $res, $what.'-trunc') if $res->{bytes_r};	# mmap has bytes_r == mapped size
	      $res->{o} = $obj2->{oidx};	# now we redirect it.
	      $res->{bytes_r} = $res->{bytes_w} = 0;
	    }
	}
    }

  warn "do_trunc0: ", Dumper $obj, $obj2 if $verbose > 2;
  $obj2->{s} = 0;	# we are zero length after truncating.
  return $obj2;
}

sub ana_ioctl
{
  my ($p, $t, $r) = @_;
  return if $r;		# succeeded iocntl's return 0.
  my $fd = $t->[2];
  while ('' eq ref $t->[4])
    {
      $t->[3] .= ' ' . $t->[4];
      splice(@$t, 4, 1);
    }
  return if $offline;	# what whould we do here?
  die "ana_ioctl($fd, $t->[3]) resource $fd unknown." unless $p->{res}{fd}{$fd};
  return if $t->[3] =~ m{ \b(TCGETS|TIOC) }x;
  die "ana_ioctl: ", Dumper $t;
}

sub ana_fcntl64 { ana_fcntl(@_) }
sub ana_fcntl
{
  my ($p, $t, $r) = @_;
  return if $r eq '-1';		# succeeded fcntl's return >= 0.
  my $res = $p->{res}{fd}{$t->[2]};
  if ($offline and not $res) 		# relax
    {
      $res = $p->{res}{fd}{$t->[2]} =
        {
	  'n' => "/proc/$p->{pid}/fd/$t->[2]",
	  'f' => $t->[2],
	  't' => 'early'
	};
      my $obj = store_inode($p, $res);
    }
  die "ana_fcntl: fd=$t->[2] unknown.\n", Dumper $p->{res} unless $res;

  if ($t->[3] =~ m{(F_GETFD|F_GETFL)})
    {
      # harmless, like ign_syscall.
    }
  elsif ($t->[3] eq 'F_SETFD')
    {
      for my $v (split /\|/, $t->[4])
        {
	  print "$p->{pid}: '@{$p->{argv}}' fcntl($t->[2], $t->[3], $v);\n" if $verbose > 1;
          $res->{fcntl}{$v} = { pid => $p->{pid}, fd => $t->[2] };
	}
    }
  elsif ($t->[3] eq 'F_DUPFD')
    {
      die "ana_fcntl failed: " . Dumper $t, $r unless $r =~ m{^\d+$};
      do_dup2($p, $res, $r);
    }
  else
    {
      my $msg = "ana_fcntl($t->[3]) = $r not impl.\n";
      if ($offline) { warn $msg } else { die $msg }
    }
  return 0;
}

sub ana_fchdir
{
  my ($p, $t, $r) = @_;
  return if $r != 0;
  my $fd = $p->{res}{fd}{$t->[2]};
  my $path = full_pathname($fd);
  if ($path ne $p->{res}{cwd}{n})
    {
      $p->{res}{cwd} = { n => $path, t => 'DIR', f => 'cwd' };
    }
  return 0;
}

sub ana_chdir
{
  my ($p, $t, $r) = @_;
  return if $r != 0;
  my $name = strace_unquote($t->[2]);
  print "ana_chdir('$name');\n" if $verbose > 2;
  my $path = full_pathname({ fetch_cwd_rtd($p), n => $name });

  return 0 if defined($p->{res}{cwd}) and $path eq $p->{res}{cwd}{n};

  $p->{res}{cwd} = { n => $path, t => 'DIR', f => 'cwd' };
  return 0;
}


sub ana_getcwd
{
  my ($p, $t, $r) = @_;
  return unless $r <= 0;		## should have length of string here.

  my $name = strace_unquote($t->[2]);
  my $path = full_pathname({ fetch_cwd_rtd($p), n => $name });	# rtd may be missing.
  if ($path ne $p->{res}{cwd}{n})
    {
      print "new cwd seen: $path (was $p->{res}{cwd}{n})\n" if $verbose;
      $p->{res}{cwd} = { n => $path, t => 'DIR', f => 'cwd' };
    }
  return 0;
}

sub ana_getpid
{
  my ($p, $t, $r) = @_;
  die "ana_getpid mismatch: $r was $p->{pid}\n" if $r != $p->{pid};
  return 0;
}

sub ana_execve
{
  my ($p, $t, $r) = @_;
  return if $r < 0;
 
  my $cmdname = strace_unquote($t->[2]);
  my @argv;
  for my $i (0..$#{$t->[3]})
    {
      $argv[$i] = strace_unquote($t->[3][$i]);
    }

  exec_seen($p, $cmdname, \@argv);
  
  return 0;
}

sub ana_clone
{
  my ($p, $t, $pid) = @_;

  ## tricky.
  ## clone allows us to share memory space with our parent.
  ## in that case, both processes work together.
  ## We can implement this by just not using deepcopy, right?
  # look for CLONE_PARENT. 
  my $child = deepcopy($p);
  $child->{pid} = $pid;
  my $flags = $t->[7];	# 5: 'flags', 6: '=' 

  if ($flags =~ m{CLONE_PARENT} or
      $flags =~ m{CLONE_THREAD})
    {
      $child->{ppid} = $p->{ppid};
      $pp->{p}{$child->{ppid}}{children}{$pid} = 1;
    }
  else
    {
      $child->{ppid} = $p->{pid};
      $p->{children}{$pid} = 1;
    }

  if ($flags =~ m{CLONE_FILES})
    {
      delete $child->{res}{fd};
      $child->{res}{fd} = $p->{res}{fd};
      print "Warning, untested and dangerous codepath:\n" if $verbose;
      print "$p->{pid}, $pid: {res}{fd} is shared due to clone(CLONE_FILES)\n" if $verbose;
    }

  if ($flags =~ m{CLONE_VM})
    {
      for my $res qw(mmap load load-write)
        {
          delete $child->{res}{$res};
          $child->{res}{$res} = $p->{res}{$res};
	}
      print "Warning, untested and dangerous codepath:\n" if $verbose;
      print "$p->{pid}, $pid: {res}{mmap,load,load-write} are shared due to clone(CLONE_VM)\n" if $verbose;
      print "What about cwd, rtd, txt and {res}{fd} now? Are they shared? What happes at exec?\n" if $verbose;
    }

  if ($flags =~ m{CLONE_THREAD})
    {

      warn "Warning, untested and dangerous codepath:\n";
      warn "threads are not supported in ioana today.\n";
      ##
      ## horror to implement: If one of a thread-group calls exec, then 
      ## all but thread-group leader terminate, and this thread does the exec.
      ##
      ## see also CLONE_PARENT
      die Data::Dumper::Dumper "ana_clone(CLONE_THREAD)", $p, $t, $pid;
    }

  if ($flags =~ m{CLONE_NEWFS})
    {
      print "Warning, untested and dangerous codepath:\n" if $verbose;
      print "$pid enters a new filesystem namespace due to clone(CLONE_FILES)\n" if $verbose;
    }

  if ($flags =~ m{CLONE_UNTRACED})
    {
      print "Warning, untested and dangerous codepath:\n" if $verbose;
      print "$pid may not be traceable due to clone(CLONE_FILES)\n" if $verbose;
    }

  $pp->{p}{$pid} = $child; 
  return 0;
}


sub ana_vfork { ana_fork(@_) }
sub ana_fork
{
  my ($p, $t, $pid) = @_;

  # 
  # If we have a writable mmap and another readable mmap,
  # then we have stealth_ipc between father and child.
  # (A pipe is much easier to track. we see read / write calles there.)
  # If anything strange happens before we see an exec after fork,
  # we should holler.

  my $child = deepcopy($p);
  $child->{pid} = $pid;
  $child->{ppid} = $p->{pid};
  $p->{children}{$pid} = 1;
  $pp->{p}{$pid} = $child; 

  return 0;
}

sub ana_exit_group { ana_exit(@_) }
sub ana_exit
{
  my ($p, $t, $r) = @_;
  $p->{exit_tstamp} = time;
  $p->{exit_value} = $t->[2]+0;

  # collect all remaining resource objects.
  my @unused = grep { defined } map { $_->{o} } $p->{res}{txt}, 
  	values %{$p->{res}{mmap}}, values %{$p->{res}{fd}};

  delete $p->{res};			# give up all resources. 
  free_obj_if_unlinked_unused(@unused);	# now see if the objects should be freed.

  # Say goodby to parent process:
  delete $pp->{p}{$p->{ppid}}{children}{$p->{pid}} if $p->{ppid} and $pp->{p}{$p->{ppid}};
  return 0;
}
