#!/bin/sh
#
# nbest-error --
#	compute minimum error of nbest lists
#
# $Header: /home/srilm/devel/utils/src/RCS/nbest-error,v 1.5 2003/01/11 01:25:44 stolcke Exp $
#

# At least two arguments are mandatory: the score directory (or file list)
# and the reference file.  Anything less prints the usage text on stderr
# and terminates with status 2.
[ $# -ge 2 ] || {
	echo "usage: $0 score-dir refs [nbest-lattice-option ...]" >&2
	echo "    or $0 file-list refs [nbest-lattice-option ...]" >&2
	exit 2
}

# The first two arguments are consumed here; whatever remains in "$@" is
# passed through to nbest-lattice on its command line.
scoredir=$1
refs=$2
shift 2

# Request -nbest-error by default, but leave the option empty when the
# remaining arguments already mention -lattice-error.
case "$*" in
*-lattice-error*)	option= ;;
*)			option=-nbest-error ;;
esac

# Both inputs must be readable before any work is started.  The expansions
# are quoted so that a path containing whitespace or glob characters is
# tested as a single operand, and so that an empty argument is rejected
# instead of collapsing the test to `[ ! -r ]`, which is always false and
# would let the script continue with no path at all.
if [ ! -r "$scoredir" ]; then
	echo "$0: cannot access $scoredir" >&2
	exit 1
fi

if [ ! -r "$refs" ]; then
	echo "$0: cannot access $refs" >&2
	exit 1
fi

# Stage 1: emit the list of n-best files.  When scoredir is a directory,
#          list every regular file below it (following symlinks) whose
#          name ends in .score, .Z or .gz, sorted; otherwise scoredir is
#          treated as a file whose contents are the list.
# Stage 2: nbest-lattice receives that list on stdin ("-nbest-files -"),
#          the reference file, the default error option (if any) and all
#          remaining caller-supplied options.
# Stage 3: gawk echoes each per-sentence result line and prints totals.
#
# Path variables are quoted so names with whitespace or glob characters
# stay intact.  $option is deliberately left unquoted: when it is empty it
# must disappear from the command line rather than become an empty argument.
if [ -d "$scoredir" ]; then
    find "$scoredir" -follow \
	 -type f \( -name \*.score -o \
		    -name \*.Z -o \
		    -name \*.gz \) \
	 -print | sort
else
    cat "$scoredir"
fi | \
nbest-lattice -nbest-files - -refs "$refs" $option "$@" | \
gawk '
# A result line has "words" in field 9 and numeric fields 2 and 10;
# field 2 is added to the error total and field 10 to the word total.
$2 ~ /^[0-9]*$/ && $10 ~ /^[0-9]*$/ && $9 == "words" {
	nsents ++;
	nwords += $10;
	nerrors += $2;
	print;
}
END {
	# With no matching lines nwords is 0; report 0.00% instead of
	# letting gawk abort with a fatal division by zero.
	rate = (nwords > 0) ? 100*nerrors/nwords : 0;
	printf "%d sentences, %d words, %d errors (%.2f%%)\n", \
		nsents, nwords, nerrors, rate;
}'

