#!/bin/sh
#
# rescore-nbest --
#	output LM scores for nbest lists
#
# $Header: /home/srilm/CVS/srilm/utils/src/rescore-nbest,v 1.3 1996/03/28 19:12:01 stolcke Exp $
#

# Need the nbest file list, the score directory, and at least one LM option.
case $# in
0|1|2)
	echo "usage: $0: nbest-file-list score-dir lm-options ..." >&2
	exit 1
	;;
esac

# Peel off the two fixed arguments; whatever remains in "$@" is passed
# through to ngram as its LM options.
filelist=$1
scoredir=$2
shift 2

#
# STRATEGY:
#	Concatenate hyps for all nbest lists, record number of hyps for
#		each file in the output stream
#	Strip hyp ids, !SENT_START, !SENT_END 
#	Feed to ngram -ppl (using lm-options)
#	Parse ngram output into lm scores and deposit into target files
#

# Marker that opens each per-file header line in the hypothesis stream.
# It is handed to ngram via -escape and matched again by the gawk stage,
# which relies on header lines reaching ngram's output unchanged.
escape="***FILE:"

#
# Pipeline:
#   1. For every nbest file named in $filelist, emit a header line
#	"<escape> <basename-without-.trans>.score <number-of-hyps>"
#      followed by the hypotheses with the "(id)" prefix and the
#      !SENT_START / !SENT_END tokens stripped.
#   2. ngram scores the stream using the caller's LM options.
#   3. gawk routes each logprob, converted to natural log, into
#      $scoredir/<name>.score and prints a progress line per file on stdout.
#
# cat is kept (instead of a redirect) so that "-" as the file list still
# means standard input.
# Limitations: nbest files are assumed to end with a newline, and their
# base names must not contain whitespace (the header is whitespace-split).
#
cat "$filelist" | (
	# Abort this producer stage on the first unreadable nbest file.
	set -e
	while read -r filename; do
		# Tolerate blank lines in the file list.
		[ -n "$filename" ] || continue

		numhyps=$(wc -l < "$filename")
		printf '%s %s.score %s\n' \
			"$escape" "$(basename "$filename" .trans)" "$numhyps"
		sed \
			-e 's/^ *([^ ]*) //' \
			-e 's/!SENT_START //' \
			-e 's/!SENT_END //' \
			"$filename"
	done
) | \
ngram -debug 1 -ppl - -escape "$escape " "$@" | \
gawk -v scoredir="$scoredir" -v escape="$escape" '
# Close the score file being written, warning on stderr if fewer scores
# arrived than the header line announced.
function finish_file() {
	if (scorefile == "") {
		return;
	}
	if (hypno < numhyps) {
		printf "warning: %s: expected %d scores, got %d\n", currentfile, numhyps, hypno > "/dev/stderr";
	}
	close(scorefile);
}
BEGIN {
	currentfile = "";
	scorefile = "";
	numhyps = 0;
	hypno = 0;
	M_LN10 = 2.30258509299404568402;		# from <math.h>
}
# Header line: switch output to the score file it names.
$1 == escape {
	finish_file();
	currentfile = $2;
	scorefile = scoredir "/" currentfile;
	numhyps = $3 + 0;
	printf "processing %d hyps for %s\n", numhyps, currentfile;
	hypno = 0;
	next;
}
/logprob=/ {
	# Accept at most numhyps scores per file.  Anything beyond that is
	# not a hypothesis score (ngram ends its -ppl output with a
	# corpus-level summary that also contains "logprob="), and lines
	# seen before the first header have no score file to go to.
	if (hypno >= numhyps) {
		next;
	}

	logprob = $4;
	hypno ++;

	# rescale LM scores to natural logs
	printf "%g\n", logprob * M_LN10 > scorefile;

	next;
}
END {
	finish_file();
}
'

