#!/usr/local/bin/gawk -f
#
# compare test outputs allowing for small numerical differences
#
# Copyright (c) 2003-2006 SRI International, 2012 Microsoft Corp. All Right Reserved.
#
# $Header: /home/srilm/CVS/srilm/sbin/compare-outputs,v 1.5 2017/07/20 03:20:03 stolcke Exp $
#

BEGIN {
	# Pattern for a decimal number: optional sign, then either digits with
	# an optional fraction or a bare fraction such as ".5", an optional
	# exponent, and an optional trailing comma.  At least one digit is
	# required and the decimal point is matched literally ([.]), so that
	# ordinary words are never classified as numbers.
	numerical = "^[-+]?([0-9]+([.][0-9]*)?|[.][0-9]+)([Ee][-+]?[0-9]+)?,?$";

	# two numbers whose magnitudes are both below epsilon count as equal
	epsilon = 1e-5;

	# largest |a - b| / |a + b| for which two numbers still count as equal
	tolerance = 0.001;
}

#
# abs(x) -- absolute value of x.
#
# The argument is coerced to a number first; without that, a string
# argument would be compared to 0 lexically rather than numerically.
#
function abs(x) {
	x += 0;
	return (x < 0) ? -x : x;
}

#
# same_words(w1, w2) -- return 1 if the two words are equivalent, else 0.
#
# Words are equivalent if, after normalization (".gz" removed, MSVC and
# mixed-case infinities mapped to lower-case "inf"), they are equal, or if
# both match the `numerical' pattern and are either both smaller than
# `epsilon' in magnitude or differ by less than `tolerance' relative to
# their sum.  Words of the form STRING=NUMBER with the "=" at the same
# position are compared piecewise.
#
# eq_sign, num1 and num2 are local variables, not arguments.
#
function same_words(w1, w2, eq_sign, num1, num2) {

	# split words of the form STRING=NUMBER and compare the halves
	eq_sign = index(w1, "=");
	if (eq_sign > 0 && index(w2, "=") == eq_sign) {
		return same_words(substr(w1, 1, eq_sign-1), substr(w2, 1, eq_sign-1)) \
		   && same_words(substr(w1, eq_sign+1), substr(w2, eq_sign+1));
	}

	# ignore filename differences due to optional gzipping
	gsub("\\.gz", "", w1);
	gsub("\\.gz", "", w2);

	# MSVC infinity values
	if (w1 == "1.#INF") w1 = "inf";
	if (w2 == "1.#INF") w2 = "inf";
	if (w1 == "-1.#INF") w1 = "-inf";
	if (w2 == "-1.#INF") w2 = "-inf";

	# normalize case in infinity
	# (this lower-cases any word that contains "inf")
	if (tolower(w1) ~ /inf/) w1 = tolower(w1);
	if (tolower(w2) ~ /inf/) w2 = tolower(w2);

	if (w1 == w2) {
		return 1;
	}

	# anything that is not a pair of numbers must match exactly
	if (w1 !~ numerical || w2 !~ numerical) {
		return 0;
	}

	# Convert explicitly.  Values produced by substr() above, or carrying
	# a trailing comma, are plain strings; comparing them to a number
	# would be a string comparison.  Adding 0 takes the numeric prefix.
	num1 = w1 + 0;
	num2 = w2 + 0;

	# two values that are both very close to zero are ok
	if (abs(num1) < epsilon && abs(num2) < epsilon) {
		return 1;
	}

	# Two values that differ by a small relative difference are ok.
	# Written without a division so that values of equal magnitude and
	# opposite sign (sum of zero) compare as different instead of
	# triggering a fatal division by zero.
	return abs(num1 - num2) < tolerance * abs(num1 + num2);
}

#
# compare_lines(file1, file2) -- helper for same_contents().
#
# Reads both files in lockstep and returns 1 if every line has the same
# number of whitespace-separated fields and every pair of fields passes
# same_words(); otherwise prints a diagnostic to stderr and returns 0.
# Does not close the files.
#
# Everything after file2 in the parameter list is a local variable.
#
function compare_lines(file1, file2, lineno, line1, line2, words1, words2, n1, n2, i, rc1, rc2) {

	lineno = 0;

	while ((rc1 = (getline line1 < file1)) > 0) {
		rc2 = (getline line2 < file2);
		if (rc2 < 0) {
			print "cannot read " file2 > "/dev/stderr";
			return 0;
		}
		if (rc2 == 0) {
			print "premature EOF on " file2 > "/dev/stderr";
			return 0;
		}

		lineno ++;

		sub("\r$", "", line1);		# strip MSDOS EOL
		n1 = split(line1, words1);

		sub("\r$", "", line2);		# strip MSDOS EOL
		n2 = split(line2, words2);

		if (n1 != n2) {
			print "line " lineno ": mismatched number of fields" \
								> "/dev/stderr";
			return 0;
		}

		for (i = 1; i <= n1; i ++) {
			if (!same_words(words1[i], words2[i])) {
				print "line " lineno ": words differ -- " \
					words1[i] " != " words2[i] \
								> "/dev/stderr";
				return 0;
			}
		}
	}

	# getline returns -1 on a read/open error, which is not the same
	# thing as reaching the end of the file
	if (rc1 < 0) {
		print "cannot read " file1 > "/dev/stderr";
		return 0;
	}

	# file1 is exhausted; file2 must be as well
	rc2 = (getline line2 < file2);
	if (rc2 < 0) {
		print "cannot read " file2 > "/dev/stderr";
		return 0;
	}
	if (rc2 > 0) {
		print "premature EOF on " file1 > "/dev/stderr";
		return 0;
	}

	return 1;
}

#
# same_contents(file1, file2) -- return 1 if the two files are equivalent
# line by line (see compare_lines), 0 if they differ or cannot be read.
# Diagnostics go to stderr.  Both files are closed before returning.
#
# result and line are local variables, not arguments.
#
function same_contents(file1, file2, result, line) {

	# awk identifies an input stream by its name, so reading the same
	# name through two getlines would consume alternating lines of a
	# single stream.  A file is trivially identical to itself; only
	# check that it can be read.
	if (file1 == file2) {
		result = ((getline line < file1) >= 0);
		close(file1);
		if (!result) {
			print "cannot read " file1 > "/dev/stderr";
		}
		return result;
	}

	result = compare_lines(file1, file2);

	close(file1);
	close(file2);

	return result;
}

# Main program: compare the two files named on the command line.
# Exit status is 0 if they are equivalent, 1 if they differ, and 2 if
# fewer than two file arguments were given.
BEGIN {
	# ARGV[0] is the interpreter name, so two file arguments mean ARGC >= 3
	if (ARGC < 3) {
		print "usage: compare-outputs file1 file2" > "/dev/stderr";
		exit(2);
	}

	if (same_contents(ARGV[1], ARGV[2])) {
		exit(0);
	} else {
		exit(1);
	}
}

