# fuzzycomparedata.py
""" A module for fuzzy comparing data files.

This module provides methods to compare two data files.
Applicable to all delimiter-separated formats, e.g. csv files.
Fuzzy compares numbers by using absolute and/or relative difference comparison.

"""
import argparse
import csv
import json
import sys
from fuzzycomparevtu import is_fuzzy_equal_text

def compare_data(dataFile1, dataFile2, delimiter, absolute=1.5e-7, relative=1e-2, zeroValueThreshold=None, verbose=True):
    """ Take two data files and fuzzy compare them column by column.

    Both files are read with the csv module. For every column of the first
    line, the values of all lines are joined into one whitespace separated
    string per file and handed to is_fuzzy_equal_text.

    Arguments:
    ----------
    dataFile1, dataFile2 : string
        The filenames of the data files to compare
    delimiter: string
        The delimiter for the columns

    Keyword Arguments:
    ------------------
    absolute : float
        The epsilon used for comparing numbers with an absolute criterion
    relative: float
        The epsilon used for comparing numbers with an relative criterion
    zeroValueThreshold: dict
        A dictionary of parameter value pairs that set the threshold under
        which a number is treated as zero for a certain parameter. Use this parameter if
        you have to avoid comparisons of very small numbers for a certain parameter.
        Columns are passed to is_fuzzy_equal_text under the name "row <index>".
        Defaults to an empty dictionary.
    verbose : bool
        If the script should produce informative output. Enabled by default as the details
        give the tester a lot more information on why tests fail.

    Returns:
    --------
    int
        0 if all columns were accepted (or both files are empty), 1 otherwise.
        The value is suitable as a process exit status.

    Exits:
    ------
    Calls sys.exit(3) if the files differ in their number of lines or if a
    line has a different number of columns in the two files.
    """

    # create the dict per call so that no mutable default is shared between calls
    if zeroValueThreshold is None:
        zeroValueThreshold = {}

    if verbose:
        print("Comparing {} and {}".format(dataFile1, dataFile2))
        print("... with a maximum relative error of {} and a maximum absolute error of {}*max_abs_parameter_value.".format(relative, absolute))

    # read both files completely; text mode with newline='' is what the csv
    # module expects, and the with-blocks make sure the files get closed
    with open(dataFile1, 'r', newline='') as file1:
        data1 = list(csv.reader(file1, delimiter=delimiter))
    with open(dataFile2, 'r', newline='') as file2:
        data2 = list(csv.reader(file2, delimiter=delimiter))

    if len(data1) != len(data2):
        print("Length of data1 and data2 not equal: ref= {} ,new= {} . Aborting!".format(len(data1), len(data2)))
        sys.exit(3)

    # two empty files have nothing that could differ
    if not data1:
        return 0

    # a differing column count would otherwise either crash with an IndexError
    # or silently leave the surplus columns uncompared
    for lineIdx, (line1, line2) in enumerate(zip(data1, data2)):
        if len(line1) != len(line2):
            print("Number of columns in data line {} not equal: ref= {} ,new= {} . Aborting!".format(lineIdx + 1, len(line1), len(line2)))
            sys.exit(3)

    is_equal = True
    # the first line determines how many columns are compared
    for i in range(len(data1[0])):
        a = " ".join(line[i] for line in data1)
        b = " ".join(line[i] for line in data2)

        if not is_fuzzy_equal_text(a, b, "row {0}".format(i), len(data1), absolute, relative, zeroValueThreshold, verbose):
            # without verbose output there is no point in checking further columns;
            # return 1 (not False, which sys.exit would turn into exit status 0)
            if not verbose:
                return 1
            is_equal = False

    return 0 if is_equal else 1


# main program if called as script return appropriate error codes
if __name__ == "__main__":

    def _str_to_bool(value):
        """ Convert a command line string such as 'true' or '0' into a bool. """
        # bool('False') is True, so type=bool could never switch the verbosity off
        normalized = value.strip().lower()
        if normalized in ('true', 't', 'yes', 'y', 'on', '1'):
            return True
        if normalized in ('false', 'f', 'no', 'n', 'off', '0', ''):
            return False
        raise argparse.ArgumentTypeError("expected a boolean value, got '{}'".format(value))

    # handle arguments and print help message
    parser = argparse.ArgumentParser(description='Fuzzy compare of two data files (e.g csv). '
                                                 'The files are accepted if for every value the difference is below the absolute error '
                                                 'or below the relative error or below both.')
    parser.add_argument('data_file_1', type=str, help='first file to compare')
    parser.add_argument('data_file_2', type=str, help='second file to compare')
    parser.add_argument('delimiter', type=str, help='delimiter separating the columns')
    parser.add_argument('-r', '--relative', type=float, default=1e-2, help='maximum relative error (default=1e-2)')
    parser.add_argument('-a', '--absolute', type=float, default=1.5e-7, help='maximum absolute error (default=1.5e-7)')
    parser.add_argument('-v', '--verbose', type=_str_to_bool, default=True, help='verbosity of the script (true/false, default=true)')
    # argparse runs string defaults through `type`, so the default '{}' becomes an empty dict
    parser.add_argument('-z', '--zeroThreshold', type=json.loads, default='{}', help='Thresholds for treating numbers as zero for a parameter as a python dict e.g. {"vel":1e-7,"delP":1.0}')
    args = vars(parser.parse_args())

    # the return value of compare_data is used as the exit status of the script
    sys.exit(compare_data(args["data_file_1"], args["data_file_2"], args["delimiter"], args["absolute"], args["relative"], args["zeroThreshold"], args["verbose"]))