[bin] Add script for fuzzy comparison of data sets (e.g. csv files)

3c6dcd3d · Thomas Fetzer · 7ad7a53e · 3c6dcd3d · 3c6dcd3d · 3c6dcd3d
Commit 3c6dcd3d authored 7 years ago by Thomas Fetzer
--- a/bin/testing/fuzzycomparedata.py
+++ b/bin/testing/fuzzycomparedata.py
+""" A module for fuzzy comparing data files.
+
+This module provides methods to compare two data files.
+Applicable to any delimiter-separated text format, e.g. csv files.
+Fuzzy compares numbers by using absolute and/or relative difference comparison.
+
+"""
+import argparse
+import csv
+import json
+import sys
+from fuzzycomparevtu import is_fuzzy_equal_text
+
+def compare_data(dataFile1, dataFile2, delimiter, absolute=1.5e-7, relative=1e-2, zeroValueThreshold={}, verbose=True):
+    """ take two data files and compare them. Returns an exit key as returnvalue.
+
+    Arguments:
+    ----------
+    dataFile1, dataFile2 : string
+        The filenames of the data files to compare
+    delimiter: string
+        The delimiter for the columns
+
+    Keyword Arguments:
+    ------------------
+    absolute : float
+        The epsilon used for comparing numbers with an absolute criterion
+    relative: float
+        The epsilon used for comparing numbers with an relative criterion
+    zeroValueThreshold: dict
+        A dictionary of parameter value pairs that set the threshold under
+        which a number is treated as zero for a certain parameter. Use this parameter if
+        you have to avoid comparisons of very small numbers for a certain parameter.
+    verbose : bool
+        If the script should produce informative output. Enabled by default as the details
+        give the tester a lot more information on why tests fail.
+    """
+
+    if verbose:
+        print("Comparing {} and {}".format(dataFile1, dataFile2))
+        print("... with a maximum relative error of {} and a maximum absolute error of {}*max_abs_parameter_value.".format(relative, absolute))
+
+    # construct element tree from data files
+    data1 = list(csv.reader(open(dataFile1, 'rb'), delimiter=delimiter))
+    data2 = list(csv.reader(open(dataFile2, 'rb'), delimiter=delimiter))
+
+    if (len(data1) != len(data2)):
+        print "Length of data1 and data2 not equal: ref=", len(data1), ",new=", len(data2), ". Aborting!"
+        exit (3)
+
+    is_equal = True
+    for i in range(0,len(data1[0])):
+        a = data1[0][i]
+        b = data2[0][i]
+        for j in range(1,len(data1)):
+            a += " {0}".format(data1[j][i])
+            b += " {0}".format(data2[j][i])
+
+        if not is_fuzzy_equal_text(a, b, "row {0}".format(i), len(data1), absolute, relative, zeroValueThreshold, verbose):
+            if verbose:
+                is_equal = False
+            else:
+                return False
+
+    if is_equal:
+        return 0
+    else:
+        return 1
+
+
+# main program if called as script return appropriate error codes
+if __name__ == "__main__":
+    def str_to_bool(value):
+        """ Convert a command line string such as 'True' or 'false' into a bool. """
+        # type=bool would treat every non-empty string (including 'False') as True
+        normalized = value.strip().lower()
+        if normalized in ('true', 't', 'yes', 'y', 'on', '1'):
+            return True
+        if normalized in ('false', 'f', 'no', 'n', 'off', '0', ''):
+            return False
+        raise argparse.ArgumentTypeError("expected a boolean value, got '{}'".format(value))
+
+    # handle arguments and print help message
+    parser = argparse.ArgumentParser(description='Fuzzy compare of two data files (e.g csv). '
+                                                 'The files are accepted if for every value the difference is below the absolute error '
+                                                 'or below the relative error or below both.')
+    parser.add_argument('data_file_1', type=str, help='first file to compare')
+    parser.add_argument('data_file_2', type=str, help='second file to compare')
+    parser.add_argument('delimiter', type=str, help='column delimiter of the data files')
+    parser.add_argument('-r', '--relative', type=float, default=1e-2, help='maximum relative error (default=1e-2)')
+    parser.add_argument('-a', '--absolute', type=float, default=1.5e-7, help='maximum absolute error (default=1.5e-7)')
+    parser.add_argument('-v', '--verbose', type=str_to_bool, default=True, help='verbosity of the script (True or False, default=True)')
+    # the string default is passed through json.loads by argparse, yielding an empty dict
+    parser.add_argument('-z', '--zeroThreshold', type=json.loads, default='{}', help='Thresholds for treating numbers as zero for a parameter as a python dict e.g. {"vel":1e-7,"delP":1.0}')
+    args = vars(parser.parse_args())
+
+    # the return value of compare_data is used as the exit code of the script
+    sys.exit(compare_data(args["data_file_1"], args["data_file_2"], args["delimiter"], args["absolute"], args["relative"], args["zeroThreshold"], args["verbose"]))
--- a/bin/testing/runtest.py
+++ b/bin/testing/runtest.py
@@ -3,12 +3,14 @@ import os, sys
 import subprocess
 import json
 from fuzzycomparevtu import compare_vtk
+from fuzzycomparedata import compare_data

 # parse arguments
 parser = argparse.ArgumentParser()
 parser.add_argument('-c', '--command', nargs=1, help='The executable and optional arguments as a single string', required=True)
-parser.add_argument('-s', '--script', nargs=1, help="The comparison script. [fuzzy, exact, <path_to_script>] where the script takes two vtu files as arguments.")
-parser.add_argument('-f', '--files', nargs='+', help="Pairs of reference and vtu file names. Usage: '[-f ref1 vtu1 [[ref2] [vtu2] ...]]'")
+parser.add_argument('-s', '--script', nargs=1, help="The comparison script. [fuzzy, fuzzyData, exact, <path_to_script>] where the script takes two files as arguments.")
+parser.add_argument('-f', '--files', nargs='+', help="Pairs of file names (first reference, then current). Usage: '[-f ref1 cur1 [[ref2] [cur2] ...]]'")
+parser.add_argument('-d', '--delimiter', type=str, default=',', help='Column delimiter for data files')
 parser.add_argument('-r', '--relative', type=float, default=1e-2, help='maximum relative error (default=1e-2) when using fuzzy comparison')
 parser.add_argument('-a', '--absolute', type=float, default=1.5e-7, help='maximum absolute error (default=1.5e-7) when using fuzzy comparison')
 parser.add_argument('-z', '--zeroThreshold', type=json.loads, default='{}', help='Thresholds for treating numbers as zero for a parameter as a python dict e.g. {"vel":1e-7,"delP":1.0}')
@@ -17,14 +19,14 @@ args = vars(parser.parse_args())
 # check parameters
 if args['script']:
    if len(args['files'])%2 != 0 or not args['files']:
-        sys.stderr.write("The files have to be pairs of vtu and reference files. Usage '-f [ref1] [vtu1] [[ref2] [vtu2] ...]'")
+        sys.stderr.write("The files have to be pairs of reference and current solution files. Usage '-f [ref1] [cur1] [[ref2] [cur2] ...]'")
        parser.print_help()
        sys.exit(1)
    for i in range(0, len(args['files'])//2):
        # delete the vtu files to compare
        ref_dir = os.path.dirname(os.path.abspath(__file__)).rstrip("bin") + "test/references"
        if os.path.dirname(args['files'][(i*2)+1]) == ref_dir:
-            sys.stderr.write("Tried to delete a reference solution. Specify reference file first, then the VTU file. Usage: '[-f ref1 vtu1 [[ref2] [vtu2] ...]]'")
+            sys.stderr.write("Tried to delete a reference solution. Specify reference file first, then the current solution. Usage: '[-f ref1 cur1 [[ref2] [cur2] ...]]'")
            sys.exit(1)
        subprocess.call(['rm', '-fv', args['files'][(i*2)+1]])

@@ -61,6 +63,16 @@ if args['script']:
                return_code = 1
        sys.exit(return_code)

+    # fuzzy comparison of data sets?
+    elif args['script'] == ["fuzzyData"]:
+        return_code = 0
+        for i in range(0, len(args['files'])//2):
+            print("\nFuzzy data comparison...")
+            result = compare_data(args['files'][i*2], args['files'][(i*2)+1], args['delimiter'], relative=args['relative'], absolute=args['absolute'], zeroValueThreshold=args['zeroThreshold'])
+            if result:
+                return_code = 1
+        sys.exit(return_code)
+
    # other script?
    else:
        return_code = 0

--- a/test/material/fluidmatrixinteractions/2p/CMakeLists.txt
+++ b/test/material/fluidmatrixinteractions/2p/CMakeLists.txt
@@ -2,14 +2,14 @@ add_input_file_links()

 add_dumux_test(test_thermalconductivityjohansen test_thermalconductivityjohansen test_thermalconductivityjohansen.cc
               python ${CMAKE_SOURCE_DIR}/bin/testing/runtest.py
-                 --script exact
+                 --script fuzzyData --delimiter " "
                 --files ${CMAKE_SOURCE_DIR}/test/references/thermalconductivityjohansen-reference.dat
                         ${CMAKE_CURRENT_BINARY_DIR}/johansen_lambda_eff.dat
                 --command "${CMAKE_CURRENT_BINARY_DIR}/test_thermalconductivityjohansen")

 add_dumux_test(test_thermalconductivitysomerton test_thermalconductivitysomerton test_thermalconductivitysomerton.cc
               python ${CMAKE_SOURCE_DIR}/bin/testing/runtest.py
-                 --script exact
+                 --script fuzzyData --delimiter " "
                 --files ${CMAKE_SOURCE_DIR}/test/references/thermalconductivitysomerton-reference.dat
                         ${CMAKE_CURRENT_BINARY_DIR}/somerton_lambda_eff.dat
                 --command "${CMAKE_CURRENT_BINARY_DIR}/test_thermalconductivitysomerton")

--- a/test/material/fluidmatrixinteractions/CMakeLists.txt
+++ b/test/material/fluidmatrixinteractions/CMakeLists.txt
@@ -4,21 +4,21 @@ add_input_file_links()

 add_dumux_test(test_effectivediffusivitymillingtonquirk test_effectivediffusivitymillingtonquirk test_effectivediffusivitymillingtonquirk.cc
               python ${CMAKE_SOURCE_DIR}/bin/testing/runtest.py
-                 --script exact
+                 --script fuzzyData --delimiter " "
                 --files ${CMAKE_SOURCE_DIR}/test/references/effectivediffusivitymillingtonquirk-reference.dat
                         ${CMAKE_CURRENT_BINARY_DIR}/millingtonquirk_d_eff.dat
                 --command "${CMAKE_CURRENT_BINARY_DIR}/test_effectivediffusivitymillingtonquirk")

 add_dumux_test(test_effectivediffusivityconstant test_effectivediffusivityconstant test_effectivediffusivityconstant.cc
              python ${CMAKE_SOURCE_DIR}/bin/testing/runtest.py
-                 --script exact
+                 --script fuzzyData --delimiter " "
                 --files ${CMAKE_SOURCE_DIR}/test/references/effectivediffusivityconstanttau-reference.dat
                         ${CMAKE_CURRENT_BINARY_DIR}/constant_d_eff.dat
                 --command "${CMAKE_CURRENT_BINARY_DIR}/test_effectivediffusivityconstant")

 add_dumux_test(test_effectivediffusivityconstanttau test_effectivediffusivityconstanttau test_effectivediffusivityconstanttau.cc
              python ${CMAKE_SOURCE_DIR}/bin/testing/runtest.py
-                 --script exact
+                 --script fuzzyData --delimiter " "
                 --files ${CMAKE_SOURCE_DIR}/test/references/effectivediffusivityconstanttau-reference.dat
                         ${CMAKE_CURRENT_BINARY_DIR}/constanttau_d_eff.dat
                 --command "${CMAKE_CURRENT_BINARY_DIR}/test_effectivediffusivityconstanttau")