From 54fcb2def6e7ca31e124418bf73d2c792067adbe Mon Sep 17 00:00:00 2001
From: Jameson Graef Rollins <jrollins@finestructure.net>
Date: Thu, 16 Apr 2020 14:43:32 -0700
Subject: [PATCH] tests compare against git hash, eliminate cached .h5

This updates the test infrastructure to store a git hash of the reference
code, instead of generated .h5 files.  The test infrastructure creates a
cache of the reference .h5 files from the specified git hash if it doesn't
already exist, and then compares against that cache.

This removes the need for git-lfs and the cached .h5 files; the tests
instead require that the code be run from a git checkout.
---
 gwinc/test/__main__.py      | 166 ++++++++++++++++++++++--------------
 gwinc/test/cache/Aplus.h5   |   3 -
 gwinc/test/cache/CE1.h5     |   3 -
 gwinc/test/cache/CE2.h5     |   3 -
 gwinc/test/cache/Voyager.h5 |   3 -
 gwinc/test/cache/aLIGO.h5   |   3 -
 gwinc/test/gen_cache.sh     |  26 ++++++
 gwinc/test/ref_hash         |   1 +
 8 files changed, 131 insertions(+), 77 deletions(-)
 delete mode 100644 gwinc/test/cache/Aplus.h5
 delete mode 100644 gwinc/test/cache/CE1.h5
 delete mode 100644 gwinc/test/cache/CE2.h5
 delete mode 100644 gwinc/test/cache/Voyager.h5
 delete mode 100644 gwinc/test/cache/aLIGO.h5
 create mode 100755 gwinc/test/gen_cache.sh
 create mode 100644 gwinc/test/ref_hash

diff --git a/gwinc/test/__main__.py b/gwinc/test/__main__.py
index 8ccd2dba..c6900de3 100644
--- a/gwinc/test/__main__.py
+++ b/gwinc/test/__main__.py
@@ -5,6 +5,7 @@ import signal
 import logging
 import tempfile
 import argparse
+import subprocess
 import numpy as np
 import matplotlib.pyplot as plt
 from collections import OrderedDict
@@ -25,8 +26,54 @@ logging.basicConfig(
     level=os.getenv('LOG_LEVEL', logging.INFO))
 
 
+FREQ = np.logspace(np.log10(5), np.log10(6000), 3000)
 TOLERANCE = 1e-6
-CACHE_PATH = os.path.join(os.path.dirname(__file__), 'cache')
+
+
+def test_path(*args):  # join *args onto the directory holding this test module
+    return os.path.join(os.path.dirname(__file__), *args)
+
+
+def gen_cache(ref_hash, path):
+    """generate cache from git reference
+
+    The ref_hash should be a git hash, and path should be the location
+    of the generated cache.
+
+    The included shell script is used to extract the gwinc code from
+    the appropriate git commit, and invoke a new python instance to
+    generate the noise curves.  Raises subprocess.CalledProcessError
+    if the script exits non-zero, rather than silently continuing.
+    """
+    logging.info("creating new cache from reference {}...".format(ref_hash))
+    # check=True: a failed generation must not be mistaken for a valid cache
+    subprocess.run(
+        [test_path('gen_cache.sh'), ref_hash, path], check=True)
+
+
+def load_cache(path):
+    """read the reference cache directory at path
+
+    returns a dict: 'ref_hash' holds the stripped contents of the
+    ref_hash file in path (None if that file is absent), and 'ifos'
+    maps each *.h5 file's base name to its full path.
+
+    """
+    logging.info("loading cache {}...".format(path))
+    hash_file = os.path.join(path, 'ref_hash')
+    # a missing ref_hash file is tolerated and reported as None
+    git_hash = None
+    if os.path.exists(hash_file):
+        with open(hash_file) as fobj:
+            git_hash = fobj.read().strip()
+    logging.info("cache git hash: {}".format(git_hash))
+    # sorted listing keeps the 'ifos' insertion order deterministic
+    traces = {}
+    for fname in sorted(os.listdir(path)):
+        stem, suffix = os.path.splitext(fname)
+        if suffix == '.h5':
+            traces[stem] = os.path.join(path, fname)
+    return {'ref_hash': git_hash, 'ifos': traces}
 
 
 def walk_traces(traces, root=()):
@@ -107,9 +154,7 @@ def compare_traces(tracesA, tracesB, tolerance=TOLERANCE, skip=None):
     return diffs
 
 
-def plot_diffs(freq, diffs, tolerance,
-               name, styleA, styleB, fom_title='',
-               save=None):
+def plot_diffs(freq, diffs, styleA, styleB):
     spec = (len(diffs)+1, 2)
     sharex = None
     for i, nname in enumerate(diffs):
@@ -136,21 +181,9 @@ def plot_diffs(freq, diffs, tolerance,
         if i == 0:
             axr.set_title("fractional difference")
 
-        plt.suptitle('''{} {}/{} noise comparison
-(noises that differ by more than {} ppm)
-{}'''.format(name, styleA['label'], styleB['label'], tolerance*1e6, fom_title))
-
     axl.set_xlabel("frequency [Hz]")
     axr.set_xlabel("frequency [Hz]")
-
     plt.subplots_adjust(top=0.8, right=0.85, wspace=0.3)
-    if save:
-        pwidth = 10
-        pheight = (len(diffs) * 5) + 2
-        plt.gcf().set_size_inches(pwidth, pheight)
-        plt.savefig(save)
-    else:
-        plt.show()
 
 ##################################################
 
@@ -163,35 +196,49 @@ def main():
         '--skip', '-k', metavar='NOISE', action='append',
         help='traces to skip in comparison (multiple may be specified)')
     parser.add_argument(
-        '--cache', '-c', metavar='PATH', default=CACHE_PATH,
-        help='specify alternate IFO traces cache path')
+        '--git-ref', '-g', metavar='HASH',
+        help='specify git ref to compare against')
     rgroup = parser.add_mutually_exclusive_group()
+    rgroup.add_argument(
+        '--update-ref', '-u', metavar='HASH', nargs='?', const='HEAD',
+        help="update the stored reference git hash to HASH (or 'HEAD' if not specified) and exit")
     rgroup.add_argument(
         '--plot', '-p', action='store_true',
         help='plot differences')
     rgroup.add_argument(
         '--report', '-r', metavar='REPORT.pdf',
         help='create PDF report of test results (only created if differences found)')
-    rgroup.add_argument(
-        '--gen-cache', action='store_true',
-        help='update/create IFO traces cache directory')
     parser.add_argument(
         'ifo', metavar='IFO', nargs='*',
         help='specific ifos to test (default all)')
     args = parser.parse_args()
 
-    if args.gen_cache:
-        try:
-            os.makedirs(args.cache)
-        except FileExistsError:
-            pass
-        freq = np.logspace(np.log10(5), np.log10(6000), 3000)
-        for name in IFOS:
-            Budget = load_budget(name)
-            traces = Budget(freq).run()
-            path = os.path.join(args.cache, name+'.h5')
-            save_hdf5(path, freq, traces)
-        return
+    if args.update_ref:
+        if args.update_ref == 'HEAD':
+            ref_hash = subprocess.run(
+                ['git', 'show', '-s', '--format=format:%H', 'HEAD'],
+                capture_output=True, universal_newlines=True, check=True,
+            ).stdout.strip()  # check=True: never record an empty hash if git fails
+        else:
+            ref_hash = args.update_ref
+        logging.info("updating reference git hash to {}...".format(ref_hash))
+        with open(test_path('ref_hash'), 'w') as f:
+            f.write('{}\n'.format(ref_hash))
+        sys.exit()
+
+    if args.git_ref:
+        ref_hash = args.git_ref
+    elif os.path.exists(test_path('ref_hash')):
+        with open(test_path('ref_hash')) as f:
+            ref_hash = f.read().strip()
+    else:
+        sys.exit("Unspecified reference git hash, could not run test.")
+
+    # load the cache
+    cache_path = test_path('cache', ref_hash)
+    if not os.path.exists(cache_path):
+        gen_cache(ref_hash, cache_path)
+    cache = load_cache(cache_path)
 
     if args.report:
         base, ext = os.path.splitext(args.report)
@@ -199,22 +246,12 @@ def main():
             parser.error("Test reports only support PDF format.")
         outdir = tempfile.TemporaryDirectory()
 
-    # find all cached IFOs
-    logging.info("loading cache {}...".format(args.cache))
-    cached_ifos = {}
-    for f in sorted(os.listdir(args.cache)):
-        name, ext = os.path.splitext(f)
-        if ext != '.h5':
-            continue
-        cached_ifos[name] = os.path.join(args.cache, f)
-
-    # select
     if args.ifo:
         ifos = args.ifo
     else:
         ifos = IFOS
 
-    style_cache = dict(label='cache', linestyle='-')
+    style_cache = dict(label='reference', linestyle='-')
     style_head = dict(label='head', linestyle='--')
 
     fail = False
@@ -223,10 +260,10 @@ def main():
     for name in ifos:
         logging.info("{} tests...".format(name))
 
-        path = cached_ifos[name]
-
-        if not os.path.exists(path):
-            logging.warning("{} test cache not found".format(name))
+        try:
+            path = cache['ifos'][name]
+        except KeyError:
+            logging.warning("IFO {} not found in cache".format(name))
             fail |= True
             continue
 
@@ -262,21 +299,26 @@ inspiral {func} {m1}/{m2} Msol:
 
         diffs = compare_traces(traces_cache, traces_head, args.tolerance, args.skip)
 
-        if diffs:
-            logging.warning("{} tests FAIL".format(name))
-            fail |= True
-            if args.plot or args.report:
-                if args.report:
-                    save = os.path.join(outdir.name, name+'.pdf')
-                else:
-                    save = None
-                plot_diffs(
-                    freq, diffs, args.tolerance,
-                    name, style_cache, style_head, fom_summary,
-                    save=save,
-                )
-        else:
+        if not diffs:
             logging.info("{} tests pass.".format(name))
+            continue
+
+        logging.warning("{} tests FAIL".format(name))
+        fail |= True
+        if args.plot or args.report:
+            plot_diffs(freq, diffs, style_cache, style_head)
+            plt.suptitle('''{} {}/{} noise comparison
+(noises that differ by more than {} ppm)
+reference git hash: {}
+{}'''.format(name, style_cache['label'], style_head['label'],
+             args.tolerance*1e6, cache['ref_hash'], fom_summary))
+            if args.report:
+                pwidth = 10
+                pheight = (len(diffs) * 5) + 2
+                plt.gcf().set_size_inches(pwidth, pheight)
+                plt.savefig(os.path.join(outdir.name, name+'.pdf'))
+            else:
+                plt.show()
 
     if not fail:
         logging.info("all tests pass.")
diff --git a/gwinc/test/cache/Aplus.h5 b/gwinc/test/cache/Aplus.h5
deleted file mode 100644
index 369454f3..00000000
--- a/gwinc/test/cache/Aplus.h5
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:8394387e7e87828e3976097a30af22ee015e7d48d749b2eb8c4616b5308b8937
-size 276288
diff --git a/gwinc/test/cache/CE1.h5 b/gwinc/test/cache/CE1.h5
deleted file mode 100644
index d747589b..00000000
--- a/gwinc/test/cache/CE1.h5
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:38db82407ca43b0e2142f887668ea5040079359174608023e5f9e4b8d407eb7e
-size 400384
diff --git a/gwinc/test/cache/CE2.h5 b/gwinc/test/cache/CE2.h5
deleted file mode 100644
index add79d72..00000000
--- a/gwinc/test/cache/CE2.h5
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:75a4a59c2205fbc2fb3f4d54f449dc12fabc2db6e628f07fb983c270dd341c99
-size 450432
diff --git a/gwinc/test/cache/Voyager.h5 b/gwinc/test/cache/Voyager.h5
deleted file mode 100644
index 25504e4d..00000000
--- a/gwinc/test/cache/Voyager.h5
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:55f1f7457fd734d3a399306cdb7df6617adf2df4f11ad68f3b4ac7ea7a15aaab
-size 324288
diff --git a/gwinc/test/cache/aLIGO.h5 b/gwinc/test/cache/aLIGO.h5
deleted file mode 100644
index 7a33dc5d..00000000
--- a/gwinc/test/cache/aLIGO.h5
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9168d3d879f7df53ccba7aae911a8f5dcbaa82cbb1a3a8399efc997f1d1994b0
-size 276288
diff --git a/gwinc/test/gen_cache.sh b/gwinc/test/gen_cache.sh
new file mode 100755
index 00000000..7f49f441
--- /dev/null
+++ b/gwinc/test/gen_cache.sh
@@ -0,0 +1,26 @@
+#!/bin/bash -e
+set -o pipefail  # abort if `git archive` fails, not only if `tar` does
+if [ -z "$1" ] || [ -z "$2" ] ; then
+    echo "usage: $(basename "$0") git_ref_hash cache_dir_path"
+    echo "generate a cache of IFO budget traces from a particular git commit"
+    exit 1
+fi
+
+ref_hash="$1"
+cache_dir="$2"
+
+mkdir -p "$cache_dir"
+cache_dir=$(cd "$cache_dir" && pwd)
+gwinc_dir="$cache_dir/gwinc"
+mkdir -p "$gwinc_dir"
+# unpack the reference commit's tree; expansions are quoted so paths with spaces work
+git archive "$ref_hash" | tar -x -C "$gwinc_dir"
+# run from the extracted tree so `import gwinc` resolves to the reference code
+cd "$gwinc_dir"
+
+export LOG_LEVEL=INFO
+for ifo in $(python3 -c "import gwinc; print(' '.join(gwinc.IFOS))") ; do
+    python3 -m gwinc --save "$cache_dir/${ifo}.h5" "$ifo"
+done
+# written last, after every IFO has been generated
+echo "$ref_hash" > "$cache_dir/ref_hash"
diff --git a/gwinc/test/ref_hash b/gwinc/test/ref_hash
new file mode 100644
index 00000000..c5170040
--- /dev/null
+++ b/gwinc/test/ref_hash
@@ -0,0 +1 @@
+9ff4ba5463895698f48c243e329c4138ab163c1f
-- 
GitLab