diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index e7b2de49312cde25b824aef9f5809dc294dffdca..d7f1edd44416e181f0f3e9ff9f3a9dec14e2a87b 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -3,24 +3,45 @@
 The pygwinc project welcomes your contributions.  Our policy is that
 all contributions should be peer-reviewed.  To facilitate the review,
 please do not commit your work to this repository yourself.  Instead,
-fork the repository and send a merge request.
+fork the repository and create a [merge
+request](https://git.ligo.org/gwinc/pygwinc/-/merge_requests/new)
+against the main pygwinc master branch for you code changes.
+
+When submitting code for merge, please follow good coding practice.
+Respect the existing coding style, which in the case of `pygwinc`
+should follow the standard python
+[PEP8](https://www.python.org/dev/peps/pep-0008/) style as much as
+possible.  Make individual commits as logically distinct and atomic as
+possible, and provide a complete, descriptive log of the changes
+(including a one line summary followed by a blank line).  Think of
+your reviewers when you make code, as your code will be more readily
+merged if follow good coding practice.
 
 `pygwinc` comes with a test suite that will calculate all canonical
-IFO budgets with the current state of the code, and compare them
-against cached hdf5 traces (stored in the repo with
-[git-lfs](https://git-lfs.github.com/) ( [see
-also](https://wiki.ligo.org/Computing/GitLFS)).  Contributors should
-run the tests before submitting any merge requests:
+IFO budgets with the current state of the code and compare them
+against traces generated with code from a reference git commit.
+Contributors should run the tests before submitting any merge
+requests:
 ```shell
 $ python3 -m gwinc.test
 ```
-The test suite will be run automatically upon push to git.ligo.org as
-part of the continuous integration, and code that fails the CI will
-not be merged, unless failures are well justified.
+On initial run a cache of the reference traces will be generated.
+Subsequent runs will be sped up by use the existing cache.  Use the
+'--plot' or '--report' options to produce visual comparisons of the
+noise differences.
+
+If you would like to submit your code changes for inclusion upstream,
+and your changes modify the noise calculations such that the tests
+fail, then the reference hash will need to be updated to point to the
+latest good commit.  This can be done with the '--update-ref' option
+to the test script:
+```shell
+$ python3 -m gwinc.test --update-ref
+```
+If the no specific reference is provided a pointer to the last commit
+will be made.  *Note: reference updates need to be made in a separate
+commit after the commits that change the noise calculations.*
 
-If your change affects the shape of a noise curve, your commit message
-should make note of that, and provide a justification.  It will also
-be necessary to update the reference curves stored in cache, but that
-should be done in a separate commit not a part of the original merge
-request, so that all reviewers can see the changes introduced in the
-CI test failure report.
+Once you submit your merge request, an automated pipeline will run the
+test script to validate your changes.  If the tests fail but noise
+changes are detected, then the merge request will be rejected.
diff --git a/gwinc/test/__main__.py b/gwinc/test/__main__.py
index 8ccd2dba2d50ef80cbc144151c9fe1c07bdd5149..55b4e35cdfcd154d2bb4d117897b201487bcc859 100644
--- a/gwinc/test/__main__.py
+++ b/gwinc/test/__main__.py
@@ -1,10 +1,12 @@
 import os
 import sys
 import glob
+import shutil
 import signal
 import logging
 import tempfile
 import argparse
+import subprocess
 import numpy as np
 import matplotlib.pyplot as plt
 from collections import OrderedDict
@@ -25,8 +27,106 @@ logging.basicConfig(
     level=os.getenv('LOG_LEVEL', logging.INFO))
 
 
+FREQ = np.logspace(np.log10(5), np.log10(6000), 3000)
 TOLERANCE = 1e-6
-CACHE_PATH = os.path.join(os.path.dirname(__file__), 'cache')
+CACHE_LIMIT = 5
+
+
+def test_path(*args):
+    """Return path to package file."""
+    return os.path.join(os.path.dirname(__file__), *args)
+
+
+def git_ref_resolve_hash(git_ref):
+    """Resolve a git reference into its hash string."""
+    try:
+        return subprocess.run(
+            ['git', 'show', '-s', '--format=format:%H', git_ref],
+            capture_output=True, universal_newlines=True,
+        ).stdout
+    except subprocess.CalledProcessError:
+        return None
+
+
+def write_ref_hash(ref_hash):
+    """Write ref hash to reference file
+
+    """
+    with open(test_path('ref_hash'), 'w') as f:
+        f.write('{}\n'.format(ref_hash))
+
+
+def load_ref_hash():
+    """Load the current reference git hash.
+
+    """
+    try:
+        with open(test_path('ref_hash')) as f:
+            return f.read().strip()
+    except IOError:
+        return None
+
+
+def prune_cache_dir():
+    """Prune all but the N most recently accessed caches.
+
+    """
+    cache_dir = test_path('cache')
+    if not os.path.exists(cache_dir):
+        return
+    expired_paths = sorted(
+        [os.path.join(cache_dir, path) for path in os.listdir(cache_dir)],
+        key=lambda path: os.stat(path).st_atime,
+    )[CACHE_LIMIT:]
+    if not expired_paths:
+        return
+    logging.info("pruning {} old caches...".format(len(expired_paths)))
+    for path in expired_paths:
+        logging.debug("pruning {}...".format(path))
+        shutil.rmtree(path)
+
+
+def gen_cache_for_ref(ref_hash, path):
+    """generate cache from git reference
+
+    The ref_hash should be a git hash, and path should be the location
+    of the generated cache.
+
+    The included shell script is used to extract the gwinc code from
+    the appropriate git commit, and invoke a new python instance to
+    generate the noise curves.
+
+    """
+    logging.info("creating new cache from reference {}...".format(ref_hash))
+    subprocess.run(
+        [test_path('gen_cache.sh'), ref_hash, path],
+        check=True,
+    )
+
+
+def load_cache(path):
+    """load a cache from path
+
+    returns a dictionary with 'ref_hash' and 'ifos' keys.
+
+    """
+    logging.info("loading cache {}...".format(path))
+    cache = {}
+    ref_hash_path = os.path.join(path, 'ref_hash')
+    if os.path.exists(ref_hash_path):
+        with open(ref_hash_path) as f:
+            ref_hash = f.read().strip()
+    else:
+        ref_hash = None
+    logging.debug("cache hash: {}".format(ref_hash))
+    cache['ref_hash'] = ref_hash
+    cache['ifos'] = {}
+    for f in sorted(os.listdir(path)):
+        name, ext = os.path.splitext(f)
+        if ext != '.h5':
+            continue
+        cache['ifos'][name] = os.path.join(path, f)
+    return cache
 
 
 def walk_traces(traces, root=()):
@@ -107,9 +207,7 @@ def compare_traces(tracesA, tracesB, tolerance=TOLERANCE, skip=None):
     return diffs
 
 
-def plot_diffs(freq, diffs, tolerance,
-               name, styleA, styleB, fom_title='',
-               save=None):
+def plot_diffs(freq, diffs, styleA, styleB):
     spec = (len(diffs)+1, 2)
     sharex = None
     for i, nname in enumerate(diffs):
@@ -136,21 +234,9 @@ def plot_diffs(freq, diffs, tolerance,
         if i == 0:
             axr.set_title("fractional difference")
 
-        plt.suptitle('''{} {}/{} noise comparison
-(noises that differ by more than {} ppm)
-{}'''.format(name, styleA['label'], styleB['label'], tolerance*1e6, fom_title))
-
     axl.set_xlabel("frequency [Hz]")
     axr.set_xlabel("frequency [Hz]")
-
     plt.subplots_adjust(top=0.8, right=0.85, wspace=0.3)
-    if save:
-        pwidth = 10
-        pheight = (len(diffs) * 5) + 2
-        plt.gcf().set_size_inches(pwidth, pheight)
-        plt.savefig(save)
-    else:
-        plt.show()
 
 ##################################################
 
@@ -163,35 +249,70 @@ def main():
         '--skip', '-k', metavar='NOISE', action='append',
         help='traces to skip in comparison (multiple may be specified)')
     parser.add_argument(
-        '--cache', '-c', metavar='PATH', default=CACHE_PATH,
-        help='specify alternate IFO traces cache path')
+        '--git-ref', '-g', metavar='HASH',
+        help='specify git ref to compare against')
     rgroup = parser.add_mutually_exclusive_group()
+    rgroup.add_argument(
+        '--update-ref', '-u', metavar='HASH', nargs='?', const='HEAD',
+        help="update the stored reference git hash to HASH (or 'HEAD' if not specified) and exit")
     rgroup.add_argument(
         '--plot', '-p', action='store_true',
         help='plot differences')
     rgroup.add_argument(
         '--report', '-r', metavar='REPORT.pdf',
         help='create PDF report of test results (only created if differences found)')
-    rgroup.add_argument(
-        '--gen-cache', action='store_true',
-        help='update/create IFO traces cache directory')
     parser.add_argument(
         'ifo', metavar='IFO', nargs='*',
         help='specific ifos to test (default all)')
     args = parser.parse_args()
 
-    if args.gen_cache:
-        try:
-            os.makedirs(args.cache)
-        except FileExistsError:
+    # get the current hash of HEAD
+    head_hash = git_ref_resolve_hash('HEAD')
+    if not head_hash:
+        logging.warning("could not determine git HEAD hash.")
+
+    # update the reference if specified
+    if args.update_ref:
+        if args.update_ref == 'HEAD':
+            if not head_hash:
+                sys.exit("Could not update reference to head.")
+            logging.info("updating reference to HEAD...")
+            ref_hash = head_hash
+        else:
+            ref_hash = git_ref_resolve_hash(args.update_ref)
+        logging.info("updating reference git hash: {}".format(ref_hash))
+        write_ref_hash(ref_hash)
+        sys.exit()
+
+    # get the reference hash
+    if args.git_ref:
+        ref_hash = git_ref_resolve_hash(args.git_ref)
+    else:
+        ref_hash = load_ref_hash()
+        if not ref_hash:
             pass
-        freq = np.logspace(np.log10(5), np.log10(6000), 3000)
-        for name in IFOS:
-            Budget = load_budget(name)
-            traces = Budget(freq).run()
-            path = os.path.join(args.cache, name+'.h5')
-            save_hdf5(path, freq, traces)
-        return
+        try:
+            with open(test_path('ref_hash')) as f:
+                ref_hash = f.read().strip()
+        except IOError:
+            logging.warning("could not open reference")
+            sys.exit("Unspecified reference git hash, could not run test.")
+
+    logging.info("head hash: {}".format(head_hash))
+    logging.info("ref  hash: {}".format(ref_hash))
+
+    # don't bother test if hashes match
+    if ref_hash == head_hash:
+        logging.info("HEAD matches reference, not bothering to calculate.")
+        logging.info("Use --git-ref to compare against an arbitrary git commit.")
+        sys.exit()
+
+    # load the cache
+    cache_path = test_path('cache', ref_hash)
+    if not os.path.exists(cache_path):
+        prune_cache_dir()
+        gen_cache_for_ref(ref_hash, cache_path)
+    cache = load_cache(cache_path)
 
     if args.report:
         base, ext = os.path.splitext(args.report)
@@ -199,22 +320,12 @@ def main():
             parser.error("Test reports only support PDF format.")
         outdir = tempfile.TemporaryDirectory()
 
-    # find all cached IFOs
-    logging.info("loading cache {}...".format(args.cache))
-    cached_ifos = {}
-    for f in sorted(os.listdir(args.cache)):
-        name, ext = os.path.splitext(f)
-        if ext != '.h5':
-            continue
-        cached_ifos[name] = os.path.join(args.cache, f)
-
-    # select
     if args.ifo:
         ifos = args.ifo
     else:
         ifos = IFOS
 
-    style_cache = dict(label='cache', linestyle='-')
+    style_cache = dict(label='reference', linestyle='-')
     style_head = dict(label='head', linestyle='--')
 
     fail = False
@@ -223,10 +334,10 @@ def main():
     for name in ifos:
         logging.info("{} tests...".format(name))
 
-        path = cached_ifos[name]
-
-        if not os.path.exists(path):
-            logging.warning("{} test cache not found".format(name))
+        try:
+            path = cache['ifos'][name]
+        except KeyError:
+            logging.warning("IFO {} not found in cache")
             fail |= True
             continue
 
@@ -262,21 +373,26 @@ inspiral {func} {m1}/{m2} Msol:
 
         diffs = compare_traces(traces_cache, traces_head, args.tolerance, args.skip)
 
-        if diffs:
-            logging.warning("{} tests FAIL".format(name))
-            fail |= True
-            if args.plot or args.report:
-                if args.report:
-                    save = os.path.join(outdir.name, name+'.pdf')
-                else:
-                    save = None
-                plot_diffs(
-                    freq, diffs, args.tolerance,
-                    name, style_cache, style_head, fom_summary,
-                    save=save,
-                )
-        else:
+        if not diffs:
             logging.info("{} tests pass.".format(name))
+            continue
+
+        logging.warning("{} tests FAIL".format(name))
+        fail |= True
+        if args.plot or args.report:
+            plot_diffs(freq, diffs, style_cache, style_head)
+            plt.suptitle('''{} {}/{} noise comparison
+(noises that differ by more than {} ppm)
+reference git hash: {}
+{}'''.format(name, style_cache['label'], style_head['label'],
+             args.tolerance*1e6, cache['ref_hash'], fom_summary))
+            if args.report:
+                pwidth = 10
+                pheight = (len(diffs) * 5) + 2
+                plt.gcf().set_size_inches(pwidth, pheight)
+                plt.savefig(os.path.join(outdir.name, name+'.pdf'))
+            else:
+                plt.show()
 
     if not fail:
         logging.info("all tests pass.")
diff --git a/gwinc/test/cache/Aplus.h5 b/gwinc/test/cache/Aplus.h5
deleted file mode 100644
index 369454f3c2117aa0447c80f1256bee1fe570010d..0000000000000000000000000000000000000000
--- a/gwinc/test/cache/Aplus.h5
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:8394387e7e87828e3976097a30af22ee015e7d48d749b2eb8c4616b5308b8937
-size 276288
diff --git a/gwinc/test/cache/CE1.h5 b/gwinc/test/cache/CE1.h5
deleted file mode 100644
index d747589b8c4c27cf93d0afebc817c28800966fbe..0000000000000000000000000000000000000000
--- a/gwinc/test/cache/CE1.h5
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:38db82407ca43b0e2142f887668ea5040079359174608023e5f9e4b8d407eb7e
-size 400384
diff --git a/gwinc/test/cache/CE2.h5 b/gwinc/test/cache/CE2.h5
deleted file mode 100644
index add79d726b802b6fb803de8225ea1b159277b18a..0000000000000000000000000000000000000000
--- a/gwinc/test/cache/CE2.h5
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:75a4a59c2205fbc2fb3f4d54f449dc12fabc2db6e628f07fb983c270dd341c99
-size 450432
diff --git a/gwinc/test/cache/Voyager.h5 b/gwinc/test/cache/Voyager.h5
deleted file mode 100644
index 25504e4dd4f033b45920e59e7a4d39810471fda1..0000000000000000000000000000000000000000
--- a/gwinc/test/cache/Voyager.h5
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:55f1f7457fd734d3a399306cdb7df6617adf2df4f11ad68f3b4ac7ea7a15aaab
-size 324288
diff --git a/gwinc/test/cache/aLIGO.h5 b/gwinc/test/cache/aLIGO.h5
deleted file mode 100644
index 7a33dc5d4f6ab77ea6d6e57a842dd7c81b4ab77d..0000000000000000000000000000000000000000
--- a/gwinc/test/cache/aLIGO.h5
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9168d3d879f7df53ccba7aae911a8f5dcbaa82cbb1a3a8399efc997f1d1994b0
-size 276288
diff --git a/gwinc/test/gen_cache.sh b/gwinc/test/gen_cache.sh
new file mode 100755
index 0000000000000000000000000000000000000000..7f49f4415bc34154cd41ee47327b57ad5ffb08a6
--- /dev/null
+++ b/gwinc/test/gen_cache.sh
@@ -0,0 +1,26 @@
+#!/bin/bash -e
+
+if [ -z "$1" ] || [ -z "$2" ] ; then
+    echo "usage: $(basename $0) git_ref_hash cache_dir_path"
+    echo "generate a cache of IFO budget traces from a particular git commit"
+    exit 1
+fi
+
+ref_hash="$1"
+cache_dir="$2"
+
+mkdir -p $cache_dir
+cache_dir=$(cd $cache_dir && pwd)
+gwinc_dir=$cache_dir/gwinc
+mkdir -p $gwinc_dir
+
+git archive $ref_hash | tar -x -C $gwinc_dir
+
+cd $gwinc_dir
+
+export LOG_LEVEL=INFO
+for ifo in $(python3 -c "import gwinc; print(' '.join(gwinc.IFOS))") ; do
+    python3 -m gwinc --save $cache_dir/${ifo}.h5 $ifo
+done
+
+echo $ref_hash > $cache_dir/ref_hash
diff --git a/gwinc/test/ref_hash b/gwinc/test/ref_hash
new file mode 100644
index 0000000000000000000000000000000000000000..c5170040deb0cbb983eed01994a3cc7da40580cf
--- /dev/null
+++ b/gwinc/test/ref_hash
@@ -0,0 +1 @@
+9ff4ba5463895698f48c243e329c4138ab163c1f