diff --git a/.gitignore b/.gitignore index 53b17381e383c81afaa24b6960372dd06c981a0b..684adee5cac3c318d61d660a906faa48e7185d1a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,7 @@ #for docs and setup.py outputs build/ +# test cache +gwinc/test/cache # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e7b2de49312cde25b824aef9f5809dc294dffdca..03470579ac84f96007f5ad569b9ff5cd97164683 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,25 +2,47 @@ The pygwinc project welcomes your contributions. Our policy is that all contributions should be peer-reviewed. To facilitate the review, -please do not commit your work to this repository yourself. Instead, -fork the repository and send a merge request. +please do not commit your work to this repository directly. Instead, +please fork the repository and create a [merge +request](https://git.ligo.org/gwinc/pygwinc/-/merge_requests/new) +against the main pygwinc master branch. -`pygwinc` comes with a test suite that will calculate all canonical -IFO budgets with the current state of the code, and compare them -against cached hdf5 traces (stored in the repo with -[git-lfs](https://git-lfs.github.com/) ( [see -also](https://wiki.ligo.org/Computing/GitLFS)). Contributors should -run the tests before submitting any merge requests: +When submitting code for merge, please follow good coding practice. +Respect the existing coding style, which for `pygwinc` is standard +[PEP8](https://www.python.org/dev/peps/pep-0008/) (with some +exceptions). Make individual commits as logically distinct and atomic +as possible, and provide a complete, descriptive log of the changes +(including a top summary line). Review efficiency follows code +legibility. + +`pygwinc` comes with a validation command that can compare budgets +between different versions of the code and produce a graphical report. +`pygwinc` stores a reference to the git commit that is considered the +latest good reference for the budget calculation functions, and the +validation is run against that reference by default. The command can +be run with: ```shell $ python3 -m gwinc.test ``` -The test suite will be run automatically upon push to git.ligo.org as -part of the continuous integration, and code that fails the CI will -not be merged, unless failures are well justified. +Use the '--plot' or '--report' options to produce visual comparisons +of the noise differences. The comparison can be done against an +arbitrary commit using the '--git-ref' option. Traces for referenced +commits are cached, which speeds up subsequent comparison runs +significantly. + +Commits for code changes that modify budget calculations should be +followed up updates to test reference. This can be done with the +'--update-ref' option to the test command: +```shell +$ python3 -m gwinc.test --update-ref +``` +If no specific reference is provided, a pointer to the last commit +will be made. *Note: reference updates need to be made in a separate +commit after the commits that change the noise calculations* (git +commits can't have foreknowledge of their own commit hash). -If your change affects the shape of a noise curve, your commit message -should make note of that, and provide a justification. It will also -be necessary to update the reference curves stored in cache, but that -should be done in a separate commit not a part of the original merge -request, so that all reviewers can see the changes introduced in the -CI test failure report. +Once you submit your merge request, an automated pipeline will run the +test command to validate your changes. If budgets differences are +detected the pipeline will fail, alerting reviewers that a more +detailed review is required. Test issues will need to be resolved +before code can be merged. diff --git a/gwinc/test/__main__.py b/gwinc/test/__main__.py index 8ccd2dba2d50ef80cbc144151c9fe1c07bdd5149..532030e46517ee9300be9edd839406d3ca8a7f3c 100644 --- a/gwinc/test/__main__.py +++ b/gwinc/test/__main__.py @@ -1,10 +1,12 @@ import os import sys import glob +import shutil import signal import logging import tempfile import argparse +import subprocess import numpy as np import matplotlib.pyplot as plt from collections import OrderedDict @@ -12,7 +14,7 @@ from collections.abc import Mapping from PyPDF2 import PdfFileReader, PdfFileWriter from .. import IFOS, load_budget -from ..io import load_hdf5, save_hdf5 +from ..io import load_hdf5 try: import inspiral_range @@ -26,7 +28,104 @@ logging.basicConfig( TOLERANCE = 1e-6 -CACHE_PATH = os.path.join(os.path.dirname(__file__), 'cache') +CACHE_LIMIT = 5 + + +def test_path(*args): + """Return path to package file.""" + return os.path.join(os.path.dirname(__file__), *args) + + +def git_ref_resolve_hash(git_ref): + """Resolve a git reference into its hash string.""" + try: + return subprocess.run( + ['git', 'show', '-s', '--format=format:%H', git_ref], + capture_output=True, universal_newlines=True, + ).stdout + except subprocess.CalledProcessError: + return None + + +def write_ref_hash(ref_hash): + """Write ref hash to reference file + + """ + with open(test_path('ref_hash'), 'w') as f: + f.write('{}\n'.format(ref_hash)) + + +def load_ref_hash(): + """Load the current reference git hash. + + """ + try: + with open(test_path('ref_hash')) as f: + return f.read().strip() + except IOError: + return None + + +def prune_cache_dir(): + """Prune all but the N most recently accessed caches. + + """ + cache_dir = test_path('cache') + if not os.path.exists(cache_dir): + return + expired_paths = sorted( + [os.path.join(cache_dir, path) for path in os.listdir(cache_dir)], + key=lambda path: os.stat(path).st_atime, + )[CACHE_LIMIT:] + if not expired_paths: + return + logging.info("pruning {} old caches...".format(len(expired_paths))) + for path in expired_paths: + logging.debug("pruning {}...".format(path)) + shutil.rmtree(path) + + +def gen_cache_for_ref(ref_hash, path): + """generate cache from git reference + + The ref_hash should be a git hash, and path should be the location + of the generated cache. + + The included shell script is used to extract the gwinc code from + the appropriate git commit, and invoke a new python instance to + generate the noise curves. + + """ + logging.info("creating new cache from reference {}...".format(ref_hash)) + subprocess.run( + [test_path('gen_cache.sh'), ref_hash, path], + check=True, + ) + + +def load_cache(path): + """load a cache from path + + returns a dictionary with 'ref_hash' and 'ifos' keys. + + """ + logging.info("loading cache {}...".format(path)) + cache = {} + ref_hash_path = os.path.join(path, 'ref_hash') + if os.path.exists(ref_hash_path): + with open(ref_hash_path) as f: + ref_hash = f.read().strip() + else: + ref_hash = None + logging.debug("cache hash: {}".format(ref_hash)) + cache['ref_hash'] = ref_hash + cache['ifos'] = {} + for f in sorted(os.listdir(path)): + name, ext = os.path.splitext(f) + if ext != '.h5': + continue + cache['ifos'][name] = os.path.join(path, f) + return cache def walk_traces(traces, root=()): @@ -107,9 +206,7 @@ def compare_traces(tracesA, tracesB, tolerance=TOLERANCE, skip=None): return diffs -def plot_diffs(freq, diffs, tolerance, - name, styleA, styleB, fom_title='', - save=None): +def plot_diffs(freq, diffs, styleA, styleB): spec = (len(diffs)+1, 2) sharex = None for i, nname in enumerate(diffs): @@ -136,21 +233,9 @@ def plot_diffs(freq, diffs, tolerance, if i == 0: axr.set_title("fractional difference") - plt.suptitle('''{} {}/{} noise comparison -(noises that differ by more than {} ppm) -{}'''.format(name, styleA['label'], styleB['label'], tolerance*1e6, fom_title)) - axl.set_xlabel("frequency [Hz]") axr.set_xlabel("frequency [Hz]") - plt.subplots_adjust(top=0.8, right=0.85, wspace=0.3) - if save: - pwidth = 10 - pheight = (len(diffs) * 5) + 2 - plt.gcf().set_size_inches(pwidth, pheight) - plt.savefig(save) - else: - plt.show() ################################################## @@ -163,35 +248,70 @@ def main(): '--skip', '-k', metavar='NOISE', action='append', help='traces to skip in comparison (multiple may be specified)') parser.add_argument( - '--cache', '-c', metavar='PATH', default=CACHE_PATH, - help='specify alternate IFO traces cache path') + '--git-ref', '-g', metavar='HASH', + help='specify git ref to compare against') rgroup = parser.add_mutually_exclusive_group() + rgroup.add_argument( + '--update-ref', '-u', metavar='HASH', nargs='?', const='HEAD', + help="update the stored reference git hash to HASH (or 'HEAD' if not specified) and exit") rgroup.add_argument( '--plot', '-p', action='store_true', help='plot differences') rgroup.add_argument( '--report', '-r', metavar='REPORT.pdf', help='create PDF report of test results (only created if differences found)') - rgroup.add_argument( - '--gen-cache', action='store_true', - help='update/create IFO traces cache directory') parser.add_argument( 'ifo', metavar='IFO', nargs='*', help='specific ifos to test (default all)') args = parser.parse_args() - if args.gen_cache: - try: - os.makedirs(args.cache) - except FileExistsError: + # get the current hash of HEAD + head_hash = git_ref_resolve_hash('HEAD') + if not head_hash: + logging.warning("could not determine git HEAD hash.") + + # update the reference if specified + if args.update_ref: + if args.update_ref == 'HEAD': + if not head_hash: + sys.exit("Could not update reference to head.") + logging.info("updating reference to HEAD...") + ref_hash = head_hash + else: + ref_hash = git_ref_resolve_hash(args.update_ref) + logging.info("updating reference git hash: {}".format(ref_hash)) + write_ref_hash(ref_hash) + sys.exit() + + # get the reference hash + if args.git_ref: + ref_hash = git_ref_resolve_hash(args.git_ref) + else: + ref_hash = load_ref_hash() + if not ref_hash: pass - freq = np.logspace(np.log10(5), np.log10(6000), 3000) - for name in IFOS: - Budget = load_budget(name) - traces = Budget(freq).run() - path = os.path.join(args.cache, name+'.h5') - save_hdf5(path, freq, traces) - return + try: + with open(test_path('ref_hash')) as f: + ref_hash = f.read().strip() + except IOError: + logging.warning("could not open reference") + sys.exit("Unspecified reference git hash, could not run test.") + + logging.info("head hash: {}".format(head_hash)) + logging.info("ref hash: {}".format(ref_hash)) + + # don't bother test if hashes match + if ref_hash == head_hash: + logging.info("HEAD matches reference, not bothering to calculate.") + logging.info("Use --git-ref to compare against an arbitrary git commit.") + sys.exit() + + # load the cache + cache_path = test_path('cache', ref_hash) + if not os.path.exists(cache_path): + prune_cache_dir() + gen_cache_for_ref(ref_hash, cache_path) + cache = load_cache(cache_path) if args.report: base, ext = os.path.splitext(args.report) @@ -199,22 +319,12 @@ def main(): parser.error("Test reports only support PDF format.") outdir = tempfile.TemporaryDirectory() - # find all cached IFOs - logging.info("loading cache {}...".format(args.cache)) - cached_ifos = {} - for f in sorted(os.listdir(args.cache)): - name, ext = os.path.splitext(f) - if ext != '.h5': - continue - cached_ifos[name] = os.path.join(args.cache, f) - - # select if args.ifo: ifos = args.ifo else: ifos = IFOS - style_cache = dict(label='cache', linestyle='-') + style_cache = dict(label='reference', linestyle='-') style_head = dict(label='head', linestyle='--') fail = False @@ -223,10 +333,10 @@ def main(): for name in ifos: logging.info("{} tests...".format(name)) - path = cached_ifos[name] - - if not os.path.exists(path): - logging.warning("{} test cache not found".format(name)) + try: + path = cache['ifos'][name] + except KeyError: + logging.warning("IFO {} not found in cache") fail |= True continue @@ -262,21 +372,26 @@ inspiral {func} {m1}/{m2} Msol: diffs = compare_traces(traces_cache, traces_head, args.tolerance, args.skip) - if diffs: - logging.warning("{} tests FAIL".format(name)) - fail |= True - if args.plot or args.report: - if args.report: - save = os.path.join(outdir.name, name+'.pdf') - else: - save = None - plot_diffs( - freq, diffs, args.tolerance, - name, style_cache, style_head, fom_summary, - save=save, - ) - else: + if not diffs: logging.info("{} tests pass.".format(name)) + continue + + logging.warning("{} tests FAIL".format(name)) + fail |= True + if args.plot or args.report: + plot_diffs(freq, diffs, style_cache, style_head) + plt.suptitle('''{} {}/{} noise comparison +(noises that differ by more than {} ppm) +reference git hash: {} +{}'''.format(name, style_cache['label'], style_head['label'], + args.tolerance*1e6, cache['ref_hash'], fom_summary)) + if args.report: + pwidth = 10 + pheight = (len(diffs) * 5) + 2 + plt.gcf().set_size_inches(pwidth, pheight) + plt.savefig(os.path.join(outdir.name, name+'.pdf')) + else: + plt.show() if not fail: logging.info("all tests pass.") diff --git a/gwinc/test/cache/Aplus.h5 b/gwinc/test/cache/Aplus.h5 deleted file mode 100644 index 369454f3c2117aa0447c80f1256bee1fe570010d..0000000000000000000000000000000000000000 --- a/gwinc/test/cache/Aplus.h5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8394387e7e87828e3976097a30af22ee015e7d48d749b2eb8c4616b5308b8937 -size 276288 diff --git a/gwinc/test/cache/CE1.h5 b/gwinc/test/cache/CE1.h5 deleted file mode 100644 index d747589b8c4c27cf93d0afebc817c28800966fbe..0000000000000000000000000000000000000000 --- a/gwinc/test/cache/CE1.h5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:38db82407ca43b0e2142f887668ea5040079359174608023e5f9e4b8d407eb7e -size 400384 diff --git a/gwinc/test/cache/CE2.h5 b/gwinc/test/cache/CE2.h5 deleted file mode 100644 index add79d726b802b6fb803de8225ea1b159277b18a..0000000000000000000000000000000000000000 --- a/gwinc/test/cache/CE2.h5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:75a4a59c2205fbc2fb3f4d54f449dc12fabc2db6e628f07fb983c270dd341c99 -size 450432 diff --git a/gwinc/test/cache/Voyager.h5 b/gwinc/test/cache/Voyager.h5 deleted file mode 100644 index 25504e4dd4f033b45920e59e7a4d39810471fda1..0000000000000000000000000000000000000000 --- a/gwinc/test/cache/Voyager.h5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:55f1f7457fd734d3a399306cdb7df6617adf2df4f11ad68f3b4ac7ea7a15aaab -size 324288 diff --git a/gwinc/test/cache/aLIGO.h5 b/gwinc/test/cache/aLIGO.h5 deleted file mode 100644 index 7a33dc5d4f6ab77ea6d6e57a842dd7c81b4ab77d..0000000000000000000000000000000000000000 --- a/gwinc/test/cache/aLIGO.h5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9168d3d879f7df53ccba7aae911a8f5dcbaa82cbb1a3a8399efc997f1d1994b0 -size 276288 diff --git a/gwinc/test/gen_cache.sh b/gwinc/test/gen_cache.sh new file mode 100755 index 0000000000000000000000000000000000000000..7f49f4415bc34154cd41ee47327b57ad5ffb08a6 --- /dev/null +++ b/gwinc/test/gen_cache.sh @@ -0,0 +1,26 @@ +#!/bin/bash -e + +if [ -z "$1" ] || [ -z "$2" ] ; then + echo "usage: $(basename $0) git_ref_hash cache_dir_path" + echo "generate a cache of IFO budget traces from a particular git commit" + exit 1 +fi + +ref_hash="$1" +cache_dir="$2" + +mkdir -p $cache_dir +cache_dir=$(cd $cache_dir && pwd) +gwinc_dir=$cache_dir/gwinc +mkdir -p $gwinc_dir + +git archive $ref_hash | tar -x -C $gwinc_dir + +cd $gwinc_dir + +export LOG_LEVEL=INFO +for ifo in $(python3 -c "import gwinc; print(' '.join(gwinc.IFOS))") ; do + python3 -m gwinc --save $cache_dir/${ifo}.h5 $ifo +done + +echo $ref_hash > $cache_dir/ref_hash diff --git a/gwinc/test/ref_hash b/gwinc/test/ref_hash new file mode 100644 index 0000000000000000000000000000000000000000..c5170040deb0cbb983eed01994a3cc7da40580cf --- /dev/null +++ b/gwinc/test/ref_hash @@ -0,0 +1 @@ +9ff4ba5463895698f48c243e329c4138ab163c1f