Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • duncan.meacher/gwdatafind
  • duncanmmacleod/gwdatafind
  • computing/gwdatafind/client
3 results
Show changes
Commits on Source (13)
......@@ -87,7 +87,7 @@ deb:bullseye:
.test:debian:
extends:
# see /.gitlab/ci/test.yml
- .test
- .test:linux
# https://computing.docs.ligo.org/gitlab-ci-templates/debian/#.debian:base
- .debian:base
before_script:
......@@ -107,6 +107,7 @@ deb:bullseye:
# install testing dependencies
- apt-get -y -q install
findutils
man-db
python3-coverage
python3-pytest
python3-pytest-cov
......
......@@ -80,7 +80,7 @@ rpm:el8:
.test:el:
extends:
# see /.gitlab/ci/test.yml
- .test
- .test:linux
before_script:
# set up yum caching
- !reference [".rhel:base", before_script]
......@@ -90,6 +90,7 @@ rpm:el8:
- PY3=$(rpm --eval '%{?python3_pkgversion:%{python3_pkgversion}}%{!?python3_pkgversion:3}')
- yum -y -q install
findutils
man-db
python${PY3}-coverage
python${PY3}-pytest
python${PY3}-pytest-cov
......
......@@ -42,6 +42,12 @@ include:
# the coverage job
coverage: null
# Linux-specific test template: runs the shared `.test` script and then
# checks that the `gw_data_find` manual page can be found and rendered.
.test:linux:
extends: .test
script:
# reuse the script steps of the base `.test` template unchanged
- !reference [.test, script]
# use `cat` as the pager so `man` prints the page and exits non-interactively
- man -Pcat gw_data_find
# -- coverage ---------------
#
# This job combines the individual
......
......@@ -18,9 +18,9 @@ No auth
GWDataFind servers can be operated without requiring any
authorisation credentials.
An example of this in production is the GWDataFind server operated
for the `GW Open Science Center (GWOSC) <https://www.gw-openscience.org/>`__:
for the `GW Open Science Center (GWOSC) <https://www.gwosc.org/>`__:
https://datafind.gw-openscience.org
https://datafind.gwosc.org
.. _scitokens:
......
......@@ -45,7 +45,7 @@ For example:
... "L1_GWOSC_O2_4KHZ_R1",
... 1187008880,
... 1187008884,
... host="datafind.gw-openscience.org",
... host="datafind.gwosc.org",
... )
>>> print(urls)
['file://localhost/cvmfs/gwosc.osgstorage.org/gwdata/O2/strain.4k/frame.v1/L1/1186988032/L-L1_GWOSC_O2_4KHZ_R1-1187008512-4096.gwf']
......
......@@ -21,7 +21,7 @@ For example:
... )
>>> with Session() as sess:
... obs = find_observatories(
... host="datafind.gw-openscience.org",
... host="datafind.gwosc.org",
... session=sess,
... )
... print(obs)
......@@ -29,10 +29,10 @@ For example:
... for ifo in obs:
... urls[ifo] = find_urls(
... ifo,
... "{}1_GWOSC_O2_4KHZ_R1".format(ifo),
... f"{ifo}1_GWOSC_O2_4KHZ_R1",
... 1187008880,
... 1187008884,
... host="datafind.gw-openscience.org",
... host="datafind.gwosc.org",
... session=sess,
... )
... print(urls)
......@@ -41,7 +41,7 @@ For example:
'V': ['file://localhost/cvmfs/gwosc.osgstorage.org/gwdata/O2/strain.4k/frame.v1/V1/1186988032/V-V1_GWOSC_O2_4KHZ_R1-1187008512-4096.gwf'],
'L': ['file://localhost/cvmfs/gwosc.osgstorage.org/gwdata/O2/strain.4k/frame.v1/L1/1186988032/L-L1_GWOSC_O2_4KHZ_R1-1187008512-4096.gwf']}
In the above example the connection to ``datafind.gw-openscience.org`` is
In the above example the connection to ``datafind.gwosc.org`` is
held open and reused to simplify subsequent queries and minimise the risk
of network communication issues.
......
......@@ -20,8 +20,10 @@ BuildRequires: python%{python3_pkgversion}-pip
BuildRequires: python%{python3_pkgversion}-setuptools
BuildRequires: python%{python3_pkgversion}-wheel
# manpage requirements (all runtime requirements)
BuildRequires: help2man
# man pages
%if 0%{?rhel} == 0 || 0%{?rhel} >= 8
BuildRequires: argparse-manpage >= 3
%endif
BuildRequires: python%{python3_pkgversion}-igwn-auth-utils >= 0.3.1
BuildRequires: python%{python3_pkgversion}-ligo-segments
......@@ -76,28 +78,31 @@ Python %{python3_version} interface libraries.
%install
%py3_install_wheel gwdatafind-%{version}-*.whl
%if 0%{?rhel} == 0 || 0%{?rhel} >= 8
mkdir -vp %{buildroot}%{_mandir}/man1
env PYTHONPATH="%{buildroot}%{python3_sitelib}" \
help2man \
argparse-manpage \
--description "discover available GW data" \
--function command_line \
--module gwdatafind.__main__ \
--output %{buildroot}%{_mandir}/man1/gw_data_find.1 \
--name "discover available GW data" \
--no-discard-stderr \
--no-info \
--section 1 \
--source %{srcname} \
--version-string %{version} \
%{buildroot}%{_bindir}/gw_data_find \
--prog gw_data_find \
--project-name %{srcname} \
--version %{version} \
--url %{url} \
;
%endif
%check
export PYTHONPATH="%{buildroot}%{python3_sitelib}"
# sanity checks
%{__python3} -m gwdatafind --help
%{buildroot}%{_bindir}/gw_data_find --help
%if 0%{?rhel} == 0 || 0%{?rhel} >= 8
# test man pages
env MANPATH="%{buildroot}%{_mandir}" man -P cat gw_data_find
# run test suite
%if 0%{?rhel} == 0 || 0%{?rhel} >= 8
%{__python3} -m pytest --pyargs gwdatafind
%endif
......@@ -108,7 +113,9 @@ rm -rf $RPM_BUILD_ROOT
%license LICENSE
%doc README.md
%{_bindir}/gw_data_find
%if 0%{?rhel} == 0 || 0%{?rhel} >= 8
%{_mandir}/man1/gw_data_find.1*
%endif
%files -n python%{python3_pkgversion}-%{srcname}
%license LICENSE
......
......@@ -49,7 +49,7 @@ For example:
>>> from gwdatafind import find_urls
>>> urls = find_urls("L", "L1_GWOSC_O2_4KHZ_R1", 1187008880, 1187008884,
... host="datafind.gw-openscience.org")
... host="datafind.gwosc.org")
>>> print(urls)
['file://localhost/cvmfs/gwosc.osgstorage.org/gwdata/O2/strain.4k/frame.v1/L1/1186988032/L-L1_GWOSC_O2_4KHZ_R1-1187008512-4096.gwf']
......@@ -61,7 +61,7 @@ For example:
>>> from gwdatafind import (find_observatories, find_urls, Session)
>>> with Session() as sess:
... obs = find_observatories(
... host="datafind.gw-openscience.org",
... host="datafind.gwosc.org",
... session=sess,
... )
... print(obs)
......@@ -69,10 +69,10 @@ For example:
... for ifo in obs:
... urls[ifo] = find_urls(
... ifo,
... "{}1_GWOSC_O2_4KHZ_R1".format(ifo),
... f"{ifo}1_GWOSC_O2_4KHZ_R1",
... 1187008880,
... 1187008884,
... host="datafind.gw-openscience.org",
... host="datafind.gwosc.org",
... session=sess,
... )
... print(urls)
......
......@@ -35,6 +35,13 @@ __credits__ = 'Scott Koranda, The LIGO Scientific Collaboration'
# -- command line parsing -----------------------------------------------------
class DataFindFormatter(
    argparse.ArgumentDefaultsHelpFormatter,
    argparse.RawDescriptionHelpFormatter,
):
    """Help formatter combining two stock `argparse` formatters.

    Inheriting from both `argparse.ArgumentDefaultsHelpFormatter` and
    `argparse.RawDescriptionHelpFormatter` means that argument help text
    is annotated with default values while the parser description is
    printed as written, without being re-wrapped.

    The class adds no behaviour of its own; it exists only so that the
    combination can be passed as a single ``formatter_class``.
    """
class DataFindArgumentParser(argparse.ArgumentParser):
"""Custom `~argparse.ArgumentParser` for GWDataFind.
......@@ -43,6 +50,7 @@ class DataFindArgumentParser(argparse.ArgumentParser):
def __init__(self, *args, **kwargs):
"""Create a new `DataFindArgumentParser`.
"""
kwargs.setdefault("formatter_class", DataFindFormatter)
super(DataFindArgumentParser, self).__init__(*args, **kwargs)
self._optionals.title = "Optional arguments"
......@@ -95,87 +103,197 @@ def command_line():
defhost = None
parser = DataFindArgumentParser(
description=__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter,
description=__doc__,
)
parser.add_argument('-V', '--version', action='version',
version=__version__,
help='show version number and exit')
parser.add_argument(
"-V",
"--version",
action="version",
version=__version__,
help="show version number and exit",
)
qargs = parser.add_argument_group(
"Query types", "Select one of the following, if none are selected a "
"query for frame URLS will be performed"
"Query types",
"Select one of the following, if none are selected a "
"query for frame URLS will be performed",
)
qtype = qargs.add_mutually_exclusive_group(required=False)
parser._mutually_exclusive_groups.append(qtype) # bug in argparse
qtype.add_argument('-p', '--ping', action='store_true', default=False,
help='ping the DataFind server')
qtype.add_argument('-w', '--show-observatories', action='store_true',
default=False, help='list available observatories')
qtype.add_argument('-y', '--show-types', action='store_true',
default=False, help='list available file types')
qtype.add_argument('-a', '--show-times', action='store_true',
default=False, help='list available segments')
qtype.add_argument('-f', '--filename', action='store', metavar='FILE',
help='resolve URL(s) for a particular file name')
qtype.add_argument('-T', '--latest', action='store_true', default=False,
help='resolve URL(s) for the most recent file of the '
'specified type')
qtype.add_argument(
"-p",
"--ping",
action="store_true",
default=False,
help="ping the DataFind server",
)
qtype.add_argument(
"-w",
"--show-observatories",
action="store_true",
default=False,
help="list available observatories",
)
qtype.add_argument(
"-y",
"--show-types",
action="store_true",
default=False,
help="list available file types",
)
qtype.add_argument(
"-a",
"--show-times",
action="store_true",
default=False,
help="list available segments",
)
qtype.add_argument(
"-f",
"--filename",
action="store",
metavar="FILE",
help="resolve URL(s) for a particular file name",
)
qtype.add_argument(
"-T",
"--latest",
action="store_true",
default=False,
help="resolve URL(s) for the most recent file of the specified type",
)
dargs = parser.add_argument_group(
"Data options", "Parameters for your query. Which options are "
"required depends on the query type"
)
dargs.add_argument('-o', '--observatory', metavar='OBS',
help='observatory(ies) that generated frame file; use '
'--show-observatories to see what is available.')
dargs.add_argument('-t', '--type', help='type of frame file, use --show-'
'types to see what is available.')
dargs.add_argument('-s', '--gps-start-time', type=int, dest='gpsstart',
metavar='GPS', help='start of GPS time search')
dargs.add_argument('-e', '--gps-end-time', type=int, dest='gpsend',
metavar='GPS', help='end of GPS time search')
"Data options",
"Parameters for your query. "
"Which options are required depends on the query type",
)
dargs.add_argument(
"-o",
"--observatory",
metavar="OBS",
help=(
"observatory(ies) that generated frame file; "
"use --show-observatories to see what is available."
),
)
dargs.add_argument(
"-t",
"--type",
help="type of frame file, use --show-types to see what is available.",
)
dargs.add_argument(
"-s",
"--gps-start-time",
type=int,
dest="gpsstart",
metavar="GPS",
help="start of GPS time search",
)
dargs.add_argument(
"-e",
"--gps-end-time",
type=int,
dest="gpsend",
metavar="GPS",
help="end of GPS time search",
)
dargs.add_argument(
"-x",
"--extension",
metavar="EXT",
default="gwf",
help="file extension for which to search",
)
sargs = parser.add_argument_group(
'Connection options', 'Authentication and connection options.')
sargs.add_argument('-r', '--server', metavar='HOST:PORT', default=defhost,
required=not defhost,
help='hostname and optional port of server to query '
'(default: %(default)s)')
sargs.add_argument('-P', '--no-proxy', action='store_true',
help='attempt to authenticate without a grid proxy '
'(default: %(default)s)')
"Connection options",
"Authentication and connection options.",
)
sargs.add_argument(
"-r",
"--server",
metavar="HOST",
default=defhost,
required=not defhost,
help="hostname and optional port of server to query",
)
sargs.add_argument(
"-P",
"--no-proxy",
action="store_true",
help="attempt to authenticate without a grid proxy",
)
oargs = parser.add_argument_group(
'Output options', 'Parameters for parsing and writing output.')
"Output options",
"Parameters for parsing and writing output.",
)
oform = oargs.add_mutually_exclusive_group()
parser._mutually_exclusive_groups.append(oform) # bug in argparse
oform.add_argument('-l', '--lal-cache', action='store_const',
const="lal", dest="format",
help='format output for use as a LAL cache file')
oform.add_argument('-W', '--frame-cache', action='store_const',
const="omega", dest="format",
help='format output for use as a frame cache file')
oform.add_argument('-n', '--names-only', action='store_const',
const="names", dest="format",
help='display only the basename of each file')
oargs.add_argument('-m', '--match', help='return only results that match '
'a regular expression')
oargs.add_argument('-u', '--url-type', default='file',
help='return only URLs with a particular scheme or '
'head such as \'file\' or \'gsiftp\'')
oargs.add_argument('-g', '--gaps', action='store_true',
help='check the returned list of URLs or paths to see '
'if the files cover the requested interval; a '
'return value of zero (0) indicates the interval '
'is covered, a value of one (1) indicates at '
'least one gap exists and the interval is not , '
'covered and a value of (2) indicates that the '
'entire interval is not covered; missing gaps are '
'printed to stderr (default: %(default)s)')
oargs.add_argument('-O', '--output-file', metavar='PATH',
help='path to output file, defaults to stdout')
oform.add_argument(
"-l",
"--lal-cache",
action="store_const",
const="lal",
default=False,
dest="format",
help="format output for use as a LAL cache file",
)
oform.add_argument(
"-W",
"--frame-cache",
action="store_const",
const="omega",
default=False,
dest="format",
help="format output for use as a frame cache file",
)
oform.add_argument(
"-n",
"--names-only",
action="store_const",
const="names",
default=False,
dest="format",
help="display only the basename of each file",
)
oargs.add_argument(
"-m",
"--match",
help="return only results that match a regular expression",
)
oargs.add_argument(
"-u",
"--url-type",
default="file",
help=(
"return only URLs with a particular scheme or head "
"such as \"file\" or \"gsiftp\""
),
)
oargs.add_argument(
"-g",
"--gaps",
action="store_true",
help=(
"check the returned list of URLs or paths to see "
"if the files cover the requested interval; a "
"return value of zero (0) indicates the interval "
"is covered, a value of one (1) indicates at "
"least one gap exists and the interval is not , "
"covered and a value of (2) indicates that the "
"entire interval is not covered; missing gaps are "
"printed to stderr"
),
)
oargs.add_argument(
"-O",
"--output-file",
metavar="PATH",
help="path to output file, defaults to stdout",
)
return parser
......@@ -198,8 +316,8 @@ def ping(args, out):
exitcode : `int` or `None`
the return value of the action or `None` to indicate success.
"""
ui.ping(host=args.server)
print("LDRDataFindServer at {0.server} is alive".format(args), file=out)
ui.ping(host=args.server, ext=args.extension)
print(f"LDRDataFindServer at {args.server} is alive", file=out)
def show_observatories(args, out):
......@@ -218,7 +336,11 @@ def show_observatories(args, out):
exitcode : `int` or `None`
the return value of the action or `None` to indicate success.
"""
sitelist = ui.find_observatories(host=args.server, match=args.match)
sitelist = ui.find_observatories(
host=args.server,
match=args.match,
ext=args.extension,
)
print("\n".join(sitelist), file=out)
......@@ -238,8 +360,12 @@ def show_types(args, out):
exitcode : `int` or `None`
the return value of the action or `None` to indicate success.
"""
typelist = ui.find_types(site=args.observatory, match=args.match,
host=args.server)
typelist = ui.find_types(
site=args.observatory,
match=args.match,
host=args.server,
ext=args.extension,
)
print("\n".join(typelist), file=out)
......@@ -259,15 +385,19 @@ def show_times(args, out):
exitcode : `int` or `None`
the return value of the action or `None` to indicate success.
"""
seglist = ui.find_times(site=args.observatory, frametype=args.type,
gpsstart=args.gpsstart, gpsend=args.gpsend,
host=args.server)
seglist = ui.find_times(
site=args.observatory,
frametype=args.type,
gpsstart=args.gpsstart,
gpsend=args.gpsend,
host=args.server,
ext=args.extension,
)
print('# seg\tstart \tstop \tduration', file=out)
for i, seg in enumerate(seglist):
print(
'{n}\t{segment[0]:10}\t{segment[1]:10}\t{duration}'.format(
n=i, segment=seg, duration=abs(seg),
), file=out,
f'{i}\t{seg[0]:10}\t{seg[1]:10}\t{abs(seg)}',
file=out,
)
......@@ -287,8 +417,14 @@ def latest(args, out):
exitcode : `int` or `None`
the return value of the action or `None` to indicate success.
"""
cache = ui.find_latest(args.observatory, args.type, urltype=args.url_type,
on_missing='warn', host=args.server)
cache = ui.find_latest(
args.observatory,
args.type,
urltype=args.url_type,
on_missing='warn',
host=args.server,
ext=args.extension,
)
return postprocess_cache(cache, args, out)
......@@ -308,8 +444,12 @@ def filename(args, out):
exitcode : `int` or `None`
the return value of the action or `None` to indicate success.
"""
cache = ui.find_url(args.filename, urltype=args.url_type,
on_missing='warn', host=args.server)
cache = ui.find_url(
args.filename,
urltype=args.url_type,
on_missing='warn',
host=args.server,
)
return postprocess_cache(cache, args, out)
......@@ -329,10 +469,17 @@ def show_urls(args, out):
exitcode : `int` or `None`
the return value of the action or `None` to indicate success.
"""
cache = ui.find_urls(args.observatory, args.type,
args.gpsstart, args.gpsend,
match=args.match, urltype=args.url_type,
host=args.server, on_gaps='ignore')
cache = ui.find_urls(
args.observatory,
args.type,
args.gpsstart,
args.gpsend,
match=args.match,
urltype=args.url_type,
host=args.server,
on_gaps='ignore',
ext=args.extension,
)
return postprocess_cache(cache, args, out)
......@@ -362,7 +509,7 @@ def postprocess_cache(urls, args, out):
if missing:
print("Missing segments:\n", file=sys.stderr)
for seg in missing:
print("%d %d" % tuple(seg), file=sys.stderr)
print(f"{seg[0]:d} {seg[1]:d}", file=sys.stderr)
if span in missing:
return 2
return 1
......
......@@ -26,6 +26,7 @@ from os.path import basename
__author__ = 'Duncan Macleod <duncan.macleod@ligo.org>'
DEFAULT_EXT = "gwf"
DEFAULT_SERVICE_PREFIX = "LDR/services/data/v1"
......@@ -38,49 +39,41 @@ def _prefix(func):
def wrapped(*args, **kwargs):
prefix = kwargs.pop("prefix", DEFAULT_SERVICE_PREFIX)
suffix = func(*args, **kwargs)
return "{}/{}".format(prefix, suffix)
return f"{prefix}/{suffix}"
return wrapped
@_prefix
def ping_path():
def ping_path(ext=DEFAULT_EXT):
"""Return the API path to ping the server.
"""
return "gwf/H/R/1,2"
return f"{ext}/H/R/1,2"
@_prefix
def find_observatories_path():
def find_observatories_path(ext=DEFAULT_EXT):
"""Return the API path to query for all observatories.
"""
return "gwf.json"
return f"{ext}.json"
@_prefix
def find_types_path(site=None):
def find_types_path(site=None, ext=DEFAULT_EXT):
"""Return the API path to query for datasets for one or all sites.
"""
if site:
return "gwf/{site[0]}.json".format(site=site)
return "gwf/all.json"
return f"{ext}/{site[0]}.json"
return f"{ext}/all.json"
@_prefix
def find_times_path(site, frametype, start, end):
def find_times_path(site, frametype, start, end, ext=DEFAULT_EXT):
"""Return the API path to query for data availability segments.
"""
if start is None and end is None:
return "gwf/{site}/{type}/segments.json".format(
site=site,
type=frametype,
)
return "gwf/{site}/{type}/segments/{start},{end}.json".format(
site=site,
type=frametype,
start=start,
end=end,
)
return f"{ext}/{site}/{frametype}/segments.json"
return f"{ext}/{site}/{frametype}/segments/{start},{end}.json"
@_prefix
......@@ -88,41 +81,38 @@ def find_url_path(framefile):
"""Return the API path to query for the URL of a specific filename.
"""
filename = basename(framefile)
ext = filename.split(".", 1)[1]
site, frametype, _ = filename.split("-", 2)
return "gwf/{site}/{type}/{filename}.json".format(
site=site,
type=frametype,
filename=filename,
)
return f"{ext}/{site}/{frametype}/{filename}.json"
@_prefix
def find_latest_path(site, frametype, urltype):
def find_latest_path(site, frametype, urltype, ext=DEFAULT_EXT):
"""Return the API path to query for the latest file in a dataset.
"""
stub = "gwf/{site}/{type}/latest".format(
site=site,
type=frametype,
)
stub = f"{ext}/{site}/{frametype}/latest"
if urltype:
return "{stub}/{urltype}.json".format(stub=stub, urltype=urltype)
return f"{stub}/{urltype}.json"
return stub + ".json"
@_prefix
def find_urls_path(site, frametype, start, end, urltype=None, match=None):
def find_urls_path(
site,
frametype,
start,
end,
urltype=None,
match=None,
ext=DEFAULT_EXT,
):
"""Return the API path to query for all URLs for a dataset in an interval.
"""
stub = "gwf/{site}/{type}/{start},{end}".format(
site=site,
type=frametype,
start=start,
end=end,
)
stub = f"{ext}/{site}/{frametype}/{start},{end}"
if urltype:
path = "{stub}/{urltype}.json".format(stub=stub, urltype=urltype)
path = f"{stub}/{urltype}.json"
else:
path = stub + ".json"
if match:
path += "?match={0}".format(match)
path += f"?match={match}"
return path
......@@ -37,26 +37,29 @@ URLS = [
'file:///test/X-test-1-1.gwf',
'file:///test2/X-test-2-1.gwf',
'file:///test2/X-test-7-4.gwf',
'file:///test/X-test-0-1.h5',
'file:///test/X-test-1-1.h5',
]
OUTPUT_URLS = """
GWF_URLS = [url for url in URLS if url.endswith(".gwf")]
GWF_OUTPUT_URLS = """
file:///test/X-test-0-1.gwf
file:///test/X-test-1-1.gwf
file:///test2/X-test-2-1.gwf
file:///test2/X-test-7-4.gwf
"""[1:] # strip leading line return
OUTPUT_LAL_CACHE = """
GWF_OUTPUT_LAL_CACHE = """
X test 0 1 file:///test/X-test-0-1.gwf
X test 1 1 file:///test/X-test-1-1.gwf
X test 2 1 file:///test2/X-test-2-1.gwf
X test 7 4 file:///test2/X-test-7-4.gwf
"""[1:]
OUTPUT_NAMES_ONLY = """
GWF_OUTPUT_NAMES_ONLY = """
/test/X-test-0-1.gwf
/test/X-test-1-1.gwf
/test2/X-test-2-1.gwf
/test2/X-test-7-4.gwf
"""[1:]
OUTPUT_OMEGA_CACHE = """
GWF_OUTPUT_OMEGA_CACHE = """
X test 0 2 1 file:///test
X test 2 3 1 file:///test2
X test 7 11 4 file:///test2
......@@ -116,7 +119,10 @@ def test_sanity_check_fail(clargs):
@mock.patch('gwdatafind.ui.ping')
def test_ping(mping):
args = argparse.Namespace(server='test.datafind.com:443')
args = argparse.Namespace(
server='test.datafind.com:443',
extension='gwf',
)
out = StringIO()
main.ping(args, out)
assert mping.called_with(host=args.server)
......@@ -130,6 +136,7 @@ def test_show_observatories(mfindobs):
mfindobs.return_value = ['A', 'B', 'C']
args = argparse.Namespace(
server='test.datafind.com:443',
extension='gwf',
match='test',
)
out = StringIO()
......@@ -144,6 +151,7 @@ def test_show_types(mfindtypes):
mfindtypes.return_value = ['A', 'B', 'C']
args = argparse.Namespace(
server='test.datafind.com:443',
extension='gwf',
observatory='X',
match='test',
)
......@@ -160,6 +168,7 @@ def test_show_times(mfindtimes):
mfindtimes.return_value = [segment(0, 1), segment(1, 2), segment(3, 4)]
args = argparse.Namespace(
server='test.datafind.com:443',
extension='gwf',
observatory='X',
type='test',
gpsstart=0,
......@@ -181,6 +190,7 @@ def test_latest(mlatest):
mlatest.return_value = ['file:///test/X-test-0-10.gwf']
args = argparse.Namespace(
server='test.datafind.com:443',
extension='gwf',
observatory='X',
type='test',
url_type='file',
......@@ -216,10 +226,16 @@ def test_filename(mfindurl):
@mock.patch('gwdatafind.ui.find_urls')
def test_show_urls(mfindurls):
mfindurls.return_value = URLS
@pytest.mark.parametrize("ext", [
"gwf",
"h5",
])
def test_show_urls(mfindurls, ext):
urls = [x for x in URLS if x.endswith(f".{ext}")]
mfindurls.return_value = urls
args = argparse.Namespace(
server='test.datafind.com:443',
extension=ext,
observatory='X',
type='test',
gpsstart=0,
......@@ -231,19 +247,26 @@ def test_show_urls(mfindurls):
)
out = StringIO()
main.show_urls(args, out)
assert mfindurls.called_with(args.observatory, args.type, args.gpsstart,
args.gpsend, match=args.match,
urltype=args.url_type, on_gaps='ignore',
host=args.server)
assert mfindurls.called_with(
args.observatory,
args.type,
args.gpsstart,
args.gpsend,
match=args.match,
urltype=args.url_type,
on_gaps='ignore',
ext=ext,
host=args.server,
)
out.seek(0)
assert list(map(str.rstrip, out.readlines())) == URLS
assert list(map(str.rstrip, out.readlines())) == urls
@pytest.mark.parametrize('fmt,result', [
("urls", OUTPUT_URLS),
("lal", OUTPUT_LAL_CACHE),
("names", OUTPUT_NAMES_ONLY),
("omega", OUTPUT_OMEGA_CACHE),
("urls", GWF_OUTPUT_URLS),
("lal", GWF_OUTPUT_LAL_CACHE),
("names", GWF_OUTPUT_NAMES_ONLY),
("omega", GWF_OUTPUT_OMEGA_CACHE),
])
def test_postprocess_cache_format(fmt, result):
# create namespace for parsing
......@@ -257,7 +280,7 @@ def test_postprocess_cache_format(fmt, result):
# run
out = StringIO()
assert not main.postprocess_cache(URLS, args, out)
assert not main.postprocess_cache(GWF_URLS, args, out)
out.seek(0)
assert out.read() == result
......@@ -269,9 +292,9 @@ def test_postprocess_cache_sft():
gaps=None,
)
out = StringIO()
main.postprocess_cache(URLS, args, out)
main.postprocess_cache(GWF_URLS, args, out)
out.seek(0)
assert out.read() == OUTPUT_URLS.replace('.gwf', '.sft')
assert out.read() == GWF_OUTPUT_URLS.replace('.gwf', '.sft')
def test_postprocess_cache_gaps(capsys):
......@@ -285,9 +308,8 @@ def test_postprocess_cache_gaps(capsys):
out = StringIO()
assert main.postprocess_cache(URLS, args, out) == 1
_, err = capsys.readouterr()
assert err == 'Missing segments:\n\n{0}\n'.format(
'\n'.join('{0[0]:d} {0[1]:d}'.format(seg) for seg in GAPS),
)
segs = "\n".join(f"{seg[0]:d} {seg[1]:d}" for seg in GAPS)
assert err == "Missing segments:\n\n{}\n".format(segs)
args.gpsstart = 4
args.gpsend = 7
......@@ -305,11 +327,11 @@ def test_postprocess_cache_gaps(capsys):
(['-o', 'X', '-t', 'test', '-s', '0', '-e', '10'], 'show_urls'),
])
def test_main(args, patch, tmpname):
with mock.patch('gwdatafind.__main__.{0}'.format(patch)) as mocked:
with mock.patch(f"gwdatafind.__main__.{patch}") as mocked:
main.main(args)
assert mocked.call_count == 1
# call again with output file
args.extend(('--output-file', tmpname))
with mock.patch('gwdatafind.__main__.{0}'.format(patch)) as mocked:
with mock.patch(f"gwdatafind.__main__.{patch}") as mocked:
main.main(args)
assert mocked.call_count == 1
......@@ -30,7 +30,7 @@ from . import yield_fixture
__author__ = 'Duncan Macleod <duncan.macleod@ligo.org>'
TEST_SERVER = "test.datafind.org"
TEST_URL_BASE = "https://{}".format(TEST_SERVER)
TEST_URL_BASE = f"https://{TEST_SERVER}"
TEST_DATA = {
"A": {
"A1_TEST": [(0, 10), (10, 20), (30, 50)],
......@@ -47,7 +47,7 @@ TEST_DATA = {
def _url(suffix):
return "{}/{}".format(TEST_URL_BASE, suffix)
return f"{TEST_URL_BASE}/{suffix}"
@yield_fixture(autouse=True)
......@@ -191,10 +191,7 @@ def test_find_latest(requests_mock):
def _file_url(seg):
return "file:///data/A/A1_TEST/A-A1_TEST-{}-{}.gwf".format(
seg[0],
seg[1]-seg[0],
)
return f"file:///data/A/A1_TEST/A-A1_TEST-{seg[0]}-{seg[1]-seg[0]}.gwf"
def test_find_urls(requests_mock):
......
......@@ -183,10 +183,10 @@ def _url(host, api_func, *args, **kwargs):
else:
host = f"https://{host}"
return "{host}/{path}".format(host=host.rstrip("/"), path=path)
return f"{host.rstrip('/')}/{path}"
def ping(host=None, session=None, **request_kw):
def ping(host=None, ext=api.DEFAULT_EXT, session=None, **request_kw):
"""Ping the GWDataFind host to test for life.
Parameters
......@@ -196,6 +196,9 @@ def ping(host=None, session=None, **request_kw):
:func:`~gwdatafind.utils.get_default_host` will be used to
discover the default host.
ext : `str`, optional
the file extension for which to search.
session : `requests.Session`, optional
the connection session to use; if not given, a
:class:`igwn_auth_utils.requests.Session` will be
......@@ -217,12 +220,18 @@ def ping(host=None, session=None, **request_kw):
requests.RequestException
if the request fails for any reason
"""
qurl = _url(host, api.ping_path)
qurl = _url(host, api.ping_path, ext=ext)
response = get(qurl, session=session, **request_kw)
response.raise_for_status()
def find_observatories(match=None, host=None, session=None, **request_kw):
def find_observatories(
match=None,
host=None,
ext=api.DEFAULT_EXT,
session=None,
**request_kw,
):
"""Query a GWDataFind host for observatories with available data.
Parameters
......@@ -236,6 +245,9 @@ def find_observatories(match=None, host=None, session=None, **request_kw):
:func:`~gwdatafind.utils.get_default_host` will be used to
discover the default host.
ext : `str`, optional
the file extension for which to search.
session : `requests.Session`, optional
the connection session to use; if not given, a
:class:`igwn_auth_utils.requests.Session` will be
......@@ -264,12 +276,12 @@ def find_observatories(match=None, host=None, session=None, **request_kw):
Examples
--------
>>> find_observatories(host="datafind.gw-openscience.org")
>>> find_observatories(host="datafind.gwosc.org")
['L', 'V', 'H']
>>> find_observatories(match="H", host="datafind.gw-openscience.org")
>>> find_observatories(match="H", host="datafind.gwosc.org")
['H']
"""
qurl = _url(host, api.find_observatories_path)
qurl = _url(host, api.find_observatories_path, ext=ext)
sites = set(get_json(qurl, session=session, **request_kw))
if match:
match = compile_regex(match).search
......@@ -277,7 +289,14 @@ def find_observatories(match=None, host=None, session=None, **request_kw):
return list(sites)
def find_types(site=None, match=None, host=None, session=None, **request_kw):
def find_types(
site=None,
match=None,
host=None,
ext=api.DEFAULT_EXT,
session=None,
**request_kw,
):
"""Query a GWDataFind host for dataset types.
Parameters
......@@ -294,6 +313,9 @@ def find_types(site=None, match=None, host=None, session=None, **request_kw):
:func:`~gwdatafind.utils.get_default_host` will be used to
discover the default host.
ext : `str`, optional
the file extension for which to search.
session : `requests.Session`, optional
the connection session to use; if not given, a
:class:`~igwn_auth_utils.Session` will be
......@@ -322,14 +344,14 @@ def find_types(site=None, match=None, host=None, session=None, **request_kw):
Examples
--------
>>> find_types(host="datafind.gw-openscience.org")
>>> find_types(host="datafind.gwosc.org")
['H2_LOSC_4_V1', 'V1_GWOSC_O3a_16KHZ_R1', 'H1_LOSC_16_V1', 'L1_LOSC_4_V1', 'V1_GWOSC_O2_16KHZ_R1', 'V1_GWOSC_O3a_4KHZ_R1', 'L1_GWOSC_O3a_4KHZ_R1', 'L1_GWOSC_O2_16KHZ_R1', 'L1_GWOSC_O2_4KHZ_R1', 'V1_GWOSC_O2_4KHZ_R1', 'H1_LOSC_4_V1', 'H1_GWOSC_O3a_16KHZ_R1', 'H1_GWOSC_O2_16KHZ_R1', 'H1_GWOSC_O3a_4KHZ_R1', 'L1_GWOSC_O3a_16KHZ_R1', 'H1_GWOSC_O2_4KHZ_R1', 'L1_LOSC_16_V1']
>>> find_types(site='V', host="datafind.gw-openscience.org")
>>> find_types(site='V', host="datafind.gwosc.org")
['V1_GWOSC_O3a_4KHZ_R1', 'V1_GWOSC_O3a_16KHZ_R1', 'V1_GWOSC_O2_4KHZ_R1', 'V1_GWOSC_O2_16KHZ_R1']
(accurate as of Nov 18 2021)
""" # noqa: E501
qurl = _url(host, api.find_types_path, site=site)
qurl = _url(host, api.find_types_path, site=site, ext=ext)
types = set(get_json(qurl, session=session, **request_kw))
if match:
match = compile_regex(match).search
......@@ -343,6 +365,7 @@ def find_times(
gpsstart=None,
gpsend=None,
host=None,
ext=api.DEFAULT_EXT,
session=None,
**request_kw,
):
......@@ -370,6 +393,9 @@ def find_times(
:func:`~gwdatafind.utils.get_default_host` will be used to
discover the default host.
ext : `str`, optional
the file extension for which to search.
session : `requests.Session`, optional
the connection session to use; if not given, a
:class:`igwn_auth_utils.requests.Session` will be
......@@ -399,7 +425,7 @@ def find_times(
... "V1_GWOSC_O3a_4KHZ_R1",
... gpsstart=1238249472,
... gpsend=1239429120,
... host="datafind.gw-openscience.org",
... host="datafind.gwosc.org",
... )
[segment(1238249472, 1238417408), segment(1238421504, 1238605824), segment(1238609920, 1238827008), segment(1238839296, 1239429120)]
......@@ -408,7 +434,15 @@ def find_times(
requests.RequestException
if the request fails for any reason
""" # noqa: E501
qurl = _url(host, api.find_times_path, site, frametype, gpsstart, gpsend)
qurl = _url(
host,
api.find_times_path,
site,
frametype,
gpsstart,
gpsend,
ext=ext,
)
times = get_json(qurl, session=session, **request_kw)
return segments.segmentlist(map(segments.segment, times))
......@@ -506,6 +540,7 @@ def find_latest(
urltype="file",
on_missing="error",
host=None,
ext=api.DEFAULT_EXT,
session=None,
**request_kw,
):
......@@ -534,6 +569,9 @@ def find_latest(
:func:`~gwdatafind.utils.get_default_host` will be used to
discover the default host.
ext : `str`, optional
the file extension for which to search.
session : `requests.Session`, optional
the connection session to use; if not given, a
:class:`igwn_auth_utils.requests.Session` will be
......@@ -565,10 +603,17 @@ def find_latest(
Examples
--------
>>> find_latest('H', 'H1_GWOSC_O2_4KHZ_R1', urltype='file', host='datafind.gw-openscience.org'))
>>> find_latest('H', 'H1_GWOSC_O2_4KHZ_R1', urltype='file', host='datafind.gwosc.org')
['file://localhost/cvmfs/gwosc.osgstorage.org/gwdata/O2/strain.4k/frame.v1/H1/1186988032/H-H1_GWOSC_O2_4KHZ_R1-1187733504-4096.gwf']
""" # noqa: E501
qurl = _url(host, api.find_latest_path, site, frametype, urltype=urltype)
qurl = _url(
host,
api.find_latest_path,
site,
frametype,
ext=ext,
urltype=urltype,
)
return _get_urls(qurl, on_missing=on_missing, **request_kw)
......@@ -581,6 +626,7 @@ def find_urls(
urltype="file",
on_gaps="warn",
host=None,
ext=api.DEFAULT_EXT,
session=None,
**request_kw,
):
......@@ -619,6 +665,9 @@ def find_urls(
:func:`~gwdatafind.utils.get_default_host` will be used to
discover the default host.
ext : `str`, optional
the file extension for which to search.
session : `requests.Session`, optional
the connection session to use; if not given, a
:class:`igwn_auth_utils.requests.Session` will be
......@@ -650,6 +699,7 @@ def find_urls(
frametype,
gpsstart,
gpsend,
ext=ext,
urltype=urltype,
match=match,
)
......@@ -667,7 +717,7 @@ def find_urls(
return urls
# warn or error on missing
msg = "Missing segments: \n%s" % "\n".join(map(str, missing))
msg = "Missing segments: \n{}".format("\n".join(map(str, missing)))
if on_gaps == "warn":
warn(msg)
return urls
......