Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • duncan.meacher/gwdatafind
  • duncanmmacleod/gwdatafind
  • computing/gwdatafind/client
3 results
Show changes
Commits on Source (5)
......@@ -199,6 +199,13 @@ def command_line():
metavar="GPS",
help="end of GPS time search",
)
dargs.add_argument(
"-x",
"--extension",
metavar="EXT",
default="gwf",
help="file extension for which to search",
)
sargs = parser.add_argument_group(
"Connection options",
......@@ -309,7 +316,7 @@ def ping(args, out):
exitcode : `int` or `None`
the return value of the action or `None` to indicate success.
"""
ui.ping(host=args.server)
ui.ping(host=args.server, ext=args.extension)
print(f"LDRDataFindServer at {args.server} is alive", file=out)
......@@ -329,7 +336,11 @@ def show_observatories(args, out):
exitcode : `int` or `None`
the return value of the action or `None` to indicate success.
"""
sitelist = ui.find_observatories(host=args.server, match=args.match)
sitelist = ui.find_observatories(
host=args.server,
match=args.match,
ext=args.extension,
)
print("\n".join(sitelist), file=out)
......@@ -349,8 +360,12 @@ def show_types(args, out):
exitcode : `int` or `None`
the return value of the action or `None` to indicate success.
"""
typelist = ui.find_types(site=args.observatory, match=args.match,
host=args.server)
typelist = ui.find_types(
site=args.observatory,
match=args.match,
host=args.server,
ext=args.extension,
)
print("\n".join(typelist), file=out)
......@@ -370,9 +385,14 @@ def show_times(args, out):
exitcode : `int` or `None`
the return value of the action or `None` to indicate success.
"""
seglist = ui.find_times(site=args.observatory, frametype=args.type,
gpsstart=args.gpsstart, gpsend=args.gpsend,
host=args.server)
seglist = ui.find_times(
site=args.observatory,
frametype=args.type,
gpsstart=args.gpsstart,
gpsend=args.gpsend,
host=args.server,
ext=args.extension,
)
print('# seg\tstart \tstop \tduration', file=out)
for i, seg in enumerate(seglist):
print(
......@@ -397,8 +417,14 @@ def latest(args, out):
exitcode : `int` or `None`
the return value of the action or `None` to indicate success.
"""
cache = ui.find_latest(args.observatory, args.type, urltype=args.url_type,
on_missing='warn', host=args.server)
cache = ui.find_latest(
args.observatory,
args.type,
urltype=args.url_type,
on_missing='warn',
host=args.server,
ext=args.extension,
)
return postprocess_cache(cache, args, out)
......@@ -418,8 +444,12 @@ def filename(args, out):
exitcode : `int` or `None`
the return value of the action or `None` to indicate success.
"""
cache = ui.find_url(args.filename, urltype=args.url_type,
on_missing='warn', host=args.server)
cache = ui.find_url(
args.filename,
urltype=args.url_type,
on_missing='warn',
host=args.server,
)
return postprocess_cache(cache, args, out)
......@@ -439,10 +469,17 @@ def show_urls(args, out):
exitcode : `int` or `None`
the return value of the action or `None` to indicate success.
"""
cache = ui.find_urls(args.observatory, args.type,
args.gpsstart, args.gpsend,
match=args.match, urltype=args.url_type,
host=args.server, on_gaps='ignore')
cache = ui.find_urls(
args.observatory,
args.type,
args.gpsstart,
args.gpsend,
match=args.match,
urltype=args.url_type,
host=args.server,
on_gaps='ignore',
ext=args.extension,
)
return postprocess_cache(cache, args, out)
......
......@@ -26,6 +26,7 @@ from os.path import basename
__author__ = 'Duncan Macleod <duncan.macleod@ligo.org>'
DEFAULT_EXT = "gwf"
DEFAULT_SERVICE_PREFIX = "LDR/services/data/v1"
......@@ -44,35 +45,35 @@ def _prefix(func):
@_prefix
def ping_path():
def ping_path(ext=DEFAULT_EXT):
"""Return the API path to ping the server.
"""
return "gwf/H/R/1,2"
return f"{ext}/H/R/1,2"
@_prefix
def find_observatories_path():
def find_observatories_path(ext=DEFAULT_EXT):
"""Return the API path to query for all observatories.
"""
return "gwf.json"
return f"{ext}.json"
@_prefix
def find_types_path(site=None):
def find_types_path(site=None, ext=DEFAULT_EXT):
"""Return the API path to query for datasets for one or all sites.
"""
if site:
return f"gwf/{site[0]}.json"
return "gwf/all.json"
return f"{ext}/{site[0]}.json"
return f"{ext}/all.json"
@_prefix
def find_times_path(site, frametype, start, end):
def find_times_path(site, frametype, start, end, ext=DEFAULT_EXT):
"""Return the API path to query for data availability segments.
"""
if start is None and end is None:
return f"gwf/{site}/{frametype}/segments.json"
return f"gwf/{site}/{frametype}/segments/{start},{end}.json"
return f"{ext}/{site}/{frametype}/segments.json"
return f"{ext}/{site}/{frametype}/segments/{start},{end}.json"
@_prefix
......@@ -80,25 +81,34 @@ def find_url_path(framefile):
"""Return the API path to query for the URL of a specific filename.
"""
filename = basename(framefile)
ext = filename.split(".", 1)[1]
site, frametype, _ = filename.split("-", 2)
return f"gwf/{site}/{frametype}/{filename}.json"
return f"{ext}/{site}/{frametype}/{filename}.json"
@_prefix
def find_latest_path(site, frametype, urltype):
def find_latest_path(site, frametype, urltype, ext=DEFAULT_EXT):
"""Return the API path to query for the latest file in a dataset.
"""
stub = f"gwf/{site}/{frametype}/latest"
stub = f"{ext}/{site}/{frametype}/latest"
if urltype:
return f"{stub}/{urltype}.json"
return stub + ".json"
@_prefix
def find_urls_path(site, frametype, start, end, urltype=None, match=None):
def find_urls_path(
site,
frametype,
start,
end,
urltype=None,
match=None,
ext=DEFAULT_EXT,
):
"""Return the API path to query for all URLs for a dataset in an interval.
"""
stub = f"gwf/{site}/{frametype}/{start},{end}"
stub = f"{ext}/{site}/{frametype}/{start},{end}"
if urltype:
path = f"{stub}/{urltype}.json"
else:
......
......@@ -37,26 +37,29 @@ URLS = [
'file:///test/X-test-1-1.gwf',
'file:///test2/X-test-2-1.gwf',
'file:///test2/X-test-7-4.gwf',
'file:///test/X-test-0-1.h5',
'file:///test/X-test-1-1.h5',
]
OUTPUT_URLS = """
GWF_URLS = [url for url in URLS if url.endswith(".gwf")]
GWF_OUTPUT_URLS = """
file:///test/X-test-0-1.gwf
file:///test/X-test-1-1.gwf
file:///test2/X-test-2-1.gwf
file:///test2/X-test-7-4.gwf
"""[1:] # strip leading line return
OUTPUT_LAL_CACHE = """
GWF_OUTPUT_LAL_CACHE = """
X test 0 1 file:///test/X-test-0-1.gwf
X test 1 1 file:///test/X-test-1-1.gwf
X test 2 1 file:///test2/X-test-2-1.gwf
X test 7 4 file:///test2/X-test-7-4.gwf
"""[1:]
OUTPUT_NAMES_ONLY = """
GWF_OUTPUT_NAMES_ONLY = """
/test/X-test-0-1.gwf
/test/X-test-1-1.gwf
/test2/X-test-2-1.gwf
/test2/X-test-7-4.gwf
"""[1:]
OUTPUT_OMEGA_CACHE = """
GWF_OUTPUT_OMEGA_CACHE = """
X test 0 2 1 file:///test
X test 2 3 1 file:///test2
X test 7 11 4 file:///test2
......@@ -116,7 +119,10 @@ def test_sanity_check_fail(clargs):
@mock.patch('gwdatafind.ui.ping')
def test_ping(mping):
args = argparse.Namespace(server='test.datafind.com:443')
args = argparse.Namespace(
server='test.datafind.com:443',
extension='gwf',
)
out = StringIO()
main.ping(args, out)
assert mping.called_with(host=args.server)
......@@ -130,6 +136,7 @@ def test_show_observatories(mfindobs):
mfindobs.return_value = ['A', 'B', 'C']
args = argparse.Namespace(
server='test.datafind.com:443',
extension='gwf',
match='test',
)
out = StringIO()
......@@ -144,6 +151,7 @@ def test_show_types(mfindtypes):
mfindtypes.return_value = ['A', 'B', 'C']
args = argparse.Namespace(
server='test.datafind.com:443',
extension='gwf',
observatory='X',
match='test',
)
......@@ -160,6 +168,7 @@ def test_show_times(mfindtimes):
mfindtimes.return_value = [segment(0, 1), segment(1, 2), segment(3, 4)]
args = argparse.Namespace(
server='test.datafind.com:443',
extension='gwf',
observatory='X',
type='test',
gpsstart=0,
......@@ -181,6 +190,7 @@ def test_latest(mlatest):
mlatest.return_value = ['file:///test/X-test-0-10.gwf']
args = argparse.Namespace(
server='test.datafind.com:443',
extension='gwf',
observatory='X',
type='test',
url_type='file',
......@@ -216,10 +226,16 @@ def test_filename(mfindurl):
@mock.patch('gwdatafind.ui.find_urls')
def test_show_urls(mfindurls):
mfindurls.return_value = URLS
@pytest.mark.parametrize("ext", [
"gwf",
"h5",
])
def test_show_urls(mfindurls, ext):
urls = [x for x in URLS if x.endswith(f".{ext}")]
mfindurls.return_value = urls
args = argparse.Namespace(
server='test.datafind.com:443',
extension=ext,
observatory='X',
type='test',
gpsstart=0,
......@@ -231,19 +247,26 @@ def test_show_urls(mfindurls):
)
out = StringIO()
main.show_urls(args, out)
assert mfindurls.called_with(args.observatory, args.type, args.gpsstart,
args.gpsend, match=args.match,
urltype=args.url_type, on_gaps='ignore',
host=args.server)
assert mfindurls.called_with(
args.observatory,
args.type,
args.gpsstart,
args.gpsend,
match=args.match,
urltype=args.url_type,
on_gaps='ignore',
ext=ext,
host=args.server,
)
out.seek(0)
assert list(map(str.rstrip, out.readlines())) == URLS
assert list(map(str.rstrip, out.readlines())) == urls
@pytest.mark.parametrize('fmt,result', [
("urls", OUTPUT_URLS),
("lal", OUTPUT_LAL_CACHE),
("names", OUTPUT_NAMES_ONLY),
("omega", OUTPUT_OMEGA_CACHE),
("urls", GWF_OUTPUT_URLS),
("lal", GWF_OUTPUT_LAL_CACHE),
("names", GWF_OUTPUT_NAMES_ONLY),
("omega", GWF_OUTPUT_OMEGA_CACHE),
])
def test_postprocess_cache_format(fmt, result):
# create namespace for parsing
......@@ -257,7 +280,7 @@ def test_postprocess_cache_format(fmt, result):
# run
out = StringIO()
assert not main.postprocess_cache(URLS, args, out)
assert not main.postprocess_cache(GWF_URLS, args, out)
out.seek(0)
assert out.read() == result
......@@ -269,9 +292,9 @@ def test_postprocess_cache_sft():
gaps=None,
)
out = StringIO()
main.postprocess_cache(URLS, args, out)
main.postprocess_cache(GWF_URLS, args, out)
out.seek(0)
assert out.read() == OUTPUT_URLS.replace('.gwf', '.sft')
assert out.read() == GWF_OUTPUT_URLS.replace('.gwf', '.sft')
def test_postprocess_cache_gaps(capsys):
......
......@@ -186,7 +186,7 @@ def _url(host, api_func, *args, **kwargs):
return f"{host.rstrip('/')}/{path}"
def ping(host=None, session=None, **request_kw):
def ping(host=None, ext=api.DEFAULT_EXT, session=None, **request_kw):
"""Ping the GWDataFind host to test for life.
Parameters
......@@ -196,6 +196,9 @@ def ping(host=None, session=None, **request_kw):
:func:`~gwdatafind.utils.get_default_host` will be used to
discover the default host.
ext : `str`, optional
the file extension for which to search.
session : `requests.Session`, optional
the connection session to use; if not given, a
:class:`igwn_auth_utils.requests.Session` will be
......@@ -217,12 +220,18 @@ def ping(host=None, session=None, **request_kw):
requests.RequestException
if the request fails for any reason
"""
qurl = _url(host, api.ping_path)
qurl = _url(host, api.ping_path, ext=ext)
response = get(qurl, session=session, **request_kw)
response.raise_for_status()
def find_observatories(match=None, host=None, session=None, **request_kw):
def find_observatories(
match=None,
host=None,
ext=api.DEFAULT_EXT,
session=None,
**request_kw,
):
"""Query a GWDataFind host for observatories with available data.
Parameters
......@@ -236,6 +245,9 @@ def find_observatories(match=None, host=None, session=None, **request_kw):
:func:`~gwdatafind.utils.get_default_host` will be used to
discover the default host.
ext : `str`, optional
the file extension for which to search.
session : `requests.Session`, optional
the connection session to use; if not given, a
:class:`igwn_auth_utils.requests.Session` will be
......@@ -269,7 +281,7 @@ def find_observatories(match=None, host=None, session=None, **request_kw):
>>> find_observatories(match="H", host="datafind.gwosc.org")
['H']
"""
qurl = _url(host, api.find_observatories_path)
qurl = _url(host, api.find_observatories_path, ext=ext)
sites = set(get_json(qurl, session=session, **request_kw))
if match:
match = compile_regex(match).search
......@@ -277,7 +289,14 @@ def find_observatories(match=None, host=None, session=None, **request_kw):
return list(sites)
def find_types(site=None, match=None, host=None, session=None, **request_kw):
def find_types(
site=None,
match=None,
host=None,
ext=api.DEFAULT_EXT,
session=None,
**request_kw,
):
"""Query a GWDataFind host for dataset types.
Parameters
......@@ -294,6 +313,9 @@ def find_types(site=None, match=None, host=None, session=None, **request_kw):
:func:`~gwdatafind.utils.get_default_host` will be used to
discover the default host.
ext : `str`, optional
the file extension for which to search.
session : `requests.Session`, optional
the connection session to use; if not given, a
:class:`~igwn_auth_utils.Session` will be
......@@ -329,7 +351,7 @@ def find_types(site=None, match=None, host=None, session=None, **request_kw):
(accurate as of Nov 18 2021)
""" # noqa: E501
qurl = _url(host, api.find_types_path, site=site)
qurl = _url(host, api.find_types_path, site=site, ext=ext)
types = set(get_json(qurl, session=session, **request_kw))
if match:
match = compile_regex(match).search
......@@ -343,6 +365,7 @@ def find_times(
gpsstart=None,
gpsend=None,
host=None,
ext=api.DEFAULT_EXT,
session=None,
**request_kw,
):
......@@ -370,6 +393,9 @@ def find_times(
:func:`~gwdatafind.utils.get_default_host` will be used to
discover the default host.
ext : `str`, optional
the file extension for which to search.
session : `requests.Session`, optional
the connection session to use; if not given, a
:class:`igwn_auth_utils.requests.Session` will be
......@@ -408,7 +434,15 @@ def find_times(
requests.RequestException
if the request fails for any reason
""" # noqa: E501
qurl = _url(host, api.find_times_path, site, frametype, gpsstart, gpsend)
qurl = _url(
host,
api.find_times_path,
site,
frametype,
gpsstart,
gpsend,
ext=ext,
)
times = get_json(qurl, session=session, **request_kw)
return segments.segmentlist(map(segments.segment, times))
......@@ -506,6 +540,7 @@ def find_latest(
urltype="file",
on_missing="error",
host=None,
ext=api.DEFAULT_EXT,
session=None,
**request_kw,
):
......@@ -534,6 +569,9 @@ def find_latest(
:func:`~gwdatafind.utils.get_default_host` will be used to
discover the default host.
ext : `str`, optional
the file extension for which to search.
session : `requests.Session`, optional
the connection session to use; if not given, a
:class:`igwn_auth_utils.requests.Session` will be
......@@ -568,7 +606,14 @@ def find_latest(
>>> find_latest('H', 'H1_GWOSC_O2_4KHZ_R1', urltype='file', host='datafind.gwosc.org')
['file://localhost/cvmfs/gwosc.osgstorage.org/gwdata/O2/strain.4k/frame.v1/H1/1186988032/H-H1_GWOSC_O2_4KHZ_R1-1187733504-4096.gwf']
""" # noqa: E501
qurl = _url(host, api.find_latest_path, site, frametype, urltype=urltype)
qurl = _url(
host,
api.find_latest_path,
site,
frametype,
ext=ext,
urltype=urltype,
)
return _get_urls(qurl, on_missing=on_missing, **request_kw)
......@@ -581,6 +626,7 @@ def find_urls(
urltype="file",
on_gaps="warn",
host=None,
ext=api.DEFAULT_EXT,
session=None,
**request_kw,
):
......@@ -619,6 +665,9 @@ def find_urls(
:func:`~gwdatafind.utils.get_default_host` will be used to
discover the default host.
ext : `str`, optional
the file extension for which to search.
session : `requests.Session`, optional
the connection session to use; if not given, a
:class:`igwn_auth_utils.requests.Session` will be
......@@ -650,6 +699,7 @@ def find_urls(
frametype,
gpsstart,
gpsend,
ext=ext,
urltype=urltype,
match=match,
)
......