diff --git a/docs/index.rst b/docs/index.rst index 2ffff77619652d6dee97fcb2f2d45a55c030e4e1..1981e3e7161dd4ed7ba6a8cfab2084bf2612f15e 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -24,4 +24,3 @@ The client library for the GWDataFind service. :maxdepth: 2 api - migrating diff --git a/docs/migrating.rst b/docs/migrating.rst deleted file mode 100644 index d0a061304693425a394492f709e052478d85b023..0000000000000000000000000000000000000000 --- a/docs/migrating.rst +++ /dev/null @@ -1,93 +0,0 @@ -.. sectionauthor:: Duncan Macleod <duncan.macleod@ligo.org> -.. currentmodule:: gwdatafind - -################################### -Migrating from :mod:`glue.datafind` -################################### - -This document provides some basic instructions on how to update code written -to use :mod:`glue.datafind` to using :mod:`gwdatafind`. - -=============== -Renamed objects -=============== - -The table below summarise all renamings between :mod:`glue.datafind` and -:mod:`gwdatafind`. - -========================================== ====================================== -:mod:`glue.datafind` name :mod:`gwdatafind` name -========================================== ====================================== -`GWDataFindHTTPConnection` `HTTPConnection` -`GWDataFindHTTPSConnection` `HTTPSConnection` -`GWDataFindHTTPConnection.find_frame` `HTTPConnection.find_url` -`GWDataFindHTTPConnection.find_frame_urls` `HTTPConnection.find_urls` -========================================== ====================================== - -================= -Query output type -================= - -:mod:`glue.datafind` returns list of URLs as a :class:`glue.lal.Cache` of -:class:`lal.CacheEntry` objects. -:mod:`gwdatafind` returns simple `lists <list>` of `str`. -You can translate the new form back to the old easily:: - - from glue.lal import Cache - cache = Cache.from_urls(urls) - -===================== -Creating a connection -===================== - -:mod:`glue.datafind` provided no convenience methods for opening a new -connection, so you probably wrote your own function that stripped the port -number from the server name, and handled HTTP/HTTPS manually. -With `gwdatafind`, you can just use the :meth:`gwdatafind.connect` method to -handle that:: - - >>> from gwdatafind import connect - >>> connection = connect() - -or if you know the server URL:: - - >>> connection = connect('datafind.server.url:443') - -======================= -Simplified single calls -======================= - -If you are only interested in a single query to a single server (the typical -use case), you can utilise one of the new top-level functions. -So, instead of:: - - >>> from glue.datafind import GWDataFindHTTPConnection - >>> connection = GWDataFindHTTPConnection() - >>> cache = connection.find_frame_urls(...) - -you can just use:: - - >>> from gwdatafind import find_urls - >>> urls = find_urls(...) - -The arguments and syntax for :func:`~gwdatafind.find_urls` is the same as that of the -old :meth:`glue.datafind.GWDataFindHTTPConnection.find_frame_urls` method. - -Similar top-level functions exist for -:func:`~gwdatafind.ping`, -:func:`~gwdatafind.find_observatories`, -:func:`~gwdatafind.find_types`, -:func:`~gwdatafind.find_times`, -:func:`~gwdatafind.find_url`, -:func:`~gwdatafind.find_latest`, and -:func:`~gwdatafind.find_urls` - -================== -Command-line usage -================== - -The `lscsoft-glue <https://pypi.org/project/lscsoft-glue/>`__ package provides the -``gw_data_find`` script, used to perform queries from the command-line. -`gwdatafind` provides an identical interface via Python module execution (`python -m`). - -To migrate, replace ``gw_data_find`` with ``python -m gwdatafind``. diff --git a/gwdatafind/__init__.py b/gwdatafind/__init__.py index c364be07aa2144e3dbf908590ecc17517de115e7..0702d8a07a71b1b19acd4adfbc7490b97e370457 100644 --- a/gwdatafind/__init__.py +++ b/gwdatafind/__init__.py @@ -83,7 +83,6 @@ For example: from igwn_auth_utils.requests import Session -from .http import * from .ui import * __author__ = 'Duncan Macleod <duncan.macleod@ligo.org>' diff --git a/gwdatafind/http.py b/gwdatafind/http.py deleted file mode 100644 index 83de621342d37d789ca9c1f4cc63e7a0b07ee4ff..0000000000000000000000000000000000000000 --- a/gwdatafind/http.py +++ /dev/null @@ -1,457 +0,0 @@ -# Copyright (C) Scott Koranda (2012-2015) -# Lousiana State University (2015-2017) -# Cardiff University (2017-2022) -# -# This program is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by the -# Free Software Foundation; either version 2 of the License, or (at your -# option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -# Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - -"""Connection utilities for the GW datafind service. -""" - -import re -import socket -import warnings -from http import client as http_client -from json import loads -from urllib.error import HTTPError -from urllib.parse import urlparse - -import igwn_segments as segments - -from . import api -from .utils import ( - file_segment, - get_default_host, -) - -__author__ = 'Duncan Macleod <duncan.macleod@ligo.org>' -__all__ = ['HTTPConnection', 'HTTPSConnection'] - - -class HTTPConnection(http_client.HTTPConnection): - """Connect to a GWDataFind host using HTTP. - - .. warning:: - - This class is deprecated and will be removed in a future release. - Use the functions from the :ref:`gwdatafind-top-api` instead. - - Parameters - ---------- - host : `str` - the name of the server with which to connect. - - port : `int`, optional - the port on which to connect. - - kwargs - other keywords are passed directly to `http.client.HTTPConnection` - """ - def __init__(self, host=None, port=None, - timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None, - **kwargs): - """Create a new connection. - """ - warnings.warn( - "this class is deprecated and will be removed in a future " - "release, please use the gwdatafind.ui module instead", - DeprecationWarning, - ) - if host is None: - host = get_default_host() - http_client.HTTPConnection.__init__(self, host, port, timeout, - source_address, **kwargs) - - def _request_response(self, method, url, **kwargs): - """Internal method to perform request and verify reponse. - - Parameters - ---------- - method : `str` - name of the method to use (e.g. ``'GET'``). - - url : `str` - remote URL to query. - - kwargs - other keyword arguments are passed to - :meth:`http.client.HTTPConnection.request`. - - Returns - ------- - response : `str` - reponse from server query - - Raises - ------ - RuntimeError - if query is unsuccessful - """ - if not url.startswith("/"): # ensure all URLs start with a slash - url = f"/{url}" - self.request(method, url, **kwargs) - response = self.getresponse() - if response.status != 200: - raise HTTPError(url, response.status, response.reason, - response.getheaders(), response.fp) - return response - - def get_json(self, url, **kwargs): - """Perform a 'GET' request and return the decode the result as JSON - - Parameters - ---------- - url : `str` - remote URL to query - - kwargs - other keyword arguments are passed to - :meth:`HTTPConnection._request_response` - - Returns - ------- - data : `object` - JSON decoded using :func:`json.loads` - """ - response = self._request_response('GET', url, **kwargs).read() - if isinstance(response, bytes): - response = response.decode('utf-8') - return loads(response) - - def get_urls(self, url, scheme=None, on_missing='ignore', **kwargs): - """Perform a 'GET' request and return a list of URLs. - - Parameters - ---------- - url : `str` - remote URL to query - - scheme : `str`, `None`, optional - the URL scheme to match, default: `None` - - on_missing : `str`, optional - how to handle an empty (but successful) response, one of - - - ``'ignore'``: do nothing, return empty `list` - - ``'warn'``: print warning, return empty `list` - - ``'raise'``: raise `RuntimeError` - - kwargs - other keyword arguments are passed to - :meth:`HTTPConnection.get_json` - - Returns - ------- - urls : `list` of `str` - a list of file paths as returned from the server. - """ - urls = self.get_json(url, **kwargs) - - # sieve for correct file scheme - if scheme: - urls = list(filter(lambda e: urlparse(e).scheme == scheme, urls)) - - # handle empty result - if not urls: - err = "no files found" - if on_missing == 'warn': - warnings.warn(err) - elif on_missing != 'ignore': - raise RuntimeError(err) - - return urls - - # -- supported interactions ----------------- - - def ping(self): - """Ping the LDR host to test for life. - - Raises - ------ - RuntimeError - if the ping fails - """ - self._request_response("HEAD", api.ping_path()) - return 0 - - def find_observatories(self, match=None): - """Query the LDR host for observatories. - - Parameters - ---------- - match : `str`, `re.Pattern` - restrict returned observatories to those matching a - regular expression. - - Returns - ------- - obs : `list` of `str` - the list of known osbervatory prefices (and combinations) - - Examples - -------- - >>> from gwdatafind import connect - >>> conn = connect() - >>> conn.find_observatories() - ['AGHLT', 'G', 'GHLTV', 'GHLV', 'GHT', 'H', 'HL', 'HLT', - 'L', 'T', 'V', 'Z'] - >>> conn.find_observatories("H") - ['H', 'HL', 'HLT'] - """ - sitelist = set(self.get_json(api.find_observatories_path())) - if match: - regmatch = re.compile(match) - return [site for site in sitelist if regmatch.search(site)] - return list(sitelist) - - def find_types(self, site=None, match=None): - """Query the LDR host for frame types. - - Parameters - ---------- - site : `str` - single-character name of site to match - - match : `str`, `re.Pattern` - regular expression to match against known types - - Returns - ------- - types : `list` of `str` - list of frame types - - Examples - -------- - >>> from gwdatafind import connect - >>> conn = connect() - >>> conn.find_types("L", "RDS") - ['L1_RDS_C01_LX', - 'L1_RDS_C02_LX', - 'L1_RDS_C03_L2', - 'L1_RDS_R_L1', - 'L1_RDS_R_L3', - 'L1_RDS_R_L4', - 'PEM_RDS_A6', - 'RDS_R_L1', - 'RDS_R_L2', - 'RDS_R_L3', - 'TESTPEM_RDS_A6'] - """ - path = api.find_types_path(site=site) - typelist = set(self.get_json(path)) - if match: - regmatch = re.compile(match) - return [type_ for type_ in typelist if regmatch.search(type_)] - return list(typelist) - - def find_times(self, site, frametype, gpsstart=None, gpsend=None): - """Query the LDR for times for which files are avaliable. - - Parameters - ---------- - site : `str` - single-character name of site to match - - frametype : `str` - name of frametype to match - - gpsstart : `int` - GPS start time of query - - gpsend : `int` - GPS end time of query - - Returns - ------- - segments : `igwn_segments.segmentlist` - the list of `[start, stop)` intervals for which files are - available. - """ - path = api.find_times_path( - site, - frametype, - gpsstart or 1, - gpsend or int(2e9), - ) - segmentlist = self.get_json(path) - return segments.segmentlist(map(segments.segment, segmentlist)) - - def find_url(self, framefile, urltype='file', on_missing="error"): - """Query the LDR host for a single filename. - - Parameters - ---------- - framefile : `str` - base name of file to match - - urltype : `str`, optional - file scheme to search for, one of ``'file'``, ``'gsiftp'``, or - `None`, default: 'file' - - on_missing : `str` - what to do when the requested file isn't found, one of: - - - ``'warn'``: print a warning (default), - - ``'error'``: raise a `RuntimeError`, or - - ``'ignore'``: do nothing - - Returns - ------- - urls : `list` of `str` - a list of structured file paths for all instances of ``filename``. - """ - path = api.find_url_path(framefile) - return self.get_urls(path, scheme=urltype, on_missing=on_missing) - - def find_frame(self, *args, **kwargs): - """DEPRECATED, use :meth:`~HTTPConnection.find_url` instead. - """ - warnings.warn('find_frame() was renamed find_url()', - DeprecationWarning) - return self.find_url(*args, **kwargs) - - def find_latest(self, site, frametype, urltype='file', on_missing="error"): - """Query for the most recent file of a given type. - - Parameters - ---------- - site : `str` - single-character name of site to match - - frametype : `str` - name of frametype to match - - urltype : `str`, optional - file scheme to search for, one of 'file', 'gsiftp', or - `None`, default: 'file' - - on_missing : `str`, optional - what to do when the requested frame isn't found, one of: - - - ``'warn'`` print a warning (default), or - - ``'error'``: raise a `RuntimeError`, or - - ``'ignore'``: do nothing - - Returns - ------- - latest : `list` with one `str` - the URLs of the latest file found (all file types) - - Raises - ------ - RuntimeError - if no frames are found - """ - path = api.find_latest_path(site, frametype, urltype=urltype) - return self.get_urls(path, scheme=urltype, on_missing=on_missing) - - def find_urls(self, site, frametype, gpsstart, gpsend, - match=None, urltype='file', on_gaps="warn"): - """Find all files of the given type in the [start, end) GPS interval. - - site : `str` - single-character name of site to match - - frametype : `str` - name of frametype to match - - gpsstart : `int` - integer GPS start time of query - - gpsend : `int` - integer GPS end time of query - - match : `str`, `re.Pattern`, optional - regular expression to match against - - urltype : `str`, optional - file scheme to search for, one of 'file', 'gsiftp', or - `None`, default: 'file' - - on_gaps : `str`, optional - what to do when the requested frame isn't found, one of: - - - ``'warn'`` print a warning (default), or - - ``'error'``: raise a `RuntimeError`, or - - ``'ignore'``: do nothing - - Returns - ------- - cache : `list` of `str` - the list of discovered file URLs - """ - # make query - path = api.find_urls_path( - site, - frametype, - gpsstart, - gpsend, - urltype=urltype, - match=match, - ) - urls = self.get_urls(path) - - # ignore missing data - if on_gaps == "ignore": - return urls - - # handle missing data - span = segments.segment(gpsstart, gpsend) - seglist = segments.segmentlist(map(file_segment, urls)).coalesce() - missing = (segments.segmentlist([span]) - seglist).coalesce() - if not missing: # no gaps - return urls - - # warn or error on missing - msg = "Missing segments: \n{}".format("\n".join(map(str, missing))) - if on_gaps == "warn": - warnings.warn(msg) - return urls - raise RuntimeError(msg) - - def find_frame_urls(self, *args, **kwargs): - """DEPRECATED, use :meth:`~HTTPConnection.find_urls` instead. - """ - warnings.warn('find_frame_urls() was renamed find_urls()', - DeprecationWarning) - return self.find_urls(*args, **kwargs) - - -class HTTPSConnection(http_client.HTTPSConnection, HTTPConnection): - """Connect to a GWDataFind host using HTTPS. - - .. warning:: - - This class is deprecated and will be removed in a future release. - Use the functions from the :ref:`gwdatafind-top-api` instead. - - This requires a valid X509 credential registered with the remote host. - - Parameters - ---------- - host : `str` - the name of the server with which to connect. - - port : `int`, optional - the port on which to connect. - - kwargs - other keywords are passed directly to `http.client.HTTPSConnection` - """ - def __init__(self, host=None, port=None, **kwargs): - """Create a new connection. - """ - if host is None: - host = get_default_host() - http_client.HTTPSConnection.__init__(self, host, port=port, **kwargs) diff --git a/gwdatafind/tests/conftest.py b/gwdatafind/tests/conftest.py index 3481da700dfeca6b1496ead26c2bf32a8154ae38..8db926b6d30a8118c787e23423797a14b868a48b 100644 --- a/gwdatafind/tests/conftest.py +++ b/gwdatafind/tests/conftest.py @@ -19,22 +19,10 @@ import os import tempfile -from unittest import mock from . import yield_fixture -@yield_fixture -def response(): - """Patch an HTTPConnection to do nothing in particular. - - Yields the patch for `http.client.HTTPConnection.getresponse` - """ - with mock.patch('http.client.HTTPConnection.request'), \ - mock.patch('http.client.HTTPConnection.getresponse') as resp: - yield resp - - @yield_fixture def tmpname(): """Return a temporary file name, cleaning up after the method returns. diff --git a/gwdatafind/tests/test_http.py b/gwdatafind/tests/test_http.py deleted file mode 100644 index 5a6caadfe56196b40b6453c0ba63e2ef1ad28e6e..0000000000000000000000000000000000000000 --- a/gwdatafind/tests/test_http.py +++ /dev/null @@ -1,190 +0,0 @@ -# Copyright (C) Cardiff University (2018-2022) -# -# This program is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by the -# Free Software Foundation; either version 2 of the License, or (at your -# option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -# Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - -"""Tests for :mod:`gwdatafind.http`. -""" - -import json -import warnings -from unittest import mock -from urllib.error import HTTPError - -import pytest -from igwn_segments import ( - segment, - segmentlist, -) - -from ..http import ( - HTTPConnection, - HTTPSConnection, -) -from . import yield_fixture - - -def fake_response(output, status=200): - resp = mock.Mock() - resp.status = int(status) - if not isinstance(output, (str, bytes)): - output = json.dumps(output) - resp.read.return_value = output.encode('utf-8') - return resp - - -class TestHTTPConnection: - CONNECTION = HTTPConnection - - @classmethod - def setup_class(cls): - cls._create_connection_patch = mock.patch('socket.create_connection') - cls._create_connection_patch.start() - - @classmethod - def teardown_class(cls): - cls._create_connection_patch.stop() - - @classmethod - @yield_fixture - def connection(cls): - with mock.patch.dict( - "os.environ", - {"GWDATAFIND_SERVER": "test.gwdatafind.com:123"}, - ): - with pytest.warns(DeprecationWarning): - yield cls.CONNECTION() - - def test_init(self, connection): - assert connection.host == 'test.gwdatafind.com' - assert connection.port == 123 - - def test_get_json(self, response, connection): - response.return_value = fake_response({'test': 1}) - jdata = connection.get_json('something') - assert jdata['test'] == 1 - - def test_ping(self, response, connection): - response.return_value = fake_response('') - assert connection.ping() == 0 - response.return_value = fake_response('', 500) - with pytest.raises(HTTPError): - connection.ping() - - @pytest.mark.parametrize('match, out', [ - (None, ['A', 'B', 'C', 'D', 'ABCD']), - ('B', ['B', 'ABCD']), - ]) - def test_find_observatories(self, response, connection, match, out): - response.return_value = fake_response(['A', 'B', 'C', 'D', 'ABCD']) - assert sorted(connection.find_observatories(match=match)) == ( - sorted(out)) - - @pytest.mark.parametrize('site, match, out', [ - (None, None, ['A', 'B', 'C', 'D', 'ABCD']), - ('X', 'B', ['B', 'ABCD']), - ]) - def test_find_types(self, response, connection, site, match, out): - response.return_value = fake_response(['A', 'B', 'C', 'D', 'ABCD']) - assert sorted(connection.find_types(match=match)) == ( - sorted(out)) - - def test_find_times(self, response, connection): - segs = [(0, 1), (1, 2), (3, 4)] - response.return_value = fake_response(segs) - times = connection.find_times('X', 'test') - assert isinstance(times, segmentlist) - assert isinstance(times[0], segment) - assert times == segs - - # check keywords - times = connection.find_times('X', 'test', 0, 10) - assert times == segs - - def test_find_url(self, response, connection): - out = ['file:///tmp/X-test-0-10.gwf'] - response.return_value = fake_response(out) - url = connection.find_url('X-test-0-10.gwf') - assert url == out - - response.return_value = fake_response([]) - with pytest.raises(RuntimeError): - connection.find_url('X-test-0-10.gwf') - with pytest.warns(UserWarning): - url = connection.find_url('X-test-0-10.gwf', on_missing='warn') - assert url == [] - with warnings.catch_warnings(): - warnings.simplefilter("error") - url = connection.find_url('X-test-0-10.gwf', on_missing='ignore') - assert url == [] - - def test_find_frame(self, response, connection): - out = ['file:///tmp/X-test-0-10.gwf'] - response.return_value = fake_response(out) - with pytest.warns(DeprecationWarning): - url = connection.find_frame('X-test-0-10.gwf') - assert url == out - - def test_find_latest(self, response, connection): - out = ['file:///tmp/X-test-0-10.gwf'] - response.return_value = fake_response(out) - url = connection.find_latest('X', 'test') - assert url == out - - response.return_value = fake_response([]) - with pytest.raises(RuntimeError): - connection.find_latest('X', 'test') - with pytest.warns(UserWarning): - url = connection.find_latest('X', 'test', on_missing='warn') - assert url == [] - with warnings.catch_warnings(): - warnings.simplefilter("error") - url = connection.find_latest('X', 'test', on_missing='ignore') - assert url == [] - - def test_find_urls(self, response, connection): - files = [ - 'file:///tmp/X-test-0-10.gwf', - 'file:///tmp/X-test-10-10.gwf', - 'file:///tmp/X-test-20-10.gwf', - ] - response.return_value = fake_response(files) - urls = connection.find_urls('X', 'test', 0, 30, match='anything') - assert urls == files - - # check gaps - with pytest.raises(RuntimeError): - connection.find_urls('X', 'test', 0, 40, on_gaps='error') - with pytest.warns(UserWarning): - urls = connection.find_urls('X', 'test', 0, 40, on_gaps='warn') - assert urls == files - with warnings.catch_warnings(): - warnings.simplefilter("error") - urls = connection.find_urls('X', 'test', 0, 40, on_gaps='ignore') - assert urls == files - - def test_find_frame_urls(self, response, connection): - files = [ - 'file:///tmp/X-test-0-10.gwf', - 'file:///tmp/X-test-10-10.gwf', - 'file:///tmp/X-test-20-10.gwf', - ] - response.return_value = fake_response(files) - with pytest.warns(DeprecationWarning): - urls = connection.find_frame_urls('X', 'test', 0, 30) - assert urls == files - - -class TestHTTPSConnection(TestHTTPConnection): - CONNECTION = HTTPSConnection diff --git a/gwdatafind/ui.py b/gwdatafind/ui.py index 20aa314e2b991ab238329b77b22d6765d4262348..63dec370ec11dd7bf10319c1ee1fc9f6d6d8c3f8 100644 --- a/gwdatafind/ui.py +++ b/gwdatafind/ui.py @@ -40,7 +40,6 @@ from .utils import ( __author__ = 'Duncan Macleod <duncan.macleod@ligo.org>' __all__ = [ - "connect", "ping", "find_observatories", "find_types", @@ -51,65 +50,6 @@ __all__ = [ ] -# -- deprecated methods ------------------------- - -def connect(host=None, port=None): # pragma: no cover - """Open a new connection to a Datafind server. - - This method will auto-select between HTTP and HTTPS based on port, - and (for HTTPS) will automatically load the necessary X509 credentials - using :func:`gwdatafind.utils.find_credential`. - - .. warning:: - - This method is deprecated and will be removed in a future release - - Parameters - ---------- - host : `str`, optional - the name of the datafind server to connect to; if not given will be - taken from the ``GWDATAFIND_SERVER`` environment variable. - - port : `int`, optional - the port on the server to use, if not given it will be stripped from - the ``host`` name. - - Returns - ------- - connection : `gwdatafind.HTTPConnection` or `gwdatafind.HTTPSConnection` - a newly opened connection - """ - import ssl - - from .http import ( - HTTPConnection, - HTTPSConnection, - ) - from .utils import find_credential - - warn( - "this method is deprecated and will be removed in a future release", - DeprecationWarning, - ) - if host is None: - host = get_default_host() - if port is None: - try: - host, port = host.rsplit(':', 1) - except ValueError: - pass - else: - port = int(port) - if port not in (None, 80): - cert, key = find_credential() - context = ssl.create_default_context() - context.load_cert_chain(cert, key) - return HTTPSConnection(host=host, port=port, context=context) - return HTTPConnection(host=host, port=port) - - -# -- new methods -------------------------------- - @wraps(_get) def get(url, *args, **kwargs): if url.startswith("http://") and requests.__version__ < "2.15.0":