Skip to content
Snippets Groups Projects
Commit f63b4958 authored by Alexander Pace's avatar Alexander Pace
Browse files

Upgrade igwn-alert-overseer

parent 1a90ee96
No related branches found
No related tags found
1 merge request!191Upgrade igwn-alert-overseer
Pipeline #593832 passed
......@@ -150,6 +150,10 @@ SEND_MATTERMOST_ALERTS = False
# IGWN_ALERT_GROUP environment variable.
DEFAULT_IGWN_ALERT_GROUP = 'lvalert-dev'
# Overseer timeout, in seconds. Read from the IGWN_ALERT_OVERSEER_TIMEOUT
# environment variable; falls back to 0.1 when the variable is not set.
OVERSEER_TIMEOUT = float(get_from_env('IGWN_ALERT_OVERSEER_TIMEOUT',
fail_if_not_found=False, default_value=0.1))
# Use LVAlert Overseer?
USE_LVALERT_OVERSEER = True
# For each LVAlert server, a separate instance of LVAlert Overseer
......
......@@ -3,7 +3,7 @@ autostart=%(ENV_ENABLE_IGWN_OVERSEER)s
command=igwn_alert_overseer -a %(ENV_IGWN_ALERT_USER)s -b %(ENV_IGWN_ALERT_PASSWORD)s
-s %(ENV_IGWN_ALERT_SERVER)s -p %(ENV_IGWN_ALERT_OVERSEER_PORT)s
-g %(ENV_IGWN_ALERT_GROUP)s
-l - -e - -q - -c
-l - -e - -q - -c -f -i %(ENV_IGWN_ALERT_FLUSH_INTERVAL)s
user=gracedb
group=www-data
stdout_logfile=/dev/stdout
......
import logging
from multiprocessing import Process
import os
import asyncio
import datetime
import json
import time
import functools
from django.conf import settings
from igwn_alert import client
from igwn_alert_overseer.overseer.overseer_client import overseer_client
from tornado.ioloop import IOLoop
import asyncio
import json
from tornado.iostream import StreamClosedError
from asyncio.exceptions import InvalidStateError
# Set up logger
logger = logging.getLogger(__name__)
# Module-level alias for the Django setting so the timeout can be referenced
# by a short name in the functions below.
OVERSEER_TIMEOUT = settings.OVERSEER_TIMEOUT
def timeout_and_stop(io_loop):
    """Log a critical timeout message, then stop the given I/O loop.

    Used as a timeout callback: the caller binds ``io_loop`` and schedules
    the resulting callable so that a loop which never receives a response
    does not keep running.
    """
    timeout_message = f'Overseer IO Loop timed out after {OVERSEER_TIMEOUT} seconds.'
    logger.critical(timeout_message)
    io_loop.stop()
def send_with_lvalert_overseer(node_name, message, port):
......@@ -30,11 +39,44 @@ def send_with_lvalert_overseer(node_name, message, port):
# Format message. FIXME maybe move this step into the overseer client?
msg_dict = json.dumps(msg_dict)
# Start IOLoop:
asyncio.set_event_loop(asyncio.new_event_loop())
resp = client.send_to_overseer(msg_dict, logger)
IOLoop.instance().start()
rdict = json.loads(resp.result())
alert_loop = asyncio.new_event_loop()
try:
asyncio.set_event_loop(alert_loop)
# Start the async request to push the message to the overseer, and
# await the success/failure response.
resp = client.send_to_overseer(msg_dict, logger)
# Start the async I/O loop within the current thread
io_loop = IOLoop.instance()
# Construct a callable that passes io_loop as an argument
overseer_timeout = functools.partial(timeout_and_stop, io_loop)
# Add a timeout for the scenario where the overseer server isn't
# running or responding. This shouldn't actually happen, but hey.
io_loop.add_timeout(time.time() + OVERSEER_TIMEOUT, overseer_timeout)
# Start the I/O loop
io_loop.start()
# Interpret the response
rdict = json.loads(resp.result())
# Two scenarios here: the overseer client code raises a StreamClosedError
# when the I/O loop was stopped after it timed out. The InvalidStateError
# appears to have come from a prior implementation of this logic, so it
# probably won't occur again; if it does, it still represents an invalid
# response from the overseer, so the alert should be sent again.
except (StreamClosedError, InvalidStateError) as e:
# close the loop and free up the port:
alert_loop.close()
# return false and then attempt to send with the client code.
return False
finally:
# close the loop and free up the port:
alert_loop.close()
# Return a boolean indicating whether the message was sent
# successfully or not
......
......@@ -21,10 +21,10 @@ flake8==3.9.2
gpstime==0.8.1
gssapi==1.8.2
gunicorn[gthread]==21.2.0
hop-client==0.8.0
hop-client==0.9.0
html5lib==1.1.0
igwn-alert==0.4.0
igwn-alert-overseer==0.6.1
igwn-alert==0.5.0
igwn-alert-overseer==0.7.0
ipdb==0.13.13
ipython==8.14.0
#jwt==1.3.1
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment