From bdb1c5eb1e6796599b328f2ef16c1d7ef8088293 Mon Sep 17 00:00:00 2001
From: Alexander Pace <alexander.pace@ligo.org>
Date: Mon, 16 Sep 2024 18:15:23 +0000
Subject: [PATCH] Add `time-to-alert` to beta reports page

The reports page is accessible on production, but the link to it is only exposed on playground and test.
---
 config/urls.py                         |   2 +-
 gracedb/events/reports.py              | 189 ++++++++++++++++++++++---
 gracedb/templates/gracedb/reports.html | 127 ++++++++++++-----
 requirements.txt                       |   6 +-
 4 files changed, 263 insertions(+), 61 deletions(-)

diff --git a/config/urls.py b/config/urls.py
index 142930a4f..ddfeae86e 100644
--- a/config/urls.py
+++ b/config/urls.py
@@ -43,7 +43,7 @@ urlpatterns = [
     re_path(r'^other/$', TemplateView.as_view(template_name='other.html'),
         name='other'),
     re_path(r'^performance/$', events.views.performance, name="performance"),
-    re_path(r'^reports/$', events.reports.histo, name="reports"),
+    re_path(r'^reports/$', events.reports.reports_page_context, name="reports"),
     re_path(r'^latest/$', search.views.latest, name="latest"),
     #(r'^reports/(?P<path>.+)$', 'django.views.static.serve',
     #        {'document_root': settings.LATENCY_REPORT_DEST_DIR}),
diff --git a/gracedb/events/reports.py b/gracedb/events/reports.py
index c249844f8..928c3c5f2 100644
--- a/gracedb/events/reports.py
+++ b/gracedb/events/reports.py
@@ -3,8 +3,10 @@ from django.http import HttpResponseForbidden
 from django.template import RequestContext
 from django.shortcuts import render
 from django.conf import settings
+from django import forms
 
-from .models import Event, Group, Search, Pipeline
+from .models import Event, Group, Search, Pipeline, Label
+from superevents.models import Superevent, Labelling
 from ligoauth.decorators import internal_user_required
 from django.db.models import Q
 
@@ -16,43 +18,191 @@ from search.query.events import parseQuery
 
 from django.db.models import Max, Min, Avg 
 from django.db.models.aggregates import StdDev
-from datetime import timedelta, datetime
+from datetime import datetime, timedelta
+from gpstime import gpstime
 from django.utils import timezone
+from plotly.io import to_html
 from plotly.offline import plot
+from tailslide import Median, Percentile
+
+import numpy as np
+import pandas as pd
+import plotly.express as px
 import plotly.graph_objects as go
 
-plot_title = "Events Uploaded Since {0} UTC"
-plot_sub_title = "<br><sup> Online, Production Events in the Last Seven Days </sup>"
+YEAR_RANGE = list(range(datetime.now().year, 2013, -1))
+INITIAL_DAYS_BACK = 31
+
+EVENT_SEARCH_CHOICES = (
+    ('AllSky', 'AllSky'),
+    ('EarlyWarning', 'EarlyWarning'),
+    ('SSM', 'SSM'),
+    ('BBH', 'BBH'),
+    ('VTInjection', 'VTInjection'),
+)
+
+EVENT_SEARCH_CHOICES_INITAL = ['AllSky']
 
-plt_title = plot_title + plot_sub_title
+SUPEREVENT_CATEGORIES = (
+    (Superevent.SUPEREVENT_CATEGORY_PRODUCTION, 'Production'),
+    (Superevent.SUPEREVENT_CATEGORY_MDC, 'MDC'),
+)
 
-days_back = 7
+class lookback_days_form(forms.Form):
+    start_date = forms.DateField(label='Start Date',
+        widget=forms.SelectDateWidget(years=YEAR_RANGE),
+                    )
+    end_date = forms.DateField(label='End Date',
+        widget=forms.SelectDateWidget(years=YEAR_RANGE),
+        initial = datetime.now()
+                    )
+
+    searches_choice = forms.MultipleChoiceField(widget=forms.CheckboxSelectMultiple,
+                          choices=EVENT_SEARCH_CHOICES,
+                          required=False,
+                          label='Event Searches',
+                          initial=EVENT_SEARCH_CHOICES_INITAL)
+
+    superevent_type = forms.ChoiceField(widget=forms.RadioSelect,
+                          choices=SUPEREVENT_CATEGORIES,
+                          label='Superevent Category',
+                          initial=Superevent.SUPEREVENT_CATEGORY_PRODUCTION)
+    
 
 @internal_user_required
-#@method_decorator(internal_user_required(raise_exception=True),
-#    name='dispatch')
-def histo(request):
+def reports_page_context(request):
+
+    # start with a blank context dict: 
+    context = {}
+
+    if request.method == 'POST':
+        form = lookback_days_form(request.POST)
+
+        if form.is_valid():
+            start_date = form.cleaned_data['start_date']
+            start_date = datetime.combine(start_date, datetime.min.time())
+            end_date = form.cleaned_data['end_date']
+            end_date = datetime.combine(end_date, datetime.min.time())
+            days_back = (end_date - start_date).days
+            event_searches = form.cleaned_data['searches_choice']
+            superevent_category=form.cleaned_data['superevent_type']
+
+    else:
+        days_back = INITIAL_DAYS_BACK
+        end_date = datetime.now()
+        start_date = end_date - timedelta(days=days_back)
+        event_searches = EVENT_SEARCH_CHOICES_INITAL
+        superevent_category = Superevent.SUPEREVENT_CATEGORY_PRODUCTION
+        form = lookback_days_form(initial={'start_date': start_date})
+
+    # update context with form:
+    context.update({'form': form})
+
+    # update context with histogram div:
+    context.update({'hist': latency_histo(days_back, start_date, 
+                                end_date, event_searches)})
+
+    # update with the time-to-alert div:
+    context.update({'tta': time_to_alert(start_date, end_date, superevent_category)}) 
+
+    return render(request, 'gracedb/reports.html', context)
+
+
+def time_to_alert(start_date, end_date, superevent_category):
+    # Set up the query to get all superevents of the requested category within
+    # the date range that have the GCN_PRELIM_SENT label applied:
+
+    # Get the GCN_PRELIM_SENT label first so we only hit the db once:
+    gcn = Label.objects.get(name='GCN_PRELIM_SENT')
+
+    sevents = Superevent.objects.filter(category=superevent_category,
+                 t_0__range=(gpstime.fromdatetime(start_date).gps(),
+                             gpstime.fromdatetime(end_date).gps()),
+                 labels=gcn,
+              ).order_by('superevent_id')
+
+    # Now get the GCN_PRELIM_SENT label objects:
+    labs = Labelling.objects.filter(label=gcn, superevent__in=sevents).order_by('superevent__superevent_id')
+
+    # Make lists of the relevant values. One has superevent_id and t_0, the
+    # other has the created time of the gcn label (which is a proxy for
+    # when the alert goes out). Everything should be ordered correctly, but
+    # we're going to add a verification step just in case. 
+    sevents_values = list(sevents.values_list('superevent_id', 't_0', 'created'))
+    label_values = list(labs.values_list('superevent__superevent_id', 'created'))
+
+    # Construct a list where each item looks like:
+    # [superevent_id (string), created (datetime), time-to-alert (float)]
+    #results = 
+    if sevents_values:
+        results = [[i[0][0], i[0][2], gpstime.fromdatetime(i[1][1]).gps() - float(i[0][1])] for i in zip(sevents_values, label_values) if i[0][0] == i[1][0]]
+        np_results = np.array(results)
+    else:
+        np_results = np.zeros((1,3))
+
+    
+    # Try making a pandas dataframe:
+    pd_results = pd.DataFrame(np_results,
+                     columns=['superevent_id', 'created', 'time_to_alert'],
+                     )
+
+
+    # Make a scatter plot:
+    scatter_fig = px.scatter(pd_results,
+                         x='created',
+                         y='time_to_alert',
+                         hover_data='superevent_id',
+                      )
+    scatter_fig.update_traces(marker_size=10)
+
+    scatter_fig.update_layout(title={
+                                'text': 'Superevent Time-to-Alert',
+                                'xanchor': 'center',
+                                'x':0.5,},
+                               xaxis_title="Date",
+                               yaxis_title="Time-to-Alert (s)",
+                               autosize=True,
+                               paper_bgcolor='rgba(0, 0, 0, 0)',
+                               margin={'l': 0, 'r': 0},
+                    )
+
+
+    return {'tta_plot_div': to_html(scatter_fig,
+                                 include_plotlyjs=False,
+                                 full_html=False,
+                                 default_width='100%',),
+            'tta_stats': {'med': pd_results['time_to_alert'].median(),
+                          'nfp': pd_results['time_to_alert'].quantile(0.95)},
+       }
+
+
+def latency_histo(days_back, start_date, end_date, event_searches):
+    plot_title = "Events Uploaded Between {start} and {end}".format(
+                     start=start_date.strftime("%m/%d/%Y"),
+                     end=end_date.strftime("%m/%d/%Y"),
+                 )
+    plot_sub_title = f"<br><sup> Online, Production Events in {days_back} Days </sup>"
+    plt_title = plot_title + plot_sub_title
+
     fig = go.Figure()
 
     # Get the timedelta and get it in a queryable form:
     t_now = timezone.now()
     date_cutoff = t_now - timedelta(days=days_back)
 
-
-    #all_events_latency = list(Event.objects.filter(created__gt=date_cutoff, reporting_latency__isnull=False).values_list('reporting_latency', flat=True))
-
     # Zero out the list of pipeline statistics:
     aggregated_stats =[]
 
     # Loop over pipelines that are determined to be "Production" search pipelines and 
     # retrieve data, and populate histograms:
+
     for pipeline in Pipeline.objects.filter(pipeline_type=Pipeline.PIPELINE_TYPE_SEARCH_PRODUCTION):
 
         # Generate the queryset for production (G) events, uploaded online, that have a 
         # valid value of reporting_latency. There has to be a way to combine these so you
         # hit the db once, but for now its just once for a list of values, and once for
         # aggregated values. 
-        pipeline_query = Event.objects.filter(graceid__contains='G',
+        pipeline_query = Event.objects.filter(search__name__in=event_searches,
                                   offline=False,
                                   created__gt=date_cutoff,                                                                               reporting_latency__isnull=False,
                                   pipeline=pipeline).order_by('reporting_latency')
@@ -69,8 +219,8 @@ def histo(request):
                                    ))
 
         # Update the statistics list with aggregated values. 
-        aggregated_stats.append(pipeline_query.aggregate(avg=Avg('reporting_latency'),
-                                           std=StdDev('reporting_latency')))
+        aggregated_stats.append(pipeline_query.aggregate(med=Median('reporting_latency'),
+                                           nfp=Percentile('reporting_latency', 0.95)))
         aggregated_stats[-1].update({'name': pipeline.name})
         if pipeline_query:
             aggregated_stats[-1].update({'min': pipeline_query.first().reporting_latency,
@@ -92,7 +242,7 @@ def histo(request):
     #fig.update_layout(barmode='stack',
     fig.update_layout(barmode='overlay',
             title={
-                'text': plt_title.format(date_cutoff.strftime("%m/%d/%Y, %H:%M:%S")),
+                'text': plt_title,
                 'xanchor': 'center',
                 'x':0.5,},
             xaxis_title="Reporting Latency (s)",
@@ -102,12 +252,7 @@ def histo(request):
     fig.update_traces(opacity=0.75)
     latency_plot_div = plot(fig, output_type='div')
 
-
-
-    return render(request, 'gracedb/reports.html',
-        context=
-            {'latency_plot_div': latency_plot_div,
+    return {'latency_plot_div': latency_plot_div,
              'date_cutoff': date_cutoff,
              'pipeline_stats': aggregated_stats,}
-        )
 
diff --git a/gracedb/templates/gracedb/reports.html b/gracedb/templates/gracedb/reports.html
index 21f1ab9e5..c6af6a5b7 100644
--- a/gracedb/templates/gracedb/reports.html
+++ b/gracedb/templates/gracedb/reports.html
@@ -6,44 +6,101 @@
 
 {% block content %}
 <center>
-This page is currently in <span style="color:red;font-weight: bold;">beta</span>.
-Use for cross-checking and validation purposes only.
+  This page is currently in <span style="color:red;font-weight: bold;">beta</span>.
+  Use for cross-checking and validation purposes only.
 </center>
 
-{% autoescape off %}
-{{ latency_plot_div }}
-{% endautoescape %}
-
-<div class="row my-3 justify-content-md-center">
-    <div class="col-md-8">
-        <table class="table-hover table-condensed table-resp-gracedb shadow p-3 mb-5 rounded" style="text-align:center">
-            <thead>
-                <td> Pipeline </td>
-                <td> Uploads </td>
-                <td> Min. Latency (s) </td>
-                <td> Max Latency (s) </td>
-                <td> Avg. Latency (s) </td> 
-                <td> Std. Dev.  (s) </td> 
-            </thead>
-            {% for data in pipeline_stats %}
-              <tr>
-                  <!--<td style="font-family: monospace;"> {{data.name}} </td> -->
-                  <td style="font-family: 'Courier New', Courier, monospace; font-size: 12px; white-space: pre;"> {{data.name}} </td>
-                  <td> {{data.count}} </td>
-                  <td> {% if data.min %} {{data.min|floatformat:3}} 
-                           (<a href="{% url "view" data.min_gid %}">{{ data.min_gid }}</a>)
-                       {% endif %}
-                  </td>
-                  <td> {% if data.max %} {{data.max|floatformat:3}} 
-                           (<a href="{% url "view" data.max_gid %}">{{ data.max_gid }}</a>)
-                       {% endif %}
-                  </td>
-                  <td> {{data.avg|floatformat:3}} </td>
-                  <td> {{data.std|floatformat:3}} </td>
-              </tr>
-            {% endfor %}
-        </table>
+<center>
+  <form action="" method="post">
+    {% csrf_token %}
+    <table>
+      {{ form.as_table }}
+    </table>
+    <input type="submit" value="Submit" class="btn btn-primary">
+  </form>
+</center>
+
+<ul class="nav nav-tabs" id="perfTab" role="tablist">
+  <li class="nav-item" role="presentation">
+    <button class="nav-link active" id="latency-tab" data-toggle="tab" data-target="#latency" type="button" role="tab" aria-controls="latency" aria-selected="true">Event Latency</button>
+  </li>
+  <li class="nav-item" role="presentation">
+    <button class="nav-link" id="tta-tab" data-toggle="tab" data-target="#tta" type="button" role="tab" aria-controls="tta" aria-selected="false">Time-to-Alert</button>
+  </li>
+</ul>
+
+<div class="tab-content" id="myTabContent">
+  <div class="tab-pane fade show active" id="latency" role="tabpanel" aria-labelledby="latency-tab">
+
+
+    {% autoescape off %}
+    {{ hist.latency_plot_div }}
+    {% endautoescape %}
+
+    <div class="row my-3 justify-content-md-center">
+      <div class="col-md-8">
+	<table class="table-hover table-condensed table-resp-gracedb shadow p-3 mb-5 rounded" style="text-align:center">
+	  <thead>
+	    <td> Pipeline </td>
+	    <td> # Uploads </td>
+	    <td> Median Latency (s) </td> 
+	    <td> 95th Percentile (s) </td> 
+	    <td> Min. Latency (s) </td>
+	    <td> Max Latency (s) </td>
+	  </thead>
+	  {% for data in hist.pipeline_stats %}
+	  <tr>
+	    <!--<td style="font-family: monospace;"> {{data.name}} </td> -->
+	    <td style="font-family: 'Courier New', Courier, monospace; font-size: 12px; white-space: pre;"> {{data.name}} </td>
+	    <td> {{data.count}} </td>
+	    <td> {{data.med|floatformat:3}} </td>
+	    <td> {{data.nfp|floatformat:3}} </td>
+	    <td> {% if data.min %} {{data.min|floatformat:3}} 
+	      (<a href="{% url "view" data.min_gid %}">{{ data.min_gid }}</a>)
+	      {% endif %}
+	    </td>
+	    <td> {% if data.max %} {{data.max|floatformat:3}} 
+	      (<a href="{% url "view" data.max_gid %}">{{ data.max_gid }}</a>)
+	      {% endif %}
+	    </td>
+	  </tr>
+	  {% endfor %}
+	</table>
+      </div>
     </div>
+
+  </div>
+
+  <div class="tab-pane fade" id="tta" role="tabpanel" aria-labelledby="tta-tab">
+    <br>
+    <p width='90%'>
+    The chart below shows the time-to-alert of superevents currently in GraceDB within
+    the specified time range. Time-to-alert (in seconds) is calculated by subtracting
+    the <b>time of the astrophysical phenomenon</b>
+    (<span class="text-monospace">t<sub>0</sub></span> of the superevent) from the
+    <b>time when the <span class="text-monospace">GCN_PRELIM_SENT</span> label was applied.</b>
+    The actual receipt time of the alert by observing partners likely differs from this
+    estimate. Identify the Superevent ID by hovering over the data point.
+    </p>
+    <p width='90%'>
+    A single data-point at zero indicates that no superevents were labelled
+    <span class="text-monospace">GCN_PRELIM_SENT</span> within the specified time window.
+    </p>
+    {% autoescape off %}
+    {{ tta.tta_plot_div }}
+    {% endautoescape %}
+
+        <table class="table-hover table-condensed table-resp-gracedb shadow p-3 mb-5 rounded" style="text-align:center; width:auto; margin-left: auto; margin-right: auto;">
+          <thead>
+            <td> Median Time-to-Alert (s) </td><td> 95th Percentile (s)</td>
+           </thead>
+           <tr>
+            <td>{{tta.tta_stats.med|floatformat:1}}</td><td>{{tta.tta_stats.nfp|floatformat:1}}</td>
+          </tr>
+
+        </table>
+
+  </div>
 </div>
 
 {% endblock %}
diff --git a/requirements.txt b/requirements.txt
index bffc4c1ba..7257b4f0e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -38,8 +38,9 @@ matplotlib==3.6.2
 mock==4.0.3
 numpy==1.25.2
 packaging==23.1
+pandas==2.2.2
 phonenumbers==8.13.18
-plotly==5.15.0
+plotly==5.23.0
 psycopg==3.1.12
 PyJWT==2.8.0
 python-ldap==3.4.4
@@ -61,5 +62,4 @@ pytest-django==4.5.2
 pytz==2023.3
 pyasn1==0.4.8
 pyasn1-modules==0.2.8
-# pinning setuptools because of 2to3 errors from ConcurrentLogHandler:
-#setuptools==54.2.0
+tailslide==0.2.0
-- 
GitLab