From 85b899d11e12406c9f11143380e27ae4c6adf2ba Mon Sep 17 00:00:00 2001
From: "ezekiel.dohmen" <ezekiel.dohmen@ligo.org>
Date: Thu, 21 Mar 2024 19:28:51 -0500
Subject: [PATCH] Adding stats to epics and updating medm screen

---
 src/epics/util/feCodeGen.pl | 13 ++++++++
 src/epics/util/lib/IPCx.pm  | 54 +++++++++++++++++++++----------
 src/fe/commData3.c          | 63 +++++++++++++++++++++++++++++++++----
 src/fe/controllerApp.c      | 18 ++++++++++-
 src/fe/controllerIop.c      | 18 +++++++++++
 src/include/commData3.h     | 12 +++++--
 src/include/controller.h    |  1 +
 7 files changed, 154 insertions(+), 25 deletions(-)

diff --git a/src/epics/util/feCodeGen.pl b/src/epics/util/feCodeGen.pl
index 7ee41f86d..39d11afd4 100755
--- a/src/epics/util/feCodeGen.pl
+++ b/src/epics/util/feCodeGen.pl
@@ -1538,6 +1538,19 @@ if ( length($subs) == 0 ) {
 print OUTH "\tint dacDuoSet;\n";
 print EPICS "INVARIABLE_NM FEC\_$dcuId\_DACDT_ENABLE epicsOutput.dacDuoSet int bi 0 field(ZNAM,\"OFF\") field(ONAM,\"ON\")\n";
 
+#IPC Waiting Stats
+print OUTH "\tint ipcNumWaitsInSec;\n";
+print EPICS "OUTVARIABLE FEC\_$dcuId\_IPC_NUM_LATE epicsOutput.ipcNumWaitsInSec int ao 0\n";
+
+print OUTH "\tint ipcMaxWait_ns;\n";
+print EPICS "OUTVARIABLE FEC\_$dcuId\_IPC_MAX_WAIT_NS epicsOutput.ipcMaxWait_ns int ao 0\n";
+
+print OUTH "\tint ipcMaxWaitHold_ns;\n";
+print EPICS "OUTVARIABLE FEC\_$dcuId\_IPC_MAX_WAIT_HOLD_NS epicsOutput.ipcMaxWaitHold_ns int ao 0\n";
+
+print OUTH "\tint slowIpcInList;\n";
+print EPICS "OUTVARIABLE FEC\_$dcuId\_IPC_SLOW_IN_LIST epicsOutput.slowIpcInList int ao 0\n";
+
 print OUTH "\tint IPC_TOTAL_SENDERS;\n";
 print EPICS "OUTVARIABLE FEC\_$dcuId\_IPC_TOTAL_SENDERS epicsOutput.IPC_TOTAL_SENDERS int ao 0\n";
 print OUTH "\tint IPC_TOTAL_RECEIVERS;\n";
diff --git a/src/epics/util/lib/IPCx.pm b/src/epics/util/lib/IPCx.pm
index 01c2e6af2..85efb0a4f 100644
--- a/src/epics/util/lib/IPCx.pm
+++ b/src/epics/util/lib/IPCx.pm
@@ -815,7 +815,8 @@ my ($medmDir,$mdlName,$ifo,$dcuid,$medmTarget,$ipcxCnt1) = @_;
            );
 
     # Calculate screen height based on number of IPC RCV signals
-    my $dispH = 50;
+    my $ind_ipc_ypos_start = 55; #This is the ypos start of the individual IPC table and column header
+    my $dispH = $ind_ipc_ypos_start;
     for(my $ii=0;$ii<$ipcxCnt1;$ii++)
     {
         if($::ipcxParts[$ii][9] == 0)
@@ -823,17 +824,17 @@ my ($medmDir,$mdlName,$ifo,$dcuid,$medmTarget,$ipcxCnt1) = @_;
             $dispH += 20;
         }
     }
-       my $fname = "$mdlName\_IPC_STATUS.adl";
-        # Create MEDM File
-        print "creating file $medmDir\/$fname \n";
-        open(OUTMEDM, ">$medmDir/$fname") || die "cannot open $medmDir/$fname for writing ";
+    my $fname = "$mdlName\_IPC_STATUS.adl";
+    # Create MEDM File
+    print "creating file $medmDir\/$fname \n";
+    open(OUTMEDM, ">$medmDir/$fname") || die "cannot open $medmDir/$fname for writing ";
 
 
     # Generate the base screen file, with name and height/width information
-    $medmdata = ("CDS::medmGen::medmGenFile") -> ($medmDir,$fname,"740",$dispH);
+    my $width = 750;
+    $medmdata = ("CDS::medmGen::medmGenFile") -> ($medmDir,$fname, $width, $dispH+5);
     my $xpos = 0;
     my $ypos = 0;
-    my $width = 740;
     my $height = 22;
     # Put blue rectangle banner at top of screen
     $medmdata .= ("CDS::medmGen::medmGenRectangle") -> ($xpos,$ypos,$width,$height,$ecolors{blue});
@@ -841,26 +842,47 @@ my ($medmDir,$mdlName,$ifo,$dcuid,$medmTarget,$ipcxCnt1) = @_;
     $medmdata .= ("CDS::medmGen::medmGenTextMon") -> ("540","3","160","15","$::ifo\:FEC-$::dcuId\_TIME_STRING",$ecolors{white},$ecolors{blue});
     # Add screen title to banner
     $medmdata .= ("CDS::medmGen::medmGenText") -> ("310","3","100","15","IPC RCV STATUS",$ecolors{white});
+
+    # Add IPC Waiting Stats
+    $medmdata .= ("CDS::medmGen::medmGenText") -> (25, 30, 100, 15,"Late/s:",$ecolors{black});
+    $medmdata .= ("CDS::medmGen::medmGenTextMon") -> (110, 30, 50, 15, "$::ifo\:FEC-$::dcuId\_IPC_NUM_LATE",$ecolors{white},$ecolors{black});
+
+    $medmdata .= ("CDS::medmGen::medmGenText") -> (175, 30, 100, 15,"Max Wait (ns):",$ecolors{black});
+    $medmdata .= ("CDS::medmGen::medmGenTextMon") -> (275, 30, 50, 15, "$::ifo\:FEC-$::dcuId\_IPC_MAX_WAIT_NS",$ecolors{white},$ecolors{black});
+
+    $medmdata .= ("CDS::medmGen::medmGenText") -> (345, 30, 100, 15,"Max Wait Hold (ns):",$ecolors{black});
+    $medmdata .= ("CDS::medmGen::medmGenTextMon") -> (460, 30, 50, 15, "$::ifo\:FEC-$::dcuId\_IPC_MAX_WAIT_HOLD_NS",$ecolors{white},$ecolors{black});
+
+    $medmdata .= ("CDS::medmGen::medmGenText") -> (520, 30, 100, 15,"Slow IPC in List:",$ecolors{black});
+    $medmdata .= ("CDS::medmGen::medmGenTextMon") -> (630, 30, 50, 15, "$::ifo\:FEC-$::dcuId\_IPC_SLOW_IN_LIST",$ecolors{white},$ecolors{black});
+
+
     # Add the IPC column headings
-    $medmdata .= ("CDS::medmGen::medmGenText") -> ("50","30","100","15","SIGNAL NAME",$ecolors{black});
-    $medmdata .= ("CDS::medmGen::medmGenText") -> ("220","30","100","15","SEND COMP",$ecolors{black});
-    $medmdata .= ("CDS::medmGen::medmGenText") -> ("320","30","100","15","SENDER MODEL",$ecolors{black});
-    $medmdata .= ("CDS::medmGen::medmGenText") -> ("420","30","100","15","IPC TYPE",$ecolors{black});
-    $medmdata .= ("CDS::medmGen::medmGenText") -> ("518","30","100","15","ERR/SEC",$ecolors{black});
-    $medmdata .= ("CDS::medmGen::medmGenText") -> ("30","100","15","ERR TIME",$ecolors{black});
+    $medmdata .= ("CDS::medmGen::medmGenText") -> (80, $ind_ipc_ypos_start + 5, 100, 15,"SIGNAL NAME",$ecolors{black});
+    $medmdata .= ("CDS::medmGen::medmGenText") -> (240, $ind_ipc_ypos_start + 5, 100, 15,"SEND COMP",$ecolors{black});
+    $medmdata .= ("CDS::medmGen::medmGenText") -> (340, $ind_ipc_ypos_start + 5, 100, 15,"SENDER MODEL",$ecolors{black});
+    $medmdata .= ("CDS::medmGen::medmGenText") -> (450, $ind_ipc_ypos_start + 5, 100, 15,"IPC TYPE",$ecolors{black});
+    $medmdata .= ("CDS::medmGen::medmGenText") -> (545, $ind_ipc_ypos_start + 5, 100, 15,"ERR/SEC",$ecolors{black});
+    $medmdata .= ("CDS::medmGen::medmGenText") -> (640, $ind_ipc_ypos_start + 5, 100, 15,"LAST ERR TIME",$ecolors{black});
+    $medmdata .= ("CDS::medmGen::medmGenRectangle") -> ($xpos, $ind_ipc_ypos_start, $width, 1, $ecolors{black});
+
     #print "My IPC count = @_[1]\n";
-    $ypos = 50;
+    $ypos = $ind_ipc_ypos_start + 25;
     $width = 50;
     $height = 15;
+    $row_index=0;
     # Place IPC info into the screen for each IPC RCV signal
     for($ii=0;$ii<$ipcxCnt1;$ii++)
     {
-        $xpos = 40;
+        $xpos = 70;
         # Verify that this is a RCV signal
         if($::ipcxParts[$ii][9] == 0)
         {
+            $row_index += 1;
+            #Number the IPCs
+            $medmdata .= ("CDS::medmGen::medmGenText") -> (5,$ypos,10,$height,$row_index,$ecolors{black});
             # Add signal name to screen file.
-            $medmdata .= ("CDS::medmGen::medmGenText") -> ($xpos,$ypos,"140",$height,$::ipcxParts[$ii][0],$ecolors{black});
+            $medmdata .= ("CDS::medmGen::medmGenText") -> ($xpos,$ypos,140,$height,$::ipcxParts[$ii][0],$ecolors{black});
             $xpos += 200;
             # Add name of sending computer to screen file.
             $medmdata .= ("CDS::medmGen::medmGenText") -> ($xpos,$ypos,$width,$height,$::ipcxParts[$ii][3],$ecolors{black});
diff --git a/src/fe/commData3.c b/src/fe/commData3.c
index 5a00731ea..2752d8b98 100644
--- a/src/fe/commData3.c
+++ b/src/fe/commData3.c
@@ -65,7 +65,18 @@
 #endif
 
 
-#define MULT_HEADROOM_TO_WAIT 0.8 //We will wait for 80% of the expected headroom
+/**  We will wait for 75% of the expected headroom.
+ *
+ */
+#define MULT_HEADROOM_TO_WAIT 0.75
+
+/** For slow models, we have a TON of potential headroom.
+ *  However, it really dosn't make sense to wait that long because
+ *  after enough time has passed, the IPC is not likely to come through.
+ *  This varirable controls that max wait, where we just assume the IPC
+ *  is never going to come in. 
+ */ 
+#define MAX_ABS_WAIT_TIME_NS 20000
 
 /** This is calculated in the init function, and stores a multiplier that will 
  *  convert cycle numbers from models of different rates to the equivalent
@@ -94,8 +105,18 @@ static uint64_t g_cycle_start_tsc;
  */
 static bool g_in_error_state;
 
+/** Really commData3Init() should be able to return an error, but because this
+ *  is called in feCode() that is not currently feasible. So for now, we will 
+ *  store the result of the init, and check it in the controller logic, after
+ *  the first init call to feCode().
+ */
 static bool g_init_failed;
 
+/** This structure stores stats about how long the model has waited for 
+ *  late IPC data. 
+ */
+static CDS_IPC_STATS_t g_ipc_wait_time_stats = {0, };
+
 ///	This function is called from the user application to initialize
 /// communications structures 	and pointers.
 ///	@param[in] connects = total number of IPC connections in the application
@@ -471,14 +492,19 @@ int commData3Receive(
     double tmp; // Temp location for data for checking NaN
     int numInError=0;
     uint64_t max_time_to_wait_ns;
-    int ipc_good = false;
+    int ipc_good = false, ipc_num_in_list=0, slow_ipc_num=0;
+
+    int temp_sleep_count=0;
 
+    //TODO: Do we need to account for when the max cycle time is cleared for the next second?
     if ( g_max_cycle_time_us >= ((1.0/FE_RATE)*1000000)) { //Max is larger than cycle time (no headroom)
         max_time_to_wait_ns = 0;
     }
     else {
         max_time_to_wait_ns = ((((1.0/FE_RATE)*1000000) - g_max_cycle_time_us) * 1000) * MULT_HEADROOM_TO_WAIT;
     }
+    //Never wait longer than MAX_ABS_WAIT_TIME_NS
+    if ( max_time_to_wait_ns > MAX_ABS_WAIT_TIME_NS) max_time_to_wait_ns = MAX_ABS_WAIT_TIME_NS;
 
     // Create local 65K cycle count
     cycle65k = (  cycle * g_localCycleTo65K );
@@ -492,6 +518,7 @@ int commData3Receive(
     {
         if ( ipcInfo[ ii ].mode == IRCV ) // Zero = Rcv and One = Send
         {
+            ++ipc_num_in_list;
             if ( (cycle65k % ipcInfo[ ii ].rcvCycle65k ) == 0 ) // Time to rcv
             {
 
@@ -518,25 +545,43 @@ int commData3Receive(
                     {
                         //wait, in case IPC is almost here
                         if ( g_in_error_state == false )
+                        {
                             udelay(1);
+                            ++temp_sleep_count;
+                            slow_ipc_num = ipc_num_in_list;
+                        }
                     }
 
-                } while ( timer_tock_ns(&g_cycle_start_tsc) < max_time_to_wait_ns && g_in_error_state == false );
+                } while ( timer_tock_ns(&g_cycle_start_tsc) < max_time_to_wait_ns 
+                          && g_in_error_state == false );
 
                 //We timed out
                 if( ipc_good == false ) {
                     ipcInfo[ ii ].errFlag++;
                     g_in_error_state = true;
+                    ++numInError;
                 }
                 ipc_good = false; //Reset for next IPC
-                    
 
             }
-            if (ipcInfo[ ii ].errFlag != 0)
-                ++numInError;
         }
     }
 
+    if( temp_sleep_count > 0 && g_in_error_state == false)
+    {
+        int time_waited_ns = (int)timer_tock_ns(&g_cycle_start_tsc);
+    	RTSLOG_INFO("Slept for %d ns waiting for IPC: %d\n", time_waited_ns, slow_ipc_num);
+        ++g_ipc_wait_time_stats.numWaitsInSec;
+        g_ipc_wait_time_stats.slowIpcListIndex = slow_ipc_num;
+        if( time_waited_ns > g_ipc_wait_time_stats.ipcMaxWait_ns ) 
+            g_ipc_wait_time_stats.ipcMaxWait_ns = time_waited_ns;
+    }
+    else if (  temp_sleep_count > 0 )
+    {
+        RTSLOG_INFO("Timed out waiting %d ns for IPC: %d\n", max_time_to_wait_ns, slow_ipc_num);
+    }
+
+
     // On cycle 0, set error flags to send back to EPICS
     if ( cycle == 0 )
     {
@@ -573,4 +618,10 @@ void commData3SetCycleMax(unsigned measured_max_us)
 void commData3SetCycleStartTime(uint64_t cycle_start_tsc)
 {
     g_cycle_start_tsc = cycle_start_tsc;
+}
+
+void commData3GetStats( CDS_IPC_STATS_t * stats_ptr)
+{
+    memcpy((void*)stats_ptr, &g_ipc_wait_time_stats, sizeof(*stats_ptr));
+    memset(&g_ipc_wait_time_stats, 0, sizeof(g_ipc_wait_time_stats));
 }
\ No newline at end of file
diff --git a/src/fe/controllerApp.c b/src/fe/controllerApp.c
index baf9eaa8d..141992a75 100644
--- a/src/fe/controllerApp.c
+++ b/src/fe/controllerApp.c
@@ -510,6 +510,8 @@ fe_start_controller( void )
                 clear_fpu_exceptions();
                 clearFilterHistoryChecks();
                 feStatus &= ~FE_ERROR_FPU;
+                pLocalEpics->epicsOutput.ipcMaxWaitHold_ns = 0;
+                pLocalEpics->epicsOutput.slowIpcInList = 0;
             }
             pLocalEpics->epicsOutput.diagWord = diagWord;
         }
@@ -670,7 +672,21 @@ fe_start_controller( void )
              feStatus |= check_fpu_exceptions();
         }
 
-
+        //
+        // Cycle 24 - Update IPC Wait Time Stats
+        //
+        if( cycleNum == HKP_IPC_STATS_UPDATES)
+        {
+            CDS_IPC_STATS_t stats;
+            commData3GetStats( &stats );
+            pLocalEpics->epicsOutput.ipcNumWaitsInSec = stats.numWaitsInSec;
+            pLocalEpics->epicsOutput.ipcMaxWait_ns = stats.ipcMaxWait_ns;
+            if( stats.slowIpcListIndex != 0) 
+                pLocalEpics->epicsOutput.slowIpcInList = stats.slowIpcListIndex;
+            if( stats.ipcMaxWait_ns > pLocalEpics->epicsOutput.ipcMaxWaitHold_ns)
+                pLocalEpics->epicsOutput.ipcMaxWaitHold_ns = stats.ipcMaxWait_ns;
+            
+        }
 
         // *****************************************************************
         // // Update end of cycle information
diff --git a/src/fe/controllerIop.c b/src/fe/controllerIop.c
index d03143ffe..d37f5205a 100644
--- a/src/fe/controllerIop.c
+++ b/src/fe/controllerIop.c
@@ -1212,6 +1212,8 @@ fe_start_controller( void )
                     clear_fpu_exceptions();
                     clearFilterHistoryChecks();
                     feStatus &= ~FE_ERROR_FPU; 
+                    pLocalEpics->epicsOutput.ipcMaxWaitHold_ns = 0;
+                    pLocalEpics->epicsOutput.slowIpcInList = 0;
                 }
                 pLocalEpics->epicsOutput.diagWord = diagWord;
             }
@@ -1306,6 +1308,22 @@ fe_start_controller( void )
                 feStatus |= check_fpu_exceptions();
             }
 
+            //
+            // Cycle 24 - Update IPC Wait Time Stats
+            //
+            if( hkp_cycle == HKP_IPC_STATS_UPDATES)
+            {
+                CDS_IPC_STATS_t stats;
+                commData3GetStats( &stats );
+                pLocalEpics->epicsOutput.ipcNumWaitsInSec = stats.numWaitsInSec;
+                pLocalEpics->epicsOutput.ipcMaxWait_ns = stats.ipcMaxWait_ns;
+                if( stats.slowIpcListIndex != 0) 
+                    pLocalEpics->epicsOutput.slowIpcInList = stats.slowIpcListIndex;
+                if( stats.ipcMaxWait_ns > pLocalEpics->epicsOutput.ipcMaxWaitHold_ns)
+                    pLocalEpics->epicsOutput.ipcMaxWaitHold_ns = stats.ipcMaxWait_ns;
+                
+            }
+
 #if !defined( RUN_WO_IO_MODULES ) && !defined( USE_DOLPHIN_TIMING )
             // Following read I/O
             // In systems > 64KHz, do not want to read multiple times
diff --git a/src/include/commData3.h b/src/include/commData3.h
index 319b2dbc0..cb6848a99 100644
--- a/src/include/commData3.h
+++ b/src/include/commData3.h
@@ -70,10 +70,17 @@ typedef struct CDS_IPC_INFO
     ///	Name of the model which contains the IPC sender part
     char*         senderModelName;
     ///	Pointer to the IPC data memory location
-    CDS_IPC_COMMS* pIpcDataRead;
-    CDS_IPC_COMMS* pIpcDataWrite;
+    volatile CDS_IPC_COMMS* pIpcDataRead;
+    volatile CDS_IPC_COMMS* pIpcDataWrite;
 } CDS_IPC_INFO;
 
+typedef struct CDS_IPC_STATS_t
+{
+    int32_t numWaitsInSec;
+    int32_t ipcMaxWait_ns;
+    int32_t slowIpcListIndex;
+
+} CDS_IPC_STATS_t;
 
 /// Indicates data is to be sent
 #define ISND 1
@@ -109,6 +116,7 @@ int commData3Receive(int connects, CDS_IPC_INFO ipcInfo[], int timeSec, int cycl
 void commData3SetCycleMax(unsigned measured_max_us);
 void commData3SetCycleStartTime(uint64_t cycle_start_tsc);
 int commData3GetInitStatus( void );
+void commData3GetStats( CDS_IPC_STATS_t * stats_ptr);
 
 
 #ifdef __cplusplus
diff --git a/src/include/controller.h b/src/include/controller.h
index fdb38dbd4..3846fffee 100644
--- a/src/include/controller.h
+++ b/src/include/controller.h
@@ -242,6 +242,7 @@ extern char fp[ 64 * 1024 ];
 #define HKP_ADC_DAC_STAT_UPDATES 21
 #define HKP_CHECK_EXIT 22
 #define HKP_MATH_EXCEPTION_CHECK 23
+#define HKP_IPC_STATS_UPDATES 24
 #define HKP_RFM_CHK_CYCLE 300 // ONLY IOP
 #define HKP_DAC_WD_CLK 400 // ONLY IOP
 #define HKP_DAC_WD_CHK 500 // ONLY IOP
-- 
GitLab