From 85b899d11e12406c9f11143380e27ae4c6adf2ba Mon Sep 17 00:00:00 2001 From: "ezekiel.dohmen" <ezekiel.dohmen@ligo.org> Date: Thu, 21 Mar 2024 19:28:51 -0500 Subject: [PATCH] Adding stats to epics and updating medm screen --- src/epics/util/feCodeGen.pl | 13 ++++++++ src/epics/util/lib/IPCx.pm | 54 +++++++++++++++++++++---------- src/fe/commData3.c | 63 +++++++++++++++++++++++++++++++++---- src/fe/controllerApp.c | 18 ++++++++++- src/fe/controllerIop.c | 18 +++++++++++ src/include/commData3.h | 12 +++++-- src/include/controller.h | 1 + 7 files changed, 154 insertions(+), 25 deletions(-) diff --git a/src/epics/util/feCodeGen.pl b/src/epics/util/feCodeGen.pl index 7ee41f86d..39d11afd4 100755 --- a/src/epics/util/feCodeGen.pl +++ b/src/epics/util/feCodeGen.pl @@ -1538,6 +1538,19 @@ if ( length($subs) == 0 ) { print OUTH "\tint dacDuoSet;\n"; print EPICS "INVARIABLE_NM FEC\_$dcuId\_DACDT_ENABLE epicsOutput.dacDuoSet int bi 0 field(ZNAM,\"OFF\") field(ONAM,\"ON\")\n"; +#IPC Waiting Stats +print OUTH "\tint ipcNumWaitsInSec;\n"; +print EPICS "OUTVARIABLE FEC\_$dcuId\_IPC_NUM_LATE epicsOutput.ipcNumWaitsInSec int ao 0\n"; + +print OUTH "\tint ipcMaxWait_ns;\n"; +print EPICS "OUTVARIABLE FEC\_$dcuId\_IPC_MAX_WAIT_NS epicsOutput.ipcMaxWait_ns int ao 0\n"; + +print OUTH "\tint ipcMaxWaitHold_ns;\n"; +print EPICS "OUTVARIABLE FEC\_$dcuId\_IPC_MAX_WAIT_HOLD_NS epicsOutput.ipcMaxWaitHold_ns int ao 0\n"; + +print OUTH "\tint slowIpcInList;\n"; +print EPICS "OUTVARIABLE FEC\_$dcuId\_IPC_SLOW_IN_LIST epicsOutput.slowIpcInList int ao 0\n"; + print OUTH "\tint IPC_TOTAL_SENDERS;\n"; print EPICS "OUTVARIABLE FEC\_$dcuId\_IPC_TOTAL_SENDERS epicsOutput.IPC_TOTAL_SENDERS int ao 0\n"; print OUTH "\tint IPC_TOTAL_RECEIVERS;\n"; diff --git a/src/epics/util/lib/IPCx.pm b/src/epics/util/lib/IPCx.pm index 01c2e6af2..85efb0a4f 100644 --- a/src/epics/util/lib/IPCx.pm +++ b/src/epics/util/lib/IPCx.pm @@ -815,7 +815,8 @@ my ($medmDir,$mdlName,$ifo,$dcuid,$medmTarget,$ipcxCnt1) = @_; ); # Calculate screen height based on number of IPC RCV signals - my $dispH = 50; + my $ind_ipc_ypos_start = 55; #This is the ypos start of the individual IPC table and column header + my $dispH = $ind_ipc_ypos_start; for(my $ii=0;$ii<$ipcxCnt1;$ii++) { if($::ipcxParts[$ii][9] == 0) @@ -823,17 +824,17 @@ my ($medmDir,$mdlName,$ifo,$dcuid,$medmTarget,$ipcxCnt1) = @_; $dispH += 20; } } - my $fname = "$mdlName\_IPC_STATUS.adl"; - # Create MEDM File - print "creating file $medmDir\/$fname \n"; - open(OUTMEDM, ">$medmDir/$fname") || die "cannot open $medmDir/$fname for writing "; + my $fname = "$mdlName\_IPC_STATUS.adl"; + # Create MEDM File + print "creating file $medmDir\/$fname \n"; + open(OUTMEDM, ">$medmDir/$fname") || die "cannot open $medmDir/$fname for writing "; # Generate the base screen file, with name and height/width information - $medmdata = ("CDS::medmGen::medmGenFile") -> ($medmDir,$fname,"740",$dispH); + my $width = 750; + $medmdata = ("CDS::medmGen::medmGenFile") -> ($medmDir,$fname, $width, $dispH+5); my $xpos = 0; my $ypos = 0; - my $width = 740; my $height = 22; # Put blue rectangle banner at top of screen $medmdata .= ("CDS::medmGen::medmGenRectangle") -> ($xpos,$ypos,$width,$height,$ecolors{blue}); @@ -841,26 +842,47 @@ my ($medmDir,$mdlName,$ifo,$dcuid,$medmTarget,$ipcxCnt1) = @_; $medmdata .= ("CDS::medmGen::medmGenTextMon") -> ("540","3","160","15","$::ifo\:FEC-$::dcuId\_TIME_STRING",$ecolors{white},$ecolors{blue}); # Add screen title to banner $medmdata .= ("CDS::medmGen::medmGenText") -> ("310","3","100","15","IPC RCV STATUS",$ecolors{white}); + + # Add IPC Waiting Stats + $medmdata .= ("CDS::medmGen::medmGenText") -> (25, 30, 100, 15,"Late/s:",$ecolors{black}); + $medmdata .= ("CDS::medmGen::medmGenTextMon") -> (110, 30, 50, 15, "$::ifo\:FEC-$::dcuId\_IPC_NUM_LATE",$ecolors{white},$ecolors{black}); + + $medmdata .= ("CDS::medmGen::medmGenText") -> (175, 30, 100, 15,"Max Wait (ns):",$ecolors{black}); + $medmdata .= ("CDS::medmGen::medmGenTextMon") -> (275, 30, 50, 15, "$::ifo\:FEC-$::dcuId\_IPC_MAX_WAIT_NS",$ecolors{white},$ecolors{black}); + + $medmdata .= ("CDS::medmGen::medmGenText") -> (345, 30, 100, 15,"Max Wait Hold (ns):",$ecolors{black}); + $medmdata .= ("CDS::medmGen::medmGenTextMon") -> (460, 30, 50, 15, "$::ifo\:FEC-$::dcuId\_IPC_MAX_WAIT_HOLD_NS",$ecolors{white},$ecolors{black}); + + $medmdata .= ("CDS::medmGen::medmGenText") -> (520, 30, 100, 15,"Slow IPC in List:",$ecolors{black}); + $medmdata .= ("CDS::medmGen::medmGenTextMon") -> (630, 30, 50, 15, "$::ifo\:FEC-$::dcuId\_IPC_SLOW_IN_LIST",$ecolors{white},$ecolors{black}); + + # Add the IPC column headings - $medmdata .= ("CDS::medmGen::medmGenText") -> ("50","30","100","15","SIGNAL NAME",$ecolors{black}); - $medmdata .= ("CDS::medmGen::medmGenText") -> ("220","30","100","15","SEND COMP",$ecolors{black}); - $medmdata .= ("CDS::medmGen::medmGenText") -> ("320","30","100","15","SENDER MODEL",$ecolors{black}); - $medmdata .= ("CDS::medmGen::medmGenText") -> ("420","30","100","15","IPC TYPE",$ecolors{black}); - $medmdata .= ("CDS::medmGen::medmGenText") -> ("518","30","100","15","ERR/SEC",$ecolors{black}); - $medmdata .= ("CDS::medmGen::medmGenText") -> ("30","100","15","ERR TIME",$ecolors{black}); + $medmdata .= ("CDS::medmGen::medmGenText") -> (80, $ind_ipc_ypos_start + 5, 100, 15,"SIGNAL NAME",$ecolors{black}); + $medmdata .= ("CDS::medmGen::medmGenText") -> (240, $ind_ipc_ypos_start + 5, 100, 15,"SEND COMP",$ecolors{black}); + $medmdata .= ("CDS::medmGen::medmGenText") -> (340, $ind_ipc_ypos_start + 5, 100, 15,"SENDER MODEL",$ecolors{black}); + $medmdata .= ("CDS::medmGen::medmGenText") -> (450, $ind_ipc_ypos_start + 5, 100, 15,"IPC TYPE",$ecolors{black}); + $medmdata .= ("CDS::medmGen::medmGenText") -> (545, $ind_ipc_ypos_start + 5, 100, 15,"ERR/SEC",$ecolors{black}); + $medmdata .= ("CDS::medmGen::medmGenText") -> (640, $ind_ipc_ypos_start + 5, 100, 15,"LAST ERR TIME",$ecolors{black}); + $medmdata .= ("CDS::medmGen::medmGenRectangle") -> ($xpos, $ind_ipc_ypos_start, $width, 1, $ecolors{black}); + #print "My IPC count = @_[1]\n"; - $ypos = 50; + $ypos = $ind_ipc_ypos_start + 25; $width = 50; $height = 15; + $row_index=0; # Place IPC info into the screen for each IPC RCV signal for($ii=0;$ii<$ipcxCnt1;$ii++) { - $xpos = 40; + $xpos = 70; # Verify that this is a RCV signal if($::ipcxParts[$ii][9] == 0) { + $row_index += 1; + #Number the IPCs + $medmdata .= ("CDS::medmGen::medmGenText") -> (5,$ypos,10,$height,$row_index,$ecolors{black}); # Add signal name to screen file. - $medmdata .= ("CDS::medmGen::medmGenText") -> ($xpos,$ypos,"140",$height,$::ipcxParts[$ii][0],$ecolors{black}); + $medmdata .= ("CDS::medmGen::medmGenText") -> ($xpos,$ypos,140,$height,$::ipcxParts[$ii][0],$ecolors{black}); $xpos += 200; # Add name of sending computer to screen file. $medmdata .= ("CDS::medmGen::medmGenText") -> ($xpos,$ypos,$width,$height,$::ipcxParts[$ii][3],$ecolors{black}); diff --git a/src/fe/commData3.c b/src/fe/commData3.c index 5a00731ea..2752d8b98 100644 --- a/src/fe/commData3.c +++ b/src/fe/commData3.c @@ -65,7 +65,18 @@ #endif -#define MULT_HEADROOM_TO_WAIT 0.8 //We will wait for 80% of the expected headroom +/** We will wait for 75% of the expected headroom. + * + */ +#define MULT_HEADROOM_TO_WAIT 0.75 + +/** For slow models, we have a TON of potential headroom. + * However, it really dosn't make sense to wait that long because + * after enough time has passed, the IPC is not likely to come through. + * This varirable controls that max wait, where we just assume the IPC + * is never going to come in. + */ +#define MAX_ABS_WAIT_TIME_NS 20000 /** This is calculated in the init function, and stores a multiplier that will * convert cycle numbers from models of different rates to the equivalent @@ -94,8 +105,18 @@ static uint64_t g_cycle_start_tsc; */ static bool g_in_error_state; +/** Really commData3Init() should be able to return an error, but because this + * is called in feCode() that is not currently feasible. So for now, we will + * store the result of the init, and check it in the controller logic, after + * the first init call to feCode(). + */ static bool g_init_failed; +/** This structure stores stats about how long the model has waited for + * late IPC data. + */ +static CDS_IPC_STATS_t g_ipc_wait_time_stats = {0, }; + /// This function is called from the user application to initialize /// communications structures and pointers. /// @param[in] connects = total number of IPC connections in the application @@ -471,14 +492,19 @@ int commData3Receive( double tmp; // Temp location for data for checking NaN int numInError=0; uint64_t max_time_to_wait_ns; - int ipc_good = false; + int ipc_good = false, ipc_num_in_list=0, slow_ipc_num=0; + + int temp_sleep_count=0; + //TODO: Do we need to account for when the max cycle time is cleared for the next second? if ( g_max_cycle_time_us >= ((1.0/FE_RATE)*1000000)) { //Max is larger than cycle time (no headroom) max_time_to_wait_ns = 0; } else { max_time_to_wait_ns = ((((1.0/FE_RATE)*1000000) - g_max_cycle_time_us) * 1000) * MULT_HEADROOM_TO_WAIT; } + //Never wait longer than MAX_ABS_WAIT_TIME_NS + if ( max_time_to_wait_ns > MAX_ABS_WAIT_TIME_NS) max_time_to_wait_ns = MAX_ABS_WAIT_TIME_NS; // Create local 65K cycle count cycle65k = ( cycle * g_localCycleTo65K ); @@ -492,6 +518,7 @@ int commData3Receive( { if ( ipcInfo[ ii ].mode == IRCV ) // Zero = Rcv and One = Send { + ++ipc_num_in_list; if ( (cycle65k % ipcInfo[ ii ].rcvCycle65k ) == 0 ) // Time to rcv { @@ -518,25 +545,43 @@ int commData3Receive( { //wait, in case IPC is almost here if ( g_in_error_state == false ) + { udelay(1); + ++temp_sleep_count; + slow_ipc_num = ipc_num_in_list; + } } - } while ( timer_tock_ns(&g_cycle_start_tsc) < max_time_to_wait_ns && g_in_error_state == false ); + } while ( timer_tock_ns(&g_cycle_start_tsc) < max_time_to_wait_ns + && g_in_error_state == false ); //We timed out if( ipc_good == false ) { ipcInfo[ ii ].errFlag++; g_in_error_state = true; + ++numInError; } ipc_good = false; //Reset for next IPC - } - if (ipcInfo[ ii ].errFlag != 0) - ++numInError; } } + if( temp_sleep_count > 0 && g_in_error_state == false) + { + int time_waited_ns = (int)timer_tock_ns(&g_cycle_start_tsc); + RTSLOG_INFO("Slept for %d ns waiting for IPC: %d\n", time_waited_ns, slow_ipc_num); + ++g_ipc_wait_time_stats.numWaitsInSec; + g_ipc_wait_time_stats.slowIpcListIndex = slow_ipc_num; + if( time_waited_ns > g_ipc_wait_time_stats.ipcMaxWait_ns ) + g_ipc_wait_time_stats.ipcMaxWait_ns = time_waited_ns; + } + else if ( temp_sleep_count > 0 ) + { + RTSLOG_INFO("Timed out waiting %d ns for IPC: %d\n", max_time_to_wait_ns, slow_ipc_num); + } + + // On cycle 0, set error flags to send back to EPICS if ( cycle == 0 ) { @@ -573,4 +618,10 @@ void commData3SetCycleMax(unsigned measured_max_us) void commData3SetCycleStartTime(uint64_t cycle_start_tsc) { g_cycle_start_tsc = cycle_start_tsc; +} + +void commData3GetStats( CDS_IPC_STATS_t * stats_ptr) +{ + memcpy((void*)stats_ptr, &g_ipc_wait_time_stats, sizeof(*stats_ptr)); + memset(&g_ipc_wait_time_stats, 0, sizeof(g_ipc_wait_time_stats)); } \ No newline at end of file diff --git a/src/fe/controllerApp.c b/src/fe/controllerApp.c index baf9eaa8d..141992a75 100644 --- a/src/fe/controllerApp.c +++ b/src/fe/controllerApp.c @@ -510,6 +510,8 @@ fe_start_controller( void ) clear_fpu_exceptions(); clearFilterHistoryChecks(); feStatus &= ~FE_ERROR_FPU; + pLocalEpics->epicsOutput.ipcMaxWaitHold_ns = 0; + pLocalEpics->epicsOutput.slowIpcInList = 0; } pLocalEpics->epicsOutput.diagWord = diagWord; } @@ -670,7 +672,21 @@ fe_start_controller( void ) feStatus |= check_fpu_exceptions(); } - + // + // Cycle 24 - Update IPC Wait Time Stats + // + if( cycleNum == HKP_IPC_STATS_UPDATES) + { + CDS_IPC_STATS_t stats; + commData3GetStats( &stats ); + pLocalEpics->epicsOutput.ipcNumWaitsInSec = stats.numWaitsInSec; + pLocalEpics->epicsOutput.ipcMaxWait_ns = stats.ipcMaxWait_ns; + if( stats.slowIpcListIndex != 0) + pLocalEpics->epicsOutput.slowIpcInList = stats.slowIpcListIndex; + if( stats.ipcMaxWait_ns > pLocalEpics->epicsOutput.ipcMaxWaitHold_ns) + pLocalEpics->epicsOutput.ipcMaxWaitHold_ns = stats.ipcMaxWait_ns; + + } // ***************************************************************** // // Update end of cycle information diff --git a/src/fe/controllerIop.c b/src/fe/controllerIop.c index d03143ffe..d37f5205a 100644 --- a/src/fe/controllerIop.c +++ b/src/fe/controllerIop.c @@ -1212,6 +1212,8 @@ fe_start_controller( void ) clear_fpu_exceptions(); clearFilterHistoryChecks(); feStatus &= ~FE_ERROR_FPU; + pLocalEpics->epicsOutput.ipcMaxWaitHold_ns = 0; + pLocalEpics->epicsOutput.slowIpcInList = 0; } pLocalEpics->epicsOutput.diagWord = diagWord; } @@ -1306,6 +1308,22 @@ fe_start_controller( void ) feStatus |= check_fpu_exceptions(); } + // + // Cycle 24 - Update IPC Wait Time Stats + // + if( hkp_cycle == HKP_IPC_STATS_UPDATES) + { + CDS_IPC_STATS_t stats; + commData3GetStats( &stats ); + pLocalEpics->epicsOutput.ipcNumWaitsInSec = stats.numWaitsInSec; + pLocalEpics->epicsOutput.ipcMaxWait_ns = stats.ipcMaxWait_ns; + if( stats.slowIpcListIndex != 0) + pLocalEpics->epicsOutput.slowIpcInList = stats.slowIpcListIndex; + if( stats.ipcMaxWait_ns > pLocalEpics->epicsOutput.ipcMaxWaitHold_ns) + pLocalEpics->epicsOutput.ipcMaxWaitHold_ns = stats.ipcMaxWait_ns; + + } + #if !defined( RUN_WO_IO_MODULES ) && !defined( USE_DOLPHIN_TIMING ) // Following read I/O // In systems > 64KHz, do not want to read multiple times diff --git a/src/include/commData3.h b/src/include/commData3.h index 319b2dbc0..cb6848a99 100644 --- a/src/include/commData3.h +++ b/src/include/commData3.h @@ -70,10 +70,17 @@ typedef struct CDS_IPC_INFO /// Name of the model which contains the IPC sender part char* senderModelName; /// Pointer to the IPC data memory location - CDS_IPC_COMMS* pIpcDataRead; - CDS_IPC_COMMS* pIpcDataWrite; + volatile CDS_IPC_COMMS* pIpcDataRead; + volatile CDS_IPC_COMMS* pIpcDataWrite; } CDS_IPC_INFO; +typedef struct CDS_IPC_STATS_t +{ + int32_t numWaitsInSec; + int32_t ipcMaxWait_ns; + int32_t slowIpcListIndex; + +} CDS_IPC_STATS_t; /// Indicates data is to be sent #define ISND 1 @@ -109,6 +116,7 @@ int commData3Receive(int connects, CDS_IPC_INFO ipcInfo[], int timeSec, int cycl void commData3SetCycleMax(unsigned measured_max_us); void commData3SetCycleStartTime(uint64_t cycle_start_tsc); int commData3GetInitStatus( void ); +void commData3GetStats( CDS_IPC_STATS_t * stats_ptr); #ifdef __cplusplus diff --git a/src/include/controller.h b/src/include/controller.h index fdb38dbd4..3846fffee 100644 --- a/src/include/controller.h +++ b/src/include/controller.h @@ -242,6 +242,7 @@ extern char fp[ 64 * 1024 ]; #define HKP_ADC_DAC_STAT_UPDATES 21 #define HKP_CHECK_EXIT 22 #define HKP_MATH_EXCEPTION_CHECK 23 +#define HKP_IPC_STATS_UPDATES 24 #define HKP_RFM_CHK_CYCLE 300 // ONLY IOP #define HKP_DAC_WD_CLK 400 // ONLY IOP #define HKP_DAC_WD_CHK 500 // ONLY IOP -- GitLab