Commit b84801fd authored by Jameson Rollins

worker check/init manager during pre-checks section

Since shmems are initialized to the value "0", we want to do all pre-exec
checks together before the worker status moves out of INIT.  In particular, we
move the SUBNODES value checks to this section.

We then require that worker not be in INIT for ACTIVE to be True.
parent 676504af
......@@ -527,6 +527,8 @@ guardian version: {}
and \
self['LOAD_STATUS'] == 'DONE' \
and \
self['WORKER'] != 'INIT' \
and \
not self['ERROR'] \
and \
self['CONNECT'] == 'OK'
......
......@@ -387,6 +387,13 @@ class Worker(multiprocessing.Process):
##############################
##############################
# NOTE: shmem numbers are initialized to zero, but that
# value is probably bogus right after initialization.  All the
# following checks are done before setting the worker
# status to COMMAND, so that the bogus values are only
# there during INIT.  The validity of these values should
# therefore only be assumed when the status is *not* INIT.
self['PV_TOTAL'] = len(ezca.pvs)
self['SPM_TOTAL'] = len(ezca.setpoints)
......@@ -440,6 +447,19 @@ class Worker(multiprocessing.Process):
self['STATUS'] = 'CERROR'
continue
# CHECK MANAGER SUBORDINATES
subnodes_total = set()
subnodes_not_ok = set()
for mngr in self._system.node_managers:
# make sure all nodes are initialized (node.init() is idempotent)
mngr.init()
# count subordinate nodes
subnodes_total |= set(mngr.nodes.keys())
subnodes_not_ok |= mngr.not_ok()
# FIXME: should we be running check_fault() here?
self['SUBNODES_TOTAL'] = len(subnodes_total)
self['SUBNODES_NOT_OK'] = len(subnodes_not_ok)
##############################
##############################
......@@ -458,19 +478,6 @@ class Worker(multiprocessing.Process):
try:
# MANAGER
subnodes_total = set()
subnodes_not_ok = set()
for mngr in self._system.node_managers:
# make sure all nodes are initialized (node.init() is idempotent)
mngr.init()
# count subordinate nodes
subnodes_total |= set(mngr.nodes.keys())
subnodes_not_ok |= mngr.not_ok()
# FIXME: should we be running check_fault() here?
self['SUBNODES_TOTAL'] = len(subnodes_total)
self['SUBNODES_NOT_OK'] = len(subnodes_not_ok)
###########
# USER CODE
retval = statefunc()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment