diff --git a/idq/batch.py b/idq/batch.py index d525a957474cc3ec16b76e414b02de3e8c8a8fc1..6d8b8f653fd22953a696629b926d34f5226959ba 100644 --- a/idq/batch.py +++ b/idq/batch.py @@ -123,6 +123,7 @@ def batch(gps_start, gps_end, config_path, verbose=False, num_bins=DEFAULT_BATCH #--------------------------------------------- trainreporter_kwargs = configparser.evalkwargs(**dict(config.items('train reporting'))) + trainreporter_kwargs = configparser.add_missing_kwargs(trainreporter_kwargs, group=names.tag2group(tag, 'timeseries')) batchdir = names.tag2batchdir(tag, rootdir=rootdir) nicknames = config.get('general', 'classifiers').split() @@ -387,7 +388,8 @@ def train(gps_start, gps_end, config_path, verbose=False, exclude=[]): #----------------------------------------- ### set up how we record results #----------------------------------------- - trainreporter = reporter_factory(traindir, gps_start, gps_end, **configparser.evalkwargs(**dict(config.items('train reporting')))) + ritems = configparser.evalkwargs(**dict(config.items('train reporting'))) + trainreporter = reporter_factory(traindir, gps_start, gps_end, **configparser.add_missing_kwargs(ritems, group=names.tag2group(tag, 'train'))) ### other reporters used to record intermediate data products picklereporter = io.PickleReporter(traindir, gps_start, gps_end) ### we could use this for everything @@ -456,6 +458,7 @@ def train(gps_start, gps_end, config_path, verbose=False, exclude=[]): items = config.items('train data discovery') logger.info( 'classifier_data -> '+' '.join('%s:%s'%_ for _ in items)) items = configparser.evalkwargs(**dict(items)) + items = configparser.add_missing_kwargs(items, group=names.tag2group(tag, 'train')) time = items.pop('time') ### extract the name of the column we use as time classifier_data = classifier_data_factory( @@ -727,8 +730,10 @@ def evaluate(gps_start, gps_end, config_path, verbose=False, exclude=[]): #----------------------------------------- ### set up how we record results 
#----------------------------------------- - evaluatereporter = reporter_factory(evaluatedir, gps_start, gps_end, **configparser.evalkwargs(**dict(config.items('evaluate reporting')))) - trainreporter = reporter_factory(traindir, gps_start, gps_end, **configparser.evalkwargs(**dict(config.items('train reporting')))) + ritems = configparser.evalkwargs(**dict(config.items('evaluate reporting'))) + evaluatereporter = reporter_factory(evaluatedir, gps_start, gps_end, **configparser.add_missing_kwargs(ritems, group=names.tag2group(tag, 'evaluate'))) + ritems = configparser.evalkwargs(**dict(config.items('train reporting'))) + trainreporter = reporter_factory(traindir, gps_start, gps_end, **configparser.add_missing_kwargs(ritems, group=names.tag2group(tag, 'evaluate'))) ### other reporters used to record intermediate data products picklereporter = io.PickleReporter(evaluatedir, gps_start, gps_end) ### we could use this for everything @@ -789,6 +794,7 @@ def evaluate(gps_start, gps_end, config_path, verbose=False, exclude=[]): items = config.items('evaluate data discovery') logger.info( 'classifier_data -> '+' '.join('%s:%s'%_ for _ in items)) items = configparser.evalkwargs(**dict(items)) + items = configparser.add_missing_kwargs(items, group=names.tag2group(tag, 'evaluate')) time = items.pop('time') ### extract the name of the column we use as time classifier_data = classifier_data_factory( gps_start, @@ -1020,8 +1026,10 @@ def calibrate(gps_start, gps_end, config_path, verbose=False): #----------------------------------------- ### set up how we record results #----------------------------------------- - calibratereporter = reporter_factory(calibratedir, gps_start, gps_end, **configparser.evalkwargs(**dict(config.items('calibrate reporting')))) - evaluatereporter = reporter_factory(evaluatedir, gps_start, gps_end, **configparser.evalkwargs(**dict(config.items('evaluate reporting')))) + ritems = configparser.evalkwargs(**dict(config.items('evaluate reporting'))) + 
evaluatereporter = reporter_factory(evaluatedir, gps_start, gps_end, **configparser.add_missing_kwargs(ritems, group=names.tag2group(tag, 'calibrate'))) + ritems = configparser.evalkwargs(**dict(config.items('calibrate reporting'))) + calibratereporter = reporter_factory(calibratedir, gps_start, gps_end, **configparser.add_missing_kwargs(ritems, group=names.tag2group(tag, 'calibrate'))) ### other reporters used to record intermediate data products picklereporter = io.PickleReporter(calibratedir, gps_start, gps_end) ### we could use this for everything @@ -1243,9 +1251,12 @@ def timeseries(gps_start, gps_end, config_path, verbose=False, exclude=[]): #----------------------------------------- ### set up how we record results #----------------------------------------- - timeseriesreporter = reporter_factory(timeseriesdir, gps_start, gps_end, **configparser.evalkwargs(**dict(config.items('timeseries reporting')))) - trainreporter = reporter_factory(traindir, gps_start, gps_end, **configparser.evalkwargs(**dict(config.items('train reporting')))) - calibratereporter = reporter_factory(calibratedir, gps_start, gps_end, **configparser.evalkwargs(**dict(config.items('calibrate reporting')))) + ritems = configparser.evalkwargs(**dict(config.items('train reporting'))) + trainreporter = reporter_factory(traindir, gps_start, gps_end, **configparser.add_missing_kwargs(ritems, group=names.tag2group(tag, 'timeseries'))) + ritems = configparser.evalkwargs(**dict(config.items('calibrate reporting'))) + calibratereporter = reporter_factory(calibratedir, gps_start, gps_end, **configparser.add_missing_kwargs(ritems, group=names.tag2group(tag, 'timeseries'))) + ritems = configparser.evalkwargs(**dict(config.items('timeseries reporting'))) + timeseriesreporter = reporter_factory(timeseriesdir, gps_start, gps_end, **configparser.add_missing_kwargs(ritems, group=names.tag2group(tag, 'timeseries'))) ### other reporters used to record intermediate data products picklereporter = 
io.PickleReporter(timeseriesdir, gps_start, gps_end) ### we could use this for everything @@ -1304,6 +1315,7 @@ def timeseries(gps_start, gps_end, config_path, verbose=False, exclude=[]): items = config.items('timeseries data discovery') logger.info( 'classifier_data -> '+' '.join('%s:%s'%_ for _ in items)) items = configparser.evalkwargs(**dict(items)) + items = configparser.add_missing_kwargs(items, group=names.tag2group(tag, 'timeseries')) classifier_data = classifier_data_factory( gps_start, gps_end, diff --git a/idq/configparser.py b/idq/configparser.py index 132290c45fcb62d7f0700b97f94a929cb4b85eb7..82edca48db0569133fae2f6426e708085eb360e9 100644 --- a/idq/configparser.py +++ b/idq/configparser.py @@ -80,9 +80,19 @@ def evalkwargs(**kwargs): return kwargs -#------------------------------------------------- +def add_missing_kwargs(kwargs, **new_kwargs): + """ + set kwargs[key] = val for every key in new_kwargs that kwargs lacks; existing entries are kept. kwargs is modified in place and returned. + """ + for key, val in new_kwargs.items(): + if key not in kwargs: + kwargs[key] = val + return kwargs + +#------------------------------------------------- ### configs for synthetic data + def path2streams(path, segs, verbose=False): """ read in parameters from config and set up the appropriate data structures diff --git a/idq/names.py b/idq/names.py index 98b68d3d938fa1f203322865102ddca283f8e84f..606d122c61cdb15467ff1e31d4ea3e3bc22373bd 100644 --- a/idq/names.py +++ b/idq/names.py @@ -193,6 +193,12 @@ def nickname2calibrate_topic(nickname): def nickname2timeseries_topic(nickname): return nickname2topic(nickname, 'timeseries') +#--- + +TAG2GROUP_TEMPLATE = '%s_%s' +def tag2group(tag, job): + return TAG2GROUP_TEMPLATE%(tag, job) + #------------------------------------------------- def fig2file(nickname, plot, start, dur, figtype='png'): diff --git a/idq/stream.py b/idq/stream.py index b57fd4c636c72b46243982bb9d6bdc585fdfee6c..728771c7b1c98d2a380a488e43e4c14bc8f066ba 100644 --- a/idq/stream.py +++ b/idq/stream.py @@ -95,8 +95,10 @@ def stream( #
make all the reporters we care about for this reporter_factory = factories.ReporterFactory() - trainreporter = reporter_factory(traindir, gps_start, gps_end, **configparser.evalkwargs(**dict(config.items('train reporting')))) - calibratereporter = reporter_factory(calibratedir, gps_start, gps_end, **configparser.evalkwargs(**dict(config.items('calibrate reporting')))) + ritems = configparser.evalkwargs(**dict(config.items('train reporting'))) + trainreporter = reporter_factory(traindir, gps_start, gps_end, **configparser.add_missing_kwargs(ritems, group=names.tag2group(tag, 'train'))) + ritems = configparser.evalkwargs(**dict(config.items('calibrate reporting'))) + calibratereporter = reporter_factory(calibratedir, gps_start, gps_end, **configparser.add_missing_kwargs(ritems, group=names.tag2group(tag, 'calibrate'))) # check that something exists for each classifier nicknames = config.get('general', 'classifiers').split() @@ -248,7 +250,8 @@ def train(config_path, gps_start=None, gps_end=None, verbose=False): #----------------------------------------- ### the boundaries for these will be re-set within the main loop, hence, set to zero initially - trainreporter = reporter_factory(traindir, 0, 0, **configparser.evalkwargs(**dict(config.items('train reporting')))) + ritems = configparser.evalkwargs(**dict(config.items('train reporting'))) + trainreporter = reporter_factory(traindir, 0, 0, **configparser.add_missing_kwargs(ritems, group=names.tag2group(tag, 'train'))) ### other reporters used to record intermediate data products picklereporter = io.PickleReporter(traindir, gps_start, gps_end) ### we could use this for everything @@ -293,6 +296,7 @@ def train(config_path, gps_start=None, gps_end=None, verbose=False): items = config.items('train data discovery') logger.info( 'classifier_data -> '+' '.join('%s:%s'%_ for _ in items)) items = configparser.evalkwargs(**dict(items)) + items = configparser.add_missing_kwargs(items, group=names.tag2group(tag, 'train')) time = 
items.pop('time') ### extract the name of the column we use as time sitems = config.items('train stream') @@ -536,8 +540,10 @@ def evaluate(config_path, gps_start=None, gps_end=None, verbose=False): #----------------------------------------- ### the boundaries for these will be re-set within the main loop, hence, set to zero initially - evaluatereporter = reporter_factory(evaluatedir, 0, 0, **configparser.evalkwargs(**dict(config.items('evaluate reporting')))) - trainreporter = reporter_factory(traindir, 0, 0, **configparser.evalkwargs(**dict(config.items('train reporting')))) + ritems = configparser.evalkwargs(**dict(config.items('evaluate reporting'))) + evaluatereporter = reporter_factory(evaluatedir, 0, 0, **configparser.add_missing_kwargs(ritems, group=names.tag2group(tag, 'evaluate'))) + ritems = configparser.evalkwargs(**dict(config.items('train reporting'))) + trainreporter = reporter_factory(traindir, 0, 0, **configparser.add_missing_kwargs(ritems, group=names.tag2group(tag, 'evaluate'))) ### other reporters used to record intermediate data products picklereporter = io.PickleReporter(evaluatedir, gps_start, gps_end) @@ -563,6 +569,7 @@ def evaluate(config_path, gps_start=None, gps_end=None, verbose=False): items = config.items('evaluate data discovery') logger.info( 'classifier_data -> '+' '.join('%s:%s'%_ for _ in items)) items = configparser.evalkwargs(**dict(items)) + items = configparser.add_missing_kwargs(items, group=names.tag2group(tag, 'evaluate')) time = items.pop('time') ### extract the name of the column we use as time sitems = config.items('evaluate stream') @@ -719,8 +726,10 @@ def calibrate(config_path, gps_start=None, gps_end=None, verbose=False): #----------------------------------------- ### the boundaries for these will be re-set within the main loop, hence, set to zero initially - calibratereporter = reporter_factory(calibratedir, 0, 0, **configparser.evalkwargs(**dict(config.items('calibrate reporting')))) - evaluatereporter = 
reporter_factory(evaluatedir, 0, 0, **configparser.evalkwargs(**dict(config.items('evaluate reporting')))) + ritems = configparser.evalkwargs(**dict(config.items('evaluate reporting'))) + evaluatereporter = reporter_factory(evaluatedir, 0, 0, **configparser.add_missing_kwargs(ritems, group=names.tag2group(tag, 'calibrate'))) + ritems = configparser.evalkwargs(**dict(config.items('calibrate reporting'))) + calibratereporter = reporter_factory(calibratedir, 0, 0, **configparser.add_missing_kwargs(ritems, group=names.tag2group(tag, 'calibrate'))) ### other reporters used to record intermediate data products picklereporter = io.PickleReporter(calibratedir, gps_start, gps_end) @@ -905,9 +914,12 @@ def timeseries(config_path, gps_start=None, gps_end=None, verbose=False): #----------------------------------------- ### the boundaries for these will be re-set within the main loop, hence, set to zero initially - timeseriesreporter = reporter_factory(timeseriesdir, 0, 0, **configparser.evalkwargs(**dict(config.items('timeseries reporting')))) - trainreporter = reporter_factory(traindir, 0, 0, **configparser.evalkwargs(**dict(config.items('train reporting')))) - calibratereporter = reporter_factory(calibratedir, 0, 0, **configparser.evalkwargs(**dict(config.items('calibrate reporting')))) + ritems = configparser.evalkwargs(**dict(config.items('train reporting'))) + trainreporter = reporter_factory(traindir, 0, 0, **configparser.add_missing_kwargs(ritems, group=names.tag2group(tag, 'timeseries'))) + ritems = configparser.evalkwargs(**dict(config.items('calibrate reporting'))) + calibratereporter = reporter_factory(calibratedir, 0, 0, **configparser.add_missing_kwargs(ritems, group=names.tag2group(tag, 'timeseries'))) + ritems = configparser.evalkwargs(**dict(config.items('timeseries reporting'))) + timeseriesreporter = reporter_factory(timeseriesdir, 0, 0, **configparser.add_missing_kwargs(ritems, group=names.tag2group(tag, 'timeseries'))) ### other reporters used to record 
intermediate data products picklereporter = io.PickleReporter(timeseriesdir, gps_start, gps_end) @@ -932,6 +944,7 @@ def timeseries(config_path, gps_start=None, gps_end=None, verbose=False): items = config.items('timeseries data discovery') logger.info( 'classifier_data -> '+' '.join('%s:%s'%_ for _ in items)) items = configparser.evalkwargs(**dict(items)) + items = configparser.add_missing_kwargs(items, group=names.tag2group(tag, 'timeseries')) time = items.pop('time') ### figure out timeseres sampling rate -> dt