Add explicit synchronization point.

If there's a large number of parent / child relationships, instead
of having a set of all-to-all arcs (which can number into the tens
of millions for many parents and children in one line), add in an
explicit no-op node.  This causes the number of arcs to grow linearly
instead of quadratically.

HTCondor will do this automatically in future versions; this commit
should be reverted once the new dagman is available.
parent ae00bd49
Pipeline #65590 passed with stages
in 27 minutes and 23 seconds
......@@ -835,6 +835,8 @@ class DAG(object):
# initialize progress report wrapper
progress = progress_wrapper(f, progress)
counter = 0
# if needed, create a dummy object to allow .write() method
# calls
if f is None and rescue is not None:
......@@ -898,7 +900,13 @@ class DAG(object):
parents_of.setdefault(frozenset( for child in node.children) & names, set()).add(
for children, parents in parents_of.items():
if children:
f.write("PARENT %s CHILD %s\n" % (" ".join(sorted(parents)), " ".join(sorted(children))))
if len(parents) * len(children) > 25:
counter += 1
f.write("JOB NOOP_NODE%s noop.submit NOOP\n" % str(counter))
f.write("PARENT %s CHILD NOOP_NODE%s\n" % (" ".join(sorted(parents)), str(counter)))
f.write("PARENT NOOP_NODE%s CHILD %s\n" % (str(counter), " ".join(sorted(children))))
f.write("PARENT %s CHILD %s\n" % (" ".join(sorted(parents)), " ".join(sorted(children))))
progress += 1
# progress
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment