Skip to content
Snippets Groups Projects
Commit 115f58ed authored by Kipp Cannon's avatar Kipp Cannon
Browse files

Revert "Add explicit synchronization point."

This reverts commit 4ee78ca7.

- that patch had a number of problems:  (i) it is inappropriate for the writing routine to have the side effect of altering the graph structure, this breaks the primary use case of editing existing DAGs, wherein one loads a DAG and then write back to disk the *same* DAG;  (ii) the particular change that was introduced requires the resulting DAG to have an extra submit file that is not provided, resulting in DAGs that cannot be run;  (iii) the code would insert no-op jobs even between a single parent and its children or an only child and its parents if there are more children or parents than some threshold, increasing the size of the DAG (the opposite of what this is supposed to be doing), and because the transformation is embedded in the writing code the expansion would be repeated every time the DAG is processed;  (iv) the problem this is trying to fix is not in this code at all but in the inspiral DAG construction script, which is creating bloated parent-child relationships and is where this fix, if desired, should have been made.
- assuming one actually wants a tool to do this to DAGs, say, instead of fixing the programs that generate the bloated DAGs, the correct solution is to write a separate utility to implement the transformation, and not embed it in the writing code.  this patch provides that tool.  it is left as an exercise to others to modify their clean-up programs to make use of the tool, but, again, just fix the DAG generator.
parent df4240cd
No related branches found
No related tags found
No related merge requests found
Pipeline #88286 failed
......@@ -73,6 +73,7 @@ the DAG object.
import copy
import itertools
import re
......@@ -861,8 +862,6 @@ class DAG(object):
# initialize proegress report wrapper
progress = progress_wrapper(f, progress)
counter = 0
# if needed, create a dummy object to allow .write() method
# calls
if f is None and rescue is not None:
......@@ -926,13 +925,7 @@ class DAG(object):
parents_of.setdefault(frozenset(child.name for child in node.children) & names, set()).add(node.name)
for children, parents in parents_of.items():
if children:
if len(parents) * len(children) > 25:
counter += 1
f.write("JOB NOOP_NODE%s noop.submit NOOP\n" % str(counter))
f.write("PARENT %s CHILD NOOP_NODE%s\n" % (" ".join(sorted(parents)), str(counter)))
f.write("PARENT NOOP_NODE%s CHILD %s\n" % (str(counter), " ".join(sorted(children))))
else:
f.write("PARENT %s CHILD %s\n" % (" ".join(sorted(parents)), " ".join(sorted(children))))
f.write("PARENT %s CHILD %s\n" % (" ".join(sorted(parents)), " ".join(sorted(children))))
progress += 1
# progress
......@@ -986,3 +979,53 @@ class DAG(object):
yield '}\n'
# done
def optimize(dag):
# validate graph edges
dag.check_edges()
# generate no-op jobs
def noopgen(dag, submit_filename):
used = frozenset(name for name in dag.nodes if name.startswith("NOOP"))
for i in itertools.count():
name = "NOOP%d" % i
if name in used:
continue
noop = JOB(
name = name,
filename = submit_filename,
noop = True
)
dag.nodes[noop.name] = noop
yield noop
noops = iter(noopgen(dag, "noop.submit"))
# visit each node, construct a set of each node's children, and
# construct a look-up table mapping each unique such set to the set
# of parents possessing that set of children. these are the
# many-to-many parent-child relationships that become PARENT ...
# CHILD ... lines in the .dag. internally dagman represents each
# of these as a collection of parents*children objects (graph
# edges), so what is one line of text in the .dag file requires a
# quadratically large amount of ram in dagman.
parents_of = {}
for name, node in dag.nodes.items():
parents_of.setdefault(frozenset(node.children), set()).add(node)
# to work-around this scaling problem we insert no-op jobs bewteen
# the parents and children to replace the n*m edges with n+m edges
# plus one new node.
for children, parents in parents_of.items():
if len(parents) < 3 or len(children) < 3 or len(parents) * len(children) < 25:
# below this number of edges we don't bother
continue
noop = noops.next()
noop.parents |= parents
noop.children |= children
for node in parents:
node.children.clear()
node.children.add(noop)
for node in children:
node.parents.clear()
node.parents.add(noop)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment