import numpy as np
from pipit import Trace


def test_events(data_dir, ping_pong_otf2_trace):
    """
    Check that Trace.from_csv gives the same events whether it is handed a
    path to a CSV file or the CSV contents as a string, and that both agree
    with the same trace read by the OTF2 reader.
    """
    # NOTE(review): this path is relative to the current working directory and
    # the data_dir fixture is unused -- presumably the path should be derived
    # from data_dir; confirm against conftest.
    ping_pong_csv_trace = "data/ping-pong-csv.csv"
    trace_from_file = Trace.from_csv(ping_pong_csv_trace)

    # check that reading it as a string returns identical results
    # (context manager so the handle is closed even if read() raises)
    with open(ping_pong_csv_trace) as csv_file:
        trace_from_str = Trace.from_csv(csv_file.read())

    # also check that it's the same as the trace read by OTF2 reader
    otf2_trace = Trace.from_otf2(str(ping_pong_otf2_trace))

    # timestamps are floats, so compare with a tolerance
    assert np.isclose(
        trace_from_str.events["Timestamp (ns)"],
        trace_from_file.events["Timestamp (ns)"],
    ).all()
    assert np.isclose(
        otf2_trace.events["Timestamp (ns)"],
        trace_from_file.events["Timestamp (ns)"],
    ).all()

    assert (trace_from_str.events["Name"] == trace_from_file.events["Name"]).all()
    assert (otf2_trace.events["Name"] == trace_from_file.events["Name"]).all()

    assert (trace_from_str.events["Thread"] == trace_from_file.events["Thread"]).all()
    assert (otf2_trace.events["Thread"] == trace_from_file.events["Thread"]).all()

    assert (trace_from_str.events["Process"] == trace_from_file.events["Process"]).all()
    assert (otf2_trace.events["Process"] == trace_from_file.events["Process"]).all()

    assert (
        trace_from_str.events["Event Type"] == trace_from_file.events["Event Type"]
    ).all()
    assert (
        otf2_trace.events["Event Type"] == trace_from_file.events["Event Type"]
    ).all()
'/g/g92/bhatele1/umd/traces/score-p/ping-pong.otf2', 'program_arguments': []}" +30083.086937435106,Enter,"int main(int, char**)",0,1,{'region': 'Region 3'} +40288.33150186851,Enter,MPI_Init,0,1,{'region': 'Region 148'} +307730.9358165928,Instant,ProgramBegin,0,0,"{'attributes': {'Attribute 2': 26601}, 'program_name': '/g/g92/bhatele1/umd/traces/score-p/ping-pong.otf2', 'program_arguments': []}" +336979.73374932166,Enter,"int main(int, char**)",0,0,{'region': 'Region 3'} +346054.77444467926,Enter,MPI_Init,0,0,{'region': 'Region 148'} +193643138.1741584,Leave,MPI_Init,0,0, +193643835.4832178,Leave,MPI_Init,0,1, +193651646.20379105,Enter,MPI_Comm_size,0,0,{'region': 'Region 37'} +193652714.83827707,Enter,MPI_Comm_size,0,1,{'region': 'Region 37'} +193653163.00611198,Leave,MPI_Comm_size,0,0, +193654162.91198432,Leave,MPI_Comm_size,0,1, +193655654.41835713,Enter,MPI_Comm_rank,0,0,{'region': 'Region 34'} +193656280.13511068,Enter,MPI_Comm_rank,0,1,{'region': 'Region 34'} +193656794.167867,Leave,MPI_Comm_rank,0,0, +193657346.3831865,Leave,MPI_Comm_rank,0,1, +193668225.07270837,Enter,MPI_Send,0,0,{'region': 'Region 193'} +193672584.5668554,Instant,MpiSend,0,0,"{'receiver': 1, 'communicator': 'Comm 1', 'msg_tag': 10, 'msg_length': 16384}" +193677292.95417315,Enter,MPI_Recv,0,1,{'region': 'Region 176'} +193685930.32723847,Leave,MPI_Send,0,0, +193687379.3555098,Enter,MPI_Recv,0,0,{'region': 'Region 176'} +193691633.37032613,Instant,MpiRecv,0,1,"{'sender': 0, 'communicator': 'Comm 1', 'msg_tag': 10, 'msg_length': 16384}" +193696357.50795117,Leave,MPI_Recv,0,1, +193698689.50799522,Enter,MPI_Send,0,1,{'region': 'Region 193'} +193699766.2562759,Instant,MpiSend,0,1,"{'receiver': 0, 'communicator': 'Comm 1', 'msg_tag': 20, 'msg_length': 16384}" +193713098.65276185,Leave,MPI_Send,0,1, +193715693.63520956,Instant,MpiRecv,0,0,"{'sender': 1, 'communicator': 'Comm 1', 'msg_tag': 20, 'msg_length': 16384}" +193718856.106002,Leave,MPI_Recv,0,0, 
+193725622.53347322,Enter,MPI_Recv,0,1,{'region': 'Region 176'} +193743866.6394257,Enter,MPI_Send,0,0,{'region': 'Region 193'} +193744419.3320272,Instant,MpiSend,0,0,"{'receiver': 1, 'communicator': 'Comm 1', 'msg_tag': 10, 'msg_length': 32768}" +193764550.13388103,Leave,MPI_Send,0,0, +193764818.84366918,Instant,MpiRecv,0,1,"{'sender': 0, 'communicator': 'Comm 1', 'msg_tag': 10, 'msg_length': 32768}" +193764846.04874542,Enter,MPI_Recv,0,0,{'region': 'Region 176'} +193765093.75812382,Leave,MPI_Recv,0,1, +193765371.53627068,Enter,MPI_Send,0,1,{'region': 'Region 193'} +193765578.67667574,Instant,MpiSend,0,1,"{'receiver': 0, 'communicator': 'Comm 1', 'msg_tag': 20, 'msg_length': 32768}" +193783762.16780922,Leave,MPI_Send,0,1, +193784228.47236162,Instant,MpiRecv,0,0,"{'sender': 1, 'communicator': 'Comm 1', 'msg_tag': 20, 'msg_length': 32768}" +193784513.88702112,Leave,MPI_Recv,0,0, +193810524.32631716,Enter,MPI_Recv,0,1,{'region': 'Region 176'} +193825567.77891403,Enter,MPI_Send,0,0,{'region': 'Region 193'} +193825972.51408336,Instant,MpiSend,0,0,"{'receiver': 1, 'communicator': 'Comm 1', 'msg_tag': 10, 'msg_length': 65536}" +193851129.57309312,Instant,MpiRecv,0,1,"{'sender': 0, 'communicator': 'Comm 1', 'msg_tag': 10, 'msg_length': 65536}" +193851657.4470286,Leave,MPI_Recv,0,1, +193852202.5031175,Enter,MPI_Send,0,1,{'region': 'Region 193'} +193852444.96239346,Instant,MpiSend,0,1,"{'receiver': 0, 'communicator': 'Comm 1', 'msg_tag': 20, 'msg_length': 65536}" +193854930.16997212,Leave,MPI_Send,0,0, +193855196.49335006,Enter,MPI_Recv,0,0,{'region': 'Region 176'} +193889661.9839724,Instant,MpiRecv,0,0,"{'sender': 1, 'communicator': 'Comm 1', 'msg_tag': 20, 'msg_length': 65536}" +193889774.14525163,Leave,MPI_Send,0,1, +193889995.12683582,Leave,MPI_Recv,0,0, +193942036.05127355,Enter,MPI_Send,0,0,{'region': 'Region 193'} +193942509.51505655,Instant,MpiSend,0,0,"{'receiver': 1, 'communicator': 'Comm 1', 'msg_tag': 10, 'msg_length': 131072}" 
+193954523.65854996,Enter,MPI_Recv,0,1,{'region': 'Region 176'} +193992530.58190393,Instant,MpiRecv,0,1,"{'sender': 0, 'communicator': 'Comm 1', 'msg_tag': 10, 'msg_length': 131072}" +193993059.4104035,Leave,MPI_Recv,0,1, +193993445.05429125,Enter,MPI_Send,0,1,{'region': 'Region 193'} +193993702.78659248,Instant,MpiSend,0,1,"{'receiver': 0, 'communicator': 'Comm 1', 'msg_tag': 20, 'msg_length': 131072}" +193995956.51238206,Leave,MPI_Send,0,0, +193996173.19842792,Enter,MPI_Recv,0,0,{'region': 'Region 176'} +194050007.74876937,Instant,MpiRecv,0,0,"{'sender': 1, 'communicator': 'Comm 1', 'msg_tag': 20, 'msg_length': 131072}" +194050210.11635402,Leave,MPI_Send,0,1, +194050442.07542512,Leave,MPI_Recv,0,0, +194205281.91461667,Enter,MPI_Send,0,0,{'region': 'Region 193'} +194205705.74106756,Instant,MpiSend,0,0,"{'receiver': 1, 'communicator': 'Comm 1', 'msg_tag': 10, 'msg_length': 262144}" +194220003.201837,Enter,MPI_Recv,0,1,{'region': 'Region 176'} +194299518.38958535,Instant,MpiRecv,0,1,"{'sender': 0, 'communicator': 'Comm 1', 'msg_tag': 10, 'msg_length': 262144}" +194300037.6724441,Leave,MPI_Recv,0,1, +194300433.81653672,Enter,MPI_Send,0,1,{'region': 'Region 193'} +194300716.367504,Instant,MpiSend,0,1,"{'receiver': 0, 'communicator': 'Comm 1', 'msg_tag': 20, 'msg_length': 262144}" +194302812.59022063,Leave,MPI_Send,0,0, +194303143.8239559,Enter,MPI_Recv,0,0,{'region': 'Region 176'} +194408743.42971635,Instant,MpiRecv,0,0,"{'sender': 1, 'communicator': 'Comm 1', 'msg_tag': 20, 'msg_length': 262144}" +194408865.61391842,Leave,MPI_Send,0,1, +194409224.53001198,Leave,MPI_Recv,0,0, +194675378.9501026,Enter,MPI_Send,0,0,{'region': 'Region 193'} +194676157.87439075,Instant,MpiSend,0,0,"{'receiver': 1, 'communicator': 'Comm 1', 'msg_tag': 10, 'msg_length': 524288}" +194762211.34879553,Enter,MPI_Recv,0,1,{'region': 'Region 176'} +194907390.04494745,Instant,MpiRecv,0,1,"{'sender': 0, 'communicator': 'Comm 1', 'msg_tag': 10, 'msg_length': 524288}" 
+194908311.1992833,Leave,MPI_Recv,0,1, +194908774.16286144,Enter,MPI_Send,0,1,{'region': 'Region 193'} +194908999.439984,Instant,MpiSend,0,1,"{'receiver': 0, 'communicator': 'Comm 1', 'msg_tag': 20, 'msg_length': 524288}" +194911489.42038304,Leave,MPI_Send,0,0, +194911733.78878716,Enter,MPI_Recv,0,0,{'region': 'Region 176'} +195131579.44173214,Leave,MPI_Send,0,1, +195131666.30706328,Instant,MpiRecv,0,0,"{'sender': 1, 'communicator': 'Comm 1', 'msg_tag': 20, 'msg_length': 524288}" +195132209.45402402,Leave,MPI_Recv,0,0, +195717989.1556328,Enter,MPI_Send,0,0,{'region': 'Region 193'} +195718827.2628938,Instant,MpiSend,0,0,"{'receiver': 1, 'communicator': 'Comm 1', 'msg_tag': 10, 'msg_length': 1048576}" +195859370.11859792,Enter,MPI_Recv,0,1,{'region': 'Region 176'} +196135679.66863889,Instant,MpiRecv,0,1,"{'sender': 0, 'communicator': 'Comm 1', 'msg_tag': 10, 'msg_length': 1048576}" +196136540.6854378,Leave,MPI_Recv,0,1, +196136943.51147896,Enter,MPI_Send,0,1,{'region': 'Region 193'} +196137242.29003558,Instant,MpiSend,0,1,"{'receiver': 0, 'communicator': 'Comm 1', 'msg_tag': 20, 'msg_length': 1048576}" +196139793.8398177,Leave,MPI_Send,0,0, +196140050.61755484,Enter,MPI_Recv,0,0,{'region': 'Region 176'} +196583827.45770124,Leave,MPI_Send,0,1, +196583934.36887804,Instant,MpiRecv,0,0,"{'sender': 1, 'communicator': 'Comm 1', 'msg_tag': 20, 'msg_length': 1048576}" +196584467.97019798,Leave,MPI_Recv,0,0, +197613248.45135725,Enter,MPI_Send,0,0,{'region': 'Region 193'} +197614141.9233348,Instant,MpiSend,0,0,"{'receiver': 1, 'communicator': 'Comm 1', 'msg_tag': 10, 'msg_length': 2097152}" +197951492.98251072,Enter,MPI_Recv,0,1,{'region': 'Region 176'} +198502088.884028,Instant,MpiRecv,0,1,"{'sender': 0, 'communicator': 'Comm 1', 'msg_tag': 10, 'msg_length': 2097152}" +198502934.62780163,Leave,MPI_Recv,0,1, +198503365.13620108,Enter,MPI_Send,0,1,{'region': 'Region 193'} +198503650.55086058,Instant,MpiSend,0,1,"{'receiver': 0, 'communicator': 'Comm 1', 'msg_tag': 20, 
'msg_length': 2097152}" +198506398.7408429,Leave,MPI_Send,0,0, +198506691.79201505,Enter,MPI_Recv,0,0,{'region': 'Region 176'} +199319910.70381415,Leave,MPI_Send,0,1, +199319973.70504335,Instant,MpiRecv,0,0,"{'sender': 1, 'communicator': 'Comm 1', 'msg_tag': 20, 'msg_length': 2097152}" +199320512.07918367,Leave,MPI_Recv,0,0, +199514778.2785141,Enter,MPI_Finalize,0,0,{'region': 'Region 104'} +199529686.66029385,Enter,MPI_Finalize,0,1,{'region': 'Region 104'} +199573648.15437022,Leave,MPI_Finalize,0,0, +199574793.63126454,Leave,MPI_Finalize,0,1, +199575243.23094556,Leave,"int main(int, char**)",0,0, +199576798.2158296,Leave,"int main(int, char**)",0,1, +199603304.5511645,Instant,ProgramEnd,0,0,{'exit_status': -9223372036854775808} +199604459.57369962,Instant,ProgramEnd,0,1,{'exit_status': -9223372036854775808} diff --git a/pipit/trace.py b/pipit/trace.py index 6f3838b6..f4279aa1 100644 --- a/pipit/trace.py +++ b/pipit/trace.py @@ -5,6 +5,8 @@ import numpy as np import pandas as pd +from ast import literal_eval +from io import StringIO class Trace: @@ -60,6 +62,10 @@ def from_nsight(filename): @staticmethod def from_csv(filename): + # detect if the input is a CSV as a string + if "," in filename: + # wrapping with StringIO allows pandas to read it + filename = StringIO(filename) events_dataframe = pd.read_csv(filename, skipinitialspace=True) # if timestamps are in seconds, convert them to nanoseconds @@ -72,6 +78,17 @@ def from_csv(filename): # ensure that ranks are ints events_dataframe = events_dataframe.astype({"Process": "int32"}) + # this next part is needed for fake test reading + # ensure that the attributes are a dict, not a string + if "Attributes" in events_dataframe.columns: + # use literal_eval so we're not running a security risk + # don't try to literal_eval a NaN, as well + events_dataframe["Attributes"] = events_dataframe["Attributes"].apply( + lambda attr_dict: ( + literal_eval(attr_dict) if type(attr_dict) == str else attr_dict + ) + ) + # make 
import random
import textwrap
import pandas as pd
import numpy as np


# Function names used by generate_trace when the caller supplies none.
# Kept immutable so the default cannot be modified between calls.
_DEFAULT_FUNCTION_NAMES = ("foo", "bar", "baz", "quux", "grault", "garply", "waldo")


class FakeNode:
    """
    A single node of the calling tree used to produce fake traces.
    Represents a single function call.

    Attributes set by the constructor:
        name: function name of this call.
        exc_time: exclusive time of this call.
        inc_time: inclusive time; starts equal to exc_time and grows as
            children are added.
        children: dict mapping run_time (offset in this node's exclusive
            time at which the child starts) to the child node.
        is_mpi / mpi_type / mpi_tgt / mpi_volume / mpi_time: description of
            the MPI instant event attached to this call, if mpi_type != "".
        total_nodes: number of nodes in the subtree rooted here.
        parent: parent node, or None for a root.
    """

    def __init__(
        self, name, exc_time, mpi_type="", mpi_tgt=0, mpi_volume=0, mpi_time=0
    ):
        self.name = name
        self.exc_time = exc_time
        self.inc_time = exc_time
        self.children = {}  # run_time -> child node
        self.is_mpi = mpi_type != ""
        self.mpi_type = mpi_type
        self.mpi_tgt = mpi_tgt
        self.mpi_volume = mpi_volume
        self.mpi_time = mpi_time
        self.total_nodes = 1
        self.parent = None

    def grow_inc_time(self, time):
        """
        This function adjusts inclusive time metric when a new child is added,
        adding the time to each parent's inc_time, up to the root.
        """
        self.inc_time += time
        if self.parent is not None:
            self.parent.grow_inc_time(time)

    def grow_total_nodes(self, amt):
        """
        This function adjusts the total count of nodes when a new node is added,
        adding the new count to each parent's total_nodes, up to the root.
        """
        self.total_nodes += amt
        if self.parent is not None:
            self.parent.grow_total_nodes(amt)

    def add_child(self, child, run_time):
        """
        This function adds a child that executes after run_time
        exclusive time within the function represented by the current node.
        The child's node count and inclusive time are propagated to this
        node and all of its ancestors.
        """
        # children are keyed by run_time, so two children cannot share one
        assert run_time not in self.children
        self.children[run_time] = child
        child.parent = self
        self.grow_total_nodes(child.total_nodes)
        self.grow_inc_time(child.inc_time)

    def choose_random_node(self):
        """
        This function selects a random node, with all descendants
        of the current node being weighted equally, and returns it.
        """
        if not self.children:
            return self
        rng = random.random()
        for child in self.children.values():
            # each subtree is chosen in proportion to its share of the nodes
            weight = child.total_nodes / self.total_nodes
            if rng < weight:
                return child.choose_random_node()
            rng -= weight
        # the leftover probability mass (1 / total_nodes) belongs to this node
        return self

    def pick_by_name(self, name):
        """
        Returns all nodes in this tree that have the given name.
        """
        valid = [self] if self.name == name else []
        for child in self.children.values():
            valid += child.pick_by_name(name)
        return valid

    def __str__(self) -> str:
        return "{} ({})\n".format(self.name, self.exc_time) + "\n".join(
            [
                textwrap.indent(str(run_time) + ": " + str(child), "\t")
                for run_time, child in sorted(self.children.items())
            ]
        )

    def mpi_attributes(self):
        """
        Returns the Attributes dictionary for this node, including
        (if it is an MPI event) receiver/sender and msg_length.
        Non-MPI nodes yield an empty dictionary.
        """
        if not self.is_mpi:
            return {}
        attr = {"msg_length": self.mpi_volume}
        if self.mpi_type == "MpiSend":
            attr["receiver"] = self.mpi_tgt
        else:
            attr["sender"] = self.mpi_tgt
        return attr

    def to_events(self, begin_time, process, data):
        """
        Appends event data for this tree to data, with time starting at
        begin_time. data is a list of rows that is built up and then
        converted to a DataFrame once the entire tree has been processed.
        Each row is [timestamp, event type, name, process, attributes,
        time.inc, time.exc].
        """
        data.append(
            [
                begin_time,
                "Enter",
                self.name + "()",
                process,
                {},
                self.inc_time,
                self.exc_time,
            ]
        )
        if self.is_mpi:
            data.append(
                [
                    begin_time + self.mpi_time,
                    "Instant",
                    self.mpi_type,
                    process,
                    self.mpi_attributes(),
                    float("nan"),
                    float("nan"),
                ]
            )
        # total_time accumulates durations of already processed children
        total_time = begin_time
        for run_time, child in sorted(self.children.items()):
            # children will add their own lines to data
            child.to_events(total_time + run_time, process, data)
            total_time += child.inc_time

        # time.inc and time.exc are both NaN for Leave events
        data.append(
            [
                begin_time + self.inc_time,
                "Leave",
                self.name + "()",
                process,
                {},
                float("nan"),
                float("nan"),
            ]
        )

    def tweak_tree(self):
        """
        Adds small exclusive time perturbations to
        function length to generate a "similar" tree, for other processes
        to use, that is not identical to the original.
        Each node's exclusive time is scaled by a random factor in
        [0.7, 1.3), and its children's run times are scaled by the same factor.
        """
        factor = 0.7 + random.random() * 0.6
        exc_time_new = self.exc_time * factor
        # preserve inclusive time relations
        self.grow_inc_time(exc_time_new - self.exc_time)
        self.exc_time = exc_time_new
        for child in self.children.values():
            child.tweak_tree()
        # also scale back child run times
        self.children = {
            run_time * factor: child for run_time, child in self.children.items()
        }

    def deepcopy(self):
        """
        Returns a deep copy of the tree. The copy's root has no parent.
        """
        mycopy = FakeNode(
            self.name,
            self.exc_time,
            self.mpi_type,
            self.mpi_tgt,
            self.mpi_volume,
            self.mpi_time,
        )
        mycopy.inc_time = self.inc_time
        mycopy.total_nodes = self.total_nodes
        for run_time, child in self.children.items():
            child_copy = child.deepcopy()
            child_copy.parent = mycopy
            mycopy.children[run_time] = child_copy
        return mycopy

    def node_at_time(self, target_time):
        """
        Returns the node that contains the given target_time,
        as well as the offset into that node that the time occurs.
        target_time is measured from the start of this node.
        """
        total_time = 0
        # TODO: may be more efficient to store self.children sorted already
        for run_time, child in sorted(self.children.items()):
            if target_time < total_time + run_time:
                # this time occurs in the current node!
                return self, target_time - total_time
            elif target_time < total_time + run_time + child.inc_time:
                # this time occurs in the given child node
                return child.node_at_time(target_time - total_time - run_time)
            else:
                # this time occurs after this child
                total_time += child.inc_time
        # if no children find it, it must be the current node
        return self, target_time - total_time

    def insert_at_time(self, child, target_time):
        """
        Adds the child to the proper node such that it executes
        at target_time.
        """
        node, offset = self.node_at_time(target_time)
        node.add_child(child, offset)


def gen_fake_node(function_names):
    """
    Generates a node with a name picked at random from function_names
    and a random exclusive time in [0, 10).
    """
    return FakeNode(random.choice(function_names), random.random() * 10)


def gen_fake_tree(num_nodes, function_names, copy_subtrees=True):
    """
    Generates a whole tree of FakeNodes by randomly appending children
    until the tree holds at least num_nodes nodes. When copy_subtrees is
    True, a new node may instead be replaced by a tweaked copy of an
    existing subtree with the same name.
    """
    root = gen_fake_node(function_names)
    # continue to add nodes until we've reached the target
    while root.total_nodes < num_nodes:
        node = gen_fake_node(function_names)
        # choose a node that's currently in the graph to add child to
        parent = root.choose_random_node()
        # select a random point for that child to run
        run_time = random.random() * parent.exc_time
        # find nodes with the same name to copy off of
        same_name = root.pick_by_name(node.name)
        if not same_name or not copy_subtrees:
            parent.add_child(node, run_time)
        else:
            subtree = random.choice(same_name)
            # larger subtrees are less likely to be copied
            if random.random() > 4 / (subtree.total_nodes**0.5):
                parent.add_child(node, run_time)
            else:
                subtree = subtree.deepcopy()
                subtree.tweak_tree()
                parent.add_child(subtree, run_time)

    return root


def gen_forest(seed_tree, num_trees):
    """
    Generates num_trees new trees by tweaking copies of seed_tree.
    seed_tree itself is left unmodified.
    """
    forest = [seed_tree.deepcopy() for _ in range(num_trees)]
    for tree in forest:
        tree.tweak_tree()
    return forest


def add_fake_mpi_events(trees, num_pairs):
    """
    Adds fake MPI events to a set of trees (one per process). In total,
    num_pairs pairs of Send/Recv events are generated and inserted.
    Each event is a function with is_mpi=True.

    Does nothing when num_pairs is not positive. Raises ValueError when
    pairs are requested but fewer than two trees are given, since a pair
    needs two different processes.
    """
    if num_pairs <= 0:
        # nothing to insert; also keeps an empty forest from failing below
        return
    if len(trees) < 2:
        raise ValueError(
            "at least two processes are needed to generate MPI Send/Recv pairs"
        )

    # choose times for events to happen, within the shortest tree's duration
    maxtime = min(tree.inc_time for tree in trees)
    planned_evts = [random.random() * maxtime for _ in range(2 * num_pairs)]
    # sort from last to first events in timeline
    # iterate from first to last to avoid
    # dependencies among the events' times
    planned_evts.sort(reverse=True)
    while planned_evts:
        # pair two first events
        first_evt = planned_evts.pop()
        second_evt = planned_evts.pop()
        # time that the first one has to idle
        idle_time = second_evt - first_evt
        # pick two different processes
        first_proc, second_proc = random.sample(range(len(trees)), 2)
        first_tree = trees[first_proc]
        second_tree = trees[second_proc]
        # either first process sends (1) or receives (0)
        send_first = random.randint(0, 1)
        volume = random.randint(1, 1000000)
        # give both a small, random, extra time ("latency", etc)
        if send_first:
            # mpi sends don't need to block
            idle_time = 0
        first_dur = random.random() + idle_time
        second_dur = random.random()
        first_node = FakeNode(
            "MPI_Send" if send_first else "MPI_Recv",
            first_dur,
            "MpiSend" if send_first else "MpiRecv",
            second_proc,
            volume,
            random.random() * first_dur,
        )
        second_node = FakeNode(
            "MPI_Recv" if send_first else "MPI_Send",
            second_dur,
            "MpiRecv" if send_first else "MpiSend",
            first_proc,
            volume,
            random.random() * second_dur,
        )
        first_tree.insert_at_time(first_node, first_evt)
        second_tree.insert_at_time(second_node, second_evt)


def emit_tree_data(trees):
    """
    Writes trees (one per process) as a CSV and returns them.
    At the same time, return ground truth function call information.
    The ground truth data will contain columns corresponding to Pipit's
    time.inc, time.exc.

    Returns a (data_csv, ground_csv) pair of CSV strings whose rows are
    both ordered by timestamp.
    """
    data = []
    for process, tree in enumerate(trees):
        # add small random fudge factor, so that we don't have many times of exactly 0
        # which would lead to undefined sorting order and rows not matching
        tree.to_events(random.random() * 0.01, process, data)

    dataframe = pd.DataFrame(
        data,
        columns=[
            "Timestamp (s)",
            "Event Type",
            "Name",
            "Process",
            "Attributes",
            "time.inc",
            "time.exc",
        ],
    ).sort_values("Timestamp (s)")
    data_csv = dataframe[
        ["Timestamp (s)", "Event Type", "Name", "Process", "Attributes"]
    ].to_csv(index=False)
    ground_csv = dataframe[["time.inc", "time.exc"]].to_csv(index=False)
    return data_csv, ground_csv


def generate_trace(
    num_events,
    num_processes,
    function_names=None,
    num_mpi_pairs=0,
):
    """
    Top level test generation function. Generates test and ground truth datasets with a
    minimum of num_events Enter/Leave events per process, of which there are
    num_processes. Optionally, MPI events can be added.

    function_names defaults to a built-in list of placeholder names when
    None. Returns the (data_csv, ground_csv) pair from emit_tree_data.
    """
    if function_names is None:
        function_names = list(_DEFAULT_FUNCTION_NAMES)
    seed_tree = gen_fake_tree(num_events // 2, function_names)
    forest = gen_forest(seed_tree, num_processes)
    add_fake_mpi_events(forest, num_mpi_pairs)
    return emit_tree_data(forest)