diff --git a/pipit/tests/csv-tests.py b/pipit/tests/csv-tests.py
new file mode 100644
index 00000000..a4a7cfec
--- /dev/null
+++ b/pipit/tests/csv-tests.py
@@ -0,0 +1,40 @@
+import numpy as np
+from pipit import Trace
+
+
+def test_events(data_dir, ping_pong_otf2_trace):
+    """
+    Check that Trace.from_csv gives the same events whether it is handed a
+    path to a CSV file or the CSV contents as a string, and that both match
+    the events produced by the OTF2 reader for the ping-pong trace.
+
+    data_dir is requested as a fixture but is not used by this test.
+    NOTE(review): presumably it points at the tests' data directory; confirm
+    and consider building the CSV path from it instead.
+    ping_pong_otf2_trace is converted with str() and passed to
+    Trace.from_otf2.
+    """
+    # imported locally so this test does not depend on the module's imports
+    import os
+
+    # resolve the fixture next to this test file, so that the test does not
+    # depend on the directory pytest happens to be started from
+    ping_pong_csv_trace = os.path.join(
+        os.path.dirname(os.path.abspath(__file__)), "data", "ping-pong-csv.csv"
+    )
+    trace_from_file = Trace.from_csv(ping_pong_csv_trace)
+
+    # check that reading it as a string returns identical results;
+    # the context manager closes the file even if reading or parsing fails
+    with open(ping_pong_csv_trace) as csv_file:
+        trace_from_str = Trace.from_csv(csv_file.read())
+
+    # also check that it's the same as the trace read by OTF2 reader
+    otf2_trace = Trace.from_otf2(str(ping_pong_otf2_trace))
+
+    expected = trace_from_file.events
+
+    # timestamps are floats, so compare them with a tolerance
+    assert np.isclose(
+        trace_from_str.events["Timestamp (ns)"],
+        expected["Timestamp (ns)"],
+    ).all()
+    assert np.isclose(
+        otf2_trace.events["Timestamp (ns)"],
+        expected["Timestamp (ns)"],
+    ).all()
+
+    # the remaining columns must match exactly
+    for column in ("Name", "Thread", "Process", "Event Type"):
+        assert (trace_from_str.events[column] == expected[column]).all()
+        assert (otf2_trace.events[column] == expected[column]).all()
diff --git a/pipit/tests/data/ping-pong-csv.csv b/pipit/tests/data/ping-pong-csv.csv
new file mode 100644
index 00000000..30276349
--- /dev/null
+++ b/pipit/tests/data/ping-pong-csv.csv
@@ -0,0 +1,121 @@
+Timestamp (ns),Event Type,Name,Thread,Process,Attributes
+0.0,Instant,ProgramBegin,0,1,"{'attributes': {'Attribute 2': 26602}, 'program_name': '/g/g92/bhatele1/umd/traces/score-p/ping-pong.otf2', 'program_arguments': []}"
+30083.086937435106,Enter,"int main(int, char**)",0,1,{'region': 'Region 3'}
+40288.33150186851,Enter,MPI_Init,0,1,{'region': 'Region 148'}
+307730.9358165928,Instant,ProgramBegin,0,0,"{'attributes': {'Attribute 2': 26601}, 'program_name': '/g/g92/bhatele1/umd/traces/score-p/ping-pong.otf2', 'program_arguments': []}"
+336979.73374932166,Enter,"int main(int, char**)",0,0,{'region': 'Region 3'}
+346054.77444467926,Enter,MPI_Init,0,0,{'region': 'Region 148'}
+193643138.1741584,Leave,MPI_Init,0,0,
+193643835.4832178,Leave,MPI_Init,0,1,
+193651646.20379105,Enter,MPI_Comm_size,0,0,{'region': 'Region 37'}
+193652714.83827707,Enter,MPI_Comm_size,0,1,{'region': 'Region 37'}
+193653163.00611198,Leave,MPI_Comm_size,0,0,
+193654162.91198432,Leave,MPI_Comm_size,0,1,
+193655654.41835713,Enter,MPI_Comm_rank,0,0,{'region': 'Region 34'}
+193656280.13511068,Enter,MPI_Comm_rank,0,1,{'region': 'Region 34'}
+193656794.167867,Leave,MPI_Comm_rank,0,0,
+193657346.3831865,Leave,MPI_Comm_rank,0,1,
+193668225.07270837,Enter,MPI_Send,0,0,{'region': 'Region 193'}
+193672584.5668554,Instant,MpiSend,0,0,"{'receiver': 1, 'communicator': 'Comm 1', 'msg_tag': 10, 'msg_length': 16384}"
+193677292.95417315,Enter,MPI_Recv,0,1,{'region': 'Region 176'}
+193685930.32723847,Leave,MPI_Send,0,0,
+193687379.3555098,Enter,MPI_Recv,0,0,{'region': 'Region 176'}
+193691633.37032613,Instant,MpiRecv,0,1,"{'sender': 0, 'communicator': 'Comm 1', 'msg_tag': 10, 'msg_length': 16384}"
+193696357.50795117,Leave,MPI_Recv,0,1,
+193698689.50799522,Enter,MPI_Send,0,1,{'region': 'Region 193'}
+193699766.2562759,Instant,MpiSend,0,1,"{'receiver': 0, 'communicator': 'Comm 1', 'msg_tag': 20, 'msg_length': 16384}"
+193713098.65276185,Leave,MPI_Send,0,1,
+193715693.63520956,Instant,MpiRecv,0,0,"{'sender': 1, 'communicator': 'Comm 1', 'msg_tag': 20, 'msg_length': 16384}"
+193718856.106002,Leave,MPI_Recv,0,0,
+193725622.53347322,Enter,MPI_Recv,0,1,{'region': 'Region 176'}
+193743866.6394257,Enter,MPI_Send,0,0,{'region': 'Region 193'}
+193744419.3320272,Instant,MpiSend,0,0,"{'receiver': 1, 'communicator': 'Comm 1', 'msg_tag': 10, 'msg_length': 32768}"
+193764550.13388103,Leave,MPI_Send,0,0,
+193764818.84366918,Instant,MpiRecv,0,1,"{'sender': 0, 'communicator': 'Comm 1', 'msg_tag': 10, 'msg_length': 32768}"
+193764846.04874542,Enter,MPI_Recv,0,0,{'region': 'Region 176'}
+193765093.75812382,Leave,MPI_Recv,0,1,
+193765371.53627068,Enter,MPI_Send,0,1,{'region': 'Region 193'}
+193765578.67667574,Instant,MpiSend,0,1,"{'receiver': 0, 'communicator': 'Comm 1', 'msg_tag': 20, 'msg_length': 32768}"
+193783762.16780922,Leave,MPI_Send,0,1,
+193784228.47236162,Instant,MpiRecv,0,0,"{'sender': 1, 'communicator': 'Comm 1', 'msg_tag': 20, 'msg_length': 32768}"
+193784513.88702112,Leave,MPI_Recv,0,0,
+193810524.32631716,Enter,MPI_Recv,0,1,{'region': 'Region 176'}
+193825567.77891403,Enter,MPI_Send,0,0,{'region': 'Region 193'}
+193825972.51408336,Instant,MpiSend,0,0,"{'receiver': 1, 'communicator': 'Comm 1', 'msg_tag': 10, 'msg_length': 65536}"
+193851129.57309312,Instant,MpiRecv,0,1,"{'sender': 0, 'communicator': 'Comm 1', 'msg_tag': 10, 'msg_length': 65536}"
+193851657.4470286,Leave,MPI_Recv,0,1,
+193852202.5031175,Enter,MPI_Send,0,1,{'region': 'Region 193'}
+193852444.96239346,Instant,MpiSend,0,1,"{'receiver': 0, 'communicator': 'Comm 1', 'msg_tag': 20, 'msg_length': 65536}"
+193854930.16997212,Leave,MPI_Send,0,0,
+193855196.49335006,Enter,MPI_Recv,0,0,{'region': 'Region 176'}
+193889661.9839724,Instant,MpiRecv,0,0,"{'sender': 1, 'communicator': 'Comm 1', 'msg_tag': 20, 'msg_length': 65536}"
+193889774.14525163,Leave,MPI_Send,0,1,
+193889995.12683582,Leave,MPI_Recv,0,0,
+193942036.05127355,Enter,MPI_Send,0,0,{'region': 'Region 193'}
+193942509.51505655,Instant,MpiSend,0,0,"{'receiver': 1, 'communicator': 'Comm 1', 'msg_tag': 10, 'msg_length': 131072}"
+193954523.65854996,Enter,MPI_Recv,0,1,{'region': 'Region 176'}
+193992530.58190393,Instant,MpiRecv,0,1,"{'sender': 0, 'communicator': 'Comm 1', 'msg_tag': 10, 'msg_length': 131072}"
+193993059.4104035,Leave,MPI_Recv,0,1,
+193993445.05429125,Enter,MPI_Send,0,1,{'region': 'Region 193'}
+193993702.78659248,Instant,MpiSend,0,1,"{'receiver': 0, 'communicator': 'Comm 1', 'msg_tag': 20, 'msg_length': 131072}"
+193995956.51238206,Leave,MPI_Send,0,0,
+193996173.19842792,Enter,MPI_Recv,0,0,{'region': 'Region 176'}
+194050007.74876937,Instant,MpiRecv,0,0,"{'sender': 1, 'communicator': 'Comm 1', 'msg_tag': 20, 'msg_length': 131072}"
+194050210.11635402,Leave,MPI_Send,0,1,
+194050442.07542512,Leave,MPI_Recv,0,0,
+194205281.91461667,Enter,MPI_Send,0,0,{'region': 'Region 193'}
+194205705.74106756,Instant,MpiSend,0,0,"{'receiver': 1, 'communicator': 'Comm 1', 'msg_tag': 10, 'msg_length': 262144}"
+194220003.201837,Enter,MPI_Recv,0,1,{'region': 'Region 176'}
+194299518.38958535,Instant,MpiRecv,0,1,"{'sender': 0, 'communicator': 'Comm 1', 'msg_tag': 10, 'msg_length': 262144}"
+194300037.6724441,Leave,MPI_Recv,0,1,
+194300433.81653672,Enter,MPI_Send,0,1,{'region': 'Region 193'}
+194300716.367504,Instant,MpiSend,0,1,"{'receiver': 0, 'communicator': 'Comm 1', 'msg_tag': 20, 'msg_length': 262144}"
+194302812.59022063,Leave,MPI_Send,0,0,
+194303143.8239559,Enter,MPI_Recv,0,0,{'region': 'Region 176'}
+194408743.42971635,Instant,MpiRecv,0,0,"{'sender': 1, 'communicator': 'Comm 1', 'msg_tag': 20, 'msg_length': 262144}"
+194408865.61391842,Leave,MPI_Send,0,1,
+194409224.53001198,Leave,MPI_Recv,0,0,
+194675378.9501026,Enter,MPI_Send,0,0,{'region': 'Region 193'}
+194676157.87439075,Instant,MpiSend,0,0,"{'receiver': 1, 'communicator': 'Comm 1', 'msg_tag': 10, 'msg_length': 524288}"
+194762211.34879553,Enter,MPI_Recv,0,1,{'region': 'Region 176'}
+194907390.04494745,Instant,MpiRecv,0,1,"{'sender': 0, 'communicator': 'Comm 1', 'msg_tag': 10, 'msg_length': 524288}"
+194908311.1992833,Leave,MPI_Recv,0,1,
+194908774.16286144,Enter,MPI_Send,0,1,{'region': 'Region 193'}
+194908999.439984,Instant,MpiSend,0,1,"{'receiver': 0, 'communicator': 'Comm 1', 'msg_tag': 20, 'msg_length': 524288}"
+194911489.42038304,Leave,MPI_Send,0,0,
+194911733.78878716,Enter,MPI_Recv,0,0,{'region': 'Region 176'}
+195131579.44173214,Leave,MPI_Send,0,1,
+195131666.30706328,Instant,MpiRecv,0,0,"{'sender': 1, 'communicator': 'Comm 1', 'msg_tag': 20, 'msg_length': 524288}"
+195132209.45402402,Leave,MPI_Recv,0,0,
+195717989.1556328,Enter,MPI_Send,0,0,{'region': 'Region 193'}
+195718827.2628938,Instant,MpiSend,0,0,"{'receiver': 1, 'communicator': 'Comm 1', 'msg_tag': 10, 'msg_length': 1048576}"
+195859370.11859792,Enter,MPI_Recv,0,1,{'region': 'Region 176'}
+196135679.66863889,Instant,MpiRecv,0,1,"{'sender': 0, 'communicator': 'Comm 1', 'msg_tag': 10, 'msg_length': 1048576}"
+196136540.6854378,Leave,MPI_Recv,0,1,
+196136943.51147896,Enter,MPI_Send,0,1,{'region': 'Region 193'}
+196137242.29003558,Instant,MpiSend,0,1,"{'receiver': 0, 'communicator': 'Comm 1', 'msg_tag': 20, 'msg_length': 1048576}"
+196139793.8398177,Leave,MPI_Send,0,0,
+196140050.61755484,Enter,MPI_Recv,0,0,{'region': 'Region 176'}
+196583827.45770124,Leave,MPI_Send,0,1,
+196583934.36887804,Instant,MpiRecv,0,0,"{'sender': 1, 'communicator': 'Comm 1', 'msg_tag': 20, 'msg_length': 1048576}"
+196584467.97019798,Leave,MPI_Recv,0,0,
+197613248.45135725,Enter,MPI_Send,0,0,{'region': 'Region 193'}
+197614141.9233348,Instant,MpiSend,0,0,"{'receiver': 1, 'communicator': 'Comm 1', 'msg_tag': 10, 'msg_length': 2097152}"
+197951492.98251072,Enter,MPI_Recv,0,1,{'region': 'Region 176'}
+198502088.884028,Instant,MpiRecv,0,1,"{'sender': 0, 'communicator': 'Comm 1', 'msg_tag': 10, 'msg_length': 2097152}"
+198502934.62780163,Leave,MPI_Recv,0,1,
+198503365.13620108,Enter,MPI_Send,0,1,{'region': 'Region 193'}
+198503650.55086058,Instant,MpiSend,0,1,"{'receiver': 0, 'communicator': 'Comm 1', 'msg_tag': 20, 'msg_length': 2097152}"
+198506398.7408429,Leave,MPI_Send,0,0,
+198506691.79201505,Enter,MPI_Recv,0,0,{'region': 'Region 176'}
+199319910.70381415,Leave,MPI_Send,0,1,
+199319973.70504335,Instant,MpiRecv,0,0,"{'sender': 1, 'communicator': 'Comm 1', 'msg_tag': 20, 'msg_length': 2097152}"
+199320512.07918367,Leave,MPI_Recv,0,0,
+199514778.2785141,Enter,MPI_Finalize,0,0,{'region': 'Region 104'}
+199529686.66029385,Enter,MPI_Finalize,0,1,{'region': 'Region 104'}
+199573648.15437022,Leave,MPI_Finalize,0,0,
+199574793.63126454,Leave,MPI_Finalize,0,1,
+199575243.23094556,Leave,"int main(int, char**)",0,0,
+199576798.2158296,Leave,"int main(int, char**)",0,1,
+199603304.5511645,Instant,ProgramEnd,0,0,{'exit_status': -9223372036854775808}
+199604459.57369962,Instant,ProgramEnd,0,1,{'exit_status': -9223372036854775808}
diff --git a/pipit/trace.py b/pipit/trace.py
index 6f3838b6..f4279aa1 100644
--- a/pipit/trace.py
+++ b/pipit/trace.py
@@ -5,6 +5,8 @@
 
 import numpy as np
 import pandas as pd
+from ast import literal_eval
+from io import StringIO
 
 
 class Trace:
@@ -60,6 +62,10 @@ def from_nsight(filename):
 
     @staticmethod
     def from_csv(filename):
+        # detect if the input is a CSV as a string
+        if "," in filename:
+            # wrapping with StringIO allows pandas to read it
+            filename = StringIO(filename)
         events_dataframe = pd.read_csv(filename, skipinitialspace=True)
 
         # if timestamps are in seconds, convert them to nanoseconds
@@ -72,6 +78,17 @@ def from_csv(filename):
         # ensure that ranks are ints
         events_dataframe = events_dataframe.astype({"Process": "int32"})
 
+        # this next part is needed for fake test reading
+        # ensure that the attributes are a dict, not a string
+        if "Attributes" in events_dataframe.columns:
+            # use literal_eval so we're not running a security risk
+            # don't try to literal_eval a NaN, as well
+            events_dataframe["Attributes"] = events_dataframe["Attributes"].apply(
+                lambda attr_dict: (
+                    literal_eval(attr_dict) if type(attr_dict) == str else attr_dict
+                )
+            )
+
         # make certain columns categorical
         events_dataframe = events_dataframe.astype(
             {
diff --git a/pipit/util/test_generator.py b/pipit/util/test_generator.py
new file mode 100644
index 00000000..c6343256
--- /dev/null
+++ b/pipit/util/test_generator.py
@@ -0,0 +1,369 @@
+import random
+import textwrap
+import pandas as pd
+import numpy as np
+
+
+class FakeNode:
+    """
+    A single node of the calling tree used to produce fake traces.
+    Represents a single function call.
+
+    Each node tracks its exclusive time (exc_time), its inclusive time
+    (inc_time, which grows as children are added), the number of nodes in
+    its subtree (total_nodes), its parent, and its children keyed by the
+    amount of exclusive time that elapses before each child starts.
+    """
+
+    def __init__(
+        self, name, exc_time, mpi_type="", mpi_tgt=0, mpi_volume=0, mpi_time=0
+    ):
+        """
+        Creates a childless, parentless node.
+
+        name and exc_time describe the function call. A non-empty mpi_type
+        marks the node as an MPI call; mpi_tgt, mpi_volume and mpi_time are
+        stored as given and used by mpi_attributes() and to_events().
+        """
+        self.name = name
+        self.exc_time = exc_time
+        # with no children yet, inclusive time equals exclusive time
+        self.inc_time = exc_time
+        self.children = {}  # run_time -> child node
+        self.is_mpi = mpi_type != ""
+        self.mpi_type = mpi_type
+        self.mpi_tgt = mpi_tgt
+        self.mpi_volume = mpi_volume
+        self.mpi_time = mpi_time
+        # size of the subtree rooted here, counting this node
+        self.total_nodes = 1
+        self.parent = None
+
+    def grow_inc_time(self, time):
+        """
+        This function adjusts inclusive time metric when a new child is added,
+        adding the time to each parent's inc_time, up to the root.
+        """
+        self.inc_time += time
+        if self.parent is not None:
+            self.parent.grow_inc_time(time)
+
+    def grow_total_nodes(self, amt):
+        """
+        This function adjusts the total count of nodes when a new node is added,
+        adding the new count to each parent's total_nodes, up to the root.
+        """
+        self.total_nodes += amt
+        if self.parent is not None:
+            self.parent.grow_total_nodes(amt)
+
+    def add_child(self, child, run_time):
+        """
+        This function adds a child that executes after run_time
+        exclusive time within the function represented by the current node.
+
+        The child's node count and inclusive time are added to this node and
+        all of its ancestors. Two children may not share the same run_time
+        (checked with an assert).
+        """
+        assert run_time not in self.children
+        self.children[run_time] = child
+        child.parent = self
+        self.grow_total_nodes(child.total_nodes)
+        self.grow_inc_time(child.inc_time)
+
+    def choose_random_node(self):
+        """
+        This function selects a random node, with all descendants
+        of the current node being weighted equally, and returns it.
+        """
+        if not self.children:
+            return self
+        rng = random.random()
+        for child in self.children.values():
+            # a child's subtree is chosen in proportion to its share of nodes
+            weight = child.total_nodes / self.total_nodes
+            if rng < weight:
+                return child.choose_random_node()
+            rng -= weight
+        # the leftover share (this node's own 1 / total_nodes) selects self
+        return self
+
+    def pick_by_name(self, name):
+        """
+        Returns all nodes in this tree that have the given name.
+        """
+        valid = [self] if self.name == name else []
+        for child in self.children.values():
+            valid += child.pick_by_name(name)
+        return valid
+
+    def __str__(self) -> str:
+        """
+        Returns "name (exc_time)" followed by one tab-indented entry per
+        child, in order of run_time, each prefixed with its run_time.
+        """
+        return "{} ({})\n".format(self.name, self.exc_time) + "\n".join(
+            [
+                textwrap.indent(str(run_time) + ": " + str(child), "\t")
+                for run_time, child in sorted(self.children.items())
+            ]
+        )
+
+    def mpi_attributes(self):
+        """
+        Returns the Attributes dictionary for this node, including
+        (if it is an MPI event) receiver/sender and msg_length.
+        Non-MPI nodes get an empty dictionary.
+        """
+        if not self.is_mpi:
+            return {}
+        attr = {"msg_length": self.mpi_volume}
+        if self.mpi_type == "MpiSend":
+            attr["receiver"] = self.mpi_tgt
+        else:
+            # any other MPI type is treated as the receiving side
+            attr["sender"] = self.mpi_tgt
+        return attr
+
+    def to_events(self, begin_time, process, data):
+        """
+        Appends event rows for this tree to data, with time starting at
+        begin_time. data is a list that is built up and then converted to a
+        DataFrame once the entire tree has been processed.
+
+        Each row is [timestamp, event type, name, process, attributes,
+        inclusive time, exclusive time]. Nothing is returned.
+        """
+        data.append(
+            [
+                begin_time,
+                "Enter",
+                self.name + "()",
+                process,
+                {},
+                self.inc_time,
+                self.exc_time,
+            ]
+        )
+        if self.is_mpi:
+            # MPI calls additionally emit an Instant event at mpi_time
+            # after the call begins
+            data.append(
+                [
+                    begin_time + self.mpi_time,
+                    "Instant",
+                    self.mpi_type,
+                    process,
+                    self.mpi_attributes(),
+                    float("nan"),
+                    float("nan"),
+                ]
+            )
+        # total_time accumulates durations of already processed children
+        total_time = begin_time
+        for run_time, child in sorted(self.children.items()):
+            # children will add their own lines to data
+            child.to_events(total_time + run_time, process, data)
+            total_time += child.inc_time
+
+        # time.inc and time.exc are both NaN for Leave events
+        data.append(
+            [
+                begin_time + self.inc_time,
+                "Leave",
+                self.name + "()",
+                process,
+                {},
+                float("nan"),
+                float("nan"),
+            ]
+        )
+
+    def tweak_tree(self):
+        """
+        Adds small exclusive time perturbations to
+        function length to generate a "similar" tree, for other processes
+        to use, that is not identical to the original.
+
+        The exclusive time of every node in the subtree is scaled by its own
+        random factor between 0.7 and 1.3, and each node's child run times
+        are scaled by that node's factor. The tree is modified in place.
+        """
+        exc_time = self.exc_time
+        factor = (0.7) + random.random() * 0.6
+        exc_time_new = exc_time * factor
+        # preserve inclusive time relations
+        self.grow_inc_time(exc_time_new - exc_time)
+        self.exc_time = exc_time_new
+        for child in self.children.values():
+            child.tweak_tree()
+        # also scale back child run times
+        self.children = {
+            run_time * factor: child for run_time, child in self.children.items()
+        }
+
+    def deepcopy(self):
+        """
+        Returns a deep copy of the tree. The copy of this node has no
+        parent, even if this node has one.
+        """
+        mycopy = FakeNode(
+            self.name,
+            self.exc_time,
+            self.mpi_type,
+            self.mpi_tgt,
+            self.mpi_volume,
+            self.mpi_time,
+        )
+        # copy the aggregates directly instead of recomputing via add_child
+        mycopy.inc_time = self.inc_time
+        mycopy.total_nodes = self.total_nodes
+        for run_time, child in self.children.items():
+            child_copy = child.deepcopy()
+            child_copy.parent = mycopy
+            mycopy.children[run_time] = child_copy
+        return mycopy
+
+    def node_at_time(self, target_time):
+        """
+        Returns the node that contains the given target_time,
+        as well as the offset into that node that the time occurs.
+        target_time is measured from the start of this node; the returned
+        offset counts only the found node's own (exclusive) time.
+        """
+        total_time = 0
+        # TODO: may be more efficient to store self.children sorted already
+        for run_time, child in sorted(self.children.items()):
+            if target_time < total_time + run_time:
+                # this time occurs in the current node!
+                return self, target_time - total_time
+            if target_time < total_time + run_time + child.inc_time:
+                # this time occurs in the given child node
+                return child.node_at_time(target_time - total_time - run_time)
+            # this time occurs after this child
+            total_time += child.inc_time
+        # if no children find it, it must be the current node
+        return self, target_time - total_time
+
+    def insert_at_time(self, child, target_time):
+        """
+        Adds the child to the proper node such that it executes
+        at target_time.
+        """
+        node, offset = self.node_at_time(target_time)
+        node.add_child(child, offset)
+
+def gen_fake_node(function_names):
+    """
+    Builds a FakeNode whose name is drawn at random from function_names
+    and whose exclusive time is a random value in [0, 10).
+    """
+    # draw the name before the duration to keep the random sequence stable
+    chosen_name = random.choice(function_names)
+    duration = random.random() * 10
+    return FakeNode(chosen_name, duration)
+
+
+def gen_fake_tree(num_nodes, function_names, copy_subtrees=True):
+    """
+    Grows a tree of FakeNodes by repeatedly attaching children at random
+    places until the root's subtree holds at least num_nodes nodes.
+
+    When copy_subtrees is true and the tree already has a node with the new
+    node's name, a tweaked copy of such a node's subtree may be attached in
+    place of the fresh node. Returns the root.
+    """
+    root = gen_fake_node(function_names)
+    # keep attaching until the target size is reached
+    while root.total_nodes < num_nodes:
+        fresh = gen_fake_node(function_names)
+        # existing node that will receive the new child
+        host = root.choose_random_node()
+        # random point within the host's exclusive time for the child to run
+        offset = random.random() * host.exc_time
+        # existing nodes with the same name are candidates to copy from
+        candidates = root.pick_by_name(fresh.name)
+
+        attachment = fresh
+        if candidates and copy_subtrees:
+            template = random.choice(candidates)
+            # larger subtrees are less likely to be copied
+            if random.random() <= 4 / (template.total_nodes**0.5):
+                attachment = template.deepcopy()
+                attachment.tweak_tree()
+        host.add_child(attachment, offset)
+
+    return root
+
+
+def gen_forest(seed_tree, num_trees):
+    """
+    Returns a list of num_trees trees, each a deep copy of seed_tree that
+    has then been perturbed with tweak_tree. seed_tree itself is not tweaked.
+    """
+    forest = []
+    for _ in range(num_trees):
+        forest.append(seed_tree.deepcopy())
+    # perturb only after all copies exist, one tree at a time
+    for clone in forest:
+        clone.tweak_tree()
+    return forest
+
+
+def add_fake_mpi_events(trees, num_pairs):
+    """
+    Adds fake MPI events to a set of trees (one per process). In total,
+    num_pairs pairs of Send/Recv events are generated and inserted.
+    Each event is a FakeNode created with a non-empty mpi_type, so its
+    is_mpi flag is True.
+
+    trees is modified in place and nothing is returned. If num_pairs is not
+    positive the trees are left untouched. Raises ValueError when pairs are
+    requested but fewer than two trees are given, because the two ends of a
+    message are placed on different processes.
+    """
+    if num_pairs <= 0:
+        # nothing to insert; this also avoids min() on an empty list of trees
+        return
+    if len(trees) < 2:
+        raise ValueError(
+            "add_fake_mpi_events needs at least two trees to pair MPI events"
+        )
+
+    # choose times for events to happen, all within the shortest tree
+    maxtime = min(tree.inc_time for tree in trees)
+    planned_evts = [random.random() * maxtime for _ in range(2 * num_pairs)]
+    # sort from last to first, so that pop() hands out the earliest remaining
+    # time; events are inserted from first to last in the timeline to avoid
+    # dependencies among the events' times
+    planned_evts.sort(reverse=True)
+    while planned_evts:
+        # pair two first events
+        first_evt = planned_evts.pop()
+        second_evt = planned_evts.pop()
+        # time that the first one has to idle
+        idle_time = second_evt - first_evt
+        # pick two different processes
+        first_proc, second_proc = random.sample(range(len(trees)), 2)
+        first_tree = trees[first_proc]
+        second_tree = trees[second_proc]
+        # either first process sends (1) or receives (0)
+        send_first = random.randint(0, 1)
+        volume = random.randint(1, 1000000)
+        # give both a small, random, extra time ("latency", etc)
+        if send_first:
+            # mpi sends don't need to block
+            idle_time = 0
+        first_dur = random.random() + idle_time
+        second_dur = random.random()
+        # each node records the *other* process as its MPI target, and emits
+        # its Instant event at a random point within its own duration
+        first_node = FakeNode(
+            "MPI_Send" if send_first else "MPI_Recv",
+            first_dur,
+            "MpiSend" if send_first else "MpiRecv",
+            second_proc,
+            volume,
+            random.random() * first_dur,
+        )
+        second_node = FakeNode(
+            "MPI_Recv" if send_first else "MPI_Send",
+            second_dur,
+            "MpiRecv" if send_first else "MpiSend",
+            first_proc,
+            volume,
+            random.random() * second_dur,
+        )
+        first_tree.insert_at_time(first_node, first_evt)
+        second_tree.insert_at_time(second_node, second_evt)
+
+
+def emit_tree_data(trees):
+    """
+    Converts trees (one per process, numbered in list order) into two CSV
+    strings and returns them as a pair (data_csv, ground_csv).
+
+    data_csv holds the events sorted by timestamp; ground_csv holds the
+    ground truth columns time.inc and time.exc (matching Pipit's column
+    names) in the same row order.
+    """
+    event_columns = ["Timestamp (s)", "Event Type", "Name", "Process", "Attributes"]
+    truth_columns = ["time.inc", "time.exc"]
+
+    rows = []
+    for rank, tree in enumerate(trees):
+        # start each tree at a small random offset, so that we don't have
+        # many times of exactly 0, which would lead to undefined sorting
+        # order and rows not matching
+        start_offset = random.random() * 0.01
+        tree.to_events(start_offset, rank, rows)
+
+    frame = pd.DataFrame(rows, columns=event_columns + truth_columns)
+    frame = frame.sort_values("Timestamp (s)")
+
+    events_csv = frame[event_columns].to_csv(index=False)
+    truth_csv = frame[truth_columns].to_csv(index=False)
+    return events_csv, truth_csv
+
+
+def generate_trace(
+    num_events,
+    num_processes,
+    function_names=None,
+    num_mpi_pairs=0,
+):
+    """
+    Top level test generation function. Generates test and ground truth datasets with a
+    minimum of num_events Enter/Leave events per process, of which there are
+    num_processes. Optionally, MPI events can be added.
+
+    function_names is the pool that fake function names are drawn from; when
+    it is None, a built-in list of placeholder names is used. num_mpi_pairs
+    is the number of Send/Recv pairs to insert across the processes.
+
+    Returns the pair of CSV strings produced by emit_tree_data: the event
+    data and the ground truth columns.
+    """
+    # build the default per call rather than sharing one mutable list
+    # across all calls through the signature
+    if function_names is None:
+        function_names = ["foo", "bar", "baz", "quux", "grault", "garply", "waldo"]
+    # every node yields one Enter and one Leave event, hence half as many nodes
+    seed_tree = gen_fake_tree(num_events // 2, function_names)
+    forest = gen_forest(seed_tree, num_processes)
+    add_fake_mpi_events(forest, num_mpi_pairs)
+    return emit_tree_data(forest)