diff --git a/examples/activity_position.py b/examples/activity_position.py index c0b898b47..74ae2d1d0 100644 --- a/examples/activity_position.py +++ b/examples/activity_position.py @@ -5,7 +5,7 @@ def execute_script(): - dataframe = pd.read_csv(os.path.join("..", "tests", "input_data", "receipt.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv(os.path.join("..", "tests", "input_data", "receipt.csv")) dataframe["time:timestamp"] = pd.to_datetime(dataframe["time:timestamp"], utc=True, format="ISO8601") # prints the summary of the positions of two activities print(pm4py.get_activity_position_summary(dataframe, "Confirmation of receipt")) diff --git a/examples/corr_mining.py b/examples/corr_mining.py index e3df198d7..2d3f0efee 100644 --- a/examples/corr_mining.py +++ b/examples/corr_mining.py @@ -12,7 +12,7 @@ def execute_script(): - df = pd.read_csv("../tests/input_data/interval_event_log.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv("../tests/input_data/interval_event_log.csv") df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") act_count = dict(df["concept:name"].value_counts()) parameters = {} diff --git a/examples/dataframe_prefix_and_fea_extraction.py b/examples/dataframe_prefix_and_fea_extraction.py index d7c4ea413..b9c322a0e 100644 --- a/examples/dataframe_prefix_and_fea_extraction.py +++ b/examples/dataframe_prefix_and_fea_extraction.py @@ -6,7 +6,7 @@ def execute_script(): # loads a dataframe. setup dates - df = pd.read_csv("../tests/input_data/receipt.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv("../tests/input_data/receipt.csv") df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") print(df) # insert the case index in the dataframe diff --git a/examples/df_to_log_postpro.py b/examples/df_to_log_postpro.py index c14f32d20..3ddc1e5e7 100644 --- a/examples/df_to_log_postpro.py +++ b/examples/df_to_log_postpro.py @@ -6,7 +6,7 @@ def execute_script(): - dataframe = pd.read_csv(os.path.join("..", "tests", "input_data", "running-example.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv(os.path.join("..", "tests", "input_data", "running-example.csv")) dataframe = pm4py.format_dataframe(dataframe, timest_format="ISO8601") log = log_converter.apply(dataframe, variant=log_converter.Variants.TO_EVENT_LOG, parameters={"stream_postprocessing": False}) pm4py.write_xes(log, "non_postprocessed.xes") diff --git a/examples/dfg_min_ex_pandas.py b/examples/dfg_min_ex_pandas.py index ad51f513d..293b8dcd7 100644 --- a/examples/dfg_min_ex_pandas.py +++ b/examples/dfg_min_ex_pandas.py @@ -18,7 +18,7 @@ def execute_script(): log_path = os.path.join("..", "tests", "input_data", "interval_event_log.csv") - dataframe = pd.read_csv(log_path, dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv(log_path) log_path = os.path.join("..", "tests", "input_data", "reviewing.xes") log = pm4py.read_xes(log_path) dataframe = pm4py.convert_to_dataframe(log) diff --git a/examples/events_distribution.py b/examples/events_distribution.py index e3e59dab1..104831367 100644 --- a/examples/events_distribution.py +++ b/examples/events_distribution.py @@ -9,7 +9,7 @@ def execute_script(): - df = pd.read_csv(os.path.join("..", "tests", "input_data", "receipt.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(os.path.join("..", "tests", "input_data", "receipt.csv")) df["time:timestamp"] = pd.to_datetime(df["time:timestamp"], utc=True, format="ISO8601") # plots the distribution of the events over the days of a month x0, y0 = attr_get.get_events_distribution(df, distr_type="days_month") diff --git a/examples/heuminer_plusplus.py b/examples/heuminer_plusplus.py index 95846065f..5b663551a 100644 --- a/examples/heuminer_plusplus.py +++ b/examples/heuminer_plusplus.py @@ -8,7 +8,7 @@ def execute_script(): - df = pd.read_csv("../tests/input_data/interval_event_log.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv("../tests/input_data/interval_event_log.csv") df["time:timestamp"] = pd.to_datetime(df["time:timestamp"], utc=True, format="ISO8601") df["start_timestamp"] = pd.to_datetime(df["start_timestamp"], utc=True, format="ISO8601") log = pm4py.read_xes("../tests/input_data/interval_event_log.xes") diff --git a/examples/link_analysis_vbfa.py b/examples/link_analysis_vbfa.py index b0a22b0c8..a9a0813d3 100644 --- a/examples/link_analysis_vbfa.py +++ b/examples/link_analysis_vbfa.py @@ -5,7 +5,7 @@ def execute_script(): - dataframe = pd.read_csv(os.path.join("..", "tests", "input_data", "ocel", "VBFA.zip"), compression="zip", dtype="str", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv(os.path.join("..", "tests", "input_data", "ocel", "VBFA.zip"), compression="zip", dtype="str") dataframe["time:timestamp"] = dataframe["ERDAT"] + " " + dataframe["ERZET"] dataframe["time:timestamp"] = pd.to_datetime(dataframe["time:timestamp"], format="%Y%m%d %H%M%S") dataframe["RFWRT"] = dataframe["RFWRT"].astype(float) diff --git a/examples/merging_case_relations.py b/examples/merging_case_relations.py index 14911b079..9d2da16fe 100644 --- a/examples/merging_case_relations.py +++ b/examples/merging_case_relations.py @@ -7,11 +7,11 @@ def execute_script(): - dataframe1 = pd.read_csv(os.path.join("..", "tests", "input_data", "interleavings", "receipt_even.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe1 = pd.read_csv(os.path.join("..", "tests", "input_data", "interleavings", "receipt_even.csv")) dataframe1["time:timestamp"] = pd.to_datetime(dataframe1["time:timestamp"], utc=True, format="ISO8601") - dataframe2 = pd.read_csv(os.path.join("..", "tests", "input_data", "interleavings", "receipt_odd.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe2 = pd.read_csv(os.path.join("..", "tests", "input_data", "interleavings", "receipt_odd.csv")) dataframe2["time:timestamp"] = pd.to_datetime(dataframe2["time:timestamp"], utc=True, format="ISO8601") - case_relations = pd.read_csv(os.path.join("..", "tests", "input_data", "interleavings", "case_relations.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + case_relations = pd.read_csv(os.path.join("..", "tests", "input_data", "interleavings", "case_relations.csv")) merged = case_relations_merging.apply(dataframe1, dataframe2, case_relations) dfg, sa, ea = pm4py.discover_dfg(merged) pm4py.view_dfg(dfg, sa, ea, format=examples_conf.TARGET_IMG_FORMAT) diff --git a/examples/perf_spectrum_visualization.py b/examples/perf_spectrum_visualization.py index 368f7d0fb..602803cef 100644 --- a/examples/perf_spectrum_visualization.py +++ b/examples/perf_spectrum_visualization.py @@ -9,7 +9,7 @@ def execute_script(): log = pm4py.read_xes(os.path.join("..", "tests", "input_data", "receipt.xes")) pm4py.view_performance_spectrum(log, ["Confirmation of receipt", "T04 Determine confirmation of receipt", "T10 Determine necessity to stop indication"], format=examples_conf.TARGET_IMG_FORMAT) - df = pd.read_csv(os.path.join("..", "tests", "input_data", "receipt.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(os.path.join("..", "tests", "input_data", "receipt.csv")) df["time:timestamp"] = pd.to_datetime(df["time:timestamp"], utc=True, format="ISO8601") pm4py.view_performance_spectrum(df, ["Confirmation of receipt", "T04 Determine confirmation of receipt", "T10 Determine necessity to stop indication"], format=examples_conf.TARGET_IMG_FORMAT) diff --git a/examples/simplified_interface.py b/examples/simplified_interface.py index 21b563a95..b02c6c85c 100644 --- a/examples/simplified_interface.py +++ b/examples/simplified_interface.py @@ -12,7 +12,7 @@ def execute_script(): log1 = pm4py.read_xes("../tests/input_data/running-example.xes") # reads a CSV into a dataframe - df = pd.read_csv("../tests/input_data/running-example.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv("../tests/input_data/running-example.csv") df["time:timestamp"] = pd.to_datetime(df["time:timestamp"], utc=True, format="ISO8601") df["case:concept:name"] = df["case:concept:name"].astype("string") diff --git a/examples/temporal_profile_dataframe.py b/examples/temporal_profile_dataframe.py index a6f072aba..a18d192bc 100644 --- a/examples/temporal_profile_dataframe.py +++ b/examples/temporal_profile_dataframe.py @@ -6,7 +6,7 @@ def execute_script(): - dataframe = pd.read_csv("../tests/input_data/receipt.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("../tests/input_data/receipt.csv") dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format="ISO8601") tf = temporal_profile_discovery.apply(dataframe) conformance = temporal_profile_conformance.apply(dataframe, tf, parameters={"zeta": 6.0}) diff --git a/examples/timestamp_granularity.py b/examples/timestamp_granularity.py index 9d5964fbf..f57e5e983 100644 --- a/examples/timestamp_granularity.py +++ b/examples/timestamp_granularity.py @@ -5,7 +5,7 @@ def execute_script(): - dataframe = pd.read_csv("../tests/input_data/receipt.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("../tests/input_data/receipt.csv") dataframe = pm4py.format_dataframe(dataframe, timest_format="ISO8601") # prints the original timestamp column of the dataframe diff --git a/examples/timestamp_interleavings.py b/examples/timestamp_interleavings.py index 30cdb1a70..4c59b7a58 100644 --- a/examples/timestamp_interleavings.py +++ b/examples/timestamp_interleavings.py @@ -7,11 +7,11 @@ def execute_script(): - receipt_even = pd.read_csv(os.path.join("..", "tests", "input_data", "interleavings", "receipt_even.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + receipt_even = pd.read_csv(os.path.join("..", "tests", "input_data", "interleavings", "receipt_even.csv")) receipt_even["time:timestamp"] = pd.to_datetime(receipt_even["time:timestamp"], utc=True, format="ISO8601") - receipt_odd = pd.read_csv(os.path.join("..", "tests", "input_data", "interleavings", "receipt_odd.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + receipt_odd = pd.read_csv(os.path.join("..", "tests", "input_data", "interleavings", "receipt_odd.csv")) receipt_odd["time:timestamp"] = pd.to_datetime(receipt_odd["time:timestamp"], utc=True, format="ISO8601") - case_relations = pd.read_csv(os.path.join("..", "tests", "input_data", "interleavings", "case_relations.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + case_relations = pd.read_csv(os.path.join("..", "tests", "input_data", "interleavings", "case_relations.csv")) interleavings_dataframe = interleavings_miner.apply(receipt_even, receipt_odd, case_relations) print(interleavings_dataframe) # print the frequency and the direction of the interleavings diff --git a/pm4py/algo/transformation/ocel/features/objects/algorithm.py b/pm4py/algo/transformation/ocel/features/objects/algorithm.py index a6d79d239..ffd9a472f 100644 --- a/pm4py/algo/transformation/ocel/features/objects/algorithm.py +++ b/pm4py/algo/transformation/ocel/features/objects/algorithm.py @@ -18,6 +18,7 @@ from typing import Optional, Dict, Any, List from enum import Enum from pm4py.util import exec_utils +import time from pm4py.algo.transformation.ocel.features.objects import object_lifecycle_length, object_lifecycle_duration, object_degree_centrality, object_general_descendants_graph, object_general_interaction_graph, object_general_inheritance_graph, object_cobirth_graph, object_codeath_graph, object_lifecycle_activities, object_str_attributes, object_num_attributes, objects_interaction_graph_ot, object_work_in_progress, related_events_features, related_activities_features, obj_con_in_graph_features, object_lifecycle_unq_act, object_lifecycle_paths @@ -122,6 +123,8 @@ def apply(ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None): filter_per_type = exec_utils.get_param_value(Parameters.FILTER_PER_TYPE, parameters, None) + T0 = time.time_ns() + ordered_objects = list(ocel.objects[ocel.object_id_column]) datas = [[] for x in ordered_objects] @@ -130,9 +133,11 @@ def apply(ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None): if enable_object_lifecycle_length: if debug: print("computing enable_object_lifecycle_length") + t0 = time.time_ns() data, feature_names = object_lifecycle_length.apply(ocel, parameters=parameters) + t1 = time.time_ns() if debug: - print("computed enable_object_lifecycle_length") + print("computed enable_object_lifecycle_length", "%.4f" % ((t1-t0)/10**9)) feature_namess = feature_namess + feature_names for i in range(len(data)): datas[i] = datas[i] + data[i] @@ -140,9 +145,11 @@ def apply(ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None): if enable_object_lifecycle_duration: if debug: print("computing enable_object_lifecycle_duration") + t0 = time.time_ns() data, feature_names = object_lifecycle_duration.apply(ocel, parameters=parameters) + t1 = time.time_ns() if debug: - print("computed enable_object_lifecycle_duration") + print("computed enable_object_lifecycle_duration", "%.4f" % ((t1-t0)/10**9)) feature_namess = feature_namess + feature_names for i in range(len(data)): datas[i] = datas[i] + data[i] @@ -150,9 +157,11 @@ def apply(ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None): if enable_object_degree_centrality: if debug: print("computing enable_object_degree_centrality") + t0 = time.time_ns() data, feature_names = object_degree_centrality.apply(ocel, parameters=parameters) + t1 = time.time_ns() if debug: - print("computed enable_object_degree_centrality") + print("computed enable_object_degree_centrality", "%.4f" % ((t1-t0)/10**9)) feature_namess = feature_namess + feature_names for i in range(len(data)): datas[i] = datas[i] + data[i] @@ -160,9 +169,11 @@ def apply(ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None): if enable_object_general_interaction_graph: if debug: print("computing enable_object_general_interaction_graph") + t0 = time.time_ns() data, feature_names = object_general_interaction_graph.apply(ocel, parameters=parameters) + t1 = time.time_ns() if debug: - print("computed enable_object_general_interaction_graph") + print("computed enable_object_general_interaction_graph", "%.4f" % ((t1-t0)/10**9)) feature_namess = feature_namess + feature_names for i in range(len(data)): datas[i] = datas[i] + data[i] @@ -170,9 +181,11 @@ def apply(ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None): if enable_object_general_descendants_graph: if debug: print("computing enable_object_general_descendants_graph") + t0 = time.time_ns() data, feature_names = object_general_descendants_graph.apply(ocel, parameters=parameters) + t1 = time.time_ns() if debug: - print("computed enable_object_general_descendants_graph") + print("computed enable_object_general_descendants_graph", "%.4f" % ((t1-t0)/10**9)) feature_namess = feature_namess + feature_names for i in range(len(data)): datas[i] = datas[i] + data[i] @@ -180,9 +193,11 @@ def apply(ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None): if enable_object_general_inheritance_graph: if debug: print("computing enable_object_general_inheritance_graph") + t0 = time.time_ns() data, feature_names = object_general_inheritance_graph.apply(ocel, parameters=parameters) + t1 = time.time_ns() if debug: - print("computed enable_object_general_inheritance_graph") + print("computed enable_object_general_inheritance_graph", "%.4f" % ((t1-t0)/10**9)) feature_namess = feature_namess + feature_names for i in range(len(data)): datas[i] = datas[i] + data[i] @@ -190,9 +205,11 @@ def apply(ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None): if enable_object_cobirth_graph: if debug: print("computing enable_object_cobirth_graph") + t0 = time.time_ns() data, feature_names = object_cobirth_graph.apply(ocel, parameters=parameters) + t1 = time.time_ns() if debug: - print("computed enable_object_cobirth_graph") + print("computed enable_object_cobirth_graph", "%.4f" % ((t1-t0)/10**9)) feature_namess = feature_namess + feature_names for i in range(len(data)): datas[i] = datas[i] + data[i] @@ -200,9 +217,11 @@ def apply(ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None): if enable_object_codeath_graph: if debug: print("computing enable_object_codeath_graph") + t0 = time.time_ns() data, feature_names = object_codeath_graph.apply(ocel, parameters=parameters) + t1 = time.time_ns() if debug: - print("computed enable_object_codeath_graph") + print("computed enable_object_codeath_graph", "%.4f" % ((t1-t0)/10**9)) feature_namess = feature_namess + feature_names for i in range(len(data)): datas[i] = datas[i] + data[i] @@ -210,9 +229,11 @@ def apply(ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None): if enable_object_lifecycle_activities: if debug: print("computing enable_object_lifecycle_activities") + t0 = time.time_ns() data, feature_names = object_lifecycle_activities.apply(ocel, parameters=parameters) + t1 = time.time_ns() if debug: - print("computed enable_object_lifecycle_activities") + print("computed enable_object_lifecycle_activities", "%.4f" % ((t1-t0)/10**9)) feature_namess = feature_namess + feature_names for i in range(len(data)): datas[i] = datas[i] + data[i] @@ -220,9 +241,11 @@ def apply(ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None): if enable_object_str_attributes: if debug: print("computing enable_object_str_attributes") + t0 = time.time_ns() data, feature_names = object_str_attributes.apply(ocel, parameters=parameters) + t1 = time.time_ns() if debug: - print("computed enable_object_str_attributes") + print("computed enable_object_str_attributes", "%.4f" % ((t1-t0)/10**9)) feature_namess = feature_namess + feature_names for i in range(len(data)): datas[i] = datas[i] + data[i] @@ -230,9 +253,11 @@ def apply(ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None): if enable_object_num_attributes: if debug: print("computing enable_object_num_attributes") + t0 = time.time_ns() data, feature_names = object_num_attributes.apply(ocel, parameters=parameters) + t1 = time.time_ns() if debug: - print("computed enable_object_num_attributes") + print("computed enable_object_num_attributes", "%.4f" % ((t1-t0)/10**9)) feature_namess = feature_namess + feature_names for i in range(len(data)): datas[i] = datas[i] + data[i] @@ -240,9 +265,11 @@ def apply(ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None): if enable_object_interaction_graph_ot: if debug: print("computing enable_object_interaction_graph_ot") + t0 = time.time_ns() data, feature_names = objects_interaction_graph_ot.apply(ocel, parameters=parameters) + t1 = time.time_ns() if debug: - print("computed enable_object_interaction_graph_ot") + print("computed enable_object_interaction_graph_ot", "%.4f" % ((t1-t0)/10**9)) feature_namess = feature_namess + feature_names for i in range(len(data)): datas[i] = datas[i] + data[i] @@ -250,9 +277,11 @@ def apply(ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None): if enable_work_in_progress: if debug: print("computing enable_work_in_progress") + t0 = time.time_ns() data, feature_names = object_work_in_progress.apply(ocel, parameters=parameters) + t1 = time.time_ns() if debug: - print("computed enable_work_in_progress") + print("computed enable_work_in_progress", "%.4f" % ((t1-t0)/10**9)) feature_namess = feature_namess + feature_names for i in range(len(data)): datas[i] = datas[i] + data[i] @@ -260,9 +289,11 @@ def apply(ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None): if enable_object_lifecycle_unq_act: if debug: print("computing enable_object_lifecycle_unq_act") + t0 = time.time_ns() data, feature_names = object_lifecycle_unq_act.apply(ocel, parameters=parameters) + t1 = time.time_ns() if debug: - print("computed enable_object_lifecycle_unq_act") + print("computed enable_object_lifecycle_unq_act", "%.4f" % ((t1-t0)/10**9)) feature_namess = feature_namess + feature_names for i in range(len(data)): datas[i] = datas[i] + data[i] @@ -270,9 +301,11 @@ def apply(ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None): if enable_related_events_features: if debug: print("computing enable_related_events_features") + t0 = time.time_ns() data, feature_names = related_events_features.apply(ocel, parameters=parameters) + t1 = time.time_ns() if debug: - print("computed enable_related_events_features") + print("computed enable_related_events_features", "%.4f" % ((t1-t0)/10**9)) feature_namess = feature_namess + feature_names for i in range(len(data)): datas[i] = datas[i] + data[i] @@ -280,9 +313,11 @@ def apply(ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None): if enable_related_activities_features: if debug: print("computing enable_related_activities_features") + t0 = time.time_ns() data, feature_names = related_activities_features.apply(ocel, parameters=parameters) + t1 = time.time_ns() if debug: - print("computed enable_related_activities_features") + print("computed enable_related_activities_features", "%.4f" % ((t1-t0)/10**9)) feature_namess = feature_namess + feature_names for i in range(len(data)): datas[i] = datas[i] + data[i] @@ -290,9 +325,11 @@ def apply(ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None): if enable_obj_con_in_graph_features: if debug: print("computing enable_obj_con_in_graph_features") + t0 = time.time_ns() data, feature_names = obj_con_in_graph_features.apply(ocel, parameters=parameters) + t1 = time.time_ns() if debug: - print("computed enable_obj_con_in_graph_features") + print("computed enable_obj_con_in_graph_features", "%.4f" % ((t1-t0)/10**9)) feature_namess = feature_namess + feature_names for i in range(len(data)): datas[i] = datas[i] + data[i] @@ -300,9 +337,11 @@ def apply(ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None): if enable_object_lifecycle_paths: if debug: print("computing enable_object_lifecycle_paths") + t0 = time.time_ns() data, feature_names = object_lifecycle_paths.apply(ocel, parameters=parameters) + t1 = time.time_ns() if debug: - print("computed enable_object_lifecycle_paths") + print("computed enable_object_lifecycle_paths", "%.4f" % ((t1-t0)/10**9)) feature_namess = feature_namess + feature_names for i in range(len(data)): datas[i] = datas[i] + data[i] @@ -313,6 +352,10 @@ def apply(ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None): idxs = [i for i in range(len(ordered_objects)) if object_type[ordered_objects[i]] == filter_per_type] datas = [datas[i] for i in idxs] + T1 = time.time_ns() + if debug: + print("Total time: %.4f" % ((T1-T0)/10**9)) + return datas, feature_namess diff --git a/pm4py/objects/ocel/importer/csv/variants/pandas.py b/pm4py/objects/ocel/importer/csv/variants/pandas.py index f47da8808..84479f272 100644 --- a/pm4py/objects/ocel/importer/csv/variants/pandas.py +++ b/pm4py/objects/ocel/importer/csv/variants/pandas.py @@ -51,11 +51,11 @@ def apply(file_path: str, objects_path: str = None, parameters: Optional[Dict[An parameters = {} encoding = exec_utils.get_param_value(Parameters.ENCODING, parameters, pm4_constants.DEFAULT_ENCODING) - table = pd.read_csv(file_path, index_col=False, encoding=encoding, dtype_backend=pm4_constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + table = pd.read_csv(file_path, index_col=False, encoding=encoding) objects = None if objects_path is not None: - objects = pd.read_csv(objects_path, index_col=False, encoding=encoding, dtype_backend=pm4_constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + objects = pd.read_csv(objects_path, index_col=False, encoding=encoding) ocel = extended_table.get_ocel_from_extended_table(table, objects, parameters=parameters) ocel = ocel_consistency.apply(ocel, parameters=parameters) diff --git a/requirements_stable.txt b/requirements_stable.txt index 953ace731..258365607 100644 --- a/requirements_stable.txt +++ b/requirements_stable.txt @@ -2,7 +2,7 @@ colorama==0.4.6 contourpy==1.2.0 cycler==0.12.1 deprecation==2.1.0 -fonttools==4.44.0 +fonttools==4.44.1 graphviz==0.20.1 intervaltree==3.1.0 kiwisolver==1.4.5 diff --git a/tests/algorithm_test.py b/tests/algorithm_test.py index 0941dfc06..3393c6d93 100644 --- a/tests/algorithm_test.py +++ b/tests/algorithm_test.py @@ -102,7 +102,7 @@ def test_alpha_miner_log(self): net3, im3, fm3 = alpha_miner.apply_dfg(dfg, variant=alpha_miner.Variants.ALPHA_VERSION_CLASSIC) def test_alpha_miner_dataframe(self): - df = pd.read_csv(os.path.join("input_data", "running-example.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(os.path.join("input_data", "running-example.csv")) df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") from pm4py.algo.discovery.alpha import algorithm as alpha_miner net, im, fm = alpha_miner.apply(df, variant=alpha_miner.Variants.ALPHA_VERSION_CLASSIC) @@ -122,7 +122,7 @@ def test_performance_spectrum(self): log = xes_importer.apply(os.path.join("input_data", "running-example.xes")) from pm4py.algo.discovery.performance_spectrum import algorithm as pspectrum ps = pspectrum.apply(log, ["register request", "decide"]) - df = pd.read_csv(os.path.join("input_data", "running-example.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(os.path.join("input_data", "running-example.csv")) df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") ps = pspectrum.apply(df, ["register request", "decide"]) diff --git a/tests/alpha_test.py b/tests/alpha_test.py index 5809e0ba7..ac81f7540 100644 --- a/tests/alpha_test.py +++ b/tests/alpha_test.py @@ -26,7 +26,7 @@ def obtainPetriNetThroughAlphaMiner(self, log_name): if ".xes" in log_name: log = xes_importer.apply(log_name) else: - df = pd.read_csv(log_name, dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(log_name) df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") log = log_conversion.apply(df, variant=log_conversion.Variants.TO_EVENT_LOG) net, marking, fmarking = alpha_alg.apply(log) diff --git a/tests/csv_impexp_test.py b/tests/csv_impexp_test.py index 9a3df687c..a56cf2c7a 100644 --- a/tests/csv_impexp_test.py +++ b/tests/csv_impexp_test.py @@ -16,7 +16,7 @@ def test_importExportCSVtoXES(self): # to avoid static method warnings in tests, # that by construction of the unittest package have to be expressed in such way self.dummy_variable = "dummy_value" - df = pd.read_csv(os.path.join(INPUT_DATA_DIR, "running-example.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(os.path.join(INPUT_DATA_DIR, "running-example.csv")) df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") event_log = log_conversion.apply(df, variant=log_conversion.TO_EVENT_STREAM) event_log = sorting.sort_timestamp(event_log) @@ -36,7 +36,7 @@ def test_importExportCSVtoCSV(self): # to avoid static method warnings in tests, # that by construction of the unittest package have to be expressed in such way self.dummy_variable = "dummy_value" - df = pd.read_csv(os.path.join(INPUT_DATA_DIR, "running-example.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(os.path.join(INPUT_DATA_DIR, "running-example.csv")) df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") event_log = log_conversion.apply(df, variant=log_conversion.TO_EVENT_STREAM) event_log = sorting.sort_timestamp(event_log) @@ -49,7 +49,7 @@ def test_importExportCSVtoCSV(self): event_log_transformed = log_conversion.apply(log, variant=log_conversion.TO_EVENT_STREAM) df = log_conversion.apply(event_log_transformed, variant=log_conversion.TO_DATA_FRAME) df.to_csv(os.path.join(OUTPUT_DATA_DIR, "running-example-exported.csv")) - df = pd.read_csv(os.path.join(OUTPUT_DATA_DIR, "running-example-exported.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(os.path.join(OUTPUT_DATA_DIR, "running-example-exported.csv")) df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") event_log_imported_after_export = log_conversion.apply(df, variant=log_conversion.TO_EVENT_STREAM) log_imported_after_export = log_conversion.apply( diff --git a/tests/doc_tests.py b/tests/doc_tests.py index 56d8a72ae..e5841db8f 100644 --- a/tests/doc_tests.py +++ b/tests/doc_tests.py @@ -13,13 +13,13 @@ def load_running_example_xes(self): return log def load_running_example_df(self): - df = pd.read_csv(os.path.join("input_data", "running-example.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(os.path.join("input_data", "running-example.csv")) df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") return df def load_running_example_stream(self): from pm4py.objects.conversion.log import converter - df = pd.read_csv(os.path.join("input_data", "running-example.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(os.path.join("input_data", "running-example.csv")) df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") stream = converter.apply(df, variant=converter.TO_EVENT_STREAM) return stream @@ -35,13 +35,13 @@ def load_receipt_xes(self): return log def load_receipt_df(self): - df = pd.read_csv(os.path.join("input_data", "receipt.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(os.path.join("input_data", "receipt.csv")) df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") return df def load_receipt_stream(self): from pm4py.objects.conversion.log import converter - df = pd.read_csv(os.path.join("input_data", "receipt.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(os.path.join("input_data", "receipt.csv")) df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") stream = converter.apply(df, variant=converter.TO_EVENT_STREAM) return stream @@ -76,14 +76,14 @@ def test_3(self): import pandas as pd from pm4py.objects.conversion.log import converter as log_converter - log_csv = pd.read_csv(os.path.join("input_data", "running-example.csv"), sep=',', dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + log_csv = pd.read_csv(os.path.join("input_data", "running-example.csv"), sep=',') event_log = log_converter.apply(log_csv, variant=log_converter.Variants.TO_EVENT_LOG) def test_4(self): import pandas as pd from pm4py.objects.conversion.log import converter as log_converter - log_csv = pd.read_csv(os.path.join("input_data", "running-example.csv"), sep=',', dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + log_csv = pd.read_csv(os.path.join("input_data", "running-example.csv"), sep=',') log_csv.rename(columns={'case:concept:name': 'case'}, inplace=True) parameters = {log_converter.Variants.TO_EVENT_LOG.value.Parameters.CASE_ID_KEY: 'case'} event_log = log_converter.apply(log_csv, parameters=parameters, variant=log_converter.Variants.TO_EVENT_LOG) @@ -272,7 +272,7 @@ def test_38(self): def test_39(self): import os - df = pd.read_csv(os.path.join("input_data", "roadtraffic100traces.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(os.path.join("input_data", "roadtraffic100traces.csv")) df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") from pm4py.algo.filtering.pandas.attributes import attributes_filter diff --git a/tests/filtering_pandas_test.py b/tests/filtering_pandas_test.py index 7d058bae5..16fab933a 100644 --- a/tests/filtering_pandas_test.py +++ b/tests/filtering_pandas_test.py @@ -21,7 +21,7 @@ def test_prefiltering_dataframe(self): # that by construction of the unittest package have to be expressed in such way self.dummy_variable = "dummy_value" input_log = os.path.join(INPUT_DATA_DIR, "running-example.csv") - dataframe = pd.read_csv(input_log, dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv(input_log) dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format="ISO8601") dataframe = attributes_filter.filter_df_keeping_spno_activities(dataframe, activity_key="concept:name") dataframe = case_filter.filter_on_ncases(dataframe, case_id_glue="case:concept:name") @@ -35,7 +35,7 @@ def test_filtering_variants(self): # that by construction of the unittest package have to be expressed in such way self.dummy_variable = "dummy_value" input_log = os.path.join(INPUT_DATA_DIR, "running-example.csv") - dataframe = pd.read_csv(input_log, dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv(input_log) dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format="ISO8601") variants = case_statistics.get_variant_statistics(dataframe) chosen_variants = [variants[0]["variant"]] @@ -47,7 +47,7 @@ def test_filtering_attr_events(self): # that by construction of the unittest package have to be expressed in such way self.dummy_variable = "dummy_value" input_log = os.path.join(INPUT_DATA_DIR, "running-example.csv") - dataframe = pd.read_csv(input_log, dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv(input_log) dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format="ISO8601") df1 = attributes_filter.apply_events(dataframe, ["reject request"], parameters={attributes_filter.Parameters.POSITIVE: True}) @@ -61,7 +61,7 @@ def test_filtering_paths(self): # that by construction of the unittest package have to be expressed in such way self.dummy_variable = "dummy_value" input_log = os.path.join(INPUT_DATA_DIR, "running-example.csv") - dataframe = pd.read_csv(input_log, dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv(input_log) dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format="ISO8601") df3 = paths_filter.apply(dataframe, [("examine casually", "check ticket")], {paths_filter.Parameters.POSITIVE: False}) @@ -75,7 +75,7 @@ def test_filtering_timeframe(self): # that by construction of the unittest package have to be expressed in such way self.dummy_variable = "dummy_value" input_log = os.path.join(INPUT_DATA_DIR, "receipt.csv") - df = pd.read_csv(input_log, dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(input_log) df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") df1 = timestamp_filter.apply_events(df, "2011-03-09 00:00:00", "2012-01-18 23:59:59") df2 = timestamp_filter.filter_traces_intersecting(df, "2011-03-09 00:00:00", "2012-01-18 23:59:59") @@ -86,24 +86,24 @@ def test_filtering_timeframe(self): def test_filtering_traces_attribute_in_timeframe(self): input_log = os.path.join(INPUT_DATA_DIR, "receipt.csv") - df = pd.read_csv(input_log, dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(input_log) df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") df1 = timestamp_filter.filter_traces_attribute_in_timeframe(df, "concept:name", "Confirmation of receipt", "2011-03-09 00:00:00", "2012-01-18 23:59:59") def test_AeventuallyB_pos(self): - df = pd.read_csv(os.path.join("input_data", "running-example.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(os.path.join("input_data", "running-example.csv")) df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") filt_A_ev_B_pos = ltl_checker.eventually_follows(df, ["check ticket", "pay compensation"], parameters={ltl_checker.Parameters.POSITIVE: True}) def test_AeventuallyB_neg(self): - df = pd.read_csv(os.path.join("input_data", "running-example.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(os.path.join("input_data", "running-example.csv")) df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") filt_A_ev_B_neg = ltl_checker.eventually_follows(df, ["check ticket", "pay compensation"], parameters={ltl_checker.Parameters.POSITIVE: False}) def test_AeventuallyBeventuallyC_pos(self): - df = pd.read_csv(os.path.join("input_data", "running-example.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(os.path.join("input_data", "running-example.csv")) df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") filt_A_ev_B_ev_C_pos = ltl_checker.eventually_follows(df, ["check ticket", "decide", "pay compensation"], @@ -111,7 +111,7 @@ def test_AeventuallyBeventuallyC_pos(self): ltl_checker.Parameters.POSITIVE: True}) def test_AeventuallyBeventuallyC_neg(self): - df = pd.read_csv(os.path.join("input_data", "running-example.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(os.path.join("input_data", "running-example.csv")) df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") filt_A_ev_B_ev_C_neg = ltl_checker.eventually_follows(df, ["check ticket", "decide", "pay compensation"], @@ -119,38 +119,38 @@ def test_AeventuallyBeventuallyC_neg(self): ltl_checker.Parameters.POSITIVE: False}) def test_AnextBnextC_pos(self): - df = pd.read_csv(os.path.join("input_data", "running-example.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(os.path.join("input_data", "running-example.csv")) df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") filt_A_next_B_next_C_pos = ltl_checker.A_next_B_next_C(df, "check ticket", "decide", "pay compensation", parameters={ltl_checker.Parameters.POSITIVE: True}) def test_AnextBnextC_neg(self): - df = pd.read_csv(os.path.join("input_data", "running-example.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(os.path.join("input_data", "running-example.csv")) df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") filt_A_next_B_next_C_neg = ltl_checker.A_next_B_next_C(df, "check ticket", "decide", "pay compensation", parameters={ltl_checker.Parameters.POSITIVE: False}) def test_fourEeyesPrinciple_pos(self): - df = pd.read_csv(os.path.join("input_data", "running-example.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(os.path.join("input_data", "running-example.csv")) df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") filt_foureyes_pos = ltl_checker.four_eyes_principle(df, "check ticket", "pay compensation", parameters={ltl_checker.Parameters.POSITIVE: True}) def test_fourEeyesPrinciple_neg(self): - df = pd.read_csv(os.path.join("input_data", "running-example.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(os.path.join("input_data", "running-example.csv")) df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") filt_foureyes_neg = ltl_checker.four_eyes_principle(df, "check ticket", "pay compensation", parameters={ltl_checker.Parameters.POSITIVE: False}) def test_attrValueDifferentPersons_pos(self): - df = pd.read_csv(os.path.join("input_data", "running-example.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(os.path.join("input_data", "running-example.csv")) df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") attr_value_different_persons_pos = ltl_checker.attr_value_different_persons(df, "check ticket", parameters={ ltl_checker.Parameters.POSITIVE: True}) def test_attrValueDifferentPersons_neg(self): - df = pd.read_csv(os.path.join("input_data", "running-example.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(os.path.join("input_data", "running-example.csv")) df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") attr_value_different_persons_neg = ltl_checker.attr_value_different_persons(df, "check ticket", parameters={ @@ -159,7 +159,7 @@ def test_attrValueDifferentPersons_neg(self): def test_attr_value_repetition(self): from pm4py.algo.filtering.pandas.attr_value_repetition import filter - df = pd.read_csv(os.path.join("input_data", "running-example.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(os.path.join("input_data", "running-example.csv")) df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") filtered_df = filter.apply(df, "Sara", parameters={constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY: "org:resource"}) diff --git a/tests/graphs_forming.py b/tests/graphs_forming.py index 1518af4ce..95afcd3f4 100644 --- a/tests/graphs_forming.py +++ b/tests/graphs_forming.py @@ -17,7 +17,7 @@ def test_dfCasedurationPlotSemilogx(self): # that by construction of the unittest package have to be expressed in such way self.dummy_variable = "dummy_value" - df = pd.read_csv(os.path.join("input_data", "receipt.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(os.path.join("input_data", "receipt.csv")) df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") x, y = pd_case_statistics.get_kde_caseduration(df) json = pd_case_statistics.get_kde_caseduration_json(df) @@ -38,7 +38,7 @@ def test_dfNumericAttribute(self): # that by construction of the unittest package have to be expressed in such way self.dummy_variable = "dummy_value" - df = pd.read_csv(os.path.join("input_data", "roadtraffic100traces.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(os.path.join("input_data", "roadtraffic100traces.csv")) df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") x, y = pd_attributes_filter.get_kde_numeric_attribute(df, "amount") @@ -60,7 +60,7 @@ def test_dfDateAttribute(self): # that by construction of the unittest package have to be expressed in such way self.dummy_variable = "dummy_value" - df = pd.read_csv(os.path.join("input_data", "roadtraffic100traces.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(os.path.join("input_data", "roadtraffic100traces.csv")) df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") x, y = pd_attributes_filter.get_kde_date_attribute(df) diff --git a/tests/heuminer_test.py b/tests/heuminer_test.py index 6b6f7d408..38bd1751d 100644 --- a/tests/heuminer_test.py +++ b/tests/heuminer_test.py @@ -34,14 +34,14 @@ def test_petrinet_receipt_df(self): # to avoid static method warnings in tests, # that by construction of the unittest package have to be expressed in such way self.dummy_variable = "dummy_value" - df = pd.read_csv(os.path.join(INPUT_DATA_DIR, "receipt.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(os.path.join(INPUT_DATA_DIR, "receipt.csv")) df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") net, im, fm = heuristics_miner.apply(df) gviz = pn_vis.apply(net, im, fm) del gviz def test_heuplusplus_perf_df(self): - df = pd.read_csv(os.path.join(INPUT_DATA_DIR, "interval_event_log.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(os.path.join(INPUT_DATA_DIR, "interval_event_log.csv")) df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") heu_net = heuristics_miner.Variants.PLUSPLUS.value.apply_heu_pandas(df, parameters={"heu_net_decoration": "performance"}) gviz = hn_vis.apply(heu_net) @@ -52,7 +52,7 @@ def test_heuplusplus_perf_log(self): gviz = hn_vis.apply(heu_net) def test_heuplusplus_petri_df(self): - df = pd.read_csv(os.path.join(INPUT_DATA_DIR, "interval_event_log.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(os.path.join(INPUT_DATA_DIR, "interval_event_log.csv")) df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") net, im, fm = heuristics_miner.Variants.PLUSPLUS.value.apply_pandas(df) gviz = pn_vis.apply(net, im, fm) diff --git a/tests/inductive_test.py b/tests/inductive_test.py index 238623fd5..51035aff0 100644 --- a/tests/inductive_test.py +++ b/tests/inductive_test.py @@ -32,7 +32,7 @@ def obtain_petri_net_through_im(self, log_name, variant=inductive_miner.Variants if ".xes" in log_name: log = xes_importer.apply(log_name) else: - df = pd.read_csv(log_name, dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(log_name) df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") log = log_conversion.apply(df, variant=log_conversion.Variants.TO_EVENT_LOG) process_tree = inductive_miner.apply(log) diff --git a/tests/main_fac_test.py b/tests/main_fac_test.py index 55109bbcb..fa0e93f79 100644 --- a/tests/main_fac_test.py +++ b/tests/main_fac_test.py @@ -46,7 +46,7 @@ def test_memory_efficient_iterparse(self): variant=xes_importer.Variants.ITERPARSE_MEM_COMPRESSED) def test_alphaminer_stream(self): - df = pd.read_csv(os.path.join("input_data", "running-example.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(os.path.join("input_data", "running-example.csv")) df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") stream = log_conversion.apply(df, variant=log_conversion.TO_EVENT_STREAM) net, im, fm = alpha_miner.apply(stream) @@ -59,7 +59,7 @@ def test_alphaminer_stream(self): sim = simplicity.apply(net) def test_alphaminer_df(self): - log = pd.read_csv(os.path.join("input_data", "running-example.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + log = pd.read_csv(os.path.join("input_data", "running-example.csv")) log = dataframe_utils.convert_timestamp_columns_in_df(log, timest_format="ISO8601") net, im, fm = alpha_miner.apply(log) aligned_traces_tr = tr_alg.apply(log, net, im, fm) @@ -83,7 +83,7 @@ def test_inductiveminer_log(self): sim = simplicity.apply(net) def test_inductiveminer_df(self): - log = pd.read_csv(os.path.join("input_data", "running-example.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + log = pd.read_csv(os.path.join("input_data", "running-example.csv")) log = dataframe_utils.convert_timestamp_columns_in_df(log, timest_format="ISO8601") process_tree = inductive_miner.apply(log) net, im, fm = process_tree_converter.apply(process_tree) @@ -107,7 +107,7 @@ def test_heu_log(self): sim = simplicity.apply(net) def test_heu_stream(self): - df = pd.read_csv(os.path.join("input_data", "running-example.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(os.path.join("input_data", "running-example.csv")) df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") stream = log_conversion.apply(df, variant=log_conversion.TO_EVENT_STREAM) net, im, fm = heuristics_miner.apply(stream) @@ -120,7 +120,7 @@ def test_heu_stream(self): sim = simplicity.apply(net) def test_heu_df(self): - log = pd.read_csv(os.path.join("input_data", "running-example.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + log = pd.read_csv(os.path.join("input_data", "running-example.csv")) log = dataframe_utils.convert_timestamp_columns_in_df(log, timest_format="ISO8601") net, im, fm = heuristics_miner.apply(log) aligned_traces_tr = tr_alg.apply(log, net, im, fm) @@ -136,13 +136,13 @@ def test_dfg_log(self): dfg = dfg_mining.apply(log) def test_dfg_stream(self): - df = pd.read_csv(os.path.join("input_data", "running-example.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(os.path.join("input_data", "running-example.csv")) df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") stream = log_conversion.apply(df, variant=log_conversion.TO_EVENT_STREAM) dfg = dfg_mining.apply(stream) def test_dfg_df(self): - df = pd.read_csv(os.path.join("input_data", "running-example.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(os.path.join("input_data", "running-example.csv")) df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") dfg = dfg_mining.apply(df) @@ -151,18 +151,18 @@ def test_ts_log(self): ts = ts_disc.apply(log) def test_ts_stream(self): - df = pd.read_csv(os.path.join("input_data", "running-example.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(os.path.join("input_data", "running-example.csv")) df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") stream = log_conversion.apply(df, variant=log_conversion.TO_EVENT_STREAM) ts = ts_disc.apply(stream) def test_ts_df(self): - df = pd.read_csv(os.path.join("input_data", "running-example.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(os.path.join("input_data", "running-example.csv")) df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") ts = ts_disc.apply(df) def test_csvimp_xesexp(self): - df = pd.read_csv(os.path.join("input_data", "running-example.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(os.path.join("input_data", "running-example.csv")) df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") log0 = log_conversion.apply(df, variant=log_conversion.TO_EVENT_STREAM) log = log_conversion.apply(log0, variant=log_conversion.TO_EVENT_LOG) @@ -184,7 +184,7 @@ def test_xesimp_xesexp(self): os.remove('ru.xes') def test_pdimp_xesexp(self): - log0 = pd.read_csv(os.path.join("input_data", "running-example.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + log0 = pd.read_csv(os.path.join("input_data", "running-example.csv")) log0 = dataframe_utils.convert_timestamp_columns_in_df(log0, timest_format="ISO8601") log = log_conversion.apply(log0, variant=log_conversion.TO_EVENT_LOG) stream = log_conversion.apply(log0, variant=log_conversion.TO_EVENT_STREAM) diff --git a/tests/other_tests.py b/tests/other_tests.py index 29497ff5d..e1435a9d3 100644 --- a/tests/other_tests.py +++ b/tests/other_tests.py @@ -73,7 +73,7 @@ def test_performance_spectrum_log(self): 1000, {}) def test_performance_spectrum_df(self): - df = pd.read_csv(os.path.join("input_data", "receipt.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(os.path.join("input_data", "receipt.csv")) df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") pspectr = df_pspectrum.apply(df, ["T02 Check confirmation of receipt", "T03 Adjust confirmation of receipt"], 1000, {}) @@ -124,7 +124,7 @@ def test_footprints_tree(self): variant=footprints_conformance.Variants.TRACE_EXTENSIVE) def test_footprints_tree_df(self): - df = pd.read_csv(os.path.join("input_data", "running-example.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(os.path.join("input_data", "running-example.csv")) df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") from pm4py.algo.discovery.inductive import algorithm as inductive_miner log = converter.apply(df, variant=converter.Variants.TO_EVENT_LOG) @@ -163,7 +163,7 @@ def test_sojourn_time_xes(self): def test_sojourn_time_pandas(self): import pandas as pd - dataframe = pd.read_csv(os.path.join("input_data", "interval_event_log.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv(os.path.join("input_data", "interval_event_log.csv")) from pm4py.objects.log.util import dataframe_utils dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format="ISO8601") from pm4py.statistics.sojourn_time.pandas import get @@ -176,7 +176,7 @@ def test_concurrent_activities_xes(self): def test_concurrent_activities_pandas(self): import pandas as pd - dataframe = pd.read_csv(os.path.join("input_data", "interval_event_log.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv(os.path.join("input_data", "interval_event_log.csv")) from pm4py.objects.log.util import dataframe_utils dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format="ISO8601") from pm4py.statistics.concurrent_activities.pandas import get @@ -189,7 +189,7 @@ def test_efg_xes(self): def test_efg_pandas(self): import pandas as pd - dataframe = pd.read_csv(os.path.join("input_data", "interval_event_log.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv(os.path.join("input_data", "interval_event_log.csv")) from pm4py.objects.log.util import dataframe_utils dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format="ISO8601") from pm4py.statistics.eventually_follows.pandas import get @@ -214,11 +214,11 @@ def test_dfg_align(self): aligned_traces = dfg_alignment.apply(log, dfg, sa, ea) def test_insert_idx_in_trace(self): - df = pd.read_csv(os.path.join("input_data", "running-example.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(os.path.join("input_data", "running-example.csv")) df = pandas_utils.insert_ev_in_tr_index(df) def test_automatic_feature_extraction(self): - df = pd.read_csv(os.path.join("input_data", "receipt.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(os.path.join("input_data", "receipt.csv")) fea_df = dataframe_utils.automatic_feature_extraction_df(df) def test_log_to_trie(self): @@ -253,7 +253,7 @@ def test_projection_univariate_log(self): def test_projection_univariate_df(self): import pandas as pd from pm4py.util.compression import util as compression_util - dataframe = pd.read_csv(os.path.join("input_data", "receipt.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv(os.path.join("input_data", "receipt.csv")) dataframe["time:timestamp"] = pd.to_datetime(dataframe["time:timestamp"], utc=True, format="ISO8601") cl = compression_util.project_univariate(dataframe, "concept:name") # just verify that the set is non-empty @@ -278,7 +278,7 @@ def test_compression_univariate_log(self): def test_compression_univariate_df(self): import pandas as pd from pm4py.util.compression import util as compression_util - dataframe = pd.read_csv(os.path.join("input_data", "receipt.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv(os.path.join("input_data", "receipt.csv")) dataframe["time:timestamp"] = pd.to_datetime(dataframe["time:timestamp"], utc=True, format="ISO8601") cl, lookup = compression_util.compress_univariate(dataframe, "concept:name") # just verify that the set is non-empty @@ -303,7 +303,7 @@ def test_compression_multivariate_log(self): def test_compression_multivariate_df(self): import pandas as pd from pm4py.util.compression import util as compression_util - dataframe = pd.read_csv(os.path.join("input_data", "receipt.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv(os.path.join("input_data", "receipt.csv")) dataframe["time:timestamp"] = pd.to_datetime(dataframe["time:timestamp"], utc=True, format="ISO8601") cl, lookup = compression_util.compress_multivariate(dataframe, ["concept:name", "org:resource"]) # just verify that the set is non-empty @@ -331,6 +331,72 @@ def test_log_to_target_next_activity(self): log = pm4py.read_xes("input_data/running-example.xes") next_activity_target, next_activities = log_to_target.apply(log, variant=log_to_target.Variants.NEXT_ACTIVITY) + def test_ocel_split_cc_non_simpl_interface(self): + import pm4py + ocel = pm4py.read_ocel("input_data/ocel/example_log.jsonocel") + from pm4py.algo.transformation.ocel.split_ocel import algorithm as split_ocel + res = split_ocel.apply(ocel, variant=split_ocel.Variants.CONNECTED_COMPONENTS) + + def test_ocel_split_ancestors_non_simpl_interface(self): + import pm4py + ocel = pm4py.read_ocel("input_data/ocel/example_log.jsonocel") + from pm4py.algo.transformation.ocel.split_ocel import algorithm as split_ocel + res = split_ocel.apply(ocel, parameters={"object_type": "order"}, variant=split_ocel.Variants.ANCESTORS_DESCENDANTS) + + def test_ocel_object_features_non_simpl_interface(self): + import pm4py + ocel = pm4py.read_ocel("input_data/ocel/example_log.jsonocel") + from pm4py.algo.transformation.ocel.features.objects import algorithm as ocel_fea + res = ocel_fea.apply(ocel) + + def test_ocel_event_features_non_simpl_interface(self): + import pm4py + ocel = pm4py.read_ocel("input_data/ocel/example_log.jsonocel") + from pm4py.algo.transformation.ocel.features.events import algorithm as ocel_fea + res = ocel_fea.apply(ocel) + + def test_ocel_event_object_features_non_simpl_interface(self): + import pm4py + ocel = pm4py.read_ocel("input_data/ocel/example_log.jsonocel") + from pm4py.algo.transformation.ocel.features.events_objects import algorithm as ocel_fea + res = ocel_fea.apply(ocel) + + def test_ocel_interaction_graph_non_simpl_interface(self): + import pm4py + ocel = pm4py.read_ocel("input_data/ocel/example_log.jsonocel") + from pm4py.algo.transformation.ocel.graphs import object_interaction_graph + object_interaction_graph.apply(ocel) + + def test_ocel_descendants_graph_non_simpl_interface(self): + import pm4py + ocel = pm4py.read_ocel("input_data/ocel/example_log.jsonocel") + from pm4py.algo.transformation.ocel.graphs import object_descendants_graph + object_descendants_graph.apply(ocel) + + def test_ocel_inheritance_graph_non_simpl_interface(self): + import pm4py + ocel = pm4py.read_ocel("input_data/ocel/example_log.jsonocel") + from pm4py.algo.transformation.ocel.graphs import object_inheritance_graph + object_inheritance_graph.apply(ocel) + + def test_ocel_cobirth_graph_non_simpl_interface(self): + import pm4py + ocel = pm4py.read_ocel("input_data/ocel/example_log.jsonocel") + from pm4py.algo.transformation.ocel.graphs import object_cobirth_graph + object_cobirth_graph.apply(ocel) + + def test_ocel_codeath_graph_non_simpl_interface(self): + import pm4py + ocel = pm4py.read_ocel("input_data/ocel/example_log.jsonocel") + from pm4py.algo.transformation.ocel.graphs import object_codeath_graph + object_codeath_graph.apply(ocel) + + def test_ocel_description_non_simpl_interface(self): + import pm4py + ocel = pm4py.read_ocel("input_data/ocel/example_log.jsonocel") + from pm4py.algo.transformation.ocel.description.variants import variant1 + variant1.apply(ocel) + if __name__ == "__main__": unittest.main() diff --git a/tests/passed_time.py b/tests/passed_time.py index 9d3c86031..eb4502f5c 100644 --- a/tests/passed_time.py +++ b/tests/passed_time.py @@ -16,7 +16,7 @@ def test_passedtime_prepost_log(self): del prepost def test_passedtime_prepost_df(self): - df = pd.read_csv(os.path.join("input_data", "running-example.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(os.path.join("input_data", "running-example.csv")) df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") prepost = df_passed_time.apply(df, "decide", variant=df_passed_time.Variants.PREPOST) del prepost diff --git a/tests/role_detection.py b/tests/role_detection.py index a748eb3dc..9edf14140 100644 --- a/tests/role_detection.py +++ b/tests/role_detection.py @@ -10,7 +10,7 @@ class RoleDetectionTest(unittest.TestCase): def test_role_running_csv(self): - df = pd.read_csv(os.path.join("input_data", "running-example.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(os.path.join("input_data", "running-example.csv")) df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") roles = role_mining.apply(df) @@ -19,7 +19,7 @@ def test_role_running_xes(self): roles = role_mining.apply(log) def test_role_receipt_csv(self): - df = pd.read_csv(os.path.join("input_data", "receipt.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv(os.path.join("input_data", "receipt.csv")) df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format="ISO8601") roles = role_mining.apply(df) diff --git a/tests/simplified_interface.py b/tests/simplified_interface.py index 90d543795..363c016b4 100644 --- a/tests/simplified_interface.py +++ b/tests/simplified_interface.py @@ -13,7 +13,7 @@ class SimplifiedInterfaceTest(unittest.TestCase): def test_csv(self): - df = pd.read_csv("input_data/running-example.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv("input_data/running-example.csv") df["time:timestamp"] = pd.to_datetime(df["time:timestamp"], utc=True, format="ISO8601") df["case:concept:name"] = df["case:concept:name"].astype("string") @@ -73,17 +73,19 @@ def test_read_tree(self): def test_read_dfg(self): dfg, sa, ea = pm4py.read_dfg("input_data/running-example.dfg") - def test_alignments(self): + def test_alignments_simpl_interface(self): for legacy_obj in [True, False]: - log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj) - net, im, fm = pm4py.discover_petri_net_inductive(log) - aligned_traces = pm4py.conformance_diagnostics_alignments(log, net, im, fm, return_diagnostics_dataframe=False) + for diagn_df in [True, False]: + log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj) + net, im, fm = pm4py.discover_petri_net_inductive(log) + aligned_traces = pm4py.conformance_diagnostics_alignments(log, net, im, fm, return_diagnostics_dataframe=diagn_df) - def test_tbr(self): + def test_tbr_simpl_interface(self): for legacy_obj in [True, False]: - log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj) - net, im, fm = pm4py.discover_petri_net_inductive(log) - replayed_traces = pm4py.conformance_diagnostics_token_based_replay(log, net, im, fm, return_diagnostics_dataframe=False) + for diagn_df in [True, False]: + log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj) + net, im, fm = pm4py.discover_petri_net_inductive(log) + replayed_traces = pm4py.conformance_diagnostics_token_based_replay(log, net, im, fm, return_diagnostics_dataframe=diagn_df) def test_fitness_alignments(self): for legacy_obj in [True, False]: @@ -157,7 +159,7 @@ def test_statistics_log(self): pm4py.get_variants_as_tuples(log) def test_statistics_df(self): - df = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv("input_data/running-example-transformed.csv") df["Timestamp"] = pd.to_datetime(df["Timestamp"], utc="True", format="ISO8601") df["CaseID"] = df["CaseID"].astype("string") @@ -190,7 +192,7 @@ def test_new_statistics_log(self): pm4py.get_case_arrival_average(log) def test_new_statistics_df(self): - df = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv("input_data/running-example-transformed.csv") df["Timestamp"] = pd.to_datetime(df["Timestamp"], utc=True, format="ISO8601") df["CaseID"] = df["CaseID"].astype("string") @@ -204,7 +206,7 @@ def test_serialization_log(self): log2 = pm4py.deserialize(ser) def test_serialization_dataframe(self): - df = pd.read_csv("input_data/running-example.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + df = pd.read_csv("input_data/running-example.csv") df["time:timestamp"] = pd.to_datetime(df["time:timestamp"], utc=True, format="ISO8601") ser = pm4py.serialize(df) df2 = pm4py.deserialize(ser) @@ -264,29 +266,32 @@ def test_ext_marking_equation_sync_net(self): res = pm4py.solve_extended_marking_equation(log[0], sync_net, sync_im, sync_fm) self.assertIsNotNone(res) - def test_alignments_tree(self): + def test_alignments_tree_simpl_interface(self): import pm4py for legacy_obj in [True, False]: - log = pm4py.read_xes(os.path.join("input_data", "running-example.xes"), return_legacy_log_object=legacy_obj) - tree = pm4py.read_ptml(os.path.join("input_data", "running-example.ptml")) - res = pm4py.conformance_diagnostics_alignments(log, tree, return_diagnostics_dataframe=False) - self.assertIsNotNone(res) + for diagn_df in [True, False]: + log = pm4py.read_xes(os.path.join("input_data", "running-example.xes"), return_legacy_log_object=legacy_obj) + tree = pm4py.read_ptml(os.path.join("input_data", "running-example.ptml")) + res = pm4py.conformance_diagnostics_alignments(log, tree, return_diagnostics_dataframe=diagn_df) + self.assertIsNotNone(res) - def test_alignments_dfg(self): + def test_alignments_dfg_simpl_interface(self): import pm4py for legacy_obj in [True, False]: - log = pm4py.read_xes(os.path.join("input_data", "running-example.xes"), return_legacy_log_object=legacy_obj) - dfg, sa, ea = pm4py.read_dfg(os.path.join("input_data", "running-example.dfg")) - res = pm4py.conformance_diagnostics_alignments(log, dfg, sa, ea, return_diagnostics_dataframe=False) - self.assertIsNotNone(res) + for diagn_df in [True, False]: + log = pm4py.read_xes(os.path.join("input_data", "running-example.xes"), return_legacy_log_object=legacy_obj) + dfg, sa, ea = pm4py.read_dfg(os.path.join("input_data", "running-example.dfg")) + res = pm4py.conformance_diagnostics_alignments(log, dfg, sa, ea, return_diagnostics_dataframe=diagn_df) + self.assertIsNotNone(res) - def test_alignments_bpmn(self): + def test_alignments_bpmn_simpl_interface(self): import pm4py for legacy_obj in [True, False]: - log = pm4py.read_xes(os.path.join("input_data", "running-example.xes"), return_legacy_log_object=legacy_obj) - bpmn_graph = pm4py.read_bpmn(os.path.join("input_data", "running-example.bpmn")) - res = pm4py.conformance_diagnostics_alignments(log, bpmn_graph, return_diagnostics_dataframe=False) - self.assertIsNotNone(res) + for diagn_df in [True, False]: + log = pm4py.read_xes(os.path.join("input_data", "running-example.xes"), return_legacy_log_object=legacy_obj) + bpmn_graph = pm4py.read_bpmn(os.path.join("input_data", "running-example.bpmn")) + res = pm4py.conformance_diagnostics_alignments(log, bpmn_graph, return_diagnostics_dataframe=diagn_df) + self.assertIsNotNone(res) def test_discovery_inductive_bpmn(self): import pm4py @@ -352,7 +357,7 @@ def test_write_bpmn(self): os.remove("test_output_data/running-example.bpmn") def test_rebase(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True, format="ISO8601") dataframe["CaseID"] = dataframe["CaseID"].astype("string") @@ -375,7 +380,7 @@ def test_sample_cases_log(self): pm4py.sample_cases(log, 2) def test_sample_cases_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True, format="ISO8601") dataframe["CaseID"] = dataframe["CaseID"].astype("string") @@ -387,7 +392,7 @@ def test_sample_events_log(self): pm4py.sample_events(log, 2) def test_sample_events_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True, format="ISO8601") dataframe["CaseID"] = dataframe["CaseID"].astype("string") @@ -407,7 +412,7 @@ def test_artificial_start_end_log(self): pm4py.insert_artificial_start_end(log) def test_artificial_start_end_dataframe(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True, format="ISO8601") dataframe["CaseID"] = dataframe["CaseID"].astype("string") @@ -435,7 +440,7 @@ def test_split_train_test_log(self): pm4py.split_train_test(log, train_percentage=0.6) def test_split_train_test_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True, format="ISO8601") dataframe["CaseID"] = dataframe["CaseID"].astype("string") @@ -447,7 +452,7 @@ def test_get_prefixes_log(self): pm4py.get_prefixes_from_log(log, 3) def test_get_prefixes_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True, format="ISO8601") dataframe["CaseID"] = dataframe["CaseID"].astype("string") @@ -463,7 +468,7 @@ def test_hw_log(self): pm4py.discover_handover_of_work_network(log) def test_hw_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True, format="ISO8601") dataframe["CaseID"] = dataframe["CaseID"].astype("string") @@ -475,7 +480,7 @@ def test_wt_log(self): pm4py.discover_working_together_network(log) def test_wt_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True, format="ISO8601") dataframe["CaseID"] = dataframe["CaseID"].astype("string") @@ -487,7 +492,7 @@ def test_act_based_res_sim_log(self): pm4py.discover_activity_based_resource_similarity(log) def test_act_based_res_sim_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True, format="ISO8601") dataframe["CaseID"] = dataframe["CaseID"].astype("string") @@ -499,7 +504,7 @@ def test_subcontracting_log(self): pm4py.discover_subcontracting_network(log) def test_subcontracting_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True, format="ISO8601") dataframe["CaseID"] = dataframe["CaseID"].astype("string") @@ -511,7 +516,7 @@ def test_roles_log(self): pm4py.discover_organizational_roles(log) def test_roles_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True, format="ISO8601") dataframe["CaseID"] = dataframe["CaseID"].astype("string") @@ -523,7 +528,7 @@ def test_network_analysis_log(self): pm4py.discover_network_analysis(log, "case:concept:name", "case:concept:name", "org:resource", "org:resource", "concept:name") def test_network_analysis_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True, format="ISO8601") dataframe["CaseID"] = dataframe["CaseID"].astype("string") @@ -535,25 +540,27 @@ def test_discover_batches_log(self): pm4py.discover_batches(log) def test_discover_batches_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True, format="ISO8601") dataframe["CaseID"] = dataframe["CaseID"].astype("string") pm4py.discover_batches(dataframe, activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp", resource_key="Resource") - def test_log_skeleton_log(self): + def test_log_skeleton_log_simplified_interface(self): for legacy_obj in [True, False]: - log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj) - model = pm4py.discover_log_skeleton(log) - pm4py.conformance_log_skeleton(log, model, return_diagnostics_dataframe=False) + for diagn_df in [True, False]: + log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj) + model = pm4py.discover_log_skeleton(log) + pm4py.conformance_log_skeleton(log, model, return_diagnostics_dataframe=diagn_df) - def test_log_skeleton_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) - dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True, format="ISO8601") - dataframe["CaseID"] = dataframe["CaseID"].astype("string") + def test_log_skeleton_df_simplified_interface(self): + for diagn_df in [True, False]: + dataframe = pd.read_csv("input_data/running-example-transformed.csv") + dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True, format="ISO8601") + dataframe["CaseID"] = dataframe["CaseID"].astype("string") - model = pm4py.discover_log_skeleton(dataframe, activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp") - pm4py.conformance_log_skeleton(dataframe, model, activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp", return_diagnostics_dataframe=False) + model = pm4py.discover_log_skeleton(dataframe, activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp") + pm4py.conformance_log_skeleton(dataframe, model, activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp", return_diagnostics_dataframe=diagn_df) def test_temporal_profile_log(self): for legacy_obj in [True, False]: @@ -562,7 +569,7 @@ def test_temporal_profile_log(self): pm4py.conformance_temporal_profile(log, model) def test_temporal_profile_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True, format="ISO8601") dataframe["CaseID"] = dataframe["CaseID"].astype("string") @@ -581,7 +588,7 @@ def test_ocel_flattening(self): ocel = pm4py.read_ocel("input_data/ocel/example_log.csv") pm4py.ocel_flattening(ocel, "order") def test_stats_var_tuples_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True, format="ISO8601") dataframe["CaseID"] = dataframe["CaseID"].astype("string") @@ -593,7 +600,7 @@ def test_stats_cycle_time_log(self): pm4py.get_cycle_time(log) def test_stats_cycle_time_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True, format="ISO8601") dataframe["CaseID"] = dataframe["CaseID"].astype("string") @@ -605,7 +612,7 @@ def test_stats_case_durations_log(self): pm4py.get_all_case_durations(log) def test_stats_case_durations_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True, format="ISO8601") dataframe["CaseID"] = dataframe["CaseID"].astype("string") @@ -617,7 +624,7 @@ def test_stats_case_duration_log(self): pm4py.get_case_duration(log, "1") def test_stats_case_duration_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True, format="ISO8601") dataframe["CaseID"] = dataframe["CaseID"].astype("string") @@ -629,7 +636,7 @@ def test_stats_act_pos_summary_log(self): pm4py.get_activity_position_summary(log, "check ticket") def test_stats_act_pos_summary_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True, format="ISO8601") dataframe["CaseID"] = dataframe["CaseID"].astype("string") @@ -641,7 +648,7 @@ def test_filter_act_done_diff_res_log(self): pm4py.filter_activity_done_different_resources(log, "check ticket") def test_filter_act_done_diff_res_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True) dataframe["CaseID"] = dataframe["CaseID"].astype("string") @@ -653,7 +660,7 @@ def test_filter_four_eyes_principle_log(self): pm4py.filter_four_eyes_principle(log, "register request", "check ticket") def test_filter_four_eyes_principle_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True, format="ISO8601") dataframe["CaseID"] = dataframe["CaseID"].astype("string") @@ -665,7 +672,7 @@ def test_filter_rel_occ_log(self): pm4py.filter_log_relative_occurrence_event_attribute(log, 0.8, level="cases") def test_filter_rel_occ_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True, format="ISO8601") dataframe["CaseID"] = dataframe["CaseID"].astype("string") @@ -677,7 +684,7 @@ def test_filter_start_activities_log(self): pm4py.filter_start_activities(log, ["register request"]) def test_filter_start_activities_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True, format="ISO8601") dataframe["CaseID"] = dataframe["CaseID"].astype("string") @@ -689,7 +696,7 @@ def test_filter_end_activities_log(self): pm4py.filter_end_activities(log, ["pay compensation"]) def test_filter_end_activities_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True, format="ISO8601") dataframe["CaseID"] = dataframe["CaseID"].astype("string") @@ -701,7 +708,7 @@ def test_filter_eve_attr_values_log(self): pm4py.filter_event_attribute_values(log, "concept:name", ["register request", "pay compensation", "reject request"]) def test_filter_eve_attr_values_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True, format="ISO8601") dataframe["CaseID"] = dataframe["CaseID"].astype("string") @@ -718,7 +725,7 @@ def test_filter_variant_log(self): pm4py.filter_variants(log, [('register request', 'examine casually', 'check ticket', 'decide', 'reinitiate request', 'examine thoroughly', 'check ticket', 'decide', 'pay compensation')]) def test_filter_variant_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True, format="ISO8601") dataframe["CaseID"] = dataframe["CaseID"].astype("string") @@ -730,7 +737,7 @@ def test_filter_dfg_log(self): pm4py.filter_directly_follows_relation(log, [("register request", "check ticket")]) def test_filter_dfg_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True, format="ISO8601") dataframe["CaseID"] = dataframe["CaseID"].astype("string") @@ -742,7 +749,7 @@ def test_filter_efg_log(self): pm4py.filter_eventually_follows_relation(log, [("register request", "check ticket")]) def test_filter_efg_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True, format="ISO8601") dataframe["CaseID"] = dataframe["CaseID"].astype("string") @@ -754,7 +761,7 @@ def test_filter_time_range_log(self): pm4py.filter_time_range(log, "2009-01-01 01:00:00", "2011-01-01 01:00:00") def test_filter_time_range_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True, format="ISO8601") dataframe["CaseID"] = dataframe["CaseID"].astype("string") @@ -766,7 +773,7 @@ def test_filter_between_log(self): pm4py.filter_between(log, "check ticket", "decide") def test_filter_between_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True, format="ISO8601") dataframe["CaseID"] = dataframe["CaseID"].astype("string") @@ -778,7 +785,7 @@ def test_filter_case_size_log(self): pm4py.filter_case_size(log, 10, 20) def test_filter_case_size_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True, format="ISO8601") dataframe["CaseID"] = dataframe["CaseID"].astype("string") @@ -790,7 +797,7 @@ def test_filter_case_performance_log(self): pm4py.filter_case_performance(log, 86400, 8640000) def test_filter_case_performance_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True, format="ISO8601") dataframe["CaseID"] = dataframe["CaseID"].astype("string") @@ -802,7 +809,7 @@ def test_filter_activities_rework_log(self): pm4py.filter_activities_rework(log, "check ticket") def test_filter_act_rework_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True, format="ISO8601") dataframe["CaseID"] = dataframe["CaseID"].astype("string") @@ -814,7 +821,7 @@ def test_filter_paths_perf_log(self): pm4py.filter_paths_performance(log, ("register request", "check ticket"), 86400, 864000) def test_filter_paths_perf_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True, format="ISO8601") dataframe["CaseID"] = dataframe["CaseID"].astype("string") @@ -826,7 +833,7 @@ def test_filter_vars_top_k_log(self): pm4py.filter_variants_top_k(log, 1) def test_filter_vars_top_k_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True, format="ISO8601") dataframe["CaseID"] = dataframe["CaseID"].astype("string") @@ -838,7 +845,7 @@ def test_filter_vars_coverage(self): pm4py.filter_variants_by_coverage_percentage(log, 0.1) def test_filter_vars_coverage(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True) dataframe["CaseID"] = dataframe["CaseID"].astype("string") @@ -850,7 +857,7 @@ def test_filter_prefixes_log(self): pm4py.filter_prefixes(log, "check ticket") def test_filter_prefixes_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True) dataframe["CaseID"] = dataframe["CaseID"].astype("string") @@ -862,7 +869,7 @@ def test_filter_suffixes_log(self): pm4py.filter_suffixes(log, "check ticket") def test_filter_suffixes_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True) dataframe["CaseID"] = dataframe["CaseID"].astype("string") @@ -874,7 +881,7 @@ def test_discover_perf_dfg_log(self): pm4py.discover_performance_dfg(log) def test_discover_perf_dfg_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True) dataframe["CaseID"] = dataframe["CaseID"].astype("string") @@ -891,7 +898,7 @@ def test_discover_ts_log(self): pm4py.discover_transition_system(log) def test_discover_ts_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True) dataframe["CaseID"] = dataframe["CaseID"].astype("string") @@ -903,7 +910,7 @@ def test_discover_pref_tree_log(self): pm4py.discover_prefix_tree(log) def test_discover_pref_tree_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True) dataframe["CaseID"] = dataframe["CaseID"].astype("string") @@ -913,25 +920,28 @@ def test_discover_ocpn(self): ocel = pm4py.read_ocel("input_data/ocel/example_log.csv") pm4py.discover_oc_petri_net(ocel) - def test_conformance_alignments_pn_log(self): + def test_conformance_alignments_pn_log_simplified_interface(self): for legacy_obj in [True, False]: - log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj) - net, im, fm = pm4py.discover_petri_net_inductive(log) - pm4py.conformance_diagnostics_alignments(log, net, im, fm, return_diagnostics_dataframe=False) + for diagn_df in [True, False]: + log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj) + net, im, fm = pm4py.discover_petri_net_inductive(log) + pm4py.conformance_diagnostics_alignments(log, net, im, fm, return_diagnostics_dataframe=diagn_df) - def test_conformance_alignments_pn_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) - dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True) - dataframe["CaseID"] = dataframe["CaseID"].astype("string") + def test_conformance_alignments_pn_df_simplified_interface(self): + for diagn_df in [True, False]: + dataframe = pd.read_csv("input_data/running-example-transformed.csv") + dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True) + dataframe["CaseID"] = dataframe["CaseID"].astype("string") - net, im, fm = pm4py.discover_petri_net_inductive(dataframe, activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp") - pm4py.conformance_diagnostics_alignments(dataframe, net, im, fm, activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp", return_diagnostics_dataframe=False) + net, im, fm = pm4py.discover_petri_net_inductive(dataframe, activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp") + pm4py.conformance_diagnostics_alignments(dataframe, net, im, fm, activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp", return_diagnostics_dataframe=diagn_df) def test_conformance_diagnostics_fp_log(self): for legacy_obj in [True, False]: log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj) tree = pm4py.discover_process_tree_inductive(log) pm4py.conformance_diagnostics_footprints(log, tree) + def test_fitness_fp_log(self): for legacy_obj in [True, False]: log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj) @@ -954,73 +964,75 @@ def test_fea_ext_log(self): pm4py.extract_features_dataframe(log) def test_fea_ext_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True) dataframe["CaseID"] = dataframe["CaseID"].astype("string") pm4py.extract_features_dataframe(dataframe, activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp", resource_key="Resource") def test_new_alpha_miner_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True) dataframe["CaseID"] = dataframe["CaseID"].astype("string") pm4py.discover_petri_net_alpha(dataframe, case_id_key="CaseID", activity_key="Activity", timestamp_key="Timestamp") def test_new_heu_miner_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True) dataframe["CaseID"] = dataframe["CaseID"].astype("string") pm4py.discover_petri_net_heuristics(dataframe, case_id_key="CaseID", activity_key="Activity", timestamp_key="Timestamp") def test_new_dfg_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True) dataframe["CaseID"] = dataframe["CaseID"].astype("string") pm4py.discover_dfg(dataframe, case_id_key="CaseID", activity_key="Activity", timestamp_key="Timestamp") def test_new_perf_dfg_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True) dataframe["CaseID"] = dataframe["CaseID"].astype("string") pm4py.discover_performance_dfg(dataframe, case_id_key="CaseID", activity_key="Activity", timestamp_key="Timestamp") - def test_new_tbr_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) - dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True) - dataframe["CaseID"] = dataframe["CaseID"].astype("string") - net, im, fm = pm4py.discover_petri_net_inductive(dataframe, case_id_key="CaseID", activity_key="Activity", timestamp_key="Timestamp") - pm4py.conformance_diagnostics_token_based_replay(dataframe, net, im, fm, case_id_key="CaseID", activity_key="Activity", timestamp_key="Timestamp", return_diagnostics_dataframe=False) + def test_new_tbr_df_simpl_interface(self): + for ret_df in [True, False]: + dataframe = pd.read_csv("input_data/running-example-transformed.csv") + dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True) + dataframe["CaseID"] = dataframe["CaseID"].astype("string") + net, im, fm = pm4py.discover_petri_net_inductive(dataframe, case_id_key="CaseID", activity_key="Activity", timestamp_key="Timestamp") + pm4py.conformance_diagnostics_token_based_replay(dataframe, net, im, fm, case_id_key="CaseID", activity_key="Activity", timestamp_key="Timestamp", return_diagnostics_dataframe=ret_df) def test_new_tbr_fitness_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True) dataframe["CaseID"] = dataframe["CaseID"].astype("string") net, im, fm = pm4py.discover_petri_net_inductive(dataframe, case_id_key="CaseID", activity_key="Activity", timestamp_key="Timestamp") pm4py.fitness_token_based_replay(dataframe, net, im, fm, case_id_key="CaseID", activity_key="Activity", timestamp_key="Timestamp") def test_new_tbr_precision_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True) dataframe["CaseID"] = dataframe["CaseID"].astype("string") net, im, fm = pm4py.discover_petri_net_inductive(dataframe, case_id_key="CaseID", activity_key="Activity", timestamp_key="Timestamp") pm4py.precision_token_based_replay(dataframe, net, im, fm, case_id_key="CaseID", activity_key="Activity", timestamp_key="Timestamp") - def test_new_align_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) - dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True) - dataframe["CaseID"] = dataframe["CaseID"].astype("string") - net, im, fm = pm4py.discover_petri_net_inductive(dataframe, case_id_key="CaseID", activity_key="Activity", timestamp_key="Timestamp") - pm4py.conformance_diagnostics_alignments(dataframe, net, im, fm, case_id_key="CaseID", activity_key="Activity", timestamp_key="Timestamp", return_diagnostics_dataframe=False) + def test_new_align_df_simpl_interface(self): + for diagn_df in [True, False]: + dataframe = pd.read_csv("input_data/running-example-transformed.csv") + dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True) + dataframe["CaseID"] = dataframe["CaseID"].astype("string") + net, im, fm = pm4py.discover_petri_net_inductive(dataframe, case_id_key="CaseID", activity_key="Activity", timestamp_key="Timestamp") + pm4py.conformance_diagnostics_alignments(dataframe, net, im, fm, case_id_key="CaseID", activity_key="Activity", timestamp_key="Timestamp", return_diagnostics_dataframe=diagn_df) def test_new_align_fitness_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True) dataframe["CaseID"] = dataframe["CaseID"].astype("string") net, im, fm = pm4py.discover_petri_net_inductive(dataframe, case_id_key="CaseID", activity_key="Activity", timestamp_key="Timestamp") pm4py.fitness_alignments(dataframe, net, im, fm, case_id_key="CaseID", activity_key="Activity", timestamp_key="Timestamp") def test_new_align_precision_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True) dataframe["CaseID"] = dataframe["CaseID"].astype("string") net, im, fm = pm4py.discover_petri_net_inductive(dataframe, case_id_key="CaseID", activity_key="Activity", @@ -1028,7 +1040,7 @@ def test_new_align_precision_df(self): pm4py.precision_alignments(dataframe, net, im, fm, case_id_key="CaseID", activity_key="Activity", timestamp_key="Timestamp") def test_vis_case_duration_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True) dataframe["CaseID"] = dataframe["CaseID"].astype("string") target = os.path.join("test_output_data", "case_duration.svg") @@ -1036,7 +1048,7 @@ def test_vis_case_duration_df(self): os.remove(target) def test_vis_ev_time_graph_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True) dataframe["CaseID"] = dataframe["CaseID"].astype("string") target = os.path.join("test_output_data", "ev_graph_graph.svg") @@ -1044,7 +1056,7 @@ def test_vis_ev_time_graph_df(self): os.remove(target) def test_vis_ev_distr_graph_df(self): - dataframe = pd.read_csv("input_data/running-example-transformed.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/running-example-transformed.csv") dataframe["Timestamp"] = pd.to_datetime(dataframe["Timestamp"], utc=True) dataframe["CaseID"] = dataframe["CaseID"].astype("string") target = os.path.join("test_output_data", "ev_distr_graph.svg") diff --git a/tests/simplified_interface_2.py b/tests/simplified_interface_2.py index f6b006eb1..6aa99fd86 100644 --- a/tests/simplified_interface_2.py +++ b/tests/simplified_interface_2.py @@ -62,7 +62,7 @@ def test_conversion_log_to_ocel(self): def test_conversion_ocelcsv_to_ocel(self): import pandas as pd - dataframe = pd.read_csv("input_data/ocel/example_log.csv", dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv("input_data/ocel/example_log.csv") pm4py.convert_log_to_ocel(dataframe, activity_column="ocel:activity", timestamp_column="ocel:timestamp") def test_conversion_petri_to_nx(self): diff --git a/tests/sna_test.py b/tests/sna_test.py index cd8b38c75..3280f2c6d 100644 --- a/tests/sna_test.py +++ b/tests/sna_test.py @@ -26,7 +26,7 @@ def test_pandas(self): # that by construction of the unittest package have to be expressed in such way self.dummy_variable = "dummy_value" - log = pd.read_csv(os.path.join("..", "tests", "input_data", "running-example.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + log = pd.read_csv(os.path.join("..", "tests", "input_data", "running-example.csv")) log = dataframe_utils.convert_timestamp_columns_in_df(log, timest_format="ISO8601") hw_values = sna_alg.apply(log, variant=sna_alg.Variants.HANDOVER_PANDAS) @@ -76,7 +76,7 @@ def test_res_profiles_log(self): def test_res_profiles_df(self): from pm4py.algo.organizational_mining.resource_profiles import algorithm - log = pd.read_csv(os.path.join("..", "tests", "input_data", "running-example.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + log = pd.read_csv(os.path.join("..", "tests", "input_data", "running-example.csv")) log = dataframe_utils.convert_timestamp_columns_in_df(log, timest_format="ISO8601") algorithm.distinct_activities(log, "2010-12-30 00:00:00", "2011-01-25 00:00:00", "Sara") algorithm.activity_frequency(log, "2010-12-30 00:00:00", "2011-01-25 00:00:00", "Sara", "decide") diff --git a/tests/statistics_df_test.py b/tests/statistics_df_test.py index 78bfd2cb3..ccd6960d1 100644 --- a/tests/statistics_df_test.py +++ b/tests/statistics_df_test.py @@ -7,7 +7,7 @@ class StatisticsDfTest(unittest.TestCase): def get_dataframe(self): - dataframe = pd.read_csv(os.path.join("input_data", "roadtraffic100traces.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv(os.path.join("input_data", "roadtraffic100traces.csv")) dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format="ISO8601") return dataframe @@ -52,7 +52,7 @@ def test_variants(self): def test_batch_detection(self): from pm4py.algo.discovery.batches.variants import pandas as pandas_batches - dataframe = pd.read_csv(os.path.join("input_data", "receipt.csv"), dtype_backend=constants.DEFAULT_PANDAS_PARSING_DTYPE_BACKEND) + dataframe = pd.read_csv(os.path.join("input_data", "receipt.csv")) dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format="ISO8601") pandas_batches.apply(dataframe)