Merge remote-tracking branch 'upstream/hotfixes' into release

process-intelligence-solutions · Jan 22, 2024 · 31603fa
2 parents bb99ed1 + 4cf8849
commit 31603fa
Show file tree

Hide file tree

Showing 5 changed files with 126 additions and 82 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -13,7 +13,7 @@ RUN apt-get -y install libtool flex bison pkg-config g++ libssl-dev automake
 RUN apt-get -y install libjemalloc-dev libboost-dev libboost-filesystem-dev libboost-system-dev libboost-regex-dev python3-dev autoconf flex bison cmake
 RUN apt-get -y install libxml2-dev libxslt-dev libfreetype6-dev libsuitesparse-dev
 RUN pip install -U wheel six pytest
-RUN pip install colorama==0.4.6 contourpy==1.2.0 cycler==0.12.1 deprecation==2.1.0 fonttools==4.44.3 graphviz==0.20.1 intervaltree==3.1.0 kiwisolver==1.4.5 lxml==4.9.3 matplotlib==3.8.2 networkx==3.2.1 numpy==1.26.2 packaging==23.2 pandas==2.1.3 Pillow==10.1.0 pydotplus==2.0.2 pyparsing==3.1.1 python-dateutil==2.8.2 pytz==2023.3.post1 scipy==1.11.4 six==1.16.0 sortedcontainers==2.4.0 StringDist==1.0.9 tqdm==4.66.1 tzdata==2023.3 
+RUN pip install colorama==0.4.6 contourpy==1.2.0 cycler==0.12.1 deprecation==2.1.0 fonttools==4.47.2 graphviz==0.20.1 intervaltree==3.1.0 kiwisolver==1.4.5 lxml==5.1.0 matplotlib==3.8.2 networkx==3.2.1 numpy==1.26.3 packaging==23.2 pandas==2.2.0 pillow==10.2.0 pydotplus==2.0.2 pyparsing==3.1.1 python-dateutil==2.8.2 pytz==2023.3.post1 scipy==1.12.0 six==1.16.0 sortedcontainers==2.4.0 StringDist==1.0.9 tqdm==4.66.1 tzdata==2023.4 
 
 COPY . /app
 RUN cd /app && python setup.py install
diff --git a/pm4py/algo/conformance/tokenreplay/variants/token_replay.py b/pm4py/algo/conformance/tokenreplay/variants/token_replay.py
@@ -912,6 +912,42 @@ def get_variant_from_trace(trace, activity_key, disable_variants=False):
     return variants_util.get_variant_from_trace(trace, parameters=parameters)
 
 
+def transcribe_result(t, return_object_names=True):
+    corr_value = {"trace_is_fit": copy(t.t_fit),
+                  "trace_fitness": float(copy(t.t_value)),
+                  "activated_transitions": copy(t.act_trans),
+                  "reached_marking": copy(t.reached_marking),
+                  "enabled_transitions_in_marking": copy(
+                      t.enabled_trans_in_mark),
+                  "transitions_with_problems": copy(
+                      t.trans_probl),
+                  "missing_tokens": int(t.missing),
+                  "consumed_tokens": int(t.consumed),
+                  "remaining_tokens": int(t.remaining),
+                  "produced_tokens": int(t.produced)}
+
+    if return_object_names:
+        corr_value["activated_transitions_labels"] = [x.label for x in
+                                                      corr_value[
+                                                          "activated_transitions"]]
+        corr_value["activated_transitions"] = [x.name for x in corr_value[
+            "activated_transitions"]]
+        corr_value["enabled_transitions_in_marking_labels"] = [x.label for x in
+                                                               corr_value[
+                                                                   "enabled_transitions_in_marking"]]
+        corr_value["enabled_transitions_in_marking"] = [x.name for x in
+                                                        corr_value[
+                                                            "enabled_transitions_in_marking"]]
+        corr_value["transitions_with_problems"] = [x.name for x in
+                                                   corr_value[
+                                                       "transitions_with_problems"]]
+        corr_value["reached_marking"] = {x.name: y for x, y in
+                                         corr_value[
+                                             "reached_marking"].items()}
+
+    return corr_value
+
+
 def apply_log(log, net, initial_marking, final_marking, enable_pltr_fitness=False, consider_remaining_in_fitness=False,
               activity_key="concept:name", reach_mark_through_hidden=True, stop_immediately_unfit=False,
               walk_through_hidden_trans=True, places_shortest_path_by_hidden=None,
@@ -987,86 +1023,94 @@ def apply_log(log, net, initial_marking, final_marking, enable_pltr_fitness=Fals
         trans_map[t.label] = t
 
     if pandas_utils.check_is_pandas_dataframe(log):
-        traces = [tuple(x) for x in log.groupby(case_id_key)[activity_key].agg(list).to_dict().values()]
+        traces = [(tuple(x), y) for y, x in log.groupby(case_id_key)[activity_key].agg(list).to_dict().items()]
+        traces = [(traces[i][0], i) for i in range(len(traces))]
     else:
-        traces = [tuple(x[activity_key] for x in trace) for trace in log]
+        traces = [(tuple(x[activity_key] for x in log[i]), i) for i in range(len(log))]
+
+    variants = dict()
+    for t in traces:
+        if t[0] not in variants:
+            variants[t[0]] = list()
+        variants[t[0]].append(t[1])
 
-    variants = Counter(traces)
+    traces = [t[0] for t in traces]
 
     vc = [(k, v) for k, v in variants.items()]
-    vc = list(sorted(vc, key=lambda x: (x[1], x[0]), reverse=True))
+    vc = list(sorted(vc, key=lambda x: (len(x[1]), x[0]), reverse=True))
+
+    threads_results = {}
 
     progress = None
+
     if importlib.util.find_spec("tqdm") and show_progress_bar and len(variants) > 1:
         from tqdm.auto import tqdm
-        progress = tqdm(total=len(variants), desc="replaying log with TBR, completed variants :: ")
 
-    threads = {}
-    threads_results = {}
+        if disable_variants and not pandas_utils.check_is_pandas_dataframe(log):
+            progress = tqdm(total=len(traces), desc="replaying log with TBR, completed traces :: ")
+        else:
+            progress = tqdm(total=len(variants), desc="replaying log with TBR, completed traces :: ")
 
     for i in range(len(vc)):
         variant = vc[i][0]
-        considered_case = variants_util.variant_to_trace(variant, parameters={constants.PARAMETER_CONSTANT_ACTIVITY_KEY: activity_key})
-
-        threads[variant] = ApplyTraceTokenReplay(considered_case, net, initial_marking, final_marking,
-                                                 trans_map, enable_pltr_fitness, place_fitness_per_trace,
-                                                 transition_fitness_per_trace,
-                                                 notexisting_activities_in_model,
-                                                 places_shortest_path_by_hidden,
-                                                 consider_remaining_in_fitness,
-                                                 activity_key=activity_key,
-                                                 reach_mark_through_hidden=reach_mark_through_hidden,
-                                                 stop_immediately_when_unfit=stop_immediately_unfit,
-                                                 walk_through_hidden_trans=walk_through_hidden_trans,
-                                                 post_fix_caching=post_fix_cache,
-                                                 marking_to_activity_caching=marking_to_activity_cache,
-                                                 is_reduction=is_reduction,
-                                                 thread_maximum_ex_time=thread_maximum_ex_time,
-                                                 cleaning_token_flood=cleaning_token_flood,
-                                                 s_components=s_components, trace_occurrences=vc[i][1],
-                                                 consider_activities_not_in_model_in_fitness=consider_activities_not_in_model_in_fitness)
-        threads[variant].run()
-        if progress is not None:
-            progress.update()
-
-        t = threads[variant]
-        threads_results[variant] = {"trace_is_fit": copy(t.t_fit),
-                                    "trace_fitness": float(copy(t.t_value)),
-                                    "activated_transitions": copy(t.act_trans),
-                                    "reached_marking": copy(t.reached_marking),
-                                    "enabled_transitions_in_marking": copy(
-                                        t.enabled_trans_in_mark),
-                                    "transitions_with_problems": copy(
-                                        t.trans_probl),
-                                    "missing_tokens": int(t.missing),
-                                    "consumed_tokens": int(t.consumed),
-                                    "remaining_tokens": int(t.remaining),
-                                    "produced_tokens": int(t.produced)}
-
-        if return_object_names:
-            threads_results[variant]["activated_transitions_labels"] = [x.label for x in
-                                                                        threads_results[variant][
-                                                                            "activated_transitions"]]
-            threads_results[variant]["activated_transitions"] = [x.name for x in threads_results[variant][
-                "activated_transitions"]]
-            threads_results[variant]["enabled_transitions_in_marking_labels"] = [x.label for x in
-                                                                                 threads_results[variant][
-                                                                                     "enabled_transitions_in_marking"]]
-            threads_results[variant]["enabled_transitions_in_marking"] = [x.name for x in
-                                                                          threads_results[variant][
-                                                                              "enabled_transitions_in_marking"]]
-            threads_results[variant]["transitions_with_problems"] = [x.name for x in
-                                                                     threads_results[variant][
-                                                                         "transitions_with_problems"]]
-            threads_results[variant]["reached_marking"] = {x.name: y for x, y in
-                                                           threads_results[variant][
-                                                               "reached_marking"].items()}
-        del threads[variant]
-
-    for trace_variant in traces:
-        if trace_variant in threads_results:
-            t = threads_results[trace_variant]
-            aligned_traces.append(t)
+        all_cases = vc[i][1]
+
+        if disable_variants and not pandas_utils.check_is_pandas_dataframe(log):
+            for j in range(len(all_cases)):
+                case_position = all_cases[j]
+                considered_case = log[case_position]
+                t = ApplyTraceTokenReplay(considered_case, net, initial_marking, final_marking,
+                                          trans_map, enable_pltr_fitness, place_fitness_per_trace,
+                                          transition_fitness_per_trace,
+                                          notexisting_activities_in_model,
+                                          places_shortest_path_by_hidden,
+                                          consider_remaining_in_fitness,
+                                          activity_key=activity_key,
+                                          reach_mark_through_hidden=reach_mark_through_hidden,
+                                          stop_immediately_when_unfit=stop_immediately_unfit,
+                                          walk_through_hidden_trans=walk_through_hidden_trans,
+                                          post_fix_caching=post_fix_cache,
+                                          marking_to_activity_caching=marking_to_activity_cache,
+                                          is_reduction=is_reduction,
+                                          thread_maximum_ex_time=thread_maximum_ex_time,
+                                          cleaning_token_flood=cleaning_token_flood,
+                                          s_components=s_components, trace_occurrences=1,
+                                          consider_activities_not_in_model_in_fitness=consider_activities_not_in_model_in_fitness)
+                t.run()
+                threads_results[case_position] = transcribe_result(t, return_object_names=return_object_names)
+                if progress is not None:
+                    progress.update()
+        else:
+            considered_case = variants_util.variant_to_trace(variant, parameters={constants.PARAMETER_CONSTANT_ACTIVITY_KEY: activity_key})
+            t = ApplyTraceTokenReplay(considered_case, net, initial_marking, final_marking,
+                                                     trans_map, enable_pltr_fitness, place_fitness_per_trace,
+                                                     transition_fitness_per_trace,
+                                                     notexisting_activities_in_model,
+                                                     places_shortest_path_by_hidden,
+                                                     consider_remaining_in_fitness,
+                                                     activity_key=activity_key,
+                                                     reach_mark_through_hidden=reach_mark_through_hidden,
+                                                     stop_immediately_when_unfit=stop_immediately_unfit,
+                                                     walk_through_hidden_trans=walk_through_hidden_trans,
+                                                     post_fix_caching=post_fix_cache,
+                                                     marking_to_activity_caching=marking_to_activity_cache,
+                                                     is_reduction=is_reduction,
+                                                     thread_maximum_ex_time=thread_maximum_ex_time,
+                                                     cleaning_token_flood=cleaning_token_flood,
+                                                     s_components=s_components, trace_occurrences=len(vc[i][1]),
+                                                     consider_activities_not_in_model_in_fitness=consider_activities_not_in_model_in_fitness)
+            t.run()
+
+            for j in range(len(all_cases)):
+                case_position = all_cases[j]
+
+                threads_results[case_position] = transcribe_result(t, return_object_names=return_object_names)
+
+            if progress is not None:
+                progress.update()
+
+    for i in range(len(traces)):
+        aligned_traces.append(threads_results[i])
 
     # gracefully close progress bar
     if progress is not None:

diff --git a/requirements_complete.txt b/requirements_complete.txt
@@ -12,7 +12,7 @@ networkx
 numpy
 packaging
 pandas
-Pillow
+pillow
 pydotplus
 pyparsing
 python-dateutil

diff --git a/requirements_stable.txt b/requirements_stable.txt
@@ -11,13 +11,13 @@ matplotlib==3.8.2
 networkx==3.2.1
 numpy==1.26.3
 packaging==23.2
-pandas==2.1.4
-Pillow==10.2.0
+pandas==2.2.0
+pillow==10.2.0
 pydotplus==2.0.2
 pyparsing==3.1.1
 python-dateutil==2.8.2
 pytz==2023.3.post1
-scipy==1.11.4
+scipy==1.12.0
 six==1.16.0
 sortedcontainers==2.4.0
 StringDist==1.0.9

diff --git a/third_party/LICENSES_TRANSITIVE.md b/third_party/LICENSES_TRANSITIVE.md
@@ -5,29 +5,29 @@ PM4Py depends upon. This is a best effort attempt to describe the library's depe
 libraries are added/removed.
 
 | Name | URL | License | Version |
-| --------------------------- | ------------------------------------------------------------ | ---------------------- | ------------------- |
+| --------------------------- | ------------------------------------------------------------ | ----------- | ------------------- |
 | colorama | https://pypi.org/project/colorama | BSD License | 0.4.6 |
 | contourpy | https://pypi.org/project/contourpy | BSD License | 1.2.0 |
 | cycler | https://pypi.org/project/cycler | BSD License | 0.12.1 |
 | deprecation | https://pypi.org/project/deprecation | Apache Software License (Apache 2) | 2.1.0 |
-| fonttools | https://pypi.org/project/fonttools | MIT License (MIT) | 4.44.3 |
+| fonttools | https://pypi.org/project/fonttools | MIT License (MIT) | 4.47.2 |
 | graphviz | https://pypi.org/project/graphviz | MIT License (MIT) | 0.20.1 |
-| intervaltree | https://pypi.org/project/intervaltree | Apache Software License (Apache License, Version 2.0) | 3.1.0 |
+| intervaltree | https://pypi.org/project/intervaltree | Apache Software License | 3.1.0 |
 | kiwisolver | https://pypi.org/project/kiwisolver | BSD License | 1.4.5 |
-| lxml | https://pypi.org/project/lxml | BSD License (BSD-3-Clause) | 4.9.3 |
+| lxml | https://pypi.org/project/lxml | BSD License (BSD-3-Clause) | 5.1.0 |
 | matplotlib | https://pypi.org/project/matplotlib | Python Software Foundation License (PSF) | 3.8.2 |
 | networkx | https://pypi.org/project/networkx | BSD License | 3.2.1 |
-| numpy | https://pypi.org/project/numpy | BSD License | 1.26.2 |
+| numpy | https://pypi.org/project/numpy | BSD License | 1.26.3 |
 | packaging | https://pypi.org/project/packaging | Apache Software License, BSD License | 23.2 |
-| pandas | https://pypi.org/project/pandas | BSD License | 2.1.3 |
-| Pillow | https://pypi.org/project/Pillow | Historical Permission Notice and Disclaimer (HPND) (HPND) | 10.1.0 |
+| pandas | https://pypi.org/project/pandas | BSD License | 2.2.0 |
+| pillow | https://pypi.org/project/pillow | Historical Permission Notice and Disclaimer (HPND) (HPND) | 10.2.0 |
 | pydotplus | https://pypi.org/project/pydotplus | MIT License (UNKNOWN) | 2.0.2 |
 | pyparsing | https://pypi.org/project/pyparsing | MIT License | 3.1.1 |
 | python-dateutil | https://pypi.org/project/python-dateutil | Apache Software License, BSD License (Dual License) | 2.8.2 |
 | pytz | https://pypi.org/project/pytz | MIT License (MIT) | 2023.3.post1 |
-| scipy | https://pypi.org/project/scipy | BSD License | 1.11.4 |
+| scipy | https://pypi.org/project/scipy | BSD License | 1.12.0 |
 | six | https://pypi.org/project/six | MIT License (MIT) | 1.16.0 |
 | sortedcontainers | https://pypi.org/project/sortedcontainers | Apache Software License (Apache 2.0) | 2.4.0 |
 | StringDist | https://pypi.org/project/StringDist | MIT License (MIT) | 1.0.9 |
 | tqdm | https://pypi.org/project/tqdm | MIT License, Mozilla Public License 2.0 (MPL 2.0) (MPL-2.0 AND MIT) | 4.66.1 |
-| tzdata | https://pypi.org/project/tzdata | Apache Software License (Apache-2.0) | 2023.3 |
+| tzdata | https://pypi.org/project/tzdata | Apache Software License (Apache-2.0) | 2023.4 |