Extend logger to store output to separate file

LLNL · Jun 4, 2024 · 4798b7d · 4798b7d
1 parent f6306b1
commit 4798b7d
Show file tree

Hide file tree

Showing 5 changed files with 224 additions and 16 deletions.
diff --git a/src/AMSlib/AMS.cpp b/src/AMSlib/AMS.cpp
@@ -391,25 +391,29 @@ class AMSWrap
       CFATAL(AMS, !path_exists(log_dir), "Log Directory does not exist");
 
       int id = 0;
-      if (log_prefix.find("<PID>") != std::string::npos) {
-        pattern = std::string("<PID>");
-        id = getpid();
-      } else if (log_prefix.find("<RID>") != std::string::npos) {
+      if (log_prefix.find("<RID>") != std::string::npos) {
         pattern = std::string("<RID>");
         id = get_rank_id();
+      } else if (log_prefix.find("<PID>") != std::string::npos) {
+        pattern = std::string("<PID>");
+        id = getpid();
+      } else {
+        log_prefix += "<PID>";
+        pattern = std::string("<PID>");
+        id = getpid();
       }
 
       // Combine hostname and pid
       std::ostringstream combined;
-      combined << hostname << "_" << id;
+      combined << "." << hostname << "." << id;
 
       if (!pattern.empty()) {
         log_path =
             fs::absolute(log_dir).string() +
             std::regex_replace(log_prefix, std::regex(pattern), combined.str());
       } else {
         log_path =
-            fs::absolute(log_dir).string() + log_prefix + "_" + combined.str();
+            fs::absolute(log_dir).string() + log_prefix + "." + combined.str();
       }
     }
     logger->initialize_std_io_err(enable_log, log_path);
@@ -494,6 +498,7 @@ class AMSWrap
         }
       }
     }
+    ams::util::close();
   }
 };
 

diff --git a/src/AMSlib/ml/surrogate.hpp b/src/AMSlib/ml/surrogate.hpp
@@ -213,7 +213,7 @@ class SurrogateModel
     };
 
     if (uq_policy == AMSUQPolicy::AMS_DELTAUQ_MEAN) {
-      if (model_resource == AMSResourceType::AMS_DEVICE){
+      if (model_resource == AMSResourceType::AMS_DEVICE) {
 #ifdef __ENABLE_CUDA__
         DBG(Surrogate, "Compute mean delta uq predicates on device\n");
         constexpr int block_size = 256;
@@ -227,13 +227,12 @@ class SurrogateModel
         THROW(std::runtime_error,
               "Expected CUDA is enabled when model data are on DEVICE");
 #endif
-      }
-      else {
+      } else {
         DBG(Surrogate, "Compute mean delta uq predicates on host\n");
         computeDeltaUQMeanPredicatesHost();
       }
     } else if (uq_policy == AMSUQPolicy::AMS_DELTAUQ_MAX) {
-      if (model_resource == AMSResourceType::AMS_DEVICE){
+      if (model_resource == AMSResourceType::AMS_DEVICE) {
 #ifdef __ENABLE_CUDA__
         DBG(Surrogate, "Compute max delta uq predicates on device\n");
         constexpr int block_size = 256;
@@ -247,8 +246,7 @@ class SurrogateModel
         THROW(std::runtime_error,
               "Expected CUDA is enabled when model data are on DEVICE");
 #endif
-      }
-      else {
+      } else {
         DBG(Surrogate, "Compute max delta uq predicates on host\n");
         computeDeltaUQMaxPredicatesHost();
       }
@@ -412,7 +410,7 @@ class SurrogateModel
 
       DBG(Surrogate,
           "Returning existing model represented under (%s)",
-          model_path);
+          model_path.empty() ? "" : model_path.c_str());
       return torch_model;
     }
 

diff --git a/src/AMSlib/wf/debug.h b/src/AMSlib/wf/debug.h
@@ -72,16 +72,15 @@ void memUsage(double& vm_usage, double& resident_set);
     size_t watermark, current_size, actual_size;                       \
     auto& rm = ams::ResourceManager::getInstance();                    \
     memUsage(vm, rs);                                                  \
-    DBG(MEM : id, "Memory usage at %s is VM:%g RS:%g", phase, vm, rs); \
+    DBG(MEM, "Memory usage at %s is VM:%g RS:%g", phase, vm, rs); \
                                                                        \
     for (int i = 0; i < AMSResourceType::AMS_RSEND; i++) {             \
       if (rm.isActive((AMSResourceType)i)) {                           \
         rm.getAllocatorStats((AMSResourceType)i,                       \
                              watermark,                                \
                              current_size,                             \
                              actual_size);                             \
-        DBG(MEM                                                        \
-            : id,                                                      \
+        DBG(MEM,                                                        \
               "Allocator: %s HWM:%lu CS:%lu AS:%lu) ",                 \
               rm.getAllocatorName((AMSResourceType)i).c_str(),         \
               watermark,                                               \

diff --git a/src/AMSlib/wf/logger.cpp b/src/AMSlib/wf/logger.cpp
@@ -108,6 +108,7 @@ void Logger::flush()
 
 void Logger::close()
 {
+
   if (ams_out != nullptr && ams_out != stdout) {
     fclose(ams_out);
     ams_out = nullptr;
@@ -118,6 +119,7 @@ void close()
 {
   auto logger = Logger::getActiveLogger();
   logger->flush();
+  logger->close();
 }
 
 void flush_files()

diff --git a/tools/AMSLogReader.py b/tools/AMSLogReader.py
@@ -0,0 +1,204 @@
+import argparse
+from pathlib import Path
+import glob
+import re
+import pandas as pd
+import numpy as np
+
+
+def plot_lines(ax, data, xtitle, ytitle):
+    """Draw ``data`` as thin lines on ``ax`` and label both axes.
+
+    Args:
+        ax: Axes-like object providing ``plot``, ``set_xlabel``,
+            ``set_ylabel`` and a ``spines`` mapping.
+        data: Table whose ``columns`` are used as the line labels.
+        xtitle: Label for the x axis.
+        ytitle: Label for the y axis.
+    """
+    series_names = data.columns.tolist()
+    ax.plot(data, linewidth=1, label=series_names)
+    ax.set_xlabel(xtitle)
+    ax.set_ylabel(ytitle)
+    # Hide the top and right borders of the plotting area.
+    for side in ("top", "right"):
+        ax.spines[side].set_visible(False)
+
+
+def _plot_per_file(df, left_columns, left_ytitle, derive, derived_name, right_ytitle, pdf_path):
+    """Render one two-panel row per log file and save the figure to ``pdf_path``.
+
+    The left panel plots ``left_columns`` as-is; the right panel plots the
+    single series returned by ``derive(group)`` under the name ``derived_name``.
+    """
+    # Imported lazily so matplotlib is only needed when something is plotted.
+    import matplotlib.pyplot as plt
+
+    groups = list(df.groupby("FileName"))
+    fig = plt.figure(constrained_layout=True)
+    # squeeze=False always yields a 2-D array of sub-figures. With the default
+    # squeeze=True a single log file would return a bare SubFigure, and
+    # indexing it would fail.
+    subfigs = fig.subfigures(nrows=len(groups), ncols=1, squeeze=False)
+    for row, (file_name, group) in enumerate(groups):
+        subfig = subfigs[row, 0]
+        subfig.suptitle(f"Log File : {file_name}")
+        ax = subfig.subplots(nrows=1, ncols=2)
+        plot_lines(
+            ax[0],
+            group[left_columns],
+            xtitle="invocation-id",
+            ytitle=left_ytitle,
+        )
+        ax[0].legend(frameon=False, shadow=False, fancybox=False)
+        plot_lines(
+            ax[1],
+            derive(group).to_frame(derived_name),
+            xtitle="invocation-id",
+            ytitle=right_ytitle,
+        )
+
+    fig.savefig(pdf_path)
+    plt.close(fig)
+
+
+def plot(df, uq, memory, out):
+    """Write per-log-file PDF plots of the memory and/or UQ statistics.
+
+    Args:
+        df: Table with a "FileName" column plus, when ``memory`` is set,
+            "AMS Memory At Intro"/"AMS Memory At Outro" and, when ``uq`` is
+            set, "Domain Model"/"ML Model"/"total".
+        uq: When truthy, write ``<out>.ams.uq.pdf``.
+        memory: When truthy, write ``<out>.ams.mem.pdf``.
+        out: Prefix of the generated PDF file names.
+    """
+    if memory:
+        _plot_per_file(
+            df,
+            ["AMS Memory At Intro", "AMS Memory At Outro"],
+            "Memory (MB)",
+            lambda g: g["AMS Memory At Outro"] - g["AMS Memory At Intro"],
+            "diff",
+            "Memory Diff (Outro-Intro)",
+            f"{out}.ams.mem.pdf",
+        )
+
+    if uq:
+        _plot_per_file(
+            df,
+            ["Domain Model", "ML Model"],
+            "# Every model",
+            lambda g: g["Domain Model"] / g["total"],
+            "fraction",
+            "Fraction Of Domain",
+            f"{out}.ams.uq.pdf",
+        )
+
+
+def digest_memory(memory_lines):
+    """Extract the ``RS:`` value of each memory log line, grouped by phase.
+
+    Lines without an ``RS:<value>`` field are ignored. Each value is divided
+    by 1024 * 1024 (the plots label the result as MB) and appended to the
+    "Start" or "End" list depending on which word the line contains.
+
+    Args:
+        memory_lines: Iterable of log lines.
+
+    Returns:
+        Dict with keys "Start" and "End", each a list of floats in input order.
+
+    Raises:
+        RuntimeError: If a line has an ``RS:`` value but neither "Start" nor "End".
+        ValueError: If the text after ``RS:`` is not a valid float.
+    """
+    # Match the value token itself rather than "everything up to a newline",
+    # so a final line without a trailing "\n" is not silently dropped.
+    pattern = r"RS:(\S+)"
+    mem = {"Start": [], "End": []}
+    for line in memory_lines:
+        match = re.search(pattern, line)
+        if not match:
+            continue
+        value = float(match.group(1)) / (1024.0 * 1024.0)
+        if "Start" in line:
+            mem["Start"].append(value)
+        elif "End" in line:
+            mem["End"].append(value)
+        else:
+            raise RuntimeError(f"Memory Line : {line} does not contain End/Start")
+
+    return mem
+
+
+def get_lines(lines, pattern):
+    """Return, in order, the entries of ``lines`` that ``pattern`` matches at their start."""
+    matcher = re.compile(pattern)
+    return list(filter(matcher.match, lines))
+
+
+def get_uq(lines):
+    """Collect per-invocation physics/ML/total item counts from workflow log lines.
+
+    A line contributes when it contains the "Computed N using physics out of
+    the M items (...)" message; only the first occurrence on a line is used.
+
+    Returns:
+        Dict with keys "physics" (N), "ml" (M - N) and "total" (M), each a
+        list of ints in input order.
+    """
+    regex = re.compile(
+        r"\[AMS:INFO:Workflow\] Computed (\d+) using physics out of the (\d+) items \((.*?)\)"
+    )
+    counts = {"physics": [], "ml": [], "total": []}
+    for entry in lines:
+        hits = regex.findall(entry)
+        if not hits:
+            continue
+        fields = hits[0]
+        assert len(fields) == 3, "Expecting 3 Values"
+        n_physics = int(fields[0])
+        n_total = int(fields[1])
+        counts["physics"].append(n_physics)
+        counts["ml"].append(n_total - n_physics)
+        counts["total"].append(n_total)
+    return counts
+
+
+def parse(file, memory, uq):
+    """Read one log file and return the requested statistics as parallel lists.
+
+    Args:
+        file: Path of the log file to read.
+        memory: When truthy, include the memory series from lines starting
+            with ``[AMS:DEBUG:MEM]``.
+        uq: When truthy, include the physics/ML/total series.
+
+    Returns:
+        ``(results, columns)``: ``results[i]`` is the list of values for the
+        series named ``columns[i]``; memory series come before UQ series.
+    """
+    # Only lines that begin with the memory debug tag feed the memory digest.
+    mem_pattern = r"^\[AMS:DEBUG:MEM\].*"
+
+    with open(file, "r") as fd:
+        lines = fd.readlines()
+
+    sections = []
+    if memory:
+        sections.append(digest_memory(get_lines(lines, mem_pattern)))
+    if uq:
+        sections.append(get_uq(lines))
+
+    columns = [name for section in sections for name in section]
+    results = [section[name] for section in sections for name in section]
+    return results, columns
+
+
+def main():
+    """Command-line entry point: digest AMS logs into a CSV file and/or PDF plots.
+
+    Every file matching ``--log-file`` (a glob pattern) is parsed; the selected
+    statistics (``--memory``, ``--uq``) are merged into one table tagged with
+    the originating file stem, then written with ``--csv`` and/or ``--plot``.
+
+    Raises:
+        ValueError: If no matching log file produced any data.
+    """
+    parser = argparse.ArgumentParser(
+        description="Simple Script digesting AMS logs and output/ploting results using matplotlib/pandas"
+    )
+    parser.add_argument(
+        "-l",
+        "--log-file",
+        type=str,
+        help="Log-file or files to glob and read",
+        required=True,
+    )
+
+    parser.add_argument("--memory", action=argparse.BooleanOptionalAction, default=False)
+    parser.add_argument("--uq", action=argparse.BooleanOptionalAction, default=False)
+    parser.add_argument("--csv", action=argparse.BooleanOptionalAction, default=False)
+    parser.add_argument("--plot", action=argparse.BooleanOptionalAction, default=False)
+
+    parser.add_argument(
+        "-o",
+        "--out-file",
+        dest="out_file",
+        type=str,
+        help="File with ams verbosity test",
+        required=True,
+    )
+
+    args = parser.parse_args()
+    files = {}
+
+    # NOTE(review): results are keyed by Path.stem, so two matched files that
+    # differ only in their last suffix would overwrite each other — confirm
+    # against the log file naming scheme.
+    for log in glob.glob(args.log_file):
+        path = Path(log)
+        files[str(path.stem)] = parse(path, args.memory, args.uq)
+
+    dfs = []
+
+    for name, (results, columns) in files.items():
+        if len(results) > 0:
+            # One row per series -> transpose so each series becomes a column.
+            data = np.array(results)
+            frame = pd.DataFrame(data.T, columns=columns)
+            frame["FileName"] = name
+            dfs.append(frame)
+
+    if not dfs:
+        # pd.concat([]) would raise an opaque "No objects to concatenate".
+        raise ValueError(
+            f"No AMS log data found for '{args.log_file}' "
+            "(no file matched, or neither --memory nor --uq was requested)"
+        )
+
+    df = pd.concat(dfs)
+    new_names = {}
+    if args.memory:
+        new_names.update(
+            {
+                "Start": "AMS Memory At Intro",
+                "End": "AMS Memory At Outro",
+            }
+        )
+    if args.uq:
+        # The "total" column keeps its name; plot() reads it as "total".
+        new_names.update(
+            {
+                "physics": "Domain Model",
+                "ml": "ML Model",
+            }
+        )
+
+    df = df.rename(columns=new_names)
+
+    if args.csv:
+        df.to_csv(args.out_file)
+
+    if args.plot:
+        plot(df, args.uq, args.memory, args.out_file)
+
+
+if __name__ == "__main__":
+    # Run the command-line tool once; a second call would re-parse every log
+    # and rewrite the same output files.
+    main()