push beta mode of "lpad introspect"

computron · computron · commit 39d0635386f7 · 2015-10-05T17:03:16.000-07:00
diff --git a/fireworks/features/introspect.py b/fireworks/features/introspect.py
@@ -0,0 +1,157 @@
+from __future__ import division
+from collections import defaultdict
+from pymongo import DESCENDING
+from tabulate import tabulate
+from fireworks import LaunchPad
+
+__author__ = 'Anubhav Jain <ajain@lbl.gov>'
+
+def flatten_to_keys(curr_doc, curr_recurs=1, max_recurs=2):
+    """
+    Flattens a (possibly nested) document into a list of "key1.key2:val" strings.
+
+    Dicts are expanded recursively; once the dict nesting level exceeds
+    max_recurs the remainder is reported as "<TRUNCATED_OBJECT>". A list/tuple
+    made only of scalars yields one ":item" entry per element, whereas a
+    list/tuple containing any dict, list or tuple is reported as a single
+    "<TRUNCATED_OBJECT>" entry. Every other value is converted with str().
+
+    :param curr_doc: the object to flatten (dict, list, tuple or scalar)
+    :param curr_recurs: (int) current dict nesting level; 1 means top level
+    :param max_recurs: (int) deepest dict nesting level that is still expanded
+    :return: [<str>] entries such as "key1.key2:val" (":val" for non-dict input)
+    """
+    if isinstance(curr_doc, dict):
+        if curr_recurs > max_recurs:
+            return [":<TRUNCATED_OBJECT>"]
+
+        # keys below the top level are joined to their parent key with a '.'
+        dot_char = '' if curr_recurs == 1 else '.'
+        my_list = []
+        for k in curr_doc:
+            for val in flatten_to_keys(curr_doc[k], curr_recurs + 1, max_recurs):
+                my_list.append(dot_char + k + val)
+        return my_list
+
+    if isinstance(curr_doc, (list, tuple)):
+        # sequences are only expanded when every element is a scalar
+        if any(isinstance(k, (dict, list, tuple)) for k in curr_doc):
+            return [":<TRUNCATED_OBJECT>"]
+        return [":" + str(k) for k in curr_doc]
+
+    return [":" + str(curr_doc)]
+
+def collect_stats(list_keys, filter_truncated=True):
+    """
+    Tallies how many times each key string (as produced by flatten_to_keys) occurs.
+
+    :param list_keys: [<str>] key strings to count
+    :param filter_truncated: (bool) if True, skip entries containing '<TRUNCATED_OBJECT>'
+    :return: (defaultdict) mapping of <str> -> number of occurrences
+    """
+    counts = defaultdict(int)
+    for entry in list_keys:
+        if filter_truncated and '<TRUNCATED_OBJECT>' in entry:
+            continue
+        counts[entry] += 1
+
+    return counts
+
+def compare_stats(statsdict1, numsamples1, statsdict2, numsamples2, threshold=5):
+    """
+    Compares how frequently each key occurs in two sample sets.
+
+    For every key found in either stats dict, the occurrence count is turned
+    into a percentage of that set's sample count, and the second percentage is
+    subtracted from the first. Keys whose absolute difference is below
+    threshold are left out of the result.
+
+    :param statsdict1: (dict) <str>:count for the first sample set
+    :param numsamples1: (int) number of samples behind statsdict1
+    :param statsdict2: (dict) <str>:count for the second sample set
+    :param numsamples2: (int) number of samples behind statsdict2
+    :param threshold: (number) minimum absolute percentage difference to report
+    :return: (defaultdict) <str>:float, percentage in set 1 minus percentage in set 2
+    """
+    diff_dict = defaultdict(float)
+
+    # set union works on both Python 2 lists and Python 3 dict views,
+    # unlike calling .extend() on the result of dict.keys()
+    all_keys = set(statsdict1) | set(statsdict2)
+    for k in all_keys:
+        diff = 0.0
+        if k in statsdict1:
+            diff += (statsdict1[k] / numsamples1) * 100
+
+        if k in statsdict2:
+            diff -= (statsdict2[k] / numsamples2) * 100
+
+        if abs(diff) >= threshold:
+            diff_dict[k] = diff
+
+    return diff_dict
+
+
+class Introspector(object):
+    """
+    Compares COMPLETED and FIZZLED documents in a LaunchPad database to find
+    keys/values that occur noticeably more often in one state than the other.
+    """
+
+    # Maps each accepted (lower-cased) collection alias to a tuple of
+    # (database collection name, key of the document that gets analyzed).
+    _TARGETS = {
+        "fws": ("fireworks", "spec"),
+        "fireworks": ("fireworks", "spec"),
+        "tasks": ("fireworks", "spec._tasks"),
+        "wflows": ("workflows", "metadata"),
+        "workflows": ("workflows", "metadata"),
+    }
+
+    def __init__(self, lpad):
+        """
+        :param lpad: (LaunchPad) only its "db" attribute is used
+        """
+        self.db = lpad.db
+
+    def _collect_state_stats(self, coll, state_key, state, sort_key, limit):
+        """
+        Gathers key statistics for up to limit documents that are in the given state.
+
+        :param coll: (str) database collection name
+        :param state_key: (str) projected key whose content is analyzed
+        :param state: (str) value the "state" field must have, e.g. "FIZZLED"
+        :param sort_key: sort specification handed to find(), or None
+        :param limit: (int) maximum number of documents to examine
+        :return: (stats dict from collect_stats, number of documents examined)
+        """
+        keys = []
+        nsamples = 0
+
+        cursor = self.db[coll].find({"state": state}, {state_key: 1}, sort=sort_key).limit(limit)
+        for doc in cursor:
+            nsamples += 1
+            if state_key == "spec._tasks":
+                # tasks are summarized by their _fw_name only
+                for t in doc['spec']['_tasks']:
+                    keys.append('_fw_name:{}'.format(t['_fw_name']))
+            else:
+                # a projected field that is absent from the stored document is
+                # simply missing from the result, so fall back to an empty dict
+                keys.extend(flatten_to_keys(doc.get(state_key, {})))
+
+        return collect_stats(keys), nsamples
+
+    def introspect_fizzled(self, coll="fws", rsort=True, threshold=10, limit=100):
+        """
+        Builds a table of keys/values whose frequency differs between
+        COMPLETED and FIZZLED documents.
+
+        :param coll: (str) "fws"/"fireworks", "tasks" or "wflows"/"workflows" (case-insensitive)
+        :param rsort: (bool) if True, examine documents in descending "updated_on" order
+        :param threshold: (number) minimum absolute percentage difference for a row to be kept
+        :param limit: (int) maximum number of documents examined per state
+        :return: [[key, value, # completed, # fizzled, % completed - % fizzled]],
+            sorted by the last column in descending order
+        :raises ValueError: if coll is not a recognized collection alias
+        """
+        # initialize collection
+        try:
+            coll, state_key = self._TARGETS[coll.lower()]
+        except KeyError:
+            raise ValueError("Unrecognized collection!")
+
+        sort_key = [("updated_on", DESCENDING)] if rsort else None
+
+        # get stats on fizzled docs, then on completed docs
+        fizzled_d, nsamples_fizzled = self._collect_state_stats(
+            coll, state_key, "FIZZLED", sort_key, limit)
+        completed_d, nsamples_completed = self._collect_state_stats(
+            coll, state_key, "COMPLETED", sort_key, limit)
+
+        diff_d = compare_stats(completed_d, nsamples_completed, fizzled_d, nsamples_fizzled, threshold=threshold)
+
+        table = []
+        for w in sorted(diff_d, key=diff_d.get, reverse=True):
+            # split on the first ':' only, so values that contain ':' themselves
+            # (paths, timestamps, ...) are kept intact
+            key, _, value = w.partition(":")
+            table.append([key, value, completed_d.get(w, 0), fizzled_d.get(w, 0), diff_d[w]])
+
+        return table
+
+    def print_report(self, table, coll=None):
+        """
+        Prints the table produced by introspect_fizzled, optionally preceded by a title.
+
+        :param table: rows as returned by introspect_fizzled
+        :param coll: (str) collection alias used for the title; no title is printed if empty/None
+        """
+        if coll:
+            target = self._TARGETS.get(coll.lower())
+            if target:
+                # e.g. "fireworks.spec._tasks"; unknown aliases are printed as given
+                coll = "{}.{}".format(*target)
+
+            coll = "Introspection report for {}".format(coll)
+            print('=' * len(coll))
+            print(coll)
+            print('=' * len(coll))
+
+        print(tabulate(table, headers=['key', 'value', '#C', '#F', '%C - %F']))
+
+
diff --git a/fireworks/features/tests/__init__.py b/fireworks/features/tests/__init__.py
@@ -0,0 +1 @@
+__author__ = 'Anubhav Jain <ajain@lbl.gov>'
diff --git a/fireworks/features/tests/test_introspect.py b/fireworks/features/tests/test_introspect.py
@@ -0,0 +1,16 @@
+import unittest
+from fireworks.features.introspect import flatten_to_keys
+
+__author__ = 'Anubhav Jain <ajain@lbl.gov>'
+
+
+
+class IntrospectTest(unittest.TestCase):
+    """Unit tests for flatten_to_keys."""
+
+    def _assert_flattens_to(self, doc, max_recurs, expected):
+        # dict iteration order is not guaranteed on every interpreter, so the
+        # flattened keys are compared after sorting
+        self.assertEqual(sorted(flatten_to_keys(doc, max_recurs=max_recurs)), sorted(expected))
+
+    def test_flatten_dict(self):
+        nested = {"d": {"e": {"f": 4}, "f": 10}}
+        # nested dicts are truncated once max_recurs is exceeded
+        self._assert_flattens_to(nested, 1, ['d:<TRUNCATED_OBJECT>'])
+        self._assert_flattens_to(nested, 2, ['d.e:<TRUNCATED_OBJECT>', 'd.f:10'])
+        self._assert_flattens_to(nested, 3, ['d.e.f:4', 'd.f:10'])
+        # lists of lists are always truncated; lists of scalars are expanded
+        self._assert_flattens_to({"d": [[0, 1], [2, 3]]}, 5, ['d:<TRUNCATED_OBJECT>'])
+        self._assert_flattens_to({"d": [1, 2, 3]}, 2, ['d:1', 'd:2', 'd:3'])
+        self._assert_flattens_to({"d": {"e": [0, 1]}}, 2, ['d.e:0', 'd.e:1'])
diff --git a/fireworks/scripts/lpad_run.py b/fireworks/scripts/lpad_run.py
@@ -2,6 +2,7 @@
 
 from __future__ import unicode_literals
 from fireworks.features.fw_report import FWReport
+from fireworks.features.introspect import Introspector
 
 """
 A runnable script for managing a FireWorks database (a command-line interface to launchpad.py)
@@ -479,6 +480,20 @@ def report(args):
     fwr.print_stats(stats)
 
 
+def introspect(args):
+    """
+    Prints introspection reports for the 'tasks', 'fireworks' and 'workflows' collections.
+
+    :param args: parsed command-line arguments; the optional "max" attribute
+        limits how many documents are examined per state (default 100)
+    """
+    print("NOTE: This feature is in beta mode...")
+    lp = get_lp(args)
+    # named "limit" rather than "max" so the builtin max() is not shadowed
+    limit = getattr(args, "max", 100)
+
+    isp = Introspector(lp)
+    for coll in ['tasks', 'fireworks', 'workflows']:
+        print('generating report...please wait...')
+        print('')
+        table = isp.introspect_fizzled(coll=coll, limit=limit)
+        isp.print_report(table, coll=coll)
+        print('')
+
+
 def track_fws(args):
     lp = get_lp(args)
     fw_ids = parse_helper(lp, args, skip_pw=True)
@@ -816,6 +831,10 @@ def lpad():
     report_parser.add_argument('-q', '--query', help="Additional Pymongo queries to filter entries before processing.")
     report_parser.set_defaults(func=report)
 
+    introspect_parser = subparsers.add_parser('introspect', help='Introspect recent runs to pin down errors')
+    introspect_parser.add_argument('-m', '--max', help='examine past <max> results', default=100, type=int)
+    introspect_parser.set_defaults(func=introspect)
+
     args = parser.parse_args()
 
     args.output = get_output_func(args.output)
diff --git a/setup.py b/setup.py
@@ -32,7 +32,8 @@
                         'newt': ['requests>=2.01'],
                         'webgui':['flask>=0.10.1', 'flask-paginate>=0.2.8'],
                         'daemon_mode':['fabric>=1.8.1'],
-                        'plotting':['matplotlib>=1.1.1', 'networkx>=1.8.1']},
+                        'plotting':['matplotlib>=1.1.1', 'networkx>=1.8.1'],
+                        'introspect':['tabulate>=0.7.5']},
         classifiers=['Programming Language :: Python :: 2.7',
                      'Development Status :: 4 - Beta',
                      'Intended Audience :: Science/Research',

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+__author__ = 'Anubhav Jain <ajain@lbl.gov>'`