Skip to content

Commit 39d0635

Browse files
committed
push beta mode of "lpad introspect"
1 parent 3e3e687 commit 39d0635

File tree

5 files changed

+195
-1
lines changed

5 files changed

+195
-1
lines changed

fireworks/features/introspect.py

+157
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
from __future__ import division
2+
from collections import defaultdict
3+
from pymongo import DESCENDING
4+
from tabulate import tabulate
5+
from fireworks import LaunchPad
6+
7+
__author__ = 'Anubhav Jain <ajain@lbl.gov>'
8+
9+
def flatten_to_keys(curr_doc, curr_recurs=1, max_recurs=2):
    """
    Converts a (possibly nested) document into a flat list of "key1.key2:val" strings.

    Dictionaries are descended into until the nesting depth exceeds max_recurs; beyond
    that the remainder is represented by the ":<TRUNCATED_OBJECT>" marker. A list/tuple
    of scalars yields one ":val" entry per element, while a list/tuple containing any
    dict, list or tuple collapses to the single marker. Any other value yields the
    single entry ":" + str(value).

    :param curr_doc: the document (or sub-document / value) to flatten
    :param curr_recurs: current nesting depth; 1 for the top-level call
    :param max_recurs: maximum dict nesting depth that is expanded
    :return: [<str>]
    """
    if isinstance(curr_doc, dict):
        if curr_recurs > max_recurs:
            return [":<TRUNCATED_OBJECT>"]

        # top-level keys start the path; deeper keys are joined to their parent with a dot
        dot_char = '' if curr_recurs == 1 else '.'
        my_list = []
        for k in curr_doc:
            for val in flatten_to_keys(curr_doc[k], curr_recurs + 1, max_recurs):
                my_list.append(dot_char + k + val)
        return my_list

    if isinstance(curr_doc, (list, tuple)):
        my_list = []
        for item in curr_doc:
            if isinstance(item, (dict, list, tuple)):
                # nested containers inside a sequence are not expanded;
                # the whole sequence is reported as truncated
                return [":<TRUNCATED_OBJECT>"]
            my_list.append(":" + str(item))
        return my_list

    return [":" + str(curr_doc)]
41+
42+
def collect_stats(list_keys, filter_truncated=True):
    """
    Tallies how often each key string (as produced by flatten_to_keys) occurs.

    :param list_keys: iterable of key strings
    :param filter_truncated: when True, entries containing '<TRUNCATED_OBJECT>' are skipped
    :return: defaultdict(int) mapping key string -> number of occurrences
    """
    counts = defaultdict(int)
    for key_str in list_keys:
        if filter_truncated and '<TRUNCATED_OBJECT>' in key_str:
            continue
        counts[key_str] += 1

    return counts
55+
56+
def compare_stats(statsdict1, numsamples1, statsdict2, numsamples2, threshold=5):
    """
    Compares how frequently each key occurs in two sample sets.

    For every key present in either stats dict, computes the percentage of samples
    containing it in set 1 minus the percentage in set 2. Keys whose absolute
    difference is below threshold are dropped.

    The percentages rely on true division, which this module enables through
    "from __future__ import division".

    :param statsdict1: dict of <str>:count for the first sample set (see collect_stats)
    :param numsamples1: number of samples behind statsdict1
    :param statsdict2: dict of <str>:count for the second sample set
    :param numsamples2: number of samples behind statsdict2
    :param threshold: minimum absolute percentage difference for a key to be kept
    :return: defaultdict(float) of <str>:(percent in set 1 - percent in set 2)
    """
    diff_dict = defaultdict(float)

    # dict.keys() is a view without extend() on Python 3, so build the union from sets
    all_keys = set(statsdict1) | set(statsdict2)
    for k in all_keys:
        if k in statsdict1:
            diff_dict[k] += (statsdict1[k] / numsamples1) * 100

        if k in statsdict2:
            diff_dict[k] -= (statsdict2[k] / numsamples2) * 100

        if abs(diff_dict[k]) < threshold:
            del diff_dict[k]

    return diff_dict
73+
74+
75+
class Introspector(object):
    """
    Compares recent FIZZLED and COMPLETED documents in a LaunchPad database to find
    key/value pairs that occur disproportionately often in one of the two states.
    """

    def __init__(self, lpad):
        """
        :param lpad: (LaunchPad)
        """
        self.db = lpad.db

    def _sample_state(self, coll, state_key, state, sort_key, limit):
        """
        Gathers key statistics for up to <limit> documents in the given state.

        :param coll: name of the database collection to query
        :param state_key: document field to analyze ("spec", "spec._tasks" or "metadata")
        :param state: value of the "state" field to match, e.g. "FIZZLED"
        :param sort_key: sort specification passed to find(), or None
        :param limit: maximum number of documents to examine
        :return: (stats dict from collect_stats, number of documents examined)
        """
        keys = []
        nsamples = 0

        for doc in self.db[coll].find({"state": state}, {state_key: 1}, sort=sort_key).limit(limit):
            nsamples += 1
            if state_key == "spec._tasks":
                # for tasks only the task name is compared, not the full task contents
                for t in doc['spec']['_tasks']:
                    keys.append('_fw_name:{}'.format(t['_fw_name']))
            else:
                keys.extend(flatten_to_keys(doc[state_key]))

        return collect_stats(keys), nsamples

    def introspect_fizzled(self, coll="fws", rsort=True, threshold=10, limit=100):
        """
        Builds a table of key/value pairs whose frequency differs between COMPLETED
        and FIZZLED documents.

        :param coll: what to analyze: "fws"/"fireworks" (the spec), "tasks" (task names
            within the spec) or "wflows"/"workflows" (the metadata); case-insensitive
        :param rsort: if True, documents are sorted by "updated_on" descending before
            the limit is applied
        :param threshold: minimum absolute percentage difference for a key to be reported
        :param limit: maximum number of documents examined per state
        :return: list of rows [key, value, #completed, #fizzled, %completed - %fizzled],
            ordered by the last column, largest first
        :raises ValueError: if coll is not one of the recognized names
        """
        # initialize collection
        if coll.lower() in ["fws", "fireworks"]:
            coll = "fireworks"
            state_key = "spec"

        elif coll.lower() in ["tasks"]:
            coll = "fireworks"
            state_key = "spec._tasks"

        elif coll.lower() in ["wflows", "workflows"]:
            coll = "workflows"
            state_key = "metadata"
        else:
            raise ValueError("Unrecognized collection!")

        sort_key = [("updated_on", DESCENDING)] if rsort else None

        # get stats on fizzled docs, then on completed docs
        fizzled_d, nsamples_fizzled = self._sample_state(coll, state_key, "FIZZLED", sort_key, limit)
        completed_d, nsamples_completed = self._sample_state(coll, state_key, "COMPLETED", sort_key, limit)

        diff_d = compare_stats(completed_d, nsamples_completed, fizzled_d, nsamples_fizzled, threshold=threshold)

        table = []
        for w in sorted(diff_d, key=diff_d.get, reverse=True):
            # split on the first ":" only, so values that contain ":" are kept whole
            key, _, value = w.partition(":")
            table.append([key, value, completed_d.get(w, 0), fizzled_d.get(w, 0), diff_d[w]])

        return table

    def print_report(self, table, coll=None):
        """
        Prints a table produced by introspect_fizzled, optionally preceded by a header.

        :param table: list of rows as returned by introspect_fizzled
        :param coll: if given, a header naming the analyzed field is printed; recognized
            names are expanded (e.g. "fws" -> "fireworks.spec"), others are used as-is
        """
        if coll:
            if coll.lower() in ["fws", "fireworks"]:
                coll = "fireworks.spec"
            elif coll.lower() in ["tasks"]:
                coll = "fireworks.spec._tasks"
            elif coll.lower() in ["wflows", "workflows"]:
                coll = "workflows.metadata"

            title = "Introspection report for {}".format(coll)
            print('=' * len(title))
            print(title)
            print('=' * len(title))

        print(tabulate(table, headers=['key', 'value', '#C', '#F', '%C - %F']))
156+
157+

fireworks/features/tests/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
__author__ = 'Anubhav Jain <ajain@lbl.gov>'
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
import unittest
2+
from fireworks.features.introspect import flatten_to_keys
3+
4+
__author__ = 'Anubhav Jain <ajain@lbl.gov>'
5+
6+
7+
8+
class IntrospectTest(unittest.TestCase):
    """Unit tests for the helpers in fireworks.features.introspect."""

    def test_flatten_dict(self):
        """flatten_to_keys truncates past max_recurs and expands lists of scalars."""
        nested = {"d": {"e": {"f": 4}, "f": 10}}

        # dict iteration order is not guaranteed on every supported interpreter,
        # so dict-derived results are sorted before comparison
        self.assertEqual(sorted(flatten_to_keys(nested, max_recurs=1)), ['d:<TRUNCATED_OBJECT>'])
        self.assertEqual(sorted(flatten_to_keys(nested, max_recurs=2)), ['d.e:<TRUNCATED_OBJECT>', 'd.f:10'])
        self.assertEqual(sorted(flatten_to_keys(nested, max_recurs=3)), ['d.e.f:4', 'd.f:10'])

        # single-key dicts wrapping lists: list order is preserved, no sorting needed
        self.assertEqual(flatten_to_keys({"d": [[0, 1], [2, 3]]}, max_recurs=5), ['d:<TRUNCATED_OBJECT>'])
        self.assertEqual(flatten_to_keys({"d": [1, 2, 3]}, max_recurs=2), ['d:1', 'd:2', 'd:3'])
        self.assertEqual(flatten_to_keys({"d": {"e": [0, 1]}}, max_recurs=2), ['d.e:0', 'd.e:1'])

fireworks/scripts/lpad_run.py

+19
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from __future__ import unicode_literals
44
from fireworks.features.fw_report import FWReport
5+
from fireworks.features.introspect import Introspector
56

67
"""
78
A runnable script for managing a FireWorks database (a command-line interface to launchpad.py)
@@ -479,6 +480,20 @@ def report(args):
479480
fwr.print_stats(stats)
480481

481482

483+
def introspect(args):
    """
    Prints introspection reports comparing FIZZLED and COMPLETED entries for tasks,
    fireworks and workflows, in that order.

    :param args: parsed command-line arguments; args.max, if present, is passed on as
        the limit for each report and otherwise defaults to 100
    """
    print("NOTE: This feature is in beta mode...")
    lp = get_lp(args)
    # named "limit" rather than "max" so the builtin max() is not shadowed
    limit = getattr(args, "max", 100)

    isp = Introspector(lp)
    for coll in ['tasks', 'fireworks', 'workflows']:
        print('generating report...please wait...')
        print('')
        table = isp.introspect_fizzled(coll=coll, limit=limit)
        isp.print_report(table, coll=coll)
        print('')
495+
496+
482497
def track_fws(args):
483498
lp = get_lp(args)
484499
fw_ids = parse_helper(lp, args, skip_pw=True)
@@ -816,6 +831,10 @@ def lpad():
816831
report_parser.add_argument('-q', '--query', help="Additional Pymongo queries to filter entries before processing.")
817832
report_parser.set_defaults(func=report)
818833

834+
introspect_parser = subparsers.add_parser('introspect', help='Introspect recent runs to pin down errors')
835+
introspect_parser.add_argument('-m', '--max', help='examine past <max> results', default=100, type=int)
836+
introspect_parser.set_defaults(func=introspect)
837+
819838
args = parser.parse_args()
820839

821840
args.output = get_output_func(args.output)

setup.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@
3232
'newt': ['requests>=2.01'],
3333
'webgui':['flask>=0.10.1', 'flask-paginate>=0.2.8'],
3434
'daemon_mode':['fabric>=1.8.1'],
35-
'plotting':['matplotlib>=1.1.1', 'networkx>=1.8.1']},
35+
'plotting':['matplotlib>=1.1.1', 'networkx>=1.8.1'],
36+
'introspect':['tabulate>=0.7.5']},
3637
classifiers=['Programming Language :: Python :: 2.7',
3738
'Development Status :: 4 - Beta',
3839
'Intended Audience :: Science/Research',

0 commit comments

Comments
 (0)