forked from logpai/Log3C
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun.py
66 lines (45 loc) · 2.55 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from lib import cascading_clustering as cluster
from lib.util import *
import argparse
# ***********************************CODE USAGE GUIDE***************************************
# Work for FSE 2018
#
# 1. How to run the code?
# Open a terminal, run the cascading clustering with "python run.py".
# Make sure that you have Python 3 and all required packages installed.
#
# 2. How to set the parameters?
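# Pass the parameters as command-line options (e.g. --seq_folder, --kpi_path, --threshold).
# The options and their default values are declared with argparse at the bottom of this file.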
#
# Notes: multiprocessing is only used to read input files and save output files.
# ******************************************************************************************
@timeit
def main(args):
    """Run one end-to-end pass: load inputs, weigh them, then cascade-cluster.

    Args:
        args: parsed command-line namespace. Only ``args.kpi_path`` is read
            directly here; the namespace itself is forwarded unchanged to
            ``load_all_data``, ``cleanup_output_dir`` and ``cluster.cascade``.

    The function body returns nothing; the value produced by
    ``cluster.cascade`` is not propagated to the caller.
    """
    # load_all_data yields three values; judging by their use below they are
    # the sequence data, its index, and an event-occurrence matrix
    # (NOTE(review): exact shapes are defined in lib.util -- confirm there).
    sequences, sequence_index, occurrence_matrix = load_all_data(args)

    # Weights come from relating the occurrence matrix to the loaded KPIs.
    kpis = cluster.load_kpi(args.kpi_path)
    corr_weights = cluster.get_correlation_weight(occurrence_matrix, kpis)

    # cluster.weigh returns a pair; only the first element is used afterwards.
    weighted_sequences, _ = cluster.weigh(sequences, corr_weights)

    # The output directory is cleaned after weighing and before clustering,
    # exactly as in the original statement order.
    cleanup_output_dir(args)
    cluster.cascade(args, sequences, sequence_index, weighted_sequences)
def parse_bool_flag(value):
    """Convert a command-line token into a real boolean.

    ``type=bool`` must not be used with argparse: ``bool("False")`` is True
    because every non-empty string is truthy, so the flag could never be
    switched off explicitly from the command line.

    Args:
        value: the raw option value (a string from the command line, or an
            actual bool when called programmatically).

    Returns:
        True for true/t/yes/y/on/1, False for false/f/no/n/off/0 and for the
        empty string (which ``bool`` also mapped to False). Matching is
        case-insensitive and ignores surrounding whitespace.

    Raises:
        argparse.ArgumentTypeError: if the token is not a recognised boolean,
            which argparse reports as a normal usage error.
    """
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in ("true", "t", "yes", "y", "on", "1"):
        return True
    if token in ("", "false", "f", "no", "n", "off", "0"):
        return False
    raise argparse.ArgumentTypeError(
        "expected a boolean value (true/false), got {!r}".format(value))


def build_arg_parser():
    """Create the command-line parser holding every option of this script.

    Returns:
        argparse.ArgumentParser: parser with the same option names, types and
        defaults as before, except that ``--save_file`` now parses its value
        with ``parse_bool_flag``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--seq_folder", default="/seq_folder/", required=False,
                        help="folder of log sequence matrix files")
    parser.add_argument("--kpi_path", default="/kpis/kpis.csv", required=False,
                        help="the path of KPI file")
    parser.add_argument("--proc_num", type=int, default=16, required=False,
                        help="number of processes when loading files and saving files")
    # argparse %-formats help strings, so a literal percent sign must be
    # written as "%%"; the bare "1% s..." was parsed as a "% s" conversion
    # and corrupted the --help output.
    parser.add_argument("--sample_rate", type=int, default=100, required=False,
                        help="same rate for sampling, 100 represents 1%% sample rate")
    parser.add_argument("--threshold", type=float, default=0.3, required=False,
                        help="threshold for clustering, and also used when matching the nearest sequence")
    parser.add_argument("--save_file", type=parse_bool_flag, default=False, required=False,
                        help="FLAG to decide whether saving output clusters, it costs a lot if turned on")
    parser.add_argument("--output_path", default="/output/", required=False,
                        help="folder for saving output clusters of data")
    parser.add_argument("--rep_path", default="/reps/", required=False,
                        help="path used for saving all representatives (patterns)")
    return parser


if __name__ == '__main__':
    args = build_arg_parser().parse_args()
    main(args)