calculateScAverage.py
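"""Print per-model average scores for each task table.

For each ORM table (AnnotationGen, KnlgExp, CaseGen, CodeGen, CodeCor) the script
averages every score column over all rows stored for one model, computes the overall
mean of those averages, and prints the result as a pipe-formatted table via tabulate.
"""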
from database_connect import get_session
import aitestOrm
from aitestOrm import AnnotationGen, KnlgExp, CaseGen, CodeGen, CodeCor
from tabulate import tabulate

session = get_session('database_aitest')


def format_to_two_decimal_places(value):
    return f"{value:.2f}"


def summary_AnnotationGen(model_name):
    # Average every score column of AnnotationGen over all rows recorded for the
    # given model (assumes at least one row exists for that model).
    av_anno = AnnotationGen(accuracy=0, simplicity=0, naturalness=0, usefulness=0,
                            maccuracy=0, msimplicity=0, mnaturalness=0, musefulness=0)
    for anno in session.query(AnnotationGen).filter(AnnotationGen.model_name == model_name).all():
        av_anno.accuracy += anno.accuracy
        av_anno.simplicity += anno.simplicity
        av_anno.naturalness += anno.naturalness
        av_anno.usefulness += anno.usefulness
        av_anno.maccuracy += anno.maccuracy
        av_anno.msimplicity += anno.msimplicity
        av_anno.mnaturalness += anno.mnaturalness
        av_anno.musefulness += anno.musefulness
    count = session.query(AnnotationGen).filter(AnnotationGen.model_name == model_name).count()
    av_anno.accuracy /= count
    av_anno.simplicity /= count
    av_anno.naturalness /= count
    av_anno.usefulness /= count
    av_anno.maccuracy /= count
    av_anno.msimplicity /= count
    av_anno.mnaturalness /= count
    av_anno.musefulness /= count
    average = (av_anno.accuracy + av_anno.simplicity + av_anno.naturalness + av_anno.usefulness
               + av_anno.maccuracy + av_anno.msimplicity + av_anno.mnaturalness + av_anno.musefulness) / 8
    headers = ["Model Name", "accuracy", "simplicity", "naturalness", "usefulness",
               "maccuracy", "msimplicity", "mnaturalness", "musefulness", "average"]
    data = [[model_name,
             format_to_two_decimal_places(av_anno.accuracy),
             format_to_two_decimal_places(av_anno.simplicity),
             format_to_two_decimal_places(av_anno.naturalness),
             format_to_two_decimal_places(av_anno.usefulness),
             format_to_two_decimal_places(av_anno.maccuracy),
             format_to_two_decimal_places(av_anno.msimplicity),
             format_to_two_decimal_places(av_anno.mnaturalness),
             format_to_two_decimal_places(av_anno.musefulness),
             format_to_two_decimal_places(average)]]
    print("AnnotationGen Score:")
    print(tabulate(data, headers, tablefmt="pipe"))


def summary_KnlgExp(model_name):
    # Average every score column of KnlgExp over all rows recorded for the
    # given model (assumes at least one row exists for that model).
    av_knlg = KnlgExp(accuracy=0, correlation=0, understandability=0,
                      maccuracy=0, mcorrelation=0, munderstandability=0)
    for knlg in session.query(KnlgExp).filter(KnlgExp.model_name == model_name).all():
        av_knlg.accuracy += knlg.accuracy
        av_knlg.correlation += knlg.correlation
        av_knlg.understandability += knlg.understandability
        av_knlg.maccuracy += knlg.maccuracy
        av_knlg.mcorrelation += knlg.mcorrelation
        av_knlg.munderstandability += knlg.munderstandability
    count = session.query(KnlgExp).filter(KnlgExp.model_name == model_name).count()
    av_knlg.accuracy /= count
    av_knlg.correlation /= count
    av_knlg.understandability /= count
    av_knlg.maccuracy /= count
    av_knlg.mcorrelation /= count
    av_knlg.munderstandability /= count
    average = (av_knlg.accuracy + av_knlg.correlation + av_knlg.understandability
               + av_knlg.maccuracy + av_knlg.mcorrelation + av_knlg.munderstandability) / 6
    headers = ["Model Name", "accuracy", "correlation", "understandability",
               "maccuracy", "mcorrelation", "munderstandability", "average"]
    data = [[model_name,
             format_to_two_decimal_places(av_knlg.accuracy),
             format_to_two_decimal_places(av_knlg.correlation),
             format_to_two_decimal_places(av_knlg.understandability),
             format_to_two_decimal_places(av_knlg.maccuracy),
             format_to_two_decimal_places(av_knlg.mcorrelation),
             format_to_two_decimal_places(av_knlg.munderstandability),
             format_to_two_decimal_places(average)]]
    print("KnlgExp Score:")
    print(tabulate(data, headers, tablefmt="pipe"))


def summary_CaseGen(model_name):
    # Average every score column of CaseGen over all rows recorded for the
    # given model (assumes at least one row exists for that model).
    av_case = CaseGen(correctness=0, comprehensive=0, mcomprehensive=0)
    for case in session.query(CaseGen).filter(CaseGen.model_name == model_name).all():
        av_case.correctness += case.correctness
        av_case.comprehensive += case.comprehensive
        av_case.mcomprehensive += case.mcomprehensive
    count = session.query(CaseGen).filter(CaseGen.model_name == model_name).count()
    av_case.correctness /= count
    av_case.comprehensive /= count
    av_case.mcomprehensive /= count
    average = (float(av_case.correctness) + av_case.comprehensive + av_case.mcomprehensive) / 3
    headers = ["Model Name", "correctness", "comprehensive", "mcomprehensive", "average"]
    data = [[model_name,
             format_to_two_decimal_places(float(av_case.correctness)),
             format_to_two_decimal_places(av_case.comprehensive),
             format_to_two_decimal_places(av_case.mcomprehensive),
             format_to_two_decimal_places(average)]]
    print("CaseGen Score:")
    print(tabulate(data, headers, tablefmt="pipe"))


def summary_CodeGen(model_name):
    # Average every score column of CodeGen over all rows recorded for the
    # given model (assumes at least one row exists for that model).
    av_code = CodeGen(correctness=0, readability=0, performance=0, mreadability=0, mperformance=0)
    for code in session.query(CodeGen).filter(CodeGen.model_name == model_name).all():
        av_code.correctness += code.correctness
        av_code.readability += code.readability
        av_code.performance += code.performance
        av_code.mreadability += code.mreadability
        av_code.mperformance += code.mperformance
    count = session.query(CodeGen).filter(CodeGen.model_name == model_name).count()
    av_code.correctness /= count
    av_code.readability /= count
    av_code.performance /= count
    av_code.mreadability /= count
    av_code.mperformance /= count
    average = (float(av_code.correctness) + av_code.readability + av_code.performance
               + av_code.mreadability + av_code.mperformance) / 5
    headers = ["Model Name", "correctness", "readability", "performance",
               "mreadability", "mperformance", "average"]
    data = [[model_name,
             format_to_two_decimal_places(float(av_code.correctness)),
             format_to_two_decimal_places(av_code.readability),
             format_to_two_decimal_places(av_code.performance),
             format_to_two_decimal_places(av_code.mreadability),
             format_to_two_decimal_places(av_code.mperformance),
             format_to_two_decimal_places(average)]]
    print("CodeGen Score:")
    print(tabulate(data, headers, tablefmt="pipe"))


def summary_CodeCor(model_name):
    # Average every score column of CodeCor over all rows recorded for the
    # given model (assumes at least one row exists for that model).
    av_codecor = CodeCor(correctness=0, understandability=0, munderstandability=0)
    for codecor in session.query(CodeCor).filter(CodeCor.model_name == model_name).all():
        av_codecor.correctness += codecor.correctness
        av_codecor.understandability += codecor.understandability
        av_codecor.munderstandability += codecor.munderstandability
    count = session.query(CodeCor).filter(CodeCor.model_name == model_name).count()
    av_codecor.correctness /= count
    av_codecor.understandability /= count
    av_codecor.munderstandability /= count
    average = (float(av_codecor.correctness) + av_codecor.understandability + av_codecor.munderstandability) / 3
    headers = ["Model Name", "correctness", "understandability", "munderstandability", "average"]
    data = [[model_name,
             format_to_two_decimal_places(float(av_codecor.correctness)),
             format_to_two_decimal_places(av_codecor.understandability),
             format_to_two_decimal_places(av_codecor.munderstandability),
             format_to_two_decimal_places(average)]]
    print("CodeCor Score:")
    print(tabulate(data, headers, tablefmt="pipe"))


if __name__ == '__main__':
    summary_AnnotationGen("Qwen2.5-Coder-7B-Instruct")
    print('\n\n')
    summary_KnlgExp("Qwen2.5-Coder-7B-Instruct")
    print('\n\n')
    summary_CaseGen("Qwen2.5-Coder-7B-Instruct")
    print('\n\n')
    summary_CodeGen("Qwen2.5-Coder-7B-Instruct")
    print('\n\n')
    summary_CodeCor("Qwen2.5-Coder-7B-Instruct")
    print('\n\n')
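
# A minimal usage sketch (not part of the original script): the model name above is
# hard-coded, so re-running the same summaries for another model could look like the
# commented-out snippet below. argparse is from the Python standard library; the
# --model flag is an assumed interface, not something this repository defines.
#
#     import argparse
#
#     parser = argparse.ArgumentParser(description="Print average scores for one model")
#     parser.add_argument("--model", default="Qwen2.5-Coder-7B-Instruct")
#     args = parser.parse_args()
#     for summary in (summary_AnnotationGen, summary_KnlgExp, summary_CaseGen,
#                     summary_CodeGen, summary_CodeCor):
#         summary(args.model)
#         print('\n\n')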