Skip to content

Commit

Permalink
Organize all experiments and update to pass all tests.
Browse files Browse the repository at this point in the history
  • Loading branch information
Hhyemin committed Aug 30, 2024
1 parent 567a3f5 commit b3ee5a2
Show file tree
Hide file tree
Showing 17 changed files with 819 additions and 297 deletions.
23 changes: 15 additions & 8 deletions src/suppression_study/evolution/AccidentalSuppressionFinder.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
help="JSON file with the suppression histories of the repository", required=True)
parser.add_argument(
"--results_dir", help="Directory where to put the results", required=True)
parser.add_argument("--file_name_specific", default=None,
help="file specific for mapping, useless and useful suppression files", required=False)


def find_relevant_range_of_commits(suppression_history, commits):
Expand Down Expand Up @@ -90,18 +92,23 @@ def find_relevant_commits(repo_dir: str, history: List[ChangeEvent], commits: Li
return result # the commits that changes the relevant files.


def get_suppression_warning_pairs(repo_dir, commit, relevant_files, results_dir):
def get_suppression_warning_pairs(repo_dir, commit, relevant_files, results_dir, is_file_specific):
# TODO mypy support
file_specific = "_".join(relevant_files[0].rsplit("/", 3)[1:]).rsplit(".", 1)[0]
file = join(results_dir, f"{commit}_mapping_{file_specific}.csv")
file_specific = None
if not is_file_specific: # now the is_file_specific is None
file_specific = "_".join(relevant_files[0].rsplit("/", 3)[1:]).rsplit(".", 1)[0]
file = join(results_dir, f"{commit}_mapping_{file_specific}.csv")
else: # is_file_specific is "non-specific", refer to test_AccidentalSuppressionFinder.py
file = join(results_dir, f"{commit}_mapping.csv")

if not exists(file):
compute_warning_suppression_mapping(
repo_dir, commit, "pylint", results_dir, relevant_files=relevant_files, file_specific=file_specific)
pairs = read_mapping_from_csv(file=file)
return pairs


def check_for_accidental_suppressions(repo_dir, history, relevant_commits, relevant_files, results_dir):
def check_for_accidental_suppressions(repo_dir, history, relevant_commits, relevant_files, results_dir, is_file_specific):
accidentally_suppressed_warnings = []
previous_commit = None
warnings_suppressed_at_previous_commit = None
Expand Down Expand Up @@ -132,7 +139,7 @@ def check_for_accidental_suppressions(repo_dir, history, relevant_commits, relev
# if not "<Parsing failed>" in warnings_suppressed_at_previous_commit
if commit in commits:
suppression_warning_pairs = get_suppression_warning_pairs(
repo_dir, commit, relevant_files, results_dir)
repo_dir, commit, relevant_files, results_dir, is_file_specific)
if suppression_warning_pairs:
# find warnings that the suppression suppresses at the current point in time
warnings_suppressed_at_commit = []
Expand Down Expand Up @@ -173,7 +180,7 @@ def check_for_accidental_suppressions(repo_dir, history, relevant_commits, relev
return accidentally_suppressed_warnings


def main(repo_dir, commits_file, history_file, results_dir):
def main(repo_dir, commits_file, history_file, results_dir, is_file_specific):
# read the list of commit ids
commits = get_commit_list(commits_file)

Expand All @@ -190,7 +197,7 @@ def main(repo_dir, commits_file, history_file, results_dir):
print(f"Found {len(relevant_commits)} relevant commits.")

accidentally_suppressed_warnings = check_for_accidental_suppressions(
repo_dir, history, relevant_commits, relevant_files, results_dir)
repo_dir, history, relevant_commits, relevant_files, results_dir, is_file_specific)
all_accidentally_suppressed_warnings.extend(accidentally_suppressed_warnings)
print(f"Done with {history_idx + 1}/{len(histories)} histories. Found {len(accidentally_suppressed_warnings)} accidentally suppressed warnings.\n")

Expand All @@ -203,4 +210,4 @@ def main(repo_dir, commits_file, history_file, results_dir):

if __name__ == "__main__":
args = parser.parse_args()
main(args.repo_dir, args.commits_file, args.history_file, args.results_dir)
main(args.repo_dir, args.commits_file, args.history_file, args.results_dir, args.file_name_specific)
3 changes: 1 addition & 2 deletions src/suppression_study/evolution/AnalyzeGitlogReport.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,9 @@ def from_gitlog_results_to_change_events(self):
add_events = CommitBlock(commit_block, self.suppressor, self.raw_warning_type,
self.current_file, self.specific_numeric_maps).from_single_commit_block_to_add_event()
if add_events != None:
if len(add_events.keys()) == 7: # not merge commit
if len(add_events.keys()) == 6: # not merge commit
return add_events
else:
# TODO check what really happens here
add_events.pop("backup")
backup_add_events = add_events
else:
Expand Down
4 changes: 3 additions & 1 deletion src/suppression_study/evolution/ChangeEvent.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ def get_change_event_dict(given_object):
"warning_type": given_object.warning_type,
"line_number": given_object.line_number,
"change_operation": given_object.change_operation,
"middle_status_chain": given_object.middle_status_chain
}
if given_object.middle_status_chain:
change_event.update({"middle_status_chain": given_object.middle_status_chain})

return change_event
2 changes: 1 addition & 1 deletion src/suppression_study/evolution/CommitBlock.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def get_add_event(self, last_commit_block_mark):
if last_commit_block_mark:
return add_event_ready_to_json
else:
add_event_ready_to_json.update(backup="backup")
add_event_ready_to_json.update({"backup": "backup"})
return add_event_ready_to_json

# for normal commit (not merge commit)
Expand Down
94 changes: 33 additions & 61 deletions src/suppression_study/evolution/DiffBlock.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,53 +21,23 @@ def from_diff_block_to_delete_event(self):
diffs = self.diff_contents.split("\n")
diff_lines_num = len(diffs)
diff_max_line = diff_lines_num - 1
mapped_line_num = self.suppression.line # the line number of the mapped suppression in next commit

current_start = None
current_step = 0
target_step = 0
for diff_line, diff_line_num in zip(diffs, range(diff_lines_num)):
diff_line = diff_line.strip()
if diff_line.startswith("@@"):
if target_block_mark:
if self.current_hunk_line_range:
delete_event_ready_to_json = self.get_delete_event()
# actually here the returned line number should be None
# it records the line number where the suppression last exists, in the commit just before the deletion commit
if delete_event_ready_to_json:
return delete_event_ready_to_json, self.suppression.line # event, mapped_line_num

# compute the suppression line numbers for the middle statuses: the suppression itself is not changed, but its line number may have changed
if self.suppression.line > current_start:
hunk_delta = target_step - current_step
mapped_line_num += hunk_delta
else:
return mapped_line_num
return delete_event_ready_to_json

# e.g., @@ -168,14 +168,13 @@
tmp = diff_line.split(" ")
current_lines_tmp = tmp[1].lstrip("-")
if "," in current_lines_tmp:
current_lines_tmp = current_lines_tmp.split(",")
current_start = int(current_lines_tmp[0])
current_step = int(current_lines_tmp[1])
else:
current_start = int(current_lines_tmp)
current_step = 1
end = current_start + current_step
self.current_hunk_line_range = range(current_start, end)
current_lines_tmp = tmp[1].lstrip("-").split(",")
start = int(current_lines_tmp[0])
step = int(current_lines_tmp[1])
end = start + step
self.current_hunk_line_range = range(start, end)
if self.suppression.line in self.current_hunk_line_range:
target_block_mark = True
target_tmp = tmp[2].lstrip("+")
if "," in target_tmp:
target_step = int(target_tmp.split(",")[1])
else:
target_step = 1

if not target_block_mark and self.suppression.line > current_start:
hunk_delta = target_step - current_step
mapped_line_num += hunk_delta
else:
return mapped_line_num

if target_block_mark: # Source code
if diff_line.startswith("+"):
Expand All @@ -76,13 +46,14 @@ def from_diff_block_to_delete_event(self):
self.current_source_code.append(diff_line.replace("-", "", 1).strip())

if diff_line_num == diff_max_line: # the last diff block, the last line
if target_block_mark:
delete_event_ready_to_json = self.get_delete_event()
if delete_event_ready_to_json:
return delete_event_ready_to_json, self.suppression.line # event, mapped_line_num
return mapped_line_num
if self.current_hunk_line_range:
last_block_mark = True
delete_event_ready_to_json = self.get_delete_event(last_block_mark)
return delete_event_ready_to_json
else:
return None # get to the end of all diff blocks, but still not find the delete

def get_delete_event(self):
def get_delete_event(self, last_block_mark=False):
comment_symbol = "#"
'''
sometimes the changed hunk includes the target line number, but no changes to the line.
Expand All @@ -92,14 +63,13 @@ def get_delete_event(self):
changed hunk is from line 10 to line 11(included), but only "-" symbol to line 11, no changes to line 10
to get more accurate results, here check if the suppression real exists in current_source_code'''
target_warning_type_exists_in_current = False # default set as no suppression in old commit
suppression_text_from_code_in_current = None
for code in self.current_source_code:
suppressor = get_suppressor(code)
if suppressor: # make sure suppression in current code
suppression_text_from_code_in_current = str(get_suppression_from_source_code(suppressor,
if suppressor != None: # make sure suppression in current code
suppression_text_from_code = str(get_suppression_from_source_code(suppressor,
comment_symbol, code, self.specific_numeric_maps))
if suppression_text_from_code_in_current:
if self.target_raw_warning_type in suppression_text_from_code_in_current:
if suppression_text_from_code:
if self.target_raw_warning_type in suppression_text_from_code:
target_warning_type_exists_in_current = True
break

Expand All @@ -108,12 +78,11 @@ def get_delete_event(self):
target_warning_type_exists_in_next = False
for code in self.next_source_code:
suppressor = get_suppressor(code)
if suppressor:
suppression_text_from_code_in_next = str(get_suppression_from_source_code(suppressor,
if suppressor != None:
suppression_text_from_code = str(get_suppression_from_source_code(suppressor,
comment_symbol, code, self.specific_numeric_maps))
if suppression_text_from_code_in_next and suppression_text_from_code_in_current == suppression_text_from_code_in_next:
if self.target_raw_warning_type in suppression_text_from_code_in_next:
# if needed, here we can extract the line number of suppression where it is deleted.
if suppression_text_from_code:
if self.target_raw_warning_type in suppression_text_from_code:
target_warning_type_exists_in_next = True
break

Expand All @@ -124,13 +93,16 @@ def get_delete_event(self):
delete_event_ready_to_json = get_change_event_dict(delete_event_object)
return delete_event_ready_to_json
else: # no deletions, as the suppression included in current change hunk also in the next commit's changed hunk
if last_block_mark == True:
return None
else:
self.current_hunk_line_range = []
self.next_source_code = []
self.current_source_code = []
else: # no deletions, as no delete suppression in current commit
if last_block_mark == True:
return None
else:
self.current_hunk_line_range = []
self.next_source_code = []
self.current_source_code = []
return None
else: # no deletions, as no suppression in current hunk
# generally this will not happen; it handles inaccurate reports from diff
self.current_hunk_line_range = []
self.next_source_code = []
self.current_source_code = []
return None
self.current_source_code = []
7 changes: 4 additions & 3 deletions src/suppression_study/evolution/ExtractHistory.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,16 +90,16 @@ def main(repo_dir, selected_1000_commits_csv, results_dir):
# reorder the commit and date lists from oldest to newest
selected_1000_commits_list.reverse()
selected_1000_dates_list.reverse()
delete_event_suppression_commit_list, middle_line_number_chain_remain, middle_line_number_chain_delete = GetSuppressionDeleteHistories(
delete_event_suppression_commit_list = GetSuppressionDeleteHistories(
repo_dir, selected_1000_commits_list, selected_1000_dates_list, suppression_result, specific_numeric_maps
).track_commits_forward()

# get add events (for both delete and never removed suppressions)
# finally get the histories: 1) add event 2) add delete events
evolution_init = GitLogFromFinalStatus(repo_dir, never_removed_suppressions,
delete_event_suppression_commit_list, specific_numeric_maps)
only_add_event_histories = evolution_init.git_log_never_removed_suppression(last_commit_with_suppression, middle_line_number_chain_remain)
add_delete_histories = evolution_init.git_log_deleted_suppression(middle_line_number_chain_delete)
only_add_event_histories = evolution_init.git_log_never_removed_suppression(last_commit_with_suppression)
add_delete_histories = evolution_init.git_log_deleted_suppression()

all_histories = []
history_index = 0
Expand All @@ -109,6 +109,7 @@ def main(repo_dir, selected_1000_commits_csv, results_dir):

# Write all extracted suppression level histories to a JSON file.
sort_by_date(all_histories)
print(f"Extracted {len(all_histories)} histories.")
history_json_file = join(results_dir, "histories_suppression_level_all.json")
write_all_histories_to_json(history_json_file, all_histories)

Expand Down
Loading

0 comments on commit b3ee5a2

Please sign in to comment.