From 5bd906f11deccaf491cb1172e41f650a51cb6635 Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Sat, 17 Feb 2024 09:05:14 +0000 Subject: [PATCH] Refactor code --- action.py | 14 -------------- main.py | 19 ++----------------- 2 files changed, 2 insertions(+), 31 deletions(-) delete mode 100644 action.py diff --git a/action.py b/action.py deleted file mode 100644 index a74a0a1..0000000 --- a/action.py +++ /dev/null @@ -1,14 +0,0 @@ -import pandas as pd -import os - - -def integrate(input_file_path, output_file_name): - number = sum(os.path.isfile(os.path.join(input_file_path, name)) for name in os.listdir(input_file_path)) - data_list = [] - - for i in range(0, number): - data_list.append(pd.read_csv(f"{input_file_path}/{i+1}.csv")) - - df = pd.concat(data_list, axis=0, sort=True) - - df.to_csv(f"{output_file_name}", index=False) diff --git a/main.py b/main.py index 4215cb1..933110c 100644 --- a/main.py +++ b/main.py @@ -1,12 +1,8 @@ import pandas as pd import tabula -from action import integrate import os -import shutil from prefecture import prefectures -if not os.path.exists("./files"): - os.mkdir("./files") if not os.path.exists("./output_files"): os.mkdir("./output_files") @@ -14,17 +10,6 @@ for i in range(1, 47): opendata_file = os.listdir(f"./data_files/shinryoujo_{i}") dfs = tabula.read_pdf(f"./data_files/shinryoujo_{i}/{opendata_file[0]}", lattice=True, pages='all', pandas_options={'header': None}) - j = 0 - for df in dfs: - j += 1 - df = df.replace('\n', '', regex=True).replace('\r', '', regex=True).replace('\r\n', '', regex=True).replace('\n\r', '', regex=True) - print(df) - if j == 1: - df_header = df.iloc[:2] - print(df_header) - df.to_csv(f"./files/{j}.csv", index=None) - integrate("./files", f"./output_files/output_{prefectures[i-1]}.csv") - shutil.rmtree("./files") - os.mkdir("./files") + merged_df = pd.concat(dfs).replace('\n', '', regex=True).replace('\r', '', regex=True).replace('\r\n', '', regex=True).replace('\n\r', '', regex=True) + merged_df.to_csv(f"./output_files/{prefectures[i-1]}.csv", index=None) -shutil.rmtree("./files")