Refactor directories to make the project structure easier to understand

nlp4se · Feb 27, 2025 · 6699470 · 6699470
1 parent 8025cad
commit 6699470
Show file tree

Hide file tree

Showing 561 changed files with 20 additions and 55,819 deletions.
diff --git a/backend/dendogram_controller.py b/backend/dendogram_controller.py
@@ -91,7 +91,7 @@ def generate_dendogram_from_csv():
         return make_response("CSV file is required", 400)
 
     file = request.files['file']
-    if not file.filename.endswith('.csv'):
+    if not file.filename.endswith('.model_embeddings'):
         return make_response("File must be a CSV", 400)
 
     features = []

diff --git a/backend/dendogram_service.py b/backend/dendogram_service.py
@@ -13,17 +13,17 @@
 
 load_dotenv()
 def preprocessed_app(app_name):
-    file_path = f"data/Stage 2 - Hierarchical Clustering/preprocessed_jsons/{app_name}Features.json"
+    file_path = f"data/Stage 2 - Hierarchical Clustering/preprocessed_features_jsons/{app_name}Features.json"
     return os.path.exists(file_path) and os.path.getsize(file_path) > 0
 
 def save_preprocessed_features(features, app_name):
-    file_path = f"data/Stage 2 - Hierarchical Clustering/preprocessed_jsons/{app_name}Features.json"
+    file_path = f"data/Stage 2 - Hierarchical Clustering/preprocessed_features_jsons/{app_name}Features.json"
     os.makedirs(os.path.dirname(file_path), exist_ok=True)
     with open(file_path, "w") as json_file:
         json.dump(features, json_file)
 
 def load_saved_preprocessed_features(app_name):
-    file_path = os.path.join(BASE_DIR, "data", "Stage 2 - Hierarchical Clustering", "preprocessed_jsons", f"{app_name}Features.json")
+    file_path = os.path.join(BASE_DIR, "data", "Stage 2 - Hierarchical Clustering", "preprocessed_features_jsons", f"{app_name}Features.json")
     if not os.path.exists(file_path):
         return None
     with open(file_path, "r") as json_file:

diff --git a/backend/preprocessing_service.py b/backend/preprocessing_service.py
@@ -33,17 +33,17 @@ def preprocess():
         return jsonify({"error": str(e)}), 500
 
 def preprocessed_app(app_name):
-    file_path = f"static/preprocessed_jsons/{app_name}Features.json"
+    file_path = f"data/Stage 3 - Topic Modelling/preprocessed_features_jsons/{app_name}Features.json"
     return os.path.exists(file_path) and os.path.getsize(file_path) > 0
 
 def save_preprocessed_features(features, app_name):
-    file_path = f"static/preprocessed_jsons/{app_name}Features.json"
+    file_path = f"data/Stage 3 - Topic Modelling/preprocessed_features_jsons/{app_name}Features.json"
     os.makedirs(os.path.dirname(file_path), exist_ok=True)
     with open(file_path, "w") as json_file:
         json.dump(features, json_file)
 
 def load_saved_preprocessed_features(app_name):
-    file_path = f"static/preprocessed_jsons/{app_name}Features.json"
+    file_path = f"data/Stage 3 - Topic Modelling/preprocessed_features_jsons/{app_name}Features.json"
     if os.path.exists(file_path):
         with open(file_path, "r") as json_file:
             return json.load(json_file)

diff --git a/backend/utils.py b/backend/utils.py
@@ -11,8 +11,8 @@
 STAGE_2_MODEL_DIRECTORY_PATH = os.path.join(STAGE_2_OUTPUT_PATH)
 STAGE_3_MODEL_DIRECTORY_PATH = os.path.join(STAGE_3_INPUT_PATH)
 
-MODEL_DIRECTORY_CSV_PATH = os.path.join(STAGE_2_OUTPUT_PATH, 'csv')
-MODEL_DIRECTORY_CSV_EMBEDDINGS_PATH = os.path.join('static', 'csv', 'embeddings')
+MODEL_DIRECTORY_CSV_PATH = os.path.join(STAGE_2_OUTPUT_PATH, 'model_embeddings')
+MODEL_DIRECTORY_CSV_EMBEDDINGS_PATH = os.path.join('data', 'model_embeddings', 'embeddings')
 
 class Utils:
     @staticmethod

diff --git a/backend/visualization_service.py b/backend/visualization_service.py
@@ -49,7 +49,7 @@ def generate_dynamic_label(cluster_labels):
     unique_labels = list(set(cluster_labels))
     few_shot_input_text = (
         "Generate a single concise label summarizing the following actions:\n\n"
-        "Examples:\n"
+        "Input_Examples:\n"
         "Video meeting, online meeting, team video chat, conference call\n"
         "Label: Virtual Team Communication\n\n"
         "Secure chat, encrypted messaging, private message\n"

diff --git a/client/dendogram_generation.py → cli-client/dendogram_generation.py b/client/dendogram_generation.py → cli-client/dendogram_generation.py
diff --git a/client/dynamic_visualizator.py → cli-client/dynamic_visualizator.py b/client/dynamic_visualizator.py → cli-client/dynamic_visualizator.py
diff --git a/client/requester.py → cli-client/requester.py b/client/requester.py → cli-client/requester.py
@@ -11,7 +11,7 @@
 obj_weights = [0.9, 0.75, 0.669, 0.5, 0.25, 0.1]
 affinity_models = ['bert', 'tf-idf', 'paraphrase']
 
-json_file_path = "body.json"
+json_file_path = "small_whatsapp_example.json"
 with open(json_file_path, 'r') as json_file:
     json_data = json.load(json_file)
 

diff --git a/...e extraction/feature_extraction_t-frex.py → ...e extraction/feature_extraction_t-frex.py b/...e extraction/feature_extraction_t-frex.py → ...e extraction/feature_extraction_t-frex.py
@@ -60,7 +60,7 @@ def process_folder(input_folder, output_folder):
 
     # Process each CSV file in the input folder
     for file_name in os.listdir(input_folder):
-        if file_name.endswith('.csv'):
+        if file_name.endswith('.model_embeddings'):
             input_file = os.path.join(input_folder, file_name)
             output_file = os.path.join(output_folder, file_name)
             print(f"Processing {input_file} -> {output_file}")

diff --git a/...raction/feature_extraction_transfeatex.py → ...raction/feature_extraction_transfeatex.py b/...raction/feature_extraction_transfeatex.py → ...raction/feature_extraction_transfeatex.py
diff --git a/...age 1 - Feature extraction/json_to_csv.py → ...age 1 - Feature extraction/json_to_csv.py b/...age 1 - Feature extraction/json_to_csv.py → ...age 1 - Feature extraction/json_to_csv.py
@@ -55,7 +55,7 @@ def process_folder(input_folder, output_folder):
     for file_name in os.listdir(input_folder):
         if file_name.endswith('.json'):
             input_file = os.path.join(input_folder, file_name)
-            app_output_file = os.path.join(output_folder, file_name.replace('.json', '.csv'))
+            app_output_file = os.path.join(output_folder, file_name.replace('.json', '.model_embeddings'))
 
             print(f"Processing {input_file} -> {app_output_file}")
             reviews_data, grouped_reviews = parse_reviews(input_file)
@@ -71,7 +71,7 @@ def process_folder(input_folder, output_folder):
 
     # Write separate CSV files for each categoryId
     for categoryId, reviews in combined_grouped_reviews.items():
-        category_output_file = os.path.join(output_folder, f"{categoryId}.csv")
+        category_output_file = os.path.join(output_folder, f"{categoryId}.model_embeddings")
         print(f"Saving reviews for categoryId={categoryId} -> {category_output_file}")
 
         # Write CSV for each categoryId

diff --git a/...ature extraction/review_postprocessing.py → ...ature extraction/review_postprocessing.py b/...ature extraction/review_postprocessing.py → ...ature extraction/review_postprocessing.py
@@ -8,7 +8,7 @@ def parse_and_add_column(csv_folder, json_folder, output_folder):
     os.makedirs(output_folder, exist_ok=True)
 
     # List all CSV and JSON files
-    csv_files = {os.path.splitext(f)[0]: f for f in os.listdir(csv_folder) if f.endswith('.csv')}
+    csv_files = {os.path.splitext(f)[0]: f for f in os.listdir(csv_folder) if f.endswith('.model_embeddings')}
     json_files = {os.path.splitext(f)[0]: f for f in os.listdir(json_folder) if f.endswith('.json')}
 
     for base_name, csv_file in csv_files.items():

diff --git a/...eature extraction/review_preprocessing.py → ...eature extraction/review_preprocessing.py b/...eature extraction/review_preprocessing.py → ...eature extraction/review_preprocessing.py
diff --git a/...tage 1 - Feature extraction/statistics.py → ...tage 1 - Feature extraction/statistics.py b/...tage 1 - Feature extraction/statistics.py → ...tage 1 - Feature extraction/statistics.py
@@ -24,7 +24,7 @@ def process_files(input_folder, output_folder, top_n):
 
     # Process each CSV file in the input folder
     for filename in os.listdir(input_folder):
-        if not filename.endswith('.csv'):
+        if not filename.endswith('.model_embeddings'):
             continue
 
         input_path = os.path.join(input_folder, filename)
@@ -50,7 +50,7 @@ def process_files(input_folder, output_folder, top_n):
         }).sort_values('count', ascending=False)
 
         # Save individual file statistics
-        stats_output = os.path.join(output_folder, f"{base_name}_statistics.csv")
+        stats_output = os.path.join(output_folder, f"{base_name}_statistics.model_embeddings")
         stats_df.to_csv(stats_output, index=False)
 
         # Process with spaCy for POS tagging
@@ -129,7 +129,7 @@ def process_files(input_folder, output_folder, top_n):
         }).sort_values('count', ascending=False)
 
         # Save category statistics
-        stats_output = os.path.join(output_folder, f"category_{category}_statistics.csv")
+        stats_output = os.path.join(output_folder, f"category_{category}_statistics.model_embeddings")
         stats_df.to_csv(stats_output, index=False)
 
         # Process with spaCy for POS tagging

diff --git a/scripts/data-preprocessing/csv_to_json.py → ...scripts/data-preprocessing/csv_to_json.py b/scripts/data-preprocessing/csv_to_json.py → ...scripts/data-preprocessing/csv_to_json.py
diff --git a/scripts/llama_tryout.py → cli-client/scripts/llama_tryout.py b/scripts/llama_tryout.py → cli-client/scripts/llama_tryout.py
diff --git a/scripts/requester.py → cli-client/scripts/requester.py b/scripts/requester.py → cli-client/scripts/requester.py
@@ -18,7 +18,7 @@
 
 
 def process_csv_files(input_folder, base_url, default_params):
-    csv_files = glob.glob(os.path.join(input_folder, '*.csv'))
+    csv_files = glob.glob(os.path.join(input_folder, '*.model_embeddings'))
 
     for csv_file in csv_files:
         try:
@@ -31,7 +31,7 @@ def process_csv_files(input_folder, base_url, default_params):
 
             # Prepare the file to be uploaded
             with open(csv_file, 'rb') as f:
-                files = {'file': (os.path.basename(csv_file), f, 'text/csv')}
+                files = {'file': (os.path.basename(csv_file), f, 'text/model_embeddings')}
 
                 response = requests.post(base_url, params=params, files=files)
 

diff --git a/...pts/visualization/dendogram_generation.py → ...pts/visualization/dendogram_generation.py b/...pts/visualization/dendogram_generation.py → ...pts/visualization/dendogram_generation.py
diff --git a/...pts/visualization/dendogram_generation.sh → ...pts/visualization/dendogram_generation.sh b/...pts/visualization/dendogram_generation.sh → ...pts/visualization/dendogram_generation.sh
diff --git a/...pts/visualization/dynamic_visualizator.py → ...pts/visualization/dynamic_visualizator.py b/...pts/visualization/dynamic_visualizator.py → ...pts/visualization/dynamic_visualizator.py
diff --git a/scripts/visualization/executer.py → cli-client/scripts/visualization/executer.py b/scripts/visualization/executer.py → cli-client/scripts/visualization/executer.py
diff --git a/scripts/weight_script.py → cli-client/scripts/weight_script.py b/scripts/weight_script.py → cli-client/scripts/weight_script.py
diff --git a/client/visualizator.py → cli-client/visualizator.py b/client/visualizator.py → cli-client/visualizator.py