Skip to content

Commit

Permalink
refactor directories for better understanding of the project
Browse files Browse the repository at this point in the history
  • Loading branch information
mtiessler committed Feb 27, 2025
1 parent 8025cad commit 6699470
Show file tree
Hide file tree
Showing 561 changed files with 20 additions and 55,819 deletions.
2 changes: 1 addition & 1 deletion backend/dendogram_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def generate_dendogram_from_csv():
return make_response("CSV file is required", 400)

file = request.files['file']
if not file.filename.endswith('.csv'):
if not file.filename.endswith('.model_embeddings'):
return make_response("File must be a CSV", 400)

features = []
Expand Down
6 changes: 3 additions & 3 deletions backend/dendogram_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,17 @@

load_dotenv()
def preprocessed_app(app_name):
# Report whether a non-empty "<app_name>Features.json" file exists on disk:
# True only when the path exists AND its size is greater than zero bytes.
# NOTE(review): this is a rendered diff hunk (indentation and +/- markers were
# lost). The two consecutive file_path lines look like the removed/added pair,
# so only the second path ("preprocessed_features_jsons") would take effect.
# NOTE(review): the path is relative, so it resolves against the process's
# working directory, whereas load_saved_preprocessed_features in this same
# file joins from BASE_DIR - confirm both resolve to the same directory.
file_path = f"data/Stage 2 - Hierarchical Clustering/preprocessed_jsons/{app_name}Features.json"
file_path = f"data/Stage 2 - Hierarchical Clustering/preprocessed_features_jsons/{app_name}Features.json"
return os.path.exists(file_path) and os.path.getsize(file_path) > 0

def save_preprocessed_features(features, app_name):
# Write `features` as JSON to "<app_name>Features.json", creating the parent
# directory first if it does not exist yet.
# NOTE(review): this is a rendered diff hunk (indentation and +/- markers were
# lost). The two consecutive file_path lines look like the removed/added pair,
# so only the second path ("preprocessed_features_jsons") would take effect.
# NOTE(review): the path is relative to the working directory, while
# load_saved_preprocessed_features in this same file reads from a BASE_DIR-
# anchored path - confirm a file saved here is found again by the loader.
file_path = f"data/Stage 2 - Hierarchical Clustering/preprocessed_jsons/{app_name}Features.json"
file_path = f"data/Stage 2 - Hierarchical Clustering/preprocessed_features_jsons/{app_name}Features.json"
# exist_ok=True makes repeated saves safe when the directory already exists.
os.makedirs(os.path.dirname(file_path), exist_ok=True)
# Mode "w" truncates any previously saved file for this app.
with open(file_path, "w") as json_file:
json.dump(features, json_file)

def load_saved_preprocessed_features(app_name):
file_path = os.path.join(BASE_DIR, "data", "Stage 2 - Hierarchical Clustering", "preprocessed_jsons", f"{app_name}Features.json")
file_path = os.path.join(BASE_DIR, "data", "Stage 2 - Hierarchical Clustering", "preprocessed_features_jsons", f"{app_name}Features.json")
if not os.path.exists(file_path):
return None
with open(file_path, "r") as json_file:
Expand Down
6 changes: 3 additions & 3 deletions backend/preprocessing_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,17 +33,17 @@ def preprocess():
return jsonify({"error": str(e)}), 500

def preprocessed_app(app_name):
# Report whether a non-empty "<app_name>Features.json" file exists on disk:
# True only when the path exists AND its size is greater than zero bytes.
# NOTE(review): this is a rendered diff hunk (indentation and +/- markers were
# lost). The two consecutive file_path lines look like the removed/added pair:
# the location moves from "static/preprocessed_jsons" to the Stage 3 data
# folder, and only the second assignment would take effect.
# NOTE(review): the path is relative, so it resolves against the process's
# working directory - confirm the service is always started from the same place.
file_path = f"static/preprocessed_jsons/{app_name}Features.json"
file_path = f"data/Stage 3 - Topic Modelling/preprocessed_features_jsons/{app_name}Features.json"
return os.path.exists(file_path) and os.path.getsize(file_path) > 0

def save_preprocessed_features(features, app_name):
# Write `features` as JSON to "<app_name>Features.json", creating the parent
# directory first if it does not exist yet.
# NOTE(review): this is a rendered diff hunk (indentation and +/- markers were
# lost). The two consecutive file_path lines look like the removed/added pair:
# the location moves from "static/preprocessed_jsons" to the Stage 3 data
# folder, and only the second assignment would take effect.
file_path = f"static/preprocessed_jsons/{app_name}Features.json"
file_path = f"data/Stage 3 - Topic Modelling/preprocessed_features_jsons/{app_name}Features.json"
# exist_ok=True makes repeated saves safe when the directory already exists.
os.makedirs(os.path.dirname(file_path), exist_ok=True)
# Mode "w" truncates any previously saved file for this app.
with open(file_path, "w") as json_file:
json.dump(features, json_file)

def load_saved_preprocessed_features(app_name):
file_path = f"static/preprocessed_jsons/{app_name}Features.json"
file_path = f"data/Stage 3 - Topic Modelling/preprocessed_features_jsons/{app_name}Features.json"
if os.path.exists(file_path):
with open(file_path, "r") as json_file:
return json.load(json_file)
Expand Down
4 changes: 2 additions & 2 deletions backend/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
# os.path.join called with a single argument returns that argument unchanged,
# so these two constants are plain aliases of the stage paths.
STAGE_2_MODEL_DIRECTORY_PATH = os.path.join(STAGE_2_OUTPUT_PATH)
STAGE_3_MODEL_DIRECTORY_PATH = os.path.join(STAGE_3_INPUT_PATH)

# NOTE(review): rendered diff hunk - each constant below appears twice. The
# first two assignments look like the removed versions and the last two like
# the added ones, so only the 'model_embeddings' values would take effect.
# NOTE(review): the constant names still say CSV while the new path segments
# say 'model_embeddings' - confirm the on-disk directories were renamed to match.
# NOTE(review): the new embeddings path starts at a relative 'data' folder
# rather than at one of the STAGE_* bases - confirm this is intended.
MODEL_DIRECTORY_CSV_PATH = os.path.join(STAGE_2_OUTPUT_PATH, 'csv')
MODEL_DIRECTORY_CSV_EMBEDDINGS_PATH = os.path.join('static', 'csv', 'embeddings')
MODEL_DIRECTORY_CSV_PATH = os.path.join(STAGE_2_OUTPUT_PATH, 'model_embeddings')
MODEL_DIRECTORY_CSV_EMBEDDINGS_PATH = os.path.join('data', 'model_embeddings', 'embeddings')

class Utils:
@staticmethod
Expand Down
2 changes: 1 addition & 1 deletion backend/visualization_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def generate_dynamic_label(cluster_labels):
unique_labels = list(set(cluster_labels))
few_shot_input_text = (
"Generate a single concise label summarizing the following actions:\n\n"
"Examples:\n"
"Input_Examples:\n"
"Video meeting, online meeting, team video chat, conference call\n"
"Label: Virtual Team Communication\n\n"
"Secure chat, encrypted messaging, private message\n"
Expand Down
File renamed without changes.
File renamed without changes.
2 changes: 1 addition & 1 deletion client/requester.py → cli-client/requester.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
obj_weights = [0.9, 0.75, 0.669, 0.5, 0.25, 0.1]
affinity_models = ['bert', 'tf-idf', 'paraphrase']

json_file_path = "body.json"
json_file_path = "small_whatsapp_example.json"
with open(json_file_path, 'r') as json_file:
json_data = json.load(json_file)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def process_folder(input_folder, output_folder):

# Process each CSV file in the input folder
for file_name in os.listdir(input_folder):
if file_name.endswith('.csv'):
if file_name.endswith('.model_embeddings'):
input_file = os.path.join(input_folder, file_name)
output_file = os.path.join(output_folder, file_name)
print(f"Processing {input_file} -> {output_file}")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def process_folder(input_folder, output_folder):
for file_name in os.listdir(input_folder):
if file_name.endswith('.json'):
input_file = os.path.join(input_folder, file_name)
app_output_file = os.path.join(output_folder, file_name.replace('.json', '.csv'))
app_output_file = os.path.join(output_folder, file_name.replace('.json', '.model_embeddings'))

print(f"Processing {input_file} -> {app_output_file}")
reviews_data, grouped_reviews = parse_reviews(input_file)
Expand All @@ -71,7 +71,7 @@ def process_folder(input_folder, output_folder):

# Write separate CSV files for each categoryId
for categoryId, reviews in combined_grouped_reviews.items():
category_output_file = os.path.join(output_folder, f"{categoryId}.csv")
category_output_file = os.path.join(output_folder, f"{categoryId}.model_embeddings")
print(f"Saving reviews for categoryId={categoryId} -> {category_output_file}")

# Write CSV for each categoryId
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ def parse_and_add_column(csv_folder, json_folder, output_folder):
os.makedirs(output_folder, exist_ok=True)

# List all CSV and JSON files
csv_files = {os.path.splitext(f)[0]: f for f in os.listdir(csv_folder) if f.endswith('.csv')}
csv_files = {os.path.splitext(f)[0]: f for f in os.listdir(csv_folder) if f.endswith('.model_embeddings')}
json_files = {os.path.splitext(f)[0]: f for f in os.listdir(json_folder) if f.endswith('.json')}

for base_name, csv_file in csv_files.items():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def process_files(input_folder, output_folder, top_n):

# Process each CSV file in the input folder
for filename in os.listdir(input_folder):
if not filename.endswith('.csv'):
if not filename.endswith('.model_embeddings'):
continue

input_path = os.path.join(input_folder, filename)
Expand All @@ -50,7 +50,7 @@ def process_files(input_folder, output_folder, top_n):
}).sort_values('count', ascending=False)

# Save individual file statistics
stats_output = os.path.join(output_folder, f"{base_name}_statistics.csv")
stats_output = os.path.join(output_folder, f"{base_name}_statistics.model_embeddings")
stats_df.to_csv(stats_output, index=False)

# Process with spaCy for POS tagging
Expand Down Expand Up @@ -129,7 +129,7 @@ def process_files(input_folder, output_folder, top_n):
}).sort_values('count', ascending=False)

# Save category statistics
stats_output = os.path.join(output_folder, f"category_{category}_statistics.csv")
stats_output = os.path.join(output_folder, f"category_{category}_statistics.model_embeddings")
stats_df.to_csv(stats_output, index=False)

# Process with spaCy for POS tagging
Expand Down
File renamed without changes.
File renamed without changes.
4 changes: 2 additions & 2 deletions scripts/requester.py → cli-client/scripts/requester.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@


def process_csv_files(input_folder, base_url, default_params):
csv_files = glob.glob(os.path.join(input_folder, '*.csv'))
csv_files = glob.glob(os.path.join(input_folder, '*.model_embeddings'))

for csv_file in csv_files:
try:
Expand All @@ -31,7 +31,7 @@ def process_csv_files(input_folder, base_url, default_params):

# Prepare the file to be uploaded
with open(csv_file, 'rb') as f:
files = {'file': (os.path.basename(csv_file), f, 'text/csv')}
files = {'file': (os.path.basename(csv_file), f, 'text/model_embeddings')}

response = requests.post(base_url, params=params, files=files)

Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
Loading

0 comments on commit 6699470

Please sign in to comment.