-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathread_results.py
115 lines (96 loc) · 3.7 KB
/
read_results.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import pandas as pd
import os
import re
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt
# Constants
# Directory containing the per-trial simulation CSV outputs to aggregate.
SAVE_PATH = "/home/bgeurten/wolbachia_spread_model/raw_data/compare_spread_features/" # Update with your actual save path
# Simulation parameters recorded for reference; currently only used by the
# commented-out metadata columns in read_and_process_data.
GRID_SIZE = 500
INITIAL_POPULATION = 10
INFECTED_FRACTION = 0.2
def extract_metadata_from_filename(filename):
    """
    Extracts metadata from the filename.
    Parameters:
    filename (str): The name of the file.
    Returns:
    dict: A dictionary containing extracted metadata.
    """
    # The filename encodes four boolean flags at fixed, underscore-separated
    # positions, followed by the trial number as the last token.
    tokens = os.path.basename(filename).split('_')
    flag_positions = {
        'cytoplasmic_incompatibility': 2,
        'male_killing': 5,
        'increased_exploration_rate': 9,
        'increased_eggs': 12,
    }
    metadata = {key: tokens[idx] == 'True' for key, idx in flag_positions.items()}
    # Last token is e.g. "7.csv" -> trial number 7.
    metadata['trial_number'] = int(tokens[-1].split('.')[0])
    return metadata
def read_and_process_data(save_path=SAVE_PATH):
    """
    Reads all CSV files in a directory, processes them, and combines them
    into a single DataFrame.

    Each file contributes one row per simulated day; runs shorter than 365
    days are padded by repeating their final row so every trial covers a
    full year, which keeps the per-day aggregation in plotting balanced.

    Parameters:
    save_path (str): Directory to scan for ``*.csv`` files.
        Defaults to the module-level SAVE_PATH.

    Returns:
    DataFrame: Concatenation of all trials, with metadata columns decoded
        from each filename and a ``days`` column.

    Raises:
    FileNotFoundError: If the directory contains no CSV files (previously
        this surfaced as an opaque ValueError from ``pd.concat([])``).
    """
    all_data = []
    files = [f for f in os.listdir(save_path) if f.endswith(".csv")]
    for file in tqdm(files, desc="Processing CSV files"):
        file_path = os.path.join(save_path, file)
        data = pd.read_csv(file_path)
        metadata = extract_metadata_from_filename(file)
        # Row index == simulated day number for the original rows.
        data['days'] = data.index
        data = data.assign(**metadata)
        # Ensure data covers 365 days by repeating the final state.
        if len(data) < 365:
            last_row = data.iloc[-1]
            missing_days = 365 - len(data)
            repeat_rows = pd.DataFrame([last_row] * missing_days)
            repeat_rows['days'] = range(len(data), 365)  # Update days for repeated rows
            data = pd.concat([data, repeat_rows], ignore_index=True)
        all_data.append(data)
    if not all_data:
        # pd.concat on an empty list raises a confusing ValueError; fail clearly.
        raise FileNotFoundError(f"No CSV files found in {save_path}")
    return pd.concat(all_data, ignore_index=True)
def plot_time_series(combined_data):
    """
    Plots the median and confidence interval for each combination of Wolbachia effects over time.

    Produces two figures: population size over time and infection rate over
    time, each with one line per effect combination ('id_string') and a 95%
    confidence band. Figures are shown interactively, not saved.

    Parameters:
    combined_data (DataFrame): Pandas DataFrame containing the combined data from all simulations.
        Must include 'days', 'Population Size', 'Infection Rate', and
        'id_string' columns.
    """
    # NOTE: seaborn deprecated the `ci=` keyword in 0.12 (removed in 0.14);
    # errorbar=("ci", 95) is the supported equivalent.
    plt.figure(figsize=(12, 6))
    sns.lineplot(data=combined_data, x='days', y='Population Size', hue='id_string', errorbar=("ci", 95))
    plt.title('Population Size Over Time by Wolbachia Effects')
    plt.xlabel('Days')
    plt.ylabel('Population Size')
    plt.legend(title='Wolbachia Effects', loc='upper left')
    plt.show()
    plt.figure(figsize=(12, 6))
    sns.lineplot(data=combined_data, x='days', y='Infection Rate', hue='id_string', errorbar=("ci", 95))
    plt.title('Infection Rate Over Time by Wolbachia Effects')
    plt.xlabel('Days')
    plt.ylabel('Infection Rate')
    plt.legend(title='Wolbachia Effects', loc='upper left')
    plt.show()
def generate_id_string(row):
    """
    Generates a unique ID string based on the combination of active Wolbachia effects in a row.
    """
    # Abbreviations are concatenated in a fixed order so each combination
    # of active effects maps to exactly one ID.
    abbreviations = [
        ('cytoplasmic_incompatibility', 'ci'),
        ('male_killing', 'mk'),
        ('increased_exploration_rate', 'er'),
        ('increased_eggs', 'eg'),
    ]
    parts = []
    for column, short_name in abbreviations:
        if row[column]:
            parts.append(short_name)
    # A row with no active effects gets a sentinel label instead of ''.
    return ''.join(parts) or 'no_effects'
# Read and process the data
# NOTE(review): these statements run on import as well as on direct execution;
# consider a `if __name__ == "__main__":` guard if this module is ever imported.
combined_data = read_and_process_data()
# Label every row with its effect-combination ID for grouping in plots.
combined_data['id_string'] = combined_data.apply(generate_id_string, axis=1)
# Persist the combined dataset to the current working directory.
combined_data.to_csv('wolbachia_data.csv',index=False)
# Example: Display the first few rows of the combined DataFrame
print(combined_data.head())