# -*- coding: utf-8 -*-
"""points_distribution.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1RUYKMWVGgZyBpGsr4vNjBH9j15I9kSJC
"""
import pandas as pd
import numpy as np
from shapely.wkt import loads
from sklearn.cluster import KMeans
import os


def read_csv(filepath):
    """Reads a CSV file into a DataFrame."""
    try:
        return pd.read_csv(filepath)
    except FileNotFoundError:
        print("File not found. Please check the file path and try again.")
    except Exception as e:
        print(f"An error occurred: {e}")
    return None


def save_to_pickle(data, filepath):
    """Saves the DataFrame to a pickle file."""
    dir_name = os.path.dirname(filepath)
    if dir_name:
        os.makedirs(dir_name, exist_ok=True)
    try:
        data.to_pickle(filepath)
        print(f"Data saved to {filepath}")
    except Exception as e:
        print(f"An error occurred while saving to pickle: {e}")


def select_uniform_points(points_wkt, n_clusters):
    """Selects spatially uniform points from a list of WKT strings.

    Clusters the points with k-means and returns, for each cluster, the
    input point closest to that cluster's centroid.
    """
    points = [loads(wkt) for wkt in points_wkt]
    if not points:
        return []
    # KMeans requires n_samples >= n_clusters; clamp to avoid a ValueError
    # when a group has fewer remaining points than requested.
    n_clusters = min(n_clusters, len(points))
    coords = np.array([point.coords[0] for point in points])
    kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=42)
    kmeans.fit(coords)
    uniform_points = []
    for centroid in kmeans.cluster_centers_:
        # Snap each centroid back to the nearest actual input point.
        distances = np.sqrt(((coords - centroid) ** 2).sum(axis=1))
        closest_point_idx = np.argmin(distances)
        uniform_points.append(points[closest_point_idx])
    return uniform_points
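
# A minimal usage sketch with hypothetical coordinates: four points forming two
# spatial groups should yield one representative per group when n_clusters=2.
#
#     sample_wkt = ["POINT (0 0)", "POINT (0 1)", "POINT (10 0)", "POINT (10 1)"]
#     reps = select_uniform_points(sample_wkt, 2)
#     # -> one point near x=0 and one near x=10 (order follows KMeans labeling)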


def process_data(data, n, m):
    """Selects the top n points by Score plus m spatially uniform points
    for each GEOID_12 group."""
    new_rows = []
    for name, group in data.groupby('GEOID_12'):
        top_n = group.nlargest(n, 'Score')
        # Draw the uniform points only from rows not already in the top n.
        remaining_points_wkt = group.loc[~group.index.isin(top_n.index), 'geometry'].tolist()
        uniform_points = select_uniform_points(remaining_points_wkt, m)
        # NOTE: matching Scores back on `point.wkt` assumes the stored WKT
        # strings round-trip through shapely unchanged (e.g. "POINT (0 0)" spacing).
        uniform_df = pd.DataFrame({
            'GEOID_12': [name] * len(uniform_points),
            'Score': [group.loc[group['geometry'] == point.wkt, 'Score'].values[0]
                      for point in uniform_points],
            'geometry': [point.wkt for point in uniform_points],
        })
        new_rows.append(pd.concat([top_n, uniform_df]))
    return pd.concat(new_rows).reset_index(drop=True)
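
# Expected input schema, inferred from usage rather than documented upstream:
# one point per row, with columns GEOID_12 (group key), Score (numeric ranking),
# and geometry (point WKT such as "POINT (-122.4 37.8)").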


def merge_dataframes(data1, data2, key):
    """Merges point data with polygon data on a specified key."""
    # Keep only the necessary columns, and rename on a copy rather than
    # in place so the caller's DataFrame is not mutated.
    data1 = data1[[key, 'Score', 'geometry']]
    data2 = data2.rename(columns={'geometry': 'polygon_geometry'})[[key, 'polygon_geometry']]
    return data1.merge(data2, on=key, how='left')
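
# After the left join, each selected point carries its parent polygon:
# GEOID_12, Score, geometry (point WKT), polygon_geometry (polygon WKT).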


def generate_random_points(path_to_random_points="data/filtered_new_point_data_v1.csv",
                           path_to_polygon="data/filtered_gdf_v2.csv",
                           top_n=10, uniform_n=20,
                           path_to_save_output="data/final_points.pkl"):
    """Main execution flow: select points, merge with polygons, and save."""
    data = read_csv(path_to_random_points)
    if data is None or data.empty:
        print("Point file not found or empty ... Exiting!")
        return

    print("[INFO] Processing Data.. ")
    processed_data = process_data(data, top_n, uniform_n)

    polygon_data = read_csv(path_to_polygon)
    if polygon_data is None or polygon_data.empty:
        print("Polygon file not found or empty ... Exiting!")
        return

    print("[INFO] Merging File.. ")
    merged_data = merge_dataframes(processed_data, polygon_data, 'GEOID_12')

    print("[INFO] Saving File.. ")
    save_to_pickle(merged_data, path_to_save_output)
    # The saved result can later be reloaded with pd.read_pickle(path_to_save_output).


if __name__ == "__main__":
    generate_random_points(top_n=3, uniform_n=10)