-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsync-fs-at.py
236 lines (209 loc) · 9.45 KB
/
sync-fs-at.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
from google.cloud import firestore
import google.auth
import json
from datetime import datetime
import openpyxl
from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload
from google.auth.transport.requests import Request
import os
def flatten_document(doc_snapshot):
"""
Flatten the document to a single-level dictionary, adding document id to each dict.
Handles nested dictionaries by flattening them to a single level.
"""
def flatten(nested_dict, parent_key='', sep='.'):
"""
Recursively flattens a nested dictionary.
"""
items = []
for k, v in nested_dict.items():
new_key = f"{parent_key}{sep}{k}" if parent_key else k
if isinstance(v, dict):
items.extend(flatten(v, new_key, sep=sep).items())
else:
items.append((new_key, v))
return dict(items)
flattened_doc = flatten(doc_snapshot.to_dict())
flattened_doc['id'] = doc_snapshot.id
return flattened_doc
def default_serializer(obj):
"""
Custom serializer function to handle non-serializable types such as Firestore timestamp or DocumentReference.
"""
if isinstance(obj, datetime):
return obj.isoformat()
elif isinstance(obj, firestore.DocumentReference):
return obj.path
raise TypeError(f"Type {type(obj)} not serializable")
def read_firestore_subcollections(document_path):
"""
Reads all sub-collections of a specific document into a dictionary of lists of flattened documents.
"""
db = firestore.Client()
document_ref = db.document(document_path)
subcollections = document_ref.collections()
all_data = {}
for subcollection in subcollections:
subcollection_name = subcollection.id
docs = subcollection.stream()
all_data[subcollection_name] = [flatten_document(doc) for doc in docs]
return all_data
# def write_firestore_subcollections(document_path, data_dict):
# """
# Writes the list of documents back to Firestore sub-collections, overwriting existing data.
# """
# db = firestore.Client()
# for subcollection_name, data_list in data_dict.items():
# subcollection_ref = db.document(document_path).collection(subcollection_name)
# for doc in data_list:
# # Assumes each object in list has an 'id' field, which is used as document id
# if 'id' in doc:
# doc_ref = subcollection_ref.document(doc['id'])
# # Remove 'id' before updating Firestore
# data_to_write = {k: v for k, v in doc.items() if k != 'id'}
# doc_ref.set(data_to_write)
# else:
# # If 'id' is not present, add new document without specifying document id
# subcollection_ref.add(doc)
def process_surveys(surveys):
"""
Process the surveys data to create a new list of surveys with additional fields.
"""
processed_surveys = {}
for survey in surveys:
name = survey.get('name.he', survey.get('name.en', ''))
description = survey.get('description.he', survey.get('description.en', ''))
if not name:
continue
questions = []
for question in survey.get('questions', []):
text = question.get('text', {})
question_text = text.get('he', text.get('en', ''))
questions.append({
'id': question.get('id'),
'text': question_text,
})
if not questions:
continue
print(f"Processing survey: {name} ({description}), with {len(questions)} questions")
processed_survey = {
'name': name,
'description': description,
'created_at': survey['creationDateTime'].isoformat(),
'questions': questions,
}
processed_surveys[survey['id']] = processed_survey
return processed_surveys
def process_responses(responses, survey_id, survey):
"""
Process the responses data to create a new list of responses with additional fields.
"""
processed_responses = []
headers = ['time', 'lat', 'lon'] + [q['text'] for q in survey['questions']]
for response in responses:
if response['surveyId'] != survey_id:
continue
if 'coordinates.latitude' not in response:
print(f'Response missing coordinate data: {response["id"]}')
continue
answers = dict(
time=response['submittedTs'].isoformat(),
lat=str(response['coordinates.latitude']),
lon=str(response['coordinates.longitude']),
)
response_answers = response.get('responses', [])
for question in survey['questions']:
answer = [a for a in response_answers if a['questionId'] == question['id']]
if len(answer) == 1:
answers[question['text']] = str(answer[0]['response'])
processed_responses.append(answers)
return headers, processed_responses
def fix_sheet(sheet):
print('Fixing sheet...', sheet.title)
# Auto-size columns
for column in sheet.columns:
max_length = 0
for cell in column:
cell.alignment = openpyxl.styles.Alignment(horizontal="right", vertical="center", wrap_text=False, readingOrder=2)
if cell.value:
max_length = max(max_length, len(str(cell.value)))
adjusted_width = (max_length + 2) * 1.0
current_width = sheet.column_dimensions[column[0].column_letter].width
# print(f"Column {column[0].column_letter} width: {current_width} -> {adjusted_width}")
sheet.column_dimensions[column[0].column_letter].width = adjusted_width
sheet.sheet_view.rightToLeft = True
def write_to_excel(surveys, sheets):
# Create a new workbook
wb = openpyxl.Workbook()
# Remove the default sheet created by openpyxl
default_sheet = wb.active
wb.remove(default_sheet)
surveys_sheet = wb.create_sheet(title="סקרים")
surveys_sheet.cell(row=1, column=1, value="שם")
surveys_sheet.cell(row=1, column=2, value="תיאור")
surveys_sheet.cell(row=1, column=3, value="נוצר ב")
surveys_sheet.cell(row=1, column=4, value="מספר שאלות")
surveys_sheet.cell(row=1, column=5, value="מספר תגובות")
for i, survey in enumerate(surveys.values(), start=2):
surveys_sheet.cell(row=i, column=1, value=survey['name'])
surveys_sheet.cell(row=i, column=2, value=survey['description'])
surveys_sheet.cell(row=i, column=3, value=survey['created_at'])
surveys_sheet.cell(row=i, column=4, value=len(survey['questions']))
responses = [r for s, _, r in sheets if s == survey['name']]
surveys_sheet.cell(row=i, column=5, value=len(responses))
fix_sheet(surveys_sheet)
# Create each sheet from the data dictionary
for sheet_name, headers, responses in sheets:
# Create a new sheet with the specified name
sheet = wb.create_sheet(title=sheet_name)
# Write headers to the first row
for col_index, header in enumerate(headers, start=1):
sheet.cell(row=1, column=col_index, value=header)
# Write response data starting from the second row
for row_index, response in enumerate(responses, start=2):
for col_index, header in enumerate(headers, start=1):
sheet.cell(row=row_index, column=col_index, value=response.get(header, ""))
fix_sheet(sheet)
# Save the workbook to a file
current_date = datetime.now().strftime("%Y-%m-%d")
output_filename = f'yallanegev-{current_date}.xlsx'
wb.save(output_filename)
# Upload the output file to Google Drive, using credentials in GOOGLE_APPLICATION_CREDENTIALS, using google-api-python-client
DRIVE_FOLDER = os.getenv('DRIVE_FOLDER_ID').strip() # Use environment variable for Drive folder ID
# Authenticate and create the Drive API client
creds = None
credentials_path = os.getenv('GOOGLE_APPLICATION_CREDENTIALS')
if credentials_path and os.path.exists(credentials_path):
creds = google.auth.load_credentials_from_file(credentials_path)[0]
else:
raise Exception("Credentials file not found or GOOGLE_APPLICATION_CREDENTIALS environment variable is not set.")
service = build('drive', 'v3', credentials=creds)
# Upload the file
file_metadata = {
'name': output_filename,
'parents': [DRIVE_FOLDER.split('/')[-1]]
}
media = MediaFileUpload(output_filename, mimetype='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet')
file = service.files().create(body=file_metadata, media_body=media, fields='id').execute()
print(f"File ID: {file.get('id')}")
def main():
document_path = 'versions/v1' # Replace with your document path
# Read Firestore Sub-collections
data_dict = read_firestore_subcollections(document_path)
# Get Surveys:
surveys = process_surveys(data_dict.get('surveys', []))
# Get responses:
sheets = []
for survey_id, survey in surveys.items():
headers, responses = process_responses(data_dict.get('responses', []), survey_id, survey)
print(f"Survey: {survey['name']} ({survey['description']})")
print(f"Headers: {headers}")
print(f"# Responses: {len(responses)}")
if responses:
print(f"Responses: {responses[0]}")
sheets.append((survey['name'], headers, responses))
# Write to Excel
write_to_excel(surveys, sheets)
if __name__ == '__main__':
main()