-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprocess_data.py
32 lines (27 loc) · 1.25 KB
/
process_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import os, glob, csv, pandas as pd
def get_file_paths(data):
    """Collect the absolute paths of all files found under a directory tree.

    Every directory reachable from ``data`` is visited with ``os.walk`` and
    its entries are matched with the ``*`` glob pattern.

    Args:
        data: Path of the root directory to scan.

    Returns:
        A list of absolute paths, one per regular file found. Entries whose
        own name starts with a dot are not matched by ``*`` and are therefore
        skipped. The list is empty when nothing is found.
    """
    files_list = []
    for root, _dirs, _names in os.walk(data):
        # Escape the directory part so characters such as '[' in a folder
        # name are taken literally instead of being read as glob syntax.
        pattern = os.path.join(glob.escape(root), '*')
        # '*' also matches sub-directories; keep regular files only so that
        # callers never receive a directory path where a file is expected.
        files_list.extend(
            os.path.abspath(path)
            for path in glob.glob(pattern)
            if os.path.isfile(path)
        )
    return files_list
def write_file():
    """Create ``new_events_data.csv`` and write its header row.

    The file is created in the current working directory; an existing file
    with the same name is truncated. Only the header line is written.
    """
    header = [
        'artist', 'first_name', 'gender', 'itemInSession', 'last_name',
        'length', 'level', 'location', 'sessionId', 'song_title', 'userId',
    ]
    # newline='' lets the csv module control line endings itself; without it
    # Windows text mode turns the writer's '\r\n' into '\r\r\n'. The explicit
    # encoding keeps the output independent of the platform default.
    with open("new_events_data.csv", 'w', newline='', encoding='utf-8') as csvfile:
        csv.writer(csvfile).writerow(header)
def read_write_file(files_list):
    """Append selected columns of each input CSV to ``new_events_data.csv``.

    Each path in ``files_list`` is loaded with ``pandas.read_csv`` and, for
    every row, the values of the columns listed below are appended to the
    output file in that order. No header is written here and no rows are
    filtered or cleaned.

    Args:
        files_list: Iterable of paths to CSV files. Each file must contain
            the columns artist, firstName, gender, itemInSession, lastName,
            length, level, location, sessionId, song and userId.

    Raises:
        KeyError: If an input file lacks one of the required columns.
    """
    columns = [
        'artist', 'firstName', 'gender', 'itemInSession', 'lastName',
        'length', 'level', 'location', 'sessionId', 'song', 'userId',
    ]
    # Open the output once for the whole run instead of once per row.
    # newline='' is required by the csv module to avoid doubled line endings
    # on Windows; the explicit encoding avoids platform-dependent output.
    with open("new_events_data.csv", "a", newline="", encoding="utf-8") as csvfile:
        csv_writer = csv.writer(csvfile)
        for file in files_list:
            df = pd.read_csv(file)
            # Selecting by column label avoids attribute lookups that could
            # collide with pandas' own attribute names; plain tuples are
            # streamed straight into the writer.
            csv_writer.writerows(
                df[columns].itertuples(index=False, name=None)
            )