-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhelper.py
102 lines (85 loc) · 4.33 KB
/
helper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import numpy as np
def most_successfull(df,sport):
temp_df=df.dropna(subset=['Medal'])
if sport!='Overall':
temp_df=temp_df[temp_df['Sport']==sport]
x=temp_df['Name'].value_counts().reset_index().head(15).merge(df,left_on='index',right_on='Name',how='left')[['index','Name_x','Sport','region']].drop_duplicates('index')
x.rename(columns={'index':'Name','Name_x':'Medals'},inplace=True)
return x
def data_over_time(df,col):
nation_over_time=df.drop_duplicates(['Year',col])['Year'].value_counts().reset_index().sort_values('index')
nation_over_time.rename(columns={'index':'Edition','Year':col},inplace=True)
return nation_over_time
def fetch_medal_tally(df,year,country):
medal_df=df.drop_duplicates(subset=(['Team','NOC','Games','Year','City','Sport','Event','Medal']))
flag=0
if year=="Overall" and country=="Overall":
temp_df=medal_df
if year=="Overall" and country!="Overall":
flag=1
temp_df=medal_df[medal_df["region"]==country]
if year!="Overall" and country=="Overall":
temp_df=medal_df[medal_df['Year']==int(year)]
if year!="Overall" and country!="Overall":
temp_df=medal_df[(medal_df['Year']==int(year)) & (medal_df['region']==country)]
if flag==1:
x=temp_df.groupby('Year').sum()[['Gold','Silver','Bronze']].sort_values('Year').reset_index()
else:
x=temp_df.groupby('region').sum()[['Gold','Silver','Bronze']].sort_values('Gold',ascending=False).reset_index()
x['total']=x['Gold']+x['Silver']+x['Bronze']
x['Gold']=x['Gold'].astype('int')
x['Silver']=x['Silver'].astype('int')
x['Bronze']=x['Bronze'].astype('int')
x['total']=x['total'].astype('int')
return x
def medal_tally(df):
medal_tally=df.drop_duplicates(subset=(['Team','NOC','Games','Year','City','Sport','Event','Medal']))
medal_tally=medal_tally.groupby('region').sum()[['Gold','Silver','Bronze']].sort_values('Gold',ascending=False).reset_index()
medal_tally['total']=medal_tally['Gold']+medal_tally['Silver']+medal_tally['Bronze']
medal_tally['Gold']=medal_tally['Gold'].astype('int')
medal_tally['Silver']=medal_tally['Silver'].astype('int')
medal_tally['Bronze']=medal_tally['Bronze'].astype('int')
medal_tally['total']=medal_tally['total'].astype('int')
return medal_tally
def country_year_list(df):
years=df['Year'].unique().tolist()
years.sort()
years.insert(0,'Overall')
country=np.unique(df['region'].dropna().values).tolist()
country.sort()
country.insert(0,'Overall')
return years,country
def yearWiseMedalTally(df,country):
temp_df=df.dropna(subset=['Medal'])
temp_df.drop_duplicates(subset=['Team','NOC','Games','Year','City','Sport','Event','Medal'],inplace=True)
new_df=temp_df[temp_df['region']==country]
final_df=new_df.groupby('Year').count()['Medal'].reset_index()
return final_df
def country_event_heatmap(df,country):
temp_df=df.dropna(subset=['Medal'])
temp_df.drop_duplicates(subset=['Team','NOC','Games','Year','City','Sport','Event','Medal'],inplace=True)
new_df=temp_df[temp_df['region']==country]
pt=new_df.pivot_table(index='Sport',columns='Year',values='Medal',aggfunc='count').fillna(0)
return pt
def most_successfull_country_wise(df,country):
temp_df=df.dropna(subset=['Medal'])
temp_df=temp_df[temp_df['region']==country]
x=temp_df['Name'].value_counts().reset_index().head(10).merge(df,left_on='index',right_on='Name',how='left')[['index','Name_x','Sport']].drop_duplicates('index')
x.rename(columns={'index':'Name','Name_x':'Medals'},inplace=True)
return x
def weight_v_height(df,sport):
athlete_df=df.drop_duplicates(subset=['Name','region'])
athlete_df['Medal'].fillna('No medal',inplace=True)
if sport!='Overall':
temp_df=athlete_df[athlete_df['Sport']==sport]
return temp_df
else:
return athlete_df
def men_vs_women(df):
athlete_df=df.drop_duplicates(subset=['Name','region'])
men=athlete_df[athlete_df['Sex']=='M'].groupby('Year').count()['Name'].reset_index()
women=athlete_df[athlete_df['Sex']=='F'].groupby('Year').count()['Name'].reset_index()
final=men.merge(women,on='Year',how='left')
final.rename(columns={'Name_x':'Male','Name_y':'Female'},inplace=True)
final.fillna(0,inplace=True)
return final