pharmacy.py
import requests
import untangle
import csv
import os
# Set your API key. Either store this in an environment variable called 'API_KEY', or amend the default value below
api_key = os.getenv('API_KEY', 'default_value')
base_url = f'http://v1.syndication.nhschoices.nhs.uk/organisations/pharmacies/all.xml?apikey={api_key}'
# Define the API page number to start from - set this to a high number (e.g. 380) if you just want to test against the
# last few pages
starting_page_number = 1
# Create an empty list to hold the pharmacy data we're going to collect
pharmacies = []


def store_services(entries: list) -> None:
    """
    Extracts some key data from each <entry> XML object in the provided list and appends a record for each
    pharmacy to our module-level list of pharmacies. Takes a list of objects representing <entry> XML trees.
    """
    for entry in entries:
        org_name = entry.content.s_organisationSummary.s_name.cdata
        ods_code = entry.content.s_organisationSummary.s_odscode.cdata
        post_code = entry.content.s_organisationSummary.s_address.s_postcode.cdata
        coords = {
            'long': entry.content.s_organisationSummary.s_geographicCoordinates.s_longitude.cdata,
            'lat': entry.content.s_organisationSummary.s_geographicCoordinates.s_latitude.cdata,
        }
        id_url = entry.id.cdata
        # print(f'{org_name} - {ods_code}')
        # Create a dictionary for that pharmacy's data
        pharmacy = {
            'name': org_name,
            'odscode': ods_code,
            'id': id_url,
            'postCode': post_code,
            'coords': coords
        }
        # Add the pharmacy dictionary to the pharmacies list
        pharmacies.append(pharmacy)
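
# For reference, a rough sketch of the <entry> structure that the attribute paths in store_services() assume.
# untangle exposes namespaced element names such as <s:name> as s_name on attribute access. The element names
# below are illustrative, reconstructed from the paths above rather than taken from the live feed:
#
#   <entry>
#     <id>http://v1.syndication.nhschoices.nhs.uk/organisations/pharmacies/...</id>
#     <content>
#       <s:organisationSummary>
#         <s:name>Example Pharmacy</s:name>
#         <s:odscode>FA123</s:odscode>
#         <s:address>
#           <s:postcode>AB1 2CD</s:postcode>
#         </s:address>
#         <s:geographicCoordinates>
#           <s:longitude>-0.1234</s:longitude>
#           <s:latitude>51.1234</s:latitude>
#         </s:geographicCoordinates>
#       </s:organisationSummary>
#     </content>
#   </entry>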


def get_all_pharmacies() -> None:
    """
    Iterate through the pages of pharmacy data from the Choices API and call store_services() on each page.
    """
    page_number = starting_page_number
    # Perform a GET request against the Choices API using the base URL + the starting page number as a URL param
    result = requests.get(base_url + f'&page={page_number}')
    # Parse the returned XML into an object representing the XML structure
    document = untangle.parse(result.text)
    # Define the contents of the <feed> element as its own object
    feed = document.feed
    # Define the <link> elements as their own object
    urls = feed.link
    # Create an empty dictionary to store the URLs from the XML response
    url_list = {}
    # For each <link> element in our urls object, add it to our dictionary with the rel attribute as the key
    # and the href attribute as the value
    for url in urls:
        url_list[url['rel']] = url['href']
    # Store the entries from this first page before iterating through the remaining pages below
    store_services(feed.entry)
    # The following code is wrapped in a try/except so as to catch the KeyError raised when we reach the last page
    try:
        # Now iterate through the pages of results from the API, each time checking for the presence of a 'next'
        # link in our dictionary of URLs.
        while url_list['next']:
            # Perform a GET against the API using the URL from the 'next' link in the previous page
            result = requests.get(url_list['next'],
                                  headers={'Content-Type': 'application/xml',
                                           'Accept': 'application/xml'})
            # Parse the returned XML into an object called 'document'
            document = untangle.parse(result.text)
            # Define the contents of the <feed> element as its own object
            feed = document.feed
            # Define the list of <entry> elements as their own object (list)
            entries = feed.entry
            # Define the list of <link> elements as their own object (list)
            urls = feed.link
            # Create an empty dictionary to store the URLs from the XML response
            url_list = {}
            # For each <link> element in our urls object, add it to our dictionary with the rel attribute as
            # the key and the href attribute as the value
            for url in urls:
                url_list[url['rel']] = url['href']
            # Call store_services() with the list of entries that we have just extracted from this results
            # page. We do this for each results page that we iterate through.
            store_services(entries)
    # Catch the KeyError, as this indicates we have reached the last page (there is no 'next' link type)
    except KeyError:
        print('Finished getting list of all pharmacies')
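
# For reference, the pagination handling above relies on each results page carrying <link> elements in its
# <feed>, along the lines of the sketch below (illustrative only - the href values are placeholders, not real
# API output):
#
#   <feed>
#     <link rel="self" href=".../pharmacies/all.xml?apikey=...&page=2"/>
#     <link rel="next" href=".../pharmacies/all.xml?apikey=...&page=3"/>
#     <link rel="prev" href=".../pharmacies/all.xml?apikey=...&page=1"/>
#     <entry>...</entry>
#     ...
#   </feed>
#
# On the final page there is no rel="next" link, so url_list['next'] raises the KeyError that ends the loop.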


def update_eps_statuses() -> None:
    """
    Iterate through the list of pharmacies and get the epsEnabled status for each individual pharmacy, adding it
    to the respective pharmacy object in the pharmacies list. We have to do this separately from the first
    retrieval of pharmacy data, as the flag we are after is stored in the /overview subset of data for each
    pharmacy, which we can only access one pharmacy at a time.
    """
    print('Getting EPS status for all pharmacies in list')
    # For each pharmacy in the list of pharmacies we compiled...
    for pharmacy in pharmacies:
        # Get the ID URL for that pharmacy instance
        id_url = pharmacy['id']
        # Append /overview.xml to get the detail containing the EPS status, and add the API key on the end
        url = f'{id_url}/overview.xml?apikey={api_key}'
        # Perform a GET request against the URL
        results = requests.get(url)
        # Take the returned XML and parse it into an object
        document = untangle.parse(results.text)
        # Pull the contents of the <content> element into its own object
        content = document.feed.entry.content
        # Get the value from the <isEpsEnabled> element
        eps_enabled = content.s_overview.s_isEpsEnabled.cdata
        # Convert the 'true'/'false' text to a boolean. Note that bool() on any non-empty string (including
        # 'false') is True, so we compare the text instead.
        eps_enabled = eps_enabled.strip().lower() == 'true'
        # Update the pharmacy object in our list with the boolean epsEnabled flag
        pharmacy['epsEnabled'] = eps_enabled
    print('Finished getting EPS status for all pharmacies')


def write_to_csv() -> None:
    """
    Write all of the pharmacy data we've collected to a CSV file in the local directory.
    """
    # Set the filename for the CSV file
    csv_file_name = 'pharmacies.csv'
    # Use a context manager to open the destination CSV file in writable mode and hold it open whilst we're
    # working with it. The CSV file will close as soon as the code within the with block has completed.
    with open(csv_file_name, 'w', newline='') as csvfile:
        # Create a CSV writer instance using the open CSV file, setting the delimiter character and the quote
        # character. QUOTE_MINIMAL tells it to only quote fields that contain special characters such as the
        # delimiter or quotechar.
        csv_writer = csv.writer(csvfile, delimiter=',',
                                quotechar='|', quoting=csv.QUOTE_MINIMAL)
        # For each record in our list of pharmacies, write a line to the CSV file
        for record in pharmacies:
            csv_writer.writerow([record['name'],
                                 record['odscode'],
                                 record['epsEnabled'],
                                 record['postCode'],
                                 record['coords']['lat'],
                                 record['coords']['long']])
    print('Finished writing CSV')
print('Beginning download of pharmacy data')
get_all_pharmacies()
update_eps_statuses()
write_to_csv()
print('Finished!')
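
# Example run (a sketch, assuming the API_KEY environment variable holds a valid NHS Choices syndication key):
#
#   API_KEY=your-key-here python pharmacy.py
#
# The resulting pharmacies.csv contains one row per pharmacy with the columns, in order:
# name, odscode, epsEnabled, postCode, latitude, longitude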