forked from iobis/non-matching-names
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscript.py
36 lines (28 loc) · 1.44 KB
/
script.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import requests
import csv
import pandas as pa
import math
# Fetch the OBIS checklist served at /checklist/nomatchvliz and keep the
# "results" array of the JSON response; each element is a dict for one name.
# The timeout keeps the script from hanging forever on a stalled connection.
response = requests.get("https://api.obis.org/checklist/nomatchvliz", timeout=300)
# Fail with a clear HTTP error instead of an obscure KeyError / JSON decode
# error further down when the API answers with an error status.
response.raise_for_status()
names = response.json()["results"]

# Load the VLIZ spreadsheet. "NA" cells are treated as missing values and the
# two annotation columns are passed through str so they are not parsed as
# numbers.
vliz_list = pa.read_excel(
    "vliz_list/Blacklist_update_20200626.xlsx",
    engine="openpyxl",
    na_values=["NA"],
    converters={"annotation_type": str, "annotation_resolved_AphiaID": str},
)
# Index by the original scientific name so names can be looked up by label.
vliz_list.set_index("scientificname_original", inplace=True)
# Attach the VLIZ annotation to every fetched name that has exactly one row in
# the VLIZ list. Names with no row, or with several rows, are left untouched.
for entry in names:
    name = entry["scientificname"]
    if name not in vliz_list.index:
        continue
    # A list selector makes .loc return a DataFrame even for a single match,
    # so len() counts the spreadsheet rows recorded for this name.
    matches = vliz_list.loc[[name]]
    if len(matches) != 1:
        continue
    # Use .iloc[0] for positional access: the columns are indexed by name
    # strings, and a plain [0] relies on pandas' deprecated positional
    # fallback for integer keys.
    entry["annotation_type"] = matches["annotation_type"].iloc[0]
    aphia_id = matches["annotation_resolved_AphiaID"].iloc[0]
    # A missing cell comes through as a float NaN; only copy a real value.
    if isinstance(aphia_id, str) or not math.isnan(aphia_id):
        entry["annotation_aphiaid"] = aphia_id
# Column order of both output files. csv.DictWriter raises ValueError for a
# row that carries a key not listed here, and leaves absent keys empty.
csv_cols = [
    "scientificname",
    "scientificnameid",
    "records",
    "phylum",
    "class",
    "order",
    "family",
    "genus",
    "annotation_type",
    "annotation_aphiaid",
    "datasets",
]

# Split the names on whether an annotation was attached: annotated names go
# to list_annotated.csv, the rest to list_open.csv.
annotated_rows = [row for row in names if "annotation_type" in row]
open_rows = [row for row in names if "annotation_type" not in row]

for csv_path, rows in (
    ("list_annotated.csv", annotated_rows),
    ("list_open.csv", open_rows),
):
    # newline="" is required by the csv module so it controls line endings
    # itself (otherwise blank rows appear on Windows); an explicit UTF-8
    # encoding keeps non-ASCII names independent of the platform default.
    with open(csv_path, "w", newline="", encoding="utf-8") as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=csv_cols)
        writer.writeheader()
        writer.writerows(rows)