Skip to content

Commit d1d78e2

Browse files
Limit number of requests sent to OpenAlex in tests (asreview#44)
1 parent d71440c commit d1d78e2

File tree

3 files changed

+51
-17
lines changed

3 files changed

+51
-17
lines changed

tests/demo_data/snowballing_doi.csv

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
,title,doi,included
2-
0,"Social Networks Analysis: Tools, Measures and Visualization",https://doi.org/10.1007/978-1-4471-4054-2_1,1
3-
1,"Genome-wide Association Study of Alcohol Dependence",https://doi.org/10.1001/archgenpsychiatry.2009.83,0
2+
0,"Myrmecochorous plants in Australia and their dispersal by ants",https://doi.org/10.1071/bt9750475,1
3+
1,"Mimicking the one-dimensional marginal distributions of processes having an ito differential",https://doi.org/10.1007/bf00699039,0
tests/demo_data/snowballing_openalex.csv

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
,openalex_id,title,included
2-
0,https://openalex.org/W2234238252,"Social Networks Analysis: Tools, Measures and Visualization",1
3-
1,https://openalex.org/W1977467968,"Genome-wide Association Study of Alcohol Dependence",0
2+
0,https://openalex.org/W2051970045,"Myrmecochorous plants in Australia and their dispersal by ants",1
3+
1,https://openalex.org/W104454400,"Mimicking the one-dimensional marginal distributions of processes having an ito differential",0

tests/test_snowball.py

+47-13
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,45 @@
11
from pathlib import Path
22

33
import pandas as pd
4+
import pyalex
45

56
from asreviewcontrib.datatools.snowball import backward_snowballing
67
from asreviewcontrib.datatools.snowball import forward_snowballing
78
from asreviewcontrib.datatools.snowball import openalex_from_doi
89
from asreviewcontrib.datatools.snowball import snowball
910

1011
INPUT_DIR = Path(__file__).parent / "demo_data"
12+
EMAIL = "asreview@uu.nl"
13+
14+
pyalex.config.email = EMAIL
15+
16+
# These works were chosen for testing forward snowballing.
17+
# Each has a DOI, cites and is cited by other works, and has a cited_by_count below
18+
# 400, so only two requests are needed to fetch all citing works. They are from the
19+
# previous century, so the cited_by_count is unlikely to change very much.
20+
# These are also the same records as in the demo datasets 'snowballing_doi.csv' and
21+
# 'snowballing_openalex.csv'.
22+
WORKS = [
23+
{
24+
"id": "https://openalex.org/W2051970045",
25+
"doi": "https://doi.org/10.1071/bt9750475",
26+
"title": "Myrmecochorous plants in Australia and their dispersal by ants",
27+
"cited_by_count": 372,
28+
"cited_by": "https://openalex.org/W2174650845",
29+
"cites": "https://openalex.org/W1538725992",
30+
},
31+
{
32+
"id": "https://openalex.org/W104454400",
33+
"doi": "https://doi.org/10.1007/bf00699039",
34+
"title": (
35+
"Mimicking the one-dimensional marginal distributions of processes having"
36+
" an ito differential"
37+
),
38+
"cited_by_count": 299,
39+
"cited_by": "https://openalex.org/W1842249978",
40+
"cites": "https://openalex.org/W1513091520",
41+
},
42+
]
1143

1244

1345
def test_openalex_from_doi():
@@ -41,32 +73,30 @@ def test_backward_snowballing():
4173

4274

4375
def test_forward_snowballing():
44-
identifiers = [
45-
"https://openalex.org/W4281483266",
46-
"https://openalex.org/W2008620264",
47-
]
76+
identifiers = [work["id"] for work in WORKS]
4877

4978
forwards_citations = forward_snowballing(identifiers)
5079

51-
assert "https://openalex.org/W4386305682" in [
80+
assert WORKS[0]["cited_by"] in [
5281
field_dict["id"] for field_dict in forwards_citations[identifiers[0]]
5382
]
54-
assert "https://openalex.org/W2124637492" in [
83+
assert WORKS[1]["cited_by"] in [
5584
field_dict["id"] for field_dict in forwards_citations[identifiers[1]]
5685
]
5786

5887

5988
def test_openalex_id_forward(tmpdir):
60-
out_fp = Path(tmpdir, "forward_all.csv")
89+
out_fp = Path(tmpdir, "forward.csv")
6190
snowball(
6291
input_path=INPUT_DIR / "snowballing_openalex.csv",
6392
output_path=out_fp,
6493
forward=True,
6594
backward=False,
6695
use_all=False,
96+
email=EMAIL,
6797
)
6898
df = pd.read_csv(out_fp)
69-
assert len(df) >= 23
99+
assert len(df) >= 364
70100

71101
all_out_fp = Path(tmpdir, "forward_all.csv")
72102
snowball(
@@ -75,22 +105,24 @@ def test_openalex_id_forward(tmpdir):
75105
forward=True,
76106
backward=False,
77107
use_all=True,
108+
email=EMAIL,
78109
)
79110
df_all = pd.read_csv(all_out_fp)
80-
assert len(df_all) >= 387
111+
assert len(df_all) >= 656
81112

82113

83114
def test_openalex_id_backward(tmpdir):
84-
out_fp = Path(tmpdir, "forward_all.csv")
115+
out_fp = Path(tmpdir, "backward.csv")
85116
snowball(
86117
input_path=INPUT_DIR / "snowballing_openalex.csv",
87118
output_path=out_fp,
88119
forward=False,
89120
backward=True,
90121
use_all=False,
122+
email=EMAIL,
91123
)
92124
df = pd.read_csv(out_fp)
93-
assert len(df) == 31
125+
assert len(df) == 40
94126

95127
all_out_fp = Path(tmpdir, "backward_all.csv")
96128
snowball(
@@ -99,9 +131,10 @@ def test_openalex_id_backward(tmpdir):
99131
forward=False,
100132
backward=True,
101133
use_all=True,
134+
email=EMAIL,
102135
)
103136
df_all = pd.read_csv(all_out_fp)
104-
assert len(df_all) == 117
137+
assert len(df_all) == 45
105138

106139

107140
def test_snowballing_from_doi(tmpdir):
@@ -112,6 +145,7 @@ def test_snowballing_from_doi(tmpdir):
112145
forward=False,
113146
backward=True,
114147
use_all=True,
148+
email=EMAIL,
115149
)
116150
df = pd.read_csv(out_fp)
117-
assert len(df) == 117
151+
assert len(df) == 45

0 commit comments

Comments
 (0)