1
1
from pathlib import Path
2
2
3
3
import pandas as pd
4
+ import pyalex
4
5
5
6
from asreviewcontrib .datatools .snowball import backward_snowballing
6
7
from asreviewcontrib .datatools .snowball import forward_snowballing
7
8
from asreviewcontrib .datatools .snowball import openalex_from_doi
8
9
from asreviewcontrib .datatools .snowball import snowball
9
10
10
11
INPUT_DIR = Path(__file__).parent / "demo_data"
EMAIL = "asreview@uu.nl"

pyalex.config.email = EMAIL

# Fixture works used by the forward snowballing tests. Selection criteria:
#   - each has a DOI;
#   - each cites other works and is itself cited by other works;
#   - each has a cited_by_count below 400, so two requests are enough to
#     retrieve all citing works;
#   - each dates from the previous century, so its cited_by_count is unlikely
#     to change much.
# The same records appear in the demo datasets 'snowballing_doi.csv' and
# 'snowballing_openalex.csv'.
#
# "cited_by" holds the OpenAlex ID of one work that cites the entry.
# NOTE(review): "cites" presumably holds one work that the entry references;
# confirm against the backward snowballing test.
WORKS = [
    {
        "id": "https://openalex.org/W2051970045",
        "doi": "https://doi.org/10.1071/bt9750475",
        "title": "Myrmecochorous plants in Australia and their dispersal by ants",
        "cited_by_count": 372,
        "cited_by": "https://openalex.org/W2174650845",
        "cites": "https://openalex.org/W1538725992",
    },
    {
        "id": "https://openalex.org/W104454400",
        "doi": "https://doi.org/10.1007/bf00699039",
        "title": (
            "Mimicking the one-dimensional marginal distributions of processes having"
            " an ito differential"
        ),
        "cited_by_count": 299,
        "cited_by": "https://openalex.org/W1842249978",
        "cites": "https://openalex.org/W1513091520",
    },
]
11
43
12
44
13
45
def test_openalex_from_doi ():
@@ -41,32 +73,30 @@ def test_backward_snowballing():
41
73
42
74
43
75
def test_forward_snowballing():
    """Check that forward snowballing finds a known citing work per fixture.

    Every entry of ``WORKS`` stores, under ``"cited_by"``, the OpenAlex ID of
    one work that is expected to cite it. The result of
    ``forward_snowballing`` is indexed by input identifier, and each value is
    iterated as a collection of records exposing an ``"id"`` key.
    """
    identifiers = [work["id"] for work in WORKS]

    forwards_citations = forward_snowballing(identifiers)

    # Loop over the fixtures instead of hard-coding indices 0 and 1, so any
    # work added to WORKS is verified without editing this test.
    for work in WORKS:
        citing_ids = {
            field_dict["id"] for field_dict in forwards_citations[work["id"]]
        }
        assert work["cited_by"] in citing_ids, (
            f"{work['cited_by']} not found among works citing {work['id']}"
        )
57
86
58
87
59
88
def test_openalex_id_forward (tmpdir ):
60
- out_fp = Path (tmpdir , "forward_all .csv" )
89
+ out_fp = Path (tmpdir , "forward .csv" )
61
90
snowball (
62
91
input_path = INPUT_DIR / "snowballing_openalex.csv" ,
63
92
output_path = out_fp ,
64
93
forward = True ,
65
94
backward = False ,
66
95
use_all = False ,
96
+ email = EMAIL ,
67
97
)
68
98
df = pd .read_csv (out_fp )
69
- assert len (df ) >= 23
99
+ assert len (df ) >= 364
70
100
71
101
all_out_fp = Path (tmpdir , "forward_all.csv" )
72
102
snowball (
@@ -75,22 +105,24 @@ def test_openalex_id_forward(tmpdir):
75
105
forward = True ,
76
106
backward = False ,
77
107
use_all = True ,
108
+ email = EMAIL ,
78
109
)
79
110
df_all = pd .read_csv (all_out_fp )
80
- assert len (df_all ) >= 387
111
+ assert len (df_all ) >= 656
81
112
82
113
83
114
def test_openalex_id_backward (tmpdir ):
84
- out_fp = Path (tmpdir , "forward_all .csv" )
115
+ out_fp = Path (tmpdir , "backward .csv" )
85
116
snowball (
86
117
input_path = INPUT_DIR / "snowballing_openalex.csv" ,
87
118
output_path = out_fp ,
88
119
forward = False ,
89
120
backward = True ,
90
121
use_all = False ,
122
+ email = EMAIL ,
91
123
)
92
124
df = pd .read_csv (out_fp )
93
- assert len (df ) == 31
125
+ assert len (df ) == 40
94
126
95
127
all_out_fp = Path (tmpdir , "backward_all.csv" )
96
128
snowball (
@@ -99,9 +131,10 @@ def test_openalex_id_backward(tmpdir):
99
131
forward = False ,
100
132
backward = True ,
101
133
use_all = True ,
134
+ email = EMAIL ,
102
135
)
103
136
df_all = pd .read_csv (all_out_fp )
104
- assert len (df_all ) == 117
137
+ assert len (df_all ) == 45
105
138
106
139
107
140
def test_snowballing_from_doi (tmpdir ):
@@ -112,6 +145,7 @@ def test_snowballing_from_doi(tmpdir):
112
145
forward = False ,
113
146
backward = True ,
114
147
use_all = True ,
148
+ email = EMAIL ,
115
149
)
116
150
df = pd .read_csv (out_fp )
117
- assert len (df ) == 117
151
+ assert len (df ) == 45
0 commit comments