Skip to content
This repository was archived by the owner on Apr 2, 2022. It is now read-only.

Commit c2cc7c8

Browse files
authored
Update info on full CORD dataset (v3 -> v4) (#5)
1 parent fb763e7 commit c2cc7c8

File tree

1 file changed

+13
-14
lines changed

1 file changed

+13
-14
lines changed

asreviewcontrib/covid19/datasets.py

+13 -14
Original file line number | Diff line number | Diff line change
@@ -9,29 +9,28 @@ class Cord19Dataset(BaseDataSet):
99
topic = "Covid-19"
1010
license = "Covid dataset license"
1111
link = "https://pages.semanticscholar.org/coronavirus-research"
12-
last_update = "2020-03-13"
12+
last_update = "2020-03-20"
1313
description = "A free dataset on publications on the corona virus."
1414
img_url = ("https://pages.semanticscholar.org/hs-fs/hubfs/"
1515
"covid-image.png?width=300&name=covid-image.png")
1616
link = "https://pages.semanticscholar.org/coronavirus-research"
1717
year = 2020
1818

1919

20-
class Cord19DatasetV3(Cord19Dataset):
21-
dataset_id = "cord19-v3"
22-
title = "CORD-19 v3"
23-
sha512 = ("6741211cc47c04897b253a3eaf2d18e6d57391530f8cebe7d8c84310f82"
24-
"c90b2c55071157b418fb7b627302adbfae8838fb8c071516288b320b131"
25-
"03ac1ec7fc")
20+
class Cord19DatasetV4(Cord19Dataset):
21+
dataset_id = "cord19-v4"
22+
title = "CORD-19 v4"
23+
sha512 = ("517e2399767aa1d387baaa07c42ef6ac9a5aec1e3a41f832974ee712413"
24+
"272429f2a5ea658b32bb7330becac70df1ee5262ae1ddebb258a02aaaa2"
25+
"d4b47335cc")
2626

27-
date = "2020-03-13"
27+
date = "2020-03-20"
2828
statistics = {
29-
"n_papers": 29500,
30-
"n_missing_titles": 9,
31-
"n_missing_abstracts": 2591,
29+
"n_papers": 44220,
30+
"n_missing_titles": 224,
31+
"n_missing_abstracts": 8414,
3232
}
33-
url = ("https://ai2-semanticscholar-cord-19.s3-us-west-2.amazonaws.com/"
34-
"2020-03-13/all_sources_metadata_2020-03-13.csv")
33+
url = ("https://ai2-semanticscholar-cord-19.s3-us-west-2.amazonaws.com/2020-03-20/metadata.csv")
3534

3635

3736
class Cord19DatasetV4_Dec2019(Cord19Dataset):
@@ -58,6 +57,6 @@ class Covid19DataGroup(BaseDataGroup):
5857

5958
def __init__(self):
6059
super(Covid19DataGroup, self).__init__(
61-
Cord19DatasetV3(),
60+
Cord19DatasetV4(),
6261
Cord19DatasetV4_Dec2019(),
6362
)

0 commit comments

Comments
 (0)