
Commit 3d195e2 (parent: d4cfdfe)

Linting and formatting

8 files changed: +64 -38 lines changed

asreviewcontrib/datatools/__init__.py (+1 -1)

@@ -3,4 +3,4 @@
     from asreviewcontrib.datatools._version import __version_tuple__
 except ImportError:
     __version__ = "0.0.0"
-    __version_tuple__ = (0, 0, 0)
+    __version_tuple__ = (0, 0, 0)

asreviewcontrib/datatools/compose.py (+26 -17)

@@ -17,7 +17,8 @@ def _check_order_arg(order):
         return order
     else:
         raise ValueError(
-            f"hierarchy '{order}' not found, should be one of the following: {allowed_orders}"
+            f"hierarchy '{order}' not found, should be one of the"
+            f" following: {allowed_orders}"
         )

@@ -48,18 +49,20 @@ def _check_suffix(input_files, output_file):
     if len(set(suffixes)) > 1:
         if not (set_suffixes.issubset(set_ris) or set_suffixes.issubset(set_tabular)):
             raise ValueError(
-                "Files with different file types were; all input files, as well as the output file, should be of the "
-                "same type. "
+                "Files with different file types were; all input files, as well as the"
+                " output file, should be of the same type. "
             )


 def _check_label_errors(as_lab, path_lab):
     if as_lab is not None:
         if as_lab.labels is None:
             warnings.warn(
-                f"'{path_lab}' was passed as a labeled dataset but no labels were found, continuing with its records "
-                f"marked as unlabeled. If this is not correct, check if your data format complies with: "
-                f"https://asreview.readthedocs.io/en/latest/data_format.html"
+                f"'{path_lab}' was passed as a labeled dataset but no labels were"
+                " found, continuing with its records marked as unlabeled. If this is"
+                " not correct, check if your data format complies with:"
+                " https://asreview.readthedocs.io/en/latest/data_format.html",
+                stacklevel=1,
             )

@@ -83,8 +86,8 @@ def _concat_label(list_df, label, pid="doi"):
         n_total_dedup = n_total - len(df_all)
         print(
-            f"Detected {n_total} records with label '{label}', from which {n_total_dedup} duplicate records with the "
-            f"same label were removed."
+            f"Detected {n_total} records with label '{label}', from which"
+            f" {n_total_dedup} duplicate records with the same label were removed."
         )
     else:
         df_all = pd.DataFrame()

@@ -104,9 +107,9 @@ def create_composition(
     # load all input files and URLs into ASReviewData objects, fill with None
     # if input was not specified
     input_files = [rel_path, irr_path, lab_path, unl_path]
-    as_rel, as_irr, as_lab, as_unl = [
+    as_rel, as_irr, as_lab, as_unl = (
         load_data(item) if item is not None else None for item in input_files
-    ]
+    )

     # check whether input files are correctly labeled
     _check_label_errors(as_lab, lab_path)

@@ -185,10 +188,11 @@ def create_composition(
             "left",
         ):
             print(
-                f"\nSome records have inconsistent labels in the input files. This may be intentional because you are "
-                f"trying to overwrite labels in an input file with labels from another input file. However, "
-                f"it may also be because some records are unintentionally labeled inconsistently.\n\n"
-                f"The following records have inconsistent labels in the input files:\n"
+                f"\nSome records have inconsistent labels in the input files. This may"
+                " be intentional because you are trying to overwrite labels in an input"
+                " file with labels from another input file. However, it may also be"
+                " because some records are unintentionally labeled inconsistently.\n\n"
+                "The following records have inconsistent labels in the input files:\n"
                 f"{df_info_conflicts}\n"
             )

@@ -197,14 +201,19 @@ def create_composition(

         elif resolve == "keep_one":
             warnings.warn(
-                f"Continuing, keeping one label for records with inconsistent labels, resolving conflicts using the "
-                f"following hierarchy:\n1. {dict_terms[order[0]]}\n2. {dict_terms[order[1]]}\n3. {dict_terms[order[2]]}"
+                f"Continuing, keeping one label for records with inconsistent labels,"
+                " resolving conflicts using the following hierarchy:"
+                f"\n1. {dict_terms[order[0]]}\n2. {dict_terms[order[1]]}"
+                f"\n3. {dict_terms[order[2]]}",
+                stacklevel=1,
             )
             df_composed = as_conflict.drop_duplicates(pid=pid).reset_index(drop=True)

         elif resolve == "keep_all":
             warnings.warn(
-                f"Continuing, keeping all labels for duplicate records with inconsistent labels."
+                "Continuing, keeping all labels for duplicate records with inconsistent"
+                " labels.",
+                stacklevel=1,
            )
             df_composed = as_conflict.df
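
Note on two patterns that recur in this file's changes: adjacent Python string literals are concatenated at compile time, so re-wrapping a long message across lines changes only the source layout, not the resulting string; and the warnings.warn calls gain an explicit stacklevel, which linters such as flake8-bugbear (rule B028) ask for. The switch from [...] to (...) around the load_data comprehension likewise swaps a list comprehension for a generator expression; unpacking accepts any iterable, so behavior is unchanged. A minimal runnable sketch of the warning pattern, using an illustrative function and message rather than the project's own:

import warnings

def check_labels(labels):
    if labels is None:
        warnings.warn(
            # Adjacent literals form ONE string; note the leading space on
            # the continuation line, mirroring the style used in this diff.
            "no labels were found, continuing with records"
            " marked as unlabeled",
            stacklevel=2,  # 1 (the default) blames this line; 2 blames the caller
        )

check_labels(None)  # with stacklevel=2 the warning points at this call site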

asreviewcontrib/datatools/describe.py (+17 -6)

@@ -1,16 +1,20 @@
 import argparse
 import json
-from pathlib import Path

 import asreview
 from asreview.data import load_data
-from asreview.data.statistics import *  # noqa
+from asreview.data.statistics import n_duplicates
+from asreview.data.statistics import n_irrelevant
+from asreview.data.statistics import n_missing_abstract
+from asreview.data.statistics import n_missing_title
+from asreview.data.statistics import n_records
+from asreview.data.statistics import n_relevant
+from asreview.data.statistics import n_unlabeled

 from asreviewcontrib.datatools import __version__


 def describe(input_path, output_path=None):
-
     # read data in ASReview data object
     asdata = load_data(input_path)

@@ -47,19 +51,26 @@ def describe(input_path, output_path=None):
         {
             "id": "n_missing_title",
             "title": "Number of records with missing title",
-            "description": "The number of records in the dataset with missing title.",
+            "description": (
+                "The number of records in the dataset with missing title."
+            ),
             "value": n_missing_title(asdata)[0],
         },
         {
             "id": "n_missing_abstract",
             "title": "Number of records with missing abstract",
-            "description": "The number of records in the dataset with missing abstract.",
+            "description": (
+                "The number of records in the dataset with missing abstract."
+            ),
             "value": n_missing_abstract(asdata)[0],
         },
         {
             "id": "n_duplicates",
             "title": "Number of duplicate records (basic algorithm)",
-            "description": "The number of duplicate records in the dataset based on similar text.",
+            "description": (
+                "The number of duplicate records in the dataset based on"
+                " similar text."
+            ),
             "value": n_duplicates(asdata),
         },
     ]
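
Note: the star import is replaced by one explicit import per name, so linters can now verify that each statistics helper exists and is used (flake8/ruff flag star imports as F403 and names that may come from them as F405). A small illustration using the standard library's math module in place of asreview.data.statistics:

# With "from math import *", a linter reports F403 and cannot tell whether
# a name like "flor" (a typo of "floor") exists, so the typo only surfaces
# at runtime as a NameError. With explicit imports, the same typo fails
# immediately at import time and is caught by the linter as well:
from math import floor

print(floor(3.7))  # 3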

asreviewcontrib/datatools/entrypoint.py (+6 -4)

@@ -3,6 +3,7 @@
 from asreview.data import load_data
 from asreview.entry_points import BaseEntryPoint

+from asreviewcontrib.datatools import __version__
 from asreviewcontrib.datatools.compose import _parse_arguments_compose
 from asreviewcontrib.datatools.compose import compose
 from asreviewcontrib.datatools.convert import _parse_arguments_convert

@@ -24,7 +25,7 @@ class DataEntryPoint(BaseEntryPoint):
     def __init__(self):
         from asreviewcontrib.datatools.__init__ import __version__

-        super(DataEntryPoint, self).__init__()
+        super().__init__()

         self.version = __version__

@@ -78,11 +79,13 @@ def execute(self, argv):
             if args_dedup.output_path:
                 asdata.to_file(args_dedup.output_path)
                 print(
-                    f"Removed {n_dup} duplicates from dataset with {initial_length} records."
+                    f"Removed {n_dup} duplicates from dataset with"
+                    f" {initial_length} records."
                 )
             else:
                 print(
-                    f"Found {n_dup} duplicates in dataset with {initial_length} records."
+                    f"Found {n_dup} duplicates in dataset with"
+                    f" {initial_length} records."
                 )
         if argv[0] == "compose":
             args_compose_parser = _parse_arguments_compose()

@@ -108,7 +111,6 @@ def execute(self, argv):

         # Print help message if subcommand not given or incorrect
         else:
-
             parser = argparse.ArgumentParser(
                 prog="asreview data",
                 formatter_class=argparse.RawTextHelpFormatter,
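
Note: super(DataEntryPoint, self).__init__() becomes the zero-argument form, which Python 3 resolves from the enclosing class. Behavior is identical; modernizers such as pyupgrade (ruff rule UP008) rewrite the old spelling automatically. A minimal sketch:

class Base:
    def __init__(self):
        self.ready = True

class Child(Base):
    def __init__(self):
        # Same as super(Child, self).__init__(), but without repeating the
        # class name, so it survives a class rename unchanged.
        super().__init__()

assert Child().ready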

asreviewcontrib/datatools/snowball.py (+6 -4)

@@ -211,7 +211,7 @@ def snowball(
         raise ValueError("At least one of 'forward' or 'backward' should be True.")

     data = load_data(input_path)
-    if (use_all or (data.included is None)):
+    if use_all or (data.included is None):
         data = data.df
     else:
         data = data.df.loc[data.included.astype(bool)]

@@ -236,9 +236,11 @@ def snowball(
         " records. Performing snowballing for those records."
     )
     data["openalex_id"] = None
-    data.loc[data.doi.notna(), "openalex_id"] = data.loc[
-        data.doi.notna(), "doi"
-    ].str.removeprefix(DOI_PREFIX).apply(lambda doi: id_mapping[doi])
+    data.loc[data.doi.notna(), "openalex_id"] = (
+        data.loc[data.doi.notna(), "doi"]
+        .str.removeprefix(DOI_PREFIX)
+        .apply(lambda doi: id_mapping[doi])
+    )

     identifiers = data["openalex_id"].dropna().to_list()
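
Note: the openalex_id assignment is reformatted into a parenthesized method chain, one call per line, with no change in behavior. A self-contained pandas sketch of the same chain; the DOI and the id_mapping entry below are made up for illustration:

import pandas as pd

DOI_PREFIX = "https://doi.org/"
id_mapping = {"10.1000/xyz123": "W0000000001"}  # hypothetical DOI -> OpenAlex id

data = pd.DataFrame({"doi": [DOI_PREFIX + "10.1000/xyz123", None]})
data["openalex_id"] = None
data.loc[data.doi.notna(), "openalex_id"] = (
    data.loc[data.doi.notna(), "doi"]    # rows that actually have a DOI
    .str.removeprefix(DOI_PREFIX)        # strip the URL prefix (pandas >= 1.4)
    .apply(lambda doi: id_mapping[doi])  # look up the OpenAlex identifier
)
print(data["openalex_id"].tolist())  # ['W0000000001', None]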

asreviewcontrib/datatools/stack.py (+6 -4)

@@ -1,5 +1,4 @@
 import argparse
-import warnings
 from pathlib import Path

 import pandas as pd

@@ -19,8 +18,8 @@ def _check_suffix(input_files, output_file):
     if len(set(suffixes)) > 1:
         if not (set_suffixes.issubset(set_ris) or set_suffixes.issubset(set_tabular)):
             raise ValueError(
-                "• Several file types were given; All input files, as well as the output file should be of the same "
-                "type. "
+                "• Several file types were given; All input files, as well as the"
+                " output file should be of the same type. "
             )

@@ -38,7 +37,10 @@ def _parse_arguments_vstack():
     parser = argparse.ArgumentParser(prog="asreview data vstack")
     parser.add_argument("output_path", type=str, help="The output file path.")
     parser.add_argument(
-        "datasets", type=str, nargs="+", help="Any number of datasets to stack vertically."
+        "datasets",
+        type=str,
+        nargs="+",
+        help="Any number of datasets to stack vertically.",
     )

     return parser
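
Note: splitting the add_argument call to one keyword per line is purely cosmetic, but the nargs="+" it carries is worth a gloss: argparse gathers one or more positional values into a list. A runnable sketch using the same parser definition:

import argparse

parser = argparse.ArgumentParser(prog="asreview data vstack")
parser.add_argument("output_path", type=str, help="The output file path.")
parser.add_argument(
    "datasets",
    type=str,
    nargs="+",  # one or more values, collected into a list
    help="Any number of datasets to stack vertically.",
)

args = parser.parse_args(["out.ris", "a.ris", "b.ris"])
print(args.output_path)  # out.ris
print(args.datasets)     # ['a.ris', 'b.ris']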

tests/test_compose.py (+2 -1)

@@ -55,7 +55,8 @@ def test_label_prioritization():
     df_3 = create_composition(*input_files_1, order="uri")
     assert df_3["included"].value_counts()[-1] == len(df_3)

-    # input different datasets with some identical records, combining as labeled and unlabeled data
+    # input different datasets with some identical records, combining as labeled and
+    # unlabeled data
     df_4 = create_composition(*input_files_2, order="riu")
     df_4_counts = df_4["included"].value_counts()
     assert df_4_counts[-1] == 7 and df_4_counts[0] == 3 and df_4_counts[1] == 1

tests/test_describe.py (+0 -1)

@@ -2,5 +2,4 @@


 def test_describe():
-
     subprocess.run(["asreview", "data-describe", "benchmark:van_de_schoot2017"])
