@@ -17,7 +17,8 @@ def _check_order_arg(order):
         return order
     else:
         raise ValueError(
-            f"hierarchy '{order}' not found, should be one of the following: {allowed_orders}"
+            f"hierarchy '{order}' not found, should be one of the"
+            f" following: {allowed_orders}"
         )
 
 
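Note on the hunk above: the long message is simply split across two adjacent f-string literals, and Python concatenates adjacent string literals at compile time, so the raised text is unchanged. A minimal sketch of that behavior (the values below are placeholders, not taken from the package):

    order = "xyz"
    allowed_orders = ["example-order"]
    msg = (
        f"hierarchy '{order}' not found, should be one of the"
        f" following: {allowed_orders}"
    )
    # identical to the single long literal that was removed
    assert msg == f"hierarchy '{order}' not found, should be one of the following: {allowed_orders}"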
@@ -48,18 +49,20 @@ def _check_suffix(input_files, output_file):
     if len(set(suffixes)) > 1:
         if not (set_suffixes.issubset(set_ris) or set_suffixes.issubset(set_tabular)):
             raise ValueError(
-                "Files with different file types were; all input files, as well as the output file, should be of the "
-                "same type. "
+                "Files with different file types were; all input files, as well as the"
+                " output file, should be of the same type. "
             )
 
 
 def _check_label_errors(as_lab, path_lab):
     if as_lab is not None:
         if as_lab.labels is None:
             warnings.warn(
-                f"'{path_lab}' was passed as a labeled dataset but no labels were found, continuing with its records "
-                f"marked as unlabeled. If this is not correct, check if your data format complies with: "
-                f"https://asreview.readthedocs.io/en/latest/data_format.html"
+                f"'{path_lab}' was passed as a labeled dataset but no labels were"
+                " found, continuing with its records marked as unlabeled. If this is"
+                " not correct, check if your data format complies with:"
+                " https://asreview.readthedocs.io/en/latest/data_format.html",
+                stacklevel=1,
             )
 
 
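The explicit stacklevel=1 added here matches the default of warnings.warn, so the warning is still attributed to the warn() call inside _check_label_errors; it most likely just satisfies a lint rule that wants stacklevel spelled out. A standalone illustration of what the argument controls (function names are made up for the example):

    import warnings

    def check_labels():
        # stacklevel=1 (the default): the warning points at this line
        warnings.warn("no labels found", stacklevel=1)

    def caller():
        # with stacklevel=2 the warning would point at this call site instead
        check_labels()

    caller()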
@@ -83,8 +86,8 @@ def _concat_label(list_df, label, pid="doi"):
 
         n_total_dedup = n_total - len(df_all)
         print(
-            f"Detected {n_total} records with label '{label}', from which {n_total_dedup} duplicate records with the "
-            f"same label were removed."
+            f"Detected {n_total} records with label '{label}', from which"
+            f" {n_total_dedup} duplicate records with the same label were removed."
         )
     else:
         df_all = pd.DataFrame()
@@ -104,9 +107,9 @@ def create_composition(
     # load all input files and URLs into ASReviewData objects, fill with None
     # if input was not specified
     input_files = [rel_path, irr_path, lab_path, unl_path]
-    as_rel, as_irr, as_lab, as_unl = [
+    as_rel, as_irr, as_lab, as_unl = (
         load_data(item) if item is not None else None for item in input_files
-    ]
+    )
 
     # check whether input files are correctly labeled
     _check_label_errors(as_lab, lab_path)
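Replacing the list comprehension with a parenthesized generator expression does not change how the result unpacks into as_rel, as_irr, as_lab, as_unl; it only avoids building an intermediate list. A quick sketch with stand-in values in place of load_data:

    input_files = ["relevant.csv", None, "labeled.csv", None]
    as_rel, as_irr, as_lab, as_unl = (
        item.upper() if item is not None else None for item in input_files
    )
    assert (as_rel, as_irr, as_lab, as_unl) == ("RELEVANT.CSV", None, "LABELED.CSV", None)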
@@ -185,10 +188,11 @@ def create_composition(
             "left",
         ):
             print(
-                f"\nSome records have inconsistent labels in the input files. This may be intentional because you are "
-                f"trying to overwrite labels in an input file with labels from another input file. However, "
-                f"it may also be because some records are unintentionally labeled inconsistently.\n\n"
-                f"The following records have inconsistent labels in the input files:\n"
+                f"\nSome records have inconsistent labels in the input files. This may"
+                " be intentional because you are trying to overwrite labels in an input"
+                " file with labels from another input file. However, it may also be"
+                " because some records are unintentionally labeled inconsistently.\n\n"
+                "The following records have inconsistent labels in the input files:\n"
                 f"{df_info_conflicts}\n"
             )
 
@@ -197,14 +201,19 @@ def create_composition(
 
         elif resolve == "keep_one":
             warnings.warn(
-                f"Continuing, keeping one label for records with inconsistent labels, resolving conflicts using the "
-                f"following hierarchy:\n1. {dict_terms[order[0]]}\n2. {dict_terms[order[1]]}\n3. {dict_terms[order[2]]}"
+                f"Continuing, keeping one label for records with inconsistent labels,"
+                " resolving conflicts using the following hierarchy:"
+                f"\n1. {dict_terms[order[0]]}\n2. {dict_terms[order[1]]}"
+                f"\n3. {dict_terms[order[2]]}",
+                stacklevel=1,
             )
             df_composed = as_conflict.drop_duplicates(pid=pid).reset_index(drop=True)
 
         elif resolve == "keep_all":
             warnings.warn(
-                f"Continuing, keeping all labels for duplicate records with inconsistent labels."
+                "Continuing, keeping all labels for duplicate records with inconsistent"
+                " labels.",
+                stacklevel=1,
             )
             df_composed = as_conflict.df
 