-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathAnnotation.py
513 lines (418 loc) · 25.4 KB
/
Annotation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
import pandas as pd
import numpy as np
import sys
import re
import os
import logging
from pathlib import Path
from openpyxl import load_workbook
class MS_Template():
    """A class to describe the excel macro sheet MS Template Creator

    Every validation failure is reported to the logger (when one is given) and
    to the screen (when ``ingui`` is True) and then stops the program with
    ``sys.exit(-1)``.

    Args:
        filepath (str): file path of the input MS Template Creator file
        column_name (str): name of the calculation that needs the annotation file.
            It is accepted for interface compatibility; the file checks done here do not use it.
        logger (object): logger object created by start_logger in MSOrganiser
        ingui (bool): if True, print analysis status to screen
        doing_normalization (bool): if True, check if the Transition_Name_Annot sheet has data.
            If there is no data, the program reports it and exits
        allow_multiple_istd (bool): if True, allow normalization of data by multiple internal standards
    """

    # Custom_Unit options written by old versions of MSTemplate_Creator.
    # They are rejected with a request to upgrade the template.
    _DEPRECATED_CUSTOM_UNITS = ("[M]", "[mM]", "[uM]", "[nM]", "[pM]",
                                "[M] or [mmol/mL]", "[mM] or [umol/mL]",
                                "[uM] or [nmol/mL]", "[nM] or [pmol/mL]",
                                "[pM] or [fmol/mL]")

    # Custom_Unit options checked after the deprecated ones.
    # NOTE: the plain "[M]" ... "[pM]" entries also appear in the deprecated
    # list above, which is checked first, so in practice they are rejected.
    _ACCEPTED_CUSTOM_UNITS = ("[M]", "[mM]", "[uM]", "[nM]", "[pM]",
                              "[M] or [umol/uL]", "[mM] or [nmol/uL]",
                              "[uM] or [pmol/uL]", "[nM] or [fmol/uL]",
                              "[pM] or [amol/uL]")

    def __init__(self, filepath, column_name, logger=None, ingui=True,
                 doing_normalization=False, allow_multiple_istd=False):
        self.__logger = logger
        self.__ingui = ingui
        self.__doing_normalization = doing_normalization
        self.__allow_multiple_istd = allow_multiple_istd
        self.filepath = filepath
        self.__filecheck(column_name)

    @staticmethod
    def remove_whiteSpaces(df):
        """Strip the whitespaces for each string columns of a df

        Only values that are actual strings are stripped. Other values found in
        an object column (numbers, None, ...) are kept as they are instead of
        being turned into NaN.

        Args:
            df (pandas DataFrame): A panda data frame

        Returns:
            df (pandas DataFrame): The same panda data frame with white space removed
        """
        def strip_strings(column):
            # Build an object Series explicitly so that pandas does not re-infer the dtype
            return pd.Series([value.strip() if isinstance(value, str) else value
                              for value in column],
                             index=column.index, dtype=object)

        object_part = df.select_dtypes(['object'])
        if len(object_part.columns) > 0:
            df[object_part.columns] = object_part.apply(strip_strings)
        return df

    def __report(self, log_message, print_message=None, level="error"):
        """Send a message to the logger (if any) and to the screen (if ingui is True)"""
        if print_message is None:
            print_message = log_message
        if self.__logger:
            getattr(self.__logger, level)(log_message)
        if self.__ingui:
            print(print_message, flush=True)

    def __fail(self, log_message, print_message=None, level="error"):
        """Report a message and stop the program with exit code -1"""
        self.__report(log_message, print_message, level)
        sys.exit(-1)

    def __filecheck(self, column_name):
        """Check that self.filepath is an existing file that is not a csv file

        A blank/None input is already taken care of by MSParser.py,
        which is why column_name is not used here.
        """
        # str() so that a pathlib.Path input can be used in the messages as well
        filepath_text = str(self.filepath)
        file_to_check = Path(filepath_text)

        # Check if filepath exists and is a file
        if not file_to_check.exists():
            self.__fail('Input annotation ' + '\'' + filepath_text + '\'' +
                        ' could not be found. ' +
                        'Please check the input file path.')
        elif not file_to_check.is_file():
            self.__fail('Input file path ' + '\'' + filepath_text + '\'' +
                        ' does not lead to a system file. ' +
                        'Please check if the input file path is a system file and not a folder.')

        # Compare in lower case so that ".CSV" is rejected here with a clear message
        if filepath_text.lower().endswith('.csv'):
            self.__fail('This program no longer accepts csv file as input for the annotation file. ' +
                        'Please use the excel template file given.')

    def __readExcelWorkbook(self):
        """Open self.filepath with openpyxl (cell values only) and return the workbook"""
        try:
            wb = load_workbook(filename=self.filepath, data_only=True)
        except Exception as e:
            if self.__logger:
                self.__logger.error("Unable to read excel file %s", self.filepath)
                self.__logger.error(e)
            if self.__ingui:
                print("Unable to read excel file " + str(self.filepath), flush=True)
                print(e, flush=True)
            sys.exit(-1)
        return wb

    @staticmethod
    def __sheet_with_header_to_df(worksheet):
        """Convert a worksheet whose first row holds the column names into a data frame

        Rows in which every cell is None, NA or NaN are dropped. The row index
        is kept so that index + 2 is the row number in the excel sheet.
        """
        # Get the column names in the first row of the excel sheet
        header = next(worksheet.values)[0:]
        table = pd.DataFrame(worksheet.values, columns=header)
        # Remove the first row as the headers have been set up, then reset the row index
        table = table.iloc[1:].reset_index(drop=True)
        return table.dropna(axis=0, how='all')

    def __checkExcelWorksheet_in_Workbook(self, sheetname, wb):
        """Exit the program if the excel file does not have the sheet sheetname"""
        if sheetname not in wb.sheetnames:
            self.__fail('Sheet name ' + sheetname + ' does not exists. Please check the input excel file.')

    def __check_if_df_is_empty(self, sheetname, df):
        """Exit the program if the input sheet has no data"""
        if df.empty:
            self.__fail('The input ' + sheetname + ' sheet has no data.', level="warning")

    def __checkColumns_in_df(self, colname, sheetname, df):
        """Exit the program if the column name does not exist as a header in the df"""
        if colname not in df:
            self.__fail('The ' + sheetname + ' sheet is missing the column ' + colname + '.')

    def __checkDuplicates_in_cols(self, colname_list, sheetname, df, row_offset=2):
        """Exit the program if a list of columns in the input df has duplicate data

        Args:
            colname_list (list): column names that together must be unique
            sheetname (str): name of the sheet, used in the message
            df (pandas DataFrame): data to check
            row_offset (int): value added to the df index to get the excel row number.
                The default of 2 is for sheets with one header row.
        """
        duplicated_mask = df.duplicated(subset=colname_list)
        if not duplicated_mask.any():
            return

        rows = ', '.join(str(int(i) + row_offset) for i in duplicated_mask[duplicated_mask].index)
        location = 'Data at ' + ', '.join(colname_list) + ' column(s) in the ' + sheetname
        self.__fail(location + ' sheet have duplicates at row(s) ' + rows + '.',
                    location + ' sheet has duplicates at row(s) ' + rows + '.')

    def Read_Transition_Name_Annot_Sheet(self):
        """Read the excel sheet Transition_Name_Annot as a pandas data frame

        Returns:
            Transition_Name_Annot_df (pandas DataFrame): A panda data frame containing the contents of Transition_Name_Annot
        """
        # Open the excel file
        wb = self.__readExcelWorkbook()
        try:
            # Check if the excel file has the sheet "Transition_Name_Annot"
            self.__checkExcelWorksheet_in_Workbook("Transition_Name_Annot", wb)

            # Convert worksheet to a dataframe without the rows that are completely empty
            Transition_Name_Annot_df = self.__sheet_with_header_to_df(wb["Transition_Name_Annot"])

            # Remove columns with column name set as None, NA or NaN
            # The copy makes sure the clean up below does not write into a view
            Transition_Name_Annot_df = Transition_Name_Annot_df.loc[:, Transition_Name_Annot_df.columns.notna()].copy()

            # Validate the sheet (has the Transition_Name and Transition_Name_ISTD columns,
            # no empty or duplicated Transition_Name)
            self.__validate_Transition_Name_Annot_sheet("Transition_Name_Annot", Transition_Name_Annot_df,
                                                        allow_multiple_istd=self.__allow_multiple_istd)

            # Remove whitespaces in column names
            Transition_Name_Annot_df.columns = Transition_Name_Annot_df.columns.str.strip()

            # Remove whitespace for each string column
            Transition_Name_Annot_df = MS_Template.remove_whiteSpaces(Transition_Name_Annot_df)

            # Remove rows with ISTD with no Transition_Name
            # A bit redundant as an error has been given in __validate_Transition_Name_Annot_sheet
            if not Transition_Name_Annot_df.empty:
                Transition_Name_Annot_df = Transition_Name_Annot_df.dropna(subset=['Transition_Name'])
        finally:
            # Close the workbook even when a validation step exits the program
            wb.close()

        return Transition_Name_Annot_df

    def __validate_Transition_Name_Annot_sheet(self, sheetname, Transition_Name_Annot_df,
                                               allow_multiple_istd=False):
        """Exit the program if the Transition_Name_Annot data is not usable

        Checks, in order: the sheet has data (only when normalization is performed),
        the columns Transition_Name and Transition_Name_ISTD exist, no Transition_Name
        is empty and there are no duplicates. With allow_multiple_istd, the pair
        (Transition_Name, Transition_Name_ISTD) must be unique instead of Transition_Name alone.
        """
        # Validate the Transition_Name_Annot sheet has data when normalization is performed
        if self.__doing_normalization:
            self.__check_if_df_is_empty(sheetname, Transition_Name_Annot_df)

        # Check if the required columns exist as a header in Transition_Name_Annot_df
        self.__checkColumns_in_df('Transition_Name', sheetname, Transition_Name_Annot_df)
        self.__checkColumns_in_df('Transition_Name_ISTD', sheetname, Transition_Name_Annot_df)

        # Check if the column Transition_Name has empty entries and highlight them.
        # Index + 2 is the excel row number (one header row, excel rows start at 1)
        empty_rows = Transition_Name_Annot_df.index[Transition_Name_Annot_df["Transition_Name"].isna()]
        if len(empty_rows) > 0:
            self.__fail('There are transition name annotations that are not associated with a transition name at row(s) ' +
                        ', '.join(str(int(i) + 2) for i in empty_rows) + '. ' +
                        'Ensure that every annotation is associated with a Transition_Name.')

        # Check if Transition_Name column has duplicate Transition_Names
        if allow_multiple_istd:
            unique_columns = ['Transition_Name', 'Transition_Name_ISTD']
        else:
            unique_columns = ['Transition_Name']
        self.__checkDuplicates_in_cols(colname_list=unique_columns,
                                       sheetname=sheetname,
                                       df=Transition_Name_Annot_df)

    def Read_ISTD_Annot_Sheet(self):
        """Read the excel sheet ISTD_Annot as a pandas data frame

        Returns:
            ISTD_Annot_df (pandas DataFrame): A panda data frame containing the contents of ISTD_Annot

        Note:
            Only the excel columns A (Transition_Name_ISTD) and F (Custom_Unit) are taken.
            The second column is named after cell E3 with its bracketed unit
            replaced by the custom unit given in cell F3.
        """
        # Open the excel file
        wb = self.__readExcelWorkbook()
        try:
            # Check if the excel file has the sheet "ISTD_Annot"
            self.__checkExcelWorksheet_in_Workbook("ISTD_Annot", wb)
            worksheet = wb["ISTD_Annot"]

            # Check that sheet is valid
            self.__validate_ISTD_Annot_Sheet(worksheet)

            # Get the column names
            # F3 has been validated against a fixed list of units so it is safe as a replacement text
            istd_conc_name = re.sub(r"\[.*?\]", worksheet["F3"].value, worksheet["E3"].value)
            cols = [worksheet["A2"].value, istd_conc_name]

            # Get the ISTD Table and clean it up
            ISTD_Annot_df = pd.DataFrame(worksheet.values)

            # Remove the first three rows (headers) and reset the row index,
            # index + 4 is then the excel row number
            ISTD_Annot_df = ISTD_Annot_df.iloc[3:].reset_index(drop=True)

            # Take specific columns (A and F only)
            ISTD_Annot_df = ISTD_Annot_df.iloc[:, [0, 5]].copy()
            ISTD_Annot_df.columns = cols

            # Remove rows with no Transition_Name_ISTD
            ISTD_Annot_df = ISTD_Annot_df.dropna(subset=['Transition_Name_ISTD']).copy()

            # Check if Transition_Name_ISTD column has duplicate Transition_Name_ISTD
            self.__checkDuplicates_in_cols(colname_list=['Transition_Name_ISTD'],
                                           sheetname='ISTD_Annot',
                                           df=ISTD_Annot_df,
                                           row_offset=4)

            # Remove whitespaces in column names
            ISTD_Annot_df.columns = ISTD_Annot_df.columns.str.strip()

            # Convert the concentration column to numeric, invalid entries become NaN
            ISTD_Annot_df[istd_conc_name] = pd.to_numeric(ISTD_Annot_df[istd_conc_name], errors='coerce')

            # Remove whitespace for each string column
            ISTD_Annot_df = MS_Template.remove_whiteSpaces(ISTD_Annot_df)
        finally:
            # Close the workbook even when a validation step exits the program
            wb.close()

        return ISTD_Annot_df

    def __validate_ISTD_Annot_Sheet(self, worksheet):
        """Exit the program if the fixed header cells of the ISTD_Annot sheet have been tampered with"""
        if worksheet["A2"].value != "Transition_Name_ISTD":
            self.__fail('The ISTD_Annot sheet is missing the column Transition_Name_ISTD at position A2.')

        if worksheet["E3"].value != "ISTD_Conc_[nM]":
            self.__fail('The ISTD_Annot sheet is missing the column ISTD_Conc_[nM] at position E3.')

        if worksheet["F2"].value != "Custom_Unit":
            self.__fail('The ISTD_Annot sheet is missing the column Custom_Unit at position F2.')

        custom_unit = worksheet["F3"].value

        if custom_unit in MS_Template._DEPRECATED_CUSTOM_UNITS:
            self.__fail('Sheet ISTD_Annot\'s column Custom_Unit option ' +
                        custom_unit + ' ' +
                        'is no longer accepted in MSOrganiser. ' +
                        'Please use a later version of MSTemplate_Creator (above 1.0.1).')

        if custom_unit not in MS_Template._ACCEPTED_CUSTOM_UNITS:
            # str() as the cell can be empty (None) or hold a number
            self.__fail('Sheet ISTD_Annot\'s column Custom_Unit option ' +
                        str(custom_unit) + ' is invalid.')

    def Read_Sample_Annot_Sheet(self, MS_FilePathList=None):
        """Read the excel sheet Sample_Annot as a pandas data frame

        Args:
            MS_FilePathList (list): A list of MRM transition name file names.
                None or an empty list means all rows are taken.

        Note:
            The list of MRM transition name file names is to help the program properly filter
            the Sample annotation such that we only pick rows whose Data_File_Name values is in the list.
            Currently, our input is set as [os.path.basename(self.MS_FilePath)] from MSAnalysis.

        Returns:
            Sample_Annot_df (pandas DataFrame): A panda data frame containing the contents of Sample_Annot
        """
        if MS_FilePathList is None:
            MS_FilePathList = []

        # Open the excel file
        wb = self.__readExcelWorkbook()
        try:
            # Check if the excel file has the sheet "Sample_Annot"
            self.__checkExcelWorksheet_in_Workbook("Sample_Annot", wb)

            # Convert worksheet to a dataframe without the rows that are completely empty
            Sample_Annot_df = self.__sheet_with_header_to_df(wb["Sample_Annot"])

            # Validate the Sample_Annot sheet is valid
            # (the columns are not removed in the excel sheet but can be empty)
            self.__validate_Sample_Annot_sheet("Sample_Annot", Sample_Annot_df)

            # We take the Sample Annotation data that can be found in the MS_FilePathList
            # Else we just take all of them
            if len(MS_FilePathList) > 0:
                in_file_list = Sample_Annot_df["Data_File_Name"].isin(MS_FilePathList)
                # The copy makes sure the clean up below does not write into a view
                Sample_Annot_df = Sample_Annot_df[in_file_list].copy()

                # Every provided MS_FilePath must have at least one sample annotation
                # If not, stop the program and inform the user to check the Sample Annot file
                MS_FilePath_with_no_sample_annot = [
                    MS_FilePath for MS_FilePath in MS_FilePathList
                    if not Sample_Annot_df["Data_File_Name"].isin([MS_FilePath]).any()
                ]
                if len(MS_FilePath_with_no_sample_annot) > 0:
                    self.__fail('The "Data_File_Name" column in the Sample Annotation sheet does not contain the input file name(s).\n' +
                                "\n".join(MS_FilePath_with_no_sample_annot) + '\n' +
                                'Please correct the Sample Annotation sheet or the input file name.')

            # Remove whitespaces in column names
            Sample_Annot_df.columns = Sample_Annot_df.columns.str.strip()

            # Convert all number columns to numeric, invalid entries become NaN
            Sample_Annot_df['Sample_Amount'] = pd.to_numeric(Sample_Annot_df['Sample_Amount'], errors='coerce')
            Sample_Annot_df['ISTD_Mixture_Volume_[uL]'] = pd.to_numeric(Sample_Annot_df['ISTD_Mixture_Volume_[uL]'], errors='coerce')

            # Remove columns with all None, NA, NaN
            Sample_Annot_df = Sample_Annot_df.dropna(axis=1, how='all')

            # Remove whitespace for each string column
            Sample_Annot_df = MS_Template.remove_whiteSpaces(Sample_Annot_df)
        finally:
            # Close the workbook even when a validation step exits the program
            wb.close()

        return Sample_Annot_df

    def __validate_Sample_Annot_sheet(self, sheetname, Sample_Annot_df):
        """Check the headers of the Sample_Annot data and warn about incomplete rows

        The program exits when the old column "Raw_Data_File_Name" is present or
        when a required column is missing. Rows with an empty Data_File_Name or
        Sample_Name only give a warning.
        """
        # "Raw_Data_File_Name" comes from an old version of MSTemplate_Creator
        if "Raw_Data_File_Name" in Sample_Annot_df:
            self.__fail('The ' + sheetname + ' sheet contains the column "Raw_Data_File_Name". ' +
                        'This column name is no longer accepted in MSOrganiser. ' +
                        'Please use a later version of MSTemplate_Creator (above 0.0.1) that ' +
                        'uses "Data_File_Name" instead.')

        # Check if the required columns exist as a header in Sample_Annot_df
        # (Merge_Status is not a required column)
        for required_column in ('Data_File_Name', 'Sample_Name', 'Sample_Type',
                                'Sample_Amount', 'Sample_Amount_Unit',
                                'ISTD_Mixture_Volume_[uL]', 'Concentration_Unit'):
            self.__checkColumns_in_df(required_column, sheetname, Sample_Annot_df)

        # Check if the column Data_File_Name has empty entries and highlight them.
        # Index + 2 is the excel row number (one header row, excel rows start at 1)
        empty_file_rows = Sample_Annot_df.index[Sample_Annot_df["Data_File_Name"].isna()]
        if len(empty_file_rows) > 0:
            self.__report('There are sample names that are not associated with a data file name at row(s) ' +
                          ', '.join(str(int(i) + 2) for i in empty_file_rows) + '. ' +
                          'They will not be used during analysis. ' +
                          'Ensure that both columns Data_File_Name and Sample_Name are filled for each sample.',
                          level="warning")

        # Check if the column Sample_Name has empty entries and highlight them.
        empty_sample_rows = Sample_Annot_df.index[Sample_Annot_df["Sample_Name"].isna()]
        if len(empty_sample_rows) > 0:
            self.__report('There are data file names that are not associated with a sample name at row(s) ' +
                          ', '.join(str(int(i) + 2) for i in empty_sample_rows) + '. ' +
                          'They will not be used during analysis. ' +
                          'Ensure that both columns Data_File_Name and Sample_Name are filled for each sample.',
                          level="warning")