Skip to content

Commit

Permalink
Merge pull request #136 from MannLabs/alphaquant_updates
Browse files Browse the repository at this point in the history
Add option to specify own config file
  • Loading branch information
GeorgWa authored Mar 18, 2024
2 parents 3dd3953 + 860bac4 commit bdc9877
Show file tree
Hide file tree
Showing 7 changed files with 179 additions and 899 deletions.
51 changes: 22 additions & 29 deletions alphabase/constants/const_files/quant_reader_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,10 @@ alphapept_peptides:

use_iontree: False

maxquant_peptides:

maxquant_peptides_leading_razor_protein:
format: widetable
quant_prefix: "Intensity "
quant_pre_or_suffix: "Intensity "
protein_cols:
- Leading razor protein
ion_cols:
Expand All @@ -45,33 +46,10 @@ maxquant_peptides:
param: Potential contaminant
comparator: "!="
value: "+"
ml_level: SEQ
use_iontree: False

maxquant_peptides_benchmarking:
format: widetable
quant_prefix: "Intensity "
protein_cols:
- Protein group IDs
ion_cols:
- Sequence
ion_hierarchy:
sequence_int:
order: [SEQ, MOD]
mapping:
SEQ:
- Sequence
MOD:
- Mass
filters:
reverse:
param: Reverse
comparator: "!="
value: "+"
contaminant:
param: Potential contaminant
amino_acid:
param: Amino acid before
comparator: "!="
value: "+"
value: "XYZ"
ml_level: SEQ
use_iontree: False

Expand Down Expand Up @@ -1083,4 +1061,19 @@ diann_fragion_isotopes_gene_level:
MS1ISOTOPES:
- Precursor.Charge
use_iontree: True
ml_level: CHARGE
ml_level: CHARGE

fragpipe_precursors:
format: widetable
quant_pre_or_suffix: " Intensity"
protein_cols:
- Protein
ion_hierarchy:
sequence_int:
order: [SEQ, MOD]
mapping:
SEQ:
- Peptide Sequence
MOD:
- Modified Sequence
use_iontree: False
36 changes: 21 additions & 15 deletions alphabase/quantification/quant_reader/config_dict_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,39 +9,45 @@
# Default path of the quant-reader YAML config: starting from this module's
# directory, go three levels up and then down into
# alphabase/constants/const_files/.
INTABLE_CONFIG = os.path.join(pathlib.Path(__file__).parent.absolute(), "../../../alphabase/constants/const_files/quant_reader_config.yaml")

def get_input_type_and_config_dict(input_file, input_type_to_use = None):
config_dict = _load_config(INTABLE_CONFIG)
type2relevant_columns = _get_type2relevant_cols(config_dict)
all_config_dicts = _load_config(INTABLE_CONFIG)
type2relevant_columns = _get_type2relevant_cols(all_config_dicts)

if "aq_reformat.tsv" in input_file:
input_file = _get_original_file_from_aq_reformat(input_file)

filename = str(input_file)
if '.csv' in filename:
sep=','
if '.tsv' in filename:
sep='\t'
if '.txt' in filename:
sep='\t'

if 'sep' not in locals():
raise TypeError(f"neither of the file extensions (.tsv, .csv, .txt) detected for file {input_file}! Your filename has to contain one of these extensions. Please modify your file name accordingly.")
sep = _get_seperator(input_file)

uploaded_data_columns = set(pd.read_csv(input_file, sep=sep, nrows=1, encoding ='latin1').columns)
uploaded_data_columns = set(pd.read_csv(input_file, sep=sep, nrows=1).columns)

for input_type in type2relevant_columns.keys():
if (input_type_to_use is not None) and (input_type!=input_type_to_use):
continue
relevant_columns = type2relevant_columns.get(input_type)
relevant_columns = [x for x in relevant_columns if x] #filter None values
if set(relevant_columns).issubset(uploaded_data_columns):
config_dict_type = config_dict.get(input_type)
return input_type, config_dict_type, sep
config_dict = all_config_dicts.get(input_type)
return input_type, config_dict, sep

raise TypeError("format not specified in intable_config.yaml!")

def _get_original_file_from_aq_reformat(input_file):
matched = re.match("(.*)(\..*\.)(aq_reformat\.tsv)",input_file)
return matched.group(1)

def _get_seperator(input_file):
filename = str(input_file)
if '.csv' in filename:
sep=','
if '.tsv' in filename:
sep='\t'
if '.txt' in filename:
sep='\t'

if 'sep' not in locals():
raise TypeError(f"neither of the file extensions (.tsv, .csv, .txt) detected for file {input_file}! Your filename has to contain one of these extensions. Please modify your file name accordingly.")
return sep



def _load_config(config_yaml):
with open(config_yaml, 'r') as stream:
Expand Down
3 changes: 3 additions & 0 deletions alphabase/quantification/quant_reader/quant_reader_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,5 +44,8 @@ def reformat_and_save_input_file(input_file, input_type_to_use = None, use_alpha
raise Exception('Format not recognized!')
return outfile_name

def set_quanttable_config_location(quanttable_config_file):
    """Use a custom quant-table config file instead of the default one.

    Rebinds ``config_dict_loader.INTABLE_CONFIG`` to the given value.

    Args:
        quanttable_config_file: Path of the config file to use.
            NOTE(review): presumably a YAML file with the same layout as
            quant_reader_config.yaml -- confirm against the loader.
    """
    config_dict_loader.INTABLE_CONFIG = quanttable_config_file



4 changes: 2 additions & 2 deletions alphabase/quantification/quant_reader/table_reformatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,8 @@ def get_quantitative_columns(input_df, hierarchy_type, config_dict, ion_headers_

if config_dict.get("format") == 'widetable':
quantcolumn_candidates = [x for x in input_df.columns if x not in naming_columns]
if "quant_prefix" in config_dict.keys():
return [x for x in quantcolumn_candidates if x.startswith(config_dict.get("quant_prefix"))] # in the case that the quantitative columns have a prefix (like "Intensity " in MQ peptides.txt), only columns with the prefix are filtered
if "quant_pre_or_suffix" in config_dict.keys():
return [x for x in quantcolumn_candidates if x.startswith(config_dict.get("quant_pre_or_suffix")) or x.endswith(config_dict.get("quant_pre_or_suffix"))] # in the case that the quantitative columns have a prefix (like "Intensity " in MQ peptides.txt), only columns with the prefix are filtered
else:
return quantcolumn_candidates #in this case, we assume that all non-ionname/proteinname columns are quantitative columns

Expand Down
8 changes: 4 additions & 4 deletions alphabase/quantification/quant_reader/wideformat_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@ def reformat_and_write_wideformat_table(peptides_tsv, outfile_name, config_dict)
input_df = quantreader_utils.filter_input(filter_dict, input_df)
#input_df = merge_protein_and_ion_cols(input_df, config_dict)
input_df = table_reformatter.merge_protein_cols_and_config_dict(input_df, config_dict)
if 'quant_prefix' in config_dict.keys():
quant_prefix = config_dict.get('quant_prefix')
headers = ['protein', 'quant_id'] + list(filter(lambda x: x.startswith(quant_prefix), input_df.columns))
if 'quant_pre_or_suffix' in config_dict.keys():
quant_pre_or_suffix = config_dict.get('quant_pre_or_suffix')
headers = ['protein', 'quant_id'] + list(filter(lambda x: x.startswith(quant_pre_or_suffix) or x.endswith(quant_pre_or_suffix), input_df.columns))
input_df = input_df[headers]
input_df = input_df.rename(columns = lambda x : x.replace(quant_prefix, ""))
input_df = input_df.rename(columns = lambda x : x.replace(quant_pre_or_suffix, ""))

#input_df = input_df.reset_index()

Expand Down
Loading

0 comments on commit bdc9877

Please sign in to comment.