Merge pull request #136 from MannLabs/alphaquant_updates

Add option to specify own config file
MannLabs · Mar 18, 2024 · bdc9877 · bdc9877
2 parents 3dd3953 + 860bac4
commit bdc9877
Show file tree

Hide file tree

Showing 7 changed files with 179 additions and 899 deletions.
diff --git a/alphabase/constants/const_files/quant_reader_config.yaml b/alphabase/constants/const_files/quant_reader_config.yaml
@@ -21,9 +21,10 @@ alphapept_peptides:
 
   use_iontree: False
 
-maxquant_peptides:
+
+maxquant_peptides_leading_razor_protein:
   format: widetable
-  quant_prefix: "Intensity "
+  quant_pre_or_suffix: "Intensity "
   protein_cols:
    - Leading razor protein
   ion_cols:
@@ -45,33 +46,10 @@ maxquant_peptides:
       param: Potential contaminant
       comparator: "!="
       value: "+"
-  ml_level: SEQ
-  use_iontree: False
-
-maxquant_peptides_benchmarking:
-  format: widetable
-  quant_prefix: "Intensity "
-  protein_cols:
-   - Protein group IDs
-  ion_cols:
-   - Sequence
-  ion_hierarchy:
-    sequence_int:
-      order: [SEQ, MOD]
-      mapping:
-        SEQ: 
-          - Sequence
-        MOD:
-          - Mass
-  filters:
-    reverse:
-      param: Reverse
-      comparator: "!="
-      value: "+"
-    contaminant:
-      param: Potential contaminant
+    amino_acid:
+      param: Amino acid before
       comparator: "!="
-      value: "+"
+      value: "XYZ"
   ml_level: SEQ
   use_iontree: False
 
@@ -1083,4 +1061,19 @@ diann_fragion_isotopes_gene_level:
         MS1ISOTOPES:
          - Precursor.Charge
   use_iontree: True
-  ml_level: CHARGE
+  ml_level: CHARGE
+
+fragpipe_precursors:
+  format: widetable
+  quant_pre_or_suffix: " Intensity"
+  protein_cols:
+   - Protein
+  ion_hierarchy:
+    sequence_int:
+      order: [SEQ, MOD]
+      mapping:
+        SEQ:
+          - Peptide Sequence
+        MOD: 
+          - Modified Sequence
+  use_iontree: False
diff --git a/alphabase/quantification/quant_reader/config_dict_loader.py b/alphabase/quantification/quant_reader/config_dict_loader.py
@@ -9,39 +9,45 @@
 INTABLE_CONFIG = os.path.join(pathlib.Path(__file__).parent.absolute(), "../../../alphabase/constants/const_files/quant_reader_config.yaml") #the yaml config lives in alphabase/constants/const_files; the path walks up three directories from this module's folder and then back down into the package
 
 def get_input_type_and_config_dict(input_file, input_type_to_use = None):
-    config_dict = _load_config(INTABLE_CONFIG)
-    type2relevant_columns = _get_type2relevant_cols(config_dict)
+    all_config_dicts = _load_config(INTABLE_CONFIG)
+    type2relevant_columns = _get_type2relevant_cols(all_config_dicts)
 
     if "aq_reformat.tsv" in input_file:
         input_file = _get_original_file_from_aq_reformat(input_file)
 
-    filename = str(input_file)
-    if '.csv' in filename:
-        sep=','
-    if '.tsv' in filename:
-        sep='\t'
-    if '.txt' in filename:
-        sep='\t'
-
-    if 'sep' not in locals():
-        raise TypeError(f"neither of the file extensions (.tsv, .csv, .txt) detected for file {input_file}! Your filename has to contain one of these extensions. Please modify your file name accordingly.")
+    sep = _get_seperator(input_file)
 
-    uploaded_data_columns = set(pd.read_csv(input_file, sep=sep, nrows=1, encoding ='latin1').columns)
+    uploaded_data_columns = set(pd.read_csv(input_file, sep=sep, nrows=1).columns)
 
     for input_type in type2relevant_columns.keys():
         if (input_type_to_use is not None) and (input_type!=input_type_to_use):
             continue
         relevant_columns = type2relevant_columns.get(input_type)
         relevant_columns = [x for x in relevant_columns if x] #filter None values
         if set(relevant_columns).issubset(uploaded_data_columns):
-            config_dict_type =  config_dict.get(input_type)
-            return input_type, config_dict_type, sep
+            config_dict =  all_config_dicts.get(input_type)
+            return input_type, config_dict, sep
+
     raise TypeError("format not specified in intable_config.yaml!")
 
 def _get_original_file_from_aq_reformat(input_file):
     matched = re.match("(.*)(\..*\.)(aq_reformat\.tsv)",input_file)
     return matched.group(1)
 
+def _get_seperator(input_file):
+    filename = str(input_file)
+    if '.csv' in filename:
+        sep=','
+    if '.tsv' in filename:
+        sep='\t'
+    if '.txt' in filename:
+        sep='\t'
+
+    if 'sep' not in locals():
+        raise TypeError(f"neither of the file extensions (.tsv, .csv, .txt) detected for file {input_file}! Your filename has to contain one of these extensions. Please modify your file name accordingly.")
+    return sep
+
+
 
 def _load_config(config_yaml):
     with open(config_yaml, 'r') as stream:

diff --git a/alphabase/quantification/quant_reader/quant_reader_manager.py b/alphabase/quantification/quant_reader/quant_reader_manager.py
@@ -44,5 +44,8 @@ def reformat_and_save_input_file(input_file, input_type_to_use = None, use_alpha
         raise Exception('Format not recognized!')
     return outfile_name
 
+def set_quanttable_config_location(quanttable_config_file):
+    config_dict_loader.INTABLE_CONFIG = quanttable_config_file
+
 
 
diff --git a/alphabase/quantification/quant_reader/table_reformatter.py b/alphabase/quantification/quant_reader/table_reformatter.py
@@ -77,8 +77,8 @@ def get_quantitative_columns(input_df, hierarchy_type, config_dict, ion_headers_
 
     if config_dict.get("format") == 'widetable':
         quantcolumn_candidates = [x for x in input_df.columns if x not in naming_columns]
-        if "quant_prefix" in config_dict.keys():
-            return [x for x in quantcolumn_candidates if x.startswith(config_dict.get("quant_prefix"))] # in the case that the quantitative columns have a prefix (like "Intensity " in MQ peptides.txt), only columns with the prefix are filtered
+        if "quant_pre_or_suffix" in config_dict.keys():
+            return [x for x in quantcolumn_candidates if x.startswith(config_dict.get("quant_pre_or_suffix")) or x.endswith(config_dict.get("quant_pre_or_suffix"))] # if the quantitative columns are marked by a prefix or suffix (e.g. the prefix "Intensity " in MQ peptides.txt, or the suffix " Intensity" in the fragpipe_precursors config), only columns that start or end with it are kept
         else:
             return quantcolumn_candidates #in this case, we assume that all non-ionname/proteinname columns are quantitative columns
 

diff --git a/alphabase/quantification/quant_reader/wideformat_reader.py b/alphabase/quantification/quant_reader/wideformat_reader.py
@@ -11,11 +11,11 @@ def reformat_and_write_wideformat_table(peptides_tsv, outfile_name, config_dict)
     input_df = quantreader_utils.filter_input(filter_dict, input_df)
     #input_df = merge_protein_and_ion_cols(input_df, config_dict)
     input_df = table_reformatter.merge_protein_cols_and_config_dict(input_df, config_dict)
-    if 'quant_prefix' in config_dict.keys():
-        quant_prefix = config_dict.get('quant_prefix')
-        headers = ['protein', 'quant_id'] + list(filter(lambda x: x.startswith(quant_prefix), input_df.columns))
+    if 'quant_pre_or_suffix' in config_dict.keys():
+        quant_pre_or_suffix = config_dict.get('quant_pre_or_suffix')
+        headers = ['protein', 'quant_id'] + list(filter(lambda x: x.startswith(quant_pre_or_suffix) or x.endswith(quant_pre_or_suffix), input_df.columns))
         input_df = input_df[headers]
-        input_df = input_df.rename(columns = lambda x : x.replace(quant_prefix, ""))
+        input_df = input_df.rename(columns = lambda x : x.replace(quant_pre_or_suffix, ""))
 
     #input_df = input_df.reset_index()