Skip to content

Commit

Permalink
Update ProteinAssigner.yaml
Browse files Browse the repository at this point in the history
  • Loading branch information
rbarreror authored Sep 23, 2022
1 parent 076c9e7 commit b95f5a1
Showing 1 changed file with 36 additions and 35 deletions.
71 changes: 36 additions & 35 deletions config/ProteinAssigner.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,34 @@

# Path to the input file(s) containing the PSMs
infile:
- Path/To/File1
- Path/To/File2
- /path/to/infile1.tsv
- /path/to/infile2.tsv

# Path to the output file(s) containing the PSMs with their most probable proteins
outfile:
- Path/To/File1
- Path/To/File2
outfile:
- /path/to/outfile1.tsv
- /path/to/outfile2.tsv

# Name of the column containing peptide sequence
seq_column: Sequence
seq_column: plain_peptide


# Name of the output column with the most probable accession
mpp_a: MPP_accessions


# Name of the output column with the most probable description
mpp_d: MPP_description


# Regular expressions used to filter in case of ties, written as a slash-delimited list: "/regex1/regex2/regex3/.../"
regex: /^sp/Sus scrofa/^((?!fragment).)*$/


#Sequence

# Select mode of execution: fasta/column
mode: column
mode: fasta


#
Expand All @@ -25,49 +40,35 @@ mode: column
fasta_params:

# Path to fasta file used to identify candidate proteins
fasta: Path/To/Fasta.fa
fasta: /path/to/fasta.fa

# decoy prefix in fasta
decoy_prefix: DECOY_

# Convert L, I and J to the selected letter
iso_leucine: L

# Column names
column_names:

# name of the column containing candidate descriptions
candidate_d: PA_description_candidates

# name of the column containing candidate accessions
candidate_a: PA_accession_candidates

# name of the column with most probable description
mpp_d: MPP_description

# name of the column with most probable accession
mpp_a: MPP_accessions


# name of the output column containing candidate accessions
candidate_a: PA_accession_candidates

# name of the output column containing candidate descriptions
candidate_d: PA_description_candidates


#
# COLUMN mode parameters (if column mode selected)
#
column_params:

# Name of the column(s) containing information of the candidate proteins
prot_column:
- Protein_Accessions
- Protein_Descriptions

# Name of the output column(s) with the most probable protein
prot_column_mpp:
- Protein_Accessions_MP
- Protein_Descriptions_MP

# name of the input column containing candidate accessions
candidate_a: PA_accession_candidates

# name of the input column containing candidate descriptions
candidate_d: PA_description_candidates

# Character used to separate candidate proteins
sep_char: ;


# Number of cores
n_cores: 4
n_cores: 8

0 comments on commit b95f5a1

Please sign in to comment.