-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit acfaaf6
Showing
52 changed files
with
19,352 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
# PanDrugsDB | ||
[Pandrugs](https://pandrugs.org/) is a bioinformatics platform to prioritize anticancer drug treatments according to individual genomic data. Its database version 2.0 integrates data from 23 primary sources. | ||
|
||
The code to generate its content and the annotations for VCF files is available in this github repository. | ||
|
||
This workflow summarizes PanDrugsDB composition and the steps implemented for its construction. | ||
|
||
 | ||
|
||
This table describes the sources included in the database and in the annotation process, along with their versions and licenses. | ||
|
||
| Source | Version/Access Date*/DOI | License | Retrieved Information | | ||
| ------------------ | ---------------------------- | --------------------------- | --------------------------------------------------------------- | | ||
| CIViC | v2022-07-01 | CC0 1.0 | Drug-gene associations; Drug-gene annotations | | ||
| DGIdb | v4.2.0 | MIT | Drug-gene associations | | ||
| DrugBank | v5.1.9 | CC BY-NC 4.0 | Drug-gene associations | | ||
| GDSC | v8.2 | CC BY-NC-ND 2.5 | Drug-gene associations; Drug-gene annotations | | ||
| MOAlmanac | v2022-03-03 | GPLv2 | Drug-gene associations; Drug-gene annotations | | ||
| OncoKB | v3.14 | Free for academic use | Drug-gene associations; Drug-gene annotations; Gene annotations | | ||
| Thera-SAbDab | 2022-06-21 | CC BY 4.0 | Drug-gene associations | | ||
| PubChem | 2022-11-29 | Free | Drug standardisation | | ||
| HGNC | v2022-10-01 | CC0 1.0 | Gene standardisation | | ||
| ClinicalTrials.gov | 2022-03-15 | Free | Drug annotations | | ||
| CLUE | v1.2; build 1.44 | CC BY 4.0 | Drug annotations | | ||
| Drugs@FDA | 2022-03-14 | Free | Drug annotations | | ||
| EMA | 2022-05-31 | Free | Drug annotations | | ||
| FDA Drug Labels | 2022-06-01 | CC0 1.0 | Drug annotations | | ||
| KEGG BRITE | v101 | Free for academic use | Drug annotations | | ||
| COSMIC's CGC | v95 | Free for non-commercial use | Gene annotations | | ||
| DepMap | v22Q2 | CC BY 4.0 | Gene annotations; GScore calculation | | ||
| KEGG PATHWAY | v103 | Free for academic use | Gene annotations; Variant annotations | | ||
| OncoVar | v1.2 | Free for non-commercial use | Gene annotations; GScore calculation | | ||
| Cancer Hallmarks | 10.1038/s41598-018-25076-6 | CC BY 4.0 | GScore calculation | | ||
| TDLs | 10.1016/j.celrep.2022.110400 | CC BY-NC-ND 4.0 | GScore calculation | | ||
| ClinVar | v2022-05 | Free | Variant annotations | | ||
| COSMIC | v96 | Free for non-commercial use | Variant annotations | | ||
| Domains | 10.1371/journal.pcbi.1004147 | CC BY 4.0 | Variant annotations | | ||
| InterPro | v88.0 | CC0 1.0 | Variant annotations | | ||
| Pfam | v35.0 | CC0 1.0 | Variant annotations | | ||
| UniProt | v2022_01 | CC BY 4.0 | Variant annotations | | ||
| VEP | v109 | Apache-2.0 | Variant annotations | | ||
|
||
* Dates are displayed in ISO 8601 standard format: YYYY-MM-DD. | ||
|
||
## Synthetic lethal pairs | ||
SL pairs are generated separately from the database and then merged back into the database. The code responsible for generating and updating these pairs is stored [here](https://github.com/cnio-bu/pandrugs_sl_pairs). |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
--- | ||
channels: | ||
- bu_cnio | ||
- conda-forge | ||
- bioconda | ||
- defaults | ||
dependencies: | ||
- perl-archive-extract =0.88 | ||
- perl-dbm-deep =2.0016 | ||
- perl-exporter-tiny =1.002002 | ||
- perl-list-moreutils =0.430 | ||
- perl-parallel-forkmanager =2.02 |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,276 @@ | ||
##!/usr/bin/perl | ||
use lib "modules/"; | ||
use strict; | ||
use warnings; | ||
use Time::HiRes; | ||
use Switch; | ||
use Cwd; | ||
use File::Path; | ||
use Net::FTP; | ||
use Archive::Extract (); | ||
use POSIX (); | ||
use LWP::Simple; | ||
use File::Copy; | ||
use DBM::Deep; | ||
use Parallel::ForkManager; | ||
|
||
# Variable Initialization
# These file-level lexicals are shared with the subs defined further down.
my $dbdir;                          # databases directory, stored with a trailing "/"
my $genesids = "custom";            # gene IDs file name, appended to $dbdir
my $pathways = "pathway_desc.tsv";  # pathway description file name, appended to $dbdir
my $logfile = "";                   # accumulates every message sent through printl()

my ($start, $end, $time);

# Command line arguments handle
# No arguments at all, or an explicit help flag, prints the usage and exits.
if (!@ARGV || grep { /^(?:--help|-h)$/ } @ARGV) {
	help_info();
}

for my $arg (@ARGV) {

	# databases path
	if ($arg =~ /^(?:--databases|-d)=(.*)$/s) {
		my $path = $1;
		# Check the captured value itself, so an empty "-d=" can never pick up
		# a stale capture from an earlier match.
		if ($path eq "") {
			die "\nEmpty argument. Please enter the parameter information.\n\neg. -d=/home/epineiro/Programs/PCDA/databases\n\n";
		}
		$dbdir = $path . "/";
	}

	else {
		die "\nArgument $arg not valid.\n\n";
	}

}

if (!$dbdir) {
	die "\nPath to databases not indicated. Please, enter the databases path.\n\neg. -d=databases\n\n";
}

# Create folders
#mkpath($dbdir, 0);

# Start time counter. It has to be taken before create_dbs() runs, otherwise
# the reported total only covers the empty interval after the work is done.
$start = Time::HiRes::gettimeofday();

create_dbs();

$end = Time::HiRes::gettimeofday();
$time = sprintf("%.2f", $end - $start);
printl ("\nTotal time: $time seconds\n");

exit;
|
||
# Build the DBM::Deep lookup databases (*.db) from the tab-separated source
# files found in $dbdir. Takes no arguments; reads the file-level variables
# $dbdir, $genesids and $pathways. Dies if any input file cannot be opened.
#
# Keys seen during the current run are tracked in plain in-memory hashes, so
# the first row for a key replaces whatever an earlier run left in the .db
# file and later rows for the same key are accumulated.
sub create_dbs {

	# Load files into variables
	print "\n\nLoading database files...\n\n";

	# COSMIC variant tables: every cosmic*.tsv gets a sibling cosmic*.db
	foreach my $file (glob("$dbdir/cosmic*.tsv")) {
		# cosmic_gene_freq.tsv also matches the glob but has its own layout;
		# it is loaded by the dedicated section right below.
		next if $file =~ m{(?:\A|/)cosmic_gene_freq\.tsv\z};

		# Only rewrite the real extension (escaped dot, anchored at the end).
		(my $db_file = $file) =~ s/\.tsv\z/.db/;
		my $cosmic_list = DBM::Deep->new($db_file);
		print("$file\n");
		my $fh = _open_tsv($file);
		while (my $row = <$fh>) {
			chomp $row;
			# Progress indicator every 100000 input lines
			if ($. % 100000 == 0) {print("$.\n")};
			my @line = split ("\t", $row);
			$cosmic_list->{$line[0]} = [$line[1], $line[2], "$line[3] / $line[5]", "$line[4] / $line[5]"];
		}
		close $fh;
	}

	my $cosmic_gene_freq = DBM::Deep->new("$dbdir/cosmic_gene_freq.db");
	my $cosmic_gf_fh = _open_tsv("$dbdir/cosmic_gene_freq.tsv");
	while (my $row = <$cosmic_gf_fh>) {
		chomp $row;
		my @line = split ("\t", $row);
		$cosmic_gene_freq->{$line[0]} = [$line[1], $line[2]];
	}
	close $cosmic_gf_fh;

	print "COSMIC loaded!\n";

	my $genes_ids = DBM::Deep->new("$dbdir/genesids.db");
	my $genes_ids_fh = _open_tsv($dbdir . $genesids);
	while (my $row = <$genes_ids_fh>) {
		chomp $row;
		my @line = split ("\t", $row);
		# Rows without a (true) second column are skipped
		$genes_ids->{$line[0]} = $line[1] if ($line[1]);
	}
	close $genes_ids_fh;

	print "genes IDs loaded!\n";

	my $kegg_gene_pathway_DB = DBM::Deep->new("$dbdir/gene_pathway.db");
	my $gene_pathway_fh = _open_tsv($dbdir . "gene_pathway.tsv");
	while (my $row = <$gene_pathway_fh>) {
		chomp $row;
		my @line = split ("\t", $row);
		$kegg_gene_pathway_DB->{$line[0]} = $line[1];
	}
	close $gene_pathway_fh;

	print "gene-pathway loaded!\n";

	my $pathw_desc = DBM::Deep->new("$dbdir/pathways_desc.db");
	my $pathway_desc_fh = _open_tsv($dbdir . $pathways);
	while (my $row = <$pathway_desc_fh>) {
		chomp $row;
		my @line = split ("\t", $row);
		$pathw_desc->{$line[0]} = $line[1];
	}
	close $pathway_desc_fh;

	print "pathway description loaded!\n";

	# Pfam: one list of [col1, col2, col5, col6] entries per column-4 key.
	my $pfam_a = DBM::Deep->new("$dbdir/pfam.db");
	my %pfam_seen;
	my $pfam_fh = _open_tsv("$dbdir/Pfam-A.full.tsv");
	while (my $row = <$pfam_fh>) {
		chomp $row;
		my @line = split ("\t", $row);
		my $entry = [$line[1], $line[2], $line[5], $line[6]];
		if ($pfam_seen{$line[4]}++) {
			# Append to the list stored in the database itself; the previous
			# code tested an always-empty hash and kept only the last entry.
			push @{$pfam_a->{$line[4]}}, $entry;
		} else {
			$pfam_a->{$line[4]} = [$entry];
		}
	}
	close $pfam_fh;

	print "pfam loaded!\n";

	my $uniprot_b = DBM::Deep->new("$dbdir/uniprot_b.db");
	my $uniprot_fh = _open_tsv("$dbdir/Uniprot.tsv");
	while (my $row = <$uniprot_fh>) {
		chomp $row;
		my @line = split ("\t", $row);
		# List assignment leaves $name undef when the pattern does not match,
		# instead of relying on the undefined "my ... if ..." construct.
		my ($name) = defined $line[0] ? $line[0] =~ /^([A-Z0-9]+)/ : ();
		$uniprot_b->{$line[1]} = $name if (defined $line[1] && $line[1] ne "");
	}
	close $uniprot_fh;

	print "uniprot loaded!\n";

	# InterPro: list of [col0, col1, col4, col5] per column-3 key, plus the
	# largest column-4 value seen for that key in last_domain.db.
	my $interpro_a = DBM::Deep->new("$dbdir/interpro_a.db");
	my $last_domain = DBM::Deep->new("$dbdir/last_domain.db");
	my (%interpro_seen, %max_domain);
	my $interpro_fh = _open_tsv("$dbdir/Interpro.tsv");
	while (my $row = <$interpro_fh>) {
		chomp $row;
		my @line = split ("\t", $row);
		my $entry = [$line[0], $line[1], $line[4], $line[5]];
		if ($interpro_seen{$line[3]}++) {
			push @{$interpro_a->{$line[3]}}, $entry;
		} else {
			$interpro_a->{$line[3]} = [$entry];
		}
		# Keep the running maximum in memory and write only when it grows.
		if (!exists($max_domain{$line[3]}) || $max_domain{$line[3]} < $line[4]) {
			$max_domain{$line[3]} = $line[4];
			$last_domain->{$line[3]} = $line[4];
		}
	}
	close $interpro_fh;

	print "interpro loaded!\n";

	my $oncorole = DBM::Deep->new("$dbdir/generole.db");
	my $generole_fh = _open_tsv("$dbdir/generole.tsv");
	my %pos;	# column index -> column title, taken from the "gene..." header row
	while (my $row = <$generole_fh>) {
		chomp $row;
		my @line = split ("\t", $row);
		if ($row =~ /^gene/) {
			for my $i (0..$#line) {
				$pos{$i} = $line[$i];
			}
		} else {
			my @role_list;
			# Every non-empty cell after the first becomes "column title:value"
			for my $i (1 .. $#line) {
				my $role = $line[$i];
				if ($role ne "") {
					push(@role_list, "$pos{$i}:$role");
				}
			}
			$oncorole->{$line[0]} = join ("; ", @role_list);
		}
	}
	close $generole_fh;
	print "Gene Role loaded!\n";

	my $gscore = DBM::Deep->new("$dbdir/gscore.db");
	# This open used to be unchecked, so a missing file silently produced an
	# empty database.
	my $gscore_fh = _open_tsv("$dbdir/gscore_Ene_2023.tsv");
	while (my $row = <$gscore_fh>) {
		chomp $row;
		my @line = split ("\t", $row);
		# Skip the header row
		unless ($line[0] eq "checked_gene_symbol") {
			$gscore->{$line[0]} = $line[1];
		}
	}
	close $gscore_fh;
	print "gscores loaded!\n";

	# Cancer domains: only key presence matters, the stored value is "".
	my $cancer_domain = DBM::Deep->new("$dbdir/cancer_domain.db");
	my %domain_seen;
	my $domains_fh = _open_tsv("$dbdir/domains.tsv");
	while (my $row = <$domains_fh>) {
		chomp $row;
		my @line = split ("\t", $row);
		unless ($domain_seen{$line[4]}++) {
			$cancer_domain->{$line[4]} = "";
		}
	}
	close $domains_fh;

	print "cancer domains loaded!\n";

	# ClinVar: GRCh38 rows only, keyed by "col2:col3:col4:col5"; columns 7, 0
	# and 8 of rows sharing a key are joined with "; ".
	my $clinvar = DBM::Deep->new("$dbdir/clinvar.db");
	my %clinvar_seen;
	my $clinvar_fh = _open_tsv("$dbdir/Clinvar.tsv");
	while (my $row = <$clinvar_fh>) {
		chomp $row;
		my @line = split ("\t", $row);
		if ($line[1] eq "GRCh38") {
			my $key = "$line[2]:$line[3]:$line[4]:$line[5]";
			if ($clinvar_seen{$key}++) {
				$clinvar->{$key}[0] .= "; $line[7]";
				$clinvar->{$key}[1] .= "; $line[0]";
				$clinvar->{$key}[2] .= "; $line[8]";
			} else {
				$clinvar->{$key} = [$line[7], $line[0], $line[8]];
			}
		}
	}
	close $clinvar_fh;

	print "clinvar loaded!\n";

	return;
}

# Open $path for reading and return a lexical filehandle; dies with the path
# and the system error when the file cannot be opened.
sub _open_tsv {
	my ($path) = @_;
	open (my $fh, "<", $path) or die "Couldn't open file: $path $!";
	return $fh;
}
|
||
# Print a message on the currently selected output handle and also append it
# to the file-level $logfile buffer.
#   $_[0] - the text to print and record
sub printl {
	my ($message) = @_;
	$logfile .= $message;
	return print $message;
}
|
||
# Print the command-line usage text and terminate the program.
sub help_info {

	my @usage = (
		"--databases=directory or -d=directory \t\t\t Absolute path to databases directory. Mandatory.\n\n",
		"\ni.e. VEP_parser.pl -d=/home/epineiro/Programs/PCDA/databases\n\n",
	);

	foreach my $chunk (@usage) {
		print $chunk;
	}

	exit;

}
Oops, something went wrong.