-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbuild_phamdb.pl
78 lines (71 loc) · 1.63 KB
/
build_phamdb.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/perl -w
use strict;
use warnings;
#get pham files
mkdir("pham_fastas");
system("wget http://databases.hatfull.org/Actino_Draft/fastas.zip");
system("unzip fastas.zip -d pham_fastas");
#combine pham files
my @fastafiles = glob"pham_fastas/*.fasta";
open(OUT, "+> allphams.faa");
foreach my $infile (@fastafiles){
my($pham)=($infile=~/(.*?)\_.*/);
open(IN, "< $infile");
while(<IN>){
chomp;
if($_=~/\>/){
my($phage,$cluster)=($_=~/\>(.*?)\ .*?\[cluster\=(.*?)\].*/);
print OUT ">$phage $cluster $pham\n";
}
else{
$_=~s/\ /\_/g;
$_=~s/\-//g;
print OUT "$_\n";
}
}
close IN
}
close OUT;
#get pham numbers and other data
mkdir("pham_csvs");
my @pham_numbers =(1..40000);
my %products;
foreach my $pham (@pham_numbers){
my $url = 'https://phagesdb.org/phams/genelist/'.$pham;
print "$url\n";
my $ff = File::Fetch->new(uri => $url);
my $file = $ff->fetch(to => 'pham_csvs');
open(IN, "< pham_csvs/$pham");
my $counter=0;
while(<IN>){
$counter++;
next if($counter <= 1);
my($product)=($_=~/.*\t(.*?)\n/);
next if(!defined $product);
$product=~s/b"//g;
$product=~s/b'//g;
$product=~s/'//g;
$products{$pham}=$product;
last;
}
close IN;
}
#apply pham numbers to database
open(OUT, "+> temp.txt");
open(PHAM, "< allphams.faa");
while(<PHAM>){
if($_=~/\>/){
my($pham)=($_=~/.*\ (.*?)\n/);
chomp;
print OUT "$_"." $products{$pham}\n";
}
else{
print OUT $_;
}
}
close PHAM;
close OUT;
unlink "allphams.faa";
rename "temp.txt", "allphams.faa";
#make blastdatabse
system("makeblastdb -in allphams.faa -dbtype prot -parse_seqids");