-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathQC_files.pl
executable file
·142 lines (130 loc) · 4.34 KB
/
QC_files.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#!/usr/bin/env perl
# QC_files.pl - checks that the folder the script is started from contains
# every folder and input file listed below and prints a numbered report to
# STDOUT.  The run stops at the first missing item with exit status 1, so a
# calling shell or pipeline can detect a failed QC; status 0 means that every
# check passed.
use strict;
use warnings;
use Cwd qw(getcwd);

# Every path below is resolved against the current working directory.
my $work_folder = getcwd();
unless(defined $work_folder)
{
    print "Cannot determine the current folder: $!\n";
    exit 1;
}

##########################################
#### If necessary folders are present ####
##########################################
print "If nessesary folders are present:\n";
my $index = 1;
foreach my $folder ("BigWig","data","files","Glmnet","HiC.files","report","RSEM","temp","temp/Glmnet","temp/HiC","temp/promoter","temp/signal")
{
    unless(-d "$work_folder/$folder")
    {
        print "1.$index Absent $work_folder/$folder\n";
        exit 1;
    }
    print "1.$index Present $work_folder/$folder\n";
    $index++;
}
print "All folders are here\n\n";

##########################################################
#### If necessary index and support files are present ####
##########################################################
print "If nessesary index and support files are present:\n";
$index = 1;
foreach my $file ("EPIraction.H3K27ac.data","EPIraction.regions.bed","EPIraction.tissues.data","Gencode.v40.promoters.data","Human.promoters.bed","juicer_tools.jar")
{
    unless(-f "$work_folder/files/$file")
    {
        print "2.$index Absent $work_folder/files/$file\n";
        exit 1;
    }
    print "2.$index Present $work_folder/files/$file\n";
    $index++;
}
print "All such files are here\n\n";

#####################################################
#### If bigWigs and expression files are present ####
#####################################################
# Both tables are keyed by their first column; the column-name lists that
# load_data also returns are not needed here.
my ($tissues) = load_data("$work_folder/files/EPIraction.tissues.data");
my ($samples) = load_data("$work_folder/files/EPIraction.H3K27ac.data");
my @tissue_indexes = sort { $a <=> $b } keys %{$tissues};

# Group the sample ids by their "tissue" column once, keeping numeric id
# order, instead of scanning the whole sample table for every tissue.
my %samples_of;
foreach my $sample_id (sort { $a <=> $b } keys %{$samples})
{
    my $sample_tissue = $samples->{$sample_id}->{"tissue"};
    $sample_tissue = "" unless defined $sample_tissue;
    push @{ $samples_of{$sample_tissue} }, $sample_id;
}

print "If all sample-specific H3K27ac files are present:\n";
foreach my $tissue_index (@tissue_indexes)
{
    my $tissue = $tissues->{$tissue_index}->{"tissue"};
    $tissue = "" unless defined $tissue;
    my @sample_ids = @{ $samples_of{$tissue} || [] };
    foreach my $sample_id (@sample_ids)
    {
        my $file = $samples->{$sample_id}->{"file"};
        require_file("$work_folder/BigWig/$file.H3K27ac.bigWig");
    }
    my $total = scalar @sample_ids;
    print "3.$tissue_index Found $total *.H3K27ac.bigWig files for $tissue\n";
}
print "All sample-specific H3K27ac files are here\n\n";

print "If all tissue-consensus files are present:\n";
foreach my $tissue_index (@tissue_indexes)
{
    my $record = $tissues->{$tissue_index};
    my $tissue = $record->{"tissue"};
    $tissue = "" unless defined $tissue;

    ##### RNAseq #####
    my $label = $record->{"RNA-seq"};
    require_file("$work_folder/RSEM/$label.genes.results");

    ##### Open #####
    $label = $record->{"Open.Chromatin"};
    require_file("$work_folder/BigWig/$label.bigWig");

    ##### Cofactor #####
    $label = $record->{"Cofactor"};
    require_file("$work_folder/BigWig/$label.bigWig");

    ##### Hi-C #####
    # The "HiC" column may hold several labels separated by ";".
    my $hic_labels = $record->{"HiC"};
    $hic_labels = "" unless defined $hic_labels;
    foreach my $hic_label (split /;/, $hic_labels)
    {
        require_file("$work_folder/HiC.files/$hic_label.hic");
    }
    print "4.$tissue_index Found all tissue-consensus files for $tissue\n";
}

require_file("$work_folder/HiC.files/Consensus.tissues.Encode.intact.hic");
# NOTE(review): the item number 78 is hard-coded; presumably it follows a
# tissue table with 77 entries - confirm if the table size changes.
print "4.78 Consensus.tissues.Encode.intact.hic present\n";
print "All tissue-consensus files are here\n\n";
print "It looks like all files are present.\nDo not forget to put the current folder '$work_folder'\ninto 'EPIraction.config' file within params section\nDone QC!!!!\n";

# require_file($path)
# Returns quietly when $path is an existing plain file; otherwise prints
# "<path> absent" to STDOUT and terminates the script with exit status 1.
sub require_file
{
    my ($path) = @_;
    return if -f $path;
    print "\n$path absent\n";
    exit 1;
}
# load_data($path)
# Reads a tab-separated table whose first line is a header.
#
# Returns a two-element list (\%rows, \@columns):
#   %rows    maps the first field of every data line to a hash of
#            { column name => value } for the remaining header columns;
#   @columns holds the header names without the first (key) column.
#
# Details:
#   * a trailing "\r" (CRLF line ending) is stripped from every line;
#   * a line that ends with a tab gets "NA" as its last field, because split
#     would otherwise drop the trailing empty field;
#   * a data line shorter than the header leaves the missing columns undef;
#   * blank / whitespace-only lines are skipped, so that e.g. an empty last
#     line does not create a bogus row with an empty key;
#   * when a key occurs more than once the last line wins.
#
# Dies with "<system error> <path>" when the file cannot be opened.
sub load_data
{
    my ($path) = @_;
    open(my $fh, "<", $path) or die "$! $path";

    my $header = <$fh>;
    $header = "" unless defined $header;    # empty file: no columns, no rows
    chomp $header;
    $header =~ s/\r\z//;
    my @Head = split /\t/, $header;

    my %Hash_data;
    while(my $line = <$fh>)
    {
        chomp $line;
        $line =~ s/\r\z//;
        next unless $line =~ /\S/;
        $line .= "NA" if $line =~ /\t\z/;
        my @Content = split /\t/, $line;

        # Column 0 is the row key; the remaining header columns become fields.
        my %record;
        foreach my $column (1..$#Head)
        {
            $record{$Head[$column]} = $Content[$column];
        }
        $Hash_data{$Content[0]} = \%record;
    }
    close $fh;

    shift @Head;
    return (\%Hash_data,\@Head);
}