-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathsplit_fasta.pl
executable file
·64 lines (51 loc) · 1.73 KB
/
split_fasta.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#! /usr/bin/perl
use strict;
use warnings;
# Title: splitFasta.pl
# Author: Chase W. Nelson
# Affiliation1: Austin L. Hughes lab, University of South Carolina (Columbia, SC 29208, USA)
# Affiliation2: Wen-Hsiung Li lab, Academia Sinica (Taipei, Taiwan)
# Contact1: cwnelson88@gmail.com
# Contact2: nelsoncw@email.sc.edu
# Date of creation: 09 August 2013
# Last update: 09 August 2013
# Explanation: This script takes a FASTA file as input and creates n FASTA files,
# one for each of the n individual sequences in the input FASTA. Thus, it is meant
# for separating a FASTA file with n sequences into n FASTA files, one for each sequence.
# Usage: the input FASTA file must be given as the first command-line argument;
# user-given arguments are placed in the array @ARGV, which is indexed from 0.
# ACKNOWLEDGMENTS: written by C.W.N. with support from a National Science Foundation
# Graduate Research Fellowship (DGE-0929297), a National Science Foundation East Asian
# and Pacific Summer Institutes Fellowship, and a University of South Carolina
# Presidential Fellowship.
# Split a multi-sequence FASTA file into one FASTA file per sequence.
#
# Input:  $ARGV[0] - path of the FASTA file to split.
# Output: for every header line ">ID ...", the header and the sequence lines
#         that follow it are written to "<prefix>_<ID>.fasta", where <prefix>
#         is the input path with a trailing ".fasta" or ".txt" removed and
#         <ID> is the leading run of word characters after '>'.
# Dies if the argument is missing, if a file cannot be opened, written or
# closed, or if a header line carries no usable ID.

# Check that an argument is given (a file literally named "0" is accepted).
if (!defined $ARGV[0] || $ARGV[0] eq '') {
    die "\nAn argument must be supplied: seqFile.fasta";
}

my $fastaFile = $ARGV[0];

# Woei-Fu's request
# Only a *trailing* ".fasta"/".txt" is stripped, so a ".fasta" or ".txt"
# elsewhere in the path (e.g. in a directory name) no longer truncates the
# prefix. Any other file name is used unchanged, so the prefix is always
# defined.
my $newFilePrefix = $fastaFile;
$newFilePrefix =~ s/\.(?:fasta|txt)\z//;

# Reference FASTA sequence
open(my $inFh, '<', $fastaFile)
    or die "Cannot open '$fastaFile' for reading: $!\n";

my $outFh;         # handle of the per-sequence file currently being written
my $outFile;       # its path, kept for error messages
my %alreadyOpened; # output files created so far during this run

while (my $line = <$inFh>) {    # For each line of the FASTA
    if ($line =~ /^>/) {
        # A header we cannot name a file after must not be silently
        # appended to the previous sequence as if it were sequence data.
        if ($line !~ /^>(\w+)/) {
            die "Line $. of '$fastaFile': header has no sequence ID "
              . "directly after '>'\n";
        }
        my $seqId = $1;

        if (defined $outFh) {
            close($outFh) or die "Cannot close '$outFile': $!\n";
        }

        $outFile = "${newFilePrefix}_${seqId}.fasta";

        # Truncate the first time a file is written in this run, so that
        # re-running the script does not duplicate records; append when the
        # same ID recurs within the input, so no record is lost.
        my $mode = $alreadyOpened{$outFile}++ ? '>>' : '>';
        open($outFh, $mode, $outFile)
            or die "Cannot open '$outFile' for writing: $!\n";
    }
    elsif (!defined $outFh) {
        # Text before the first header belongs to no sequence; skip it.
        warn "Line $. of '$fastaFile' precedes the first '>' header; skipped\n"
            if $line =~ /\S/;
        next;
    }

    print {$outFh} $line
        or die "Cannot write to '$outFile': $!\n";
}

if (defined $outFh) {
    # Buffered write errors surface at close time.
    close($outFh) or die "Cannot close '$outFile': $!\n";
}
close($inFh) or die "Cannot close '$fastaFile': $!\n";