zUMIs2.6.0 updates

sdparekh · Jan 14, 2020 · 976fd4d · 976fd4d
1 parent 6d528b2
commit 976fd4d
Show file tree

Hide file tree

Showing 14 changed files with 286 additions and 305 deletions.
diff --git a/README.md b/README.md
@@ -14,9 +14,11 @@ We provide a script to convert zUMIs output into loom file automatically based o
 To convert zUMIs output to loom, simply run `Rscript rds2loom.R myRun.yaml`.
 
 ## Changelog
-14 Oct 2019: zUMIs2.5.6: small fixes and optimization when using PE data. 
+14 Jan 2020: [zUMIs2.6.0 released](https://github.com/sdparekh/zUMIs/releases/tag/2.6.0). Large update to the output bam file which is now always coordinate sorted for better compatibility with downstream tools. Please review the new [bam tags](https://github.com/sdparekh/zUMIs/wiki/Output#explanation-of-the-bam-tags-zumis-uses). UMI error correction by hamming distance now also gets added to the bam file. Faster processing speeds when downsampling and using UMI hamming distance.
 
-02 Sep 2019: zUMIs2.5.5: new BC detection algorithm using the [inflection](https://cran.r-project.org/web/packages/inflection/index.html) R package. 
+14 Oct 2019: zUMIs2.5.6: small fixes and optimization when using PE data.
+
+02 Sep 2019: zUMIs2.5.5: new BC detection algorithm using the [inflection](https://cran.r-project.org/web/packages/inflection/index.html) R package.
 
 09 Aug 2019: zUMIs2.5.4: zUMIs is now independent of the version of the Rsubread package. To improve code stability and reduce version-dependency, zUMIs now uses inbuilt precompiled Rsubread::featureCounts code.   
 

diff --git a/UMIstuffFUN.R b/UMIstuffFUN.R
diff --git a/correct_BCtag.pl b/correct_BCtag.pl
@@ -47,7 +47,7 @@ BEGIN
   }
 
   print BCBAM $read[0],"\t",$read[1],"\t",$read[2],"\t",$read[3],"\t",$read[4],"\t",$read[5],"\t",$read[6],"\t",$read[7],"\t",$read[8],"\t",
-        $read[9],"\t",$read[10],"\t","BX:Z:",$thisBC,"\t","BC:Z:",$correctBC,"\t",$read[12],"\n";
+        $read[9],"\t",$read[10],"\t","BX:Z:",$thisBC,"\t","BC:Z:",$correctBC,"\t",$read[12],"\t",$read[13],"\t",$read[14],"\n";
 
 }
 close INBAM;

diff --git a/correct_UBtag.pl b/correct_UBtag.pl
@@ -45,12 +45,15 @@ BEGIN
   $UXtag = $UXmatches[0];
   $UXtag =~ s/UX:Z://g;
 
-  my @GEmatches = grep { /XT:Z:/ } @readarr;
+  my @GEmatches = grep { /GE:Z:/ } @readarr;
+  if (@GEmatches == 0){ #if not exon, maybe there is an intron gene tag?
+    my @GEmatches = grep { /GI:Z:/ } @readarr;
+  }
 
   if (!@GEmatches == 0){
     #print "GE found";
     $GEtag = $GEmatches[0];
-    $GEtag =~ s/XT:Z://g;
+    $GEtag =~ s/G[EI]:Z://g;
 
     if (defined($ubmap{$GEtag})) {
       #print "GE found";

diff --git a/fqfilter_v2.pl b/fqfilter_v2.pl
@@ -277,10 +277,10 @@ BEGIN
       $bclist{$bcseq}++;
       #print $lay,"\n";
       if($lay eq "SE"){
-        print BCBAM $ridtmp,"\t4\t*\t0\t0\t*\t*\t0\t0\t",$cseqr1,"\t",$cqseqr1,"\tBC:Z:",$bcseq,"\tUB:Z:",$ubseq,"\n";
+        print BCBAM $ridtmp,"\t4\t*\t0\t0\t*\t*\t0\t0\t",$cseqr1,"\t",$cqseqr1,"\tBC:Z:",$bcseq,"\tUB:Z:",$ubseq,"\tQB:Z:",$bcqseq,"\tQU:Z:",$ubqseq,"\n";
       }else{
-        print BCBAM $ridtmp,"\t77\t*\t0\t0\t*\t*\t0\t0\t",$cseqr1,"\t",$cqseqr1,"\tBC:Z:",$bcseq,"\tUB:Z:",$ubseq,"\n";
-        print BCBAM $ridtmp,"\t141\t*\t0\t0\t*\t*\t0\t0\t",$cseqr2,"\t",$cqseqr2,"\tBC:Z:",$bcseq,"\tUB:Z:",$ubseq,"\n";
+        print BCBAM $ridtmp,"\t77\t*\t0\t0\t*\t*\t0\t0\t",$cseqr1,"\t",$cqseqr1,"\tBC:Z:",$bcseq,"\tUB:Z:",$ubseq,"\tQB:Z:",$bcqseq,"\tQU:Z:",$ubqseq,"\n";
+        print BCBAM $ridtmp,"\t141\t*\t0\t0\t*\t*\t0\t0\t",$cseqr2,"\t",$cqseqr2,"\tBC:Z:",$bcseq,"\tUB:Z:",$ubseq,"\tQB:Z:",$bcqseq,"\tQU:Z:",$ubqseq,"\n";
       }
     }
 }

diff --git a/misc/demultiplex_BC.pl b/misc/demultiplex_BC.pl
@@ -1,19 +1,20 @@
 #!/usr/bin/perl
 use warnings;
 
-if(@ARGV != 3)
+if(@ARGV != 4)
 {
 print
 "\n#####################################################################################
-Usage: perl $0 <output_folder> <project_name> <samtools-executable> \n
+Usage: perl $0 <output_folder> <project_name> <bam_file> <samtools-executable> \n
 Please drop your suggestions and clarifications to <christoph.ziegenhain\@ki.se>\n
 ######################################################################################\n\n";
 exit;
 }
 BEGIN{
 $out=$ARGV[0];
 $name=$ARGV[1];
-$samtoolsexc=$ARGV[2];
+$bam=$ARGV[2];
+$samtoolsexc=$ARGV[3];
 }
 
 
@@ -25,7 +26,7 @@ BEGIN
 
 #initialize output files with header
 $demuxout="${out}/zUMIs_output/demultiplexed/";
-$exbam="${out}/${name}.filtered.tagged.Aligned.out.bam.ex.featureCounts.bam";
+$exbam="$bam";
 
 unless ( -d $demuxout) {
     mkdir $demuxout;

diff --git a/misc/fcountsLib2 b/misc/fcountsLib2
diff --git a/misc/featureCounts.R b/misc/featureCounts.R
@@ -1,4 +1,4 @@
-featureCounts <- function(files,annot.inbuilt="mm10",annot.ext=NULL,isGTFAnnotationFile=FALSE,GTF.featureType="exon",GTF.attrType="gene_id",GTF.attrType.extra=NULL,chrAliases=NULL,useMetaFeatures=TRUE,allowMultiOverlap=FALSE,minOverlap=1,fracOverlap=0,fracOverlapFeature=0,largestOverlap=FALSE,nonOverlap=NULL,nonOverlapFeature=NULL,readShiftType="upstream",readShiftSize=0,readExtension5=0,readExtension3=0,read2pos=NULL,countMultiMappingReads=TRUE,fraction=FALSE,isLongRead=FALSE,minMQS=0,splitOnly=FALSE,nonSplitOnly=FALSE,primaryOnly=FALSE,ignoreDup=FALSE,strandSpecific=0,juncCounts=FALSE,genome=NULL,isPairedEnd=FALSE,requireBothEndsMapped=FALSE,checkFragLength=FALSE,minFragLength=50,maxFragLength=600,countChimericFragments=TRUE,autosort=TRUE,nthreads=1,byReadGroup=FALSE,reportReads=NULL,reportReadsPath=NULL,maxMOp=10,tmpDir=".",verbose=FALSE, fcounts_clib = NULL)
+featureCounts <- function(files,annot.inbuilt="mm10",annot.ext=NULL,isGTFAnnotationFile=FALSE,GTF.featureType="exon",GTF.attrType="gene_id",GTF.attrType.extra=NULL,chrAliases=NULL,useMetaFeatures=TRUE,allowMultiOverlap=FALSE,minOverlap=1,fracOverlap=0,fracOverlapFeature=0,largestOverlap=FALSE,nonOverlap=NULL,nonOverlapFeature=NULL,readShiftType="upstream",readShiftSize=0,readExtension5=0,readExtension3=0,read2pos=NULL,countMultiMappingReads=TRUE,fraction=FALSE,isLongRead=FALSE,minMQS=0,splitOnly=FALSE,nonSplitOnly=FALSE,primaryOnly=FALSE,ignoreDup=FALSE,strandSpecific=0,juncCounts=FALSE,genome=NULL,isPairedEnd=FALSE,requireBothEndsMapped=FALSE,checkFragLength=FALSE,minFragLength=50,maxFragLength=600,countChimericFragments=TRUE,autosort=TRUE,nthreads=1,byReadGroup=FALSE,reportReads=NULL,reportReadsPath=NULL,maxMOp=10,tmpDir=".",verbose=FALSE, fcounts_clib = NULL, isIntronInput)
 {
 	flag <- FALSE
 	files <- normalizePath(files, mustWork=T)
@@ -114,7 +114,7 @@ featureCounts <- function(files,annot.inbuilt="mm10",annot.ext=NULL,isGTFAnnotat
 	if(!is.null(nonOverlapFeature)) max_missing_bases_in_feature <- nonOverlapFeature
 	if(!is.null(GTF.attrType.extra))GTF.attrType.extra_str <- paste(GTF.attrType.extra, collapse="\t")
 
-	cmd <- paste("readSummary",ann,files_C,fout,as.numeric(isPairedEnd),minFragLength,maxFragLength,0,as.numeric(allowMultiOverlap),as.numeric(useMetaFeatures),nthreads,as.numeric(isGTFAnnotationFile),strandSpecific,reportReads_C,as.numeric(requireBothEndsMapped),as.numeric(!countChimericFragments),as.numeric(checkFragLength),GTF.featureType,GTF.attrType,minMQS,as.numeric(countMultiMappingReads),chrAliases_C," ",as.numeric(FALSE),14,readExtension5,readExtension3,minOverlap,split_C,read2pos_C," ",as.numeric(ignoreDup),as.numeric(!autosort),as.numeric(fraction),as.numeric(largestOverlap),PE_orientation,as.numeric(juncCounts),genome_C,maxMOp,0,as.numeric(fracOverlap),as.character(tmpDir),"0",as.numeric(byReadGroup),as.numeric(isLongRead),as.numeric(verbose),as.numeric(fracOverlapFeature), as.numeric(do_detection_calls), as.numeric(max_missing_bases_in_read), as.numeric(max_missing_bases_in_feature), as.numeric(primaryOnly), reportReadsPath, GTF.attrType.extra_str, annot.screen.output, readShiftType,readShiftSize ,sep=",")
+	cmd <- paste("readSummary",ann,files_C,fout,as.numeric(isPairedEnd),minFragLength,maxFragLength,0,as.numeric(allowMultiOverlap),as.numeric(useMetaFeatures),nthreads,as.numeric(isGTFAnnotationFile),strandSpecific,reportReads_C,as.numeric(requireBothEndsMapped),as.numeric(!countChimericFragments),as.numeric(checkFragLength),GTF.featureType,GTF.attrType,minMQS,as.numeric(countMultiMappingReads),chrAliases_C," ",as.numeric(FALSE),14,readExtension5,readExtension3,minOverlap,split_C,read2pos_C," ",as.numeric(ignoreDup),as.numeric(!autosort),as.numeric(fraction),as.numeric(largestOverlap),PE_orientation,as.numeric(juncCounts),genome_C,maxMOp,0,as.numeric(fracOverlap),as.character(tmpDir),"0",as.numeric(byReadGroup),as.numeric(isLongRead),as.numeric(verbose),as.numeric(fracOverlapFeature), as.numeric(do_detection_calls), as.numeric(max_missing_bases_in_read), as.numeric(max_missing_bases_in_feature), as.numeric(primaryOnly), reportReadsPath, GTF.attrType.extra_str, annot.screen.output, readShiftType,readShiftSize ,isIntronInput,sep=",")
 	n <- length(unlist(strsplit(cmd,",")))
 	dyn.load(fcounts_clib)
 	C_args <- .C("R_readSummary_wrapper",as.integer(n),as.character(cmd))

diff --git a/runVelocyto.R b/runVelocyto.R
@@ -13,19 +13,27 @@ if(is.null(opt$mem_limit)){
 }else{
   mempercpu <- round(opt$mem_limit/opt$num_threads,0)
   if(mempercpu==0){
-    mempercpu <- 1
+        mempercpu <- 1
   }
 }
+
+featfile_vector <- c(paste0(opt$out_dir,"/",opt$project,".filtered.Aligned.GeneTagged.UBcorrected.sorted.bam"),
+                     paste0(opt$out_dir,"/",opt$project,".filtered.Aligned.GeneTagged.sorted.bam"))
+
+featfile <- featfile_vector[which(file.exists(featfile_vector))[1]]
+
+
 ##########################
 
 print(Sys.time())
 #prepare the bam file for use with velocyto
 print("Preparing bam file for velocyto...")
-retag_cmd <- paste0(samtoolsexc," view -@ 2 -h ",paste0(opt$out_dir,"/",opt$project,".filtered.tagged.Aligned.out.bam | sed 's/BC:Z:/CB:Z:/'"))
+retag_cmd <- paste0(samtoolsexc," view -@ 2 -h ",featfile," | sed 's/BC:Z:/CB:Z:/'")
 velobam <- paste0(opt$out_dir,"/",opt$project,".tagged.forVelocyto.bam")
-sort_cmd <- paste0(samtoolsexc," sort -m ",mempercpu,"G -O BAM -@ ",opt$num_threads," -o ",velobam )
-
-system(paste(retag_cmd,sort_cmd,sep=" | "))
+#sort_cmd <- paste0(samtoolsexc," sort -m ",mempercpu,"G -O BAM -@ ",opt$num_threads," -o ",velobam )
+out_cmd <- paste0(samtoolsexc," view -b -@ ",opt$num_threads," -o ",velobam," - " )
+#system(paste(retag_cmd,sort_cmd,sep=" | "))
+system(paste(retag_cmd,out_cmd,sep=" | "))
 
 print(Sys.time())
 

diff --git a/runfeatureCountFUN.R b/runfeatureCountFUN.R
@@ -83,7 +83,8 @@ suppressWarnings(suppressMessages(require(AnnotationDbi)))
                                    strandSpecific=strand,
                                    isPairedEnd=T,
                                    countChimericFragments=F,
-                                   fcounts_clib = fcounts_clib)$stat
+                                   fcounts_clib = fcounts_clib,
+                                   isIntronInput = ifelse(type == "in", 1, 0))$stat
   fn<-paste0(abamfile,".featureCounts.bam")
   nfn<-paste0(abamfile,".",type,".featureCounts.bam")
 

diff --git a/statsFUN.R b/statsFUN.R
@@ -32,9 +32,8 @@ splitRG_stats<-function(bccount,mem){
   return(bccount)
 }
 
-prep_samtools_stats <- function(featfiles,bccount,cores,samtoolsexc){
+prep_samtools_stats <- function(featfile,bccount,inex,cores,samtoolsexc){
   print("Extracting reads from bam file(s)...")
-  nfiles=length(featfiles)
   nchunks <- length(unique(bccount$chunkID))
   all_rgfiles <- paste0(opt$out_dir,"/zUMIs_output/.",opt$project,".RGgroup.",1:nchunks,".txt")
 
@@ -43,43 +42,40 @@ prep_samtools_stats <- function(featfiles,bccount,cores,samtoolsexc){
     rgfile <- all_rgfiles[i]
     chunks <- bccount[chunkID==i]$XC
     write.table(file=rgfile,chunks,col.names = F,quote = F,row.names = F)
-
   }
 
   headerXX <- paste( c(paste0("V",1:3)) ,collapse="\t")
   write(headerXX,"freadHeader")
 
   headercommand <- "cat freadHeader > "
   layoutflag <- ifelse(opt$read_layout == "PE", "-f 0x0040", "")
-  samcommand <- paste(samtoolsexc," view -x BX -x NH -x AS -x nM -x HI -x IH -x NM -x uT -x MD -x jM -x jI -x XN -x UB",layoutflag," -@")
-  grepcommand <- " | cut -f12,13,14 | sed 's/BC:Z://' | sed 's/XS:Z://' | sed 's/XT:Z://' | grep -F -f "
+  samcommand <- paste(samtoolsexc," view -x QB -x QU -x BX -x NH -x AS -x nM -x HI -x IH -x NM -x uT -x MD -x jM -x jI -x XN -x UB -x XS -x UX -x EN -x IS -x IN",layoutflag," -@")
 
-  outfiles_ex <- paste0(opt$out_dir,"/zUMIs_output/.",opt$project,".ex.",1:nchunks,".txt")
-  system(paste(headercommand,outfiles_ex,collapse = "; "))
 
-  if(length(featfiles)==1){
-    cpusperchunk <- round(cores/nchunks,0)
-    ex_cmd <- paste(samcommand,cpusperchunk,featfiles[1],grepcommand,all_rgfiles,">>",outfiles_ex," & ",collapse = " ")
+  outfiles <- paste0(opt$out_dir,"/zUMIs_output/.",opt$project,".tmp.",1:nchunks,".txt")
+  system(paste(headercommand,outfiles,collapse = "; "))
 
-    system(paste(ex_cmd,"wait"))
-  }else{
-    cpusperchunk <- round(cores/(2*nchunks),0)
-    ex_cmd <- paste(samcommand,cpusperchunk,featfiles[1],grepcommand,all_rgfiles,">>",outfiles_ex," & ",collapse = " ")
+  cpusperchunk <- round(cores/nchunks,0)
 
-    outfiles_in <- paste0(opt$out_dir,"/zUMIs_output/.",opt$project,".in.",1:nchunks,".txt")
-    system(paste(headercommand,outfiles_in,collapse = "; "))
+  if(inex == FALSE){
+    grepcommand <- " | cut -f12,13,14 | sed 's/BC:Z://' | sed 's/ES:Z://' | sed 's/GE:Z://' | grep -F -f "
+    ex_cmd <- paste(samcommand,cpusperchunk,featfile,grepcommand,all_rgfiles,">>",outfiles," & ",collapse = " ")
 
-    in_cmd <- paste(samcommand,cpusperchunk,featfiles[2],grepcommand,all_rgfiles,">>",outfiles_in," & ",collapse = " ")
+    system(paste(ex_cmd,"wait"))
+  }else{
+    grepcommand <- " | cut -f12,13,14,15 | sed 's/BC:Z://' | sed 's/ES:Z://' | sed 's/Z://g' | grep -F -f "
+    inex_cmd <- paste(samcommand,cpusperchunk,featfile,grepcommand,all_rgfiles,">>",outfiles," & ",collapse = " ")
 
-    system(paste(ex_cmd,in_cmd,"wait"))
+    system(paste(inex_cmd,"wait"))
   }
+
   system("rm freadHeader")
   system(paste("rm",all_rgfiles))
 
-  return(outfiles_ex)
+  return(outfiles)
 }
 
-sumstatBAM <- function(featfiles,cores,outdir,user_seq,bc,outfile,samtoolsexc){
+sumstatBAM <- function(featfile,cores,outdir,user_seq,bc,inex,outfile,samtoolsexc){
   require(data.table)
   # chunk barcodes
   bccount_file <- paste0(opt$out_dir,"/", opt$project, ".BCstats.txt")
@@ -89,55 +85,63 @@ sumstatBAM <- function(featfiles,cores,outdir,user_seq,bc,outfile,samtoolsexc){
   }
   bccount <- splitRG_stats(bccount=bccount, mem= opt$mem_limit)
 
-  samouts <- prep_samtools_stats(featfiles = featfiles,
-                           bccount   = bccount,
-                           cores     = opt$num_threads,
-                           samtoolsexc=samtoolsexc)
+  samouts <- prep_samtools_stats(featfile = featfile,
+                           bccount    = bccount,
+                           inex       = inex,
+                           cores      = opt$num_threads,
+                           samtoolsexc= samtoolsexc)
 
 
   for(i in unique(bccount$chunkID)){
     print(paste("Working on chunk",i))
-    #rgfile    = paste0(opt$out_dir,"/zUMIs_output/.",opt$project,".currentRGgroup.txt")
-    #chunks    = bccount[chunkID==i]$XC
-    #write.table(file=rgfile,chunks,col.names = F,quote = F,row.names = F)
-
-    ## minifunction for string operations
-    #headerXX<-paste( c(paste0("V",1:3)) ,collapse="\t")
-    #write(headerXX,paste(outdir,"freadHeader",sep="/"))
-    #samcommand<-paste("cat freadHeader; ",samtoolsexc," view -x NH -x AS -x nM -x HI -x IH -x NM -x uT -x MD -x jM -x jI -x XN -x UB -@",cores)
-    samfile_ex <- paste0(opt$out_dir,"/zUMIs_output/.",opt$project,".ex.",i,".txt")
-    if(grepl(pattern = ".filtered.tagged.Aligned.out.bam.in.featureCounts.bam",featfiles[2])){
-      samfile_in <- paste0(opt$out_dir,"/zUMIs_output/.",opt$project,".in.",i,".txt")
-    }else{
-      samfile_in <- samfile_ex
-    }
 
+    samfile <- paste0(opt$out_dir,"/zUMIs_output/.",opt$project,".tmp.",i,".txt")
 
-    #tmp<-data.table::fread(paste(samcommand,featfiles[1],"| cut -f12,13,14 | sed 's/BC:Z://' | sed 's/XS:Z://' | sed 's/XT:Z://' | grep -F -f ",rgfile), na.strings=c(""),
-    tmp <- data.table::fread(samfile_ex, na.strings=c(""),
+    if(inex==FALSE){
+      tmp<-data.table::fread(samfile, na.strings=c(""),
                                select=c(1,2,3),header=T,fill=T,colClasses = "character" , col.names = c("RG","XS","GE") )[
-                                #,"GEin":=fread(paste(samcommand,featfiles[2],"| cut -f12,13,14 | sed 's/BC:Z://' | sed 's/XT:Z://' | grep -F -f ",rgfile),select=3,header=T,fill=T,na.strings=c(""),colClasses = "character")
-                                 ,"GEin":=fread(samfile_in,select=3,header=T,fill=T,na.strings=c(""),colClasses = "character")
-                                 ][ ,"ftype":="NA"
-                                 ][is.na(GEin)==F,ftype:="Intron"
-                                 ][is.na(GE)==F  ,ftype:="Exon"
-                                 ][is.na(GE)     ,GE:=GEin
-                                 ][ftype!="NA",   XS:=ftype
-                                 ][GE %in% user_seq[,V1], XS:="User"
-                                 ][,RG:=as.character(RG)
-                                 ][!(RG %in% bc[,XC]), RG:="bad"
-                                 ][ ,c("GEin","GE","ftype"):=NULL
-                                 ][,list(.N),by=c("RG","XS")
-                                 ][,type:=.rmUnassigned(XS)
-                                 ][type=="NoFeatures",type:="Intergenic"
-                                 ][,XS:=NULL]
-
+                               ,"ftype":="NA"
+                               ][is.na(GE)==F,  ftype:="Exon"
+                               ][ftype!="NA",   XS:=ftype
+                               ][GE %in% user_seq[,V1], XS:="User"
+                               ][,RG:=as.character(RG)
+                               ][!(RG %in% bc[,XC]), RG:="bad"
+                               ][ ,c("GEin","GE","ftype"):=NULL
+                               ][,list(.N),by=c("RG","XS")
+                               ][,type:=.rmUnassigned(XS)
+                               ][type=="NoFeatures",type:="Intergenic"
+                               ][,XS:=NULL]
+    }else{
+     tmp<-data.table::fread(samfile, na.strings=c(""),
+                              select=c(1,2,3,4),header=T,fill=T,colClasses = "character" , col.names = c("RG","XS","V3","V4") )[
+                                   ][ , V3_id := substr(V3,1,2)
+                                   ][ , V4_id := substr(V4,1,2)
+                                   ][ , V3 := substr(V3,4,nchar(V3))
+                                   ][ , V4 := substr(V4,4,nchar(V4))
+                                   ][ V3_id == "GE", GE := V3
+                                   ][ V3_id == "GI", GEin := V3
+                                   ][ V4_id == "GI", GEin := V4
+                                   ][ ,c("V3_id","V4_id","V3","V4") := NULL
+                                   ][ ,"ftype":="NA"
+                                   ][is.na(GEin)==F,ftype:="Intron"
+                                   ][is.na(GE)==F,  ftype:="Exon"
+                                   ][is.na(GE),GE:=GEin
+                                   ][ftype!="NA",   XS:=ftype
+                                   ][GE %in% user_seq[,V1], XS:="User"
+                                   ][,RG:=as.character(RG)
+                                   ][!(RG %in% bc[,XC]), RG:="bad"
+                                   ][ ,c("GE","ftype"):=NULL
+                                   ][,list(.N),by=c("RG","XS")
+                                   ][,type:=.rmUnassigned(XS)
+                                   ][type=="NoFeatures",type:="Intergenic"
+                                   ][,XS:=NULL]
+    }
     if(i==1){
       mapCount<-tmp
     }else{
       mapCount<-rbind(mapCount,tmp)
     }
-    system(paste("rm ",samfile_ex,samfile_in))
+    system(paste("rm ",samfile))
   }
     saveRDS(mapCount,file=outfile)
     #system(paste0("rm ",outdir,"/freadHeader"))