diff --git a/README.md b/README.md
new file mode 100644
index 0000000..aac9ff7
--- /dev/null
+++ b/README.md
@@ -0,0 +1,46 @@
+# PanDrugsDB
+[PanDrugs](https://pandrugs.org/) is a bioinformatics platform that prioritizes anticancer drug treatments according to individual genomic data. Version 2.0 of its database integrates data from 23 primary sources.
+
+The code used to generate its content and to annotate VCF files is available in this GitHub repository.
+
+The following workflow summarizes the composition of PanDrugsDB and the steps implemented for its construction.
+
+![workflow](images/pandrugsdb-sources.png)
+
+The table below describes the sources included in the database and in the annotation process, along with their versions and licenses.
+
+| Source | Version/Access Date*/DOI | License | Retrieved Information |
+| ------------------ | ---------------------------- | --------------------------- | ---------------------------------------------------------------- |
+| CIViC | v2022-07-01 | CC0 1.0 | Drug-gene associations; Drug-gene annotations |
+| DGIdb | v4.2.0 | MIT | Drug-gene associations |
+| DrugBank | v5.1.9 | CC BY-NC 4.0 | Drug-gene associations |
+| GDSC | v8.2 | CC BY-NC-ND 2.5 | Drug-gene associations; Drug-gene annotations |
+| MOAlmanac | v2022-03-03 | GPLv2 | Drug-gene associations; Drug-gene annotations |
+| OncoKB | v3.14 | Free for academic use | Drug-gene associations; Drug-gene annotations; Gene annotations |
+| Thera-SAbDab | 2022-06-21 | CC BY 4.0 | Drug-gene associations |
+| PubChem | 2022-11-29 | Free | Drug standardisation |
+| HGNC | v2022-10-01 | CC0 1.0 | Gene standardisation |
+| ClinicalTrials.gov | 2022-03-15 | Free | Drug annotations |
+| CLUE | v1.2; build 1.44 | CC BY 4.0 | Drug annotations |
+| Drugs@FDA | 2022-03-14 | Free | Drug annotations |
+| EMA | 2022-05-31 | Free | Drug annotations |
+| FDA Drug Labels | 2022-06-01 | CC0 1.0 | Drug annotations |
+| KEGG BRITE | v101 | Free for academic use | Drug annotations |
+| COSMIC's CGC | v95 | Free for non-commercial use | Gene annotations |
+| DepMap | v22Q2 | CC BY 4.0 | Gene annotations; GScore calculation |
+| KEGG PATHWAY | v103 | Free for academic use | Gene annotations; Variant annotations |
+| OncoVar | v1.2 | Free for non-commercial use | Gene annotations; GScore calculation |
+| Cancer Hallmarks | 10.1038/s41598-018-25076-6 | CC BY 4.0 | GScore calculation |
+| TDLs | 10.1016/j.celrep.2022.110400 | CC BY-NC-ND 4.0 | GScore calculation |
+| ClinVar | v2022-05 | Free | Variant annotations |
+| COSMIC | v96 | Free for non-commercial use | Variant annotations |
+| Domains | 10.1371/journal.pcbi.1004147 | CC BY 4.0 | Variant annotations |
+| InterPro | v88.0 | CC0 1.0 | Variant annotations |
+| Pfam | v35.0 | CC0 1.0 | Variant annotations |
+| UniProt | v2022_01 | CC BY 4.0 | Variant annotations |
+| VEP | v109 | Apache-2.0 | Variant annotations |
+
+\* Dates are displayed in the ISO 8601 standard format: YYYY-MM-DD.
+
+## Synthetic lethal pairs
+Synthetic lethal (SL) pairs are generated separately and then merged back into the database. The code responsible for generating and updating these pairs is stored [here](https://github.com/cnio-bu/pandrugs_sl_pairs).
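+
+## Setting up the environment
+The Perl dependencies used by the VEP parser scripts are pinned in `environment.yaml`. Below is a minimal sketch of how to set them up and run the annotation scripts; it assumes conda is installed, uses an arbitrary environment name, and a hypothetical input file `sample.vep.vcf.gz` (the parser reads its input through `gunzip -c`, so the VEP output is expected gzip-compressed). The flags are the ones documented in each script's help text.
+
+```bash
+conda env create -f environment.yaml -n pandrugsdb
+conda activate pandrugsdb
+
+# Build the local annotation databases (DBM::Deep files) from the source .tsv tables
+perl src/VEP_parser_DB_v20.pl -d=databases
+
+# Annotate a VEP-annotated VCF using those databases
+perl src/VEP_parser_v20_PD.pl -f=sample.vep.vcf.gz -d=databases -o=output --forkp=8
+```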
diff --git a/environment.yaml b/environment.yaml
new file mode 100644
index 0000000..91ae824
--- /dev/null
+++ b/environment.yaml
@@ -0,0 +1,12 @@
+---
+channels:
+  - bu_cnio
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - perl-archive-extract =0.88
+  - perl-dbm-deep =2.0016
+  - perl-exporter-tiny =1.002002
+  - perl-list-moreutils =0.430
+  - perl-parallel-forkmanager =2.02
diff --git a/images/pandrugsdb-sources.png b/images/pandrugsdb-sources.png
new file mode 100644
index 0000000..9937ab0
Binary files /dev/null and b/images/pandrugsdb-sources.png differ
diff --git a/src/VEP_parser_DB_v20.pl b/src/VEP_parser_DB_v20.pl
new file mode 100644
index 0000000..9391667
--- /dev/null
+++ b/src/VEP_parser_DB_v20.pl
@@ -0,0 +1,276 @@
+#!/usr/bin/perl
+use lib "modules/";
+use strict;
+use warnings;
+use Time::HiRes;
+use Switch;
+use Cwd;
+use File::Path;
+use Net::FTP;
+use Archive::Extract ();
+use POSIX ();
+use LWP::Simple;
+use File::Copy;
+use DBM::Deep;
+use Parallel::ForkManager;
+
+# Variable initialization
+my $dbdir;
+my $genesids = "custom";
+my $pathways = "pathway_desc.tsv";
+my $logfile = "";
+
+my ($start, $end, $time);
+
+# Command-line argument handling
+if (!@ARGV || grep (/^((\-\-help)|(\-h))$/,@ARGV)) {
+    &help_info;
+}
+
+for my $a (0..$#ARGV){
+
+    switch ($ARGV[$a]){
+
+        # databases path
+        case /^((\-\-databases=)|(\-d=))/ {
+            $ARGV[$a] =~ /\-(\-databases|d)=(.+)/;
+            $dbdir = $2 ? $2 : die "\nEmpty argument. Please enter the parameter information.\n\neg. -d=/home/epineiro/Programs/PCDA/databases\n\n";
+            $dbdir = $dbdir . "/";
+        }
+
+        else {
+            die "\nArgument $ARGV[$a] not valid.\n\n";
+        }
+
+    }
+
+}
+
+if (!$dbdir) {
+    die "\nPath to databases not indicated. Please, enter the databases path.\n\neg. -d=databases\n\n";
+}
+
+# Create folders
+#mkpath($dbdir, 0);
+
+# Start time counter
+$start = Time::HiRes::gettimeofday();
+
+&create_dbs;
+
+$end = Time::HiRes::gettimeofday();
+$time = sprintf("%.2f", $end - $start);
+printl ("\nTotal time: $time seconds\n");
+
+exit;
+
+sub create_dbs {
+
+    # Load files into variables
+    print "\n\nLoading database files...\n\n";
+
+    my @cosmic_files = glob("$dbdir/cosmic*.tsv");
+
+    foreach (@cosmic_files) {
+        my $file = $_;
+        $_ =~ s/.tsv/.db/;
+        my $cosmic_list = DBM::Deep->new($_);
+        print("$file\n");
+        open (FILE, "<$file") or die "Couldn't open file: $!";
+        while (<FILE>) {
+            chomp $_;
+            if ($. % 100000 == 0) {print("$.\n")};
+            my @line = split ("\t", $_);
+            $cosmic_list->{$line[0]} = [$line[1], $line[2], "$line[3] / $line[5]", "$line[4] / $line[5]"];
+        }
+        close FILE;
+    }
+
+    my $cosmic_gene_freq = DBM::Deep->new("$dbdir/cosmic_gene_freq.db");
+    my $cosmic_gf_file = "$dbdir/cosmic_gene_freq.tsv";
+    open (FILE, "<$cosmic_gf_file") or die "Couldn't open file: $!";
+    while (<FILE>) {
+        chomp $_;
+        my @line = split ("\t", $_);
+        $cosmic_gene_freq->{$line[0]} = [$line[1], $line[2]];
+    }
+    close FILE;
+
+    print "COSMIC loaded!\n";
+
+    my $genes_ids = DBM::Deep->new("$dbdir/genesids.db");
+    my $genes_ids_file = $dbdir . $genesids;
+    open (FILE, "<$genes_ids_file") or die "Couldn't open file: $!";
+    while (<FILE>) {
+        chomp $_;
+        my @line = split ("\t", $_);
+        $genes_ids->{$line[0]} = $line[1] if ($line[1]);
+    }
+    close FILE;
+
+    print "genes IDs loaded!\n";
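+
+    # Each source .tsv is materialised as an on-disk DBM::Deep store so the
+    # annotation script can look records up without holding every table in RAM.
+    # Key/value layouts built so far (as derived from the loops above):
+    #   cosmicNN.db         "gene:transcript:HGVSc" -> [COSMIC id, FATHMM prediction, gene freq "x / y", mutation freq "x / y"]
+    #   cosmic_gene_freq.db gene symbol -> [samples mutated, samples tested]
+    #   genesids.db         HGNC symbol -> gene id used for the KEGG lookups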
"gene_pathway.tsv"; + open FILE, "<$gene_pathway_file" or die "Couldn't open file: $!"; + while (){ + chomp $_; + my @line = split("\t", $_); + $kegg_gene_pathway_DB -> {$line[0]} = $line[1]; + } + close FILE; + + print "gene-pathway loaded!\n"; + + my $pathw_desc = DBM::Deep->new("$dbdir/pathways_desc.db"); + my $pathway_desc = $dbdir . $pathways; + open FILE, "<$pathway_desc" or die "Couldn't open file: $pathway_desc $!"; + while (){ + chomp $_; + my @line = split("\t", $_); + $pathw_desc -> {$line[0]} = $line[1]; + } + close FILE; + + print "pathway description loaded!\n"; + + my $pfam_a = DBM::Deep->new("$dbdir/pfam.db"); + my @pfam_file = glob("$dbdir/Pfam-A.full.tsv"); + open FILE, "<$pfam_file[0]" or die "Couldn't open file: $!"; + while (){ + chomp ($_); + my @line = split("\t", $_); + if (exists($pfam_a{$line[4]})) { + push @{$pfam_a{$line[4]}}, [$line[1], $line[2], $line[5], $line[6]]; + } else { + @{$pfam_a->{$line[4]}} = [$line[1], $line[2], $line[5], $line[6]]; + } + } + close FILE; + + print "pfam loaded!\n"; + + my $uniprot_b = DBM::Deep->new("$dbdir/uniprot_b.db"); + my @uniprot_file = "$dbdir/Uniprot.tsv"; + open FILE, "<$uniprot_file[0]" or die "Couldn't open file: $!"; + while () { + chomp ($_); + my @line = split("\t", $_); + my $name = $1 if ($line[0] =~ /^([A-Z0-9]+)/); + $uniprot_b->{$line[1]} = $name if ($line[1] ne ""); + } + close FILE; + + print "uniprot loaded!\n"; + + my $interpro_a = DBM::Deep->new("$dbdir/interpro_a.db"); + my @interpro_file = "$dbdir/Interpro.tsv"; + my $last_domain = DBM::Deep->new("$dbdir/last_domain.db"); + open FILE, "<$interpro_file[0]" or die "Couldn't open file: $!"; + while () { + chomp ($_); + my @line = split ("\t",$_); + if (exists($interpro_a{$line[3]})) { + push @{$interpro_a{$line[3]}}, [$line[0], $line[1], $line[4], $line[5]]; + } else { + @{$interpro_a->{$line[3]}} = [$line[0], $line[1], $line[4], $line[5]]; + } + if (exists($last_domain{$line[3]})) { + if ($last_domain{$line[3]} < $line[4]) { + $last_domain{$line[3]} = $line[4]; + } + } else { + $last_domain->{$line[3]} = $line[4]; + } + } + close FILE; + + print "interpro loaded!\n"; + + my $oncorole = DBM::Deep->new("$dbdir/generole.db"); + open FILE, "<$dbdir/generole.tsv" or die "Couldn't open file: $!"; + my %pos; + while () { + chomp ($_); + my @line = split ("\t",$_); + if ($_ =~ /^gene/) { + for my $i (0..$#line) { + $pos{$i} = $line[$i]; + } + } else { + my @roles = @line[1..$#line]; + my @role_list; + for my $i (0 .. 
+    my $oncorole = DBM::Deep->new("$dbdir/generole.db");
+    open FILE, "<$dbdir/generole.tsv" or die "Couldn't open file: $!";
+    my %pos;
+    while (<FILE>) {
+        chomp ($_);
+        my @line = split ("\t",$_);
+        if ($_ =~ /^gene/) {
+            for my $i (0..$#line) {
+                $pos{$i} = $line[$i];
+            }
+        } else {
+            my @roles = @line[1..$#line];
+            my @role_list;
+            for my $i (0 .. $#roles) {
+                my $role = $roles[$i];
+                if ($role ne "") {
+                    push(@role_list,"$pos{$i+1}:$role");
+                }
+            }
+            $oncorole->{$line[0]} = join ("; ", @role_list);
+        }
+    }
+    close FILE;
+    print "Gene Role loaded!\n";
+
+    my $gscore = DBM::Deep->new("$dbdir/gscore.db");
+    open (ESSEN, "<$dbdir/gscore_Ene_2023.tsv") or die "Couldn't open file: $!";
+    while (<ESSEN>) {
+        chomp $_;
+        my @line = split ("\t", $_);
+        unless ($line[0] eq "checked_gene_symbol") {
+            $gscore->{$line[0]} = $line[1];
+        }
+    }
+    close ESSEN;
+    print "gscores loaded!\n";
+
+    my $cancer_domain = DBM::Deep->new("$dbdir/cancer_domain.db");
+    open DOM, "<$dbdir/domains.tsv" or die "Couldn't open file: $!";
+    while (<DOM>) {
+        chomp ($_);
+        my @line = split ("\t",$_);
+        unless (exists($cancer_domain->{$line[4]})) {
+            $cancer_domain->{$line[4]} = "";
+        }
+    }
+    close DOM;
+
+    print "cancer domains loaded!\n";
+
+    my $clinvar = DBM::Deep->new("$dbdir/clinvar.db");
+    my $clinvar_file = "$dbdir/Clinvar.tsv";
+    open CLINVAR, "<$clinvar_file" or die "Couldn't open file: $!";
+    while (<CLINVAR>) {
+        chomp $_;
+        my @line = split ("\t", $_);
+        if ($line[1] eq "GRCh38") {
+            # Variants are keyed as chr:pos:ref:alt; repeated keys append values
+            if (exists($clinvar->{"$line[2]:$line[3]:$line[4]:$line[5]"})) {
+                @{$clinvar->{"$line[2]:$line[3]:$line[4]:$line[5]"}}[0] .= "; $line[7]";
+                @{$clinvar->{"$line[2]:$line[3]:$line[4]:$line[5]"}}[1] .= "; $line[0]";
+                @{$clinvar->{"$line[2]:$line[3]:$line[4]:$line[5]"}}[2] .= "; $line[8]";
+            } else {
+                $clinvar->{"$line[2]:$line[3]:$line[4]:$line[5]"} = [$line[7], $line[0], $line[8]];
+            }
+        }
+    }
+    close CLINVAR;
+
+    print "clinvar loaded!\n";
+}
+
+sub printl {
+    $logfile = $logfile . $_[0];
+    print $_[0];
+}
+
+sub help_info {
+
+    print "--databases=directory or -d=directory \t\t\t Absolute path to databases directory. Mandatory.\n\n";
+
+    print "\ne.g. VEP_parser_DB_v20.pl -d=/home/epineiro/Programs/PCDA/databases\n\n";
+    exit;
+
+}
diff --git a/src/VEP_parser_v20_PD.pl b/src/VEP_parser_v20_PD.pl
new file mode 100644
index 0000000..765a0cf
--- /dev/null
+++ b/src/VEP_parser_v20_PD.pl
@@ -0,0 +1,1037 @@
+#!/usr/bin/perl
+use lib "modules/";
+use strict;
+use warnings;
+use Time::HiRes;
+use Switch;
+use Cwd;
+use File::Path;
+use Net::FTP;
+use POSIX ();
+use LWP::Simple;
+use File::Copy;
+use DBM::Deep;
+use Parallel::ForkManager;
+use List::MoreUtils qw(uniq);
+
+# Variable Initialization
+my $outdir = "./output/";
+my $dbdir;
+my $outpath = getcwd;
+my (%cosmic_list0, %cosmic_list1, %cosmic_list2, %cosmic_list3, %cosmic_list4, %cosmic_list5, %cosmic_list6, %cosmic_list7, %cosmic_list8, %cosmic_list9, %cosmic_list10, %cosmic_list11, %cosmic_list12, %cosmic_list13, %cosmic_list14, %cosmic_list15, %cosmic_list16, %cosmic_list17, %cosmic_list18, %cosmic_list19, %cosmic_list20, %cosmic_list21, %cosmic_list22, %cosmic_list23, %cosmic_list24, %cosmic_list25, %cosmic_gene_freq);
+my ($cosmic_list0, $cosmic_list1, $cosmic_list2, $cosmic_list3, $cosmic_list4, $cosmic_list5, $cosmic_list6, $cosmic_list7, $cosmic_list8, $cosmic_list9, $cosmic_list10, $cosmic_list11, $cosmic_list12, $cosmic_list13, $cosmic_list14, $cosmic_list15, $cosmic_list16, $cosmic_list17, $cosmic_list18, $cosmic_list19, $cosmic_list20, $cosmic_list21, $cosmic_list22, $cosmic_list23, $cosmic_list24, $cosmic_list25, $cosmic_gene_freq);
+my $vep_results_file;
+my $logfile = "";
+my %pathw_desc;
+my $pathw_desc;
+my %genes_ids;
+my $genes_ids;
+my $root_name = "";
+my $jobid;
+my $genes_affected;
+
+my $conseq_file = 1;
+my $MAX_PROCESSES = 8;
+
+my %kegg_gene_pathway_DB;
+my $kegg_gene_pathway_DB;
+
+my ($start, $end, $time);
+my (%pfam_a, %uniprot_b, %interpro_a);
+my
($pfam_a, $uniprot_b, $interpro_a); + +my %zygosity; +my %last_domain; +my %gscore; +my %cancer_domain; +my %clinvar; +my %appris; + +my $generole; +my $last_domain; +my $gscore; +my $cancer_domain; +my $clinvar; +my $appris; + +#Command line arguments handle +if (!@ARGV || grep (/^((\-\-help)|(\-h))$/,@ARGV)) { + &help_info; +} + +for my $a (0..$#ARGV){ + + switch ($ARGV[$a]){ + + #Input file + case /^((\-\-vepfile=)|(\-f=))/ { + $ARGV[$a] =~ /\-(\-vepfile|f)=(.+)/; + $vep_results_file = $2 ? $2 : die "\nEmpty argument. Please enter the parameter information.\n\neg. -f=file.vcf\n\n"; + my @vep_results_file_tmp = glob ("$2"); + $vep_results_file = $vep_results_file_tmp[0]; + } + + # Output file + case /^((\-\-output=)|(\-o=))/ { + $ARGV[$a] =~ /\-(\-output|o)=(.+)/; + $outdir = $2 ? $2 : die "\nEmpty argument. Please enter the parameter information.\n\neg. -o=/home/user/z13-222\n\n"; + $outpath = ""; + } + + # Root for the file + case /^((\-\-root=)|(\-r=))/{ + $ARGV[$a] =~ /\-(\-root|r)=(.+)/; + if ($2) { + $root_name = $2 . "_"; + } + else { + die "\nEmpty argument. Please enter the parameter information.\n\neg. -r=analysis\n\n"; + } + } + + # JobID + case /^((\-\-int=)|(\-i=))/ { + $ARGV[$a] =~ /\-(\-int|i)=(.+)/; + $jobid = $2 ? $2 : die "\nEmpty argument. Please enter the parameter information.\n\neg. -i=20140213_000000\n\n"; + } + + # databases path + case /^((\-\-databases=)|(\-d=))/ { + $ARGV[$a] =~ /\-(\-databases|d)=(.+)/; + $dbdir = $2 ? $2 : die "\nEmpty argument. Please enter the parameter information.\n\neg. -d=/home/epineiro/Programs/PCDA/databases\n\n"; + $dbdir = $dbdir . "/"; + } + + # Fork processes + case /^(\-\-forkp=)/ { + $ARGV[$a] =~ /\-(\-forkp)=(.+)/; + $MAX_PROCESSES = $2 ? $2 : die "\nEmpty argument. Please enter the parameter information.\n\neg. --forkp=8\n\n"; + } + else { + die "\nArgument $ARGV[$a] not valid.\n\n"; + } + + } + +} + +if (!$vep_results_file) { + die "\nVEP file not indicated. Please enter the absolute path to VEP sorted output file.\n\neg. -f=z13-222\n\n"; +} +if (!$dbdir) { + die "\nPath to databases not indicated. Please, enter the databases path.\n\neg. -d=databases\n\n"; +} + +# Create folders +mkpath($dbdir, 0); + +# Start time counter +$start = Time::HiRes::gettimeofday(); + +# get new experiment id +if ($jobid) { + $jobid = $root_name . $jobid . "_VEP"; +} +else { + $jobid = &get_runid; +} +my $outexp = $outdir . "/" . $jobid . "/"; + +# Create experiment folder in output folder +mkpath($outexp); + +# Call VEP main subroutine +&VEP_Parser_Csv; + +$end = Time::HiRes::gettimeofday(); +$time = sprintf("%.2f", $end - $start); +printl ("\nTotal time: $time seconds\n"); + +# Log file creation +open LOGFILE, ">$outpath$jobid" . 
".log" or die $!; +print LOGFILE $logfile; +close LOGFILE; + +exit; + +sub load_vars2 { +#Load files into variables + + $cosmic_list0 = DBM::Deep->new("$dbdir/cosmic00.db"); + $cosmic_list1 = DBM::Deep->new("$dbdir/cosmic01.db"); + $cosmic_list2 = DBM::Deep->new("$dbdir/cosmic02.db"); + $cosmic_list3 = DBM::Deep->new("$dbdir/cosmic03.db"); + $cosmic_list4 = DBM::Deep->new("$dbdir/cosmic04.db"); + $cosmic_list5 = DBM::Deep->new("$dbdir/cosmic05.db"); + $cosmic_list6 = DBM::Deep->new("$dbdir/cosmic06.db"); + $cosmic_list7 = DBM::Deep->new("$dbdir/cosmic07.db"); + $cosmic_list8 = DBM::Deep->new("$dbdir/cosmic08.db"); + $cosmic_list9 = DBM::Deep->new("$dbdir/cosmic09.db"); + $cosmic_list10 = DBM::Deep->new("$dbdir/cosmic10.db"); + $cosmic_list11 = DBM::Deep->new("$dbdir/cosmic11.db"); + $cosmic_list12 = DBM::Deep->new("$dbdir/cosmic12.db"); + $cosmic_list13 = DBM::Deep->new("$dbdir/cosmic13.db"); + $cosmic_list14 = DBM::Deep->new("$dbdir/cosmic14.db"); + $cosmic_list15 = DBM::Deep->new("$dbdir/cosmic15.db"); + $cosmic_list16 = DBM::Deep->new("$dbdir/cosmic16.db"); + $cosmic_list17 = DBM::Deep->new("$dbdir/cosmic17.db"); + $cosmic_list18 = DBM::Deep->new("$dbdir/cosmic18.db"); + $cosmic_list19 = DBM::Deep->new("$dbdir/cosmic19.db"); + $cosmic_list20 = DBM::Deep->new("$dbdir/cosmic20.db"); + $cosmic_list21 = DBM::Deep->new("$dbdir/cosmic21.db"); + $cosmic_list22 = DBM::Deep->new("$dbdir/cosmic22.db"); + $cosmic_list23 = DBM::Deep->new("$dbdir/cosmic23.db"); + $cosmic_list24 = DBM::Deep->new("$dbdir/cosmic24.db"); + $cosmic_list25 = DBM::Deep->new("$dbdir/cosmic25.db"); + $cosmic_gene_freq = DBM::Deep->new("$dbdir/cosmic_gene_freq.db"); + + $genes_ids = DBM::Deep->new("$dbdir/genesids.db"); + + $kegg_gene_pathway_DB = DBM::Deep->new("$dbdir/gene_pathway.db"); + + $pathw_desc = DBM::Deep->new("$dbdir/pathways_desc.db"); + + $pfam_a = DBM::Deep->new("$dbdir/pfam.db"); + + $uniprot_b = DBM::Deep->new("$dbdir/uniprot_b.db"); + + $interpro_a = DBM::Deep->new("$dbdir/interpro_a.db"); + $last_domain = DBM::Deep->new("$dbdir/last_domain.db"); + + $gscore = DBM::Deep->new("$dbdir/gscore.db"); + + $cancer_domain = DBM::Deep->new("$dbdir/cancer_domain.db"); + + $clinvar = DBM::Deep->new("$dbdir/clinvar.db"); + + $generole = DBM::Deep->new("$dbdir/generole.db"); + +} + +sub help_info { + + print "\n\n--vepfile=filename or -f=filename \t\t Input file containing results of VEP from Ensembl analysis. Mandatory.\n\n"; + print "--output=directory or -o=directory \t\t Execution output dir. Default ./output.\n\n"; + print "--int=jobID or -i=jobID \t\t\t Job ID code (when executing from sequencingAP). Default: Generated during execution.\n\n"; + print "--databases=directory or -d=directory \t\t\t Absolute path to databases directory. Mandatory.\n\n"; + print "--forkp \t\t\t\t\t\t Number of forks to improve runtime. Default: 4.\n\n"; + + print "\ni.e. VEP_parser.pl -f=file.vcf -o=/home/user/z13-222 -r=analysis -i=20140213_000000 -d=/home/epineiro/Programs/PCDA/databases --forkp=8\n\n"; + exit; + +} + +sub get_runid { + + my $timestamp = POSIX::strftime("%Y%m%d_%H%M%S", localtime); + return $root_name . "$timestamp" . 
"_VEP"; + +} + +sub VEP_Parser_Csv($$) {#Require a DB_conection and source data file + + # Main process + # Variable inti + my $data = ''; + + # Open vcf file + open (RFILE, "gunzip -c $vep_results_file |") || die "Could not find file $vep_results_file"; + printl ("\nProcessing file $vep_results_file...\n"); + my @rfile = ; + close RFILE; + + my %pos; + + my $count = 0; + + foreach my $i (0..$#rfile) { + + # Remove lines with ## & # to be able to handle file as csv + $rfile[$i] =~ s/\r/\n/g; + + if ($rfile[$i] =~ /^#[^#]/) { + $rfile[$i] =~ s/#//; + $data = $data . $rfile[$i]; + } + elsif ($rfile[$i] =~ /^[^#]/) { + $data = $data . $rfile[$i]; + $count++; + } + elsif ($rfile[$i] =~ /^##INFO=/) { + my @vep_fields = split ('\|', $1); + foreach my $i (0..$#vep_fields) { + $pos{Consequence} = $i if ($vep_fields[$i] eq "Consequence"); + $pos{Impact} = $i if ($vep_fields[$i] eq "IMPACT"); + $pos{Existing_variation} = $i if ($vep_fields[$i] eq "Existing_variation"); + $pos{Feature} = $i if ($vep_fields[$i] eq "Feature"); + $pos{PolyPhen} = $i if ($vep_fields[$i] eq "PolyPhen"); + $pos{SIFT} = $i if ($vep_fields[$i] eq "SIFT"); + $pos{CADD_PHRED} = $i if ($vep_fields[$i] eq "CADD_PHRED"); + $pos{CADD_RAW} = $i if ($vep_fields[$i] eq "CADD_RAW"); + $pos{SYMBOL} = $i if ($vep_fields[$i] eq "SYMBOL"); + $pos{Protein_position} = $i if ($vep_fields[$i] eq "Protein_position"); + $pos{Amino_acids} = $i if ($vep_fields[$i] eq "Amino_acids"); + $pos{HGVSc} = $i if ($vep_fields[$i] eq "HGVSc"); + $pos{HGVSp} = $i if ($vep_fields[$i] eq "HGVSp"); + $pos{GMAF} = $i if ($vep_fields[$i] eq "AF"); + $pos{CDS_position} = $i if ($vep_fields[$i] eq "CDS_position"); + $pos{Allele} = $i if ($vep_fields[$i] eq "Allele"); + $pos{Gene} = $i if ($vep_fields[$i] eq "Gene"); + $pos{Feature_type} = $i if ($vep_fields[$i] eq "Feature_type"); + $pos{cDNA_position} = $i if ($vep_fields[$i] eq "cDNA_position"); + $pos{Codons} = $i if ($vep_fields[$i] eq "Codons"); + $pos{VARIANT_CLASS} = $i if ($vep_fields[$i] eq "VARIANT_CLASS"); + $pos{gnomADe} = $i if ($vep_fields[$i] eq "gnomADe_AF"); + $pos{gnomADe_NFE} = $i if ($vep_fields[$i] eq "gnomADe_NFE_AF"); + $pos{EXON} = $i if ($vep_fields[$i] eq "EXON"); + $pos{APPRIS} = $i if ($vep_fields[$i] eq "APPRIS"); + + } + + } else { + + } + + } + + # Save modifications to ensembl_vep.csv + my $outexp_path = $outexp; + $outexp_path =~ s/^\.//g; + + $outpath .= $outexp_path; + my $outpathfile = $outpath . 
"ensembl_vep.csv"; + + open (SFILE, ">$outpathfile") || die "Could not save temp file\n"; + print SFILE $data; + close SFILE; + + printl ("\nensembl_vep.csv file created!\n"); + + my $sth; + + my @last_gene = ("","","",""); + + print "\n\nCreating annotations for $count variants...\n"; + + open (INPUT, "$outexp/ensembl_vep.csv"); + my $ensemblhead = ""; + my $chr = ""; + my $filechr = ""; + my @chr_names = (); + while () { + if ($_ =~ /^CHROM/) { + $ensemblhead = $_; + } else { + my @line = split ("\t", $_); + if ($line[0] ne $chr) { + $chr = $line[0]; + push (@chr_names, $chr); + if ($filechr ne "") {close $filechr} + $filechr = "$outexp/ensembl_vep_$chr.csv"; + open (OUTPUTCHR, ">$filechr"); + print OUTPUTCHR $ensemblhead; + print OUTPUTCHR $_; + } else { + print OUTPUTCHR $_; + } + } + } + close INPUT; + + my $pm = Parallel::ForkManager->new($MAX_PROCESSES); + + DATA_LOOP: + foreach $chr(@chr_names) { + print "Processing chromosome $chr...\n"; + &load_vars2; + my $pid = $pm->start and next DATA_LOOP; + open (INPUT, "$outexp/ensembl_vep_$chr.csv"); + open (OUT, ">$outexp/vep_data_$chr.csv"); + open (OUTSORT, ">$outexp/vep_data_sorted_$chr.csv"); + + print OUT "Chr\tLoc\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tsample\tmut\tlocation\tallele\tgene\tfeature\tfeature_type\tconsequence\timpact\tcdna_position\tcds_position\tprotein_position\tamino_acids\tcodons\texisting_variation\textra\tprincipal\tpoly_effect\tpoly_score\tsift_effect\tsift_score\tCADD_phred\tCADD_raw\tgene_hgnc\tgene_role\tcosmic_id\tKEGG_data\tKEGG_path_id\tclinvar_acc\tclinvar_disease\tclinvar_clinical_significance\tvariation_type\tHGVS_cDNA\tHGVS_protein\tGMAF\tgnomAD\tgnomAD_NFE\tpfam\tinterpro\tgene_cosmic_freq\tmut_cosmic_freq\tvscore\tbranch\tzygosity\n"; + + print OUTSORT "chr\tloc\tmut\tgene\tfeature\tfeature_type\tconsequence\timpact\tprincipal\tpoly_effect\tpoly_score\tsift_effect\tsift_score\tCADD_phred\tCADD_raw\tgene_hgnc\tgene_role\tcosmic_id\tkegg_data\tkegg_path_id\tprotein_position\tamino_acids\tclinvar_acc\tclinvar_disease\tclinvar_clinical_significance\tvariation_type\tHGVS_cDNA\tHGVS_protein\tGMAF\tgnomAD\tgnomAD_NFE\tpfam\tinterpro\tgene_cosmic_freq\tmut_cosmic_freq\tvscore\tbranch\tzygosity\n"; + + $count = 1; + while () { + unless ($_ =~ /^CHROM/) { + chomp $_; + my @line = split ("\t", $_); + + $line[0] =~ s/chr//; + $line[2] = "" if ($line[2] eq "."); + + my $VCF_pos = $line[1]; + my $VCF_ref = $line[3]; + my $VCF_alt = $line[4]; + + if ((length($line[3]) ne length($line[4])) && ($line[4] !~ /,/)) { + if (length($line[3]) > length($line[4])) { + #Puede haber una deleción interna que no se procesaría correctamente + if ($line[3] =~ /^$line[4]/) { + $line[3] =~ s/^$line[4]//; + $line[1] = $line[1] + length($line[4]); + $line[4] = "-"; + } + } else { + #Puede haber una inserción interna que no se procesaría correctamente (ej. TGCTCTACC/TATAGATCGGAAGCTCTACC) + if ($line[4] =~ /^$line[3]/) { + $line[4] =~ s/^$line[3]//; + $line[1] = $line[1] + length($line[3]); + $line[3] = "-"; + } + } + } + + $line[10] = $line[3] . "/" . $line[4]; + + $line[11] = $line[0] . ":" . 
$line[1]; + + if ($line[9]) { + my @gentype = split(":", $line[9]); + if ($gentype[0] eq "1/1") { + $zygosity{"$line[0]_$line[1]_$line[10]"} = "Homozygous"; + } + else { + $zygosity{"$line[0]_$line[1]_$line[10]"} = "Heterozygous"; + } + } else { + $zygosity{"$line[0]_$line[1]_$line[10]"} = ""; + } + + my @q_data; + + if ($line[7] =~ /CSQ=(.+)/) { + my @trans = split (",", $1); + foreach my $t (0..$#trans) { + my $transcript = ""; + my $pol_cons = ""; + my $pol_sco = ""; + my $sift_cons = ""; + my $sift_sco = ""; + my $CADD_phred = ""; + my $CADD_raw = ""; + my $cosmic_id = ""; + my $cosmic_fathmm = ""; + my $gene_freq = 0; + my $mut_freq = 0; + my $cosmic_total = ""; + my $kegg_data = ""; + my $kegg_ids = ""; + my $clinvar_acc = ""; + my $clinvar_dis = ""; + my $clinvar_pat = ""; + my $HGVSc = ""; + my $HGVSp = ""; + my $GMAF = ""; + my $uniprot = ""; + my $pfam = ""; + my $interpro = ""; + my $var_type = ""; + my $gnomAD = ""; + my $gnomAD_NFE = ""; + + my @fields = split ('\|', $trans[$t]); + + $fields[$pos{Consequence}] =~ s/&/,/g; + $fields[$pos{Existing_variation}] =~ s/&/,/g if ($fields[$pos{Existing_variation}]); + + if ($fields[$pos{HGVSp}] && $fields[$pos{HGVSp}] =~ /:(.+)/) { + $HGVSp = $1; + } + + if ($fields[$pos{HGVSc}] && $fields[$pos{HGVSc}] =~ /:(.+)/) { + $HGVSc = $1; + } + + if ($fields[$pos{PolyPhen}] && $fields[$pos{PolyPhen}] =~ /(\w+)\((\d+\.*\d*)\)/) { + $pol_cons = $1; + $pol_sco = $2; + } + +# elsif ($fields[$pos{Consequence}] =~ /stop_gained/ || $fields[$pos{Consequence}] =~ /frameshift_variant/) { +# $pol_cons = "inferred"; +# $pol_sco = 1; +# } + + if ($fields[$pos{SIFT}] && $fields[$pos{SIFT}] =~ /(\w+)\((\d+\.*\d*)\)/) { + $sift_cons = $1; + $sift_sco = $2; + } + +# elsif ($fields[$pos{Consequence}] =~ /stop_gained/ || $fields[$pos{Consequence}] =~ /frameshift_variant/) { +# $sift_cons = "inferred"; +# $sift_sco = 0; +# } + + $CADD_phred = $fields[$pos{CADD_PHRED}]; + $CADD_raw = $fields[$pos{CADD_RAW}]; + + if ($fields[$pos{HGVSc}]) { + ($cosmic_id, $cosmic_fathmm, $gene_freq, $mut_freq, $cosmic_total) = &chkmut_cosmic($fields[$pos{SYMBOL}], $fields[$pos{Feature}], $HGVSc); + $cosmic_id .= ":$cosmic_fathmm" if ($cosmic_fathmm ne ""); + } + + if (exists($cosmic_gene_freq->{$fields[$pos{SYMBOL}]})) { + $gene_freq = "@{$cosmic_gene_freq->{$fields[$pos{SYMBOL}]}}[0] / @{$cosmic_gene_freq->{$fields[$pos{SYMBOL}]}}[1]" + } + + if ($fields[$pos{SYMBOL}]) { + if ($fields[$pos{SYMBOL}] ne $last_gene[0]) { + #Get information about gene symbol: pathway_description, pathway_ids and entrez_gene_id + @last_gene = get_kegg_id_sym(uc($fields[$pos{SYMBOL}])); + } + $kegg_data = $last_gene[1]; + $kegg_ids = $last_gene[2]; + } + + $var_type = $fields[$pos{"VARIANT_CLASS"}]; + + if ($fields[$pos{GMAF}]) { + my $num = $fields[$pos{GMAF}]; + my @GMAF_a = split ("&", $fields[$pos{GMAF}]); + foreach my $gf (@GMAF_a) { + if ($gf ne "") { + $GMAF = $gf * 100; + } + } + } + + if (exists($clinvar->{"$line[0]:$VCF_pos:$VCF_ref:$VCF_alt"})) { + $clinvar_acc = @{$clinvar->{"$line[0]:$VCF_pos:$VCF_ref:$VCF_alt"}}[1]; + $clinvar_dis = @{$clinvar->{"$line[0]:$VCF_pos:$VCF_ref:$VCF_alt"}}[0]; + $clinvar_pat = @{$clinvar->{"$line[0]:$VCF_pos:$VCF_ref:$VCF_alt"}}[2]; + } + + my $prot_pos = $fields[$pos{Protein_position}]; + my $prot_end; + my $ident = ""; + + if (exists($uniprot_b->{$fields[$pos{SYMBOL}]})) { + $ident = $uniprot_b->{$fields[$pos{SYMBOL}]}; + + my $prot_end = 0; + + if ($fields[$pos{Protein_position}] =~ /(\d+)\-(\d+)/) { + $prot_pos = $1; + $prot_end = $2; + } + + if 
($fields[$pos{Protein_position}] =~ /(\d+)\-(\?)/) { + $prot_pos = $1; + $prot_end = 0; + } + + if ($fields[$pos{Protein_position}] =~ /(\?)\-(\d+)/) { + $prot_pos = 0; + $prot_end = $2; + } + + + if ($prot_pos ne "") { + if (exists($pfam_a->{$ident})) { + foreach my $ia (0..scalar(@{$pfam_a->{$ident}})-1) { + if (($prot_pos >= ${@{$pfam_a->{$ident}}[$ia]}[2] && $prot_pos <= ${@{$pfam_a->{$ident}}[$ia]}[3]) || ($prot_end >= ${@{$pfam_a->{$ident}}[$ia]}[2] && $prot_end <= ${@{$pfam_a->{$ident}}[$ia]}[3])) { + $pfam = "${@{$pfam_a->{$ident}}[$ia]}[0]: ${@{$pfam_a->{$ident}}[$ia]}[1]"; + } + } + } + if (exists($interpro_a->{$ident})) { + foreach my $ii (0..scalar(@{$interpro_a->{$ident}})-1) { + if (($prot_pos >= ${@{$interpro_a->{$ident}}[$ii]}[2] && $prot_pos <= ${@{$interpro_a->{$ident}}[$ii]}[3]) || ($prot_end >= ${@{$interpro_a->{$ident}}[$ii]}[2] && $prot_end <= ${@{$interpro_a->{$ident}}[$ii]}[3])) { + $interpro = "${@{$interpro_a->{$ident}}[$ii]}[0]: ${@{$interpro_a->{$ident}}[$ii]}[1]"; + } + } + } + if ($fields[$pos{Consequence}] =~ /(stop_gained|frameshift_variant)/ && $interpro eq "" && exists($last_domain->{$ident}) && $prot_pos <= $last_domain->{$ident}) { + $interpro = "Mutation previous last protein domain"; + } + } + + } + + my $gene_role = ''; + $gene_role = $generole->{$fields[$pos{SYMBOL}]} if ($generole->{$fields[$pos{SYMBOL}]}); + + if ($fields[$pos{gnomADe}]) { + my @gnomAD = split ("&", $fields[$pos{gnomADe}]); + foreach my $ex (@gnomAD) { + if ($ex ne "") { + $gnomAD = $ex * 100; + } + } + } + if ($fields[$pos{gnomADe_NFE}]) { + my @gnomAD = split ("&", $fields[$pos{gnomADe_NFE}]); + foreach my $ex (@gnomAD) { + if ($ex ne "") { + $gnomAD_NFE = $ex * 100; + } + } + } + my @q_data_line = [$line[0], $line[1], $line[2], $line[3], $line[4], $line[5], $line[6], $line[7], $line[8], $line[9], $line[10], $line[11], $fields[$pos{Allele}], $fields[$pos{Gene}], $fields[$pos{Feature}], $fields[$pos{Feature_type}], $fields[$pos{Consequence}], $fields[$pos{Impact}], $fields[$pos{cDNA_position}], $fields[$pos{CDS_position}], $fields[$pos{Protein_position}], $fields[$pos{Amino_acids}], $fields[$pos{Codons}], $fields[$pos{Existing_variation}], "", $fields[$pos{APPRIS}], $pol_cons, $pol_sco, $sift_cons, $sift_sco, $CADD_phred, $CADD_raw, $fields[$pos{SYMBOL}], $gene_role, $cosmic_id, $kegg_data, $kegg_ids, $clinvar_acc, $clinvar_dis, $clinvar_pat, $var_type, $HGVSc, $HGVSp, $GMAF, $gnomAD, $gnomAD_NFE, $pfam, $interpro, $gene_freq, $mut_freq]; + + @q_data_line = &create_vscore (@q_data_line); + + no warnings 'uninitialized'; + push @q_data, @q_data_line; + print OUT join("\t", @{$q_data[scalar(@q_data)-1]}), "\n"; + } + $count++; + } + + @q_data = sort { $a->[0] cmp $b->[0] || $a->[1] cmp $b->[1] || $a->[10] cmp $b->[10] || $a->[13] cmp $b->[13] || $a->[42] cmp $b->[42] || $a->[14] cmp $b->[14]} @q_data; + + foreach my $qi (0..$#q_data) { + no warnings 'uninitialized'; + if ($conseq_file == 1) { + if ($q_data[$qi][17] =~ /HIGH|MODERATE/) { + print OUTSORT 
"$q_data[$qi][0]\t$q_data[$qi][1]\t$q_data[$qi][10]\t$q_data[$qi][13]\t$q_data[$qi][14]\t$q_data[$qi][15]\t$q_data[$qi][16]\t$q_data[$qi][17]\t$q_data[$qi][25]\t$q_data[$qi][26]\t$q_data[$qi][27]\t$q_data[$qi][28]\t$q_data[$qi][29]\t$q_data[$qi][30]\t$q_data[$qi][31]\t$q_data[$qi][32]\t$q_data[$qi][33]\t$q_data[$qi][34]\t$q_data[$qi][35]\t$q_data[$qi][36]\t$q_data[$qi][20]\t$q_data[$qi][21]\t$q_data[$qi][37]\t$q_data[$qi][38]\t$q_data[$qi][39]\t$q_data[$qi][40]\t$q_data[$qi][41]\t$q_data[$qi][42]\t$q_data[$qi][43]\t$q_data[$qi][44]\t$q_data[$qi][45]\t$q_data[$qi][46]\t$q_data[$qi][47]\t$q_data[$qi][48]\t$q_data[$qi][49]\t$q_data[$qi][50]\t$q_data[$qi][51]\t$q_data[$qi][52]\t$q_data[$qi][53]\t$q_data[$qi][54]\t$q_data[$qi][55]\t$q_data[$qi][56]\t$q_data[$qi][57]\t$q_data[$qi][58]\t$q_data[$qi][59]\t$q_data[$qi][60]\t$q_data[$qi][61]\n"; + } + } else { + print OUTSORT "$q_data[$qi][0]\t$q_data[$qi][1]\t$q_data[$qi][10]\t$q_data[$qi][13]\t$q_data[$qi][14]\t$q_data[$qi][15]\t$q_data[$qi][16]\t$q_data[$qi][17]\t$q_data[$qi][25]\t$q_data[$qi][26]\t$q_data[$qi][27]\t$q_data[$qi][28]\t$q_data[$qi][29]\t$q_data[$qi][30]\t$q_data[$qi][31]\t$q_data[$qi][32]\t$q_data[$qi][33]\t$q_data[$qi][34]\t$q_data[$qi][35]\t$q_data[$qi][36]\t$q_data[$qi][37]\t$q_data[$qi][20]\t$q_data[$qi][21]\t$q_data[$qi][38]\t$q_data[$qi][39]\t$q_data[$qi][40]\t$q_data[$qi][41]\t$q_data[$qi][42]\t$q_data[$qi][43]\t$q_data[$qi][44]\t$q_data[$qi][45]\t$q_data[$qi][46]\t$q_data[$qi][47]\t$q_data[$qi][48]\t$q_data[$qi][49]\t$q_data[$qi][50]\t$q_data[$qi][51]\t$q_data[$qi][52]\t$q_data[$qi][53]\t$q_data[$qi][54]\t$q_data[$qi][55]\t$q_data[$qi][56]\t$q_data[$qi][57]\t$q_data[$qi][58]\t$q_data[$qi][59]\t$q_data[$qi][60]\t$q_data[$qi][61]\n"; + } + } + } + } + close INPUT; + close OUT; + close OUTSORT; + print "Chromosome $chr processed!\n"; + $pm->finish; # Terminates the child process + } + $pm->wait_all_children; + open (GREATOUT, ">$outexp/vep_data.csv"); + print GREATOUT "Chr\tLoc\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tsample\tmut\tlocation\tallele\tgene\tfeature\tfeature_type\tconsequence\timpact\tcdna_position\tcds_position\tprotein_position\tamino_acids\tcodons\texisting_variation\textra\tprincipal\tpoly_effect\tpoly_score\tsift_effect\tsift_score\tCADD_phred\tCADD_raw\tgene_hgnc\tgene_role\tcosmic_id\tKEGG_data\tKEGG_path_id\tclinvar_acc\tclinvar_disease\tclinvar_clinical_significance\tvariation_type\tHGVS_cDNA\tHGVS_protein\tGMAF\tgnomAD\tgnomAD_NFE\tpfam\tinterpro\tgene_cosmic_freq\tmut_cosmic_freq\tvscore\tbranch\n"; + + foreach $chr (@chr_names) { + open (INPUT, "$outexp/vep_data_$chr.csv"); + while () { + unless ($_ =~ /^Chr/) { + print GREATOUT $_; + } + } + close INPUT; + unlink ("$outexp/vep_data_$chr.csv"); + unlink ("$outexp/ensembl_vep_$chr.csv"); + } + close GREATOUT; + + open (GREATOUT, ">$outexp/vep_data_sorted.csv"); + print GREATOUT "chr\tloc\tmut\tgene\tfeature\tfeature_type\tconsequence\timpact\tprincipal\tpoly_effect\tpoly_score\tsift_effect\tsift_score\tCADD_phred\tCADD_raw\tgene_hgnc\tgene_role\tcosmic_id\tkegg_data\tkegg_path_id\tprotein_position\tamino_acids\tclinvar_acc\tclinvar_disease\tclinvar_clinical_significance\tvariation_type\tHGVS_cDNA\tHGVS_protein\tGMAF\tgnomAD\tgnomAD_NFE\tpfam\tinterpro\tgene_cosmic_freq\tmut_cosmic_freq\tvscore\tbranch\n"; + + foreach $chr (@chr_names) { + open (INPUT, "$outexp/vep_data_sorted_$chr.csv"); + while () { + unless ($_ =~ /^chr/) { + print GREATOUT $_; + } + } + close INPUT; + unlink ("$outexp/vep_data_sorted_$chr.csv") + } + close GREATOUT; + + # Save 
+    # Save query to file
+    my $veptable_sort = $outpath . "vep_data_sorted.csv";
+
+    # Select the highest score for the principal isoform
+    my @vep_file;
+    open FILE, "<$veptable_sort" or die $!;
+    while (<FILE>) {
+        chomp $_;
+        my @line = split ("\t", $_);
+        push @vep_file, [@line];
+    }
+    close FILE;
+
+    my %isoform;
+    foreach my $is (1..$#vep_file) {
+        if ($vep_file[$is][7] =~ /HIGH|MODERATE/) {
+            if (exists($isoform{"$vep_file[$is][15]"})) {
+                $isoform{"$vep_file[$is][15]"}[0] = 1 if ($vep_file[$is][8] eq "P1");
+                $isoform{"$vep_file[$is][15]"}[1] = 1 if ($vep_file[$is][8] eq "P2");
+                $isoform{"$vep_file[$is][15]"}[2] = 1 if ($vep_file[$is][8] eq "P3");
+                $isoform{"$vep_file[$is][15]"}[3] = 1 if ($vep_file[$is][8] eq "P4");
+                $isoform{"$vep_file[$is][15]"}[4] = 1 if ($vep_file[$is][8] eq "P5");
+                $isoform{"$vep_file[$is][15]"}[5] = 1 if ($vep_file[$is][8] eq "A1" || $vep_file[$is][8] eq "A2" || $vep_file[$is][8] eq "");
+            } else {
+                $isoform{"$vep_file[$is][15]"} = [1, 0, 0, 0, 0, 0] if ($vep_file[$is][8] eq "P1");
+                $isoform{"$vep_file[$is][15]"} = [0, 1, 0, 0, 0, 0] if ($vep_file[$is][8] eq "P2");
+                $isoform{"$vep_file[$is][15]"} = [0, 0, 1, 0, 0, 0] if ($vep_file[$is][8] eq "P3");
+                $isoform{"$vep_file[$is][15]"} = [0, 0, 0, 1, 0, 0] if ($vep_file[$is][8] eq "P4");
+                $isoform{"$vep_file[$is][15]"} = [0, 0, 0, 0, 1, 0] if ($vep_file[$is][8] eq "P5");
+                $isoform{"$vep_file[$is][15]"} = [0, 0, 0, 0, 0, 1] if ($vep_file[$is][8] eq "A1" || $vep_file[$is][8] eq "A2" || $vep_file[$is][8] eq "");
+            }
+        }
+    }
+
+    foreach my $key (keys (%isoform)) {
+        if ($isoform{$key}[0] == 1) {
+            $isoform{$key} = "P1";
+        } elsif ($isoform{$key}[1] == 1) {
+            $isoform{$key} = "P2";
+        } elsif ($isoform{$key}[2] == 1) {
+            $isoform{$key} = "P3";
+        } elsif ($isoform{$key}[3] == 1) {
+            $isoform{$key} = "P4";
+        } elsif ($isoform{$key}[4] == 1) {
+            $isoform{$key} = "P5";
+        } else {
+            $isoform{$key} = "|A1|A2";
+        }
+    }
+
+    my %genes_affected;
+    my $head = "";
+    foreach my $is (0..$#vep_file) {
+        if ($is == 0) {
+            $head = join("\t", @{$vep_file[$is]});
+        } else {
+            if ($vep_file[$is][7] =~ /HIGH|MODERATE/) {
+                if ($vep_file[$is][8] =~ /^$isoform{$vep_file[$is][15]}$/) {
+                    if (exists($genes_affected{$vep_file[$is][15]})) {
+                        if ($vep_file[$is][35] >= $genes_affected{$vep_file[$is][15]}[0]) {
+                            $genes_affected{$vep_file[$is][15]} = [$vep_file[$is][35], $vep_file[$is][36]];
+                            push @{$genes_affected{$vep_file[$is][15]}}, @{$vep_file[$is]};
+                        }
+                    } else {
+                        $genes_affected{$vep_file[$is][15]} = [$vep_file[$is][35], $vep_file[$is][36]];
+                        push @{$genes_affected{$vep_file[$is][15]}}, @{$vep_file[$is]};
+                    }
+                }
+            }
+        }
+    }
+
+    $genes_affected = $outpath .
"genes_affected.csv"; + + open (FILE, ">$genes_affected"); + print FILE "gene_hgnc\tmax(vscore)\tbranch\t$head\n"; + foreach my $key (keys(%genes_affected)) { + print FILE $key, "\t", join ("\t", @{$genes_affected{$key}}), "\n"; + } + close FILE; + + printl ("\ngenes_affected.csv file created!\n\n"); + +} + +sub chkmut_cosmic() { + + my ($gene, $transcript, $HGVSc) = @_; + + if (exists($cosmic_list0->{"$gene:$transcript:$HGVSc"})) { + return (@{$cosmic_list0->{"$gene:$transcript:$HGVSc"}}); + } elsif (exists($cosmic_list1->{"$gene:$transcript:$HGVSc"})){ + return (@{$cosmic_list1->{"$gene:$transcript:$HGVSc"}}); + } elsif (exists($cosmic_list2->{"$gene:$transcript:$HGVSc"})){ + return (@{$cosmic_list2->{"$gene:$transcript:$HGVSc"}}); + } elsif (exists($cosmic_list3->{"$gene:$transcript:$HGVSc"})){ + return (@{$cosmic_list3->{"$gene:$transcript:$HGVSc"}}); + } elsif (exists($cosmic_list4->{"$gene:$transcript:$HGVSc"})){ + return (@{$cosmic_list4->{"$gene:$transcript:$HGVSc"}}); + } elsif (exists($cosmic_list5->{"$gene:$transcript:$HGVSc"})){ + return (@{$cosmic_list5->{"$gene:$transcript:$HGVSc"}}); + } elsif (exists($cosmic_list6->{"$gene:$transcript:$HGVSc"})){ + return (@{$cosmic_list6->{"$gene:$transcript:$HGVSc"}}); + } elsif (exists($cosmic_list7->{"$gene:$transcript:$HGVSc"})){ + return (@{$cosmic_list7->{"$gene:$transcript:$HGVSc"}}); + } elsif (exists($cosmic_list8->{"$gene:$transcript:$HGVSc"})){ + return (@{$cosmic_list8->{"$gene:$transcript:$HGVSc"}}); + } elsif (exists($cosmic_list9->{"$gene:$transcript:$HGVSc"})){ + return (@{$cosmic_list9->{"$gene:$transcript:$HGVSc"}}); + } elsif (exists($cosmic_list10->{"$gene:$transcript:$HGVSc"})){ + return (@{$cosmic_list10->{"$gene:$transcript:$HGVSc"}}); + } elsif (exists($cosmic_list11->{"$gene:$transcript:$HGVSc"})){ + return (@{$cosmic_list11->{"$gene:$transcript:$HGVSc"}}); + } elsif (exists($cosmic_list12->{"$gene:$transcript:$HGVSc"})){ + return (@{$cosmic_list12->{"$gene:$transcript:$HGVSc"}}); + } elsif (exists($cosmic_list13->{"$gene:$transcript:$HGVSc"})){ + return (@{$cosmic_list13->{"$gene:$transcript:$HGVSc"}}); + } elsif (exists($cosmic_list14->{"$gene:$transcript:$HGVSc"})){ + return (@{$cosmic_list14->{"$gene:$transcript:$HGVSc"}}); + } elsif (exists($cosmic_list15->{"$gene:$transcript:$HGVSc"})){ + return (@{$cosmic_list15->{"$gene:$transcript:$HGVSc"}}); + } elsif (exists($cosmic_list16->{"$gene:$transcript:$HGVSc"})){ + return (@{$cosmic_list16->{"$gene:$transcript:$HGVSc"}}); + } elsif (exists($cosmic_list17->{"$gene:$transcript:$HGVSc"})){ + return (@{$cosmic_list17->{"$gene:$transcript:$HGVSc"}}); + } elsif (exists($cosmic_list18->{"$gene:$transcript:$HGVSc"})){ + return (@{$cosmic_list18->{"$gene:$transcript:$HGVSc"}}); + } elsif (exists($cosmic_list19->{"$gene:$transcript:$HGVSc"})){ + return (@{$cosmic_list19->{"$gene:$transcript:$HGVSc"}}); + } elsif (exists($cosmic_list20->{"$gene:$transcript:$HGVSc"})){ + return (@{$cosmic_list20->{"$gene:$transcript:$HGVSc"}}); + } elsif (exists($cosmic_list21->{"$gene:$transcript:$HGVSc"})){ + return (@{$cosmic_list21->{"$gene:$transcript:$HGVSc"}}); + } elsif (exists($cosmic_list22->{"$gene:$transcript:$HGVSc"})){ + return (@{$cosmic_list22->{"$gene:$transcript:$HGVSc"}}); + } elsif (exists($cosmic_list23->{"$gene:$transcript:$HGVSc"})){ + return (@{$cosmic_list23->{"$gene:$transcript:$HGVSc"}}); + } elsif (exists($cosmic_list24->{"$gene:$transcript:$HGVSc"})){ + return (@{$cosmic_list24->{"$gene:$transcript:$HGVSc"}}); + } elsif 
(exists($cosmic_list25->{"$gene:$transcript:$HGVSc"})){ + return (@{$cosmic_list25->{"$gene:$transcript:$HGVSc"}}); + } else { + return ("", "", 0, 0, "") + } + +} + +sub get_kegg_id_sym { + + my ($hgnc_symbol) = @_; + + if (exists($genes_ids->{$hgnc_symbol})) { + my @kegg_pathways = &get_kegg_path("$genes_ids->{$hgnc_symbol}"); + return ($hgnc_symbol,@kegg_pathways,$1); + } + else { + return ("","","",""); + } + +} + +sub get_kegg_path { + my ($kegg_id) = @_; + + if (exists($kegg_gene_pathway_DB->{$kegg_id})) { + my @kegg_path_desc = split /\|/, $kegg_gene_pathway_DB->{$kegg_id}; + return get_kegg_path_desc(\@kegg_path_desc); + } + else{ + return ("",""); + } +} + +sub get_kegg_path_desc { + my @kegg_paths = @{$_[0]}; + my $results = ""; + + foreach my $path_id (@kegg_paths) { + if (exists($pathw_desc->{$path_id})) { + $results .= "$pathw_desc->{$path_id}|"; + } + } + + my $keggpaths = join ("|",@kegg_paths); + + $results =~ s/\|$//g; + + return ($results, $keggpaths); +} + +sub create_vscore() { + + # vscore creation + my @vep_file = @_; + my $score = 0; + my $lastgene = ""; + my $skip = 0; + my %scored_columns = (); + + # Get column position to apply the score (poly_score, sift_score, cosmic_id, kegg_path_id ) + # Get Gene_hgnc column for getting sample mutation frequency + # Add column CCLE genes matches column + my @linedata = ("Chr", "Loc", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "FORMAT", "sample", "mut", "location", "allele", "gene", "feature", "feature_type", "consequence", "impact", "cdna_position", "cds_position", "protein_position", "amino_acids", "codons", "existing_variation", "extra", "principal", "poly_effect", "poly_score", "sift_effect", "sift_score", "CADD_phred", "CADD_raw", "gene_hgnc", "gene_role", "cosmic_id", "KEGG_data", "KEGG_path_id", "clinvar_acc", "clinvar_disease", "clinvar_clinical_significance", "variation_type", "HGVS_cDNA", "HGVS_protein", "GMAF", "gnomAD", "gnomAD_NFE", "pfam", "interpro", "gene_freq", "mut_freq"); + + foreach my $i (0..$#linedata) { + + if ($linedata[$i] eq "Chr") { $scored_columns{Chr} = $i; } + if ($linedata[$i] eq "Loc") { $scored_columns{Loc} = $i; } + if ($linedata[$i] eq "mut") { $scored_columns{mut} = $i; } + if ($linedata[$i] eq "poly_score") { $scored_columns{poly_score} = $i; } + if ($linedata[$i] eq "sift_score") { $scored_columns{sift_score} = $i; } + if ($linedata[$i] eq "CADD_phred") { $scored_columns{CADD_phred} = $i; } + if ($linedata[$i] eq "gene_role") { $scored_columns{gene_role} = $i; } + if ($linedata[$i] eq "cosmic_id") { $scored_columns{cosmic_id} = $i; } + if ($linedata[$i] eq "protein_position") { $scored_columns{protein_position} = $i; } + if ($linedata[$i] eq "mut_freq") { $scored_columns{mut_cosmic_freq} = $i; } + if ($linedata[$i] eq "gene_freq") { $scored_columns{gene_cosmic_freq} = $i; } + + if ($linedata[$i] eq "consequence") { $scored_columns{consequence} = $i; } + if ($linedata[$i] eq "impact") { $scored_columns{impact} = $i; } + if ($linedata[$i] eq "GMAF") { $scored_columns{GMAF} = $i; } + if ($linedata[$i] eq "gnomAD") { $scored_columns{gnomADe} = $i; } + if ($linedata[$i] eq "pfam") { $scored_columns{pfam} = $i; } + if ($linedata[$i] eq "interpro") { $scored_columns{interpro} = $i; } + if ($linedata[$i] eq "clinvar_clinical_significance") { $scored_columns{clinvar} = $i; } + if ($linedata[$i] eq "INFO") {$scored_columns{INFO} = $i; } + if ($linedata[$i] eq "gene_hgnc") { $scored_columns{gene_hgnc} = $i; } + + } + + foreach my $i (0..$#vep_file) { + + @linedata = @{$vep_file[$i]}; + + if 
($lastgene ne $linedata[$scored_columns{gene_hgnc}]) { + + $lastgene = $linedata[$scored_columns{gene_hgnc}]; + + } + + if ($skip == 0) { + + #Decide the branch for the calculation according to the variation or gene definition + my $branch; + + my @components = split ("; ", $linedata[$scored_columns{gene_role}]); + my @genetype = (); + foreach my $ic (0..$#components) { + if ($components[$ic] =~ /:([\w ]+)/) { + if ($1 eq "ONC" || $1 eq "TSG") { + push(@genetype, $1); + } + } + } + @genetype = uniq @genetype; + + my $genetype_string = join(":", @genetype); + + if ($genetype_string) { + if ($genetype_string eq "ONC") { + $branch = "ONC"; + } elsif ($genetype_string eq "TSG") { + $branch = "TSG"; + } else { + $branch = "UNCLASSIFIED"; + } + } else { + $branch = "UNCLASSIFIED"; + } + + if ($branch eq "UNCLASSIFIED") { + if ($linedata[$scored_columns{consequence}] =~ /stop_gain|stop_lost|frameshift_variant|splice_donor_variant|splice_acceptor_variant|splice_region_variant/) { + $branch = "TSG"; + } + } + + # Add scores + my $prediction_damaging = 0; + + #Cosmic ID + if ($linedata[$scored_columns{cosmic_id}] =~ /(COSV\d+):*/) { + if ($linedata[$scored_columns{cosmic_id}] =~ /PATHOGENIC/) { + $prediction_damaging += 1; + } + if ($linedata[$scored_columns{mut_cosmic_freq}] && $linedata[$scored_columns{mut_cosmic_freq}] =~ /(\d+) \/ (\d+)/) { + if ($branch eq "ONC" || $branch eq "UNCLASSIFIED") { + my $ss; + if ($1 >= 100) { + $ss = 0.125 / 2; + } else { + $ss = (0.125 / 2) * ((log($1) - 0) / (log($2) - 0)); + } + $score += $ss; + } + } + if ($linedata[$scored_columns{gene_cosmic_freq}] && $linedata[$scored_columns{gene_cosmic_freq}] =~ /(\d+) \/ (\d+)/) { + my $ss; + if ($1 >= 100) { + $ss = 0.125 / 2; + } else { + $ss = (0.125 / 2) * ((log($1) - 0) / (log($2) - 0)); + } + $score += $ss; + } + } + +#print "cosm:$score\n"; + #Prediction score + if ($linedata[$scored_columns{poly_score}] && $linedata[$scored_columns{poly_score}] > 0.435) { + $prediction_damaging += 1; + } + + if ($linedata[$scored_columns{sift_score}] ne "" && $linedata[$scored_columns{sift_score}] <= 0.05) { + $prediction_damaging += 1; + } + + if ($linedata[$scored_columns{CADD_phred}] && $linedata[$scored_columns{CADD_phred}] > 20) { + $prediction_damaging += 1; + } + + if ($prediction_damaging >= 3) { + $score += 0.125; + } elsif ($prediction_damaging == 2) { + $score += 0.08; + } elsif ($prediction_damaging == 1) { + $score += 0.04 + } +#print "pred:$score\n"; + #Mutation type + if ($linedata[$scored_columns{impact}] =~ /(HIGH)/) { + $score += 0.125; + } +#print "mut:$score\n"; + #Frequencies + if ($linedata[$scored_columns{GMAF}] ne "") { + if ($linedata[$scored_columns{GMAF}] < 1) { + $score += 0.125 / 2; + } + } else { + $score = ($score + 0.125 / 2); + } + if ($linedata[$scored_columns{gnomADe}] ne "") { + if ($linedata[$scored_columns{gnomADe}] < 1) { + $score += 0.125 / 2; + } + } else { + $score = ($score + 0.125 / 2); + } +#print "freq:$score\n"; + #Domains + if ($linedata[$scored_columns{pfam}] =~ /^(\w+)\./ && exists($cancer_domain->{$1})) { + $score += 0.125; + } elsif ($linedata[$scored_columns{interpro}] eq "Mutation previous last protein domain") { + $score += 0.125; + } else { + if ($linedata[$scored_columns{pfam}] || $linedata[$scored_columns{interpro}]) { + $score += 0.125 / 2; + } + } +#print "dom:$score\n"; + #Clinvar + if ($linedata[$scored_columns{clinvar}] =~ /Pathogenic/) { + if ($zygosity{"$linedata[$scored_columns{Chr}]_$linedata[$scored_columns{Loc}]_$linedata[$scored_columns{mut}]"} ne "") { + $score 
+= 0.125; + } else { + if ($branch eq "ONC" || $branch eq "UNCLASSIFIED") { + $score += 0.250; + } else { + $score += 0.3125; + } + } + } +#print "clin:$score\n"; + #Homozigous + if ($zygosity{"$linedata[$scored_columns{Chr}]_$linedata[$scored_columns{Loc}]_$linedata[$scored_columns{mut}]"} eq "Homozygous") { + if ($branch eq "ONC" || $branch eq "UNCLASSIFIED") { + $score += 0.125; + } else { + $score += 0.1875; + } + } +#print "hom:$score\n"; + #GScore + if (exists($gscore->{$linedata[$scored_columns{gene_hgnc}]})) { + $score += (0.125 * $gscore->{$linedata[$scored_columns{gene_hgnc}]}); + } + + push @{$vep_file[$i]}, (sprintf("%.4f", $score), $branch); + + } + + $score = 0; + + } + + return @vep_file; +} + +sub printl { + $logfile = $logfile . $_[0]; + print $_[0]; +} diff --git a/src/createDB.py b/src/createDB.py new file mode 100644 index 0000000..6288e5e --- /dev/null +++ b/src/createDB.py @@ -0,0 +1,396 @@ +#!/usr/bin/python + +import re +import pdb +import pandas as pd +from datetime import date +import progressbar +import math +import sys + +if len(sys.argv) < 2: + print("python createDB.py level [being 1(create intermediate file), and 2(create final file)]") + sys.exit() +else: + print(sys.argv) + +#Directory with the downloaded files +dwn_dir = 'downloads/' +#Directory with the processed files +pro_dir = 'processed/' +#Directory with the manually reviewed files +mr_dir = 'manual_review/' +#Directory with additional files +add_dir = 'additional_files/' +#Output directory +out_dir = '2.0/' +#Output file +out_file = 'PanDrugs_'+date.today().strftime('%b_%d_%Y')+'.tsv' + +def load_files(): + global checked_gene_symbol_file, drug_names_file, family_KEGG_file, family_cmap_file, drug_approved_data_file, ind_pathway_file, gene_dependency_file, moalmanac_sen_res, gdsc_sen_res, civic_sen_res, oncokb_sen_res, gene_names, gene_pathways, cgc, intogen_group, intogen_min, intogen_max, oncovar, oncovar_min, oncovar_max, chronos, chronos_min, hallmarks + + print('Loading annotation files...') + + checked_gene_symbol_file = pd.read_csv(pro_dir+'genes_checked.tsv', sep ='\t', low_memory=False) + checked_gene_symbol_file['gene_name'] = checked_gene_symbol_file['gene_name'].str.upper() + + drug_names_file = pd.read_csv(mr_dir+'drug_synonyms_mrd.tsv', sep ='\t', low_memory=False) + drug_names_file['drug_name'] = drug_names_file['drug_name'].str.upper() + drug_names_file['standard_drug_name'] = drug_names_file['standard_drug_name'].str.upper() + drug_names_file['show_drug_name'] = drug_names_file['show_drug_name'].str.upper() + drug_names_file = drug_names_file.fillna('') + + family_KEGG_file = pd.read_csv(pro_dir+'TargetBasedClassificationKEGG_formated.tsv', sep ='\t', low_memory=False) + family_KEGG_file = family_KEGG_file.fillna('') + family_KEGG_file['drug'] = family_KEGG_file['drug'].str.upper() + family_cmap_file = pd.read_csv(pro_dir+'cmap_moa.tsv', sep ='\t', low_memory=False) + family_cmap_file = family_cmap_file.fillna('') + family_cmap_file['drug'] = family_cmap_file['drug'].str.upper() + + drug_approved_data_file = pd.read_csv(mr_dir+'drug_approved_data_mrd.tsv', sep='\t', low_memory=False) + drug_approved_data_file = drug_approved_data_file.fillna('') + + ind_pathway_file = pd.read_csv(pro_dir+'KEGGmodeled/upstream_genes.tsv', sep='\t', low_memory=False) + ind_pathway_file = ind_pathway_file.fillna('') + ind_pathway_file['gene'] = ind_pathway_file['gene'].str.upper() + + gene_dependency_file = pd.read_csv(pro_dir+'genetic_dependencies.tsv', sep='\t', low_memory=False) + 
gene_dependency_file = gene_dependency_file.fillna('')
+
+    moalmanac_sen_res = pd.read_csv(pro_dir+'moalmanac.tsv', sep='\t', low_memory=False)
+    moalmanac_sen_res = moalmanac_sen_res.fillna('')
+    moalmanac_sen_res['drug_name'] = moalmanac_sen_res['drug_name'].str.upper()
+    gdsc_sen_res = pd.read_csv(pro_dir+'GDSC.tsv', sep='\t', low_memory=False)
+    gdsc_sen_res = gdsc_sen_res.fillna('')
+    gdsc_sen_res['drug_name'] = gdsc_sen_res['drug_name'].str.upper()
+    civic_sen_res = pd.read_csv(pro_dir+'civic.tsv', sep='\t', low_memory=False)
+    civic_sen_res = civic_sen_res.fillna('')
+    civic_sen_res['drug_name'] = civic_sen_res['drug_name'].str.upper()
+    oncokb_sen_res = pd.read_csv(pro_dir+'oncokb.tsv', sep='\t', low_memory=False)
+    oncokb_sen_res = oncokb_sen_res.fillna('')
+    oncokb_sen_res['drug_name'] = oncokb_sen_res['drug_name'].str.upper()
+
+    gene_names = pd.read_csv(dwn_dir+'custom', sep='\t', low_memory=False, dtype=str)
+    gene_names = gene_names.fillna('')
+    gene_pathways = pd.read_csv(pro_dir+'gene_pathway.tsv', sep='\t', low_memory=False, dtype=str)
+
+    #Files for gscore calculation
+    cgc = pd.read_csv(pro_dir+'cgc_scores.tsv', sep='\t', low_memory=False, dtype=str)
+    intogen = pd.read_csv(pro_dir+'intogen.tsv', sep='\t', low_memory=False, dtype=str)
+    intogen_group = intogen.groupby('gene_name')['qvalue_combination'].median().reset_index()
+    intogen_min = sorted(intogen_group['qvalue_combination'][intogen_group['qvalue_combination'] > 0.05].tolist())[0]
+    intogen_max = min(intogen_group['qvalue_combination'])
+    oncovar = pd.read_csv(pro_dir+'oncovar_scores.tsv', sep='\t', low_memory=False, dtype=str)
+    oncovar_max = float(max(oncovar[(oncovar['cancer_type'] == 'PanCancer')]['Consensus_Score'].tolist()))
+    chronos = pd.read_csv(pro_dir+'chronos_skew.tsv', sep='\t', low_memory=False, dtype=str, header = None)
+    chronos.columns = ['gene', 'value', 'min']
+    chronos['value'] = chronos['value'].astype(float)
+    chronos_min = sorted(chronos[chronos['value'] > -0.5]['value'].tolist())[0]
+    hallmarks = pd.read_csv(pro_dir+'hallmarks.tsv', sep='\t', low_memory=False, dtype=str)
+
+def drug_gene_associations():
+
+    print('Processing drug-gene associations...')
+    #CIViC and OncoKB records are obtained from their web pages in order to restrict to robust associations and avoid records of adverse effects
+    files = ('DGIdb_interactions.tsv', 'oncokb.tsv', 'civic.tsv', 'DrugBank.tsv', 'moalmanac.tsv', 'GDSC.tsv', 'sabdab.tsv')
+
+    outputf = open(out_dir+'PanDrugs_prov.tsv','w')
+    outputf.write('\t'.join(['gene_symbol', 'checked_gene_symbol', 'source', 'source_drug_name', 'standard_drug_name', 'show_drug_name', 'family', 'status', 'pathology', 'cancer', 'extra', 'extra2', 'pathways', 'target_marker', 'resistance', 'alteration', 'ind_pathway', 'gene_dependency', 'dscore', 'gscore', 'reviews'])+'\n')
+
+    for fil in files:
+        print('>Processing '+fil+'...')
+
+        drug_gene_a = pd.read_csv(pro_dir+fil, sep='\t', low_memory=False)
+        drug_gene_a['drug_name'] = drug_gene_a['drug_name'].str.upper()
+        drug_gene_a = drug_gene_a.fillna('')
+        drug_gene_a.drop_duplicates(inplace=True)
+
+        for i in progressbar.progressbar(range(len(drug_gene_a.index))):
+            row = drug_gene_a.iloc[i]
+
+            (gene_symbol, source, source_drug_name) = ('', '', '')
+
+            gene_symbol = re.split(';|,',row['gene_name'].upper())
+            source = row['source']
+            source_drug_name = row['drug_name'].upper()
+            source_drug_name_reduced = source_drug_name.split(' ')[0]
+
+            for gs in gene_symbol:
+
+                (checked_gene_symbol, standard_drug_name, show_drug_name, family,
status, pathology, cancer, extra, extra2, pathways, target_marker, resistance, alteration, ind_pathway, gene_dependency, dscore, gscore, reviews) = ('', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', []) + + #checked gene symbol + checked_gene_symbol = checked_gene_symbol_file.loc[checked_gene_symbol_file['gene_name'] == gs]['checked_gene_symbol'].tolist()[0] + standard_drug_name = drug_names_file.loc[drug_names_file['drug_name'] == source_drug_name]['standard_drug_name'].tolist()[0] + standard_drug_name_reduced = standard_drug_name.split(' ')[0] + + #drug names file + if drug_names_file.loc[drug_names_file['drug_name'] == source_drug_name]['show_drug_name'].tolist()[0] != '': + show_drug_name = drug_names_file.loc[drug_names_file['drug_name'] == source_drug_name]['show_drug_name'].tolist()[0] + else: + show_drug_name = standard_drug_name + + show_drug_name_reduced = show_drug_name.split(' ')[0] + drug_list = [standard_drug_name, show_drug_name, source_drug_name, standard_drug_name_reduced, show_drug_name_reduced, source_drug_name_reduced] + + #Drug_type + extra2 = drug_approved_data_file.loc[drug_approved_data_file['standard_drug_name'] == standard_drug_name]['drug_type_mc'].tolist()[0] + + families = [] + #Drug family KEGG + found = False + for idx, d in enumerate(drug_list): + if d in family_KEGG_file['drug'].tolist(): + families = families + [x+'(KEGG)' for x in family_KEGG_file.loc[family_KEGG_file['drug'] == d]['family2'].tolist()] + if idx > 2: reviews.append('KEGG') + found = True + break + if not found: + for idx, d in enumerate(drug_list): + if d in family_KEGG_file['drug'].str.split(' ', n = 1, expand = True)[0].tolist(): + index = family_KEGG_file['drug'].str.split(' ', n = 1, expand = True)[0].tolist().index(d) + families = families + [x+'(KEGG)' for x in family_KEGG_file.iloc[[index]]['family2'].tolist()] + reviews.append('KEGG') + break + + #Drug family cmap + found = False + for idx, d in enumerate(drug_list): + if d in family_cmap_file['drug'].tolist(): + families = families + [x+'(Cmap)' for x in family_cmap_file.loc[family_cmap_file['drug'] == d]['moa'].tolist()] + if idx > 2: reviews.append('Cmap') + found = True + break + if not found: + for idx, d in enumerate(drug_list): + if d in family_cmap_file['drug'].str.split(' ', n = 1, expand = True)[0].tolist(): + index = family_cmap_file['drug'].str.split(' ', n = 1, expand = True)[0].tolist().index(d) + families = families + [x+'(Cmap)' for x in family_cmap_file.iloc[[index]]['moa'].tolist()] + reviews.append('Cmap') + break + + if len(families) > 0: family = ', '.join(families) + else: family = 'Other' + + #Status + status = drug_approved_data_file.loc[drug_approved_data_file['standard_drug_name'] == standard_drug_name]['status_mc'].tolist()[0] + + #Labels info + if status == 'Approved': + pathology = drug_approved_data_file.loc[drug_approved_data_file['standard_drug_name'] == standard_drug_name]['pathology_mc'].tolist()[0] + extra = drug_approved_data_file.loc[drug_approved_data_file['standard_drug_name'] == standard_drug_name]['indication_mc'].tolist()[0] + + cancer = drug_approved_data_file.loc[drug_approved_data_file['standard_drug_name'] == standard_drug_name]['cancer_type_mc'].tolist()[0] + + #Target_marker + if source in ['CancerCommons', 'ClearityFoundationClinicalTrial', 'DrugBank', 'MyCancerGenome', 'TALC', 'TEND', 'TTD', 'SAbDab']: + target_marker = 'target' + else: + target_marker = 'marker' + + #Pathway members + if len(ind_pathway_file.loc[ind_pathway_file['gene'] == checked_gene_symbol]) > 
0:
+            ind_pathway = ind_pathway_file.loc[ind_pathway_file['gene'] == checked_gene_symbol]['upstream_genes'].tolist()[0]
+
+        #Genetic dependencies
+        if checked_gene_symbol in gene_dependency_file['gene'].tolist():
+            gene_dependency = gene_dependency_file[gene_dependency_file['gene'] == checked_gene_symbol]['genetic_dependency'].tolist()[0]
+
+        #Resistance and alteration
+        resistance = 'sensitivity'
+        if source == 'CIViC':
+            if len(civic_sen_res[(civic_sen_res['drug_name'] == source_drug_name) & (civic_sen_res['gene_name'] == gs)]['response'].tolist()) > 0:
+                resistance = civic_sen_res[(civic_sen_res['drug_name'] == source_drug_name) & (civic_sen_res['gene_name'] == gs)]['response'].tolist()[0]
+                alteration = civic_sen_res[(civic_sen_res['drug_name'] == source_drug_name) & (civic_sen_res['gene_name'] == gs)]['variation'].tolist()[0]
+        elif source == 'GDSC':
+            if len(gdsc_sen_res[(gdsc_sen_res['drug_name'] == source_drug_name) & (gdsc_sen_res['gene_name'] == gs)]['response'].tolist()) > 0:
+                resistance = gdsc_sen_res[(gdsc_sen_res['drug_name'] == source_drug_name) & (gdsc_sen_res['gene_name'] == gs)]['response'].tolist()[0]
+                alteration = gdsc_sen_res[(gdsc_sen_res['drug_name'] == source_drug_name) & (gdsc_sen_res['gene_name'] == gs)]['alteration'].tolist()[0]
+        elif source == 'MOAlmanac':
+            if len(moalmanac_sen_res[(moalmanac_sen_res['drug_name'] == source_drug_name) & (moalmanac_sen_res['gene_name'] == gs)]['response'].tolist()) > 0:
+                resistance = moalmanac_sen_res[(moalmanac_sen_res['drug_name'] == source_drug_name) & (moalmanac_sen_res['gene_name'] == gs)]['response'].tolist()[0]
+                alteration = moalmanac_sen_res[(moalmanac_sen_res['drug_name'] == source_drug_name) & (moalmanac_sen_res['gene_name'] == gs)]['alteration'].tolist()[0]
+        elif source == 'OncoKB':
+            if len(oncokb_sen_res[(oncokb_sen_res['drug_name'] == source_drug_name) & (oncokb_sen_res['gene_name'] == gs)]['response'].tolist()) > 0:
+                resistance = oncokb_sen_res[(oncokb_sen_res['drug_name'] == source_drug_name) & (oncokb_sen_res['gene_name'] == gs)]['response'].tolist()[0]
+                alteration = oncokb_sen_res[(oncokb_sen_res['drug_name'] == source_drug_name) & (oncokb_sen_res['gene_name'] == gs)]['variation'].tolist()[0]
+        elif source == 'COSMIC':
+            resistance = 'resistance'
+
+        #pathways
+        if len(gene_names.loc[gene_names['Approved symbol']==checked_gene_symbol,]['NCBI Gene ID'].tolist()) > 0:
+            NCBI_name = gene_names.loc[gene_names['Approved symbol']==checked_gene_symbol,]['NCBI Gene ID'].tolist()[0]
+            if len(gene_pathways.loc[gene_pathways['KEGG Gene ID'] == NCBI_name]['KEGG Pathway ID'].tolist()) > 0:
+                pathways = gene_pathways.loc[gene_pathways['KEGG Gene ID'] == NCBI_name]['KEGG Pathway ID'].tolist()[0]
+
+        gscore = str(compute_gscore(checked_gene_symbol))
+
+        outputf.write('\t'.join([gs, checked_gene_symbol, source, source_drug_name, str(standard_drug_name), str(show_drug_name), family, status, pathology, cancer, extra, str(extra2), pathways, target_marker, resistance, alteration, ind_pathway, gene_dependency, dscore, gscore, ';'.join(reviews)])+'\n')
+
+    outputf.close()
+
+    print('****IMPORTANT****'+'\n'+'Before creating the final PanDrugs file:')
+    print('-> Check the records with review comments and modify them accordingly')
+    print('-> Check the associations of KRAS, TP53, STK11 and APC (using checked_gene_symbol column) with drugs (using standard_drug_name column) with a target relationship (using target_marker column) in the PanDrugs file. Update them in the controled_records.tsv file along with the corresponding tag (keep/exclude/target/marker).')
+    print('-> Recover EGFR (using checked_gene_symbol column) - drug (using standard_drug_name column) associations with a target relationship (using target_marker column) in the PanDrugs file. For each record without any described targeted MET association with the drug, create a MET-drug association and update them in the controled_records.tsv file along with the corresponding tag (amp_resistance).')
+    print('Then re-run in create final file mode to create the final PanDrugs file.'+'\n'+'*****************')
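+# Added note (comment only, not in the original source): the script runs in two
+# passes. Pass 1 (sys.argv[1] == '1') calls drug_gene_associations() to build
+# PanDrugs_prov.tsv and prints the curation checklist above; once
+# controled_records.tsv has been reviewed, pass 2 runs create_final_file() and
+# create_pubchem_ids_file() to emit the final table.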
+
+def create_final_file():
+
+    print('Creating final file...')
+
+    pandrugs = pd.read_csv(out_dir+'PanDrugs_prov.tsv', sep='\t', low_memory=False)
+    pandrugs = pandrugs.fillna('')
+
+    #update all target records
+    print('Updating controlled records...')
+    target = pandrugs.loc[pandrugs['target_marker'] == 'target'].drop_duplicates(subset=['checked_gene_symbol', 'standard_drug_name'])
+
+    for index, row in target.iterrows():
+        pandrugs.loc[((pandrugs['checked_gene_symbol'] == row['checked_gene_symbol']) & (pandrugs['standard_drug_name'] == row['standard_drug_name'])), 'target_marker'] = 'target'
+
+    conres = pd.read_csv(add_dir+'controled_records.tsv', sep='\t', low_memory=False)
+
+    for index, row in conres.loc[conres['reason'] == 'marker'].iterrows():
+        pandrugs.loc[((pandrugs['checked_gene_symbol'] == row['checked_gene_symbol']) & (pandrugs['standard_drug_name'] == row['standard_drug_name'])), 'target_marker'] = 'marker'
+
+    for index, row in conres.loc[conres['reason'] == 'exclude'].iterrows():
+        pandrugs = pandrugs.drop(pandrugs.loc[((pandrugs['checked_gene_symbol'] == row['checked_gene_symbol']) & (pandrugs['standard_drug_name'] == row['standard_drug_name']))].index)
+
+    for index, row in conres.loc[conres['reason'] == 'amp_resistance'].iterrows():
+        pandrugs = pandrugs.reset_index(drop=True)
+        pandrugs.loc[len(pandrugs.index)] = [row['checked_gene_symbol'], row['checked_gene_symbol'], 'Curated', row['standard_drug_name'], row['standard_drug_name']]+pandrugs.loc[pandrugs['standard_drug_name'] == row['standard_drug_name']][['show_drug_name', 'family', 'status', 'pathology', 'cancer', 'extra', 'extra2']].iloc[0].tolist()+pandrugs.loc[pandrugs['checked_gene_symbol'] == row['checked_gene_symbol']][['pathways']].iloc[0].tolist()+['marker', 'resistance', 'amplification']+pandrugs.loc[pandrugs['checked_gene_symbol'] == row['checked_gene_symbol']][['ind_pathway', 'gene_dependency']].iloc[0].tolist()+['']+pandrugs.loc[pandrugs['checked_gene_symbol'] == row['checked_gene_symbol']][['gscore']].iloc[0].tolist()+['']
+
+    for index, row in conres.loc[conres['reason'] == 'sensitivity'].iterrows():
+        pandrugs.loc[((pandrugs['checked_gene_symbol'] == row['checked_gene_symbol']) & (pandrugs['standard_drug_name'] == row['standard_drug_name'])), 'resistance'] = 'sensitivity'
+    for index, row in conres.loc[conres['reason'] == 'resistance'].iterrows():
+        pandrugs.loc[((pandrugs['checked_gene_symbol'] == row['checked_gene_symbol']) & (pandrugs['standard_drug_name'] == row['standard_drug_name'])), 'resistance'] = 'resistance'
+
+    #homogenize target-marker for salt correction
+    print('Homogenizing target-marker information...')
+    genes = list(set(pandrugs['checked_gene_symbol'].tolist()))
+
+    for g in genes:
+        subset = pandrugs.loc[pandrugs['checked_gene_symbol'] == g]
+        drugs = list(set(subset['show_drug_name'].tolist()))
+        for d in drugs:
+            drug = d.split(' ')
+            if len(drug) > 1 and drug[0] in drugs:
+                if subset.loc[(subset['checked_gene_symbol'] == g) & (subset['show_drug_name'] == d)]['target_marker'].tolist()[0] != subset.loc[(subset['checked_gene_symbol'] == g) & (subset['show_drug_name'] == drug[0])]['target_marker'].tolist()[0]:
+                    idx = pandrugs.index[(pandrugs['checked_gene_symbol'] == g) & (pandrugs['show_drug_name'] == d)].tolist()
+                    pandrugs.loc[idx, ['target_marker']] = 'target'
+
+                    idx = pandrugs.index[(pandrugs['checked_gene_symbol'] == g) & (pandrugs['show_drug_name'] == drug[0])].tolist()
+                    pandrugs.loc[idx, ['target_marker']] = 'target'
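+    # Added note (not in the original source): show_drug_name values may carry a salt
+    # suffix (e.g. a hypothetical 'IMATINIB MESYLATE' vs 'IMATINIB'); when both forms
+    # are associated with the same gene but disagree on target_marker, the loop above
+    # promotes both records to 'target' so salt and base forms stay consistent.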
+
+    #homogenize drug families
+    print('Homogenizing drug families...')
+    drugs = list(set(pandrugs['show_drug_name'].tolist()))
+    for d in drugs:
+        f_values = list(set(pandrugs.loc[pandrugs['show_drug_name'] == d]['family'].tolist()))
+        if len(f_values) > 1:
+            idx = pandrugs.index[pandrugs['show_drug_name'] == d].tolist()
+            pandrugs.loc[idx, ['family']] = [x for x in f_values if x != 'Other'][0]
+
+    #Drug-score calculation
+    for index, row in pandrugs.iterrows():
+        dscore = 0
+        if row['status'] == 'Approved':
+            if row['extra2'] != '':
+                if row['target_marker'] == 'target': dscore = 1
+                else: dscore = 0.9
+            else:
+                if row['cancer'] == 'clinical cancer':
+                    if row['target_marker'] == 'target': dscore = 0.8
+                    else: dscore = 0.7
+                else:
+                    if row['target_marker'] == 'target': dscore = 0.4
+                    else: dscore = 0.3
+        if row['status'] == 'Clinical trials':
+            if row['cancer'] == 'cancer':
+                if row['target_marker'] == 'target': dscore = 0.6
+                else: dscore = 0.5
+            else:
+                if row['target_marker'] == 'target': dscore = 0.2
+                else: dscore = 0.1
+        if row['status'] == 'Experimental':
+            if row['target_marker'] == 'target': dscore = 0.0008
+            else: dscore = 0.0004
+        if row['resistance'] == 'resistance':
+            dscore = dscore * (-1)
+
+        pandrugs.loc[index, 'dscore'] = dscore
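+    # Added summary (comment only, not in the original source) of the tiers assigned above:
+    #   Approved, extra2 (drug type) set:       target 1      / marker 0.9
+    #   Approved, cancer == 'clinical cancer':  target 0.8    / marker 0.7
+    #   Approved, otherwise:                    target 0.4    / marker 0.3
+    #   Clinical trials, cancer == 'cancer':    target 0.6    / marker 0.5
+    #   Clinical trials, otherwise:             target 0.2    / marker 0.1
+    #   Experimental:                           target 0.0008 / marker 0.0004
+    # Resistance associations flip the sign of the score.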
+
+    #remove duplicates due to repeated genes in original databases
+    pandrugs.drop_duplicates(inplace=True)
+
+    #add GScore and gene role
+    gscore = pd.read_csv(add_dir+"gscore_Ene_2023.tsv", sep='\t', low_memory=False)
+    driver = pd.read_csv(add_dir+"drivers_Ene_2023.tsv", sep='\t', low_memory=False)
+
+    pandrugs_gs = pd.merge(pandrugs, gscore, on="checked_gene_symbol", how='left')
+    pandrugs_gs_dr = pd.merge(pandrugs_gs, driver, on="checked_gene_symbol", how='left')
+
+    pandrugs_gs_dr.to_csv(out_dir+out_file, sep='\t', index=False, header=True)
+
+def create_pubchem_ids_file():
+
+    print('Creating PubChemID file...')
+
+    pandrugs = pd.read_csv(out_dir+out_file, sep='\t', low_memory=False)
+    pandrugs = pandrugs.fillna('')
+
+    drugs = pandrugs['show_drug_name']
+
+    outputf = open(out_dir+'show_drug_name_lk_pubchem.tsv', 'w')
+    outputf.write('\t'.join(['show_drug_name', 'PubChemID'])+'\n')
+
+    import pubchempy as pcp
+
+    drugs = list(set(drugs))
+    for i in progressbar.progressbar(range(len(drugs))):
+        response = pcp.get_cids(drugs[i], 'name')
+        CID = ''
+        if len(response) > 0:
+            CID = '|'.join([str(x) for x in response])
+        outputf.write('\t'.join([drugs[i], CID])+'\n')
+
+    outputf.close()
+
+def compute_gscore(gene):
+    gscore = 0
+    if not cgc.loc[cgc['gene'] == gene].empty:
+        if '1' in list(set(cgc.loc[cgc['gene'] == gene]['Tier'])): gscore += 0.2
+        else: gscore += 0.1
+
+    if not intogen_group.loc[intogen_group['gene_name'] == gene].empty:
+        qvalue = intogen_group.loc[intogen_group['gene_name'] == gene]['qvalue_combination'].tolist()[0]
+        if qvalue <= 0.05: gscore += 0.2 * (qvalue - intogen_min) / (intogen_max - intogen_min)
+
+    if not oncovar.loc[(oncovar['cancer_type'] == 'PanCancer') & (oncovar['gene'] == gene)].empty:
+        score = float(oncovar.loc[(oncovar['cancer_type'] == 'PanCancer') & (oncovar['gene'] == gene)]['Consensus_Score'].tolist()[0])
+        if score <= 3:
+            gscore += 0.1 * (score - 0) / (3 - 0)
+        else:
+            gscore += 0.1 + (0.1 * (score - 3) / (oncovar_max - 3))
+
+    if not chronos.loc[chronos['gene'] == gene].empty:
+        value = chronos.loc[chronos['gene'] == gene]['value'].tolist()[0]
+        if value < -2: gscore += 0.2
+        elif value <= -0.5: gscore += 0.2 * (value - chronos_min) / (-2 - chronos_min)
+
+    if not hallmarks.loc[hallmarks['Genes_list'] == gene].empty:
+        n_hall = len(list((set(hallmarks.loc[hallmarks['Genes_list'] == gene]['Hallmark']))))
+        if n_hall >= 5: gscore += 0.2
+        else: gscore += 0.2 * (n_hall - 0) / (5 - 0)
+
+    return round(gscore, 4)
+
+load_files()
+
+if sys.argv[1] == '1':
+    drug_gene_associations()
+else:
+    create_final_file()
+    create_pubchem_ids_file()
diff --git a/src/download_preprocess_drugbank.R b/src/download_preprocess_drugbank.R
new file mode 100644
index 0000000..0664e14
--- /dev/null
+++ b/src/download_preprocess_drugbank.R
@@ -0,0 +1,39 @@
+rm(list = ls()) # R version 4.1.3
+library(XML) # XML_3.99-0.9
+library(dbparser) # dbparser_1.2.0
+library(tidyverse) # tidyverse_1.3.1
+setwd(".")
+outdir <- "results/"
+
+# --- Data ---
+read_drugbank_xml_db("data/drugbank.xml")
+
+# --- Code ---
+# Outdir
+dir.create(outdir, showWarnings = FALSE, recursive = TRUE)
+
+# Run parsers
+drugBank <- run_all_parsers()
+
+# Drug names
+dnames <- drugBank$general_information %>%
+  rename(preferred.DB = name, DB.IDs = primary_key) %>%
+  group_by(DB.IDs) %>%
+  mutate(n.preferred = length(unique(preferred.DB))) %>%
+  select(preferred.DB, DB.IDs, n.preferred) %>% unique
+table(dnames$n.preferred) # One term per drug
+
+# Targets
+targets <- drugBank$targets_polypeptides %>% select(gene_name, parent_id) %>%
+  rename(id = parent_id) %>% unique %>%
+  merge(drugBank$targets, by = "id") %>% filter(organism == "Humans") %>%
+  rename(targets = gene_name, DB.IDs = parent_key) %>%
+  select(targets, DB.IDs) %>% unique
+
+# Drug-gene interactions
+drug_gene <- merge(dnames, targets, by = "DB.IDs", all = TRUE) %>%
+  select(-n.preferred) %>% na.omit() %>% filter(targets != "") %>% unique()
+
+# Save
+write.table(drug_gene, file = paste0(outdir, "DrugBank.tsv"), col.names = TRUE,
+            row.names = FALSE, sep = "\t")
diff --git a/src/downloads.py b/src/downloads.py
new file mode 100644
index 0000000..52a46d4
--- /dev/null
+++ b/src/downloads.py
@@ -0,0 +1,410 @@
+#!/usr/bin/python
+
+import os
+import re
+import subprocess
+import pdb
+import gzip
+import wget
+import pandas as pd
+import progressbar
+import urllib.request
+
+#Set to the desired download directory
+dwn_dir = 'downloads/'
+
+#URLs for downloadable files
+gencode = 'ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/latest_release/gencode.v39.annotation.gtf.gz'
+gdsc = 'ftp://ftp.sanger.ac.uk/pub/project/cancerrxgene/releases/current_release/ANOVA_results_GDSC2_20Feb20.xlsx'
+features = 'https://www.cancerrxgene.org/downloads/download/genetic_feature'
+kegg_ATC = 'https://www.genome.jp/kegg-bin/download_htext?htext=br08310&format=htext&filedir='
+cmap = 'https://api.clue.io/api/rep_drug_moas/?filter[skip]=0&user_key=' #Access the CLUE website and retrieve a user key
+fda = 'https://www.fda.gov/media/89850/download'
+fda_label = ['https://download.open.fda.gov/drug/label/drug-label-0001-of-0011.json.zip', 'https://download.open.fda.gov/drug/label/drug-label-0002-of-0011.json.zip', 'https://download.open.fda.gov/drug/label/drug-label-0003-of-0011.json.zip', 'https://download.open.fda.gov/drug/label/drug-label-0004-of-0011.json.zip', 'https://download.open.fda.gov/drug/label/drug-label-0005-of-0011.json.zip', 'https://download.open.fda.gov/drug/label/drug-label-0006-of-0011.json.zip', 'https://download.open.fda.gov/drug/label/drug-label-0007-of-0011.json.zip', 'https://download.open.fda.gov/drug/label/drug-label-0008-of-0011.json.zip', 'https://download.open.fda.gov/drug/label/drug-label-0009-of-0011.json.zip', 'https://download.open.fda.gov/drug/label/drug-label-0010-of-0011.json.zip', 'https://download.open.fda.gov/drug/label/drug-label-0011-of-0011.json.zip']
+ema = 'https://www.ema.europa.eu/sites/default/files/Medicines_output_european_public_assessment_reports.xlsx'
+ct = 'https://clinicaltrials.gov/AllPublicXML.zip'
+kegg_ind = 'http://rest.kegg.jp/get/$path/kgml' # $path will be substituted with the pathway codes
+cgc = 'https://cancer.sanger.ac.uk/cosmic/file_download/GRCh38/cosmic/v95/cancer_gene_census.csv'
+oncovar = 'https://oncovar.org/resource/download/All_genes_OncoVar_TCGA.tar.gz'
+therasabdab = 'http://opig.stats.ox.ac.uk/webapps/newsabdab/static/downloads/TheraSAbDab_SeqStruc_OnlineDownload.csv'
+intogen = 'https://www.intogen.org/download?file=IntOGen-Drivers-20200201.zip'
+depmap = 'https://ndownloader.figshare.com/files/34990036'
+genes_dwn = 'https://www.genenames.org/cgi-bin/download/custom?col=gd_app_sym&col=gd_pub_eg_id&status=Approved&hgnc_dbtag=on&order_by=gd_app_sym_sort&format=text&submit=submit'
+genepathway_dwn = 'http://rest.kegg.jp/link/pathway/hsa'
+pathwaydesc_dwn = 'http://rest.kegg.jp/list/pathway/hsa'
+sl = 'https://figshare.com/ndownloader/files/36451119?private_link=a035301c05daa2ce668e'
+cosmic_link = ''
+clinvar_dwn = 'https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_00-latest.xml.gz'
+pfam_dwn = 'ftp://ftp.ebi.ac.uk/pub/databases/Pfam/current_release/Pfam-A.full.gz'
+interpro_dwn = 'ftp://ftp.ebi.ac.uk/pub/databases/interpro/current_release/match_complete.xml.gz'
+uniprot_dwn = 'ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.xml.gz'
+hallmarks_dwn = 'https://static-content.springer.com/esm/art%3A10.1038%2Fs41598-018-25076-6/MediaObjects/41598_2018_25076_MOESM10_ESM.xlsx'
+
+#place authentication string for COSMIC download
+auth_str = ''
+
+def download_DGIdb():
+    print('Downloading GENCODE annotations...')
+    gencode_dwn = wget.download(gencode, out=dwn_dir)
+    print(gencode_dwn)
+
+    print('Obtaining list of human genes...')
+
+    #Obtain the name of the downloaded file from GENCODE
+    gencode_dwn = ''
+    listdir = os.listdir(dwn_dir)
+    for efile in listdir:
+        if re.search('^gencode', efile) is not None: gencode_dwn = efile
+
+    #Retrieve the gene names
+    genes_file = 'genes_fromGTF.tsv'
+    genes = []
+    inputf = gzip.open(dwn_dir+gencode_dwn,'rt')
+
+    for line in inputf:
+        line = line.rstrip("\n")
+        line_a = line.split("\t")
+        if re.search("##", line_a[0]) is None:
+            names = line_a[8]
+            pattern = re.compile('; gene_name "(.+?)";')
+            gene = pattern.search(names)
+            if gene.group(1) not in genes: genes.append(gene.group(1))
+    inputf.close()
+
+    print('Total number of genes: '+str(len(genes)))
+    df = pd.DataFrame(genes)
+    df.to_csv(dwn_dir+genes_file, index=False, sep='\t', header=False)
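+    # Hedged sketch (not in the original source): python_example.py below is assumed
+    # to be the helper script from the DGIdb API documentation; an equivalent direct
+    # query against the then-current v2 REST endpoint would look roughly like:
+    #   import requests
+    #   r = requests.get('https://dgidb.org/api/v2/interactions.json', params={'genes': 'BRAF'})
+    #   interactions = r.json()['matchedTerms'][0]['interactions']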
+    print('Retrieving data from DGIdb...')
+    #For each gene in the list, retrieve its drug-gene associations from DGIdb
+    dgidb_dwn = 'DGIdb_interactions_dwn.tsv'
+    open(dwn_dir+dgidb_dwn, 'w').close()
+    filei = pd.read_csv(dwn_dir+genes_file, sep='\t', low_memory=False, header=None)
+    genes = filei.iloc[:, 0].tolist()
+
+    for i in progressbar.progressbar(range(len(genes))):
+        command = "python ./python_example.py --genes='"+genes[i]+"' >> "+dwn_dir+dgidb_dwn
+        subprocess.call(command, shell=True)
+
+def download_therasabdab():
+
+    print('Downloading Thera-SAbDab annotations...')
+
+    sabdab_dwn = wget.download(therasabdab, out=dwn_dir)
+    print(sabdab_dwn)
+
+def download_moalmanac():
+
+    print('Downloading MOAlmanac associations...')
+
+    command = "curl -X 'GET' 'https://moalmanac.org/api/assertions' -H 'accept: */*' | jq -r '.[] | \"\\(.assertion_id)\t\\(.features[].attributes[].feature_type)\t\\(.features[].attributes[].gene)\t\\(.features[].attributes[].gene1)\t\\(.features[].attributes[].gene2)\t\\(.therapy_name)\t\\(.therapy_resistance)\t\\(.therapy_sensitivity)\t\\(.therapy_type)\t\\(.features[].attributes[].protein_change)\t\\(.predictive_implication)\t\\(.validated)\"' > "+dwn_dir+"moalmanac_dwn.tsv"
+    subprocess.call(command, shell=True)
+
+def download_GDSC():
+
+    print('Downloading GDSC associations and features...')
+
+    gdsc_dwn = wget.download(gdsc, out=dwn_dir)
+    print(gdsc_dwn)
+
+    features_dwn = wget.download(features, out=dwn_dir+'GDSC_features.csv')
+    print(features_dwn)
+
+def download_KEGG_TB():
+
+    print('Downloading KEGG Target-based classification of drugs...')
+
+    kegg_atc_dwn = wget.download(kegg_ATC, out=dwn_dir)
+    print(kegg_atc_dwn)
+
+def download_cmap():
+
+    print('Downloading CLUE Repurposing data...')
+
+    moa_file = 'cmap_moa.tsv'
+
+    stop = 0
+    skip = 0
+
+    append_file = open(dwn_dir+moa_file, 'w')
+    append_file.close()
+
+    while stop == 0:
+
+        cmap_dwn = wget.download(cmap.replace('[skip]=0','[skip]='+str(skip)), out=dwn_dir)
+        print(cmap_dwn)
+
+        dwn_file = open(dwn_dir+'download.wget', 'r')
+        dwn_file_data = dwn_file.read()
+        if dwn_file_data.count('pert_iname') < 1000: stop = 1
+        dwn_file.close()
+
+        append_file = open(dwn_dir+moa_file, 'a')
+        append_file.write(dwn_file_data+'\n')
+        append_file.close()
+
+        os.remove(dwn_dir+'download.wget')
+        skip = skip+1000
+
+def download_FDA():
+
+    print('Downloading FDA data ...')
+
+    fda_dwn = wget.download(fda, out=dwn_dir)
+    print(fda_dwn)
+
+def download_FDA_labels():
+
+    print('Downloading FDA label data ...')
+
+    for e in fda_label:
+        fda_label_dwn = wget.download(e, out=dwn_dir)
+        print(fda_label_dwn)
+
+def download_EMA():
+
+    print('Downloading EMA data ...')
+
+    ema_dwn = wget.download(ema, out=dwn_dir)
+    print(ema_dwn)
+
+def download_ct():
+
+    print('Downloading Clinical Trial records...')
+
+    ct_dwn = wget.download(ct, out=dwn_dir)
+    print(ct_dwn)
+
+def download_KEGG_ind():
+
+    print('Downloading KEGG pathways...')
+
+    KEGG_dir = dwn_dir+'KEGGmodeled'
+
+    if not os.path.exists(KEGG_dir): os.makedirs(KEGG_dir)
+
+    pathways = ['hsa03320', 'hsa04010', 'hsa04012', 'hsa04014', 'hsa04015', 'hsa04020', 'hsa04022', 'hsa04024', 'hsa04062', 'hsa04064', 'hsa04066', 'hsa04068', 'hsa04071', 'hsa04110', 'hsa04114', 'hsa04115', 'hsa04150', 'hsa04151', 'hsa04152', 'hsa04210', 'hsa04261', 'hsa04270', 'hsa04310', 'hsa04330', 'hsa04340', 'hsa04350', 'hsa04370', 'hsa04390', 'hsa04510', 'hsa04520', 'hsa04530', 'hsa04540', 'hsa04611', 'hsa04620', 'hsa04621', 'hsa04622', 'hsa04630', 'hsa04650', 'hsa04660', 'hsa04662', 'hsa04664', 'hsa04666', 'hsa04668', 'hsa04670', 'hsa04722', 'hsa04910', 'hsa04912', 'hsa04914', 'hsa04915', 'hsa04916', 'hsa04919', 'hsa04920', 'hsa04921', 'hsa04922', 'hsa04971', 'hsa05010', 'hsa05012', 'hsa05160', 'hsa05200', 'hsa05205', 'hsa05212', 'hsa05214', 'hsa05218', 'hsa05231']
+    for path in pathways:
+        command = 'wget -O '+KEGG_dir+'/'+path+'.kgml.xml http://rest.kegg.jp/get/'+path+'/kgml'
+        subprocess.call(command, shell=True)
+
+def download_CGC():
+
+    import ast
+
+    print('Downloading CGC file...')
+
+    p = subprocess.Popen(['curl', '-H', 'Authorization: Basic ' + auth_str, cgc], stdout=subprocess.PIPE)
+    out, err = p.communicate()
+
+    cgc_dwn = wget.download(ast.literal_eval(out.decode('utf-8'))['url'], out=dwn_dir)
+    print(cgc_dwn)
+
+def download_oncovar():
+
+    print('Downloading OncoVar files...')
+
+    oncovar_dwn = wget.download(oncovar, out=dwn_dir)
+    print(oncovar_dwn)
+
+def download_CIViC_evidence():
+
+    print('Downloading CIViC evidence...')
+
+    import requests
+    import json
+
+    url = 'https://civicdb.org/api/graphql'
+
+    nodes = []
+
+    query = """query evidenceItems($evidenceType: EvidenceType){
+        evidenceItems(evidenceType: $evidenceType) {
+            nodes {
+                id
+            }
+            pageCount
+            pageInfo {
+                endCursor
+                startCursor
+            }
+            totalCount
+        }
+    }"""
+
+    variables = """{"evidenceType": "PREDICTIVE"}"""
+
+    r = requests.post(url, json={'query': query, 'variables': variables})
+    if r.status_code == 200:
+        data = json.loads(r.text)
+        for n in data['data']['evidenceItems']['nodes']:
+            nodes.append(n['id'])
+
+        query = """query evidenceItems($evidenceType: EvidenceType, $after: String){
+            evidenceItems(evidenceType: $evidenceType, after: $after) {
+                nodes {
+                    id
+                }
+                pageCount
+                pageInfo {
+                    endCursor
+                    startCursor
+                }
+                totalCount
+            }
+        }"""
+
+        for i in range(1,data['data']['evidenceItems']['pageCount']):
+            variables = """{"evidenceType": "PREDICTIVE", "after": \""""+data['data']['evidenceItems']['pageInfo']['endCursor']+"""\"}"""
+            r = requests.post(url, json={'query': query, 'variables': variables})
+            if r.status_code == 200:
+                data = json.loads(r.text)
+                for n in data['data']['evidenceItems']['nodes']:
+                    nodes.append(n['id'])
+            else:
+                print('Error: ' + r.text)
+    else:
+        print('Error: ' + r.text)
+
+    query = """query evidenceItem($id: Int!){
+        evidenceItem(id: $id) {
+            drugs { name }
+            gene { name }
+            variant { name }
+            status
+            clinicalSignificance
+            evidenceDirection
+            evidenceLevel
+        }
+    }"""
+
+    print('length nodes: '+str(len(nodes)))
+    count = 0
+    output = open(dwn_dir+'civic_evidence.tsv','w')
+    output.write('\t'.join(['drug', 'gene', 'variant', 'status', 'clinical_significance', 'evidence_direction', 'evidenceLevel'])+'\n')
+    for n in progressbar.progressbar(range(len(nodes))):
+        count = count + 1
+        variables = """{"id": """+str(nodes[n])+"""}"""
+        r = requests.post(url, json={'query': query, 'variables': variables})
+        if r.status_code == 200:
+            data = json.loads(r.text)
+            gene = data['data']['evidenceItem']['gene']['name']
+            variant = data['data']['evidenceItem']['variant']['name']
+            status = data['data']['evidenceItem']['status']
+            clinsig = data['data']['evidenceItem']['clinicalSignificance']
+            evdirection = data['data']['evidenceItem']['evidenceDirection']
+            evlevel = data['data']['evidenceItem']['evidenceLevel']
+            drug = []
+            for ele in data['data']['evidenceItem']['drugs']:
+                drug.append(ele['name'])
+            output.write('\t'.join([' + '.join(sorted(drug)), gene, variant, status, clinsig, evdirection, evlevel])+'\n')
+        else:
+            print('Error: ' + r.text)
+    output.close()
+    print('counts: '+str(count))
+
+def download_oncoKB_evidence():
+
+    print('Please download the OncoKB associations file from https://www.oncokb.org/actionableGenes and place it in the downloads directory')
+
+def download_intogen():
+
+    print('Downloading IntOGen data...')
+
+    intogen_dwn = wget.download(intogen, out=dwn_dir)
+    print(intogen_dwn)
+
+def download_depmap():
+
+    print('Downloading DepMap public score + Chronos...')
+
+    depmap_dwn = wget.download(depmap, out=dwn_dir)
+    print(depmap_dwn)
+
+def download_gene_ids():
+
+    print('Downloading GeneIds...')
+    genes = wget.download(genes_dwn, out=dwn_dir)
+    print(genes)
+
+def download_KEGG_pathways():
+
+    genepathway_file = 'gene_pathway.tsv'
+    pathwaydesc_file = 'pathway_desc.tsv'
+
+    print('Downloading GenePathways...')
+    gene_pathway = wget.download(genepathway_dwn, out=dwn_dir)
+    print(gene_pathway)
+    os.rename(dwn_dir+'hsa', dwn_dir+genepathway_file)
+
+    print('Downloading Pathways descriptions...')
+    pathway_desc = wget.download(pathwaydesc_dwn, out=dwn_dir)
+    print(pathway_desc)
+    os.rename(dwn_dir+'hsa', dwn_dir+pathwaydesc_file)
+
+def download_SL():
+
+    print('Downloading SL interactions...')
+    sl_dwn = wget.download(sl, out=dwn_dir)
+    print(sl_dwn)
+
+def download_cosmic():
+    print('Downloading CosmicMutantExport...')
+    cosmic = wget.download(cosmic_link, out=dwn_dir)
+    print(cosmic)
+
+def download_clinvar():
+    print('Downloading ClinVar...')
+    clinvar = wget.download(clinvar_dwn, out=dwn_dir)
+    print(clinvar)
+
+def download_pfam():
+    print('Downloading Pfam...')
+    pfam = wget.download(pfam_dwn, out=dwn_dir)
+    print(pfam)
+
+def download_interpro():
+    print('Downloading InterPro...')
+    interpro = wget.download(interpro_dwn, out=dwn_dir)
+    print(interpro)
+
+def download_uniprot():
+    print('Downloading UniProt...')
+    uniprot = wget.download(uniprot_dwn, out=dwn_dir)
+    print(uniprot)
+
+def download_hallmarks():
+    print('Downloading S8 from the Hallmarks article...')
+    urllib.request.urlretrieve(hallmarks_dwn, dwn_dir+"hallmarks.xlsx")
+
+download_DGIdb()
+download_therasabdab()
+download_moalmanac()
+download_GDSC()
+download_KEGG_TB()
+download_cmap()
+download_FDA()
+download_FDA_labels()
+download_EMA()
+download_ct()
+download_KEGG_ind()
+download_CGC()
+download_oncovar()
+download_CIViC_evidence()
+download_oncoKB_evidence()
+download_intogen()
+download_depmap()
+download_gene_ids()
+download_KEGG_pathways()
+download_SL()
+
+#exclusive for genomic annotation
+download_cosmic()
+download_clinvar()
+download_pfam()
+download_interpro()
+download_uniprot()
+download_hallmarks()
diff --git a/src/manualreview.py b/src/manualreview.py
new file mode 100644
index 0000000..e605d95
--- /dev/null
+++ b/src/manualreview.py
@@ -0,0 +1,243 @@
+#!/usr/bin/python
+
+import re
+import pdb
+import pandas as pd
+import progressbar
+import sys
+
+#Set to directory with processed files, pandrugs previous version and manual review files
+pro_dir = 'processed/'
+mr_dir = 'manual_review/'
+pd_prev = 'Pandrugs_Feb2020.tsv'
+
+if len(sys.argv) < 2:
+    print("Usage: python manualreview.py level [1 = create synonyms file, 2 = create drug info file]")
+    sys.exit()
+else:
+    print(sys.argv)
+
+def process_drug_names():
+
+    print('Creating file with synonyms for review...')
+
+    drugs = []
+
+    files = ['DGIdb_interactions.tsv', 'oncokb.tsv', 'civic.tsv', 'DrugBank.tsv', 'moalmanac.tsv', 'GDSC.tsv', 'sabdab.tsv']
+
+    for f in files:
+        inputf = open(pro_dir+f, 'r')
+        for line in inputf:
+            line = line.rstrip('\n')
+            line_a = line.split('\t')
+            if 'drug_name' in line_a:
+                drug_index = line_a.index('drug_name')
+                continue
+            drugs.append(line_a[drug_index].upper())
+        inputf.close()
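+    # Added note (not in the original source): pubchempy.get_synonyms returns records
+    # shaped like [{'CID': 5291, 'Synonym': ['imatinib', 'Gleevec', ...]}] (illustrative
+    # values); the loop below prefers the synonym tagged '[INN]' as the display name
+    # and falls back to the first synonym otherwise.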
+
+    outputf = open(mr_dir+synonyms_mr, 'w')
+    outputf.write('\t'.join(['drug_name', 'short_drug_name', 'standard_drug_name', 'short_standard_drug_name', 'show_drug_name', 'short_show_drug_name', 'synonyms', 'review'])+'\n')
+
+    import pubchempy as pcp
+
+    drugs = list(set(drugs))
+    for i in progressbar.progressbar(range(len(drugs))):
+        response = pcp.get_synonyms(drugs[i], 'name')
+        INNname = ''
+        if len(response) > 0:
+            for e in response[0]['Synonym']:
+                if '[' in e and 'INN' in e and 'INN-' not in e:
+                    INNname = e.split(' [')[0]
+                    break
+            if INNname == '': INNname = response[0]['Synonym'][0]
+            outputf.write('\t'.join([drugs[i], drugs[i].split(' ')[0], response[0]['Synonym'][0], response[0]['Synonym'][0].split(' ')[0], INNname, INNname.split(' ')[0], '::'.join(response[0]['Synonym']), ''])+'\n')
+        else:
+            outputf.write('\t'.join([drugs[i], drugs[i].split(' ')[0], drugs[i], drugs[i].split(' ')[0], drugs[i], drugs[i].split(' ')[0], '', ''])+'\n')
+
+    outputf.close()
+
+    drug_file = pd.read_csv(mr_dir+synonyms_mr, sep='\t', low_memory=False)
+    drug_file = drug_file.fillna('')
+
+    drugsyn = {}
+
+    for index in progressbar.progressbar(range(drug_file.index.size)):
+        row = drug_file.iloc[index]
+        synonyms = row['synonyms'].split('::')
+        for syn in synonyms:
+            if syn in drugsyn.keys():
+                if row['drug_name'] not in drugsyn[syn].keys():
+                    drugsyn[syn][row['drug_name']] = row['standard_drug_name']
+            else:
+                drugsyn[syn] = {}
+                drugsyn[syn][row['drug_name']] = row['standard_drug_name']
+
+    for syn in drugsyn.keys():
+        if syn != '' and len(drugsyn[syn].keys()) > 1 and len(list(set(list(map(drugsyn[syn].get,list(drugsyn[syn].keys())))))) > 1:
+            for d in drugsyn[syn].keys():
+                drug_file.loc[drug_file['drug_name'] == d,'review'] = ','.join(list(drugsyn[syn].keys()))
+
+    drug_file.to_csv(mr_dir+synonyms_mr, index=False, sep='\t', header=True)
+
+def process_FDA_EMA():
+
+    print('Collecting FDA and EMA information about status...')
+
+    fda_status_file = 'fda_status.tsv'
+    approved_drug_data_mr = 'drug_approved_data_mr.tsv'
+    ema_status_dwn = 'ema_status.tsv'
+    fda_labels_dwn = 'fda_labels.tsv'
+
+    fda_status = pd.read_csv(pro_dir+fda_status_file, sep='\t', low_memory=False)
+    fda_status.fillna('', inplace=True)
+
+    ema_status = pd.read_csv(pro_dir+ema_status_dwn, sep='\t', low_memory=False)
+    ema_status.fillna('', inplace=True)
+    ema_status['International non-proprietary name (INN) / common name'] = ema_status['International non-proprietary name (INN) / common name'].str.upper()
+
+    fda_labels = pd.read_csv(pro_dir+fda_labels_dwn, sep='\t', low_memory=False)
+    fda_labels.fillna('', inplace=True)
+    appl_list = fda_labels['ApplNo'].tolist()
+
+    status_CT_file = pd.read_csv(pro_dir+'clinicaltrials.tsv', sep='\t', low_memory=False)
+    status_CT_file['drug'] = status_CT_file['drug'].str.upper()
+    status_CT_file[['drug']] = status_CT_file[['drug']].fillna('')
+
+    fileo = open(mr_dir+approved_drug_data_mr, 'w')
+    fileo.write('\t'.join(['standard_drug_name','ApplNo','ApplNo_labels','status_FDA','status_EMA','ct_condition','status_mc','PrevStat','indication_FDA','indication_EMA','cancer_term','indication_mc','indication_prev','pathology_mc','pathology_prev','cancer_type_mc','cancer_type_prev','drug_type_mc','drug_type_prev','review'])+'\n')
+
+    drug_list = list(set(synonyms['standard_drug_name'].tolist()))
+    for i in progressbar.progressbar(range(len(drug_list))):
+        standard_drug_name = drug_list[i]
+        #list of possible names in pandrugs database
+        synonyms_list = synonyms.loc[synonyms['standard_drug_name'] == standard_drug_name][['drug_name', 'show_drug_name', 'standard_drug_name', 'short_drug_name', 'short_standard_drug_name', 'short_show_drug_name']].values.flatten().tolist()
+        synonyms_list = [x for x in synonyms_list if str(x) != 'nan']
+
+        (fda_data,status_ema) = (['', ''],'')
+        review = []
+
+        #Data from FDA
+        found = False
+        for idx, syn in enumerate(synonyms_list):
+            if syn in fda_status['drug'].tolist():
+                fda_data = [str(x) for x in fda_status.loc[fda_status['drug'] == syn].values.flatten().tolist()[1:]]
+                if idx in [1,3,5]: review.append('fda:'+fda_status.loc[fda_status['drug'] == syn]['drug'].values.flatten().tolist()[0])
+                found = True
+                break
+        if not found:
+            for idx, syn in enumerate(synonyms_list):
+                if syn in fda_status['drug'].str.split(' ', n = 1, expand = True)[0].tolist():
+                    index = fda_status['drug'].str.split(' ', n = 1, expand = True)[0].tolist().index(syn)
+                    fda_data = [str(x) for x in fda_status.iloc[[index]].values.flatten().tolist()[1:]]
+                    review.append('fda:'+fda_status.iloc[[index]]['drug'].values.flatten().tolist()[0])
+                    break
+
+        #Data from EMA
+        (status_ema, indication_ema) = ('', '')
+        found = False
+        for idx, syn in enumerate(synonyms_list):
+            if syn in ema_status['International non-proprietary name (INN) / common name'].tolist():
+                status_ema = list(set(ema_status.loc[ema_status['International non-proprietary name (INN) / common name'] == syn]['Authorisation status'].values.flatten().tolist()))
+                indication_ema = [x.replace('\t',' ') for x in list(set(ema_status.loc[ema_status['International non-proprietary name (INN) / common name'] == syn]['Condition / indication'].values.flatten().tolist()))]
+                if idx in [1,3,5]: review.append('ema:'+ema_status.loc[ema_status['International non-proprietary name (INN) / common name'] == syn]['International non-proprietary name (INN) / common name'].values.flatten().tolist()[0])
+                found = True
+                break
+        if not found:
+            for idx, syn in enumerate(synonyms_list):
+                if syn in ema_status['International non-proprietary name (INN) / common name'].str.split(' ', n = 1, expand = True)[0].tolist():
+                    index = ema_status['International non-proprietary name (INN) / common name'].str.split(' ', n = 1, expand = True)[0].tolist().index(syn)
+                    status_ema = list(set(ema_status.iloc[[index]]['Authorisation status'].values.flatten().tolist()))
+                    indication_ema = [x.replace('\t', ' ') for x in list(set(ema_status.iloc[[index]]['Condition / indication'].values.flatten().tolist()))]
+                    review.append('ema:'+ema_status.iloc[[index]]['International non-proprietary name (INN) / common name'].values.flatten().tolist()[0])
+                    break
+
+        #Data from FDA labels
+        indications = []
+        applnos = []
+
+        if fda_data[1] != '':
+            applno = fda_data[1].split(';')
+
+            for app in applno:
+                app_label = ''
+                if 'NDA'+app.zfill(6) in appl_list:
+                    app_label = 'NDA'+app.zfill(6)
+                elif 'ANDA'+app.zfill(6) in appl_list:
+                    app_label = 'ANDA'+app.zfill(6)
+                elif 'BLA'+app.zfill(6) in appl_list:
+                    app_label = 'BLA'+app.zfill(6)
+                if app_label != '':
+                    for index2, row2 in fda_labels.loc[fda_labels['ApplNo'] == app_label].iterrows():
+                        indications.append(row2.tolist()[0])
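+                    # Added note (not in the original source): fda_status stores bare
+                    # application numbers, while label records are keyed as NDA/ANDA/BLA
+                    # plus a six-digit zero-padded number, hence the zfill(6) probes above;
+                    # applnos keeps only numbers for which a label was actually found.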
applnos.append(app) + + #Data from clinical trials + ct_condition = '' + found = False + for idx, syn in enumerate(synonyms_list): + if syn in status_CT_file['drug'].tolist(): + ct_condition = list(set(status_CT_file.loc[status_CT_file['drug'] == syn]['condition'].values.flatten().tolist())) + if idx in [1,3,5]: review.append('ct:'+status_CT_file.loc[status_CT_file['drug'] == syn]['drug'].values.flatten().tolist()[0]) + found = True + break + if not found: + for idx, syn in enumerate(synonyms_list): + if syn in status_CT_file['drug'].str.split(' ', n = 1, expand = True)[0].tolist(): + index = status_CT_file['drug'].str.split(' ', n = 1, expand = True)[0].tolist().index(syn) + ct_condition = list(set(status_CT_file.iloc[[index]]['condition'].values.flatten().tolist())) + review.append('ct:'+status_CT_file.iloc[[index]]['drug'].values.flatten().tolist()[0]) + break + ct_condition = [str(x) for x in ct_condition] + + #Information from previous version + prev_values = ['','','','',''] + if standard_drug_name in pandrugs_prev['standard_drug_name'].tolist(): + prev_values = pandrugs_prev.loc[pandrugs_prev['standard_drug_name'] == standard_drug_name][['status','pathology','cancer','extra','extra2']].values.tolist()[0] + elif standard_drug_name in pandrugs_prev['show_drug_name'].tolist(): + prev_values = pandrugs_prev.loc[pandrugs_prev['show_drug_name'] == standard_drug_name][['status','pathology','cancer','extra','extra2']].values.tolist()[0] + elif standard_drug_name in pandrugs_prev['source_drug_name'].tolist(): + prev_values = pandrugs_prev.loc[pandrugs_prev['source_drug_name'] == standard_drug_name][['status','pathology','cancer','extra','extra2']].values.tolist()[0] + else: + for syn in synonyms_list: + if syn in pandrugs_prev['standard_drug_name'].tolist(): + prev_values = pandrugs_prev.loc[pandrugs_prev['standard_drug_name'] == syn][['status','pathology','cancer','extra','extra2']].values.tolist()[0] + break + elif syn in pandrugs_prev['show_drug_name'].tolist() and str(syn) != 'nan': + prev_values = pandrugs_prev.loc[pandrugs_prev['show_drug_name'] == syn][['status','pathology','cancer','extra','extra2']].values.tolist()[0] + break + elif syn in pandrugs_prev['source_drug_name'].tolist() and str(syn) != 'nan': + prev_values = pandrugs_prev.loc[pandrugs_prev['source_drug_name'] == syn][['status','pathology','cancer','extra','extra2']].values.tolist()[0] + break + + #Type of therapy + dtype = '' + if re.search(('ib$|mab$|ib |mab '), standard_drug_name, re.IGNORECASE): + dtype = 'TARGETED THERAPY' + else: + synonyms_list = [x for x in synonyms_list if str(x) != 'nan'] + for syn in synonyms_list: + if re.search(('ib$|mab$|ib |mab '), syn, re.IGNORECASE): + dtype = 'TARGETED THERAPY' + break + + cancer_term = '' + if re.search('cancer|tumor|oma ','::'.join(list(set(indications)))) != None or re.search('cancer|tumor|oma ','::'.join(list(set(indication_ema)))) != None: + cancer_term = 'yes' + + fileo.write('\t'.join([standard_drug_name, fda_data[1], ';'.join(applnos), fda_data[0], '::'.join(status_ema), '::'.join(ct_condition), '',prev_values[0], '::'.join(list(set(indications))), '::'.join(indication_ema), cancer_term, '', prev_values[3], '', prev_values[1], '', prev_values[2], dtype, prev_values[4], ','.join(review)])+'\n') + + fileo.close() + +synonyms_mr = 'drug_synonyms_mr.tsv' + +if sys.argv[1] == '1': + process_drug_names() +else: + pandrugs_prev = pd.read_csv(pd_prev, sep ='\t', low_memory=False) + pandrugs_prev = pandrugs_prev.fillna('') + synonyms = 
pd.read_csv(mr_dir+synonyms_mr.replace('.tsv', 'd.tsv'), sep ='\t', low_memory=False) + synonyms['drug_name'] = synonyms['drug_name'].str.upper() + synonyms['standard_drug_name'] = synonyms['standard_drug_name'].str.upper() + synonyms['show_drug_name'] = synonyms['show_drug_name'].str.upper() + +# process_FDA_EMA() # run this after reviewing drug_names diff --git a/src/modules/Archive/Extract.pm b/src/modules/Archive/Extract.pm new file mode 100644 index 0000000..7597f91 --- /dev/null +++ b/src/modules/Archive/Extract.pm @@ -0,0 +1,1751 @@ +package Archive::Extract; +use if $] > 5.017, 'deprecate'; + +use strict; + +use Cwd qw[cwd chdir]; +use Carp qw[carp]; +use IPC::Cmd qw[run can_run]; +use FileHandle; +use File::Path qw[mkpath]; +use File::Spec; +use File::Basename qw[dirname basename]; +use Params::Check qw[check]; +use Module::Load::Conditional qw[can_load check_install]; +use Locale::Maketext::Simple Style => 'gettext'; + +### solaris has silly /bin/tar output ### +use constant ON_SOLARIS => $^O eq 'solaris' ? 1 : 0; +use constant ON_NETBSD => $^O =~ m!^(netbsd|minix)$! ? 1 : 0; +use constant ON_OPENBSD => $^O =~ m!^(openbsd|bitrig)$! ? 1 : 0; +use constant ON_FREEBSD => $^O =~ m!^(free|midnight|dragonfly)(bsd)?$! ? 1 : 0; +use constant ON_LINUX => $^O eq 'linux' ? 1 : 0; +use constant FILE_EXISTS => sub { -e $_[0] ? 1 : 0 }; + +### VMS may require quoting upper case command options +use constant ON_VMS => $^O eq 'VMS' ? 1 : 0; + +### Windows needs special treatment of Tar options +use constant ON_WIN32 => $^O eq 'MSWin32' ? 1 : 0; + +### we can't use this extraction method, because of missing +### modules/binaries: +use constant METHOD_NA => []; + +### If these are changed, update @TYPES and the new() POD +use constant TGZ => 'tgz'; +use constant TAR => 'tar'; +use constant GZ => 'gz'; +use constant ZIP => 'zip'; +use constant BZ2 => 'bz2'; +use constant TBZ => 'tbz'; +use constant Z => 'Z'; +use constant LZMA => 'lzma'; +use constant XZ => 'xz'; +use constant TXZ => 'txz'; + +use vars qw[$VERSION $PREFER_BIN $PROGRAMS $WARN $DEBUG + $_ALLOW_BIN $_ALLOW_PURE_PERL $_ALLOW_TAR_ITER + ]; + +$VERSION = '0.88'; +$PREFER_BIN = 0; +$WARN = 1; +$DEBUG = 0; +$_ALLOW_PURE_PERL = 1; # allow pure perl extractors +$_ALLOW_BIN = 1; # allow binary extractors +$_ALLOW_TAR_ITER = 1; # try to use Archive::Tar->iter if available + +# same as all constants +my @Types = ( TGZ, TAR, GZ, ZIP, BZ2, TBZ, Z, LZMA, XZ, TXZ ); + +local $Params::Check::VERBOSE = $Params::Check::VERBOSE = 1; +local $Module::Load::Conditional::FORCE_SAFE_INC = 1; + +=pod + +=head1 NAME + +Archive::Extract - A generic archive extracting mechanism + +=head1 SYNOPSIS + + use Archive::Extract; + + ### build an Archive::Extract object ### + my $ae = Archive::Extract->new( archive => 'foo.tgz' ); + + ### extract to cwd() ### + my $ok = $ae->extract; + + ### extract to /tmp ### + my $ok = $ae->extract( to => '/tmp' ); + + ### what if something went wrong? + my $ok = $ae->extract or die $ae->error; + + ### files from the archive ### + my $files = $ae->files; + + ### dir that was extracted to ### + my $outdir = $ae->extract_path; + + + ### quick check methods ### + $ae->is_tar # is it a .tar file? + $ae->is_tgz # is it a .tar.gz or .tgz file? + $ae->is_gz; # is it a .gz file? + $ae->is_zip; # is it a .zip file? + $ae->is_bz2; # is it a .bz2 file? + $ae->is_tbz; # is it a .tar.bz2 or .tbz file? + $ae->is_lzma; # is it a .lzma file? + $ae->is_xz; # is it a .xz file? + $ae->is_txz; # is it a .tar.xz or .txz file? 
+ + ### absolute path to the archive you provided ### + $ae->archive; + + ### commandline tools, if found ### + $ae->bin_tar # path to /bin/tar, if found + $ae->bin_gzip # path to /bin/gzip, if found + $ae->bin_unzip # path to /bin/unzip, if found + $ae->bin_bunzip2 # path to /bin/bunzip2 if found + $ae->bin_unlzma # path to /bin/unlzma if found + $ae->bin_unxz # path to /bin/unxz if found + +=head1 DESCRIPTION + +Archive::Extract is a generic archive extraction mechanism. + +It allows you to extract any archive file of the type .tar, .tar.gz, +.gz, .Z, tar.bz2, .tbz, .bz2, .zip, .xz,, .txz, .tar.xz or .lzma +without having to worry how it +does so, or use different interfaces for each type by using either +perl modules, or commandline tools on your system. + +See the C section further down for details. + +=cut + + +### see what /bin/programs are available ### +$PROGRAMS = {}; +CMD: for my $pgm (qw[tar unzip gzip bunzip2 uncompress unlzma unxz]) { + if ( $pgm eq 'unzip' and ON_FREEBSD and my $unzip = can_run('info-unzip') ) { + $PROGRAMS->{$pgm} = $unzip; + next CMD; + } + if ( $pgm eq 'unzip' and ( ON_FREEBSD || ON_LINUX ) ) { + local $IPC::Cmd::INSTANCES = 1; + ($PROGRAMS->{$pgm}) = grep { _is_infozip_esque($_) } can_run($pgm); + next CMD; + } + if ( $pgm eq 'unzip' and ON_NETBSD ) { + local $IPC::Cmd::INSTANCES = 1; + ($PROGRAMS->{$pgm}) = grep { m!/usr/pkg/! } can_run($pgm); + next CMD; + } + if ( $pgm eq 'tar' and ( ON_OPENBSD || ON_SOLARIS || ON_NETBSD ) ) { + # try gtar first + next CMD if $PROGRAMS->{$pgm} = can_run('gtar'); + } + $PROGRAMS->{$pgm} = can_run($pgm); +} + +### mapping from types to extractor methods ### +my $Mapping = { # binary program # pure perl module + is_tgz => { bin => '_untar_bin', pp => '_untar_at' }, + is_tar => { bin => '_untar_bin', pp => '_untar_at' }, + is_gz => { bin => '_gunzip_bin', pp => '_gunzip_cz' }, + is_zip => { bin => '_unzip_bin', pp => '_unzip_az' }, + is_tbz => { bin => '_untar_bin', pp => '_untar_at' }, + is_bz2 => { bin => '_bunzip2_bin', pp => '_bunzip2_bz2'}, + is_Z => { bin => '_uncompress_bin', pp => '_gunzip_cz' }, + is_lzma => { bin => '_unlzma_bin', pp => '_unlzma_cz' }, + is_xz => { bin => '_unxz_bin', pp => '_unxz_cz' }, + is_txz => { bin => '_untar_bin', pp => '_untar_at' }, +}; + +{ ### use subs so we re-generate array refs etc for the no-override flags + ### if we don't, then we reuse the same arrayref, meaning objects store + ### previous errors + my $tmpl = { + archive => sub { { required => 1, allow => FILE_EXISTS } }, + type => sub { { default => '', allow => [ @Types ] } }, + _error_msg => sub { { no_override => 1, default => [] } }, + _error_msg_long => sub { { no_override => 1, default => [] } }, + }; + + ### build accessors ### + for my $method( keys %$tmpl, + qw[_extractor _gunzip_to files extract_path], + ) { + no strict 'refs'; + *$method = sub { + my $self = shift; + $self->{$method} = $_[0] if @_; + return $self->{$method}; + } + } + +sub type_for { + local $_ = shift; + return /.+?\.(?:tar\.gz|tgz)$/i ? TGZ : + /.+?\.gz$/i ? GZ : + /.+?\.tar$/i ? TAR : + /.+?\.(zip|jar|ear|war|par)$/i ? ZIP : + /.+?\.(?:tbz2?|tar\.bz2?)$/i ? TBZ : + /.+?\.bz2$/i ? BZ2 : + /.+?\.Z$/ ? Z : + /.+?\.lzma$/ ? LZMA : + /.+?\.(?:txz|tar\.xz)$/i ? TXZ : + /.+?\.xz$/ ? XZ : + ''; +} + +=head1 METHODS + +=head2 $ae = Archive::Extract->new(archive => '/path/to/archive',[type => TYPE]) + +Creates a new C object based on the archive file you +passed it. 
Automatically determines the type of archive based on the +extension, but you can override that by explicitly providing the +C argument, potentially by calling C. + +Valid values for C are: + +=over 4 + +=item tar + +Standard tar files, as produced by, for example, C. +Corresponds to a C<.tar> suffix. + +=item tgz + +Gzip compressed tar files, as produced by, for example C. +Corresponds to a C<.tgz> or C<.tar.gz> suffix. + +=item gz + +Gzip compressed file, as produced by, for example C. +Corresponds to a C<.gz> suffix. + +=item Z + +Lempel-Ziv compressed file, as produced by, for example C. +Corresponds to a C<.Z> suffix. + +=item zip + +Zip compressed file, as produced by, for example C. +Corresponds to a C<.zip>, C<.jar> or C<.par> suffix. + +=item bz2 + +Bzip2 compressed file, as produced by, for example, C. +Corresponds to a C<.bz2> suffix. + +=item tbz + +Bzip2 compressed tar file, as produced by, for example C. +Corresponds to a C<.tbz> or C<.tar.bz2> suffix. + +=item lzma + +Lzma compressed file, as produced by C. +Corresponds to a C<.lzma> suffix. + +=item xz + +Xz compressed file, as produced by C. +Corresponds to a C<.xz> suffix. + +=item txz + +Xz compressed tar file, as produced by, for example C. +Corresponds to a C<.txz> or C<.tar.xz> suffix. + +=back + +Returns a C object on success, or false on failure. + +=cut + + ### constructor ### + sub new { + my $class = shift; + my %hash = @_; + + ### see above why we use subs here and generate the template; + ### it's basically to not re-use arrayrefs + my %utmpl = map { $_ => $tmpl->{$_}->() } keys %$tmpl; + + my $parsed = check( \%utmpl, \%hash ) or return; + + ### make sure we have an absolute path ### + my $ar = $parsed->{archive} = File::Spec->rel2abs( $parsed->{archive} ); + + ### figure out the type, if it wasn't already specified ### + $parsed->{type} ||= type_for $ar; + + bless $parsed, $class; + + ### don't know what type of file it is + ### XXX this *has* to be an object call, not a package call + return $parsed->_error(loc("Cannot determine file type for '%1'", + $parsed->{archive} )) unless $parsed->{type}; + return $parsed; + } +} + +=head2 $ae->extract( [to => '/output/path'] ) + +Extracts the archive represented by the C object to +the path of your choice as specified by the C argument. Defaults to +C. + +Since C<.gz> files never hold a directory, but only a single file; if +the C argument is an existing directory, the file is extracted +there, with its C<.gz> suffix stripped. +If the C argument is not an existing directory, the C argument +is understood to be a filename, if the archive type is C. +In the case that you did not specify a C argument, the output +file will be the name of the archive file, stripped from its C<.gz> +suffix, in the current working directory. + +C will try a pure perl solution first, and then fall back to +commandline tools if they are available. See the C +section below on how to alter this behaviour. + +It will return true on success, and false on failure. + +On success, it will also set the follow attributes in the object: + +=over 4 + +=item $ae->extract_path + +This is the directory that the files where extracted to. + +=item $ae->files + +This is an array ref with the paths of all the files in the archive, +relative to the C argument you specified. +To get the full path to an extracted file, you would use: + + File::Spec->catfile( $to, $ae->files->[0] ); + +Note that all files from a tar archive will be in unix format, as per +the tar specification. 
+ +=back + +=cut + +sub extract { + my $self = shift; + my %hash = @_; + + ### reset error messages + $self->_error_msg( [] ); + $self->_error_msg_long( [] ); + + my $to; + my $tmpl = { + to => { default => '.', store => \$to } + }; + + check( $tmpl, \%hash ) or return; + + ### so 'to' could be a file or a dir, depending on whether it's a .gz + ### file, or basically anything else. + ### so, check that, then act accordingly. + ### set an accessor specifically so _gunzip can know what file to extract + ### to. + my $dir; + { ### a foo.gz file + if( $self->is_gz or $self->is_bz2 or $self->is_Z or $self->is_lzma or $self->is_xz ) { + + my $cp = $self->archive; $cp =~ s/\.(?:gz|bz2?|Z|lzma|xz)$//i; + + ### to is a dir? + if ( -d $to ) { + $dir = $to; + $self->_gunzip_to( basename($cp) ); + + ### then it's a filename + } else { + $dir = dirname($to); + $self->_gunzip_to( basename($to) ); + } + + ### not a foo.gz file + } else { + $dir = $to; + } + } + + ### make the dir if it doesn't exist ### + unless( -d $dir ) { + eval { mkpath( $dir ) }; + + return $self->_error(loc("Could not create path '%1': %2", $dir, $@)) + if $@; + } + + ### get the current dir, to restore later ### + my $cwd = cwd(); + + my $ok = 1; + EXTRACT: { + + ### chdir to the target dir ### + unless( chdir $dir ) { + $self->_error(loc("Could not chdir to '%1': %2", $dir, $!)); + $ok = 0; last EXTRACT; + } + + ### set files to an empty array ref, so there's always an array + ### ref IN the accessor, to avoid errors like: + ### Can't use an undefined value as an ARRAY reference at + ### ../lib/Archive/Extract.pm line 742. (rt #19815) + $self->files( [] ); + + ### find out the dispatch methods needed for this type of + ### archive. Do a $self->is_XXX to figure out the type, then + ### get the hashref with bin + pure perl dispatchers. + my ($map) = map { $Mapping->{$_} } grep { $self->$_ } keys %$Mapping; + + ### add pure perl extractor if allowed & add bin extractor if allowed + my @methods; + push @methods, $map->{'pp'} if $_ALLOW_PURE_PERL; + push @methods, $map->{'bin'} if $_ALLOW_BIN; + + ### reverse it if we prefer bin extractors + @methods = reverse @methods if $PREFER_BIN; + + my($na, $fail); + for my $method (@methods) { + $self->debug( "# Extracting with ->$method\n" ); + + my $rv = $self->$method; + + ### a positive extraction + if( $rv and $rv ne METHOD_NA ) { + $self->debug( "# Extraction succeeded\n" ); + $self->_extractor($method); + last; + + ### method is not available + } elsif ( $rv and $rv eq METHOD_NA ) { + $self->debug( "# Extraction method not available\n" ); + $na++; + } else { + $self->debug( "# Extraction method failed\n" ); + $fail++; + } + } + + ### warn something went wrong if we didn't get an extractor + unless( $self->_extractor ) { + my $diag = $fail ? loc("Extract failed due to errors") : + $na ? loc("Extract failed; no extractors available") : + ''; + + $self->_error($diag); + $ok = 0; + } + } + + ### and chdir back ### + unless( chdir $cwd ) { + $self->_error(loc("Could not chdir back to start dir '%1': %2'", + $cwd, $!)); + } + + return $ok; +} + +=pod + +=head1 ACCESSORS + +=head2 $ae->error([BOOL]) + +Returns the last encountered error as string. +Pass it a true value to get the C output instead. + +=head2 $ae->extract_path + +This is the directory the archive got extracted to. +See C for details. + +=head2 $ae->files + +This is an array ref holding all the paths from the archive. +See C for details. 
+ +=head2 $ae->archive + +This is the full path to the archive file represented by this +C object. + +=head2 $ae->type + +This is the type of archive represented by this C +object. See accessors below for an easier way to use this. +See the C method for details. + +=head2 $ae->types + +Returns a list of all known C for C's +C method. + +=cut + +sub types { return @Types } + +=head2 $ae->is_tgz + +Returns true if the file is of type C<.tar.gz>. +See the C method for details. + +=head2 $ae->is_tar + +Returns true if the file is of type C<.tar>. +See the C method for details. + +=head2 $ae->is_gz + +Returns true if the file is of type C<.gz>. +See the C method for details. + +=head2 $ae->is_Z + +Returns true if the file is of type C<.Z>. +See the C method for details. + +=head2 $ae->is_zip + +Returns true if the file is of type C<.zip>. +See the C method for details. + +=head2 $ae->is_lzma + +Returns true if the file is of type C<.lzma>. +See the C method for details. + +=head2 $ae->is_xz + +Returns true if the file is of type C<.xz>. +See the C method for details. + +=cut + +### quick check methods ### +sub is_tgz { return $_[0]->type eq TGZ } +sub is_tar { return $_[0]->type eq TAR } +sub is_gz { return $_[0]->type eq GZ } +sub is_zip { return $_[0]->type eq ZIP } +sub is_tbz { return $_[0]->type eq TBZ } +sub is_bz2 { return $_[0]->type eq BZ2 } +sub is_Z { return $_[0]->type eq Z } +sub is_lzma { return $_[0]->type eq LZMA } +sub is_xz { return $_[0]->type eq XZ } +sub is_txz { return $_[0]->type eq TXZ } + +=pod + +=head2 $ae->bin_tar + +Returns the full path to your tar binary, if found. + +=head2 $ae->bin_gzip + +Returns the full path to your gzip binary, if found + +=head2 $ae->bin_unzip + +Returns the full path to your unzip binary, if found + +=head2 $ae->bin_unlzma + +Returns the full path to your unlzma binary, if found + +=head2 $ae->bin_unxz + +Returns the full path to your unxz binary, if found + +=cut + +### paths to commandline tools ### +sub bin_gzip { return $PROGRAMS->{'gzip'} if $PROGRAMS->{'gzip'} } +sub bin_unzip { return $PROGRAMS->{'unzip'} if $PROGRAMS->{'unzip'} } +sub bin_tar { return $PROGRAMS->{'tar'} if $PROGRAMS->{'tar'} } +sub bin_bunzip2 { return $PROGRAMS->{'bunzip2'} if $PROGRAMS->{'bunzip2'} } +sub bin_uncompress { return $PROGRAMS->{'uncompress'} + if $PROGRAMS->{'uncompress'} } +sub bin_unlzma { return $PROGRAMS->{'unlzma'} if $PROGRAMS->{'unlzma'} } +sub bin_unxz { return $PROGRAMS->{'unxz'} if $PROGRAMS->{'unxz'} } + +=head2 $bool = $ae->have_old_bunzip2 + +Older versions of C, from before the C release, +require all archive names to end in C<.bz2> or it will not extract +them. This method checks if you have a recent version of C +that allows any extension, or an older one that doesn't. + +=cut + +sub have_old_bunzip2 { + my $self = shift; + + ### no bunzip2? no old bunzip2 either :) + return unless $self->bin_bunzip2; + + ### if we can't run this, we can't be sure if it's too old or not + ### XXX stupid stupid stupid bunzip2 doesn't understand --version + ### is not a request to extract data: + ### $ bunzip2 --version + ### bzip2, a block-sorting file compressor. Version 1.0.2, 30-Dec-2001. + ### [...] + ### bunzip2: I won't read compressed data from a terminal. + ### bunzip2: For help, type: `bunzip2 --help'. + ### $ echo $? + ### 1 + ### HATEFUL! 
+ + ### double hateful: bunzip2 --version also hangs if input is a pipe + ### See #32370: Archive::Extract will hang if stdin is a pipe [+PATCH] + ### So, we have to provide *another* argument which is a fake filename, + ### just so it wont try to read from stdin to print its version.. + ### *sigh* + ### Even if the file exists, it won't clobber or change it. + my $buffer; + scalar run( + command => [$self->bin_bunzip2, '--version', 'NoSuchFile'], + verbose => 0, + buffer => \$buffer + ); + + ### no output + return unless $buffer; + + my ($version) = $buffer =~ /version \s+ (\d+)/ix; + + return 1 if $version < 1; + return; +} + +################################# +# +# Untar code +# +################################# + +### annoying issue with (gnu) tar on win32, as illustrated by this +### bug: https://rt.cpan.org/Ticket/Display.html?id=40138 +### which shows that (gnu) tar will interpret a file name with a : +### in it as a remote file name, so C:\tmp\foo.txt is interpreted +### as a remote shell, and the extract fails. +{ my @ExtraTarFlags; + if( ON_WIN32 and my $cmd = __PACKAGE__->bin_tar ) { + $cmd = $1 if $cmd =~ m{^(.+)}s; # Tainted perl # + ### if this is gnu tar we are running, we need to use --force-local + push @ExtraTarFlags, '--force-local' if `$cmd --version` =~ /gnu tar/i; + } + + + ### use /bin/tar to extract ### + sub _untar_bin { + my $self = shift; + + ### check for /bin/tar ### + ### check for /bin/gzip if we need it ### + ### if any of the binaries are not available, return NA + { my $diag = !$self->bin_tar ? + loc("No '%1' program found", '/bin/tar') : + $self->is_tgz && !$self->bin_gzip ? + loc("No '%1' program found", '/bin/gzip') : + $self->is_tbz && !$self->bin_bunzip2 ? + loc("No '%1' program found", '/bin/bunzip2') : + $self->is_txz && !$self->bin_unxz ? + loc("No '%1' program found", '/bin/unxz') : + ''; + + if( $diag ) { + $self->_error( $diag ); + return METHOD_NA; + } + } + + ### XXX figure out how to make IPC::Run do this in one call -- + ### currently i don't know how to get output of a command after a pipe + ### trapped in a scalar. Mailed barries about this 5th of june 2004. + + ### see what command we should run, based on whether + ### it's a .tgz or .tar + + ### GNU tar can't handled VMS filespecs, but VMSTAR can handle Unix filespecs. + my $archive = $self->archive; + $archive = VMS::Filespec::unixify($archive) if ON_VMS; + + ### XXX solaris tar and bsdtar are having different outputs + ### depending whether you run with -x or -t + ### compensate for this insanity by running -t first, then -x + { my $cmd = + $self->is_tgz ? [$self->bin_gzip, '-c', '-d', '-f', $archive, '|', + $self->bin_tar, '-tf', '-'] : + $self->is_tbz ? [$self->bin_bunzip2, '-cd', $archive, '|', + $self->bin_tar, '-tf', '-'] : + $self->is_txz ? [$self->bin_unxz, '-cd', $archive, '|', + $self->bin_tar, '-tf', '-'] : + [$self->bin_tar, @ExtraTarFlags, '-tf', $archive]; + + ### run the command + ### newer versions of 'tar' (1.21 and up) now print record size + ### to STDERR as well if v OR t is given (used to be both). This + ### is a 'feature' according to the changelog, so we must now only + ### inspect STDOUT, otherwise, failures like these occur: + ### http://www.cpantesters.org/cpan/report/3230366 + my $buffer = ''; + my @out = run( command => $cmd, + buffer => \$buffer, + verbose => $DEBUG ); + + ### command was unsuccessful + unless( $out[0] ) { + return $self->_error(loc( + "Error listing contents of archive '%1': %2", + $archive, $buffer )); + } + + ### no buffers available? 
+ if( !IPC::Cmd->can_capture_buffer and !$buffer ) { + $self->_error( $self->_no_buffer_files( $archive ) ); + + } else { + ### if we're on solaris we /might/ be using /bin/tar, which has + ### a weird output format... we might also be using + ### /usr/local/bin/tar, which is gnu tar, which is perfectly + ### fine... so we have to do some guessing here =/ + my @files = map { chomp; s!\x0D!!g if ON_WIN32; + !ON_SOLARIS ? $_ + : (m|^ x \s+ # 'xtract' -- sigh + (.+?), # the actual file name + \s+ [\d,.]+ \s bytes, + \s+ [\d,.]+ \s tape \s blocks + |x ? $1 : $_); + + ### only STDOUT, see above. Sometimes, extra whitespace + ### is present, so make sure we only pick lines with + ### a length + } grep { length } map { split $/, $_ } join '', @{$out[3]}; + + ### store the files that are in the archive ### + $self->files(\@files); + } + } + + ### now actually extract it ### + { my $cmd = + $self->is_tgz ? [$self->bin_gzip, '-c', '-d', '-f', $archive, '|', + $self->bin_tar, '-xf', '-'] : + $self->is_tbz ? [$self->bin_bunzip2, '-cd', $archive, '|', + $self->bin_tar, '-xf', '-'] : + $self->is_txz ? [$self->bin_unxz, '-cd', $archive, '|', + $self->bin_tar, '-xf', '-'] : + [$self->bin_tar, @ExtraTarFlags, '-xf', $archive]; + + my $buffer = ''; + unless( scalar run( command => $cmd, + buffer => \$buffer, + verbose => $DEBUG ) + ) { + return $self->_error(loc("Error extracting archive '%1': %2", + $archive, $buffer )); + } + + ### we might not have them, due to lack of buffers + if( $self->files ) { + ### now that we've extracted, figure out where we extracted to + my $dir = $self->__get_extract_dir( $self->files ); + + ### store the extraction dir ### + $self->extract_path( $dir ); + } + } + + ### we got here, no error happened + return 1; + } +} + + +### use archive::tar to extract ### +sub _untar_at { + my $self = shift; + + ### Loading Archive::Tar is going to set it to 1, so make it local + ### within this block, starting with its initial value. Whatever + ### Achive::Tar does will be undone when we return. + ### + ### Also, later, set $Archive::Tar::WARN to $Archive::Extract::WARN + ### so users don't have to even think about this variable. If they + ### do, they still get their set value outside of this call. + local $Archive::Tar::WARN = $Archive::Tar::WARN; + + ### we definitely need Archive::Tar, so load that first + { my $use_list = { 'Archive::Tar' => '0.0' }; + + unless( can_load( modules => $use_list ) ) { + + $self->_error(loc("You do not have '%1' installed - " . + "Please install it as soon as possible.", + 'Archive::Tar')); + + return METHOD_NA; + } + } + + ### we might pass it a filehandle if it's a .tbz file.. + my $fh_to_read = $self->archive; + + ### we will need Compress::Zlib too, if it's a tgz... and IO::Zlib + ### if A::T's version is 0.99 or higher + if( $self->is_tgz ) { + my $use_list = { 'Compress::Zlib' => '0.0' }; + { + local $@; + $use_list->{ 'IO::Zlib' } = '0.0' + if eval { Archive::Tar->VERSION('0.99'); 1 }; + } + + unless( can_load( modules => $use_list ) ) { + my $which = join '/', sort keys %$use_list; + + $self->_error(loc( + "You do not have '%1' installed - Please ". + "install it as soon as possible.", $which) + ); + + return METHOD_NA; + } + + } elsif ( $self->is_tbz ) { + my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' }; + unless( can_load( modules => $use_list ) ) { + $self->_error(loc( + "You do not have '%1' installed - Please " . 
+ "install it as soon as possible.", + 'IO::Uncompress::Bunzip2') + ); + + return METHOD_NA; + } + + my $bz = IO::Uncompress::Bunzip2->new( $self->archive ) or + return $self->_error(loc("Unable to open '%1': %2", + $self->archive, + $IO::Uncompress::Bunzip2::Bunzip2Error)); + + $fh_to_read = $bz; + } elsif ( $self->is_txz ) { + my $use_list = { 'IO::Uncompress::UnXz' => '0.0' }; + unless( can_load( modules => $use_list ) ) { + $self->_error(loc( + "You do not have '%1' installed - Please " . + "install it as soon as possible.", + 'IO::Uncompress::UnXz') + ); + + return METHOD_NA; + } + + my $xz = IO::Uncompress::UnXz->new( $self->archive ) or + return $self->_error(loc("Unable to open '%1': %2", + $self->archive, + $IO::Uncompress::UnXz::UnXzError)); + + $fh_to_read = $xz; + } + + my @files; + { + ### $Archive::Tar::WARN is 1 by default in Archive::Tar, but we've + ### localized $Archive::Tar::WARN already. + $Archive::Tar::WARN = $Archive::Extract::WARN; + + ### only tell it it's compressed if it's a .tgz, as we give it a file + ### handle if it's a .tbz + my @read = ( $fh_to_read, ( $self->is_tgz ? 1 : 0 ) ); + + ### for version of Archive::Tar > 1.04 + local $Archive::Tar::CHOWN = 0; + + ### use the iterator if we can. it's a feature of A::T 1.40 and up + if ( $_ALLOW_TAR_ITER && Archive::Tar->can( 'iter' ) ) { + + my $next; + unless ( $next = Archive::Tar->iter( @read ) ) { + return $self->_error(loc( + "Unable to read '%1': %2", $self->archive, + $Archive::Tar::error)); + } + + while ( my $file = $next->() ) { + push @files, $file->full_path; + + $file->extract or return $self->_error(loc( + "Unable to read '%1': %2", + $self->archive, + $Archive::Tar::error)); + } + + ### older version, read the archive into memory + } else { + + my $tar = Archive::Tar->new(); + + unless( $tar->read( @read ) ) { + return $self->_error(loc("Unable to read '%1': %2", + $self->archive, $Archive::Tar::error)); + } + + ### workaround to prevent Archive::Tar from setting uid, which + ### is a potential security hole. -autrijus + ### have to do it here, since A::T needs to be /loaded/ first ### + { no strict 'refs'; local $^W; + + ### older versions of archive::tar <= 0.23 + *Archive::Tar::chown = sub {}; + } + + { local $^W; # quell 'splice() offset past end of array' warnings + # on older versions of A::T + + ### older archive::tar always returns $self, return value + ### slightly fux0r3d because of it. + $tar->extract or return $self->_error(loc( + "Unable to extract '%1': %2", + $self->archive, $Archive::Tar::error )); + } + + @files = $tar->list_files; + } + } + + my $dir = $self->__get_extract_dir( \@files ); + + ### store the files that are in the archive ### + $self->files(\@files); + + ### store the extraction dir ### + $self->extract_path( $dir ); + + ### check if the dir actually appeared ### + return 1 if -d $self->extract_path; + + ### no dir, we failed ### + return $self->_error(loc("Unable to extract '%1': %2", + $self->archive, $Archive::Tar::error )); +} + +################################# +# +# Gunzip code +# +################################# + +sub _gunzip_bin { + my $self = shift; + + ### check for /bin/gzip -- we need it ### + unless( $self->bin_gzip ) { + $self->_error(loc("No '%1' program found", '/bin/gzip')); + return METHOD_NA; + } + + my $fh = FileHandle->new('>'. $self->_gunzip_to) or + return $self->_error(loc("Could not open '%1' for writing: %2", + $self->_gunzip_to, $! 
)); + + my $cmd = [ $self->bin_gzip, '-c', '-d', '-f', $self->archive ]; + + my $buffer; + unless( scalar run( command => $cmd, + verbose => $DEBUG, + buffer => \$buffer ) + ) { + return $self->_error(loc("Unable to gunzip '%1': %2", + $self->archive, $buffer)); + } + + ### no buffers available? + if( !IPC::Cmd->can_capture_buffer and !$buffer ) { + $self->_error( $self->_no_buffer_content( $self->archive ) ); + } + + $self->_print($fh, $buffer) if defined $buffer; + + close $fh; + + ### set what files where extract, and where they went ### + $self->files( [$self->_gunzip_to] ); + $self->extract_path( File::Spec->rel2abs(cwd()) ); + + return 1; +} + +sub _gunzip_cz { + my $self = shift; + + my $use_list = { 'Compress::Zlib' => '0.0' }; + unless( can_load( modules => $use_list ) ) { + $self->_error(loc("You do not have '%1' installed - Please " . + "install it as soon as possible.", 'Compress::Zlib')); + return METHOD_NA; + } + + my $gz = Compress::Zlib::gzopen( $self->archive, "rb" ) or + return $self->_error(loc("Unable to open '%1': %2", + $self->archive, $Compress::Zlib::gzerrno)); + + my $fh = FileHandle->new('>'. $self->_gunzip_to) or + return $self->_error(loc("Could not open '%1' for writing: %2", + $self->_gunzip_to, $! )); + + my $buffer; + $self->_print($fh, $buffer) while $gz->gzread($buffer) > 0; + $fh->close; + + ### set what files where extract, and where they went ### + $self->files( [$self->_gunzip_to] ); + $self->extract_path( File::Spec->rel2abs(cwd()) ); + + return 1; +} + +################################# +# +# Uncompress code +# +################################# + +sub _uncompress_bin { + my $self = shift; + + ### check for /bin/gzip -- we need it ### + unless( $self->bin_uncompress ) { + $self->_error(loc("No '%1' program found", '/bin/uncompress')); + return METHOD_NA; + } + + my $fh = FileHandle->new('>'. $self->_gunzip_to) or + return $self->_error(loc("Could not open '%1' for writing: %2", + $self->_gunzip_to, $! )); + + my $cmd = [ $self->bin_uncompress, '-c', $self->archive ]; + + my $buffer; + unless( scalar run( command => $cmd, + verbose => $DEBUG, + buffer => \$buffer ) + ) { + return $self->_error(loc("Unable to uncompress '%1': %2", + $self->archive, $buffer)); + } + + ### no buffers available? + if( !IPC::Cmd->can_capture_buffer and !$buffer ) { + $self->_error( $self->_no_buffer_content( $self->archive ) ); + } + + $self->_print($fh, $buffer) if defined $buffer; + + close $fh; + + ### set what files where extract, and where they went ### + $self->files( [$self->_gunzip_to] ); + $self->extract_path( File::Spec->rel2abs(cwd()) ); + + return 1; +} + + +################################# +# +# Unzip code +# +################################# + + +sub _unzip_bin { + my $self = shift; + + ### check for /bin/gzip if we need it ### + unless( $self->bin_unzip ) { + $self->_error(loc("No '%1' program found", '/bin/unzip')); + return METHOD_NA; + } + + ### first, get the files.. it must be 2 different commands with 'unzip' :( + { ### on VMS, capital letter options have to be quoted. This is + ### reported by John Malmberg on P5P Tue 21 Aug 2007 05:05:11 + ### Subject: [patch@31735]Archive Extract fix on VMS. + my $opt = ON_VMS ? '"-Z"' : '-Z'; + my $cmd = [ $self->bin_unzip, $opt, '-1', $self->archive ]; + + my $buffer = ''; + unless( scalar run( command => $cmd, + verbose => $DEBUG, + buffer => \$buffer ) + ) { + return $self->_error(loc("Unable to unzip '%1': %2", + $self->archive, $buffer)); + } + + ### no buffers available? 
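+        ### ('-Z' ran unzip in zipinfo mode and '-1' printed bare
+        ### member names, one per line, so the captured buffer can be
+        ### split directly into the file list below.)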
+ if( !IPC::Cmd->can_capture_buffer and !$buffer ) { + $self->_error( $self->_no_buffer_files( $self->archive ) ); + + } else { + ### Annoyingly, pesky MSWin32 can either have 'native' tools + ### which have \r\n line endings or Cygwin-based tools which + ### have \n line endings. Jan Dubois suggested using this fix + my $split = ON_WIN32 ? qr/\r?\n/ : "\n"; + $self->files( [split $split, $buffer] ); + } + } + + ### now, extract the archive ### + { my $cmd = [ $self->bin_unzip, '-qq', '-o', $self->archive ]; + + my $buffer; + unless( scalar run( command => $cmd, + verbose => $DEBUG, + buffer => \$buffer ) + ) { + return $self->_error(loc("Unable to unzip '%1': %2", + $self->archive, $buffer)); + } + + if( scalar @{$self->files} ) { + my $files = $self->files; + my $dir = $self->__get_extract_dir( $files ); + + $self->extract_path( $dir ); + } + } + + return 1; +} + +sub _unzip_az { + my $self = shift; + + my $use_list = { 'Archive::Zip' => '0.0' }; + unless( can_load( modules => $use_list ) ) { + $self->_error(loc("You do not have '%1' installed - Please " . + "install it as soon as possible.", 'Archive::Zip')); + return METHOD_NA; + } + + my $zip = Archive::Zip->new(); + + unless( $zip->read( $self->archive ) == &Archive::Zip::AZ_OK ) { + return $self->_error(loc("Unable to read '%1'", $self->archive)); + } + + my @files; + + + ### Address: #43278: Explicitly tell Archive::Zip where to put the files: + ### "In my BackPAN indexing, Archive::Zip was extracting things + ### in my script's directory instead of the current working directory. + ### I traced this back through Archive::Zip::_asLocalName which + ### eventually calls File::Spec::Win32::rel2abs which on Windows might + ### call Cwd::getdcwd. getdcwd returns the wrong directory in my + ### case, even though I think I'm on the same drive. + ### + ### To fix this, I pass the optional second argument to + ### extractMember using the cwd from Archive::Extract." --bdfoy + + ## store cwd() before looping; calls to cwd() can be expensive, and + ### it won't change during the loop + my $extract_dir = cwd(); + + ### have to extract every member individually ### + for my $member ($zip->members) { + push @files, $member->{fileName}; + + ### file to extract to, to avoid the above problem + my $to = File::Spec->catfile( $extract_dir, $member->{fileName} ); + + unless( $zip->extractMember($member, $to) == &Archive::Zip::AZ_OK ) { + return $self->_error(loc("Extraction of '%1' from '%2' failed", + $member->{fileName}, $self->archive )); + } + } + + my $dir = $self->__get_extract_dir( \@files ); + + ### set what files where extract, and where they went ### + $self->files( \@files ); + $self->extract_path( File::Spec->rel2abs($dir) ); + + return 1; +} + +sub __get_extract_dir { + my $self = shift; + my $files = shift || []; + + return unless scalar @$files; + + my($dir1, $dir2); + for my $aref ( [ \$dir1, 0 ], [ \$dir2, -1 ] ) { + my($dir,$pos) = @$aref; + + ### add a catdir(), so that any trailing slashes get + ### take care of (removed) + ### also, a catdir() normalises './dir/foo' to 'dir/foo'; + ### which was the problem in bug #23999 + my $res = -d $files->[$pos] + ? File::Spec->catdir( $files->[$pos], '' ) + : File::Spec->catdir( dirname( $files->[$pos] ) ); + + $$dir = $res; + } + + ### if the first and last dir don't match, make sure the + ### dirname is not set wrongly + my $dir; + + ### dirs are the same, so we know for sure what the extract dir is + if( $dir1 eq $dir2 ) { + $dir = $dir1; + + ### dirs are different.. 
do they share the base dir? + ### if so, use that, if not, fall back to '.' + } else { + my $base1 = [ File::Spec->splitdir( $dir1 ) ]->[0]; + my $base2 = [ File::Spec->splitdir( $dir2 ) ]->[0]; + + $dir = File::Spec->rel2abs( $base1 eq $base2 ? $base1 : '.' ); + } + + return File::Spec->rel2abs( $dir ); +} + +################################# +# +# Bunzip2 code +# +################################# + +sub _bunzip2_bin { + my $self = shift; + + ### check for /bin/gzip -- we need it ### + unless( $self->bin_bunzip2 ) { + $self->_error(loc("No '%1' program found", '/bin/bunzip2')); + return METHOD_NA; + } + + my $fh = FileHandle->new('>'. $self->_gunzip_to) or + return $self->_error(loc("Could not open '%1' for writing: %2", + $self->_gunzip_to, $! )); + + ### guard against broken bunzip2. See ->have_old_bunzip2() + ### for details + if( $self->have_old_bunzip2 and $self->archive !~ /\.bz2$/i ) { + return $self->_error(loc("Your bunzip2 version is too old and ". + "can only extract files ending in '%1'", + '.bz2')); + } + + my $cmd = [ $self->bin_bunzip2, '-cd', $self->archive ]; + + my $buffer; + unless( scalar run( command => $cmd, + verbose => $DEBUG, + buffer => \$buffer ) + ) { + return $self->_error(loc("Unable to bunzip2 '%1': %2", + $self->archive, $buffer)); + } + + ### no buffers available? + if( !IPC::Cmd->can_capture_buffer and !$buffer ) { + $self->_error( $self->_no_buffer_content( $self->archive ) ); + } + + $self->_print($fh, $buffer) if defined $buffer; + + close $fh; + + ### set what files where extract, and where they went ### + $self->files( [$self->_gunzip_to] ); + $self->extract_path( File::Spec->rel2abs(cwd()) ); + + return 1; +} + +### using cz2, the compact versions... this we use mainly in archive::tar +### extractor.. +# sub _bunzip2_cz1 { +# my $self = shift; +# +# my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' }; +# unless( can_load( modules => $use_list ) ) { +# return $self->_error(loc("You do not have '%1' installed - Please " . +# "install it as soon as possible.", +# 'IO::Uncompress::Bunzip2')); +# } +# +# my $bz = IO::Uncompress::Bunzip2->new( $self->archive ) or +# return $self->_error(loc("Unable to open '%1': %2", +# $self->archive, +# $IO::Uncompress::Bunzip2::Bunzip2Error)); +# +# my $fh = FileHandle->new('>'. $self->_gunzip_to) or +# return $self->_error(loc("Could not open '%1' for writing: %2", +# $self->_gunzip_to, $! )); +# +# my $buffer; +# $fh->print($buffer) while $bz->read($buffer) > 0; +# $fh->close; +# +# ### set what files where extract, and where they went ### +# $self->files( [$self->_gunzip_to] ); +# $self->extract_path( File::Spec->rel2abs(cwd()) ); +# +# return 1; +# } + +sub _bunzip2_bz2 { + my $self = shift; + + my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' }; + unless( can_load( modules => $use_list ) ) { + $self->_error(loc("You do not have '%1' installed - Please " . 
+ "install it as soon as possible.", + 'IO::Uncompress::Bunzip2')); + return METHOD_NA; + } + + IO::Uncompress::Bunzip2::bunzip2($self->archive => $self->_gunzip_to) + or return $self->_error(loc("Unable to uncompress '%1': %2", + $self->archive, + $IO::Uncompress::Bunzip2::Bunzip2Error)); + + ### set what files where extract, and where they went ### + $self->files( [$self->_gunzip_to] ); + $self->extract_path( File::Spec->rel2abs(cwd()) ); + + return 1; +} + +################################# +# +# UnXz code +# +################################# + +sub _unxz_bin { + my $self = shift; + + ### check for /bin/unxz -- we need it ### + unless( $self->bin_unxz ) { + $self->_error(loc("No '%1' program found", '/bin/unxz')); + return METHOD_NA; + } + + my $fh = FileHandle->new('>'. $self->_gunzip_to) or + return $self->_error(loc("Could not open '%1' for writing: %2", + $self->_gunzip_to, $! )); + + my $cmd = [ $self->bin_unxz, '-c', '-d', '-f', $self->archive ]; + + my $buffer; + unless( scalar run( command => $cmd, + verbose => $DEBUG, + buffer => \$buffer ) + ) { + return $self->_error(loc("Unable to unxz '%1': %2", + $self->archive, $buffer)); + } + + ### no buffers available? + if( !IPC::Cmd->can_capture_buffer and !$buffer ) { + $self->_error( $self->_no_buffer_content( $self->archive ) ); + } + + $self->_print($fh, $buffer) if defined $buffer; + + close $fh; + + ### set what files where extract, and where they went ### + $self->files( [$self->_gunzip_to] ); + $self->extract_path( File::Spec->rel2abs(cwd()) ); + + return 1; +} + +sub _unxz_cz { + my $self = shift; + + my $use_list = { 'IO::Uncompress::UnXz' => '0.0' }; + unless( can_load( modules => $use_list ) ) { + $self->_error(loc("You do not have '%1' installed - Please " . + "install it as soon as possible.", + 'IO::Uncompress::UnXz')); + return METHOD_NA; + } + + IO::Uncompress::UnXz::unxz($self->archive => $self->_gunzip_to) + or return $self->_error(loc("Unable to uncompress '%1': %2", + $self->archive, + $IO::Uncompress::UnXz::UnXzError)); + + ### set what files where extract, and where they went ### + $self->files( [$self->_gunzip_to] ); + $self->extract_path( File::Spec->rel2abs(cwd()) ); + + return 1; +} + + +################################# +# +# unlzma code +# +################################# + +sub _unlzma_bin { + my $self = shift; + + ### check for /bin/unlzma -- we need it ### + unless( $self->bin_unlzma ) { + $self->_error(loc("No '%1' program found", '/bin/unlzma')); + return METHOD_NA; + } + + my $fh = FileHandle->new('>'. $self->_gunzip_to) or + return $self->_error(loc("Could not open '%1' for writing: %2", + $self->_gunzip_to, $! )); + + my $cmd = [ $self->bin_unlzma, '-c', $self->archive ]; + + my $buffer; + unless( scalar run( command => $cmd, + verbose => $DEBUG, + buffer => \$buffer ) + ) { + return $self->_error(loc("Unable to unlzma '%1': %2", + $self->archive, $buffer)); + } + + ### no buffers available? 
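+    ### (note that the whole decompressed stream was captured into
+    ### $buffer above and is written out below; for very large
+    ### archives this can be memory-hungry -- q.v. the POD section
+    ### on supporting very large files.)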
+ if( !IPC::Cmd->can_capture_buffer and !$buffer ) { + $self->_error( $self->_no_buffer_content( $self->archive ) ); + } + + $self->_print($fh, $buffer) if defined $buffer; + + close $fh; + + ### set what files where extract, and where they went ### + $self->files( [$self->_gunzip_to] ); + $self->extract_path( File::Spec->rel2abs(cwd()) ); + + return 1; +} + +sub _unlzma_cz { + my $self = shift; + + my $use_list1 = { 'IO::Uncompress::UnLzma' => '0.0' }; + my $use_list2 = { 'Compress::unLZMA' => '0.0' }; + + if (can_load( modules => $use_list1 ) ) { + IO::Uncompress::UnLzma::unlzma($self->archive => $self->_gunzip_to) + or return $self->_error(loc("Unable to uncompress '%1': %2", + $self->archive, + $IO::Uncompress::UnLzma::UnLzmaError)); + } + elsif (can_load( modules => $use_list2 ) ) { + + my $fh = FileHandle->new('>'. $self->_gunzip_to) or + return $self->_error(loc("Could not open '%1' for writing: %2", + $self->_gunzip_to, $! )); + + my $buffer; + $buffer = Compress::unLZMA::uncompressfile( $self->archive ); + unless ( defined $buffer ) { + return $self->_error(loc("Could not unlzma '%1': %2", + $self->archive, $@)); + } + + $self->_print($fh, $buffer) if defined $buffer; + + close $fh; + } + else { + $self->_error(loc("You do not have '%1' or '%2' installed - Please " . + "install it as soon as possible.", 'Compress::unLZMA', 'IO::Uncompress::UnLzma')); + return METHOD_NA; + } + + ### set what files where extract, and where they went ### + $self->files( [$self->_gunzip_to] ); + $self->extract_path( File::Spec->rel2abs(cwd()) ); + + return 1; +} + +##################################### +# +# unzip heuristics for FreeBSD-alikes +# +##################################### + +sub _is_infozip_esque { + my $unzip = shift; + + my @strings; + my $buf = ''; + + { + open my $file, '<', $unzip or die "$!\n"; + binmode $file; + local $/ = \1; + local $_; + while(<$file>) { + if ( m![[:print:]]! ) { + $buf .= $_; + next; + } + if ( $buf and m![^[:print:]]! ) { + push @strings, $buf if length $buf >= 4; + $buf = ''; + next; + } + } + } + push @strings, $buf if $buf; + foreach my $part ( @strings ) { + if ( $part =~ m!ZIPINFO! or $part =~ m!usage:.+?Z1! ) { + return $unzip; + } + } + return; +} + +################################# +# +# Error code +# +################################# + +# For printing binaries that avoids interfering globals +sub _print { + my $self = shift; + my $fh = shift; + + local( $\, $", $, ) = ( undef, ' ', '' ); + return print $fh @_; +} + +sub _error { + my $self = shift; + my $error = shift; + my $lerror = Carp::longmess($error); + + push @{$self->_error_msg}, $error; + push @{$self->_error_msg_long}, $lerror; + + ### set $Archive::Extract::WARN to 0 to disable printing + ### of errors + if( $WARN ) { + carp $DEBUG ? $lerror : $error; + } + + return; +} + +sub error { + my $self = shift; + + ### make sure we have a fallback aref + my $aref = do { + shift() + ? $self->_error_msg_long + : $self->_error_msg + } || []; + + return join $/, @$aref; +} + +=head2 debug( MESSAGE ) + +This method outputs MESSAGE to the default filehandle if C<$DEBUG> is +true. It's a small method, but it's here if you'd like to subclass it +so you can so something else with any debugging output. + +=cut + +### this is really a stub for subclassing +sub debug { + return unless $DEBUG; + + print $_[1]; +} + +sub _no_buffer_files { + my $self = shift; + my $file = shift or return; + return loc("No buffer captured, unable to tell ". 
+            "extracted files or extraction dir for '%1'", $file);
+}
+
+sub _no_buffer_content {
+    my $self = shift;
+    my $file = shift or return;
+    return loc("No buffer captured, unable to get content for '%1'", $file);
+}
+1;
+
+=pod
+
+=head1 UTILITY FUNCTION
+
+=head2 type_for($archive)
+
+Given an archive file name, it determines the type by parsing the file
+name extension. Used by C<new()> when the C<type> parameter is not passed.
+Also useful when the archive file does not include a suffix but the file
+name is otherwise known, such as when a file is uploaded to a web server
+and stored with a temporary name that differs from the original name, and
+you want to use the same detection pattern as Archive::Extract. Example:
+
+    my $ae = Archive::Extract->new(
+        archive => '/tmp/02af6s',
+        type    => Archive::Extract::type_for('archive.zip'),
+    );
+
+=head1 HOW IT WORKS
+
+C<Archive::Extract> tries first to determine what type of archive you
+are passing it, by inspecting its suffix. It does not do this by using
+Mime magic, or something related. See C<TODO / CAVEATS> below.
+
+Once it has determined the file type, it knows which extraction methods
+it can use on the archive. It will try a perl solution first, then fall
+back to a commandline tool if that fails. If that also fails, it will
+return false, indicating it was unable to extract the archive.
+See the section on C<GLOBAL VARIABLES> to see how to alter this order.
+
+=head1 CAVEATS
+
+=head2 File Extensions
+
+C<Archive::Extract> relies on the extension of the archive to determine
+what type it is, and what extractor methods therefore can be used. If
+your archives do not have any of the extensions as described in the
+C<new()> method, you will have to specify the type explicitly, or
+C<Archive::Extract> will not be able to extract the archive for you.
+
+=head2 Supporting Very Large Files
+
+C<Archive::Extract> can use either pure perl modules or command line
+programs under the hood. Some of the pure perl modules (like
+C<Archive::Tar> and C<Compress::unLZMA>) take the entire contents of the
+archive into memory, which may not be feasible on your system. Consider
+setting the global variable C<$Archive::Extract::PREFER_BIN> to C<1>,
+which will prefer the use of command line programs and won't consume so
+much memory.
+
+See the C<GLOBAL VARIABLES> section below for details.
+
+=head2 Bunzip2 support of arbitrary extensions.
+
+Older versions of C</bin/bunzip2> do not support arbitrary file
+extensions and insist on a C<.bz2> suffix. Although we do our best
+to guard against this, if you experience a bunzip2 error, it may
+be related to this. For details, please see the C<have_old_bunzip2>
+method.
+
+=head1 GLOBAL VARIABLES
+
+=head2 $Archive::Extract::DEBUG
+
+Set this variable to C<true> to have all calls to command line tools
+be printed out, including all their output.
+This also enables C<Carp::longmess> errors, instead of the regular
+C<carp> errors.
+
+Good for tracking down why things don't work with your particular
+setup.
+
+Defaults to C<false>.
+
+=head2 $Archive::Extract::WARN
+
+This variable controls whether errors encountered internally by
+C<Archive::Extract> should be C<carp>'d or not.
+
+Set to false to silence warnings. Inspect the output of the C<error()>
+method manually to see what went wrong.
+
+Defaults to C<true>.
+
+=head2 $Archive::Extract::PREFER_BIN
+
+This variable controls whether C<Archive::Extract> should prefer the
+use of perl modules, or commandline tools to extract archives.
+
+Set to C<true> to have C<Archive::Extract> prefer commandline tools.
+
+Defaults to C<false>.
+
+=head1 TODO / CAVEATS
+
+=over 4
+
+=item Mime magic support
+
+Maybe this module should use something like C<File::Type> to determine
+the type, rather than blindly trust the suffix.
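+
+A rough sketch of what that could look like (C<File::Type> is a CPAN
+module that inspects file contents; it is B<not> currently used by this
+code, so this is illustrative only):
+
+    use File::Type;
+    my $mime = File::Type->new->checktype_filename( $archive );
+    # e.g. 'application/zip' -- this would still need to be mapped
+    # back to one of the types listed in new()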
+
+=item Thread safety
+
+Currently, C<Archive::Extract> does a C<chdir> to the extraction dir before
+extraction, and a C<chdir> back again after. This is not necessarily
+thread safe. See C<rt.cpan.org> bug C<#45671> for details.
+
+=back
+
+=head1 BUG REPORTS
+
+Please report bugs or other issues to E<lt>bug-archive-extract@rt.cpan.orgE<gt>.
+
+=head1 AUTHOR
+
+This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
+
+=head1 COPYRIGHT
+
+This library is free software; you may redistribute and/or modify it
+under the same terms as Perl itself.
+
+=cut
+
+# Local variables:
+# c-indentation-style: bsd
+# c-basic-offset: 4
+# indent-tabs-mode: nil
+# End:
+# vim: expandtab shiftwidth=4:
+
diff --git a/src/modules/DBM/Deep.pm b/src/modules/DBM/Deep.pm
new file mode 100644
index 0000000..966b360
--- /dev/null
+++ b/src/modules/DBM/Deep.pm
@@ -0,0 +1,688 @@
+package DBM::Deep;
+
+use 5.008_004;
+
+use strict;
+use warnings FATAL => 'all';
+no warnings 'recursion';
+
+our $VERSION = q(2.0013);
+
+use Scalar::Util ();
+
+use overload
+    (
+        '""' =>
+        '0+' => sub { $_[0] },
+    )[0,2,1,2], # same sub for both
+    fallback => 1;
+
+use constant DEBUG => 0;
+
+use DBM::Deep::Engine;
+
+sub TYPE_HASH   () { DBM::Deep::Engine->SIG_HASH  }
+sub TYPE_ARRAY  () { DBM::Deep::Engine->SIG_ARRAY }
+
+my %obj_cache; # In external_refs mode, all objects are registered here,
+               # and dealt with in the END block at the bottom.
+use constant HAVE_HUFH => scalar eval{ require Hash::Util::FieldHash };
+HAVE_HUFH and Hash::Util::FieldHash::fieldhash(%obj_cache);
+
+# This is used in all the children of this class in their TIE methods.
+sub _get_args {
+    my $proto = shift;
+
+    my $args;
+    if (scalar(@_) > 1) {
+        if ( @_ % 2 ) {
+            $proto->_throw_error( "Odd number of parameters to " . (caller(1))[2] );
+        }
+        $args = {@_};
+    }
+    elsif ( ref $_[0] ) {
+        unless ( eval { local $SIG{'__DIE__'}; %{$_[0]} || 1 } ) {
+            $proto->_throw_error( "Not a hashref in args to " . (caller(1))[2] );
+        }
+        $args = $_[0];
+    }
+    else {
+        $args = { file => shift };
+    }
+
+    return $args;
+}
+
+# Class constructor method for Perl OO interface.
+# Calls tie() and returns blessed reference to tied hash or array,
+# providing a hybrid OO/tie interface.
+sub new {
+    my $class = shift;
+    my $args = $class->_get_args( @_ );
+    my $self;
+
+    if (defined($args->{type}) && $args->{type} eq TYPE_ARRAY) {
+        $class = 'DBM::Deep::Array';
+        require DBM::Deep::Array;
+        tie @$self, $class, %$args;
+    }
+    else {
+        $class = 'DBM::Deep::Hash';
+        require DBM::Deep::Hash;
+        tie %$self, $class, %$args;
+    }
+
+    return bless $self, $class;
+}
+
+# This initializer is called from the various TIE* methods. new() calls tie(),
+# which allows for a single point of entry.
+sub _init {
+    my $class = shift;
+    my ($args) = @_;
+
+    # locking implicitly enables autoflush
+    if ($args->{locking}) { $args->{autoflush} = 1; }
+
+    # These are the defaults to be optionally overridden below
+    my $self = bless {
+        type        => TYPE_HASH,
+        base_offset => undef,
+        staleness   => undef,
+        engine      => undef,
+    }, $class;
+
+    unless ( exists $args->{engine} ) {
+        my $class =
+            exists $args->{dbi}   ? 'DBM::Deep::Engine::DBI'  :
+            exists $args->{_test} ?
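+            # (a 'dbi' key in the constructor args selects the DBI
+            # engine, '_test' a test-only engine; anything else falls
+            # through to the default File engine below)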
'DBM::Deep::Engine::Test' : + 'DBM::Deep::Engine::File' ; + + eval "use $class"; die $@ if $@; + $args->{engine} = $class->new({ + %{$args}, + obj => $self, + }); + } + + # Grab the parameters we want to use + foreach my $param ( keys %$self ) { + next unless exists $args->{$param}; + $self->{$param} = $args->{$param}; + } + + eval { + local $SIG{'__DIE__'}; + + $self->lock_exclusive; + $self->_engine->setup( $self ); + $self->unlock; + }; if ( $@ ) { + my $e = $@; + eval { local $SIG{'__DIE__'}; $self->unlock; }; + die $e; + } + + if( $self->{engine}->{external_refs} + and my $sector = $self->{engine}->load_sector( $self->{base_offset} ) + ) { + $sector->increment_refcount; + + Scalar::Util::weaken( my $feeble_ref = $self ); + $obj_cache{ $self } = \$feeble_ref; + + # Make sure this cache is not a memory hog + if(!HAVE_HUFH) { + for(keys %obj_cache) { + delete $obj_cache{$_} if not ${$obj_cache{$_}}; + } + } + } + + return $self; +} + +sub TIEHASH { + shift; + require DBM::Deep::Hash; + return DBM::Deep::Hash->TIEHASH( @_ ); +} + +sub TIEARRAY { + shift; + require DBM::Deep::Array; + return DBM::Deep::Array->TIEARRAY( @_ ); +} + +sub lock_exclusive { + my $self = shift->_get_self; + return $self->_engine->lock_exclusive( $self, @_ ); +} +*lock = \&lock_exclusive; + +sub lock_shared { + my $self = shift->_get_self; + # cluck() the problem with cached File objects. + unless ( $self->_engine ) { + require Carp; + require Data::Dumper; + Carp::cluck( Data::Dumper->Dump( [$self], ['self'] ) ); + } + return $self->_engine->lock_shared( $self, @_ ); +} + +sub unlock { + my $self = shift->_get_self; + return $self->_engine->unlock( $self, @_ ); +} + +sub _copy_value { + my $self = shift->_get_self; + my ($spot, $value) = @_; + + if ( !ref $value ) { + ${$spot} = $value; + } + else { + my $r = Scalar::Util::reftype( $value ); + my $tied; + if ( $r eq 'ARRAY' ) { + $tied = tied(@$value); + } + elsif ( $r eq 'HASH' ) { + $tied = tied(%$value); + } + else { + __PACKAGE__->_throw_error( "Unknown type for '$value'" ); + } + + if ( eval { local $SIG{'__DIE__'}; $tied->isa( __PACKAGE__ ) } ) { + ${$spot} = $tied->_repr; + $tied->_copy_node( ${$spot} ); + } + else { + if ( $r eq 'ARRAY' ) { + ${$spot} = [ @{$value} ]; + } + else { + ${$spot} = { %{$value} }; + } + } + + my $c = Scalar::Util::blessed( $value ); + if ( defined $c && !$c->isa( __PACKAGE__ ) ) { + ${$spot} = bless ${$spot}, $c + } + } + + return 1; +} + +sub export { + my $self = shift->_get_self; + + my $temp = $self->_repr; + + $self->lock_exclusive; + $self->_copy_node( $temp ); + $self->unlock; + + my $classname = $self->_engine->get_classname( $self ); + if ( defined $classname ) { + bless $temp, $classname; + } + + return $temp; +} + +sub _check_legality { + my $self = shift; + my ($val) = @_; + + my $r = Scalar::Util::reftype( $val ); + + return $r if !defined $r || '' eq $r; + return $r if 'HASH' eq $r; + return $r if 'ARRAY' eq $r; + + __PACKAGE__->_throw_error( + "Storage of references of type '$r' is not supported." + ); +} + +sub import { + return if !ref $_[0]; # Perl calls import() on use -- ignore + + my $self = shift->_get_self; + my ($struct) = @_; + + my $type = $self->_check_legality( $struct ); + if ( !$type ) { + __PACKAGE__->_throw_error( "Cannot import a scalar" ); + } + + if ( substr( $type, 0, 1 ) ne $self->_type ) { + __PACKAGE__->_throw_error( + "Cannot import " . ('HASH' eq $type ? 'a hash' : 'an array') + . " into " . ('HASH' eq $type ? 
'an array' : 'a hash') + ); + } + + my %seen; + my $recurse; + $recurse = sub { + my ($db, $val) = @_; + + my $obj = 'HASH' eq Scalar::Util::reftype( $db ) ? tied(%$db) : tied(@$db); + $obj ||= $db; + + my $r = $self->_check_legality( $val ); + if ( 'HASH' eq $r ) { + while ( my ($k, $v) = each %$val ) { + my $r = $self->_check_legality( $v ); + if ( $r ) { + my $temp = 'HASH' eq $r ? {} : []; + if ( my $c = Scalar::Util::blessed( $v ) ) { + bless $temp, $c; + } + $obj->put( $k, $temp ); + $recurse->( $temp, $v ); + } + else { + $obj->put( $k, $v ); + } + } + } + elsif ( 'ARRAY' eq $r ) { + foreach my $k ( 0 .. $#$val ) { + my $v = $val->[$k]; + my $r = $self->_check_legality( $v ); + if ( $r ) { + my $temp = 'HASH' eq $r ? {} : []; + if ( my $c = Scalar::Util::blessed( $v ) ) { + bless $temp, $c; + } + $obj->put( $k, $temp ); + $recurse->( $temp, $v ); + } + else { + $obj->put( $k, $v ); + } + } + } + }; + $recurse->( $self, $struct ); + + return 1; +} + +#XXX Need to keep track of who has a fh to this file in order to +#XXX close them all prior to optimize on Win32/cygwin +# Rebuild entire database into new file, then move +# it back on top of original. +sub optimize { + my $self = shift->_get_self; + + # Optimizing is only something we need to do when we're working with our + # own file format. Otherwise, let the other guy do the optimizations. + return unless $self->_engine->isa( 'DBM::Deep::Engine::File' ); + +#XXX Need to create a new test for this +# if ($self->_engine->storage->{links} > 1) { +# $self->_throw_error("Cannot optimize: reference count is greater than 1"); +# } + + #XXX Do we have to lock the tempfile? + + #XXX Should we use tempfile() here instead of a hard-coded name? + my $temp_filename = $self->_engine->storage->{file} . '.tmp'; + my $db_temp = __PACKAGE__->new( + file => $temp_filename, + type => $self->_type, + + # Bring over all the parameters that we need to bring over + ( map { $_ => $self->_engine->$_ } qw( + byte_size max_buckets data_sector_size num_txns + )), + ); + + $self->lock_exclusive; + $self->_engine->clear_cache; + $self->_copy_node( $db_temp ); + $self->unlock; + $db_temp->_engine->storage->close; + undef $db_temp; + + ## + # Attempt to copy user, group and permissions over to new file + ## + $self->_engine->storage->copy_stats( $temp_filename ); + + # q.v. perlport for more information on this variable + if ( $^O eq 'MSWin32' || $^O eq 'cygwin' ) { + ## + # Potential race condition when optimizing on Win32 with locking. + # The Windows filesystem requires that the filehandle be closed + # before it is overwritten with rename(). This could be redone + # with a soft copy. 
+ ## + $self->unlock; + $self->_engine->storage->close; + } + + if (!rename $temp_filename, $self->_engine->storage->{file}) { + unlink $temp_filename; + $self->unlock; + $self->_throw_error("Optimize failed: Cannot copy temp file over original: $!"); + } + + $self->unlock; + $self->_engine->storage->close; + + $self->_engine->storage->open; + $self->lock_exclusive; + $self->_engine->setup( $self ); + $self->unlock; + + return 1; +} + +sub clone { + my $self = shift->_get_self; + + return __PACKAGE__->new( + type => $self->_type, + base_offset => $self->_base_offset, + staleness => $self->_staleness, + engine => $self->_engine, + ); +} + +sub supports { + my $self = shift->_get_self; + return $self->_engine->supports( @_ ); +} + +sub db_version { + shift->_get_self->_engine->db_version; +} + +#XXX Migrate this to the engine, where it really belongs and go through some +# API - stop poking in the innards of someone else.. +{ + my %is_legal_filter = map { + $_ => ~~1, + } qw( + store_key store_value + fetch_key fetch_value + ); + + sub set_filter { + my $self = shift->_get_self; + my $type = lc shift; + my $func = shift; + + if ( $is_legal_filter{$type} ) { + $self->_engine->storage->{"filter_$type"} = $func; + return 1; + } + + return; + } + + sub filter_store_key { $_[0]->set_filter( store_key => $_[1] ); } + sub filter_store_value { $_[0]->set_filter( store_value => $_[1] ); } + sub filter_fetch_key { $_[0]->set_filter( fetch_key => $_[1] ); } + sub filter_fetch_value { $_[0]->set_filter( fetch_value => $_[1] ); } +} + +sub begin_work { + my $self = shift->_get_self; + $self->lock_exclusive; + my $rv = eval { + local $SIG{'__DIE__'}; + $self->_engine->begin_work( $self, @_ ); + }; + my $e = $@; + $self->unlock; + die $e if $e; + return $rv; +} + +sub rollback { + my $self = shift->_get_self; + + $self->lock_exclusive; + my $rv = eval { + local $SIG{'__DIE__'}; + $self->_engine->rollback( $self, @_ ); + }; + my $e = $@; + $self->unlock; + die $e if $e; + return $rv; +} + +sub commit { + my $self = shift->_get_self; + $self->lock_exclusive; + my $rv = eval { + local $SIG{'__DIE__'}; + $self->_engine->commit( $self, @_ ); + }; + my $e = $@; + $self->unlock; + die $e if $e; + return $rv; +} + +# Accessor methods +sub _engine { + my $self = $_[0]->_get_self; + return $self->{engine}; +} + +sub _type { + my $self = $_[0]->_get_self; + return $self->{type}; +} + +sub _base_offset { + my $self = $_[0]->_get_self; + return $self->{base_offset}; +} + +sub _staleness { + my $self = $_[0]->_get_self; + return $self->{staleness}; +} + +# Utility methods +sub _throw_error { + my $n = 0; + while( 1 ) { + my @caller = caller( ++$n ); + next if $caller[0] =~ m/^DBM::Deep/; + + die "DBM::Deep: $_[1] at $caller[1] line $caller[2]\n"; + } +} + +# Store single hash key/value or array element in database. +sub STORE { + my $self = shift->_get_self; + my ($key, $value) = @_; + warn "STORE($self, '$key', '@{[defined$value?$value:'undef']}')\n" if DEBUG; + + unless ( $self->_engine->storage->is_writable ) { + $self->_throw_error( 'Cannot write to a readonly filehandle' ); + } + + $self->lock_exclusive; + + # User may be storing a complex value, in which case we do not want it run + # through the filtering system. 
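+    # A sketch of such a filter pair (encode()/decode() here are
+    # hypothetical serialisation helpers, not part of DBM::Deep):
+    #   $db->filter_store_value( sub { encode( $_[0] ) } );
+    #   $db->filter_fetch_value( sub { decode( $_[0] ) } );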
+ if ( !ref($value) && $self->_engine->storage->{filter_store_value} ) { + $value = $self->_engine->storage->{filter_store_value}->( $value ); + } + + eval { + local $SIG{'__DIE__'}; + $self->_engine->write_value( $self, $key, $value ); + }; if ( my $e = $@ ) { + $self->unlock; + die $e; + } + + $self->unlock; + + return 1; +} + +# Fetch single value or element given plain key or array index +sub FETCH { + my $self = shift->_get_self; + my ($key) = @_; + warn "FETCH($self, '$key')\n" if DEBUG; + + $self->lock_shared; + + my $result = $self->_engine->read_value( $self, $key ); + + $self->unlock; + + # Filters only apply to scalar values, so the ref check is making + # sure the fetched bucket is a scalar, not a child hash or array. + return ($result && !ref($result) && $self->_engine->storage->{filter_fetch_value}) + ? $self->_engine->storage->{filter_fetch_value}->($result) + : $result; +} + +# Delete single key/value pair or element given plain key or array index +sub DELETE { + my $self = shift->_get_self; + my ($key) = @_; + warn "DELETE($self, '$key')\n" if DEBUG; + + unless ( $self->_engine->storage->is_writable ) { + $self->_throw_error( 'Cannot write to a readonly filehandle' ); + } + + $self->lock_exclusive; + + ## + # Delete bucket + ## + my $value = $self->_engine->delete_key( $self, $key); + + if (defined $value && !ref($value) && $self->_engine->storage->{filter_fetch_value}) { + $value = $self->_engine->storage->{filter_fetch_value}->($value); + } + + $self->unlock; + + return $value; +} + +# Check if a single key or element exists given plain key or array index +sub EXISTS { + my $self = shift->_get_self; + my ($key) = @_; + warn "EXISTS($self, '$key')\n" if DEBUG; + + $self->lock_shared; + + my $result = $self->_engine->key_exists( $self, $key ); + + $self->unlock; + + return $result; +} + +# Clear all keys from hash, or all elements from array. +sub CLEAR { + my $self = shift->_get_self; + warn "CLEAR($self)\n" if DEBUG; + + my $engine = $self->_engine; + unless ( $engine->storage->is_writable ) { + $self->_throw_error( 'Cannot write to a readonly filehandle' ); + } + + $self->lock_exclusive; + eval { + local $SIG{'__DIE__'}; + $engine->clear( $self ); + }; + my $e = $@; + warn "$e\n" if $e && DEBUG; + + $self->unlock; + + die $e if $e; + + return 1; +} + +# Public method aliases +sub put { (shift)->STORE( @_ ) } +sub get { (shift)->FETCH( @_ ) } +sub store { (shift)->STORE( @_ ) } +sub fetch { (shift)->FETCH( @_ ) } +sub delete { (shift)->DELETE( @_ ) } +sub exists { (shift)->EXISTS( @_ ) } +sub clear { (shift)->CLEAR( @_ ) } + +sub _dump_file {shift->_get_self->_engine->_dump_file;} + +sub _warnif { + # There is, unfortunately, no way to avoid this hack. warnings.pm does not + # allow us to specify exactly the call frame we want. So, for now, we just + # look at the bitmask ourselves. + my $level; + { + my($pack, $file, $line, $bitmask) = (caller $level++)[0..2,9]; + redo if $pack =~ /^DBM::Deep(?:::|\z)/; + if( vec $bitmask, $warnings'Offsets{$_[0]}, 1, + || vec $bitmask, $warnings'Offsets{all}, 1, + ) { + my $msg = $_[1] =~ /\n\z/ ? 
$_[1] : "$_[1] at $file line $line.\n";
+            die $msg
+                if vec $bitmask, $warnings'Offsets{$_[0]}+1, 1,
+                || vec $bitmask, $warnings'Offsets{all}+1, 1;
+            warn $msg;
+        }
+    }
+}
+
+sub _free {
+    my $self = shift;
+    if(my $sector = $self->{engine}->load_sector( $self->{base_offset} )) {
+        $sector->free;
+    }
+}
+
+sub DESTROY {
+    my $self = shift;
+    my $alter_ego = $self->_get_self;
+    if( !$alter_ego || $self != $alter_ego ) {
+        return; # Don't run the destructor twice! (What follows only applies to
+    }           # the inner object, not the tie.)
+
+    # If the engine is gone, the END block has beaten us to it.
+    return if !$self->{engine};
+    if( $self->{engine}->{external_refs} ) {
+        $self->_free;
+    }
+}
+
+# Relying on the destructor alone is problematic, as the order in which
+# objects are discarded is random in global destruction. So we do the
+# clean-up here, preemptively, before global destruction.
+END {
+    defined $$_ and $$_->_free, delete $$_->{engine}
+        for(values %obj_cache);
+}
+
+1;
+__END__
diff --git a/src/modules/DBM/Deep.pod b/src/modules/DBM/Deep.pod
new file mode 100644
index 0000000..cd456de
--- /dev/null
+++ b/src/modules/DBM/Deep.pod
@@ -0,0 +1,1330 @@
+=head1 NAME
+
+DBM::Deep - A pure perl multi-level hash/array DBM that supports transactions
+
+=head1 VERSION
+
+2.0013
+
+=head1 SYNOPSIS
+
+  use DBM::Deep;
+  my $db = DBM::Deep->new( "foo.db" );
+
+  $db->{key} = 'value';
+  print $db->{key};
+
+  $db->put('key' => 'value');
+  print $db->get('key');
+
+  # true multi-level support
+  $db->{my_complex} = [
+      'hello', { perl => 'rules' },
+      42, 99,
+  ];
+
+  $db->begin_work;
+
+  # Do stuff here
+
+  $db->rollback;
+  $db->commit;
+
+  tie my %db, 'DBM::Deep', 'foo.db';
+  $db{key} = 'value';
+  print $db{key};
+
+  tied(%db)->put('key' => 'value');
+  print tied(%db)->get('key');
+
+=head1 DESCRIPTION
+
+A unique flat-file database module, written in pure perl. True multi-level
+hash/array support (unlike MLDBM, which is faked), hybrid OO / tie()
+interface, cross-platform FTPable files, ACID transactions, and is quite fast.
+Can handle millions of keys and unlimited levels without significant
+slow-down. Written from the ground-up in pure perl -- this is NOT a wrapper
+around a C-based DBM. Out-of-the-box compatibility with Unix, Mac OS X and
+Windows.
+
+=head1 VERSION DIFFERENCES
+
+B<NOTE>: 2.0000 introduces Unicode support in the File back end. This
+necessitates a change in the file format. The version 1.0003 format is
+still supported, though, so we have added a L</db_version> method. If you
+are using a database in the old format, you will have to upgrade it to get
+Unicode support.
+
+B<NOTE>: 1.0020 introduces different engines which are backed by different
+types of storage. There is the original storage (called 'File') and a
+database storage (called 'DBI'). q.v. L</PLUGINS> for more information.
+
+B<NOTE>: 1.0000 has significant file format differences from prior versions.
+There is a backwards-compatibility layer at C<utils/upgrade_db.pl>. Files
+created by 1.0000 or higher are B<NOT> compatible with scripts using prior
+versions.
+
+=head1 PLUGINS
+
+DBM::Deep is a wrapper around different storage engines. These are:
+
+=head2 File
+
+This is the traditional storage engine, storing the data to a custom file
+format. The parameters accepted are:
+
+=over 4
+
+=item * file
+
+Filename of the DB file to link the handle to. You can pass a full absolute
+filesystem path, partial path, or a plain filename if the file is in the
+current working directory. This is a required parameter (though q.v. C<fh>).
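+
+For example, both of these open the same File-backed database:
+
+    my $db  = DBM::Deep->new( "foo.db" );          # bare filename
+    my $db2 = DBM::Deep->new( file => "foo.db" );  # named parameter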
+ +=item * fh + +If you want, you can pass in the fh instead of the file. This is most useful for +doing something like: + + my $db = DBM::Deep->new( { fh => \*DATA } ); + +You are responsible for making sure that the fh has been opened appropriately +for your needs. If you open it read-only and attempt to write, an exception will +be thrown. If you open it write-only or append-only, an exception will be thrown +immediately as DBM::Deep needs to read from the fh. + +=item * file_offset + +This is the offset within the file that the DBM::Deep db starts. Most of the +time, you will not need to set this. However, it's there if you want it. + +If you pass in fh and do not set this, it will be set appropriately. + +=item * locking + +Specifies whether locking is to be enabled. DBM::Deep uses Perl's flock() +function to lock the database in exclusive mode for writes, and shared mode +for reads. Pass any true value to enable. This affects the base DB handle +I that use the same DB file. This is an +optional parameter, and defaults to 1 (enabled). See L below for +more. + +=back + +When you open an existing database file, the version of the database format +will stay the same. But if you are creating a new file, it will be in the +latest format. + +=head2 DBI + +This is a storage engine that stores the data in a relational database. Funnily +enough, this engine doesn't work with transactions (yet) as InnoDB doesn't do +what DBM::Deep needs it to do. + +The parameters accepted are: + +=over 4 + +=item * dbh + +This is a DBH that's already been opened with L. + +=item * dbi + +This is a hashref containing: + +=over 4 + +=item * dsn + +=item * username + +=item * password + +=item * connect_args + +=back + +These correspond to the 4 parameters L takes. + +=back + +B: This has only been tested with MySQL and SQLite (with +disappointing results). I plan on extending this to work with PostgreSQL in +the near future. Oracle, Sybase, and other engines will come later. + +=head2 Planned engines + +There are plans to extend this functionality to (at least) the following: + +=over 4 + +=item * BDB (and other hash engines like memcached) + +=item * NoSQL engines (such as Tokyo Cabinet) + +=item * DBIx::Class (and other ORMs) + +=back + +=head1 SETUP + +Construction can be done OO-style (which is the recommended way), or using +Perl's tie() function. Both are examined here. + +=head2 OO Construction + +The recommended way to construct a DBM::Deep object is to use the new() +method, which gets you a blessed I tied hash (or array) reference. + + my $db = DBM::Deep->new( "foo.db" ); + +This opens a new database handle, mapped to the file "foo.db". If this +file does not exist, it will automatically be created. DB files are +opened in "r+" (read/write) mode, and the type of object returned is a +hash, unless otherwise specified (see L below). + +You can pass a number of options to the constructor to specify things like +locking, autoflush, etc. This is done by passing an inline hash (or hashref): + + my $db = DBM::Deep->new( + file => "foo.db", + locking => 1, + autoflush => 1 + ); + +Notice that the filename is now specified I the hash with +the "file" parameter, as opposed to being the sole argument to the +constructor. This is required if any options are specified. +See L below for the complete list. + +You can also start with an array instead of a hash. 
For this, you must +specify the C parameter: + + my $db = DBM::Deep->new( + file => "foo.db", + type => DBM::Deep->TYPE_ARRAY + ); + +B Specifying the C parameter only takes effect when beginning +a new DB file. If you create a DBM::Deep object with an existing file, the +C will be loaded from the file header, and an error will be thrown if +the wrong type is passed in. + +=head2 Tie Construction + +Alternately, you can create a DBM::Deep handle by using Perl's built-in +tie() function. The object returned from tie() can be used to call methods, +such as lock() and unlock(). (That object can be retrieved from the tied +variable at any time using tied() - please see L for more info.) + + my %hash; + my $db = tie %hash, "DBM::Deep", "foo.db"; + + my @array; + my $db = tie @array, "DBM::Deep", "bar.db"; + +As with the OO constructor, you can replace the DB filename parameter with +a hash containing one or more options (see L just below for the +complete list). + + tie %hash, "DBM::Deep", { + file => "foo.db", + locking => 1, + autoflush => 1 + }; + +=head2 Options + +There are a number of options that can be passed in when constructing your +DBM::Deep objects. These apply to both the OO- and tie- based approaches. + +=over + +=item * type + +This parameter specifies what type of object to create, a hash or array. Use +one of these two constants: + +=over 4 + +=item * C<< DBM::Deep->TYPE_HASH >> + +=item * C<< DBM::Deep->TYPE_ARRAY >> + +=back + +This only takes effect when beginning a new file. This is an optional +parameter, and defaults to C<< DBM::Deep->TYPE_HASH >>. + +=item * autoflush + +Specifies whether autoflush is to be enabled on the underlying filehandle. +This obviously slows down write operations, but is required if you may have +multiple processes accessing the same DB file (also consider enable I). +Pass any true value to enable. This is an optional parameter, and defaults to 1 +(enabled). + +=item * filter_* + +See L below. + +=back + +The following parameters may be specified in the constructor the first time the +datafile is created. However, they will be stored in the header of the file and +cannot be overridden by subsequent openings of the file - the values will be set +from the values stored in the datafile's header. + +=over 4 + +=item * num_txns + +This is the number of transactions that can be running at one time. The +default is one - the HEAD. The minimum is one and the maximum is 255. The more +transactions, the larger and quicker the datafile grows. + +Simple access to a database, regardless of how many processes are doing it, +already counts as one transaction (the HEAD). So, if you want, say, 5 +processes to be able to call begin_work at the same time, C must +be at least 6. + +See L below. + +=item * max_buckets + +This is the number of entries that can be added before a reindexing. The larger +this number is made, the larger a file gets, but the better performance you will +have. The default and minimum number this can be is 16. The maximum is 256, but +more than 64 isn't recommended. + +=item * data_sector_size + +This is the size in bytes of a given data sector. Data sectors will chain, so +a value of any size can be stored. However, chaining is expensive in terms of +time. Setting this value to something close to the expected common length of +your scalars will improve your performance. If it is too small, your file will +have a lot of chaining. If it is too large, your file will have a lot of dead +space in it. + +The default for this is 64 bytes. 
The minimum value is 32 and the maximum is +256 bytes. + +B There are between 6 and 10 bytes taken up in each data sector for +bookkeeping. (It's 4 + the number of bytes in your L.) This is +included within the data_sector_size, thus the effective value is 6-10 bytes +less than what you specified. + +B If your strings contain any characters beyond the byte +range, they will be encoded as UTF-8 before being stored in the file. This +will make all non-ASCII characters take up more than one byte each. + +=item * pack_size + +This is the size of the file pointer used throughout the file. The valid values +are: + +=over 4 + +=item * small + +This uses 2-byte offsets, allowing for a maximum file size of 65 KB. + +=item * medium (default) + +This uses 4-byte offsets, allowing for a maximum file size of 4 GB. + +=item * large + +This uses 8-byte offsets, allowing for a maximum file size of 16 XB +(exabytes). This can only be enabled if your Perl is compiled for 64-bit. + +=back + +See L for more information. + +=item * external_refs + +This is a boolean option. When enabled, it allows external references to +database entries to hold on to those entries, even when they are deleted. + +To illustrate, if you retrieve a hash (or array) reference from the +database, + + $foo_hash = $db->{foo}; + +the hash reference is still tied to the database. So if you + + delete $db->{foo}; + +C<$foo_hash> will point to a location in the DB that is no longer valid (we +call this a stale reference). So if you try to retrieve the data from +C<$foo_hash>, + + for(keys %$foo_hash) { + +you will get an error. + +The C option causes C<$foo_hash> to 'hang on' to the +DB entry, so it will not be deleted from the database if there is still a +reference to it in a running program. It will be deleted, instead, when the +C<$foo_hash> variable no longer exists, or is overwritten. + +This has the potential to cause database bloat if your program crashes, so +it is not enabled by default. (See also the L method for an +alternative workaround.) + +=back + +=head1 TIE INTERFACE + +With DBM::Deep you can access your databases using Perl's standard hash/array +syntax. Because all DBM::Deep objects are I to hashes or arrays, you can +treat them as such (but see L, above, and +L, below). DBM::Deep will intercept +all reads/writes and direct them +to the right place -- the DB file. This has nothing to do with the +L section above. This simply tells you how to use DBM::Deep +using regular hashes and arrays, rather than calling functions like C +and C (although those work too). It is entirely up to you how to want +to access your databases. + +=head2 Hashes + +You can treat any DBM::Deep object like a normal Perl hash reference. Add keys, +or even nested hashes (or arrays) using standard Perl syntax: + + my $db = DBM::Deep->new( "foo.db" ); + + $db->{mykey} = "myvalue"; + $db->{myhash} = {}; + $db->{myhash}->{subkey} = "subvalue"; + + print $db->{myhash}->{subkey} . "\n"; + +You can even step through hash keys using the normal Perl C function: + + foreach my $key (keys %$db) { + print "$key: " . $db->{$key} . "\n"; + } + +Remember that Perl's C function extracts I key from the hash and +pushes them onto an array, all before the loop even begins. If you have an +extremely large hash, this may exhaust Perl's memory. 
Instead, consider using +Perl's C function, which pulls keys/values one at a time, using very +little memory: + + while (my ($key, $value) = each %$db) { + print "$key: $value\n"; + } + +Please note that when using C, you should always pass a direct +hash reference, not a lookup. Meaning, you should B do this: + + # NEVER DO THIS + while (my ($key, $value) = each %{$db->{foo}}) { # BAD + +This causes an infinite loop, because for each iteration, Perl is calling +FETCH() on the $db handle, resulting in a "new" hash for foo every time, so +it effectively keeps returning the first key over and over again. Instead, +assign a temporary variable to C<< $db->{foo} >>, then pass that to each(). + +=head2 Arrays + +As with hashes, you can treat any DBM::Deep object like a normal Perl array +reference. This includes inserting, removing and manipulating elements, +and the C, C, C, C and C functions. +The object must have first been created using type +C<< DBM::Deep->TYPE_ARRAY >>, +or simply be a nested array reference inside a hash. Example: + + my $db = DBM::Deep->new( + file => "foo-array.db", + type => DBM::Deep->TYPE_ARRAY + ); + + $db->[0] = "foo"; + push @$db, "bar", "baz"; + unshift @$db, "bah"; + + my $last_elem = pop @$db; # baz + my $first_elem = shift @$db; # bah + my $second_elem = $db->[1]; # bar + + my $num_elements = scalar @$db; + +=head1 OO INTERFACE + +In addition to the I interface, you can also use a standard OO interface +to manipulate all aspects of DBM::Deep databases. Each type of object (hash or +array) has its own methods, but both types share the following common methods: +C, C, C, C and C. C and +C are aliases to C and C, respectively. + +=over + +=item * new() / clone() +X +X + +These are the constructor and copy-functions. + +=item * put() / store() +X +X + +Stores a new hash key/value pair, or sets an array element value. Takes two +arguments, the hash key or array index, and the new value. The value can be +a scalar, hash ref or array ref. Returns true on success, false on failure. + + $db->put("foo", "bar"); # for hashes + $db->put(1, "bar"); # for arrays + +=item * get() / fetch() +X +X + +Fetches the value of a hash key or array element. Takes one argument: the hash +key or array index. Returns a scalar, hash ref or array ref, depending on the +data type stored. + + my $value = $db->get("foo"); # for hashes + my $value = $db->get(1); # for arrays + +=item * exists() +X + +Checks if a hash key or array index exists. Takes one argument: the hash key +or array index. Returns true if it exists, false if not. + + if ($db->exists("foo")) { print "yay!\n"; } # for hashes + if ($db->exists(1)) { print "yay!\n"; } # for arrays + +=item * delete() +X + +Deletes one hash key/value pair or array element. Takes one argument: the hash +key or array index. Returns the data that the element used to contain (just +like Perl's C function), which is C if it did not exist. For +arrays, the remaining elements located after the deleted element are NOT +moved over. The deleted element is essentially just undefined, which is +exactly how Perl's +internal arrays work. + + $db->delete("foo"); # for hashes + $db->delete(1); # for arrays + +=item * clear() +X + +Deletes B hash keys or array elements. Takes no arguments. No return +value. + + $db->clear(); # hashes or arrays + +=item * lock() / unlock() / lock_exclusive() / lock_shared() +X +X +X +X + +q.v. L for more info. + +=item * optimize() +X + +This will compress the datafile so that it takes up as little space as possible. 
+
+=head2 Arrays
+
+As with hashes, you can treat any DBM::Deep object like a normal Perl array
+reference. This includes inserting, removing and manipulating elements,
+and the C<push>, C<pop>, C<shift>, C<unshift> and C<splice> functions.
+The object must have first been created using type
+C<< DBM::Deep->TYPE_ARRAY >>,
+or simply be a nested array reference inside a hash. Example:
+
+    my $db = DBM::Deep->new(
+        file => "foo-array.db",
+        type => DBM::Deep->TYPE_ARRAY
+    );
+
+    $db->[0] = "foo";
+    push @$db, "bar", "baz";
+    unshift @$db, "bah";
+
+    my $last_elem   = pop @$db;   # baz
+    my $first_elem  = shift @$db; # bah
+    my $second_elem = $db->[1];   # bar
+
+    my $num_elements = scalar @$db;
+
+=head1 OO INTERFACE
+
+In addition to the I<tie()> interface, you can also use a standard OO interface
+to manipulate all aspects of DBM::Deep databases. Each type of object (hash or
+array) has its own methods, but both types share the following common methods:
+C<put()>, C<get()>, C<exists()>, C<delete()> and C<clear()>. C<fetch()> and
+C<store()> are aliases to C<get()> and C<put()>, respectively.
+
+=over
+
+=item * new() / clone()
+X<new>
+X<clone>
+
+These are the constructor and copy-functions.
+
+=item * put() / store()
+X<put>
+X<store>
+
+Stores a new hash key/value pair, or sets an array element value. Takes two
+arguments, the hash key or array index, and the new value. The value can be
+a scalar, hash ref or array ref. Returns true on success, false on failure.
+
+    $db->put("foo", "bar"); # for hashes
+    $db->put(1, "bar");     # for arrays
+
+=item * get() / fetch()
+X<get>
+X<fetch>
+
+Fetches the value of a hash key or array element. Takes one argument: the hash
+key or array index. Returns a scalar, hash ref or array ref, depending on the
+data type stored.
+
+    my $value = $db->get("foo"); # for hashes
+    my $value = $db->get(1);     # for arrays
+
+=item * exists()
+X<exists>
+
+Checks if a hash key or array index exists. Takes one argument: the hash key
+or array index. Returns true if it exists, false if not.
+
+    if ($db->exists("foo")) { print "yay!\n"; } # for hashes
+    if ($db->exists(1)) { print "yay!\n"; }     # for arrays
+
+=item * delete()
+X<delete>
+
+Deletes one hash key/value pair or array element. Takes one argument: the hash
+key or array index. Returns the data that the element used to contain (just
+like Perl's C<delete> function), which is C<undef> if it did not exist. For
+arrays, the remaining elements located after the deleted element are NOT
+moved over. The deleted element is essentially just undefined, which is
+exactly how Perl's internal arrays work.
+
+    $db->delete("foo"); # for hashes
+    $db->delete(1);     # for arrays
+
+=item * clear()
+X<clear>
+
+Deletes B<all> hash keys or array elements. Takes no arguments. No return
+value.
+
+    $db->clear(); # hashes or arrays
+
+=item * lock() / unlock() / lock_exclusive() / lock_shared()
+X<lock>
+X<unlock>
+X<lock_exclusive>
+X<lock_shared>
+
+q.v. L</LOCKING> for more info.
+
+=item * optimize()
+X<optimize>
+
+This will compress the datafile so that it takes up as little space as
+possible. There is a freespace manager so that when space is freed up, it is
+used before extending the size of the datafile. But, that freespace just sits
+in the datafile unless C<optimize()> is called.
+
+C<optimize()> basically copies everything into a new database, so, if it is
+in version 1.0003 format, it will be upgraded.
+
+=item * import()
+X<import>
+
+Unlike simple assignment, C<import()> does not tie the right-hand side. Instead,
+a copy of your data is put into the DB. C<import()> takes either an arrayref (if
+your DB is an array) or a hashref (if your DB is a hash). C<import()> will die
+if anything else is passed in.
+
+=item * export()
+X<export>
+
+This returns a complete copy of the data structure at the point you do the export.
+This copy is in RAM, not on disk like the DB is.
+
+=item * begin_work() / commit() / rollback()
+
+These are the transactional functions. See L</TRANSACTIONS> for more
+information.
+
+=item * supports( $option )
+X<supports>
+
+This returns a boolean indicating whether this instance of DBM::Deep
+supports that feature. C<$option> can be one of:
+
+=over 4
+
+=item * transactions
+X<transactions>
+
+=item * unicode
+X<unicode>
+
+=back
+
+=item * db_version()
+X<db_version>
+
+This returns the version of the database format that the current database
+is in. This is specified as the earliest version of DBM::Deep that supports
+it.
+
+For the File back end, this will be 1.0003 or 2.
+
+For the DBI back end, it is currently always 1.0020.
+
+=back
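+For example, a guard for optional features might look like this (a sketch;
+the feature probe is the documented C<supports()> call, everything else is
+illustrative):
+
+    # Only use transactions when this back end provides them
+    if ( $db->supports('transactions') ) {
+        $db->begin_work;
+        $db->{foo} = "bar";
+        $db->commit;
+    }
+    else {
+        $db->{foo} = "bar";    # no transactional protection available
+    }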
+
+=head2 Hashes
+
+For hashes, DBM::Deep supports all the common methods described above, and the
+following additional methods: C<first_key()> and C<next_key()>.
+
+=over
+
+=item * first_key()
+X<first_key>
+
+Returns the "first" key in the hash. As with built-in Perl hashes, keys are
+fetched in an undefined order (which appears random). Takes no arguments,
+returns the key as a scalar value.
+
+    my $key = $db->first_key();
+
+=item * next_key()
+X<next_key>
+
+Returns the "next" key in the hash, given the previous one as the sole argument.
+Returns undef if there are no more keys to be fetched.
+
+    $key = $db->next_key($key);
+
+=back
+
+Here are some examples of using hashes:
+
+    my $db = DBM::Deep->new( "foo.db" );
+
+    $db->put("foo", "bar");
+    print "foo: " . $db->get("foo") . "\n";
+
+    $db->put("baz", {}); # new child hash ref
+    $db->get("baz")->put("buz", "biz");
+    print "buz: " . $db->get("baz")->get("buz") . "\n";
+
+    my $key = $db->first_key();
+    while ($key) {
+        print "$key: " . $db->get($key) . "\n";
+        $key = $db->next_key($key);
+    }
+
+    if ($db->exists("foo")) { $db->delete("foo"); }
+
+=head2 Arrays
+
+For arrays, DBM::Deep supports all the common methods described above, and the
+following additional methods: C<length()>, C<push()>, C<pop()>, C<shift()>,
+C<unshift()> and C<splice()>.
+
+=over
+
+=item * length()
+X<length>
+
+Returns the number of elements in the array. Takes no arguments.
+
+    my $len = $db->length();
+
+=item * push()
+X<push>
+
+Adds one or more elements onto the end of the array. Accepts scalars, hash
+refs or array refs. No return value.
+
+    $db->push("foo", "bar", {});
+
+=item * pop()
+X<pop>
+
+Fetches the last element in the array, and deletes it. Takes no arguments.
+Returns undef if array is empty. Returns the element value.
+
+    my $elem = $db->pop();
+
+=item * shift()
+X<shift>
+
+Fetches the first element in the array, deletes it, then shifts all the
+remaining elements over to take up the space. Returns the element value. This
+method is not recommended with large arrays -- see L</Large Arrays> below for
+details.
+
+    my $elem = $db->shift();
+
+=item * unshift()
+X<unshift>
+
+Inserts one or more elements onto the beginning of the array, shifting all
+existing elements over to make room. Accepts scalars, hash refs or array refs.
+No return value. This method is not recommended with large arrays -- see
+L</Large Arrays> below for details.
+
+    $db->unshift("foo", "bar", {});
+
+=item * splice()
+X<splice>
+
+Performs exactly like Perl's built-in function of the same name. See
+L<perlfunc/splice> for usage -- it is too complicated to document here. This
+method is not recommended with large arrays -- see L</Large Arrays> below for
+details.
+
+=back
+
+Here are some examples of using arrays:
+
+    my $db = DBM::Deep->new(
+        file => "foo.db",
+        type => DBM::Deep->TYPE_ARRAY
+    );
+
+    $db->push("bar", "baz");
+    $db->unshift("foo");
+    $db->put(3, "buz");
+
+    my $len = $db->length();
+    print "length: $len\n"; # 4
+
+    for (my $k = 0; $k < $len; $k++) {
+        print "$k: " . $db->get($k) . "\n";
+    }
+
+    $db->splice(1, 2, "biz", "baf");
+
+    while (my $elem = shift @$db) {
+        print "shifted: $elem\n";
+    }
+
+=head1 LOCKING
+
+Enable or disable automatic file locking by passing a boolean value to the
+C<locking> parameter when constructing your DBM::Deep object (see L</SETUP>
+above).
+
+    my $db = DBM::Deep->new(
+        file => "foo.db",
+        locking => 1
+    );
+
+This causes DBM::Deep to C<flock()> the underlying filehandle with exclusive
+mode for writes, and shared mode for reads. This is required if you have
+multiple processes accessing the same database file, to avoid file corruption.
+Please note that C<flock()> does NOT work for files over NFS. See
+L</DB over NFS> below for more.
+
+=head2 Explicit Locking
+
+You can explicitly lock a database, so it remains locked for multiple
+actions. This is done by calling the C<lock_exclusive()> method (for when you
+want to write) or the C<lock_shared()> method (for when you want to read).
+This is particularly useful for things like counters, where the current value
+needs to be fetched, then incremented, then stored again.
+
+    $db->lock_exclusive();
+    my $counter = $db->get("counter");
+    $counter++;
+    $db->put("counter", $counter);
+    $db->unlock();
+
+    # or...
+
+    $db->lock_exclusive();
+    $db->{counter}++;
+    $db->unlock();
+
+=head2 Win32/Cygwin
+
+Due to Win32 actually enforcing the read-only status of a shared lock, all
+locks on Win32 and cygwin are exclusive. This is because of how autovivification
+currently works. Hopefully, this will go away in a future release.
+
+=head1 IMPORTING/EXPORTING
+
+You can import existing complex structures by calling the C<import()> method,
+and export an entire database into an in-memory structure using the C<export()>
+method. Both are examined here.
+
+=head2 Importing
+
+Say you have an existing hash with nested hashes/arrays inside it. Instead of
+walking the structure and adding keys/elements to the database as you go,
+simply pass a reference to the C<import()> method. This recursively adds
+everything to an existing DBM::Deep object for you. Here is an example:
+
+    my $struct = {
+        key1 => "value1",
+        key2 => "value2",
+        array1 => [ "elem0", "elem1", "elem2" ],
+        hash1 => {
+            subkey1 => "subvalue1",
+            subkey2 => "subvalue2"
+        }
+    };
+
+    my $db = DBM::Deep->new( "foo.db" );
+    $db->import( $struct );
+
+    print $db->{key1} . "\n"; # prints "value1"
+
+This recursively imports the entire C<$struct> object into C<$db>, including
+all nested hashes and arrays. If the DBM::Deep object contains existing data,
+keys are merged with the existing ones, replacing if they already exist.
+The C<import()> method can be called on any database level (not just the base
+level), and works with both hash and array DB types.
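+The merge behaviour, in a sketch (keys illustrative):
+
+    # Existing keys are replaced, new keys are added
+    $db->{key1} = "old";
+    $db->import( { key1 => "new", key3 => "added" } );
+
+    print $db->{key1}; # prints "new"   -- existing key replaced
+    print $db->{key3}; # prints "added" -- new key merged in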
+
+B<Note:> Make sure your existing structure has no circular references in it.
+These will cause an infinite loop when importing. There are plans to fix this
+in a later release.
+
+=head2 Exporting
+
+Calling the C<export()> method on an existing DBM::Deep object will return
+a reference to a new in-memory copy of the database. The export is done
+recursively, so all nested hashes/arrays are all exported to standard Perl
+objects. Here is an example:
+
+    my $db = DBM::Deep->new( "foo.db" );
+
+    $db->{key1} = "value1";
+    $db->{key2} = "value2";
+    $db->{hash1} = {};
+    $db->{hash1}->{subkey1} = "subvalue1";
+    $db->{hash1}->{subkey2} = "subvalue2";
+
+    my $struct = $db->export();
+
+    print $struct->{key1} . "\n"; # prints "value1"
+
+This makes a complete copy of the database in memory, and returns a reference
+to it. The C<export()> method can be called on any database level (not just
+the base level), and works with both hash and array DB types. Be careful of
+large databases -- you can store a lot more data in a DBM::Deep object than an
+in-memory Perl structure.
+
+B<Note:> Make sure your database has no circular references in it.
+These will cause an infinite loop when exporting. There are plans to fix this
+in a later release.
+
+=head1 FILTERS
+
+DBM::Deep has a number of hooks where you can specify your own Perl function
+to perform filtering on incoming or outgoing data. This is a perfect
+way to extend the engine, and implement things like real-time compression or
+encryption. Filtering applies to the base DB level, and all child hashes /
+arrays. Filter hooks can be specified when your DBM::Deep object is first
+constructed, or by calling the C<set_filter()> method at any time. There are
+four available filter hooks.
+
+=head2 set_filter()
+
+This method takes two parameters - the filter type and the filter subreference.
+The four types are:
+
+=over
+
+=item * filter_store_key
+
+This filter is called whenever a hash key is stored. It
+is passed the incoming key, and expected to return a transformed key.
+
+=item * filter_store_value
+
+This filter is called whenever a hash key or array element is stored. It
+is passed the incoming value, and expected to return a transformed value.
+
+=item * filter_fetch_key
+
+This filter is called whenever a hash key is fetched (i.e. via
+C<first_key()> or C<next_key()>). It is passed the transformed key,
+and expected to return the plain key.
+
+=item * filter_fetch_value
+
+This filter is called whenever a hash key or array element is fetched.
+It is passed the transformed value, and expected to return the plain value.
+
+=back
+
+Here are the two ways to setup a filter hook:
+
+    my $db = DBM::Deep->new(
+        file => "foo.db",
+        filter_store_value => \&my_filter_store,
+        filter_fetch_value => \&my_filter_fetch
+    );
+
+    # or...
+
+    $db->set_filter( "store_value", \&my_filter_store );
+    $db->set_filter( "fetch_value", \&my_filter_fetch );
+
+Your filter function will be called only when dealing with SCALAR keys or
+values. When nested hashes and arrays are being stored/fetched, filtering
+is bypassed. Filters are called as static functions, passed a single SCALAR
+argument, and expected to return a single SCALAR value. If you want to
+remove a filter, set the function reference to C<undef>:
+
+    $db->set_filter( "store_value", undef );
+
+=head2 Examples
+
+Please read L<DBM::Deep::Cookbook> for examples of filters.
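+As a trivial, self-inverse illustration (a sketch only, not taken from the
+Cookbook), here is a ROT13 filter pair for values:
+
+    # ROT13 is its own inverse, so the same sub serves both hooks
+    my $rot13 = sub {
+        my $s = shift;
+        $s =~ tr/A-Za-z/N-ZA-Mn-za-m/;
+        return $s;
+    };
+
+    $db->set_filter( "store_value", $rot13 );  # obfuscate on the way in
+    $db->set_filter( "fetch_value", $rot13 );  # restore on the way out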
+
+=head1 ERROR HANDLING
+
+Most DBM::Deep methods return a true value for success, and call die() on
+failure. You can wrap calls in an eval block to catch the die.
+
+    my $db = DBM::Deep->new( "foo.db" ); # create hash
+    eval { $db->push("foo"); };          # ILLEGAL -- push is array-only call
+
+    print $@; # prints error message
+
+=head1 LARGEFILE SUPPORT
+
+If you have a 64-bit system, and your Perl is compiled with both LARGEFILE
+and 64-bit support, you I<may> be able to create databases larger than 4 GB.
+DBM::Deep by default uses 32-bit file offset tags, but these can be changed
+by specifying the 'pack_size' parameter when constructing the file.
+
+    DBM::Deep->new(
+        file      => $filename,
+        pack_size => 'large',
+    );
+
+This tells DBM::Deep to pack all file offsets with 8-byte (64-bit) quad words
+instead of 32-bit longs. After setting these values your DB files have a
+theoretical maximum size of 16 XB (exabytes).
+
+You can also use C<< pack_size => 'small' >> in order to use 16-bit file
+offsets.
+
+B<Note:> Changing these values will B<not> work for existing database files.
+Only change this for new files. Once the value has been set, it is stored in
+the file's header and cannot be changed for the life of the file. These
+parameters are per-file, meaning you can access 32-bit and 64-bit files, as
+you choose.
+
+B<Note:> We have not personally tested files larger than 4 GB -- all our
+systems have only a 32-bit Perl. However, we have received user reports that
+this does indeed work.
+
+=head1 LOW-LEVEL ACCESS
+
+If you require low-level access to the underlying filehandle that DBM::Deep uses,
+you can call the C<_fh()> method, which returns the handle:
+
+    my $fh = $db->_fh();
+
+This method can be called on the root level of the database, or any child
+hashes or arrays. All levels share a I<root> structure, which contains things
+like the filehandle, a reference counter, and all the options specified
+when you created the object. You can get access to this file object by
+calling the C<_storage()> method.
+
+    my $file_obj = $db->_storage();
+
+This is useful for changing options after the object has already been created,
+such as enabling/disabling locking. You can also store your own temporary user
+data in this structure (be wary of name collision), which is then accessible from
+any child hash or array.
+
+=head1 CIRCULAR REFERENCES
+
+DBM::Deep has full support for circular references. Meaning you
+can have a nested hash key or array element that points to a parent object.
+This relationship is stored in the DB file, and is preserved between sessions.
+Here is an example:
+
+    my $db = DBM::Deep->new( "foo.db" );
+
+    $db->{foo} = "bar";
+    $db->{circle} = $db; # ref to self
+
+    print $db->{foo} . "\n";           # prints "bar"
+    print $db->{circle}->{foo} . "\n"; # prints "bar" again
+
+This also works as expected with array and hash references. So, the following
+works as expected:
+
+    $db->{foo} = [ 1 .. 3 ];
+    $db->{bar} = $db->{foo};
+
+    push @{$db->{foo}}, 42;
+    is( $db->{bar}[-1], 42 ); # Passes
+
+This, however, does I<not> extend to assignments from one DB file to another.
+So, the following will throw an error:
+
+    my $db1 = DBM::Deep->new( "foo.db" );
+    my $db2 = DBM::Deep->new( "bar.db" );
+
+    $db1->{foo} = [];
+    $db2->{foo} = $db1->{foo}; # dies
+
+B<Note>: Passing the object to a function that recursively walks the
+object tree (such as I<Data::Dumper> or even the built-in C<optimize()> or
+C<export()> methods) will result in an infinite loop. This will be fixed in
+a future release by adding singleton support.
+
+=head1 TRANSACTIONS
+
+As of 1.0000, DBM::Deep has ACID transactions. Every DBM::Deep object is completely
+transaction-ready - it is not an option you have to turn on. You do have to
+specify how many transactions may run simultaneously (q.v. L</num_txns>).
+
+Three new methods have been added to support them. They are:
+
+=over 4
+
+=item * begin_work()
+
+This starts a transaction.
+
+=item * commit()
+
+This applies the changes done within the transaction to the mainline and ends
+the transaction.
+
+=item * rollback()
+
+This discards the changes done within the transaction to the mainline and ends
+the transaction.
+
+=back
+
+Transactions in DBM::Deep are done using a variant of the MVCC method, the
+same method used by the InnoDB MySQL engine.
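+A minimal sketch (assuming the File back end, which supports transactions,
+and a C<num_txns> value above 1):
+
+    my $db = DBM::Deep->new(
+        file     => "foo.db",
+        num_txns => 4,          # the HEAD plus up to 3 running transactions
+    );
+
+    $db->{counter} = 0;
+
+    $db->begin_work;
+    $db->{counter} = 42;
+    $db->rollback;              # change is discarded
+    print $db->{counter}, "\n"; # prints 0
+
+    $db->begin_work;
+    $db->{counter} = 42;
+    $db->commit;                # change is applied to the mainline
+    print $db->{counter}, "\n"; # prints 42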
+
+=head1 MIGRATION
+
+As of 1.0000, the file format has changed. To aid in upgrades, a migration
+script is provided within the CPAN distribution, called
+F<utils/upgrade_db.pl>.
+
+B<Note:> This script is not installed onto your system because it carries a copy
+of every version prior to the current version.
+
+As of version 2.0000, databases created by old versions back to 1.0003 can
+be read, but new features may not be available unless the database is
+upgraded first.
+
+=head1 TODO
+
+The following are items that are planned to be added in future releases. These
+are separate from the L</CAVEATS, ISSUES & BUGS> below.
+
+=head2 Sub-Transactions
+
+Right now, you cannot run a transaction within a transaction. Removing this
+restriction is technically straightforward, but the combinatorial explosion of
+possible use cases hurts my head. If this is something you want to see
+immediately, please submit many test cases.
+
+=head2 Caching
+
+If a client is willing to assert upon opening the file that this process will be
+the only consumer of that datafile, then there are a number of caching
+possibilities that can be taken advantage of. This does, however, mean that
+DBM::Deep is more vulnerable to losing data due to unflushed changes. It also
+means a much larger in-memory footprint. As such, it's not clear exactly how
+this should be done. Suggestions are welcome.
+
+=head2 Ram-only
+
+The techniques used in DBM::Deep simply require a seekable contiguous
+datastore. This could just as easily be a large string as a file. By using
+substr, the STM capabilities of DBM::Deep could be used within a
+single process. I have no idea how I'd specify this, though. Suggestions are
+welcome.
+
+=head2 Different contention resolution mechanisms
+
+Currently, the only contention resolution mechanism is last-write-wins. This
+is the mechanism used by most RDBMSes and should be good enough for most uses.
+For advanced uses of STM, other contention mechanisms will be needed. If you
+have an idea of how you'd like to see contention resolution in DBM::Deep,
+please let me know.
+
+=head1 CAVEATS, ISSUES & BUGS
+
+This section describes all the known issues with DBM::Deep. These are issues
+that are either intractable or depend on some feature within Perl working
+exactly right. If you have found something that is not listed below, please
+send an e-mail to L<rkinyon@cpan.org>.
+Likewise, if you think you know of a way around one of these issues, please
+let me know.
+
+=head2 References
+
+(The following assumes a high level of Perl understanding, specifically of
+references. Most users can safely skip this section.)
+
+Currently, the only references supported are HASH and ARRAY. The other reference
+types (SCALAR, CODE, GLOB, and REF) cannot be supported for various reasons.
+
+=over 4
+
+=item * GLOB
+
+These are things like filehandles and other sockets. They can't be supported
+because it's completely unclear how DBM::Deep should serialize them.
+
+=item * SCALAR / REF
+
+The discussion here refers to the following type of example:
+
+    my $x = 25;
+    $db->{key1} = \$x;
+
+    $x = 50;
+
+    # In some other process ...
+
+    my $val = ${ $db->{key1} };
+
+    is( $val, 50, "What actually gets stored in the DB file?" );
+
+The problem is one of synchronization. When the variable being referred to
+changes value, the reference isn't notified, which is kind of the point of
+references. This means that the new value won't be stored in the datafile for
+other processes to read. There is no TIEREF.
+
+It is theoretically possible to store references to values already within a
+DBM::Deep object because everything already is synchronized, but the change to
+the internals would be quite large. Specifically, DBM::Deep would have to tie
+every single value that is stored. This would bloat the RAM footprint of
+DBM::Deep at least twofold (if not more) and be a significant performance drain,
+all to support a feature that has never been requested.
+
+=item * CODE
+
+L<Data::Dump::Streamer> provides a mechanism for serializing coderefs,
+including saving off all closure state. This would allow for DBM::Deep to
+store the code for a subroutine. Then, whenever the subroutine is read, the
+code could be C<eval()>'ed into being. However, just as for SCALAR and REF,
+that closure state may change without notifying the DBM::Deep object storing
+the reference. Again, this would generally be considered a feature.
+
+=back
+
+=head2 External references and transactions
+
+If you do C<< my $x = $db->{foo}; >>, then start a transaction, $x will be
+referencing the database from outside the transaction. A fix for this (and
+other issues with how external references into the database are handled) is
+being looked into. This is the skipped set of tests in t/39_singletons.t, and
+a related issue is the focus of t/37_delete_edge_cases.t.
+
+=head2 File corruption
+
+The current level of error handling in DBM::Deep is minimal. Files I<are> checked
+for a 32-bit signature when opened, but any other form of corruption in the
+datafile can cause segmentation faults. DBM::Deep may try to C<seek()> past
+the end of a file, or get stuck in an infinite loop depending on the level and
+type of corruption. File write operations are not checked for failure (for
+speed), so if you happen to run out of disk space, DBM::Deep will probably fail in
+a bad way. These things will be addressed in a later version of DBM::Deep.
+
+=head2 DB over NFS
+
+Beware of using DBM::Deep files over NFS. DBM::Deep uses flock(), which works
+well on local filesystems, but will NOT protect you from file corruption over
+NFS. I've heard about setting up your NFS server with a locking daemon, then
+using C<flock()> to lock your files, but your mileage may vary there as well.
+From what I understand, there is no real way to do it. However, if you need
+access to the underlying filehandle in DBM::Deep for using some other kind of
+locking scheme like C<lockf()>, see the L</LOW-LEVEL ACCESS> section above.
+
+=head2 Copying Objects
+
+Beware of copying tied objects in Perl. Very strange things can happen.
+Instead, use DBM::Deep's C<clone()> method which safely copies the object and
+returns a new, blessed and tied hash or array to the same level in the DB.
+
+    my $copy = $db->clone();
+
+B<Note>: Since clone() here is cloning the object, not the database location,
+any modifications to either $db or $copy will be visible to both.
+
+=head2 Stale References
+
+If you take a reference to an array or hash from the database, it is tied
+to the database itself. This means that if the datum in question is
+subsequently deleted from the database, the reference to it will point to
+an invalid location and unpredictable things will happen if you try to use
+it.
+
+So a seemingly innocuous piece of code like this:
+
+    my %hash = %{ $db->{some_hash} };
+
+can fail if another process deletes or clobbers C<< $db->{some_hash} >>
+while the data are being extracted, since S<C<< %{ ... } >>> is not atomic.
+(This actually happened.) The solution is to lock the database before
+reading the data:
+
+    $db->lock_exclusive;
+    my %hash = %{ $db->{some_hash} };
+    $db->unlock;
+
+As of version 1.0024, if you assign a stale reference to a location
+in the database, DBM::Deep will warn, if you have uninitialized warnings
+enabled, and treat the stale reference as C<undef>. An attempt to use a
+stale reference as an array or hash reference will cause an error.
+
+=head2 Large Arrays
+
+Beware of using C<shift()>, C<unshift()> or C<splice()> with large arrays.
+These functions cause every element in the array to move, which can be murder
+on DBM::Deep, as every element has to be fetched from disk, then stored again in
+a different location. This will be addressed in a future version.
+
+This has been somewhat addressed so that the cost is constant, regardless of
+what is stored at those locations. So, small arrays with huge data structures in
+them are faster. But, large arrays are still large.
+
+=head2 Writeonly Files
+
+If you pass in a filehandle to new(), you may have opened it in either a
+readonly or writeonly mode. STORE will verify that the filehandle is writable.
+However, there doesn't seem to be a good way to determine if a filehandle is
+readable. And, if the filehandle isn't readable, it's not clear what will
+happen. So, don't do that.
+
+=head2 Assignments Within Transactions
+
+The following will I<not> work as one might expect:
+
+    my $x = { a => 1 };
+
+    $db->begin_work;
+    $db->{foo} = $x;
+    $db->rollback;
+
+    is( $x->{a}, 1 ); # This will fail!
+
+The problem is that the moment a reference is used as the rvalue to a DBM::Deep
+object's lvalue, it becomes tied itself. This is so that future changes to
+C<$x> can be tracked within the DBM::Deep file and is considered to be a
+feature. By the time the rollback occurs, there is no knowledge that there had
+been an C<$x> or what memory location to assign an C<export()> to.
+
+B<Note:> This does not affect importing because imports do a walk over the
+reference to be imported in order to explicitly leave it untied.
+
+=head1 CODE COVERAGE
+
+L<Devel::Cover> is used to test the code coverage of the tests. Below is the
+L<Devel::Cover> report on this distribution's test suite.
+
+  ---------------------------- ------ ------ ------ ------ ------ ------ ------
+  File                           stmt   bran   cond    sub    pod   time  total
+  ---------------------------- ------ ------ ------ ------ ------ ------ ------
+  blib/lib/DBM/Deep.pm          100.0   89.1   82.9  100.0  100.0   32.5   98.1
+  blib/lib/DBM/Deep/Array.pm    100.0   94.4  100.0  100.0  100.0    5.2   98.8
+  blib/lib/DBM/Deep/Engine.pm   100.0   92.9  100.0  100.0  100.0    7.4  100.0
+  ...ib/DBM/Deep/Engine/DBI.pm   95.0   73.1  100.0  100.0  100.0    1.5   90.4
+  ...b/DBM/Deep/Engine/File.pm   92.3   78.5   88.9  100.0  100.0    4.9   90.3
+  blib/lib/DBM/Deep/Hash.pm     100.0  100.0  100.0  100.0  100.0    3.8  100.0
+  .../lib/DBM/Deep/Iterator.pm  100.0    n/a    n/a  100.0  100.0    0.0  100.0
+  .../DBM/Deep/Iterator/DBI.pm  100.0  100.0    n/a  100.0  100.0    1.2  100.0
+  ...DBM/Deep/Iterator/File.pm   92.5   84.6    n/a  100.0   66.7    0.6   90.0
+  ...erator/File/BucketList.pm  100.0   75.0    n/a  100.0   66.7    0.4   93.8
+  ...ep/Iterator/File/Index.pm  100.0  100.0    n/a  100.0  100.0    0.2  100.0
+  blib/lib/DBM/Deep/Null.pm      87.5    n/a    n/a   75.0    n/a    0.0   83.3
+  blib/lib/DBM/Deep/Sector.pm    91.7    n/a    n/a   83.3    0.0    6.7   74.4
+  ...ib/DBM/Deep/Sector/DBI.pm   96.8   83.3    n/a  100.0    0.0    1.0   89.8
+  ...p/Sector/DBI/Reference.pm  100.0   95.5  100.0  100.0    0.0    2.2   91.2
+  ...Deep/Sector/DBI/Scalar.pm  100.0  100.0    n/a  100.0    0.0    1.1   92.9
+  ...b/DBM/Deep/Sector/File.pm   96.0   87.5  100.0   92.3   25.0    2.2   91.0
+  ...Sector/File/BucketList.pm   98.2   85.7   83.3  100.0    0.0    3.3   89.4
+  .../Deep/Sector/File/Data.pm  100.0    n/a    n/a  100.0    0.0    0.1   90.9
+  ...Deep/Sector/File/Index.pm  100.0   80.0   33.3  100.0    0.0    0.8   83.1
+  .../Deep/Sector/File/Null.pm  100.0  100.0    n/a  100.0    0.0    0.0   91.7
+  .../Sector/File/Reference.pm  100.0   90.0   80.0  100.0    0.0    1.4   91.5
+  ...eep/Sector/File/Scalar.pm   98.4   87.5    n/a  100.0    0.0    0.8   91.9
+  blib/lib/DBM/Deep/Storage.pm  100.0    n/a    n/a  100.0  100.0    0.0  100.0
+  ...b/DBM/Deep/Storage/DBI.pm   97.3   70.8    n/a  100.0   38.5    6.7   87.0
+  .../DBM/Deep/Storage/File.pm   96.6   77.1   80.0   95.7  100.0   16.0   91.8
+  Total                          99.3   85.2   84.9   99.8   63.3  100.0   97.6
+  ---------------------------- ------ ------ ------ ------ ------ ------ ------
+
+=head1 MORE INFORMATION
+
+Check out the DBM::Deep Google Group at
+L<http://groups.google.com/group/DBM-Deep>
+or send email to L<DBM-Deep@googlegroups.com>.
+You can also visit #dbm-deep on irc.perl.org
+
+The source code repository is at L<http://github.com/robkinyon/dbm-deep>
+
+=head1 MAINTAINERS
+
+Rob Kinyon, L<rkinyon@cpan.org>
+
+Originally written by Joseph Huckaby, L<jhuckaby@cpan.org>
+
+=head1 SPONSORS
+
+Stonehenge Consulting (L<http://www.stonehenge.com/>) sponsored the
+development of transactions and freespace management, leading to the 1.0000
+release. A great debt of gratitude goes out to them for their continuing
+leadership in and support of the Perl community.
+
+=head1 CONTRIBUTORS
+
+The following have contributed greatly to make DBM::Deep what it is today:
+
+=over 4
+
+=item * Adam Sah and Rich Gaushell for innumerable contributions early on.
+
+=item * Dan Golden and others at YAPC::NA 2006 for helping me design through transactions.
+
+=item * James Stanley for a bug fix.
+
+=item * David Steinbrunner for fixing typos and adding repository cpan metadata.
+
+=item * H. Merijn Brand for fixing the POD escapes.
+
+=item * Breno G. de Oliveira for minor packaging tweaks.
+
+=back
+
+=head1 SEE ALSO
+
+L<DBM::Deep::Cookbook(3)>
+
+L<perltie(1)>, L<Tie::Hash(3)>, L<Fcntl(3)>, L<flock(2)>, L<lockf(3)>,
+L<nfs(5)>
+
+=head1 LICENSE
+
+Copyright (c) 2007-14 Rob Kinyon. All Rights Reserved.
+This is free software, you may use it and distribute it under the same terms
+as Perl itself.
+ +=cut diff --git a/src/modules/DBM/Deep/Array.pm b/src/modules/DBM/Deep/Array.pm new file mode 100644 index 0000000..a3291bd --- /dev/null +++ b/src/modules/DBM/Deep/Array.pm @@ -0,0 +1,427 @@ +package DBM::Deep::Array; + +use 5.008_004; + +use strict; +use warnings FATAL => 'all'; +no warnings 'recursion'; + +# This is to allow DBM::Deep::Array to handle negative indices on +# its own. Otherwise, Perl would intercept the call to negative +# indices for us. This was causing bugs for negative index handling. +our $NEGATIVE_INDICES = 1; + +use base 'DBM::Deep'; + +use Scalar::Util (); + +sub _get_self { + # We used to have + # eval { local $SIG{'__DIE__'}; tied( @{$_[0]} ) } || $_[0] + # but this does not always work during global destruction (DBM::Deep’s + # destructor calls this method), but will return $_[0] even when $_[0] + # is tied, if it’s tied to undef. In those cases it’s better to return + # undef, so the destructor can tell not to do anything, and, if any- + # thing else calls us, it will fail with a more helpful error message. + + Scalar::Util::reftype $_[0] eq 'ARRAY' ? tied @{$_[0]} : $_[0]; +} + +sub _repr { [] } + +sub TIEARRAY { + my $class = shift; + my $args = $class->_get_args( @_ ); + + $args->{type} = $class->TYPE_ARRAY; + + return $class->_init($args); +} + +sub FETCH { + my $self = shift->_get_self; + my ($key) = @_; + + $self->lock_shared; + + if ( !defined $key ) { + $self->unlock; + DBM::Deep->_throw_error( "Cannot use an undefined array index." ); + } + elsif ( $key =~ /^-?\d+$/ ) { + if ( $key < 0 ) { + $key += $self->FETCHSIZE; + unless ( $key >= 0 ) { + $self->unlock; + return; + } + } + } + elsif ( $key ne 'length' ) { + $self->unlock; + DBM::Deep->_throw_error( "Cannot use '$key' as an array index." ); + } + + my $rv = $self->SUPER::FETCH( $key ); + + $self->unlock; + + return $rv; +} + +sub STORE { + my $self = shift->_get_self; + my ($key, $value) = @_; + + $self->lock_exclusive; + + my $size; + my $idx_is_numeric; + if ( !defined $key ) { + $self->unlock; + DBM::Deep->_throw_error( "Cannot use an undefined array index." ); + } + elsif ( $key =~ /^-?\d+$/ ) { + $idx_is_numeric = 1; + if ( $key < 0 ) { + $size = $self->FETCHSIZE; + if ( $key + $size < 0 ) { + die( "Modification of non-creatable array value attempted, subscript $key" ); + } + $key += $size + } + } + elsif ( $key ne 'length' ) { + $self->unlock; + DBM::Deep->_throw_error( "Cannot use '$key' as an array index." ); + } + + my $rv = $self->SUPER::STORE( $key, $value ); + + if ( $idx_is_numeric ) { + $size = $self->FETCHSIZE unless defined $size; + if ( $key >= $size ) { + $self->STORESIZE( $key + 1 ); + } + } + + $self->unlock; + + return $rv; +} + +sub EXISTS { + my $self = shift->_get_self; + my ($key) = @_; + + $self->lock_shared; + + if ( !defined $key ) { + $self->unlock; + DBM::Deep->_throw_error( "Cannot use an undefined array index." ); + } + elsif ( $key =~ /^-?\d+$/ ) { + if ( $key < 0 ) { + $key += $self->FETCHSIZE; + unless ( $key >= 0 ) { + $self->unlock; + return; + } + } + } + elsif ( $key ne 'length' ) { + $self->unlock; + DBM::Deep->_throw_error( "Cannot use '$key' as an array index." ); + } + + my $rv = $self->SUPER::EXISTS( $key ); + + $self->unlock; + + return $rv; +} + +sub DELETE { + my $self = shift->_get_self; + my ($key) = @_; + warn "ARRAY::DELETE($self,$key)\n" if DBM::Deep::DEBUG; + + $self->lock_exclusive; + + my $size = $self->FETCHSIZE; + if ( !defined $key ) { + $self->unlock; + DBM::Deep->_throw_error( "Cannot use an undefined array index." 
); + } + elsif ( $key =~ /^-?\d+$/ ) { + if ( $key < 0 ) { + $key += $size; + unless ( $key >= 0 ) { + $self->unlock; + return; + } + } + } + elsif ( $key ne 'length' ) { + $self->unlock; + DBM::Deep->_throw_error( "Cannot use '$key' as an array index." ); + } + + my $rv = $self->SUPER::DELETE( $key ); + + if ($rv && $key == $size - 1) { + $self->STORESIZE( $key ); + } + + $self->unlock; + + return $rv; +} + +# Now that we have a real Reference sector, we should store arrayzize there. +# However, arraysize needs to be transactionally-aware, so a simple location to +# store it isn't going to work. +sub FETCHSIZE { + my $self = shift->_get_self; + + $self->lock_shared; + + my $SAVE_FILTER = $self->_engine->storage->{filter_fetch_value}; + $self->_engine->storage->{filter_fetch_value} = undef; + + my $size = $self->FETCH('length') || 0; + + $self->_engine->storage->{filter_fetch_value} = $SAVE_FILTER; + + $self->unlock; + + return $size; +} + +sub STORESIZE { + my $self = shift->_get_self; + my ($new_length) = @_; + + $self->lock_exclusive; + + my $SAVE_FILTER = $self->_engine->storage->{filter_store_value}; + $self->_engine->storage->{filter_store_value} = undef; + + my $result = $self->STORE('length', $new_length, 'length'); + + $self->_engine->storage->{filter_store_value} = $SAVE_FILTER; + + $self->unlock; + + return $result; +} + +sub POP { + my $self = shift->_get_self; + + $self->lock_exclusive; + + my $length = $self->FETCHSIZE(); + + if ($length) { + my $content = $self->FETCH( $length - 1 ); + $self->DELETE( $length - 1 ); + + $self->unlock; + + return $content; + } + else { + $self->unlock; + return; + } +} + +sub PUSH { + my $self = shift->_get_self; + + $self->lock_exclusive; + + my $length = $self->FETCHSIZE(); + + for my $content (@_) { + $self->STORE( $length, $content ); + $length++; + } + + $self->unlock; + + return $length; +} + +# XXX This really needs to be something more direct within the file, not a +# fetch and re-store. -RobK, 2007-09-20 +sub _move_value { + my $self = shift; + my ($old_key, $new_key) = @_; + + return $self->_engine->make_reference( $self, $old_key, $new_key ); +} + +sub SHIFT { + my $self = shift->_get_self; + warn "SHIFT($self)\n" if DBM::Deep::DEBUG; + + $self->lock_exclusive; + + my $length = $self->FETCHSIZE(); + + if ( !$length ) { + $self->unlock; + return; + } + + my $content = $self->DELETE( 0 ); + + # Unless the deletion above has cleared the array ... 
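+    # Every remaining element is then walked down one slot via
+    # _move_value(), i.e. a fetch and re-store per element, so SHIFT costs
+    # O(n) disk operations. This is why SHIFT, UNSHIFT and SPLICE are
+    # discouraged for large arrays (see "Large Arrays" in the main POD).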
+ if ( $length > 1 ) { + for (my $i = 0; $i < $length - 1; $i++) { + $self->_move_value( $i+1, $i ); + } + + $self->DELETE( $length - 1 ); + } + + $self->unlock; + + return $content; +} + +sub UNSHIFT { + my $self = shift->_get_self; + my @new_elements = @_; + + $self->lock_exclusive; + + my $length = $self->FETCHSIZE(); + my $new_size = scalar @new_elements; + + if ($length) { + for (my $i = $length - 1; $i >= 0; $i--) { + $self->_move_value( $i, $i+$new_size ); + } + + $self->STORESIZE( $length + $new_size ); + } + + for (my $i = 0; $i < $new_size; $i++) { + $self->STORE( $i, $new_elements[$i] ); + } + + $self->unlock; + + return $length + $new_size; +} + +sub SPLICE { + my $self = shift->_get_self; + + $self->lock_exclusive; + + my $length = $self->FETCHSIZE(); + + ## + # Calculate offset and length of splice + ## + my $offset = shift; + $offset = 0 unless defined $offset; + if ($offset < 0) { $offset += $length; } + + my $splice_length; + if (scalar @_) { $splice_length = shift; } + else { $splice_length = $length - $offset; } + if ($splice_length < 0) { $splice_length += ($length - $offset); } + + ## + # Setup array with new elements, and copy out old elements for return + ## + my @new_elements = @_; + my $new_size = scalar @new_elements; + + my @old_elements = map { + $self->FETCH( $_ ) + } $offset .. ($offset + $splice_length - 1); + + ## + # Adjust array length, and shift elements to accommodate new section. + ## + if ( $new_size != $splice_length ) { + if ($new_size > $splice_length) { + for (my $i = $length - 1; $i >= $offset + $splice_length; $i--) { + $self->_move_value( $i, $i + ($new_size - $splice_length) ); + } + $self->STORESIZE( $length + $new_size - $splice_length ); + } + else { + for (my $i = $offset + $splice_length; $i < $length; $i++) { + $self->_move_value( $i, $i + ($new_size - $splice_length) ); + } + for (my $i = 0; $i < $splice_length - $new_size; $i++) { + $self->DELETE( $length - 1 ); + $length--; + } + } + } + + ## + # Insert new elements into array + ## + for (my $i = $offset; $i < $offset + $new_size; $i++) { + $self->STORE( $i, shift @new_elements ); + } + + $self->unlock; + + ## + # Return deleted section, or last element in scalar context. + ## + return wantarray ? @old_elements : $old_elements[-1]; +} + +# We don't need to populate it, yet. +# It will be useful, though, when we split out HASH and ARRAY +# Perl will call EXTEND() when the array is likely to grow. +# We don't care, but include it because it gets called at times. +sub EXTEND {} + +sub _copy_node { + my $self = shift; + my ($db_temp) = @_; + + my $length = $self->length(); + for (my $index = 0; $index < $length; $index++) { + $self->_copy_value( \$db_temp->[$index], $self->get($index) ); + } + + return 1; +} + +sub _clear { + my $self = shift; + + my $size = $self->FETCHSIZE; + for my $key ( 0 .. 
$size - 1 ) {
+        $self->_engine->delete_key( $self, $key, $key );
+    }
+    $self->STORESIZE( 0 );
+
+    return;
+}
+
+sub length { (shift)->FETCHSIZE(@_) }
+sub pop { (shift)->POP(@_) }
+sub push { (shift)->PUSH(@_) }
+sub unshift { (shift)->UNSHIFT(@_) }
+sub splice { (shift)->SPLICE(@_) }
+
+# This must be last otherwise we have to qualify all other calls to shift
+# as calls to CORE::shift
+sub shift { (CORE::shift)->SHIFT(@_) }
+
+1;
+__END__
diff --git a/src/modules/DBM/Deep/Cookbook.pod b/src/modules/DBM/Deep/Cookbook.pod
new file mode 100644
index 0000000..2617609
--- /dev/null
+++ b/src/modules/DBM/Deep/Cookbook.pod
@@ -0,0 +1,215 @@
+=head1 NAME
+
+DBM::Deep::Cookbook - Cookbook for DBM::Deep
+
+=head1 DESCRIPTION
+
+This is the Cookbook for L<DBM::Deep>. It contains useful tips and tricks,
+plus some examples of how to do common tasks.
+
+=head1 RECIPES
+
+=head2 Unicode data
+
+If possible, it is highly recommended that you upgrade your database to
+version 2 (using the F<utils/upgrade_db.pl> script in the CPAN
+distribution), in order to use Unicode.
+
+If your databases are still shared by perl installations with older
+DBM::Deep versions, you can use filters to encode strings on the fly:
+
+    my $db = DBM::Deep->new( ... );
+    my $encode_sub = sub { my $s = shift; utf8::encode($s); $s };
+    my $decode_sub = sub { my $s = shift; utf8::decode($s); $s };
+    $db->set_filter( 'store_value' => $encode_sub );
+    $db->set_filter( 'fetch_value' => $decode_sub );
+    $db->set_filter( 'store_key'   => $encode_sub );
+    $db->set_filter( 'fetch_key'   => $decode_sub );
+
+A previous version of this cookbook recommended using
+C<< binmode $db->_fh, ":utf8" >>, but that is I<not> a good idea, as it
+could easily corrupt the database.
+
+=head2 Real-time Encryption Example
+
+B<NOTE>: This is just an example of how to write a filter. This most
+definitely should B<not> be taken as a proper way to write a filter that does
+encryption. (Furthermore, it fails to take Unicode into account.)
+
+Here is a working example that uses the I<Crypt::Blowfish> module to
+do real-time encryption / decryption of keys & values with DBM::Deep Filters.
+Please visit L<http://search.cpan.org/search?module=Crypt::Blowfish> for more
+on I<Crypt::Blowfish>. You'll also need the I<Crypt::CBC> module.
+
+    use DBM::Deep;
+    use Crypt::Blowfish;
+    use Crypt::CBC;
+
+    my $cipher = Crypt::CBC->new({
+        'key'            => 'my secret key',
+        'cipher'         => 'Blowfish',
+        'iv'             => '$KJh#(}q',
+        'regenerate_key' => 0,
+        'padding'        => 'space',
+        'prepend_iv'     => 0
+    });
+
+    my $db = DBM::Deep->new(
+        file               => "foo-encrypt.db",
+        filter_store_key   => \&my_encrypt,
+        filter_store_value => \&my_encrypt,
+        filter_fetch_key   => \&my_decrypt,
+        filter_fetch_value => \&my_decrypt,
+    );
+
+    $db->{key1} = "value1";
+    $db->{key2} = "value2";
+    print "key1: " . $db->{key1} . "\n";
+    print "key2: " . $db->{key2} . "\n";
+
+    undef $db;
+    exit;
+
+    sub my_encrypt {
+        return $cipher->encrypt( $_[0] );
+    }
+    sub my_decrypt {
+        return $cipher->decrypt( $_[0] );
+    }
+
+=head2 Real-time Compression Example
+
+Here is a working example that uses the I<Compress::Zlib> module to do real-time
+compression / decompression of keys & values with DBM::Deep Filters.
+Please visit L<http://search.cpan.org/search?module=Compress::Zlib> for
+more on I<Compress::Zlib>.
+
+    use DBM::Deep;
+    use Compress::Zlib;
+
+    my $db = DBM::Deep->new(
+        file               => "foo-compress.db",
+        filter_store_key   => \&my_compress,
+        filter_store_value => \&my_compress,
+        filter_fetch_key   => \&my_decompress,
+        filter_fetch_value => \&my_decompress,
+    );
+
+    $db->{key1} = "value1";
+    $db->{key2} = "value2";
+    print "key1: " . $db->{key1} . "\n";
+    print "key2: " . $db->{key2} . "\n";
+
+    undef $db;
+    exit;
+
+    sub my_compress {
+        my $s = shift;
+        utf8::encode($s);
+        return Compress::Zlib::memGzip( $s );
+    }
+    sub my_decompress {
+        my $s = Compress::Zlib::memGunzip( shift );
+        utf8::decode($s);
+        return $s;
+    }
+
+B<Note:> Filtering of keys only applies to hashes. Array "keys" are
+actually numerical index numbers, and are not filtered.
+
+=head1 Custom Digest Algorithm
+
+DBM::Deep by default uses the I<Message Digest 5> (MD5) algorithm for hashing
+keys. However you can override this, and use another algorithm (such as SHA-256)
+or even write your own. But please note that DBM::Deep currently expects zero
+collisions, so your algorithm has to be I<perfect>, so to speak. Collision
+detection may be introduced in a later version.
+
+You can specify a custom digest algorithm by passing it into the parameter
+list for new(), passing a reference to a subroutine as the 'digest' parameter,
+and the length of the algorithm's hashes (in bytes) as the 'hash_size'
+parameter. Here is a working example that uses a 256-bit hash from the
+I<Digest::SHA256> module. Please see
+L<http://search.cpan.org/search?module=Digest::SHA256> for more information.
+
+The value passed to your digest function will be encoded as UTF-8 if the
+database is in version 2 format or higher.
+
+    use DBM::Deep;
+    use Digest::SHA256;
+
+    my $context = Digest::SHA256::new(256);
+
+    my $db = DBM::Deep->new(
+        filename  => "foo-sha.db",
+        digest    => \&my_digest,
+        hash_size => 32,
+    );
+
+    $db->{key1} = "value1";
+    $db->{key2} = "value2";
+    print "key1: " . $db->{key1} . "\n";
+    print "key2: " . $db->{key2} . "\n";
+
+    undef $db;
+    exit;
+
+    sub my_digest {
+        return substr( $context->hash($_[0]), 0, 32 );
+    }
+
+B<Note:> Your returned digest strings must be B<EXACTLY> the number
+of bytes you specify in the hash_size parameter (in this case 32). Undefined
+behavior will occur otherwise.
+
+B<Note:> If you do choose to use a custom digest algorithm, you must set it
+every time you access this file. Otherwise, the default (MD5) will be used.
+
+=head1 PERFORMANCE
+
+Because DBM::Deep is a concurrent datastore, every change is flushed to disk
+immediately and every read goes to disk. This means that DBM::Deep functions
+at the speed of disk (generally 10-20ms) vs. the speed of RAM (generally
+50-70ns), i.e. several orders of magnitude slower than the comparable
+in-memory data structure in Perl.
+
+There are several techniques you can use to speed up how DBM::Deep functions.
+
+=over 4
+
+=item * Put it on a ramdisk
+
+The easiest and quickest mechanism to making DBM::Deep run faster is to create
+a ramdisk and locate the DBM::Deep file there. Doing this as an option may
+become a feature of DBM::Deep, assuming there is a good ramdisk wrapper on CPAN.
+
+=item * Work at the tightest level possible
+
+It is much faster to assign the level of your db that you are working with to
+an intermediate variable than to re-look it up every time. Thus
+
+    # BAD
+    while ( my ($k, $v) = each %{$db->{foo}{bar}{baz}} ) {
+        ...
+    }
+
+    # GOOD
+    my $x = $db->{foo}{bar}{baz};
+    while ( my ($k, $v) = each %$x ) {
+        ...
+    }
+
+=item * Make your file as tight as possible
+
+If you know that you are not going to use more than 65K in your database,
+consider using the C<< pack_size => 'small' >> option. This will instruct
+DBM::Deep to use 16-bit addresses, meaning that the seek times will be less.
+ +=back + +=head1 SEE ALSO + +L, L, L, +L, L + +=cut diff --git a/src/modules/DBM/Deep/Engine.pm b/src/modules/DBM/Deep/Engine.pm new file mode 100644 index 0000000..4b9cf91 --- /dev/null +++ b/src/modules/DBM/Deep/Engine.pm @@ -0,0 +1,442 @@ +package DBM::Deep::Engine; + +use 5.008_004; + +use strict; +use warnings FATAL => 'all'; +no warnings 'recursion'; + +use DBM::Deep::Iterator (); + +# File-wide notes: +# * Every method in here assumes that the storage has been appropriately +# safeguarded. This can be anything from flock() to some sort of manual +# mutex. But, it's the caller's responsibility to make sure that this has +# been done. + +sub SIG_HASH () { 'H' } +sub SIG_ARRAY () { 'A' } + +=head1 NAME + +DBM::Deep::Engine - mediate mapping between DBM::Deep objects and storage medium + +=head1 PURPOSE + +This is an internal-use-only object for L. It mediates the low-level +mapping between the L objects and the storage medium. + +The purpose of this documentation is to provide low-level documentation for +developers. It is B intended to be used by the general public. This +documentation and what it documents can and will change without notice. + +=head1 OVERVIEW + +The engine exposes an API to the DBM::Deep objects (DBM::Deep, DBM::Deep::Array, +and DBM::Deep::Hash) for their use to access the actual stored values. This API +is the following: + +=over 4 + +=item * new + +=item * read_value + +=item * get_classname + +=item * make_reference + +=item * key_exists + +=item * delete_key + +=item * write_value + +=item * get_next_key + +=item * setup + +=item * clear + +=item * begin_work + +=item * commit + +=item * rollback + +=item * lock_exclusive + +=item * lock_shared + +=item * unlock + +=back + +They are explained in their own sections below. These methods, in turn, may +provide some bounds-checking, but primarily act to instantiate objects in the +Engine::Sector::* hierarchy and dispatch to them. + +=head1 TRANSACTIONS + +Transactions in DBM::Deep are implemented using a variant of MVCC. This attempts +to keep the amount of actual work done against the file low while still providing +Atomicity, Consistency, and Isolation. Durability, unfortunately, cannot be done +with only one file. + +=head2 STALENESS + +If another process uses a transaction slot and writes stuff to it, then +terminates, the data that process wrote is still within the file. In order to +address this, there is also a transaction staleness counter associated within +every write. Each time a transaction is started, that process increments that +transaction's staleness counter. If, when it reads a value, the staleness +counters aren't identical, DBM::Deep will consider the value on disk to be stale +and discard it. + +=head2 DURABILITY + +The fourth leg of ACID is Durability, the guarantee that when a commit returns, +the data will be there the next time you read from it. This should be regardless +of any crashes or powerdowns in between the commit and subsequent read. +DBM::Deep does provide that guarantee; once the commit returns, all of the data +has been transferred from the transaction shadow to the HEAD. The issue arises +with partial commits - a commit that is interrupted in some fashion. In keeping +with DBM::Deep's "tradition" of very light error-checking and non-existent +error-handling, there is no way to recover from a partial commit. (This is +probably a failure in Consistency as well as Durability.) + +Other DBMSes use transaction logs (a separate file, generally) to achieve +Durability. 
As DBM::Deep is a single-file, we would have to do something +similar to what SQLite and BDB do in terms of committing using synchronized +writes. To do this, we would have to use a much higher RAM footprint and some +serious programming that makes my head hurt just to think about it. + +=cut + +=head1 METHODS + +=head2 read_value( $obj, $key ) + +This takes an object that provides _base_offset() and a string. It returns the +value stored in the corresponding Sector::Value's data section. + +=cut + +sub read_value { die "read_value must be implemented in a child class" } + +=head2 get_classname( $obj ) + +This takes an object that provides _base_offset() and returns the classname (if +any) associated with it. + +It delegates to Sector::Reference::get_classname() for the heavy lifting. + +It performs a staleness check. + +=cut + +sub get_classname { die "get_classname must be implemented in a child class" } + +=head2 make_reference( $obj, $old_key, $new_key ) + +This takes an object that provides _base_offset() and two strings. The +strings correspond to the old key and new key, respectively. This operation +is equivalent to (given C<< $db->{foo} = []; >>) C<< $db->{bar} = $db->{foo} >>. + +This returns nothing. + +=cut + +sub make_reference { die "make_reference must be implemented in a child class" } + +=head2 key_exists( $obj, $key ) + +This takes an object that provides _base_offset() and a string for +the key to be checked. This returns 1 for true and "" for false. + +=cut + +sub key_exists { die "key_exists must be implemented in a child class" } + +=head2 delete_key( $obj, $key ) + +This takes an object that provides _base_offset() and a string for +the key to be deleted. This returns the result of the Sector::Reference +delete_key() method. + +=cut + +sub delete_key { die "delete_key must be implemented in a child class" } + +=head2 write_value( $obj, $key, $value ) + +This takes an object that provides _base_offset(), a string for the +key, and a value. This value can be anything storable within L. + +This returns 1 upon success. + +=cut + +sub write_value { die "write_value must be implemented in a child class" } + +=head2 setup( $obj ) + +This takes an object that provides _base_offset(). It will do everything needed +in order to properly initialize all values for necessary functioning. If this is +called upon an already initialized object, this will also reset the inode. + +This returns 1. + +=cut + +sub setup { die "setup must be implemented in a child class" } + +=head2 begin_work( $obj ) + +This takes an object that provides _base_offset(). It will set up all necessary +bookkeeping in order to run all work within a transaction. + +If $obj is already within a transaction, an error will be thrown. If there are +no more available transactions, an error will be thrown. + +This returns undef. + +=cut + +sub begin_work { die "begin_work must be implemented in a child class" } + +=head2 rollback( $obj ) + +This takes an object that provides _base_offset(). It will revert all +actions taken within the running transaction. + +If $obj is not within a transaction, an error will be thrown. + +This returns 1. + +=cut + +sub rollback { die "rollback must be implemented in a child class" } + +=head2 commit( $obj ) + +This takes an object that provides _base_offset(). It will apply all +actions taken within the transaction to the HEAD. + +If $obj is not within a transaction, an error will be thrown. + +This returns 1. 
+ +=cut + +sub commit { die "commit must be implemented in a child class" } + +=head2 get_next_key( $obj, $prev_key ) + +This takes an object that provides _base_offset() and an optional string +representing the prior key returned via a prior invocation of this method. + +This method delegates to C<< DBM::Deep::Iterator->get_next_key() >>. + +=cut + +# XXX Add staleness here +sub get_next_key { + my $self = shift; + my ($obj, $prev_key) = @_; + + # XXX Need to add logic about resetting the iterator if any key in the + # reference has changed + unless ( defined $prev_key ) { + eval "use " . $self->iterator_class; die $@ if $@; + $obj->{iterator} = $self->iterator_class->new({ + base_offset => $obj->_base_offset, + engine => $self, + }); + } + + return $obj->{iterator}->get_next_key( $obj ); +} + +=head2 lock_exclusive() + +This takes an object that provides _base_offset(). It will guarantee that +the storage has taken precautions to be safe for a write. + +This returns nothing. + +=cut + +sub lock_exclusive { + my $self = shift; + my ($obj) = @_; + return $self->storage->lock_exclusive( $obj ); +} + +=head2 lock_shared() + +This takes an object that provides _base_offset(). It will guarantee that +the storage has taken precautions to be safe for a read. + +This returns nothing. + +=cut + +sub lock_shared { + my $self = shift; + my ($obj) = @_; + return $self->storage->lock_shared( $obj ); +} + +=head2 unlock() + +This takes an object that provides _base_offset(). It will guarantee that +the storage has released the most recently-taken lock. + +This returns nothing. + +=cut + +sub unlock { + my $self = shift; + my ($obj) = @_; + + my $rv = $self->storage->unlock( $obj ); + + $self->flush if $rv; + + return $rv; +} + +=head1 INTERNAL METHODS + +The following methods are internal-use-only to DBM::Deep::Engine and its +child classes. + +=cut + +=head2 flush() + +This takes no arguments. It will do everything necessary to flush all things to +disk. This is usually called during unlock() and setup(). + +This returns nothing. + +=cut + +sub flush { + my $self = shift; + + # Why do we need to have the storage flush? Shouldn't autoflush take care of + # things? -RobK, 2008-06-26 + $self->storage->flush; + + return; +} + +=head2 load_sector( $loc ) + +This takes an id/location/offset and loads the sector based on the engine's +defined sector type. + +=cut + +sub load_sector { $_[0]->sector_type->load( @_ ) } + +=head2 clear( $obj ) + +This takes an object that provides _base_offset() and deletes all its +elements, returning nothing. + +=cut + +sub clear { die "clear must be implemented in a child class" } + +=head2 cache / clear_cache + +This is the cache of loaded Reference sectors. + +=cut + +sub cache { $_[0]{cache} ||= {} } +sub clear_cache { %{$_[0]->cache} = () } + +=head2 supports( $option ) + +This returns a boolean depending on if this instance of DBM::Dep supports +that feature. C<$option> can be one of: + +=over 4 + +=item * transactions + +=item * singletons + +=back + +Any other value will return false. + +=cut + +sub supports { die "supports must be implemented in a child class" } + +=head1 ACCESSORS + +The following are readonly attributes. 
+ +=over 4 + +=item * storage + +=item * sector_type + +=item * iterator_class + +=back + +=cut + +sub storage { $_[0]{storage} } + +sub sector_type { die "sector_type must be implemented in a child class" } +sub iterator_class { die "iterator_class must be implemented in a child class" } + +# This code is to make sure we write all the values in the $value to the +# disk and to make sure all changes to $value after the assignment are +# reflected on disk. This may be counter-intuitive at first, but it is +# correct dwimmery. +# NOTE - simply tying $value won't perform a STORE on each value. Hence, +# the copy to a temp value. +sub _descend { + my $self = shift; + my ($value, $value_sector) = @_; + my $r = Scalar::Util::reftype( $value ) || ''; + + if ( $r eq 'ARRAY' ) { + my @temp = @$value; + tie @$value, 'DBM::Deep', { + base_offset => $value_sector->offset, + staleness => $value_sector->staleness, + storage => $self->storage, + engine => $self, + }; + @$value = @temp; + bless $value, 'DBM::Deep::Array' unless Scalar::Util::blessed( $value ); + } + elsif ( $r eq 'HASH' ) { + my %temp = %$value; + tie %$value, 'DBM::Deep', { + base_offset => $value_sector->offset, + staleness => $value_sector->staleness, + storage => $self->storage, + engine => $self, + }; + %$value = %temp; + bless $value, 'DBM::Deep::Hash' unless Scalar::Util::blessed( $value ); + } + + return; +} + +1; +__END__ diff --git a/src/modules/DBM/Deep/Engine/DBI.pm b/src/modules/DBM/Deep/Engine/DBI.pm new file mode 100644 index 0000000..28b7daf --- /dev/null +++ b/src/modules/DBM/Deep/Engine/DBI.pm @@ -0,0 +1,367 @@ +package DBM::Deep::Engine::DBI; + +use 5.008_004; + +use strict; +use warnings FATAL => 'all'; +no warnings 'recursion'; + +use base 'DBM::Deep::Engine'; + +use DBM::Deep::Sector::DBI (); +use DBM::Deep::Storage::DBI (); + +sub sector_type { 'DBM::Deep::Sector::DBI' } +sub iterator_class { 'DBM::Deep::Iterator::DBI' } + +sub new { + my $class = shift; + my ($args) = @_; + + $args->{storage} = DBM::Deep::Storage::DBI->new( $args ) + unless exists $args->{storage}; + + my $self = bless { + storage => undef, + external_refs => undef, + }, $class; + + # Grab the parameters we want to use + foreach my $param ( keys %$self ) { + next unless exists $args->{$param}; + $self->{$param} = $args->{$param}; + } + + return $self; +} + +sub setup { + my $self = shift; + my ($obj) = @_; + + # Default the id to 1. This means that we will be creating a row if there + # isn't one. The assumption is that the row_id=1 cannot never be deleted. I + # don't know if this is a good assumption. + $obj->{base_offset} ||= 1; + + my ($rows) = $self->storage->read_from( + refs => $obj->_base_offset, + qw( ref_type ), + ); + + # We don't have a row yet. 
+ unless ( @$rows ) { + $self->storage->write_to( + refs => $obj->_base_offset, + ref_type => $obj->_type, + ); + } + + my $sector = DBM::Deep::Sector::DBI::Reference->new({ + engine => $self, + offset => $obj->_base_offset, + }); +} + +sub read_value { + my $self = shift; + my ($obj, $key) = @_; + + my $sector = $self->load_sector( $obj->_base_offset, 'refs' ) + or return; + +# if ( $sector->staleness != $obj->_staleness ) { +# return; +# } + +# my $key_md5 = $self->_apply_digest( $key ); + + my $value_sector = $sector->get_data_for({ + key => $key, +# key_md5 => $key_md5, + allow_head => 1, + }); + + unless ( $value_sector ) { + return undef + } + + return $value_sector->data; +} + +sub get_classname { + my $self = shift; + my ($obj) = @_; + + my $sector = $self->load_sector( $obj->_base_offset, 'refs' ) + or return; + + return $sector->get_classname; +} + +sub make_reference { + my $self = shift; + my ($obj, $old_key, $new_key) = @_; + + my $sector = $self->load_sector( $obj->_base_offset, 'refs' ) + or return; + +# if ( $sector->staleness != $obj->_staleness ) { +# return; +# } + + my $value_sector = $sector->get_data_for({ + key => $old_key, + allow_head => 1, + }); + + unless ( $value_sector ) { + $value_sector = DBM::Deep::Sector::DBI::Scalar->new({ + engine => $self, + data => undef, + }); + + $sector->write_data({ + key => $old_key, + value => $value_sector, + }); + } + + if ( $value_sector->isa( 'DBM::Deep::Sector::DBI::Reference' ) ) { + $sector->write_data({ + key => $new_key, + value => $value_sector, + }); + $value_sector->increment_refcount; + } + else { + $sector->write_data({ + key => $new_key, + value => $value_sector->clone, + }); + } + + return; +} + +# exists returns '', not undefined. +sub key_exists { + my $self = shift; + my ($obj, $key) = @_; + + my $sector = $self->load_sector( $obj->_base_offset, 'refs' ) + or return ''; + +# if ( $sector->staleness != $obj->_staleness ) { +# return ''; +# } + + my $data = $sector->get_data_for({ +# key_md5 => $self->_apply_digest( $key ), + key => $key, + allow_head => 1, + }); + + # exists() returns 1 or '' for true/false. + return $data ? 1 : ''; +} + +sub delete_key { + my $self = shift; + my ($obj, $key) = @_; + + my $sector = $self->load_sector( $obj->_base_offset, 'refs' ) + or return ''; + +# if ( $sector->staleness != $obj->_staleness ) { +# return ''; +# } + + return $sector->delete_key({ +# key_md5 => $self->_apply_digest( $key ), + key => $key, + allow_head => 0, + }); +} + +sub write_value { + my $self = shift; + my ($obj, $key, $value) = @_; + + my $r = Scalar::Util::reftype( $value ) || ''; + { + last if $r eq ''; + last if $r eq 'HASH'; + last if $r eq 'ARRAY'; + + DBM::Deep->_throw_error( + "Storage of references of type '$r' is not supported." + ); + } + + # Load the reference entry + # Determine if the row was deleted under us + # + + my $sector = $self->load_sector( $obj->_base_offset, 'refs' ) + or die "Cannot load sector at '@{[$obj->_base_offset]}'\n";; + + my ($type, $class); + if ( + $r eq 'ARRAY' || $r eq 'HASH' and ref $value ne 'DBM::Deep::Null' + ) { + my $tmpvar; + if ( $r eq 'ARRAY' ) { + $tmpvar = tied @$value; + } elsif ( $r eq 'HASH' ) { + $tmpvar = tied %$value; + } + + if ( $tmpvar ) { + my $is_dbm_deep = eval { local $SIG{'__DIE__'}; $tmpvar->isa( 'DBM::Deep' ); }; + + unless ( $is_dbm_deep ) { + DBM::Deep->_throw_error( "Cannot store something that is tied." 
); + } + + unless ( $tmpvar->_engine->storage == $self->storage ) { + DBM::Deep->_throw_error( "Cannot store values across DBM::Deep files. Please use export() instead." ); + } + + # Load $tmpvar's sector + + # First, verify if we're storing the same thing to this spot. If we + # are, then this should be a no-op. -EJS, 2008-05-19 + + # See whether or not we are storing ourselves to ourself. + # Write the sector as data in this reference (keyed by $key) + my $value_sector = $self->load_sector( $tmpvar->_base_offset, 'refs' ); + $sector->write_data({ + key => $key, +# key_md5 => $self->_apply_digest( $key ), + value => $value_sector, + }); + $value_sector->increment_refcount; + + return 1; + } + + $type = substr( $r, 0, 1 ); + $class = 'DBM::Deep::Sector::DBI::Reference'; + } + else { + if ( tied($value) ) { + DBM::Deep->_throw_error( "Cannot store something that is tied." ); + } + + if ( ref $value eq 'DBM::Deep::Null' ) { + DBM::Deep::_warnif( + 'uninitialized', 'Assignment of stale reference' + ); + $value = undef; + } + + $class = 'DBM::Deep::Sector::DBI::Scalar'; + $type = 'S'; + } + + # Create this after loading the reference sector in case something bad + # happens. This way, we won't allocate value sector(s) needlessly. + my $value_sector = $class->new({ + engine => $self, + data => $value, + type => $type, + }); + + $sector->write_data({ + key => $key, +# key_md5 => $self->_apply_digest( $key ), + value => $value_sector, + }); + + $self->_descend( $value, $value_sector ); + + return 1; +} + +#sub begin_work { +# my $self = shift; +# die "Transactions are not supported by this engine" +# unless $self->supports('transactions'); +# +# if ( $self->in_txn ) { +# DBM::Deep->_throw_error( "Cannot begin_work within an active transaction" ); +# } +# +# $self->storage->begin_work; +# +# $self->in_txn( 1 ); +# +# return 1; +#} +# +#sub rollback { +# my $self = shift; +# die "Transactions are not supported by this engine" +# unless $self->supports('transactions'); +# +# if ( !$self->in_txn ) { +# DBM::Deep->_throw_error( "Cannot rollback without an active transaction" ); +# } +# +# $self->storage->rollback; +# +# $self->in_txn( 0 ); +# +# return 1; +#} +# +#sub commit { +# my $self = shift; +# die "Transactions are not supported by this engine" +# unless $self->supports('transactions'); +# +# if ( !$self->in_txn ) { +# DBM::Deep->_throw_error( "Cannot commit without an active transaction" ); +# } +# +# $self->storage->commit; +# +# $self->in_txn( 0 ); +# +# return 1; +#} +# +#sub in_txn { +# my $self = shift; +# $self->{in_txn} = shift if @_; +# $self->{in_txn}; +#} + +sub supports { + my $self = shift; + my ($feature) = @_; + + return if $feature eq 'transactions'; + return 1 if $feature eq 'singletons'; + return; +} + +sub db_version { + return '1.0020' +} + +sub clear { + my $self = shift; + my $obj = shift; + + my $sector = $self->load_sector( $obj->_base_offset, 'refs' ) + or return; + + $sector->clear; + + return; +} + +1; +__END__ diff --git a/src/modules/DBM/Deep/Engine/File.pm b/src/modules/DBM/Deep/Engine/File.pm new file mode 100644 index 0000000..435e84c --- /dev/null +++ b/src/modules/DBM/Deep/Engine/File.pm @@ -0,0 +1,1191 @@ +package DBM::Deep::Engine::File; + +use 5.008_004; + +use strict; +use warnings FATAL => 'all'; +no warnings 'recursion'; + +use base qw( DBM::Deep::Engine ); + +use Scalar::Util (); + +use DBM::Deep::Null (); +use DBM::Deep::Sector::File (); +use DBM::Deep::Storage::File (); + +sub sector_type { 'DBM::Deep::Sector::File' } +sub iterator_class { 
'DBM::Deep::Iterator::File' }
+
+my $STALE_SIZE = 2;
+
+# Setup file and tag signatures. These should never change.
+sub SIG_FILE    () { 'DPDB' }
+sub SIG_HEADER  () { 'h' }
+sub SIG_NULL    () { 'N' }
+sub SIG_DATA    () { 'D' }
+sub SIG_UNIDATA () { 'U' }
+sub SIG_INDEX   () { 'I' }
+sub SIG_BLIST   () { 'B' }
+sub SIG_FREE    () { 'F' }
+sub SIG_SIZE    () { 1 }
+# SIG_HASH and SIG_ARRAY are defined in DBM::Deep::Engine
+
+# Please refer to the pack() documentation for further information
+my %StP = (
+    1 => 'C', # Unsigned char value (no order needed as it's just one byte)
+    2 => 'n', # Unsigned short in "network" (big-endian) order
+    4 => 'N', # Unsigned long in "network" (big-endian) order
+    8 => 'Q', # Unsigned quad (no order specified, presumably machine-dependent)
+);
+
+=head1 NAME
+
+DBM::Deep::Engine::File - engine for use with DBM::Deep::Storage::File
+
+=head1 PURPOSE
+
+This is the engine for use with L<DBM::Deep::Storage::File>.
+
+=head1 EXTERNAL METHODS
+
+=head2 new()
+
+This takes a set of args. These args are described in the documentation for
+L<DBM::Deep/new>.
+
+=cut
+
+sub new {
+    my $class = shift;
+    my ($args) = @_;
+
+    $args->{storage} = DBM::Deep::Storage::File->new( $args )
+        unless exists $args->{storage};
+
+    my $self = bless {
+        byte_size => 4,
+
+        digest => undef,
+        hash_size => 16, # In bytes
+        hash_chars => 256, # Number of chars the algorithm uses per byte
+        max_buckets => 16,
+        num_txns => 1, # The HEAD
+        trans_id => 0, # Default to the HEAD
+
+        data_sector_size => 64, # Size in bytes of each data sector
+
+        entries => {}, # This is the list of entries for transactions
+        storage => undef,
+
+        external_refs => undef,
+    }, $class;
+
+    # Never allow byte_size to be set directly.
+    delete $args->{byte_size};
+    if ( defined $args->{pack_size} ) {
+        if ( lc $args->{pack_size} eq 'small' ) {
+            $args->{byte_size} = 2;
+        }
+        elsif ( lc $args->{pack_size} eq 'medium' ) {
+            $args->{byte_size} = 4;
+        }
+        elsif ( lc $args->{pack_size} eq 'large' ) {
+            $args->{byte_size} = 8;
+        }
+        else {
+            DBM::Deep->_throw_error( "Unknown pack_size value: '$args->{pack_size}'" );
+        }
+    }
+
+    # Grab the parameters we want to use
+    foreach my $param ( keys %$self ) {
+        next unless exists $args->{$param};
+        $self->{$param} = $args->{$param};
+    }
+
+    my %validations = (
+        max_buckets => { floor => 16, ceil => 256 },
+        num_txns => { floor => 1, ceil => 255 },
+        data_sector_size => { floor => 32, ceil => 256 },
+    );
+
+    while ( my ($attr, $c) = each %validations ) {
+        if ( !defined $self->{$attr}
+          || !length $self->{$attr}
+          || $self->{$attr} =~ /\D/
+          || $self->{$attr} < $c->{floor}
+        ) {
+            $self->{$attr} = '(undef)' if !defined $self->{$attr};
+            warn "Floor of $attr is $c->{floor}. Setting it to $c->{floor} from '$self->{$attr}'\n";
+            $self->{$attr} = $c->{floor};
+        }
+        elsif ( $self->{$attr} > $c->{ceil} ) {
+            warn "Ceiling of $attr is $c->{ceil}.
Setting it to $c->{ceil} from '$self->{$attr}'\n"; + $self->{$attr} = $c->{ceil}; + } + } + + if ( !$self->{digest} ) { + require Digest::MD5; + $self->{digest} = \&Digest::MD5::md5; + } + + return $self; +} + +sub read_value { + my $self = shift; + my ($obj, $key) = @_; + + # This will be a Reference sector + my $sector = $self->load_sector( $obj->_base_offset ) + or return; + + if ( $sector->staleness != $obj->_staleness ) { + return; + } + + my $key_md5 = $self->_apply_digest( $key ); + + my $value_sector = $sector->get_data_for({ + key_md5 => $key_md5, + allow_head => 1, + }); + + unless ( $value_sector ) { + return undef + } + + return $value_sector->data; +} + +sub get_classname { + my $self = shift; + my ($obj) = @_; + + # This will be a Reference sector + my $sector = $self->load_sector( $obj->_base_offset ) + or DBM::Deep->_throw_error( "How did get_classname fail (no sector for '$obj')?!" ); + + if ( $sector->staleness != $obj->_staleness ) { + return; + } + + return $sector->get_classname; +} + +sub make_reference { + my $self = shift; + my ($obj, $old_key, $new_key) = @_; + + # This will be a Reference sector + my $sector = $self->load_sector( $obj->_base_offset ) + or DBM::Deep->_throw_error( "How did make_reference fail (no sector for '$obj')?!" ); + + if ( $sector->staleness != $obj->_staleness ) { + return; + } + + my $old_md5 = $self->_apply_digest( $old_key ); + + my $value_sector = $sector->get_data_for({ + key_md5 => $old_md5, + allow_head => 1, + }); + + unless ( $value_sector ) { + $value_sector = DBM::Deep::Sector::File::Null->new({ + engine => $self, + data => undef, + }); + + $sector->write_data({ + key_md5 => $old_md5, + key => $old_key, + value => $value_sector, + }); + } + + if ( $value_sector->isa( 'DBM::Deep::Sector::File::Reference' ) ) { + $sector->write_data({ + key => $new_key, + key_md5 => $self->_apply_digest( $new_key ), + value => $value_sector, + }); + $value_sector->increment_refcount; + } + else { + $sector->write_data({ + key => $new_key, + key_md5 => $self->_apply_digest( $new_key ), + value => $value_sector->clone, + }); + } + + return; +} + +# exists returns '', not undefined. +sub key_exists { + my $self = shift; + my ($obj, $key) = @_; + + # This will be a Reference sector + my $sector = $self->load_sector( $obj->_base_offset ) + or return ''; + + if ( $sector->staleness != $obj->_staleness ) { + return ''; + } + + my $data = $sector->get_data_for({ + key_md5 => $self->_apply_digest( $key ), + allow_head => 1, + }); + + # exists() returns 1 or '' for true/false. + return $data ? 1 : ''; +} + +sub delete_key { + my $self = shift; + my ($obj, $key) = @_; + + my $sector = $self->load_sector( $obj->_base_offset ) + or return; + + if ( $sector->staleness != $obj->_staleness ) { + return; + } + + return $sector->delete_key({ + key_md5 => $self->_apply_digest( $key ), + allow_head => 0, + }); +} + +sub write_value { + my $self = shift; + my ($obj, $key, $value) = @_; + + my $r = Scalar::Util::reftype( $value ) || ''; + { + last if $r eq ''; + last if $r eq 'HASH'; + last if $r eq 'ARRAY'; + + DBM::Deep->_throw_error( + "Storage of references of type '$r' is not supported." + ); + } + + # This will be a Reference sector + my $sector = $self->load_sector( $obj->_base_offset ) + or DBM::Deep->_throw_error( "Cannot write to a deleted spot in DBM::Deep." ); + + if ( $sector->staleness != $obj->_staleness ) { + DBM::Deep->_throw_error( "Cannot write to a deleted spot in DBM::Deep." 
); + } + + my ($class, $type); + if ( !defined $value ) { + $class = 'DBM::Deep::Sector::File::Null'; + } + elsif ( ref $value eq 'DBM::Deep::Null' ) { + DBM::Deep::_warnif( + 'uninitialized', 'Assignment of stale reference' + ); + $class = 'DBM::Deep::Sector::File::Null'; + $value = undef; + } + elsif ( $r eq 'ARRAY' || $r eq 'HASH' ) { + my $tmpvar; + if ( $r eq 'ARRAY' ) { + $tmpvar = tied @$value; + } elsif ( $r eq 'HASH' ) { + $tmpvar = tied %$value; + } + + if ( $tmpvar ) { + my $is_dbm_deep = eval { local $SIG{'__DIE__'}; $tmpvar->isa( 'DBM::Deep' ); }; + + unless ( $is_dbm_deep ) { + DBM::Deep->_throw_error( "Cannot store something that is tied." ); + } + + unless ( $tmpvar->_engine->storage == $self->storage ) { + DBM::Deep->_throw_error( "Cannot store values across DBM::Deep files. Please use export() instead." ); + } + + # First, verify if we're storing the same thing to this spot. If we + # are, then this should be a no-op. -EJS, 2008-05-19 + my $loc = $sector->get_data_location_for({ + key_md5 => $self->_apply_digest( $key ), + allow_head => 1, + }); + + if ( defined($loc) && $loc == $tmpvar->_base_offset ) { + return 1; + } + + #XXX Can this use $loc? + my $value_sector = $self->load_sector( $tmpvar->_base_offset ); + $sector->write_data({ + key => $key, + key_md5 => $self->_apply_digest( $key ), + value => $value_sector, + }); + $value_sector->increment_refcount; + + return 1; + } + + $class = 'DBM::Deep::Sector::File::Reference'; + $type = substr( $r, 0, 1 ); + } + else { + if ( tied($value) ) { + DBM::Deep->_throw_error( "Cannot store something that is tied." ); + } + $class = 'DBM::Deep::Sector::File::Scalar'; + } + + # Create this after loading the reference sector in case something bad + # happens. This way, we won't allocate value sector(s) needlessly. + my $value_sector = $class->new({ + engine => $self, + data => $value, + type => $type, + }); + + $sector->write_data({ + key => $key, + key_md5 => $self->_apply_digest( $key ), + value => $value_sector, + }); + + $self->_descend( $value, $value_sector ); + + return 1; +} + +sub setup { + my $self = shift; + my ($obj) = @_; + + # We're opening the file. + unless ( $obj->_base_offset ) { + my $bytes_read = $self->_read_file_header; + + # Creating a new file + unless ( $bytes_read ) { + $self->_write_file_header; + + # 1) Create Array/Hash entry + my $initial_reference = DBM::Deep::Sector::File::Reference->new({ + engine => $self, + type => $obj->_type, + }); + $obj->{base_offset} = $initial_reference->offset; + $obj->{staleness} = $initial_reference->staleness; + + $self->storage->flush; + } + # Reading from an existing file + else { + $obj->{base_offset} = $bytes_read; + my $initial_reference = DBM::Deep::Sector::File::Reference->new({ + engine => $self, + offset => $obj->_base_offset, + }); + unless ( $initial_reference ) { + DBM::Deep->_throw_error("Corrupted file, no master index record"); + } + + unless ($obj->_type eq $initial_reference->type) { + DBM::Deep->_throw_error("File type mismatch"); + } + + $obj->{staleness} = $initial_reference->staleness; + } + } + + $self->storage->set_inode; + + return 1; +} + +sub begin_work { + my $self = shift; + my ($obj) = @_; + + unless ($self->supports('transactions')) { + DBM::Deep->_throw_error( "Cannot begin_work unless transactions are supported" ); + } + + if ( $self->trans_id ) { + DBM::Deep->_throw_error( "Cannot begin_work within an active transaction" ); + } + + my @slots = $self->read_txn_slots; + my $found; + for my $i ( 0 .. 
$self->num_txns-2 ) { + next if $slots[$i]; + + $slots[$i] = 1; + $self->set_trans_id( $i + 1 ); + $found = 1; + last; + } + unless ( $found ) { + DBM::Deep->_throw_error( "Cannot allocate transaction ID" ); + } + $self->write_txn_slots( @slots ); + + if ( !$self->trans_id ) { + DBM::Deep->_throw_error( "Cannot begin_work - no available transactions" ); + } + + return; +} + +sub rollback { + my $self = shift; + my ($obj) = @_; + + unless ($self->supports('transactions')) { + DBM::Deep->_throw_error( "Cannot rollback unless transactions are supported" ); + } + + if ( !$self->trans_id ) { + DBM::Deep->_throw_error( "Cannot rollback without an active transaction" ); + } + + # Each entry is the file location for a bucket that has a modification for + # this transaction. The entries need to be expunged. + foreach my $entry (@{ $self->get_entries } ) { + # Remove the entry here + my $read_loc = $entry + + $self->hash_size + + $self->byte_size + + $self->byte_size + + ($self->trans_id - 1) * ( $self->byte_size + $STALE_SIZE ); + + my $data_loc = $self->storage->read_at( $read_loc, $self->byte_size ); + $data_loc = unpack( $StP{$self->byte_size}, $data_loc ); + $self->storage->print_at( $read_loc, pack( $StP{$self->byte_size}, 0 ) ); + + if ( $data_loc > 1 ) { + $self->load_sector( $data_loc )->free; + } + } + + $self->clear_entries; + + my @slots = $self->read_txn_slots; + $slots[$self->trans_id-1] = 0; + $self->write_txn_slots( @slots ); + $self->inc_txn_staleness_counter( $self->trans_id ); + $self->set_trans_id( 0 ); + + return 1; +} + +sub commit { + my $self = shift; + my ($obj) = @_; + + unless ($self->supports('transactions')) { + DBM::Deep->_throw_error( "Cannot commit unless transactions are supported" ); + } + + if ( !$self->trans_id ) { + DBM::Deep->_throw_error( "Cannot commit without an active transaction" ); + } + + foreach my $entry (@{ $self->get_entries } ) { + # Overwrite the entry in head with the entry in trans_id + my $base = $entry + + $self->hash_size + + $self->byte_size; + + my $head_loc = $self->storage->read_at( $base, $self->byte_size ); + $head_loc = unpack( $StP{$self->byte_size}, $head_loc ); + + my $spot = $base + $self->byte_size + ($self->trans_id - 1) * ( $self->byte_size + $STALE_SIZE ); + my $trans_loc = $self->storage->read_at( + $spot, $self->byte_size, + ); + + $self->storage->print_at( $base, $trans_loc ); + $self->storage->print_at( + $spot, + pack( $StP{$self->byte_size} . ' ' . $StP{$STALE_SIZE}, (0) x 2 ), + ); + + if ( $head_loc > 1 ) { + $self->load_sector( $head_loc )->free; + } + } + + $self->clear_entries; + + my @slots = $self->read_txn_slots; + $slots[$self->trans_id-1] = 0; + $self->write_txn_slots( @slots ); + $self->inc_txn_staleness_counter( $self->trans_id ); + $self->set_trans_id( 0 ); + + return 1; +} + +=head1 INTERNAL METHODS + +The following methods are internal-use-only to DBM::Deep::Engine::File. + +=cut + +=head2 read_txn_slots() + +This takes no arguments. + +This will return an array with a 1 or 0 in each slot. Each spot represents one +available transaction. If the slot is 1, that transaction is taken. If it is 0, +the transaction is available. + +=cut + +sub read_txn_slots { + my $self = shift; + my $bl = $self->txn_bitfield_len; + my $num_bits = $bl * 8; + return split '', unpack( 'b'.$num_bits, + $self->storage->read_at( + $self->trans_loc, $bl, + ) + ); +} + +=head2 write_txn_slots( @slots ) + +This takes an array of 1's and 0's. This array represents the transaction slots +returned by L. 
In other words, the following is true:
+
+    @x = read_txn_slots( write_txn_slots( @x ) );
+
+(With the obviously missing object referents added back in.)
+
+=cut
+
+sub write_txn_slots {
+    my $self = shift;
+    my $num_bits = $self->txn_bitfield_len * 8;
+    $self->storage->print_at( $self->trans_loc,
+        pack( 'b'.$num_bits, join('', @_) ),
+    );
+}
+
+=head2 get_running_txn_ids()
+
+This takes no arguments.
+
+This will return an array of taken transaction IDs. This wraps L</read_txn_slots>.
+
+=cut
+
+sub get_running_txn_ids {
+    my $self = shift;
+    my @transactions = $self->read_txn_slots;
+    my @trans_ids = map { $_+1 } grep { $transactions[$_] } 0 .. $#transactions;
+}
+
+=head2 get_txn_staleness_counter( $trans_id )
+
+This will return the staleness counter for the given transaction ID. Please see
+L<DBM::Deep::Engine/STALENESS> for more information.
+
+=cut
+
+sub get_txn_staleness_counter {
+    my $self = shift;
+    my ($trans_id) = @_;
+
+    # Hardcode staleness of 0 for the HEAD
+    return 0 unless $trans_id;
+
+    return unpack( $StP{$STALE_SIZE},
+        $self->storage->read_at(
+            $self->trans_loc + $self->txn_bitfield_len + $STALE_SIZE * ($trans_id - 1),
+            $STALE_SIZE,
+        )
+    );
+}
+
+=head2 inc_txn_staleness_counter( $trans_id )
+
+This will increment the staleness counter for the given transaction ID. Please see
+L<DBM::Deep::Engine/STALENESS> for more information.
+
+=cut
+
+sub inc_txn_staleness_counter {
+    my $self = shift;
+    my ($trans_id) = @_;
+
+    # Hardcode staleness of 0 for the HEAD
+    return 0 unless $trans_id;
+
+    $self->storage->print_at(
+        $self->trans_loc + $self->txn_bitfield_len + $STALE_SIZE * ($trans_id - 1),
+        pack( $StP{$STALE_SIZE}, $self->get_txn_staleness_counter( $trans_id ) + 1 ),
+    );
+}
+
+=head2 get_entries()
+
+This takes no arguments.
+
+This returns a list of all the sectors that have been modified by this transaction.
+
+=cut
+
+sub get_entries {
+    my $self = shift;
+    return [ keys %{ $self->{entries}{$self->trans_id} ||= {} } ];
+}
+
+=head2 add_entry( $trans_id, $location )
+
+This takes a transaction ID and a file location and marks the sector at that
+location as having been modified by the transaction identified by $trans_id.
+
+This returns nothing.
+
+B<NOTE>: Unlike all the other _entries() methods, there are several cases where
+C<< $trans_id != $self->trans_id >> for this method.
+
+=cut
+
+sub add_entry {
+    my $self = shift;
+    my ($trans_id, $loc) = @_;
+
+    $self->{entries}{$trans_id} ||= {};
+    $self->{entries}{$trans_id}{$loc} = undef;
+}
+
+=head2 reindex_entry( $old_loc, $new_loc )
+
+This takes two locations (old and new, respectively). If a location that has
+been modified by this transaction is subsequently reindexed due to a bucketlist
+overflowing, then the entries hash needs to be made aware of this change.
+
+This returns nothing.
+
+=cut
+
+sub reindex_entry {
+    my $self = shift;
+    my ($old_loc, $new_loc) = @_;
+
+    TRANS:
+    while ( my ($trans_id, $locs) = each %{ $self->{entries} } ) {
+        if ( exists $locs->{$old_loc} ) {
+            delete $locs->{$old_loc};
+            $locs->{$new_loc} = undef;
+            next TRANS;
+        }
+    }
+}
+
+=head2 clear_entries()
+
+This takes no arguments. It will clear the entries list for the running
+transaction.
+
+This returns nothing.
+
+=cut
+
+sub clear_entries {
+    my $self = shift;
+    delete $self->{entries}{$self->trans_id};
+}
+
+=head2 _write_file_header()
+
+This writes the file header for a new file. This will write the various settings
+that set how the file is interpreted.
+
+=head2 _read_file_header()
+
+This reads the file header from an existing file.
This will read the various +settings that set how the file is interpreted. + +=cut + +{ + my $header_fixed = length( __PACKAGE__->SIG_FILE ) + 1 + 4 + 4; + my $this_file_version = 4; + my $min_file_version = 3; + + sub _write_file_header { + my $self = shift; + + my $nt = $self->num_txns; + my $bl = $self->txn_bitfield_len; + + my $header_var = 1 + 1 + 1 + 1 + $bl + $STALE_SIZE * ($nt - 1) + 3 * $self->byte_size; + + my $loc = $self->storage->request_space( $header_fixed + $header_var ); + + $self->storage->print_at( $loc, + $self->SIG_FILE, + $self->SIG_HEADER, + pack('N', $this_file_version), # At this point, we're at 9 bytes + pack('N', $header_var), # header size + # --- Above is $header_fixed. Below is $header_var + pack('C', $self->byte_size), + + # These shenanigans are to allow a 256 within a C + pack('C', $self->max_buckets - 1), + pack('C', $self->data_sector_size - 1), + + pack('C', $nt), + pack('C' . $bl, 0 ), # Transaction activeness bitfield + pack($StP{$STALE_SIZE}.($nt-1), 0 x ($nt-1) ), # Transaction staleness counters + pack($StP{$self->byte_size}, 0), # Start of free chain (blist size) + pack($StP{$self->byte_size}, 0), # Start of free chain (data size) + pack($StP{$self->byte_size}, 0), # Start of free chain (index size) + ); + + #XXX Set these less fragilely + $self->set_trans_loc( $header_fixed + 4 ); + $self->set_chains_loc( $header_fixed + 4 + $bl + $STALE_SIZE * ($nt-1) ); + + $self->{v} = $this_file_version; + + return; + } + + sub _read_file_header { + my $self = shift; + + my $buffer = $self->storage->read_at( 0, $header_fixed ); + return unless length($buffer); + + my ($file_signature, $sig_header, $file_version, $size) = unpack( + 'A4 A N N', $buffer + ); + + unless ( $file_signature eq $self->SIG_FILE ) { + $self->storage->close; + DBM::Deep->_throw_error( "Signature not found -- file is not a Deep DB" ); + } + + unless ( $sig_header eq $self->SIG_HEADER ) { + $self->storage->close; + DBM::Deep->_throw_error( "Pre-1.00 file version found" ); + } + + if ( $file_version < $min_file_version ) { + $self->storage->close; + DBM::Deep->_throw_error( + "This file version is too old - " + . _guess_version($file_version) . + " - expected " . _guess_version($min_file_version) + . " to " . _guess_version($this_file_version) + ); + } + if ( $file_version > $this_file_version ) { + $self->storage->close; + DBM::Deep->_throw_error( + "This file version is too new - probably " + . _guess_version($file_version) . + " - expected " . _guess_version($min_file_version) + . " to " . _guess_version($this_file_version) + ); + } + $self->{v} = $file_version; + + my $buffer2 = $self->storage->read_at( undef, $size ); + my @values = unpack( 'C C C C', $buffer2 ); + + if ( @values != 4 || grep { !defined } @values ) { + $self->storage->close; + DBM::Deep->_throw_error("Corrupted file - bad header"); + } + + if ($values[3] != $self->{num_txns}) { + warn "num_txns ($self->{num_txns}) is different from the file ($values[3])\n"; + } + + #XXX Add warnings if values weren't set right + @{$self}{qw(byte_size max_buckets data_sector_size num_txns)} = @values; + + # These shenanigans are to allow a 256 within a C + $self->{max_buckets} += 1; + $self->{data_sector_size} += 1; + + my $bl = $self->txn_bitfield_len; + + my $header_var = scalar(@values) + $bl + $STALE_SIZE * ($self->num_txns - 1) + 3 * $self->byte_size; + unless ( $size == $header_var ) { + $self->storage->close; + DBM::Deep->_throw_error( "Unexpected size found ($size <-> $header_var)." 
); + } + + $self->set_trans_loc( $header_fixed + scalar(@values) ); + $self->set_chains_loc( $header_fixed + scalar(@values) + $bl + $STALE_SIZE * ($self->num_txns - 1) ); + + return length($buffer) + length($buffer2); + } + + sub _guess_version { + $_[0] == 4 and return 2; + $_[0] == 3 and return '1.0003'; + $_[0] == 2 and return '1.00'; + $_[0] == 1 and return '0.99'; + $_[0] == 0 and return '0.91'; + + return $_[0]-2; + } +} + +=head2 _apply_digest( @stuff ) + +This will apply the digest method (default to Digest::MD5::md5) to the arguments +passed in and return the result. + +=cut + +sub _apply_digest { + my $self = shift; + my $victim = shift; + utf8::encode $victim if $self->{v} >= 4; + return $self->{digest}->($victim); +} + +=head2 _add_free_blist_sector( $offset, $size ) + +=head2 _add_free_data_sector( $offset, $size ) + +=head2 _add_free_index_sector( $offset, $size ) + +These methods are all wrappers around _add_free_sector(), providing the proper +chain offset ($multiple) for the sector type. + +=cut + +sub _add_free_blist_sector { shift->_add_free_sector( 0, @_ ) } +sub _add_free_data_sector { shift->_add_free_sector( 1, @_ ) } +sub _add_free_index_sector { shift->_add_free_sector( 2, @_ ) } + +=head2 _add_free_sector( $multiple, $offset, $size ) + +_add_free_sector() takes the offset into the chains location, the offset of the +sector, and the size of that sector. It will mark the sector as a free sector +and put it into the list of sectors that are free of this type for use later. + +This returns nothing. + +B: $size is unused? + +=cut + +sub _add_free_sector { + my $self = shift; + my ($multiple, $offset, $size) = @_; + + my $chains_offset = $multiple * $self->byte_size; + + my $storage = $self->storage; + + # Increment staleness. + # XXX Can this increment+modulo be done by "&= 0x1" ? + my $staleness = unpack( $StP{$STALE_SIZE}, $storage->read_at( $offset + $self->SIG_SIZE, $STALE_SIZE ) ); + $staleness = ($staleness + 1 ) % ( 2 ** ( 8 * $STALE_SIZE ) ); + $storage->print_at( $offset + $self->SIG_SIZE, pack( $StP{$STALE_SIZE}, $staleness ) ); + + my $old_head = $storage->read_at( $self->chains_loc + $chains_offset, $self->byte_size ); + + $storage->print_at( $self->chains_loc + $chains_offset, + pack( $StP{$self->byte_size}, $offset ), + ); + + # Record the old head in the new sector after the signature and staleness counter + $storage->print_at( $offset + $self->SIG_SIZE + $STALE_SIZE, $old_head ); +} + +=head2 _request_blist_sector( $size ) + +=head2 _request_data_sector( $size ) + +=head2 _request_index_sector( $size ) + +These methods are all wrappers around _request_sector(), providing the proper +chain offset ($multiple) for the sector type. + +=cut + +sub _request_blist_sector { shift->_request_sector( 0, @_ ) } +sub _request_data_sector { shift->_request_sector( 1, @_ ) } +sub _request_index_sector { shift->_request_sector( 2, @_ ) } + +=head2 _request_sector( $multiple $size ) + +This takes the offset into the chains location and the size of that sector. + +This returns the object with the sector. If there is an available free sector of +that type, then it will be reused. If there isn't one, then a new one will be +allocated. 
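+
+For illustration only (the offsets are made up, and these are internal
+methods, not public API), the chain behaves like a LIFO free list: freeing a
+sector pushes it onto the head of its chain, and the next request of that
+type pops it back off:
+
+    $engine->_add_free_data_sector( 4096, 64 );     # 4096 becomes the chain head
+    my $loc = $engine->_request_data_sector( 64 );  # pops the head: $loc == 4096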
+ +=cut + +sub _request_sector { + my $self = shift; + my ($multiple, $size) = @_; + + my $chains_offset = $multiple * $self->byte_size; + + my $old_head = $self->storage->read_at( $self->chains_loc + $chains_offset, $self->byte_size ); + my $loc = unpack( $StP{$self->byte_size}, $old_head ); + + # We don't have any free sectors of the right size, so allocate a new one. + unless ( $loc ) { + my $offset = $self->storage->request_space( $size ); + + # Zero out the new sector. This also guarantees correct increases + # in the filesize. + $self->storage->print_at( $offset, chr(0) x $size ); + + return $offset; + } + + # Read the new head after the signature and the staleness counter + my $new_head = $self->storage->read_at( $loc + $self->SIG_SIZE + $STALE_SIZE, $self->byte_size ); + $self->storage->print_at( $self->chains_loc + $chains_offset, $new_head ); + $self->storage->print_at( + $loc + $self->SIG_SIZE + $STALE_SIZE, + pack( $StP{$self->byte_size}, 0 ), + ); + + return $loc; +} + +=head2 ACCESSORS + +The following are readonly attributes. + +=over 4 + +=item * byte_size + +=item * hash_size + +=item * hash_chars + +=item * num_txns + +=item * max_buckets + +=item * blank_md5 + +=item * data_sector_size + +=item * txn_bitfield_len + +=back + +=cut + +sub byte_size { $_[0]{byte_size} } +sub hash_size { $_[0]{hash_size} } +sub hash_chars { $_[0]{hash_chars} } +sub num_txns { $_[0]{num_txns} } +sub max_buckets { $_[0]{max_buckets} } +sub blank_md5 { chr(0) x $_[0]->hash_size } +sub data_sector_size { $_[0]{data_sector_size} } + +# This is a calculated value +sub txn_bitfield_len { + my $self = shift; + unless ( exists $self->{txn_bitfield_len} ) { + my $temp = ($self->num_txns) / 8; + if ( $temp > int( $temp ) ) { + $temp = int( $temp ) + 1; + } + $self->{txn_bitfield_len} = $temp; + } + return $self->{txn_bitfield_len}; +} + +=pod + +The following are read/write attributes. + +=over 4 + +=item * trans_id / set_trans_id( $new_id ) + +=item * trans_loc / set_trans_loc( $new_loc ) + +=item * chains_loc / set_chains_loc( $new_loc ) + +=back + +=cut + +sub trans_id { $_[0]{trans_id} } +sub set_trans_id { $_[0]{trans_id} = $_[1] } + +sub trans_loc { $_[0]{trans_loc} } +sub set_trans_loc { $_[0]{trans_loc} = $_[1] } + +sub chains_loc { $_[0]{chains_loc} } +sub set_chains_loc { $_[0]{chains_loc} = $_[1] } + +sub supports { + my $self = shift; + my ($feature) = @_; + + if ( $feature eq 'transactions' ) { + return $self->num_txns > 1; + } + return 1 if $feature eq 'singletons'; + return 1 if $feature eq 'unicode'; + return; +} + +sub db_version { + return $_[0]{v} == 3 ? '1.0003' : 2; +} + +sub clear { + my $self = shift; + my $obj = shift; + + my $sector = $self->load_sector( $obj->_base_offset ) + or return; + + return unless $sector->staleness == $obj->_staleness; + + $sector->clear; + + return; +} + +=head2 _dump_file() + +This method takes no arguments. It's used to print out a textual representation +of the DBM::Deep DB file. It assumes the file is not-corrupted. + +=cut + +sub _dump_file { + my $self = shift; + + # Read the header + my $spot = $self->_read_file_header(); + + my %types = ( + 0 => 'B', + 1 => 'D', + 2 => 'I', + ); + + my %sizes = ( + 'D' => $self->data_sector_size, + 'B' => DBM::Deep::Sector::File::BucketList->new({engine=>$self,offset=>1})->size, + 'I' => DBM::Deep::Sector::File::Index->new({engine=>$self,offset=>1})->size, + ); + + my $return = ""; + + # Header values + $return .= "NumTxns: " . $self->num_txns . 
$/; + + # Read the free sector chains + my %sectors; + foreach my $multiple ( 0 .. 2 ) { + $return .= "Chains($types{$multiple}):"; + my $old_loc = $self->chains_loc + $multiple * $self->byte_size; + while ( 1 ) { + my $loc = unpack( + $StP{$self->byte_size}, + $self->storage->read_at( $old_loc, $self->byte_size ), + ); + + # We're now out of free sectors of this kind. + unless ( $loc ) { + last; + } + + $sectors{ $types{$multiple} }{ $loc } = undef; + $old_loc = $loc + $self->SIG_SIZE + $STALE_SIZE; + $return .= " $loc"; + } + $return .= $/; + } + + SECTOR: + while ( $spot < $self->storage->{end} ) { + # Read each sector in order. + my $sector = $self->load_sector( $spot ); + if ( !$sector ) { + # Find it in the free-sectors that were found already + foreach my $type ( keys %sectors ) { + if ( exists $sectors{$type}{$spot} ) { + my $size = $sizes{$type}; + $return .= sprintf "%08d: %s %04d\n", $spot, 'F' . $type, $size; + $spot += $size; + next SECTOR; + } + } + + die "********\n$return\nDidn't find free sector for $spot in chains\n********\n"; + } + else { + $return .= sprintf "%08d: %s %04d", $spot, $sector->type, $sector->size; + if ( $sector->type =~ /^[DU]\z/ ) { + $return .= ' ' . $sector->data; + } + elsif ( $sector->type eq 'A' || $sector->type eq 'H' ) { + $return .= ' REF: ' . $sector->get_refcount; + } + elsif ( $sector->type eq 'B' ) { + foreach my $bucket ( $sector->chopped_up ) { + $return .= "\n "; + $return .= sprintf "%08d", unpack($StP{$self->byte_size}, + substr( $bucket->[-1], $self->hash_size, $self->byte_size), + ); + my $l = unpack( $StP{$self->byte_size}, + substr( $bucket->[-1], + $self->hash_size + $self->byte_size, + $self->byte_size, + ), + ); + $return .= sprintf " %08d", $l; + foreach my $txn ( 0 .. $self->num_txns - 2 ) { + my $l = unpack( $StP{$self->byte_size}, + substr( $bucket->[-1], + $self->hash_size + 2 * $self->byte_size + $txn * ($self->byte_size + $STALE_SIZE), + $self->byte_size, + ), + ); + $return .= sprintf " %08d", $l; + } + } + } + $return .= $/; + + $spot += $sector->size; + } + } + + return $return; +} + +1; +__END__ diff --git a/src/modules/DBM/Deep/Hash.pm b/src/modules/DBM/Deep/Hash.pm new file mode 100644 index 0000000..39d1503 --- /dev/null +++ b/src/modules/DBM/Deep/Hash.pm @@ -0,0 +1,136 @@ +package DBM::Deep::Hash; + +use 5.008_004; + +use strict; +use warnings FATAL => 'all'; +no warnings 'recursion'; + +use base 'DBM::Deep'; + +sub _get_self { + # See the note in Array.pm as to why this is commented out. + # eval { local $SIG{'__DIE__'}; tied( %{$_[0]} ) } || $_[0] + + # During global destruction %{$_[0]} might get tied to undef, so we + # need to check that case if tied returns false. + tied %{$_[0]} or local *@, eval { exists $_[0]{_}; 1 } ? $_[0] : undef +} + +sub _repr { return {} } + +sub TIEHASH { + my $class = shift; + my $args = $class->_get_args( @_ ); + + $args->{type} = $class->TYPE_HASH; + + return $class->_init($args); +} + +sub FETCH { + my $self = shift->_get_self; + DBM::Deep->_throw_error( "Cannot use an undefined hash key." ) unless defined $_[0]; + my $key = ($self->_engine->storage->{filter_store_key}) + ? $self->_engine->storage->{filter_store_key}->($_[0]) + : $_[0]; + + return $self->SUPER::FETCH( $key, $_[0] ); +} + +sub STORE { + my $self = shift->_get_self; + DBM::Deep->_throw_error( "Cannot use an undefined hash key." ) unless defined $_[0]; + my $key = ($self->_engine->storage->{filter_store_key}) + ? 
$self->_engine->storage->{filter_store_key}->($_[0])
+      : $_[0];
+    my $value = $_[1];
+
+    return $self->SUPER::STORE( $key, $value, $_[0] );
+}
+
+sub EXISTS {
+    my $self = shift->_get_self;
+    DBM::Deep->_throw_error( "Cannot use an undefined hash key." ) unless defined $_[0];
+    my $key = ($self->_engine->storage->{filter_store_key})
+      ? $self->_engine->storage->{filter_store_key}->($_[0])
+      : $_[0];
+
+    return $self->SUPER::EXISTS( $key );
+}
+
+sub DELETE {
+    my $self = shift->_get_self;
+    DBM::Deep->_throw_error( "Cannot use an undefined hash key." ) unless defined $_[0];
+    my $key = ($self->_engine->storage->{filter_store_key})
+      ? $self->_engine->storage->{filter_store_key}->($_[0])
+      : $_[0];
+
+    return $self->SUPER::DELETE( $key, $_[0] );
+}
+
+# Locate and return first key (in no particular order)
+sub FIRSTKEY {
+    my $self = shift->_get_self;
+
+    $self->lock_shared;
+
+    my $result = $self->_engine->get_next_key( $self );
+
+    $self->unlock;
+
+    return ($result && $self->_engine->storage->{filter_fetch_key})
+        ? $self->_engine->storage->{filter_fetch_key}->($result)
+        : $result;
+}
+
+# Return next key (in no particular order), given previous one
+sub NEXTKEY {
+    my $self = shift->_get_self;
+
+    my $prev_key = ($self->_engine->storage->{filter_store_key})
+        ? $self->_engine->storage->{filter_store_key}->($_[0])
+        : $_[0];
+
+    $self->lock_shared;
+
+    my $result = $self->_engine->get_next_key( $self, $prev_key );
+
+    $self->unlock;
+
+    return ($result && $self->_engine->storage->{filter_fetch_key})
+        ? $self->_engine->storage->{filter_fetch_key}->($result)
+        : $result;
+}
+
+sub first_key { (shift)->FIRSTKEY(@_) }
+sub next_key { (shift)->NEXTKEY(@_) }
+
+sub _clear {
+    my $self = shift;
+
+    while ( defined(my $key = $self->first_key) ) {
+        do {
+            $self->_engine->delete_key( $self, $key, $key );
+        } while defined($key = $self->next_key($key));
+    }
+
+    return;
+}
+
+sub _copy_node {
+    my $self = shift;
+    my ($db_temp) = @_;
+
+    my $key = $self->first_key();
+    while (defined $key) {
+        my $value = $self->get($key);
+        $self->_copy_value( \$db_temp->{$key}, $value );
+        $key = $self->next_key($key);
+    }
+
+    return 1;
+}
+
+1;
+__END__
diff --git a/src/modules/DBM/Deep/Internals.pod b/src/modules/DBM/Deep/Internals.pod
new file mode 100644
index 0000000..510aa31
--- /dev/null
+++ b/src/modules/DBM/Deep/Internals.pod
@@ -0,0 +1,347 @@
+=head1 NAME
+
+DBM::Deep::Internals - Out of date documentation on DBM::Deep internals
+
+=head1 OUT OF DATE
+
+This document is out-of-date. It describes an intermediate file format used
+during the development from 0.983 to 1.0000. It will be rewritten soon.
+
+So far, the description of the header format has been updated.
+
+=head1 DESCRIPTION
+
+This is a document describing the internal workings of L<DBM::Deep>. It is
+not necessary to read this document if you only intend to be a user. This
+document is intended for people who either want a deeper understanding of
+specifics of how L<DBM::Deep> works or who wish to help program
+L<DBM::Deep>.
+
+=head1 CLASS LAYOUT
+
+L<DBM::Deep> is broken up into five classes in three inheritance hierarchies.
+
+=over 4
+
+=item *
+
+L<DBM::Deep> is the parent of L<DBM::Deep::Array> and L<DBM::Deep::Hash>.
+These classes form the immediate interface to the outside world. They are the
+classes that provide the TIE mechanisms as well as the OO methods.
+
+=item *
+
+L<DBM::Deep::Engine> is the layer that deals with the mechanics of reading
+and writing to the file. This is where the logic of the file layout is
+handled.
+
+=item *
+
+L<DBM::Deep::Storage> is the layer that deals with the physical file. As a
+singleton that every other object has a reference to, it also provides a place
+to handle datastructure-wide items, such as transactions.
+
+=back
+
+=head1 FILE LAYOUT
+
+This describes the 1.0003 and 2.0000 formats, which internally are numbered
+3 and 4, respectively. The internal numbers are used in this section. These
+two formats are almost identical.
+
+DBM::Deep uses a tagged file layout. Every section has a tag, a size, and then
+the data.
+
+=head2 File header
+
+The file header consists of two parts. The first part is a fixed length of
+13 bytes:
+
+  DPDB h VVVV SSSS
+  \  / |  \    \
+   \/  '---.    \  '--- size of the second part of the header
+  file      \    '--- version
+  signature  tag
+
+=over 4
+
+=item * File Signature
+
+The first four bytes are 'DPDB' in network byte order, signifying that this is
+a DBM::Deep file.
+
+=item * File tag
+
+A literal ASCII 'h', indicating that this is the header. The file used by
+versions prior to 1.00 had a different fifth byte, allowing the difference
+to be determined.
+
+=item * Version
+
+This is four bytes containing the file version. This lets the file format change over time.
+
+It is packed in network order, so version 4 is stored as "\0\0\0\cD".
+
+=item * Header size
+
+The size of the second part of the header, in bytes. This number is also
+packed in network order.
+
+=back
+
+The second part of the header is as follows:
+
+  S B S T T(TTTTTTTTT...) (SS SS SS SS ...)  (continued...)
+  | | | |        \               |
+  | | | '----------.              \   staleness counters
+  | | '--------.    \              txn bitfield
+  | '------.    \    number of transactions
+  byte size \    data sector size
+             max buckets
+
+  (continuation...)
+  BB(BBBBBB) DD(DDDDDD) II(IIIIII)
+      |          |          |
+      |      free data      |
+  free blist            free index
+
+=over
+
+=item * Constants
+
+These are the file-wide constants that determine how the file is laid out.
+They can only be set upon file creation.
+
+The byte size is the number of bytes used to point to an offset elsewhere
+in the file. This corresponds to the C<pack_size> option. This and the
+next three values are stored as packed 8-bit integers (chars), so 2 is
+represented by "\cB".
+
+C<max_buckets> and C<data_sector_size> are documented in the main
+L<DBM::Deep> man page. The number stored is actually one less than what is
+passed to the constructor, to allow for a range of 1-256.
+
+The number of transactions corresponds to the C<num_txns> value passed to
+the constructor.
+
+=item * Transaction information
+
+The transaction bitfield consists of one bit for every available
+transaction ID. It is therefore anywhere from 1 byte to 32 bytes long.
+
+The staleness counters each take two bytes (packed 16-bit integers), one
+for each transaction, not including the so-called HEAD (the main
+transaction that all processes share I<without> calling C<begin_work>). So
+these take up 0 to 508 bytes.
+
+Staleness is explained in L<DBM::Deep::Engine/STALENESS>.
+
+=item * Freespace information
+
+Pointers into the first free sectors of the various sector sizes (Index,
+Bucketlist, and Data) are stored here. These are called chains internally,
+as each free sector points to the next one.
+
+The number of bytes is determined by the byte size, ranging from 2 to 8.
+
+=back
+
+=head2 Index
+
+The Index parts can be tagged either as Hash, Array, or Index. The latter
+is if there was a reindexing due to a bucketlist growing too large. The others
+are the root index for their respective datatypes. The index consists of a
+tag, a size, and then 256 sections containing file locations. Each section
+corresponds to each value representable in a byte.
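+
+The paragraph below describes how a lookup walks this tree. As a rough sketch
+only (C<loc_for> and C<is_index> are hypothetical helpers, not part of the
+DBM::Deep API), the walk looks like this:
+
+    # Walk the MD5 of the key one byte at a time, descending through
+    # index sectors until a bucketlist sector is reached.
+    my @bytes = unpack 'C16', $md5_of_key;  # 16 bytes => up to 16 levels
+    my $loc   = $root_index_loc;
+    for my $byte (@bytes) {
+        $loc = loc_for( $loc, $byte );      # follow one of the 256 slots
+        last unless is_index( $loc );       # stop once we hit a bucketlist
+    }
+    # $loc is now the bucketlist, which is searched by full MD5 comparison.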
+
+The index is used as follows - whenever a hashed key is being looked up, the
+first byte is used to determine which location to go to from the root index.
+Then, if that's also an index, the second byte is used, and so forth until a
+bucketlist is found.
+
+=head2 Bucketlist
+
+This is the part that contains the link to the data section. A bucketlist
+defaults to being 16 buckets long (modifiable by the I<max_buckets>
+parameter used when creating a new file). Each bucket contains an MD5 and a
+location of the appropriate key section.
+
+=head2 Key area
+
+This is the part that handles transactional awareness. There are
+I<num_txns> sections. Each section contains the location to the data
+section, a transaction ID, and whether that transaction considers this key to
+be deleted or not.
+
+=head2 Data area
+
+This is the part that actually stores the key, value, and class (if
+appropriate). The layout is:
+
+=over 4
+
+=item * tag
+
+=item * length of the value
+
+=item * the actual value
+
+=item * keylength
+
+=item * the actual key
+
+=item * a byte indicating if this value has a classname
+
+=item * the classname (if one is there)
+
+=back
+
+The key is stored after the value because the value is requested more often
+than the key.
+
+=head1 PERFORMANCE
+
+L<DBM::Deep> is written completely in Perl. It also is a multi-process DBM
+that uses the datafile as a method of synchronizing between multiple
+processes. This is unlike most RDBMSes like MySQL and Oracle. Furthermore,
+unlike all RDBMSes, L<DBM::Deep> stores both the data and the structure of
+that data as it would appear in a Perl program.
+
+=head2 CPU
+
+DBM::Deep attempts to be CPU-light. As it stores all the data on disk,
+DBM::Deep is I/O-bound, not CPU-bound.
+
+=head2 RAM
+
+DBM::Deep uses extremely little RAM relative to the amount of data you can
+access. You can iterate through a million keys (using C<each()>) without
+increasing your memory usage at all.
+
+=head2 DISK
+
+DBM::Deep is I/O-bound, pure and simple. The faster your disk, the faster
+DBM::Deep will be. Currently, when performing C<< $db->{foo} >>, there
+are a minimum of 4 seeks and 1332 + N bytes read (where N is the length of your
+data). (All values assume a medium filesize.) The actions taken are:
+
+=over 4
+
+=item 1 Lock the file
+
+=item 1 Perform a stat() to determine if the inode has changed
+
+=item 1 Go to the primary index for the $db (1 seek)
+
+=item 1 Read the tag/size of the primary index (5 bytes)
+
+=item 1 Read the body of the primary index (1024 bytes)
+
+=item 1 Go to the bucketlist for this MD5 (1 seek)
+
+=item 1 Read the tag/size of the bucketlist (5 bytes)
+
+=item 1 Read the body of the bucketlist (144 bytes)
+
+=item 1 Go to the keys location for this MD5 (1 seek)
+
+=item 1 Read the tag/size of the keys section (5 bytes)
+
+=item 1 Read the body of the keys location (144 bytes)
+
+=item 1 Go to the data section that corresponds to this transaction ID. (1 seek)
+
+=item 1 Read the tag/size of the data section (5 bytes)
+
+=item 1 Read the value for this data (N bytes)
+
+=item 1 Unlock the file
+
+=back
+
+Every additional level of indexing (if there are enough keys) requires an
+additional seek and the reading of 1029 additional bytes. If the value is
+blessed, an additional 1 seek and 9 + M bytes are read (where M is the length
+of the classname).
+
+Arrays are (currently) even worse because they're considered "funny hashes"
+with the length stored as just another key. This means that if you do any sort
+of lookup with a negative index, this entire process is performed twice - once
+for the length and once for the value.
+
+=head1 ACTUAL TESTS
+
+=head2 SPEED
+
+Obviously, DBM::Deep isn't going to be as fast as some C-based DBMs, such as
+the almighty I<BerkeleyDB>. But it makes up for it in features like true
+multi-level hash/array support, and cross-platform FTPable files. Even so,
+DBM::Deep is still pretty fast, and the speed stays fairly consistent, even
+with huge databases. Here is some test data:
+
+    Adding 1,000,000 keys to new DB file...
+
+    At 100 keys, avg. speed is 2,703 keys/sec
+    At 200 keys, avg. speed is 2,642 keys/sec
+    At 300 keys, avg. speed is 2,598 keys/sec
+    At 400 keys, avg. speed is 2,578 keys/sec
+    At 500 keys, avg. speed is 2,722 keys/sec
+    At 600 keys, avg. speed is 2,628 keys/sec
+    At 700 keys, avg. speed is 2,700 keys/sec
+    At 800 keys, avg. speed is 2,607 keys/sec
+    At 900 keys, avg. speed is 2,190 keys/sec
+    At 1,000 keys, avg. speed is 2,570 keys/sec
+    At 2,000 keys, avg. speed is 2,417 keys/sec
+    At 3,000 keys, avg. speed is 1,982 keys/sec
+    At 4,000 keys, avg. speed is 1,568 keys/sec
+    At 5,000 keys, avg. speed is 1,533 keys/sec
+    At 6,000 keys, avg. speed is 1,787 keys/sec
+    At 7,000 keys, avg. speed is 1,977 keys/sec
+    At 8,000 keys, avg. speed is 2,028 keys/sec
+    At 9,000 keys, avg. speed is 2,077 keys/sec
+    At 10,000 keys, avg. speed is 2,031 keys/sec
+    At 20,000 keys, avg. speed is 1,970 keys/sec
+    At 30,000 keys, avg. speed is 2,050 keys/sec
+    At 40,000 keys, avg. speed is 2,073 keys/sec
+    At 50,000 keys, avg. speed is 1,973 keys/sec
+    At 60,000 keys, avg. speed is 1,914 keys/sec
+    At 70,000 keys, avg. speed is 2,091 keys/sec
+    At 80,000 keys, avg. speed is 2,103 keys/sec
+    At 90,000 keys, avg. speed is 1,886 keys/sec
+    At 100,000 keys, avg. speed is 1,970 keys/sec
+    At 200,000 keys, avg. speed is 2,053 keys/sec
+    At 300,000 keys, avg. speed is 1,697 keys/sec
+    At 400,000 keys, avg. speed is 1,838 keys/sec
+    At 500,000 keys, avg. speed is 1,941 keys/sec
+    At 600,000 keys, avg. speed is 1,930 keys/sec
+    At 700,000 keys, avg. speed is 1,735 keys/sec
+    At 800,000 keys, avg. speed is 1,795 keys/sec
+    At 900,000 keys, avg. speed is 1,221 keys/sec
+    At 1,000,000 keys, avg. speed is 1,077 keys/sec
+
+This test was performed on a PowerMac G4 1GHz running Mac OS X 10.3.2 & Perl
+5.8.1, with an 80GB Ultra ATA/100 HD spinning at 7200RPM. The hash keys and
+values were between 6 - 12 chars in length. The DB file ended up at 210MB.
+Run time was 12 min 3 sec.
+
+=head2 MEMORY USAGE
+
+One of the great things about L<DBM::Deep> is that it uses very little memory.
+Even with huge databases (1,000,000+ keys) you will not see much increased
+memory on your process. L<DBM::Deep> relies solely on the filesystem for storing
+and fetching data. Here is output from I<top> before even opening a database
+handle:
+
+    PID USER PRI NI SIZE RSS SHARE STAT %CPU %MEM TIME COMMAND
+  22831 root 11 0 2716 2716 1296 R 0.0 0.2 0:07 perl
+
+Basically the process is taking 2,716K of memory. And here is the same
+process after storing and fetching 1,000,000 keys:
+
+    PID USER PRI NI SIZE RSS SHARE STAT %CPU %MEM TIME COMMAND
+  22831 root 14 0 2772 2772 1328 R 0.0 0.2 13:32 perl
+
+Notice the memory usage increased by only 56K. Test was performed on a 700MHz
+x86 box running Linux RedHat 7.2 & Perl 5.6.1.
+ +=cut diff --git a/src/modules/DBM/Deep/Iterator.pm b/src/modules/DBM/Deep/Iterator.pm new file mode 100644 index 0000000..f5a4eff --- /dev/null +++ b/src/modules/DBM/Deep/Iterator.pm @@ -0,0 +1,73 @@ +package DBM::Deep::Iterator; + +use 5.008_004; + +use strict; +use warnings FATAL => 'all'; + +=head1 NAME + +DBM::Deep::Iterator - iterator for FIRSTKEY() and NEXTKEY() + +=head1 PURPOSE + +This is an internal-use-only object for L. It is the iterator +for FIRSTKEY() and NEXTKEY(). + +=head1 OVERVIEW + +This object + +=head1 METHODS + +=head2 new(\%params) + +The constructor takes a hashref of params. The hashref is assumed to have the +following elements: + +=over 4 + +=item * engine (of type L + +=item * base_offset (the base_offset of the invoking DBM::Deep object) + +=back + +=cut + +sub new { + my $class = shift; + my ($args) = @_; + + my $self = bless { + engine => $args->{engine}, + base_offset => $args->{base_offset}, + }, $class; + + Scalar::Util::weaken( $self->{engine} ); + + $self->reset; + + return $self; +} + +=head2 reset() + +This method takes no arguments. + +It will reset the iterator so that it will start from the beginning again. + +This method returns nothing. + +=cut + +sub reset { die "reset must be implemented in a child class" } + +=head2 get_next_key( $obj ) + +=cut + +sub get_next_key { die "get_next_key must be implemented in a child class" } + +1; +__END__ diff --git a/src/modules/DBM/Deep/Iterator/DBI.pm b/src/modules/DBM/Deep/Iterator/DBI.pm new file mode 100644 index 0000000..0aecbe8 --- /dev/null +++ b/src/modules/DBM/Deep/Iterator/DBI.pm @@ -0,0 +1,37 @@ +package DBM::Deep::Iterator::DBI; + +use strict; +use warnings FATAL => 'all'; + +use base qw( DBM::Deep::Iterator ); + +sub reset { + my $self = shift; + + eval { $self->{sth}->finish; }; + delete $self->{sth}; + + return; +} + +sub get_next_key { + my $self = shift; + my ($obj) = @_; + + unless ( exists $self->{sth} ) { + # For mysql, this needs to be RAND() + # For sqlite, this needs to be random() + my $storage = $self->{engine}->storage; + $self->{sth} = $storage->{dbh}->prepare( + "SELECT `key` FROM datas WHERE ref_id = ? ORDER BY " + . $storage->rand_function, + ); + $self->{sth}->execute( $self->{base_offset} ); + } + + my ($key) = $self->{sth}->fetchrow_array; + return $key; +} + +1; +__END__ diff --git a/src/modules/DBM/Deep/Iterator/File.pm b/src/modules/DBM/Deep/Iterator/File.pm new file mode 100644 index 0000000..b38ed94 --- /dev/null +++ b/src/modules/DBM/Deep/Iterator/File.pm @@ -0,0 +1,104 @@ +package DBM::Deep::Iterator::File; + +use strict; +use warnings FATAL => 'all'; + +use base qw( DBM::Deep::Iterator ); + +use DBM::Deep::Iterator::File::BucketList (); +use DBM::Deep::Iterator::File::Index (); + +sub reset { $_[0]{breadcrumbs} = []; return } + +sub get_sector_iterator { + my $self = shift; + my ($loc) = @_; + + my $sector = $self->{engine}->load_sector( $loc ) + or return; + + if ( $sector->isa( 'DBM::Deep::Sector::File::Index' ) ) { + return DBM::Deep::Iterator::File::Index->new({ + iterator => $self, + sector => $sector, + }); + } + elsif ( $sector->isa( 'DBM::Deep::Sector::File::BucketList' ) ) { + return DBM::Deep::Iterator::File::BucketList->new({ + iterator => $self, + sector => $sector, + }); + } + + DBM::Deep->_throw_error( "get_sector_iterator(): Why did $loc make a $sector?" 
); +} + +sub get_next_key { + my $self = shift; + my ($obj) = @_; + + my $crumbs = $self->{breadcrumbs}; + my $e = $self->{engine}; + + unless ( @$crumbs ) { + # This will be a Reference sector + my $sector = $e->load_sector( $self->{base_offset} ) + # If no sector is found, this must have been deleted from under us. + or return; + + if ( $sector->staleness != $obj->_staleness ) { + return; + } + + my $loc = $sector->get_blist_loc + or return; + + push @$crumbs, $self->get_sector_iterator( $loc ); + } + + FIND_NEXT_KEY: { + # We're at the end. + unless ( @$crumbs ) { + $self->reset; + return; + } + + my $iterator = $crumbs->[-1]; + + # This level is done. + if ( $iterator->at_end ) { + pop @$crumbs; + redo FIND_NEXT_KEY; + } + + if ( $iterator->isa( 'DBM::Deep::Iterator::File::Index' ) ) { + # If we don't have any more, it will be caught at the + # prior check. + if ( my $next = $iterator->get_next_iterator ) { + push @$crumbs, $next; + } + redo FIND_NEXT_KEY; + } + + unless ( $iterator->isa( 'DBM::Deep::Iterator::File::BucketList' ) ) { + DBM::Deep->_throw_error( + "Should have a bucketlist iterator here - instead have $iterator" + ); + } + + # At this point, we have a BucketList iterator + my $key = $iterator->get_next_key; + if ( defined $key ) { + return $key; + } + #XXX else { $iterator->set_to_end() } ? + + # We hit the end of the bucketlist iterator, so redo + redo FIND_NEXT_KEY; + } + + DBM::Deep->_throw_error( "get_next_key(): How did we get here?" ); +} + +1; +__END__ diff --git a/src/modules/DBM/Deep/Iterator/File/BucketList.pm b/src/modules/DBM/Deep/Iterator/File/BucketList.pm new file mode 100644 index 0000000..d62b9e9 --- /dev/null +++ b/src/modules/DBM/Deep/Iterator/File/BucketList.pm @@ -0,0 +1,90 @@ +package DBM::Deep::Iterator::File::BucketList; + +use 5.008_004; + +use strict; +use warnings FATAL => 'all'; + +=head1 NAME + +DBM::Deep::Iterator::BucketList - mediate between DBM::Deep::Iterator and DBM::Deep::Engine::Sector::BucketList + +=head1 PURPOSE + +This is an internal-use-only object for L. It acts as the mediator +between the L object and a L +sector. + +=head1 OVERVIEW + +This object, despite the implied class hierarchy, does B inherit from +L. Instead, it delegates to it, essentially acting as a +facade over it. L will instantiate one of +these objects as needed to handle an BucketList sector. + +=head1 METHODS + +=head2 new(\%params) + +The constructor takes a hashref of params and blesses it into the invoking class. The +hashref is assumed to have the following elements: + +=over 4 + +=item * iterator (of type L + +=item * sector (of type L + +=back + +=cut + +sub new { + my $self = bless $_[1] => $_[0]; + $self->{curr_index} = 0; + return $self; +} + +=head2 at_end() + +This takes no arguments. + +This returns true/false indicating whether this sector has any more elements that can be +iterated over. + +=cut + +sub at_end { + my $self = shift; + return $self->{curr_index} >= $self->{iterator}{engine}->max_buckets; +} + +=head2 get_next_iterator() + +This takes no arguments. + +This returns the next key pointed to by this bucketlist. This value is suitable for +returning by FIRSTKEY or NEXTKEY(). + +If the bucketlist is exhausted, it returns nothing. + +=cut + +sub get_next_key { + my $self = shift; + + return if $self->at_end; + + my $idx = $self->{curr_index}++; + + my $data_loc = $self->{sector}->get_data_location_for({ + allow_head => 1, + idx => $idx, + }) or return; + + #XXX Do we want to add corruption checks here? 
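+    # (Note that the iterator hands back the *key* stored in this bucket;
+    # FIRSTKEY/NEXTKEY only enumerate keys, and values are fetched
+    # separately via FETCH when the caller asks for them.)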
+ return $self->{sector}->get_key_for( $idx )->data; +} + +1; +__END__ diff --git a/src/modules/DBM/Deep/Iterator/File/Index.pm b/src/modules/DBM/Deep/Iterator/File/Index.pm new file mode 100644 index 0000000..d2f6611 --- /dev/null +++ b/src/modules/DBM/Deep/Iterator/File/Index.pm @@ -0,0 +1,86 @@ +package DBM::Deep::Iterator::File::Index; + +use 5.008_004; + +use strict; +use warnings FATAL => 'all'; + +=head1 NAME + +DBM::Deep::Iterator::Index - mediate between DBM::Deep::Iterator and DBM::Deep::Engine::Sector::Index + +=head1 PURPOSE + +This is an internal-use-only object for L. It acts as the mediator +between the L object and a L +sector. + +=head1 OVERVIEW + +This object, despite the implied class hierarchy, does B inherit from +L. Instead, it delegates to it, essentially acting as a +facade over it. L will instantiate one of +these objects as needed to handle an Index sector. + +=head1 METHODS + +=head2 new(\%params) + +The constructor takes a hashref of params and blesses it into the invoking class. The +hashref is assumed to have the following elements: + +=over 4 + +=item * iterator (of type L + +=item * sector (of type L + +=back + +=cut + +sub new { + my $self = bless $_[1] => $_[0]; + $self->{curr_index} = 0; + return $self; +} + +=head2 at_end() + +This takes no arguments. + +This returns true/false indicating whether this sector has any more elements that can be +iterated over. + +=cut + +sub at_end { + my $self = shift; + return $self->{curr_index} >= $self->{iterator}{engine}->hash_chars; +} + +=head2 get_next_iterator() + +This takes no arguments. + +This returns an iterator (built by L) based +on the sector pointed to by the next occupied location in this index. + +If the sector is exhausted, it returns nothing. + +=cut + +sub get_next_iterator { + my $self = shift; + + my $loc; + while ( !$loc ) { + return if $self->at_end; + $loc = $self->{sector}->get_entry( $self->{curr_index}++ ); + } + + return $self->{iterator}->get_sector_iterator( $loc ); +} + +1; +__END__ diff --git a/src/modules/DBM/Deep/Null.pm b/src/modules/DBM/Deep/Null.pm new file mode 100644 index 0000000..a521671 --- /dev/null +++ b/src/modules/DBM/Deep/Null.pm @@ -0,0 +1,49 @@ +package DBM::Deep::Null; + +use 5.008_004; + +use strict; +use warnings FATAL => 'all'; + +=head1 NAME + +DBM::Deep::Null - NULL object + +=head1 PURPOSE + +This is an internal-use-only object for L. It acts as a NULL object +in the same vein as MARCEL's L. I couldn't use L +because DBM::Deep needed an object that always evaluated as undef, not an +implementation of the Null Class pattern. + +=head1 OVERVIEW + +It is used to represent null sectors in DBM::Deep. + +=cut + +use overload + 'bool' => sub { undef }, + '""' => sub { undef }, + '0+' => sub { 0 }, + ('cmp' => + '<=>' => sub { + return 0 if !defined $_[1] || !length $_[1]; + return $_[2] ? 
1 : -1; + } + )[0,2,1,2], # same sub for both ops + '%{}' => sub { + require Carp; + Carp::croak("Can't use a stale reference as a HASH"); + }, + '@{}' => sub { + require Carp; + Carp::croak("Can't use a stale reference as an ARRAY"); + }, + fallback => 1, + nomethod => 'AUTOLOAD'; + +sub AUTOLOAD { return; } + +1; +__END__ diff --git a/src/modules/DBM/Deep/Sector.pm b/src/modules/DBM/Deep/Sector.pm new file mode 100644 index 0000000..f394397 --- /dev/null +++ b/src/modules/DBM/Deep/Sector.pm @@ -0,0 +1,37 @@ +package DBM::Deep::Sector; + +use 5.008_004; + +use strict; +use warnings FATAL => 'all'; + +use Scalar::Util (); + +sub new { + my $self = bless $_[1], $_[0]; + Scalar::Util::weaken( $self->{engine} ); + $self->_init; + return $self; +} + +sub _init {} + +sub clone { + my $self = shift; + return ref($self)->new({ + engine => $self->engine, + type => $self->type, + data => $self->data, + }); +} + + +sub engine { $_[0]{engine} } +sub offset { $_[0]{offset} } +sub type { $_[0]{type} } +sub staleness { $_[0]{staleness} } + +sub load { die "load must be implemented in a child class" } + +1; +__END__ diff --git a/src/modules/DBM/Deep/Sector/DBI.pm b/src/modules/DBM/Deep/Sector/DBI.pm new file mode 100644 index 0000000..ec6ecdf --- /dev/null +++ b/src/modules/DBM/Deep/Sector/DBI.pm @@ -0,0 +1,55 @@ +package DBM::Deep::Sector::DBI; + +use 5.008_004; + +use strict; +use warnings FATAL => 'all'; + +use base qw( DBM::Deep::Sector ); + +use DBM::Deep::Sector::DBI::Reference (); +use DBM::Deep::Sector::DBI::Scalar (); + +sub free { + my $self = shift; + + $self->engine->storage->delete_from( + $self->table, $self->offset, + ); +} + +sub reload { + my $self = shift; + $self->_init; +} + +sub load { + my $self = shift; + my ($engine, $offset, $type) = @_; + + if ( !defined $type || $type eq 'refs' ) { + return DBM::Deep::Sector::DBI::Reference->new({ + engine => $engine, + offset => $offset, + }); + } + elsif ( $type eq 'datas' ) { + my $sector = DBM::Deep::Sector::DBI::Scalar->new({ + engine => $engine, + offset => $offset, + }); + + if ( $sector->{data_type} eq 'R' ) { + return $self->load( + $engine, $sector->{value}, 'refs', + ); + } + + return $sector; + } + + DBM::Deep->_throw_error( "'$offset': Don't know what to do with type '$type'" ); +} + +1; +__END__ diff --git a/src/modules/DBM/Deep/Sector/DBI/Reference.pm b/src/modules/DBM/Deep/Sector/DBI/Reference.pm new file mode 100644 index 0000000..ff828eb --- /dev/null +++ b/src/modules/DBM/Deep/Sector/DBI/Reference.pm @@ -0,0 +1,238 @@ +package DBM::Deep::Sector::DBI::Reference; + +use 5.008_004; + +use strict; +use warnings FATAL => 'all'; + +use base 'DBM::Deep::Sector::DBI'; + +use Scalar::Util; + +sub table { 'refs' } + +sub _init { + my $self = shift; + + my $e = $self->engine; + + unless ( $self->offset ) { + my $classname = Scalar::Util::blessed( delete $self->{data} ); + $self->{offset} = $self->engine->storage->write_to( + refs => undef, + ref_type => $self->type, + classname => $classname, + ); + } + else { + my ($rows) = $self->engine->storage->read_from( + refs => $self->offset, + qw( ref_type ), + ); + + $self->{type} = $rows->[0]{ref_type}; + } + + return; +} + +sub get_data_for { + my $self = shift; + my ($args) = @_; + + my ($rows) = $self->engine->storage->read_from( + datas => { ref_id => $self->offset, key => $args->{key} }, + qw( id ), + ); + + return unless $rows->[0]{id}; + + $self->load( + $self->engine, + $rows->[0]{id}, + 'datas', + ); +} + +sub write_data { + my $self = shift; + my ($args) = @_; + + if ( ( 
$args->{value}->type || 'S' ) eq 'S' ) { + $args->{value}{offset} = $self->engine->storage->write_to( + datas => $args->{value}{offset}, + ref_id => $self->offset, + data_type => 'S', + key => $args->{key}, + value => $args->{value}{data}, + ); + + $args->{value}->reload; + } + else { + # Write the Scalar of the Reference + $self->engine->storage->write_to( + datas => undef, + ref_id => $self->offset, + data_type => 'R', + key => $args->{key}, + value => $args->{value}{offset}, + ); + } +} + +sub delete_key { + my $self = shift; + my ($args) = @_; + + my $old_value = $self->get_data_for({ + key => $args->{key}, + }); + + my $data; + if ( $old_value ) { + $data = $old_value->data({ export => 1 }); + + $self->engine->storage->delete_from( + 'datas', + { ref_id => $self->offset, + key => $args->{key}, }, + ); + $old_value->free; + } + + return $data; +} + +sub get_classname { + my $self = shift; + my ($rows) = $self->engine->storage->read_from( + 'refs', $self->offset, + qw( classname ), + ); + return unless @$rows; + return $rows->[0]{classname}; +} + +# Look to hoist this method into a ::Reference trait +sub data { + my $self = shift; + my ($args) = @_; + $args ||= {}; + + my $engine = $self->engine; + my $cache = $engine->cache; + my $off = $self->offset; + my $obj; + if ( !defined $cache->{ $off } ) { + $obj = DBM::Deep->new({ + type => $self->type, + base_offset => $self->offset, + storage => $engine->storage, + engine => $engine, + }); + + $cache->{$off} = $obj; + Scalar::Util::weaken($cache->{$off}); + } + else { + $obj = $cache->{$off}; + } + + # We're not exporting, so just return. + unless ( $args->{export} ) { + if ( $engine->storage->{autobless} ) { + my $classname = $self->get_classname; + if ( defined $classname ) { + bless $obj, $classname; + } + } + + return $obj; + } + + # We shouldn't export if this is still referred to. + if ( $self->get_refcount > 1 ) { + return $obj; + } + + return $obj->export; +} + +sub free { + my $self = shift; + + # We're not ready to be removed yet. + return if $self->decrement_refcount > 0; + + # Rebless the object into DBM::Deep::Null. + # In external_refs mode, this will already have been removed from + # the cache, so we can skip this. + my $e = $self->engine; + if(!$e->{external_refs}) { + eval { %{ $e->cache->{ $self->offset } } = (); }; + eval { @{ $e->cache->{ $self->offset } } = (); }; + bless $e->cache->{ $self->offset }, 'DBM::Deep::Null'; + delete $e->cache->{ $self->offset }; + } + + $e->storage->delete_from( + 'datas', { ref_id => $self->offset }, + ); + + $e->storage->delete_from( + 'datas', { value => $self->offset, data_type => 'R' }, + ); + + $self->SUPER::free( @_ ); +} + +sub increment_refcount { + my $self = shift; + my $refcount = $self->get_refcount; + $refcount++; + $self->write_refcount( $refcount ); + return $refcount; +} + +sub decrement_refcount { + my $self = shift; + my $refcount = $self->get_refcount; + $refcount--; + $self->write_refcount( $refcount ); + return $refcount; +} + +sub get_refcount { + my $self = shift; + my ($rows) = $self->engine->storage->read_from( + 'refs', $self->offset, + qw( refcount ), + ); + return $rows->[0]{refcount}; +} + +sub write_refcount { + my $self = shift; + my ($num) = @_; + $self->engine->storage->{dbh}->do( + "UPDATE refs SET refcount = ? 
WHERE id = ?", undef, + $num, $self->offset, + ); +} + +sub clear { + my $self = shift; + + DBM::Deep->new({ + type => $self->type, + base_offset => $self->offset, + storage => $self->engine->storage, + engine => $self->engine, + })->_clear; + + return; +} + +1; +__END__ diff --git a/src/modules/DBM/Deep/Sector/DBI/Scalar.pm b/src/modules/DBM/Deep/Sector/DBI/Scalar.pm new file mode 100644 index 0000000..276e66c --- /dev/null +++ b/src/modules/DBM/Deep/Sector/DBI/Scalar.pm @@ -0,0 +1,31 @@ +package DBM::Deep::Sector::DBI::Scalar; + +use strict; +use warnings FATAL => 'all'; + +use base qw( DBM::Deep::Sector::DBI ); + +sub table { 'datas' } + +sub _init { + my $self = shift; + + if ( $self->offset ) { + my ($rows) = $self->engine->storage->read_from( + datas => $self->offset, + qw( id data_type key value ), + ); + + $self->{$_} = $rows->[0]{$_} for qw( data_type key value ); + } + + return; +} + +sub data { + my $self = shift; + $self->{value}; +} + +1; +__END__ diff --git a/src/modules/DBM/Deep/Sector/File.pm b/src/modules/DBM/Deep/Sector/File.pm new file mode 100644 index 0000000..c93b9d8 --- /dev/null +++ b/src/modules/DBM/Deep/Sector/File.pm @@ -0,0 +1,104 @@ +package DBM::Deep::Sector::File; + +use 5.008_004; + +use strict; +use warnings FATAL => 'all'; + +use base qw( DBM::Deep::Sector ); + +use DBM::Deep::Sector::File::BucketList (); +use DBM::Deep::Sector::File::Index (); +use DBM::Deep::Sector::File::Null (); +use DBM::Deep::Sector::File::Reference (); +use DBM::Deep::Sector::File::Scalar (); + +my $STALE_SIZE = 2; + +sub base_size { + my $self = shift; + return $self->engine->SIG_SIZE + $STALE_SIZE; +} + +sub free_meth { die "free_meth must be implemented in a child class" } + +sub free { + my $self = shift; + + my $e = $self->engine; + + $e->storage->print_at( $self->offset, $e->SIG_FREE ); + # Skip staleness counter + $e->storage->print_at( $self->offset + $self->base_size, + chr(0) x ($self->size - $self->base_size), + ); + + my $free_meth = $self->free_meth; + $e->$free_meth( $self->offset, $self->size ); + + return; +} + +#=head2 load( $offset ) +# +#This will instantiate and return the sector object that represents the data +#found at $offset. +# +#=cut + +sub load { + my $self = shift; + my ($engine, $offset) = @_; + + # Add a catch for offset of 0 or 1 + return if !$offset || $offset <= 1; + + my $type = $engine->storage->read_at( $offset, 1 ); + return if $type eq chr(0); + + if ( $type eq $engine->SIG_ARRAY || $type eq $engine->SIG_HASH ) { + return DBM::Deep::Sector::File::Reference->new({ + engine => $engine, + type => $type, + offset => $offset, + }); + } + # XXX Don't we need key_md5 here? + elsif ( $type eq $engine->SIG_BLIST ) { + return DBM::Deep::Sector::File::BucketList->new({ + engine => $engine, + type => $type, + offset => $offset, + }); + } + elsif ( $type eq $engine->SIG_INDEX ) { + return DBM::Deep::Sector::File::Index->new({ + engine => $engine, + type => $type, + offset => $offset, + }); + } + elsif ( $type eq $engine->SIG_NULL ) { + return DBM::Deep::Sector::File::Null->new({ + engine => $engine, + type => $type, + offset => $offset, + }); + } + elsif ( $type eq $engine->SIG_DATA || $type eq $engine->SIG_UNIDATA ) { + return DBM::Deep::Sector::File::Scalar->new({ + engine => $engine, + type => $type, + offset => $offset, + }); + } + # This was deleted from under us, so just return and let the caller figure it out. 
+ elsif ( $type eq $engine->SIG_FREE ) { + return; + } + + DBM::Deep->_throw_error( "'$offset': Don't know what to do with type '$type'" ); +} + +1; +__END__ diff --git a/src/modules/DBM/Deep/Sector/File/BucketList.pm b/src/modules/DBM/Deep/Sector/File/BucketList.pm new file mode 100644 index 0000000..3cb9aeb --- /dev/null +++ b/src/modules/DBM/Deep/Sector/File/BucketList.pm @@ -0,0 +1,376 @@ +package DBM::Deep::Sector::File::BucketList; + +use 5.008_004; + +use strict; +use warnings FATAL => 'all'; + +use base qw( DBM::Deep::Sector::File ); + +my $STALE_SIZE = 2; + +# Please refer to the pack() documentation for further information +my %StP = ( + 1 => 'C', # Unsigned char value (no order needed as it's just one byte) + 2 => 'n', # Unsigned short in "network" (big-endian) order + 4 => 'N', # Unsigned long in "network" (big-endian) order + 8 => 'Q', # Usigned quad (no order specified, presumably machine-dependent) +); + +sub _init { + my $self = shift; + + my $engine = $self->engine; + + unless ( $self->offset ) { + my $leftover = $self->size - $self->base_size; + + $self->{offset} = $engine->_request_blist_sector( $self->size ); + $engine->storage->print_at( $self->offset, $engine->SIG_BLIST ); # Sector type + # Skip staleness counter + $engine->storage->print_at( $self->offset + $self->base_size, + chr(0) x $leftover, # Zero-fill the data + ); + } + + if ( $self->{key_md5} ) { + $self->find_md5; + } + + return $self; +} + +sub wipe { + my $self = shift; + $self->engine->storage->print_at( $self->offset + $self->base_size, + chr(0) x ($self->size - $self->base_size), # Zero-fill the data + ); +} + +sub size { + my $self = shift; + unless ( $self->{size} ) { + my $e = $self->engine; + # Base + numbuckets * bucketsize + $self->{size} = $self->base_size + $e->max_buckets * $self->bucket_size; + } + return $self->{size}; +} + +sub free_meth { '_add_free_blist_sector' } + +sub free { + my $self = shift; + + my $e = $self->engine; + foreach my $bucket ( $self->chopped_up ) { + my $rest = $bucket->[-1]; + + # Delete the keysector + my $l = unpack( $StP{$e->byte_size}, substr( $rest, $e->hash_size, $e->byte_size ) ); + my $s = $e->load_sector( $l ); $s->free if $s; + + # Delete the HEAD sector + $l = unpack( $StP{$e->byte_size}, + substr( $rest, + $e->hash_size + $e->byte_size, + $e->byte_size, + ), + ); + $s = $e->load_sector( $l ); $s->free if $s; + + foreach my $txn ( 0 .. $e->num_txns - 2 ) { + my $l = unpack( $StP{$e->byte_size}, + substr( $rest, + $e->hash_size + 2 * $e->byte_size + $txn * ($e->byte_size + $STALE_SIZE), + $e->byte_size, + ), + ); + my $s = $e->load_sector( $l ); $s->free if $s; + } + } + + $self->SUPER::free(); +} + +sub bucket_size { + my $self = shift; + unless ( $self->{bucket_size} ) { + my $e = $self->engine; + # Key + head (location) + transactions (location + staleness-counter) + my $location_size = $e->byte_size + $e->byte_size + ($e->num_txns - 1) * ($e->byte_size + $STALE_SIZE); + $self->{bucket_size} = $e->hash_size + $location_size; + } + return $self->{bucket_size}; +} + +# XXX This is such a poor hack. I need to rethink this code. +sub chopped_up { + my $self = shift; + + my $e = $self->engine; + + my @buckets; + foreach my $idx ( 0 .. $e->max_buckets - 1 ) { + my $spot = $self->offset + $self->base_size + $idx * $self->bucket_size; + my $md5 = $e->storage->read_at( $spot, $e->hash_size ); + + #XXX If we're chopping, why would we ever have the blank_md5? 
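+        # A blank md5 marks the first unused bucket; buckets are filled
+        # in order, so nothing past this point is occupied.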
+ last if $md5 eq $e->blank_md5; + + my $rest = $e->storage->read_at( undef, $self->bucket_size - $e->hash_size ); + push @buckets, [ $spot, $md5 . $rest ]; + } + + return @buckets; +} + +sub write_at_next_open { + my $self = shift; + my ($entry) = @_; + + #XXX This is such a hack! + $self->{_next_open} = 0 unless exists $self->{_next_open}; + + my $spot = $self->offset + $self->base_size + $self->{_next_open}++ * $self->bucket_size; + $self->engine->storage->print_at( $spot, $entry ); + + return $spot; +} + +sub has_md5 { + my $self = shift; + unless ( exists $self->{found} ) { + $self->find_md5; + } + return $self->{found}; +} + +sub find_md5 { + my $self = shift; + + $self->{found} = undef; + $self->{idx} = -1; + + if ( @_ ) { + $self->{key_md5} = shift; + } + + # If we don't have an MD5, then what are we supposed to do? + unless ( exists $self->{key_md5} ) { + DBM::Deep->_throw_error( "Cannot find_md5 without a key_md5 set" ); + } + + my $e = $self->engine; + foreach my $idx ( 0 .. $e->max_buckets - 1 ) { + my $potential = $e->storage->read_at( + $self->offset + $self->base_size + $idx * $self->bucket_size, $e->hash_size, + ); + + if ( $potential eq $e->blank_md5 ) { + $self->{idx} = $idx; + return; + } + + if ( $potential eq $self->{key_md5} ) { + $self->{found} = 1; + $self->{idx} = $idx; + return; + } + } + + return; +} + +sub write_md5 { + my $self = shift; + my ($args) = @_; + + DBM::Deep->_throw_error( "write_md5: no key" ) unless exists $args->{key}; + DBM::Deep->_throw_error( "write_md5: no key_md5" ) unless exists $args->{key_md5}; + DBM::Deep->_throw_error( "write_md5: no value" ) unless exists $args->{value}; + + my $engine = $self->engine; + + $args->{trans_id} = $engine->trans_id unless exists $args->{trans_id}; + + my $spot = $self->offset + $self->base_size + $self->{idx} * $self->bucket_size; + $engine->add_entry( $args->{trans_id}, $spot ); + + unless ($self->{found}) { + my $key_sector = DBM::Deep::Sector::File::Scalar->new({ + engine => $engine, + data => $args->{key}, + }); + + $engine->storage->print_at( $spot, + $args->{key_md5}, + pack( $StP{$engine->byte_size}, $key_sector->offset ), + ); + } + + my $loc = $spot + + $engine->hash_size + + $engine->byte_size; + + if ( $args->{trans_id} ) { + $loc += $engine->byte_size + ($args->{trans_id} - 1) * ( $engine->byte_size + $STALE_SIZE ); + + $engine->storage->print_at( $loc, + pack( $StP{$engine->byte_size}, $args->{value}->offset ), + pack( $StP{$STALE_SIZE}, $engine->get_txn_staleness_counter( $args->{trans_id} ) ), + ); + } + else { + $engine->storage->print_at( $loc, + pack( $StP{$engine->byte_size}, $args->{value}->offset ), + ); + } +} + +sub mark_deleted { + my $self = shift; + my ($args) = @_; + $args ||= {}; + + my $engine = $self->engine; + + $args->{trans_id} = $engine->trans_id unless exists $args->{trans_id}; + + my $spot = $self->offset + $self->base_size + $self->{idx} * $self->bucket_size; + $engine->add_entry( $args->{trans_id}, $spot ); + + my $loc = $spot + + $engine->hash_size + + $engine->byte_size; + + if ( $args->{trans_id} ) { + $loc += $engine->byte_size + ($args->{trans_id} - 1) * ( $engine->byte_size + $STALE_SIZE ); + + $engine->storage->print_at( $loc, + pack( $StP{$engine->byte_size}, 1 ), # 1 is the marker for deleted + pack( $StP{$STALE_SIZE}, $engine->get_txn_staleness_counter( $args->{trans_id} ) ), + ); + } + else { + $engine->storage->print_at( $loc, + pack( $StP{$engine->byte_size}, 1 ), # 1 is the marker for deleted + ); + } +} + +sub delete_md5 { + my $self = shift; + my ($args) 
= @_; + + my $engine = $self->engine; + return undef unless $self->{found}; + + # Save the location so that we can free the data + my $location = $self->get_data_location_for({ + allow_head => 0, + }); + my $key_sector = $self->get_key_for; + + my $spot = $self->offset + $self->base_size + $self->{idx} * $self->bucket_size; + $engine->storage->print_at( $spot, + $engine->storage->read_at( + $spot + $self->bucket_size, + $self->bucket_size * ( $engine->max_buckets - $self->{idx} - 1 ), + ), + chr(0) x $self->bucket_size, + ); + + $key_sector->free; + + my $data_sector = $self->engine->load_sector( $location ); + my $data = $data_sector->data({ export => 1 }); + $data_sector->free; + + return $data; +} + +sub get_data_location_for { + my $self = shift; + my ($args) = @_; + $args ||= {}; + + $args->{allow_head} = 0 unless exists $args->{allow_head}; + $args->{trans_id} = $self->engine->trans_id unless exists $args->{trans_id}; + $args->{idx} = $self->{idx} unless exists $args->{idx}; + + my $e = $self->engine; + + my $spot = $self->offset + $self->base_size + + $args->{idx} * $self->bucket_size + + $e->hash_size + + $e->byte_size; + + if ( $args->{trans_id} ) { + $spot += $e->byte_size + ($args->{trans_id} - 1) * ( $e->byte_size + $STALE_SIZE ); + } + + my $buffer = $e->storage->read_at( + $spot, + $e->byte_size + $STALE_SIZE, + ); + my ($loc, $staleness) = unpack( $StP{$e->byte_size} . ' ' . $StP{$STALE_SIZE}, $buffer ); + + # XXX Merge the two if-clauses below + if ( $args->{trans_id} ) { + # We have found an entry that is old, so get rid of it + if ( $staleness != (my $s = $e->get_txn_staleness_counter( $args->{trans_id} ) ) ) { + $e->storage->print_at( + $spot, + pack( $StP{$e->byte_size} . ' ' . $StP{$STALE_SIZE}, (0) x 2 ), + ); + $loc = 0; + } + } + + # If we're in a transaction and we never wrote to this location, try the + # HEAD instead. + if ( $args->{trans_id} && !$loc && $args->{allow_head} ) { + return $self->get_data_location_for({ + trans_id => 0, + allow_head => 1, + idx => $args->{idx}, + }); + } + + return $loc <= 1 ? 0 : $loc; +} + +sub get_data_for { + my $self = shift; + my ($args) = @_; + $args ||= {}; + + return unless $self->{found}; + my $location = $self->get_data_location_for({ + allow_head => $args->{allow_head}, + }); + return $self->engine->load_sector( $location ); +} + +sub get_key_for { + my $self = shift; + my ($idx) = @_; + $idx = $self->{idx} unless defined $idx; + + if ( $idx >= $self->engine->max_buckets ) { + DBM::Deep->_throw_error( "get_key_for(): Attempting to retrieve $idx" ); + } + + my $location = $self->engine->storage->read_at( + $self->offset + $self->base_size + $idx * $self->bucket_size + $self->engine->hash_size, + $self->engine->byte_size, + ); + $location = unpack( $StP{$self->engine->byte_size}, $location ); + DBM::Deep->_throw_error( "get_key_for: No location?" 
) unless $location; + + return $self->engine->load_sector( $location ); +} + +1; +__END__ diff --git a/src/modules/DBM/Deep/Sector/File/Data.pm b/src/modules/DBM/Deep/Sector/File/Data.pm new file mode 100644 index 0000000..9838a5f --- /dev/null +++ b/src/modules/DBM/Deep/Sector/File/Data.pm @@ -0,0 +1,15 @@ +package DBM::Deep::Sector::File::Data; + +use 5.008_004; + +use strict; +use warnings FATAL => 'all'; + +use base qw( DBM::Deep::Sector::File ); + +# This is in bytes +sub size { $_[0]{engine}->data_sector_size } +sub free_meth { return '_add_free_data_sector' } + +1; +__END__ diff --git a/src/modules/DBM/Deep/Sector/File/Index.pm b/src/modules/DBM/Deep/Sector/File/Index.pm new file mode 100644 index 0000000..62b46f0 --- /dev/null +++ b/src/modules/DBM/Deep/Sector/File/Index.pm @@ -0,0 +1,98 @@ +package DBM::Deep::Sector::File::Index; + +use strict; +use warnings FATAL => 'all'; + +use base qw( DBM::Deep::Sector::File ); + +my $STALE_SIZE = 2; + +# Please refer to the pack() documentation for further information +my %StP = ( + 1 => 'C', # Unsigned char value (no order needed as it's just one byte) + 2 => 'n', # Unsigned short in "network" (big-endian) order + 4 => 'N', # Unsigned long in "network" (big-endian) order + 8 => 'Q', # Usigned quad (no order specified, presumably machine-dependent) +); + +sub _init { + my $self = shift; + + my $engine = $self->engine; + + unless ( $self->offset ) { + my $leftover = $self->size - $self->base_size; + + $self->{offset} = $engine->_request_index_sector( $self->size ); + $engine->storage->print_at( $self->offset, $engine->SIG_INDEX ); # Sector type + # Skip staleness counter + $engine->storage->print_at( $self->offset + $self->base_size, + chr(0) x $leftover, # Zero-fill the rest + ); + } + + return $self; +} + +#XXX Change here +sub size { + my $self = shift; + unless ( $self->{size} ) { + my $e = $self->engine; + $self->{size} = $self->base_size + $e->byte_size * $e->hash_chars; + } + return $self->{size}; +} + +sub free_meth { return '_add_free_index_sector' } + +sub free { + my $self = shift; + my $e = $self->engine; + + for my $i ( 0 .. 
$e->hash_chars - 1 ) { + my $l = $self->get_entry( $i ) or next; + $e->load_sector( $l )->free; + } + + $self->SUPER::free(); +} + +sub _loc_for { + my $self = shift; + my ($idx) = @_; + return $self->offset + $self->base_size + $idx * $self->engine->byte_size; +} + +sub get_entry { + my $self = shift; + my ($idx) = @_; + + my $e = $self->engine; + + DBM::Deep->_throw_error( "get_entry: Out of range ($idx)" ) + if $idx < 0 || $idx >= $e->hash_chars; + + return unpack( + $StP{$e->byte_size}, + $e->storage->read_at( $self->_loc_for( $idx ), $e->byte_size ), + ); +} + +sub set_entry { + my $self = shift; + my ($idx, $loc) = @_; + + my $e = $self->engine; + + DBM::Deep->_throw_error( "set_entry: Out of range ($idx)" ) + if $idx < 0 || $idx >= $e->hash_chars; + + $self->engine->storage->print_at( + $self->_loc_for( $idx ), + pack( $StP{$e->byte_size}, $loc ), + ); +} + +1; +__END__ diff --git a/src/modules/DBM/Deep/Sector/File/Null.pm b/src/modules/DBM/Deep/Sector/File/Null.pm new file mode 100644 index 0000000..ce77ea0 --- /dev/null +++ b/src/modules/DBM/Deep/Sector/File/Null.pm @@ -0,0 +1,46 @@ +package DBM::Deep::Sector::File::Null; + +use 5.008_004; + +use strict; +use warnings FATAL => 'all'; + +use base qw( DBM::Deep::Sector::File::Data ); + +my $STALE_SIZE = 2; + +# Please refer to the pack() documentation for further information +my %StP = ( + 1 => 'C', # Unsigned char value (no order needed as it's just one byte) + 2 => 'n', # Unsigned short in "network" (big-endian) order + 4 => 'N', # Unsigned long in "network" (big-endian) order + 8 => 'Q', # Usigned quad (no order specified, presumably machine-dependent) +); + +sub type { $_[0]{engine}->SIG_NULL } +sub data_length { 0 } +sub data { return } + +sub _init { + my $self = shift; + + my $engine = $self->engine; + + unless ( $self->offset ) { + my $leftover = $self->size - $self->base_size - 1 * $engine->byte_size - 1; + + $self->{offset} = $engine->_request_data_sector( $self->size ); + $engine->storage->print_at( $self->offset, $self->type ); # Sector type + # Skip staleness counter + $engine->storage->print_at( $self->offset + $self->base_size, + pack( $StP{$engine->byte_size}, 0 ), # Chain loc + pack( $StP{1}, $self->data_length ), # Data length + chr(0) x $leftover, # Zero-fill the rest + ); + + return; + } +} + +1; +__END__ diff --git a/src/modules/DBM/Deep/Sector/File/Reference.pm b/src/modules/DBM/Deep/Sector/File/Reference.pm new file mode 100644 index 0000000..6f94ddd --- /dev/null +++ b/src/modules/DBM/Deep/Sector/File/Reference.pm @@ -0,0 +1,564 @@ +package DBM::Deep::Sector::File::Reference; + +use 5.008_004; + +use strict; +use warnings FATAL => 'all'; + +use base qw( DBM::Deep::Sector::File::Data ); + +use Scalar::Util; + +my $STALE_SIZE = 2; + +# Please refer to the pack() documentation for further information +my %StP = ( + 1 => 'C', # Unsigned char value (no order needed as it's just one byte) + 2 => 'n', # Unsigned short in "network" (big-endian) order + 4 => 'N', # Unsigned long in "network" (big-endian) order + 8 => 'Q', # Usigned quad (no order specified, presumably machine-dependent) +); + +sub _init { + my $self = shift; + + my $e = $self->engine; + + unless ( $self->offset ) { + my $classname = Scalar::Util::blessed( delete $self->{data} ); + my $leftover = $self->size - $self->base_size - 3 * $e->byte_size; + + my $class_offset = 0; + if ( defined $classname ) { + my $class_sector = DBM::Deep::Sector::File::Scalar->new({ + engine => $e, + data => $classname, + }); + $class_offset = $class_sector->offset; + } + + 
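+        # Allocate the sector, then lay it out: type byte, staleness
+        # counter, the index/blist pointer, the classname pointer, an
+        # initial refcount of 1, and zero-padding for the remainder.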
$self->{offset} = $e->_request_data_sector( $self->size ); + $e->storage->print_at( $self->offset, $self->type ); # Sector type + # Skip staleness counter + $e->storage->print_at( $self->offset + $self->base_size, + pack( $StP{$e->byte_size}, 0 ), # Index/BList loc + pack( $StP{$e->byte_size}, $class_offset ), # Classname loc + pack( $StP{$e->byte_size}, 1 ), # Initial refcount + chr(0) x $leftover, # Zero-fill the rest + ); + } + else { + $self->{type} = $e->storage->read_at( $self->offset, 1 ); + } + + $self->{staleness} = unpack( + $StP{$STALE_SIZE}, + $e->storage->read_at( $self->offset + $e->SIG_SIZE, $STALE_SIZE ), + ); + + return; +} + +sub get_data_location_for { + my $self = shift; + my ($args) = @_; + + # Assume that the head is not allowed unless otherwise specified. + $args->{allow_head} = 0 unless exists $args->{allow_head}; + + # Assume we don't create a new blist location unless otherwise specified. + $args->{create} = 0 unless exists $args->{create}; + + my $blist = $self->get_bucket_list({ + key_md5 => $args->{key_md5}, + key => $args->{key}, + create => $args->{create}, + }); + return unless $blist && $blist->{found}; + + # At this point, $blist knows where the md5 is. What it -doesn't- know yet + # is whether or not this transaction has this key. That's part of the next + # function call. + my $location = $blist->get_data_location_for({ + allow_head => $args->{allow_head}, + }) or return; + + return $location; +} + +sub get_data_for { + my $self = shift; + my ($args) = @_; + + my $location = $self->get_data_location_for( $args ) + or return; + + return $self->engine->load_sector( $location ); +} + +sub write_data { + my $self = shift; + my ($args) = @_; + + my $blist = $self->get_bucket_list({ + key_md5 => $args->{key_md5}, + key => $args->{key}, + create => 1, + }) or DBM::Deep->_throw_error( "How did write_data fail (no blist)?!" ); + + # Handle any transactional bookkeeping. + if ( $self->engine->trans_id ) { + if ( ! $blist->has_md5 ) { + $blist->mark_deleted({ + trans_id => 0, + }); + } + } + else { + my @trans_ids = $self->engine->get_running_txn_ids; + if ( $blist->has_md5 ) { + if ( @trans_ids ) { + my $old_value = $blist->get_data_for; + foreach my $other_trans_id ( @trans_ids ) { + next if $blist->get_data_location_for({ + trans_id => $other_trans_id, + allow_head => 0, + }); + $blist->write_md5({ + trans_id => $other_trans_id, + key => $args->{key}, + key_md5 => $args->{key_md5}, + value => $old_value->clone, + }); + } + } + } + else { + if ( @trans_ids ) { + foreach my $other_trans_id ( @trans_ids ) { + #XXX This doesn't seem to possible to ever happen . . . + next if $blist->get_data_location_for({ trans_id => $other_trans_id, allow_head => 0 }); + $blist->mark_deleted({ + trans_id => $other_trans_id, + }); + } + } + } + } + + #XXX Is this safe to do transactionally? + # Free the place we're about to write to. + if ( $blist->get_data_location_for({ allow_head => 0 }) ) { + $blist->get_data_for({ allow_head => 0 })->free; + } + + $blist->write_md5({ + key => $args->{key}, + key_md5 => $args->{key_md5}, + value => $args->{value}, + }); +} + +sub delete_key { + my $self = shift; + my ($args) = @_; + + # This can return nothing if we are deleting an entry in a hashref that was + # auto-vivified as part of the delete process. 
For example: + # my $x = {}; + # delete $x->{foo}{bar}; + my $blist = $self->get_bucket_list({ + key_md5 => $args->{key_md5}, + }) or return; + + # Save the location so that we can free the data + my $location = $blist->get_data_location_for({ + allow_head => 0, + }); + my $old_value = $location && $self->engine->load_sector( $location ); + + my @trans_ids = $self->engine->get_running_txn_ids; + + # If we're the HEAD and there are running txns, then we need to clone this + # value to the other transactions to preserve Isolation. + if ( $self->engine->trans_id == 0 ) { + if ( @trans_ids ) { + foreach my $other_trans_id ( @trans_ids ) { + next if $blist->get_data_location_for({ trans_id => $other_trans_id, allow_head => 0 }); + $blist->write_md5({ + trans_id => $other_trans_id, + key => $args->{key}, + key_md5 => $args->{key_md5}, + value => $old_value->clone, + }); + } + } + } + + my $data; + if ( @trans_ids ) { + $blist->mark_deleted( $args ); + + if ( $old_value ) { + #XXX Is this export => 1 actually doing anything? + $data = $old_value->data({ export => 1 }); + $old_value->free; + } + } + else { + $data = $blist->delete_md5( $args ); + } + + return $data; +} + +sub write_blist_loc { + my $self = shift; + my ($loc) = @_; + + my $engine = $self->engine; + $engine->storage->print_at( $self->offset + $self->base_size, + pack( $StP{$engine->byte_size}, $loc ), + ); +} + +sub get_blist_loc { + my $self = shift; + + my $e = $self->engine; + my $blist_loc = $e->storage->read_at( $self->offset + $self->base_size, $e->byte_size ); + return unpack( $StP{$e->byte_size}, $blist_loc ); +} + +sub get_bucket_list { + my $self = shift; + my ($args) = @_; + $args ||= {}; + + # XXX Add in check here for recycling? + + my $engine = $self->engine; + + my $blist_loc = $self->get_blist_loc; + + # There's no index or blist yet + unless ( $blist_loc ) { + return unless $args->{create}; + + my $blist = DBM::Deep::Sector::File::BucketList->new({ + engine => $engine, + key_md5 => $args->{key_md5}, + }); + + $self->write_blist_loc( $blist->offset ); +# $engine->storage->print_at( $self->offset + $self->base_size, +# pack( $StP{$engine->byte_size}, $blist->offset ), +# ); + + return $blist; + } + + my $sector = $engine->load_sector( $blist_loc ) + or DBM::Deep->_throw_error( "Cannot read sector at $blist_loc in get_bucket_list()" ); + my $i = 0; + my $last_sector = undef; + while ( $sector->isa( 'DBM::Deep::Sector::File::Index' ) ) { + $blist_loc = $sector->get_entry( ord( substr( $args->{key_md5}, $i++, 1 ) ) ); + $last_sector = $sector; + if ( $blist_loc ) { + $sector = $engine->load_sector( $blist_loc ) + or DBM::Deep->_throw_error( "Cannot read sector at $blist_loc in get_bucket_list()" ); + } + else { + $sector = undef; + last; + } + } + + # This means we went through the Index sector(s) and found an empty slot + unless ( $sector ) { + return unless $args->{create}; + + DBM::Deep->_throw_error( "No last_sector when attempting to build a new entry" ) + unless $last_sector; + + my $blist = DBM::Deep::Sector::File::BucketList->new({ + engine => $engine, + key_md5 => $args->{key_md5}, + }); + + $last_sector->set_entry( ord( substr( $args->{key_md5}, $i - 1, 1 ) ) => $blist->offset ); + + return $blist; + } + + $sector->find_md5( $args->{key_md5} ); + + # See whether or not we need to reindex the bucketlist + # Yes, the double-braces are there for a reason. if() doesn't create a + # redo-able block, so we have to create a bare block within the if() for + # redo-purposes. 
+ # Patch and idea submitted by sprout@cpan.org. -RobK, 2008-01-09 + if ( !$sector->has_md5 && $args->{create} && $sector->{idx} == -1 ) {{ + my $redo; + + my $new_index = DBM::Deep::Sector::File::Index->new({ + engine => $engine, + }); + + my %blist_cache; + #XXX q.v. the comments for this function. + foreach my $entry ( $sector->chopped_up ) { + my ($spot, $md5) = @{$entry}; + my $idx = ord( substr( $md5, $i, 1 ) ); + + # XXX This is inefficient + my $blist = $blist_cache{$idx} + ||= DBM::Deep::Sector::File::BucketList->new({ + engine => $engine, + }); + + $new_index->set_entry( $idx => $blist->offset ); + + my $new_spot = $blist->write_at_next_open( $md5 ); + $engine->reindex_entry( $spot => $new_spot ); + } + + # Handle the new item separately. + { + my $idx = ord( substr( $args->{key_md5}, $i, 1 ) ); + + # If all the previous blist's items have been thrown into one + # blist and the new item belongs in there too, we need + # another index. + if ( keys %blist_cache == 1 and each %blist_cache == $idx ) { + ++$i, ++$redo; + } else { + my $blist = $blist_cache{$idx} + ||= DBM::Deep::Sector::File::BucketList->new({ + engine => $engine, + }); + + $new_index->set_entry( $idx => $blist->offset ); + + #XXX THIS IS HACKY! + $blist->find_md5( $args->{key_md5} ); + $blist->write_md5({ + key => $args->{key}, + key_md5 => $args->{key_md5}, + value => DBM::Deep::Sector::File::Null->new({ + engine => $engine, + data => undef, + }), + }); + } + } + + if ( $last_sector ) { + $last_sector->set_entry( + ord( substr( $args->{key_md5}, $i - 1, 1 ) ), + $new_index->offset, + ); + } else { + $engine->storage->print_at( $self->offset + $self->base_size, + pack( $StP{$engine->byte_size}, $new_index->offset ), + ); + } + + $sector->wipe; + $sector->free; + + if ( $redo ) { + (undef, $sector) = %blist_cache; + $last_sector = $new_index; + redo; + } + + $sector = $blist_cache{ ord( substr( $args->{key_md5}, $i, 1 ) ) }; + $sector->find_md5( $args->{key_md5} ); + }} + + return $sector; +} + +sub get_class_offset { + my $self = shift; + + my $e = $self->engine; + return unpack( + $StP{$e->byte_size}, + $e->storage->read_at( + $self->offset + $self->base_size + 1 * $e->byte_size, $e->byte_size, + ), + ); +} + +sub get_classname { + my $self = shift; + + my $class_offset = $self->get_class_offset; + + return unless $class_offset; + + return $self->engine->load_sector( $class_offset )->data; +} + +# Look to hoist this method into a ::Reference trait +sub data { + my $self = shift; + my ($args) = @_; + $args ||= {}; + + my $engine = $self->engine; + my $cache_entry = $engine->cache->{ $self->offset } ||= {}; + my $trans_id = $engine->trans_id; + my $obj; + if ( !defined $$cache_entry{ $trans_id } ) { + $obj = DBM::Deep->new({ + type => $self->type, + base_offset => $self->offset, + staleness => $self->staleness, + storage => $engine->storage, + engine => $engine, + }); + + $$cache_entry{ $trans_id } = $obj; + Scalar::Util::weaken($$cache_entry{ $trans_id }); + } + else { + $obj = $$cache_entry{ $trans_id }; + } + + # We're not exporting, so just return. + unless ( $args->{export} ) { + if ( $engine->storage->{autobless} ) { + my $classname = $self->get_classname; + if ( defined $classname ) { + bless $obj, $classname; + } + } + + return $obj; + } + + # We shouldn't export if this is still referred to. + if ( $self->get_refcount > 1 ) { + return $obj; + } + + return $obj->export; +} + +sub free { + my $self = shift; + + # We're not ready to be removed yet. 
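+    # Something else still holds a reference to this sector, so only the
+    # refcount changes.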
+ return if $self->decrement_refcount > 0; + + my $e = $self->engine; + + # Rebless the object into DBM::Deep::Null. + # In external_refs mode, this will already have been removed from + # the cache, so we can skip this. + if(!$e->{external_refs}) { +# eval { %{ $e->cache->{ $self->offset }{ $e->trans_id } } = (); }; +# eval { @{ $e->cache->{ $self->offset }{ $e->trans_id } } = (); }; + my $cache = $e->cache; + my $off = $self->offset; + if( exists $cache->{ $off } + and exists $cache->{ $off }{ my $trans_id = $e->trans_id } ) { + bless $cache->{ $off }{ $trans_id }, 'DBM::Deep::Null' + if defined $cache->{ $off }{ $trans_id }; + delete $cache->{ $off }{ $trans_id }; + } + } + + my $blist_loc = $self->get_blist_loc; + $e->load_sector( $blist_loc )->free if $blist_loc; + + my $class_loc = $self->get_class_offset; + $e->load_sector( $class_loc )->free if $class_loc; + + $self->SUPER::free(); +} + +sub increment_refcount { + my $self = shift; + + my $refcount = $self->get_refcount; + + $refcount++; + + $self->write_refcount( $refcount ); + + return $refcount; +} + +sub decrement_refcount { + my $self = shift; + + my $refcount = $self->get_refcount; + + $refcount--; + + $self->write_refcount( $refcount ); + + return $refcount; +} + +sub get_refcount { + my $self = shift; + + my $e = $self->engine; + return unpack( + $StP{$e->byte_size}, + $e->storage->read_at( + $self->offset + $self->base_size + 2 * $e->byte_size, $e->byte_size, + ), + ); +} + +sub write_refcount { + my $self = shift; + my ($num) = @_; + + my $e = $self->engine; + $e->storage->print_at( + $self->offset + $self->base_size + 2 * $e->byte_size, + pack( $StP{$e->byte_size}, $num ), + ); +} + +sub clear { + my $self = shift; + + my $blist_loc = $self->get_blist_loc or return; + + my $engine = $self->engine; + + # This won't work with autoblessed items. + if ($engine->get_running_txn_ids) { + # ~~~ Temporary; the code below this block needs to be modified to + # take transactions into account. 
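+        # For now, clear through the tied DBM::Deep object itself, whose
+        # delete path takes the running transactions into account.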
+ $self->data->_get_self->_clear; + return; + } + + my $sector = $engine->load_sector( $blist_loc ) + or DBM::Deep->_throw_error( + "Cannot read sector at $blist_loc in clear()" + ); + + # Set blist offset to 0 + $engine->storage->print_at( $self->offset + $self->base_size, + pack( $StP{$engine->byte_size}, 0 ), + ); + + # Free the blist + $sector->free; + + return; +} + +1; +__END__ diff --git a/src/modules/DBM/Deep/Sector/File/Scalar.pm b/src/modules/DBM/Deep/Sector/File/Scalar.pm new file mode 100644 index 0000000..cd4d793 --- /dev/null +++ b/src/modules/DBM/Deep/Sector/File/Scalar.pm @@ -0,0 +1,143 @@ +package DBM::Deep::Sector::File::Scalar; + +use 5.008_004; + +use strict; +use warnings FATAL => 'all'; +no warnings 'recursion'; + +use base qw( DBM::Deep::Sector::File::Data ); + +my $STALE_SIZE = 2; + +# Please refer to the pack() documentation for further information +my %StP = ( + 1 => 'C', # Unsigned char value (no order needed as it's just one byte) + 2 => 'n', # Unsigned short in "network" (big-endian) order + 4 => 'N', # Unsigned long in "network" (big-endian) order + 8 => 'Q', # Usigned quad (no order specified, presumably machine-dependent) +); + +sub free { + my $self = shift; + + my $chain_loc = $self->chain_loc; + + $self->SUPER::free(); + + if ( $chain_loc ) { + $self->engine->load_sector( $chain_loc )->free; + } + + return; +} + +sub _init { + my $self = shift; + + my $engine = $self->engine; + + unless ( $self->offset ) { + my $data_section = $self->size - $self->base_size - $engine->byte_size - 1; + + $self->{offset} = $engine->_request_data_sector( $self->size ); + + my $data = delete $self->{data}; + my $utf8 = do { no warnings 'utf8'; $data !~ /^[\0-\xff]*\z/ }; + if($utf8){ + if($engine->{v} < 4) { + DBM::Deep->_throw_error( + "This database format version is too old for Unicode" + ); + } + utf8::encode $data; + $self->{type} = $engine->SIG_UNIDATA; + } + else { $self->{type} = $engine->SIG_DATA; } + + my $dlen = length $data; + my $continue = 1; + my $curr_offset = $self->offset; + while ( $continue ) { + + my $next_offset = 0; + + my ($leftover, $this_len, $chunk); + if ( $dlen > $data_section ) { + $leftover = 0; + $this_len = $data_section; + $chunk = substr( $data, 0, $this_len ); + + $dlen -= $data_section; + $next_offset = $engine->_request_data_sector( $self->size ); + $data = substr( $data, $this_len ); + } + else { + $leftover = $data_section - $dlen; + $this_len = $dlen; + $chunk = $data; + + $continue = 0; + } + + $engine->storage->print_at( $curr_offset, $self->type ); # Sector type + # Skip staleness + $engine->storage->print_at( $curr_offset + $self->base_size, + pack( $StP{$engine->byte_size}, $next_offset ), # Chain loc + pack( $StP{1}, $this_len ), # Data length + $chunk, # Data to be stored in this sector + chr(0) x $leftover, # Zero-fill the rest + ); + + $curr_offset = $next_offset; + } + + return; + } +} + +sub data_length { + my $self = shift; + + my $buffer = $self->engine->storage->read_at( + $self->offset + $self->base_size + $self->engine->byte_size, 1 + ); + + return unpack( $StP{1}, $buffer ); +} + +sub chain_loc { + my $self = shift; + return unpack( + $StP{$self->engine->byte_size}, + $self->engine->storage->read_at( + $self->offset + $self->base_size, + $self->engine->byte_size, + ), + ); +} + +sub data { + my $self = shift; + my $engine = $self->engine; + + my $data; + while ( 1 ) { + my $chain_loc = $self->chain_loc; + + $data .= $engine->storage->read_at( + $self->offset + $self->base_size + $engine->byte_size + 1, 
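+            # (the payload starts after the chain pointer and the
+            # one-byte length field)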
$self->data_length, + ); + + last unless $chain_loc; + + $self = $engine->load_sector( $chain_loc ); + } + + utf8::decode $data if $self->type eq $engine->SIG_UNIDATA; + + return $data; +} + +1; +__END__ diff --git a/src/modules/DBM/Deep/Storage.pm b/src/modules/DBM/Deep/Storage.pm new file mode 100644 index 0000000..ec6b0ac --- /dev/null +++ b/src/modules/DBM/Deep/Storage.pm @@ -0,0 +1,70 @@ +package DBM::Deep::Storage; + +use 5.008_004; + +use strict; +use warnings FATAL => 'all'; + +=head1 NAME + +DBM::Deep::Storage - abstract base class for storage + +=head2 flush() + +This flushes the filehandle. This takes no parameters and returns nothing. + +=cut + +sub flush { die "flush must be implemented in a child class" } + +=head2 is_writable() + +This takes no parameters. It returns a boolean saying if this filehandle is +writable. + +Taken from L. + +=cut + +sub is_writable { die "is_writable must be implemented in a child class" } + +=head1 LOCKING + +This is where the actual locking of the storage medium is performed. +Nested locking is supported. + +B: It is unclear what will happen if a read lock is taken, then +a write lock is taken as a nested lock, then the write lock is released. + +Currently, the only locking method supported is flock(1). This is a +whole-file lock. In the future, more granular locking may be supported. +The API for that is unclear right now. + +The following methods manage the locking status. In all cases, they take +a L object and returns nothing. + +=over 4 + +=item * lock_exclusive( $obj ) + +Take a lock usable for writing. + +=item * lock_shared( $obj ) + +Take a lock usable for reading. + +=item * unlock( $obj ) + +Releases the last lock taken. If this is the outermost lock, then the +object is actually unlocked. + +=back + +=cut + +sub lock_exclusive { die "lock_exclusive must be implemented in a child class" } +sub lock_shared { die "lock_shared must be implemented in a child class" } +sub unlock { die "unlock must be implemented in a child class" } + +1; +__END__ diff --git a/src/modules/DBM/Deep/Storage/DBI.pm b/src/modules/DBM/Deep/Storage/DBI.pm new file mode 100644 index 0000000..26e55e7 --- /dev/null +++ b/src/modules/DBM/Deep/Storage/DBI.pm @@ -0,0 +1,170 @@ +package DBM::Deep::Storage::DBI; + +use 5.008_004; + +use strict; +use warnings FATAL => 'all'; + +use base 'DBM::Deep::Storage'; + +use DBI; + +sub new { + my $class = shift; + my ($args) = @_; + + my $self = bless { + autobless => 1, + dbh => undef, + dbi => undef, + }, $class; + + # Grab the parameters we want to use + foreach my $param ( keys %$self ) { + next unless exists $args->{$param}; + $self->{$param} = $args->{$param}; + } + + if ( $self->{dbh} ) { + $self->{driver} = lc $self->{dbh}->{Driver}->{Name}; + } + else { + $self->open; + } + + # Foreign keys are turned off by default in SQLite3 (for now) + #q.v. http://search.cpan.org/~adamk/DBD-SQLite-1.27/lib/DBD/SQLite.pm#Foreign_Keys + # for more info. + if ( $self->driver eq 'sqlite' ) { + $self->{dbh}->do( 'PRAGMA foreign_keys = ON' ); + } + + return $self; +} + +sub open { + my $self = shift; + + return if $self->{dbh}; + + $self->{dbh} = DBI->connect( + $self->{dbi}{dsn}, $self->{dbi}{username}, $self->{dbi}{password}, { + AutoCommit => 1, + PrintError => 0, + RaiseError => 1, + %{ $self->{dbi}{connect_args} || {} }, + }, + ) or die $DBI::error; + + # Should we use the same method as done in new() if passed a $dbh? + (undef, $self->{driver}) = map defined($_) ? 
lc($_) : undef, DBI->parse_dsn( $self->{dbi}{dsn} ); + + return 1; +} + +sub close { + my $self = shift; + $self->{dbh}->disconnect if $self->{dbh}; + return 1; +} + +sub DESTROY { + my $self = shift; + $self->close if ref $self; +} + +# Is there a portable way of determining writability to a DBH? +sub is_writable { + my $self = shift; + return 1; +} + +sub lock_exclusive { + my $self = shift; +} + +sub lock_shared { + my $self = shift; +} + +sub unlock { + my $self = shift; +# $self->{dbh}->commit; +} + +#sub begin_work { +# my $self = shift; +# $self->{dbh}->begin_work; +#} +# +#sub commit { +# my $self = shift; +# $self->{dbh}->commit; +#} +# +#sub rollback { +# my $self = shift; +# $self->{dbh}->rollback; +#} + +sub read_from { + my $self = shift; + my ($table, $cond, @cols) = @_; + + $cond = { id => $cond } unless ref $cond; + + my @keys = keys %$cond; + my $where = join ' AND ', map { "`$_` = ?" } @keys; + + return $self->{dbh}->selectall_arrayref( + "SELECT `@{[join '`,`', @cols ]}` FROM $table WHERE $where", + { Slice => {} }, @{$cond}{@keys}, + ); +} + +sub flush {} + +sub write_to { + my $self = shift; + my ($table, $id, %args) = @_; + + my @keys = keys %args; + my $sql = + "REPLACE INTO $table ( `id`, " + . join( ',', map { "`$_`" } @keys ) + . ") VALUES (" + . join( ',', ('?') x (@keys + 1) ) + . ")"; + $self->{dbh}->do( $sql, undef, $id, @args{@keys} ); + + return $self->{dbh}->last_insert_id("", "", "", ""); +} + +sub delete_from { + my $self = shift; + my ($table, $cond) = @_; + + $cond = { id => $cond } unless ref $cond; + + my @keys = keys %$cond; + my $where = join ' AND ', map { "`$_` = ?" } @keys; + + $self->{dbh}->do( + "DELETE FROM $table WHERE $where", undef, @{$cond}{@keys}, + ); +} + +sub driver { $_[0]{driver} } + +sub rand_function { + my $self = shift; + my $driver = $self->driver; + + $driver eq 'sqlite' and return 'random()'; + $driver eq 'mysql' and return 'RAND()'; + + die "rand_function undefined for $driver\n"; +} + +1; +__END__ diff --git a/src/modules/DBM/Deep/Storage/File.pm b/src/modules/DBM/Deep/Storage/File.pm new file mode 100644 index 0000000..38eaef3 --- /dev/null +++ b/src/modules/DBM/Deep/Storage/File.pm @@ -0,0 +1,399 @@ +package DBM::Deep::Storage::File; + +use 5.008_004; + +use strict; +use warnings FATAL => 'all'; + +use Fcntl qw( :DEFAULT :flock :seek ); + +use constant DEBUG => 0; + +use base 'DBM::Deep::Storage'; + +=head1 NAME + +DBM::Deep::Storage::File - mediate low-level interaction with storage mechanism + +=head1 PURPOSE + +This is an internal-use-only object for L. It mediates the low-level +interaction with the storage mechanism. + +Currently, the only storage mechanism supported is the file system. + +=head1 OVERVIEW + +This class provides an abstraction to the storage mechanism so that the Engine +(the only class that uses this class) doesn't have to worry about that. + +=head1 METHODS + +=head2 new( \%args ) + +=cut + +sub new { + my $class = shift; + my ($args) = @_; + + my $self = bless { + autobless => 1, + autoflush => 1, + end => 0, + fh => undef, + file => undef, + file_offset => 0, + locking => 1, + locked => 0, +#XXX Migrate this to the engine, where it really belongs. 
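+        # Optional user-supplied filters, applied to keys and values on
+        # their way to and from disk.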
+ filter_store_key => undef, + filter_store_value => undef, + filter_fetch_key => undef, + filter_fetch_value => undef, + }, $class; + + # Grab the parameters we want to use + foreach my $param ( keys %$self ) { + next unless exists $args->{$param}; + $self->{$param} = $args->{$param}; + } + + if ( $self->{fh} && !$self->{file_offset} ) { + $self->{file_offset} = tell( $self->{fh} ); + } + + $self->open unless $self->{fh}; + + return $self; +} + +=head2 open() + +This method opens the filehandle for the filename in C< file >. + +There is no return value. + +=cut + +# TODO: What happens if we ->open when we already have a $fh? +sub open { + my $self = shift; + + # Adding O_BINARY should remove the need for the binmode below. However, + # I'm not going to remove it because I don't have the Win32 chops to be + # absolutely certain everything will be ok. + my $flags = O_CREAT | O_BINARY; + + if ( !-e $self->{file} || -w _ ) { + $flags |= O_RDWR; + } + else { + $flags |= O_RDONLY; + } + + my $fh; + sysopen( $fh, $self->{file}, $flags ) + or die "DBM::Deep: Cannot sysopen file '$self->{file}': $!\n"; + $self->{fh} = $fh; + + # Even though we use O_BINARY, better be safe than sorry. + binmode $fh; + + if ($self->{autoflush}) { + my $old = select $fh; + $|=1; + select $old; + } + + return 1; +} + +=head2 close() + +If the filehandle is opened, this will close it. + +There is no return value. + +=cut + +sub close { + my $self = shift; + + if ( $self->{fh} ) { + close $self->{fh}; + $self->{fh} = undef; + } + + return 1; +} + +=head2 size() + +This will return the size of the DB. If file_offset is set, this will take that into account. + +B: This function isn't used internally anywhere. + +=cut + +sub size { + my $self = shift; + + return 0 unless $self->{fh}; + return( (-s $self->{fh}) - $self->{file_offset} ); +} + +=head2 set_inode() + +This will set the inode value of the underlying file object. + +This is only needed to handle some obscure Win32 bugs. It really shouldn't be +needed outside this object. + +There is no return value. + +=cut + +sub set_inode { + my $self = shift; + + unless ( defined $self->{inode} ) { + my @stats = stat($self->{fh}); + $self->{inode} = $stats[1]; + $self->{end} = $stats[7]; + } + + return 1; +} + +=head2 print_at( $offset, @data ) + +This takes an optional offset and some data to print. + +C< $offset >, if defined, will be used to seek into the file. If file_offset is +set, it will be used as the zero location. If it is undefined, no seeking will +occur. Then, C< @data > will be printed to the current location. + +There is no return value. + +=cut + +sub print_at { + my $self = shift; + my $loc = shift; + + local ($,,$\); + + my $fh = $self->{fh}; + if ( defined $loc ) { + seek( $fh, $loc + $self->{file_offset}, SEEK_SET ); + } + + if ( DEBUG ) { + my $caller = join ':', (caller)[0,2]; + my $len = length( join '', @_ ); + warn "($caller) print_at( " . (defined $loc ? $loc : '') . ", $len )\n"; + } + + print( $fh @_ ) or die "Internal Error (print_at($loc)): $!\n"; + + return 1; +} + +=head2 read_at( $offset, $length ) + +This takes an optional offset and a length. + +C< $offset >, if defined, will be used to seek into the file. If file_offset is +set, it will be used as the zero location. If it is undefined, no seeking will +occur. Then, C< $length > bytes will be read from the current location. + +The data read will be returned. 
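+For example, reading four bytes at a known offset (the offset and length
+here are illustrative):
+
+    my $buffer = $storage->read_at( $loc, 4 );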
+ +=cut + +sub read_at { + my $self = shift; + my ($loc, $size) = @_; + + my $fh = $self->{fh}; + if ( defined $loc ) { + seek( $fh, $loc + $self->{file_offset}, SEEK_SET ); + } + + if ( DEBUG ) { + my $caller = join ':', (caller)[0,2]; + warn "($caller) read_at( " . (defined $loc ? $loc : '') . ", $size )\n"; + } + + my $buffer; + read( $fh, $buffer, $size); + + return $buffer; +} + +=head2 DESTROY + +When the ::Storage::File object goes out of scope, it will be closed. + +=cut + +sub DESTROY { + my $self = shift; + return unless $self; + + $self->close; + + return; +} + +=head2 request_space( $size ) + +This takes a size and adds that much space to the DBM. + +This returns the offset for the new location. + +=cut + +sub request_space { + my $self = shift; + my ($size) = @_; + + #XXX Do I need to reset $self->{end} here? I need a testcase + my $loc = $self->{end}; + $self->{end} += $size; + + return $loc; +} + +=head2 copy_stats( $target_filename ) + +This will take the stats for the current filehandle and apply them to +C< $target_filename >. The stats copied are: + +=over 4 + +=item * Onwer UID and GID + +=item * Permissions + +=back + +=cut + +sub copy_stats { + my $self = shift; + my ($temp_filename) = @_; + + my @stats = stat( $self->{fh} ); + my $perms = $stats[2] & 07777; + my $uid = $stats[4]; + my $gid = $stats[5]; + chown( $uid, $gid, $temp_filename ); + chmod( $perms, $temp_filename ); +} + +sub flush { + my $self = shift; + + # Flush the filehandle + my $old_fh = select $self->{fh}; + my $old_af = $|; $| = 1; $| = $old_af; + select $old_fh; + + return 1; +} + +sub is_writable { + my $self = shift; + + my $fh = $self->{fh}; + return unless defined $fh; + return unless defined fileno $fh; + local $\ = ''; # just in case + no warnings; # temporarily disable warnings + local $^W; # temporarily disable warnings + return print $fh ''; +} + +sub lock_exclusive { + my $self = shift; + my ($obj) = @_; + return $self->_lock( $obj, LOCK_EX ); +} + +sub lock_shared { + my $self = shift; + my ($obj) = @_; + return $self->_lock( $obj, LOCK_SH ); +} + +sub _lock { + my $self = shift; + my ($obj, $type) = @_; + + $type = LOCK_EX unless defined $type; + + #XXX This is a temporary fix for Win32 and autovivification. It + # needs to improve somehow. -RobK, 2008-03-09 + if ( $^O eq 'MSWin32' || $^O eq 'cygwin' ) { + $type = LOCK_EX; + } + + if (!defined($self->{fh})) { return; } + + #XXX This either needs to allow for upgrading a shared lock to an + # exclusive lock or something else with autovivification. + # -RobK, 2008-03-09 + if ($self->{locking}) { + if (!$self->{locked}) { + flock($self->{fh}, $type); + + # refresh end counter in case file has changed size + my @stats = stat($self->{fh}); + $self->{end} = $stats[7]; + + # double-check file inode, in case another process + # has optimize()d our file while we were waiting. 
+            if (defined($self->{inode}) && $stats[1] != $self->{inode}) {
+                $self->close;
+                $self->open;
+
+                #XXX This needs work
+                $obj->{engine}->setup( $obj );
+
+                flock($self->{fh}, $type); # re-lock
+
+                # This may not be necessary after re-opening
+                $self->{end} = (stat($self->{fh}))[7]; # re-end
+            }
+        }
+        $self->{locked}++;
+
+        return 1;
+    }
+
+    return;
+}
+
+sub unlock {
+    my $self = shift;
+
+    if (!defined($self->{fh})) { return; }
+
+    if ($self->{locking} && $self->{locked} > 0) {
+        $self->{locked}--;
+
+        if (!$self->{locked}) {
+            flock($self->{fh}, LOCK_UN);
+            return 1;
+        }
+
+        return;
+    }
+
+    return;
+}
+
+1;
+__END__
diff --git a/src/modules/Exporter/Shiny.pm b/src/modules/Exporter/Shiny.pm
new file mode 100644
index 0000000..7b7ae07
--- /dev/null
+++ b/src/modules/Exporter/Shiny.pm
@@ -0,0 +1,116 @@
+package Exporter::Shiny;
+
+use 5.006001;
+use strict;
+use warnings;
+
+use Exporter::Tiny ();
+
+our $AUTHORITY = 'cpan:TOBYINK';
+our $VERSION   = '1.004004';
+
+sub import {
+    my $me = shift;
+    my $caller = caller;
+
+    (my $nominal_file = $caller) =~ s(::)(/)g;
+    $INC{"$nominal_file\.pm"} ||= __FILE__;
+
+    if (@_ == 2 and $_[0] eq -setup)
+    {
+        my (undef, $opts) = @_;
+        @_ = @{ delete($opts->{exports}) || [] };
+
+        if (%$opts) {
+            Exporter::Tiny::_croak(
+                'Unsupported Sub::Exporter-style options: %s',
+                join(q[, ], sort keys %$opts),
+            );
+        }
+    }
+
+    ref($_) && Exporter::Tiny::_croak('Expected sub name, got ref %s', $_) for @_;
+
+    no strict qw(refs);
+    push @{"$caller\::ISA"}, 'Exporter::Tiny';
+    push @{"$caller\::EXPORT_OK"}, @_;
+}
+
+1;
+
+__END__
+
+=pod
+
+=encoding utf-8
+
+=head1 NAME
+
+Exporter::Shiny - shortcut for Exporter::Tiny
+
+=head1 SYNOPSIS
+
+   use Exporter::Shiny qw( foo bar );
+
+Is a shortcut for:
+
+   use base "Exporter::Tiny";
+   push our(@EXPORT_OK), qw( foo bar );
+
+For compatibility with L<Sub::Exporter>, the following longer syntax is
+also supported:
+
+   use Exporter::Shiny -setup => {
+      exports => [qw( foo bar )],
+   };
+
+=head1 DESCRIPTION
+
+This is a very small wrapper to simplify using L<Exporter::Tiny>.
+
+It does the following:
+
+=over
+
+=item * Marks your package as loaded in C<< %INC >>;
+
+=item * Pushes any function names in the import list onto your C<< @EXPORT_OK >>; and
+
+=item * Pushes C<< "Exporter::Tiny" >> onto your C<< @ISA >>.
+
+=back
+
+It doesn't set up C<< %EXPORT_TAGS >> or C<< @EXPORT >>, but there's
+nothing stopping you doing that yourself.
+
+=head1 BUGS
+
+Please report any bugs to
+L<http://rt.cpan.org/Dist/Display.html?Queue=Exporter-Tiny>.
+
+=head1 SEE ALSO
+
+This module is just a wrapper around L<Exporter::Tiny>, so take a look
+at L<Exporter::Tiny::Manual::QuickStart> and
+L<Exporter::Tiny::Manual::Exporting> for further information on what
+features are available.
+
+Other interesting exporters: L<Sub::Exporter>, L<Exporter>.
+
+=head1 AUTHOR
+
+Toby Inkster E<lt>tobyink@cpan.orgE<gt>.
+
+=head1 COPYRIGHT AND LICENCE
+
+This software is copyright (c) 2014, 2017 by Toby Inkster.
+
+This is free software; you can redistribute it and/or modify it under
+the same terms as the Perl 5 programming language system itself.
+
+=head1 DISCLAIMER OF WARRANTIES
+
+THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ diff --git a/src/modules/Exporter/Tiny.pm b/src/modules/Exporter/Tiny.pm new file mode 100644 index 0000000..3879a40 --- /dev/null +++ b/src/modules/Exporter/Tiny.pm @@ -0,0 +1,525 @@ +package Exporter::Tiny; + +use 5.006001; +use strict; +use warnings; no warnings qw(void once uninitialized numeric redefine); + +our $AUTHORITY = 'cpan:TOBYINK'; +our $VERSION = '1.004004'; +our @EXPORT_OK = qw< mkopt mkopt_hash _croak _carp >; + +sub _croak ($;@) { require Carp; my $fmt = shift; @_ = sprintf($fmt, @_); goto \&Carp::croak } +sub _carp ($;@) { require Carp; my $fmt = shift; @_ = sprintf($fmt, @_); goto \&Carp::carp } + +my $_process_optlist = sub +{ + my $class = shift; + my ($global_opts, $opts, $want, $not_want) = @_; + + while (@$opts) + { + my $opt = shift @{$opts}; + my ($name, $value) = @$opt; + + ($name =~ m{\A\!(/.+/[msixpodual]*)\z}) ? + do { + my @not = $class->_exporter_expand_regexp("$1", $value, $global_opts); + ++$not_want->{$_->[0]} for @not; + } : + ($name =~ m{\A\![:-](.+)\z}) ? + do { + my @not = $class->_exporter_expand_tag("$1", $value, $global_opts); + ++$not_want->{$_->[0]} for @not; + } : + ($name =~ m{\A\!(.+)\z}) ? + (++$not_want->{$1}) : + ($name =~ m{\A[:-](.+)\z}) ? + push(@$opts, $class->_exporter_expand_tag("$1", $value, $global_opts)) : + ($name =~ m{\A/.+/[msixpodual]*\z}) ? + push(@$opts, $class->_exporter_expand_regexp($name, $value, $global_opts)) : + # else ? + push(@$want, $opt); + } +}; + +sub import +{ + my $class = shift; + my $global_opts = +{ @_ && ref($_[0]) eq q(HASH) ? %{+shift} : () }; + $global_opts->{into} = caller unless exists $global_opts->{into}; + + my @want; + my %not_want; $global_opts->{not} = \%not_want; + my @args = do { no strict qw(refs); @_ ? @_ : @{"$class\::EXPORT"} }; + my $opts = mkopt(\@args); + $class->$_process_optlist($global_opts, $opts, \@want, \%not_want); + + my $permitted = $class->_exporter_permitted_regexp($global_opts); + $class->_exporter_validate_opts($global_opts); + + for my $wanted (@want) + { + next if $not_want{$wanted->[0]}; + + my %symbols = $class->_exporter_expand_sub(@$wanted, $global_opts, $permitted); + $class->_exporter_install_sub($_, $wanted->[1], $global_opts, $symbols{$_}) + for keys %symbols; + } +} + +sub unimport +{ + my $class = shift; + my $global_opts = +{ @_ && ref($_[0]) eq q(HASH) ? %{+shift} : () }; + $global_opts->{into} = caller unless exists $global_opts->{into}; + $global_opts->{is_unimport} = 1; + + my @want; + my %not_want; $global_opts->{not} = \%not_want; + my @args = do { our %TRACKED; @_ ? @_ : keys(%{$TRACKED{$class}{$global_opts->{into}}}) }; + my $opts = mkopt(\@args); + $class->$_process_optlist($global_opts, $opts, \@want, \%not_want); + + my $permitted = $class->_exporter_permitted_regexp($global_opts); + $class->_exporter_validate_unimport_opts($global_opts); + + my $expando = $class->can('_exporter_expand_sub'); + $expando = undef if $expando == \&_exporter_expand_sub; + + for my $wanted (@want) + { + next if $not_want{$wanted->[0]}; + + if ($wanted->[1]) + { + _carp("Passing options to unimport '%s' makes no sense", $wanted->[0]) + unless (ref($wanted->[1]) eq 'HASH' and not keys %{$wanted->[1]}); + } + + my %symbols = defined($expando) + ? $class->$expando(@$wanted, $global_opts, $permitted) + : ($wanted->[0] => sub { "dummy" }); + $class->_exporter_uninstall_sub($_, $wanted->[1], $global_opts) + for keys %symbols; + } +} + +# Called once per import/unimport, passed the "global" import options. 
+# Expected to validate the options and carp or croak if there are problems. +# Can also take the opportunity to do other stuff if needed. +# +sub _exporter_validate_opts { 1 } +sub _exporter_validate_unimport_opts { 1 } + +# Called after expanding a tag or regexp to merge the tag's options with +# any sub-specific options. +# +sub _exporter_merge_opts +{ + my $class = shift; + my ($tag_opts, $global_opts, @stuff) = @_; + + $tag_opts = {} unless ref($tag_opts) eq q(HASH); + _croak('Cannot provide an -as option for tags') + if exists $tag_opts->{-as} && ref $tag_opts->{-as} ne 'CODE'; + + my $optlist = mkopt(\@stuff); + for my $export (@$optlist) + { + next if defined($export->[1]) && ref($export->[1]) ne q(HASH); + + my %sub_opts = ( %{ $export->[1] or {} }, %$tag_opts ); + $sub_opts{-prefix} = sprintf('%s%s', $tag_opts->{-prefix}, $export->[1]{-prefix}) + if exists($export->[1]{-prefix}) && exists($tag_opts->{-prefix}); + $sub_opts{-suffix} = sprintf('%s%s', $export->[1]{-suffix}, $tag_opts->{-suffix}) + if exists($export->[1]{-suffix}) && exists($tag_opts->{-suffix}); + $export->[1] = \%sub_opts; + } + return @$optlist; +} + +# Given a tag name, looks it up in %EXPORT_TAGS and returns the list of +# associated functions. The default implementation magically handles tags +# "all" and "default". The default implementation interprets any undefined +# tags as being global options. +# +sub _exporter_expand_tag +{ + no strict qw(refs); + + my $class = shift; + my ($name, $value, $globals) = @_; + my $tags = \%{"$class\::EXPORT_TAGS"}; + + return $class->_exporter_merge_opts($value, $globals, $tags->{$name}->($class, @_)) + if ref($tags->{$name}) eq q(CODE); + + return $class->_exporter_merge_opts($value, $globals, @{$tags->{$name}}) + if exists $tags->{$name}; + + return $class->_exporter_merge_opts($value, $globals, @{"$class\::EXPORT"}, @{"$class\::EXPORT_OK"}) + if $name eq 'all'; + + return $class->_exporter_merge_opts($value, $globals, @{"$class\::EXPORT"}) + if $name eq 'default'; + + $globals->{$name} = $value || 1; + return; +} + +# Given a regexp-like string, looks it up in @EXPORT_OK and returns the +# list of matching functions. +# +sub _exporter_expand_regexp +{ + no strict qw(refs); + our %TRACKED; + + my $class = shift; + my ($name, $value, $globals) = @_; + my $compiled = eval("qr$name"); + + my @possible = $globals->{is_unimport} + ? keys( %{$TRACKED{$class}{$globals->{into}}} ) + : @{"$class\::EXPORT_OK"}; + + $class->_exporter_merge_opts($value, $globals, grep /$compiled/, @possible); +} + +# Helper for _exporter_expand_sub. Returns a regexp matching all subs in +# the exporter package which are available for export. +# +sub _exporter_permitted_regexp +{ + no strict qw(refs); + my $class = shift; + my $re = join "|", map quotemeta, sort { + length($b) <=> length($a) or $a cmp $b + } @{"$class\::EXPORT"}, @{"$class\::EXPORT_OK"}; + qr{^(?:$re)$}ms; +} + +# Given a sub name, returns a hash of subs to install (usually just one sub). +# Keys are sub names, values are coderefs. +# +sub _exporter_expand_sub +{ + my $class = shift; + my ($name, $value, $globals, $permitted) = @_; + $permitted ||= $class->_exporter_permitted_regexp($globals); + + no strict qw(refs); + + my $sigil = "&"; + if ($name =~ /\A([&\$\%\@\*])(.+)\z/) { + $sigil = $1; + $name = $2; + if ($sigil eq '*') { + _croak("Cannot export symbols with a * sigil"); + } + } + my $sigilname = $sigil eq '&' ? 
$name : "$sigil$name"; + + if ($sigilname =~ $permitted) + { + my $generatorprefix = { + '&' => "_generate_", + '$' => "_generateScalar_", + '@' => "_generateArray_", + '%' => "_generateHash_", + }->{$sigil}; + + my $generator = $class->can("$generatorprefix$name"); + return $sigilname => $class->$generator($sigilname, $value, $globals) if $generator; + + if ($sigil eq '&') { + my $sub = $class->can($name); + return $sigilname => $sub if $sub; + } + else { + # Could do this more cleverly, but this works. + my $evalled = eval "\\${sigil}${class}::${name}"; + return $sigilname => $evalled if $evalled; + } + } + + $class->_exporter_fail(@_); +} + +# Called by _exporter_expand_sub if it is unable to generate a key-value +# pair for a sub. +# +sub _exporter_fail +{ + my $class = shift; + my ($name, $value, $globals) = @_; + return if $globals->{is_unimport}; + _croak("Could not find sub '%s' exported by %s", $name, $class); +} + +# Actually performs the installation of the sub into the target package. This +# also handles renaming the sub. +# +sub _exporter_install_sub +{ + my $class = shift; + my ($name, $value, $globals, $sym) = @_; + my $value_hash = ( ref($value) eq 'HASH' ) ? $value : {}; + + my $into = $globals->{into}; + my $installer = $globals->{installer} || $globals->{exporter}; + + if ( $into eq '-lexical' or $globals->{lexical} ) { + $] ge '5.037002' + or _croak( 'Lexical export requires Perl 5.37.2 or above' ); + $installer ||= sub { + my ( $sigilname, $sym ) = @{ $_[1] }; + no warnings ( $] ge '5.037002' ? 'experimental::builtin' : () ); + builtin::export_lexically( $sigilname, $sym ); + }; + } + + $name = + ref $globals->{as} ? $globals->{as}->($name) : + ref $value_hash->{-as} ? $value_hash->{-as}->($name) : + exists $value_hash->{-as} ? $value_hash->{-as} : + $name; + + return unless defined $name; + + my $sigil = "&"; + unless (ref($name)) { + if ($name =~ /\A([&\$\%\@\*])(.+)\z/) { + $sigil = $1; + $name = $2; + if ($sigil eq '*') { + _croak("Cannot export symbols with a * sigil"); + } + } + my ($prefix) = grep defined, $value_hash->{-prefix}, $globals->{prefix}, q(); + my ($suffix) = grep defined, $value_hash->{-suffix}, $globals->{suffix}, q(); + $name = "$prefix$name$suffix"; + } + + my $sigilname = $sigil eq '&' ? $name : "$sigil$name"; + +# if ({qw/$ SCALAR @ ARRAY % HASH & CODE/}->{$sigil} ne ref($sym)) { +# warn $sym; +# warn $sigilname; +# _croak("Reference type %s does not match sigil %s", ref($sym), $sigil); +# } + + return ($$name = $sym) if ref($name) eq q(SCALAR); + return ($into->{$sigilname} = $sym) if ref($into) eq q(HASH); + + no strict qw(refs); + our %TRACKED; + + if (ref($sym) eq 'CODE' and exists &{"$into\::$name"} and \&{"$into\::$name"} != $sym) + { + my ($level) = grep defined, $value_hash->{-replace}, $globals->{replace}, q(0); + my $action = { + carp => \&_carp, + 0 => \&_carp, + '' => \&_carp, + warn => \&_carp, + nonfatal => \&_carp, + croak => \&_croak, + fatal => \&_croak, + die => \&_croak, + }->{$level} || sub {}; + + # Don't complain about double-installing the same sub. This isn't ideal + # because the same named sub might be generated in two different ways. + $action = sub {} if $TRACKED{$class}{$into}{$sigilname}; + + $action->( + $action == \&_croak + ? "Refusing to overwrite existing sub '%s::%s' with sub '%s' exported by %s" + : "Overwriting existing sub '%s::%s' with sub '%s' exported by %s", + $into, + $name, + $_[0], + $class, + ); + } + + $TRACKED{$class}{$into}{$sigilname} = $sym; + + no warnings qw(prototype); + $installer + ? 
$installer->($globals, [$sigilname, $sym]) + : (*{"$into\::$name"} = $sym); +} + +sub _exporter_uninstall_sub +{ + our %TRACKED; + my $class = shift; + my ($name, $value, $globals, $sym) = @_; + my $into = $globals->{into}; + ref $into and return; + + no strict qw(refs); + + my $sigil = "&"; + if ($name =~ /\A([&\$\%\@\*])(.+)\z/) { + $sigil = $1; + $name = $2; + if ($sigil eq '*') { + _croak("Cannot export symbols with a * sigil"); + } + } + my $sigilname = $sigil eq '&' ? $name : "$sigil$name"; + + if ($sigil ne '&') { + _croak("Unimporting non-code symbols not supported yet"); + } + + # Cowardly refuse to uninstall a sub that differs from the one + # we installed! + my $our_coderef = $TRACKED{$class}{$into}{$name}; + my $cur_coderef = exists(&{"$into\::$name"}) ? \&{"$into\::$name"} : -1; + return unless $our_coderef == $cur_coderef; + + my $stash = \%{"$into\::"}; + my $old = delete $stash->{$name}; + my $full_name = join('::', $into, $name); + foreach my $type (qw(SCALAR HASH ARRAY IO)) # everything but the CODE + { + next unless defined(*{$old}{$type}); + *$full_name = *{$old}{$type}; + } + + delete $TRACKED{$class}{$into}{$name}; +} + +sub mkopt +{ + my $in = shift or return []; + my @out; + + $in = [map(($_ => ref($in->{$_}) ? $in->{$_} : ()), sort keys %$in)] + if ref($in) eq q(HASH); + + for (my $i = 0; $i < @$in; $i++) + { + my $k = $in->[$i]; + my $v; + + ($i == $#$in) ? ($v = undef) : + !defined($in->[$i+1]) ? (++$i, ($v = undef)) : + !ref($in->[$i+1]) ? ($v = undef) : + ($v = $in->[++$i]); + + push @out, [ $k => $v ]; + } + + \@out; +} + +sub mkopt_hash +{ + my $in = shift or return; + my %out = map +($_->[0] => $_->[1]), @{ mkopt($in) }; + \%out; +} + +1; + +__END__ + +=pod + +=encoding utf-8 + +=for stopwords frobnicate greps regexps + +=head1 NAME + +Exporter::Tiny - an exporter with the features of Sub::Exporter but only core dependencies + +=head1 SYNOPSIS + + package MyUtils; + use base "Exporter::Tiny"; + our @EXPORT = qw(frobnicate); + sub frobnicate { ... } + 1; + + package MyScript; + use MyUtils "frobnicate" => { -as => "frob" }; + print frob(42); + exit; + +=head1 DESCRIPTION + +Exporter::Tiny supports many of Sub::Exporter's external-facing features +including renaming imported functions with the C<< -as >>, C<< -prefix >> and +C<< -suffix >> options; explicit destinations with the C<< into >> option; +and alternative installers with the C<< installer >> option. But it's written +in only about 40% as many lines of code and with zero non-core dependencies. + +Its internal-facing interface is closer to Exporter.pm, with configuration +done through the C<< @EXPORT >>, C<< @EXPORT_OK >> and C<< %EXPORT_TAGS >> +package variables. + +If you are trying to B a module that inherits from Exporter::Tiny, +then look at: + +=over + +=item * + +L + +=item * + +L + +=back + +If you are trying to B a module that inherits from Exporter::Tiny, +then look at: + +=over + +=item * + +L + +=back + +=head1 BUGS + +Please report any bugs to +L. + +=head1 SUPPORT + +B<< IRC: >> support is available through in the I<< #moops >> channel +on L. + +=head1 SEE ALSO + +Simplified interface to this module: L. + +Other interesting exporters: L, L. + +=head1 AUTHOR + +Toby Inkster Etobyink@cpan.orgE. + +=head1 COPYRIGHT AND LICENCE + +This software is copyright (c) 2013-2014, 2017, 2022 by Toby Inkster. + +This is free software; you can redistribute it and/or modify it under +the same terms as the Perl 5 programming language system itself. 
+ +=head1 DISCLAIMER OF WARRANTIES + +THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF +MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. + diff --git a/src/modules/Exporter/Tiny/Manual/Etc.pod b/src/modules/Exporter/Tiny/Manual/Etc.pod new file mode 100644 index 0000000..6da7615 --- /dev/null +++ b/src/modules/Exporter/Tiny/Manual/Etc.pod @@ -0,0 +1,134 @@ +=pod + +=encoding utf-8 + +=for stopwords frobnicate greps regexps + +=head1 NAME + +Exporter::Tiny::Manual::Etc - odds and ends + +=head1 DESCRIPTION + +=head2 Utility Functions + +Exporter::Tiny is itself an exporter! + +These functions are really for internal use, but can be exported if you +need them: + +=over + +=item C<< mkopt(\@array) >> + +Similar to C from L. It doesn't support all the +fancy options that Data::OptList does (C, C, +C and C) but runs about 50% faster. + +=item C<< mkopt_hash(\@array) >> + +Similar to C from L. See also C. + +=back + +=head2 History + +L had a bunch of custom exporting code which poked coderefs +into its caller's stash. It needed this to be something more powerful than +most exporters so that it could switch between exporting Moose, Mouse and +Moo-compatible objects on request. L would have been capable, +but had too many dependencies for the Type::Tiny project. + +Meanwhile L, L and L each +used the venerable L. However, this meant they were +unable to use the features like L-style function renaming +which I'd built into Type::Library: + + ## import "Str" but rename it to "String". + use Types::Standard "Str" => { -as => "String" }; + +And so I decided to factor out code that could be shared by all Type-Tiny's +exporters into a single place: Exporter::TypeTiny. + +As of version 0.026, Exporter::TypeTiny was also made available as +L, distributed independently on CPAN. CHOCOLATEBOY had +convinced me that it was mature enough to live a life of its own. + +As of version 0.030, Type-Tiny depends on Exporter::Tiny and +Exporter::TypeTiny is being phased out. + +=head2 Obligatory Exporter Comparison + +Exporting is unlikely to be your application's performance bottleneck, but +nonetheless here are some comparisons. + +B<< Comparative sizes according to L: >> + + Exporter 217.1Kb + Sub::Exporter::Progressive 263.2Kb + Exporter::Tiny 267.7Kb + Exporter + Exporter::Heavy 281.5Kb + Exporter::Renaming 406.2Kb + Sub::Exporter 701.0Kb + +B<< Performance exporting a single sub: >> + + Rate SubExp ExpTiny SubExpProg ExpPM +SubExp 2489/s -- -56% -85% -88% +ExpTiny 5635/s 126% -- -67% -72% +SubExpProg 16905/s 579% 200% -- -16% +ExpPM 20097/s 707% 257% 19% -- + +(Exporter::Renaming globally changes the behaviour of Exporter.pm, so could +not be included in the same benchmarks.) + +B<< (Non-Core) Dependencies: >> + + Exporter -1 + Exporter::Renaming 0 + Exporter::Tiny 0 + Sub::Exporter::Progressive 0 + Sub::Exporter 3 + +B<< Features: >> + + ExpPM ExpTiny SubExp SubExpProg + Can export code symbols............. Yes Yes Yes Yes + Can export non-code symbols......... Yes Yes + Groups/tags......................... Yes Yes Yes Yes + Export by regexp.................... Yes Yes + Bang prefix......................... Yes Yes + Allows renaming of subs............. Yes Yes Maybe + Install code into scalar refs....... Yes Yes Maybe + Can be passed an "into" parameter... Yes Yes Maybe + Can be passed an "installer" sub.... Yes Yes Maybe + Config avoids package variables..... Yes + Supports generators................. 
Yes Yes + Sane API for generators............. Yes Yes + Unimport............................ Yes + +(Certain Sub::Exporter::Progressive features are only available if +Sub::Exporter is installed.) + +=head1 SEE ALSO + +L, +L. + +=head1 AUTHOR + +Toby Inkster Etobyink@cpan.orgE. + +=head1 COPYRIGHT AND LICENCE + +This software is copyright (c) 2013-2014, 2017 by Toby Inkster. + +This is free software; you can redistribute it and/or modify it under +the same terms as the Perl 5 programming language system itself. + +=head1 DISCLAIMER OF WARRANTIES + +THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF +MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. + diff --git a/src/modules/Exporter/Tiny/Manual/Exporting.pod b/src/modules/Exporter/Tiny/Manual/Exporting.pod new file mode 100644 index 0000000..d0db61a --- /dev/null +++ b/src/modules/Exporter/Tiny/Manual/Exporting.pod @@ -0,0 +1,266 @@ +=pod + +=encoding utf-8 + +=for stopwords frobnicate greps regexps + +=head1 NAME + +Exporter::Tiny::Manual::Exporting - creating an exporter using Exporter::Tiny + +=head1 SYNOPSIS + +B<< Read L first! >> + +=head1 DESCRIPTION + +Simple configuration works the same as L; inherit from +L, and use the C<< @EXPORT >>, C<< @EXPORT_OK >>, +and C<< %EXPORT_TAGS >> package variables to list subs to export. + +Unlike Exporter, Exporter::Tiny performs most of its internal duties +(including resolution of tag names to sub names, resolution of sub +names to coderefs, and installation of coderefs into the target +package) as B, which means that your module (which is a +subclass of Exporter::Tiny) can override them to provide interesting +behaviour. + +=head2 Advanced Tag Stuff + +You can define tags using other tags: + + use Exporter::Shiny qw( + black white red green blue cyan magenta yellow + ); + + our %EXPORT_TAGS = ( + rgb => [qw( red green blue )], + cym => [qw( cyan magenta yellow )], + cymk => [qw( black :cym )], + monochrome => [qw( black white )], + all => [qw( :rgb :cymk :monochrome )], + ); + +B<< CAVEAT: >> If you create a cycle in the tags, this could put +Exporter::Tiny into an infinite loop expanding the tags. Don't do that. + +=head2 More on Generators + +Exporter::Tiny has always allowed exported subs to be generated (like +L), but until version 0.025 did not have an especially nice +API for it. + +Now, it's easy. If you want to generate a sub C to export, list it in +C<< @EXPORT >> or C<< @EXPORT_OK >> as usual, and then simply give your +exporter module a class method called C<< _generate_foo >>. + + push @EXPORT_OK, 'foo'; + + sub _generate_foo { + my $class = shift; + my ($name, $args, $globals) = @_; + + return sub { + ...; + } + } + +We showed how to do that in L, but +one thing we didn't show was that C<< $globals >> gets passed in there. +This is the global options hash, as described in +L. It can often be useful. In +particular it will tell you what package the generated sub is destined +to be installed into. + +To generate non-code symbols, name your generators like this: + + sub _generateScalar_Foo { ... } # generate a symbol $Foo + sub _generateArray_Bar { ... } # generate a symbol @Bar + sub _generateHash_Baz { ... 
} # generate a symbol %Baz + +You can also generate tags: + + my %constants; + BEGIN { + %constants = (FOO => 1, BAR => 2); + } + use constant \%constants; + + $EXPORT_TAGS{constants} = sub { + my $class = shift; + my ($name, $args, $globals) = @_; + + return keys(%constants); + }; + +=head2 Hooks + +Sometimes as well as exporting stuff, you want to do some setup or +something. + +You can define a couple of class methods in your package, and they'll +get called at the appropriate time: + + package MyUtils; + + ...; + + sub _exporter_validate_opts { + my $class = shift; + my ($globals) = @_; + + ...; # do stuff here + + $class->SUPER::_exporter_validate_opts(@_); + } + + sub _exporter_validate_unimport_opts { + my $class = shift; + my ($globals) = @_; + + ...; # do stuff here + + $class->SUPER::_exporter_validate_unimport_opts(@_); + } + +The C<< $globals >> variable is that famous global options hash. In +particular, C<< $globals->{into} >> is useful because it tells you what +package has imported you. + +As you might have guessed, these methods were originally intended to +validate the global options hash, but can be used to perform any +general duties before the real exporting work is done. + +=head2 Overriding Internals + +An important difference between L and Exporter::Tiny is that +the latter calls all its internal functions as I<< class methods >>. This +means that your subclass can I<< override them >> to alter their behaviour. + +The following methods are available to be overridden. Despite being named +with a leading underscore, they are considered public methods. (The underscore +is there to avoid accidentally colliding with any of your own function names.) + +=over + +=item C<< _exporter_validate_opts($globals) >> + +Documented above. + +=item C<< _exporter_validate_unimport_opts($globals) >> + +Documented above. + +=item C<< _exporter_merge_opts($tag_opts, $globals, @exports) >> + +Called to merge options which have been provided for a tag into the +options provided for the exports that the tag expanded to. + +=item C<< _exporter_expand_tag($name, $args, $globals) >> + +This method is called to expand an import tag (e.g. C<< ":constants" >>). +It is passed the tag name (minus the leading ":"), an optional hashref +of options (like C<< { -prefix => "foo_" } >>), and the global options +hashref. + +It is expected to return a list of ($name, $args) arrayref pairs. These +names can be sub names to export, or further tag names (which must have +their ":"). If returning tag names, be careful to avoid creating a tag +expansion loop! + +The default implementation uses C<< %EXPORT_TAGS >> to expand tags, and +provides fallbacks for the C<< :default >> and C<< :all >> tags. + +=item C<< _exporter_expand_regexp($regexp, $args, $globals) >> + +Like C<_exporter_expand_regexp>, but given a regexp-like string instead +of a tag name. + +The default implementation greps through C<< @EXPORT_OK >> for imports, +and the list of already-imported functions for exports. + +=item C<< _exporter_expand_sub($name, $args, $globals) >> + +This method is called to translate a sub name to a hash of name => coderef +pairs for exporting to the caller. 
In general, this would just be a hash with +one key and one value, but, for example, L overrides this +method so that C<< "+Foo" >> gets expanded to: + + ( + Foo => sub { $type }, + is_Foo => sub { $type->check(@_) }, + to_Foo => sub { $type->assert_coerce(@_) }, + assert_Foo => sub { $type->assert_return(@_) }, + ) + +The default implementation checks that the name is allowed to be exported +(using the C<_exporter_permitted_regexp> method), gets the coderef using +the generator if there is one (or by calling C<< can >> on your exporter +otherwise) and calls C<_exporter_fail> if it's unable to generate or +retrieve a coderef. + +Despite the name, is also called for non-code symbols. + +=item C<< _exporter_permitted_regexp($globals) >> + +This method is called to retrieve a regexp for validating the names of +exportable subs. If a sub doesn't match the regexp, then the default +implementation of C<_exporter_expand_sub> will refuse to export it. (Of +course, you may override the default C<_exporter_expand_sub>.) + +The default implementation of this method assembles the regexp from +C<< @EXPORT >> and C<< @EXPORT_OK >>. + +=item C<< _exporter_fail($name, $args, $globals) >> + +Called by C<_exporter_expand_sub> if it can't find a coderef to export. + +The default implementation just throws an exception. But you could emit +a warning instead, or just ignore the failed export. + +If you don't throw an exception then you should be aware that this +method is called in list context, and any list it returns will be treated +as an C<_exporter_expand_sub>-style hash of names and coderefs for +export. + +=item C<< _exporter_install_sub($name, $args, $globals, $coderef) >> + +This method actually installs the exported sub into its new destination. +Its return value is ignored. + +The default implementation handles sub renaming (i.e. the C<< -as >>, +C<< -prefix >> and C<< -suffix >> functions. This method does a lot of +stuff; if you need to override it, it's probably a good idea to just +pre-process the arguments and then call the super method rather than +trying to handle all of it yourself. + +Despite the name, is also called for non-code symbols. + +=item C<< _exporter_uninstall_sub($name, $args, $globals) >> + +The opposite of C<_exporter_install_sub>. + +=back + +=head1 SEE ALSO + +L, +L. + +=head1 AUTHOR + +Toby Inkster Etobyink@cpan.orgE. + +=head1 COPYRIGHT AND LICENCE + +This software is copyright (c) 2013-2014, 2017 by Toby Inkster. + +This is free software; you can redistribute it and/or modify it under +the same terms as the Perl 5 programming language system itself. + +=head1 DISCLAIMER OF WARRANTIES + +THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF +MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. + diff --git a/src/modules/Exporter/Tiny/Manual/Importing.pod b/src/modules/Exporter/Tiny/Manual/Importing.pod new file mode 100644 index 0000000..336f25c --- /dev/null +++ b/src/modules/Exporter/Tiny/Manual/Importing.pod @@ -0,0 +1,269 @@ +=pod + +=encoding utf-8 + +=for stopwords frobnicate greps regexps + +=head1 NAME + +Exporter::Tiny::Manual::Importing - importing from Exporter::Tiny-based modules + +=head1 DESCRIPTION + +For the purposes of this discussion we'll assume we have a module called +C<< MyUtils >> which exports functions called C, C, +C, and C. It has a tag set up called C<:colours> which +corresponds to C, C, and C. + +Many of these tricks may seem familiar from L. 
That is +intentional. Exporter::Tiny doesn't attempt to provide every feature of +Sub::Exporter, but where it does it usually uses a fairly similar API. + +=head2 Basic importing + +It's easy to import a single function from a module: + + use MyUtils "frobnicate"; + +Or a list of functions: + + use MyUtils "red", "green"; + +Perl's C<< qw() >> shorthand for a list of words is pretty useful: + + use MyUtils qw( red green ); + +If the module defines tags, you can import them like this: + + use MyUtils qw( :colours ); + +Or with a hyphen instead of a colon: + + use MyUtils qw( -colours ); + +Hyphens are good because Perl will autoquote a bareword that follows +them: + + use MyUtils -colours; + +And it's possible to mix function names and tags in the same list: + + use MyUtils qw( frobnicate :colours ); + +=head2 Renaming imported functions + +It's possible to rename a function you're importing: + + use MyUtils "frobnicate" => { -as => "frob" }; + +Or you can apply a prefix and/or suffix. The following imports the +function and calls it C. + + use MyUtils "frobnicate" => { -prefix => "my_", -suffix => "_thing" }; + +You can apply a prefix/suffix to B functions you import by +placing the hashref B in the import list. (This first hashref +is referred to as the global options hash, and can do some special +things.) + + use MyUtils { prefix => "my_" }, "frobnicate"; + +Did you notice that we used C<< -prefix >> and C<< -suffix >> in the +normal options hash, but C<< prefix >> and C<< suffix >> (no hyphen) +in the global options hash? That's a common pattern with this module. + +You can import the same function multiple times with different names: + + use MyUtils + "frobnicate" => { -as => "frob" }, + "frobnicate" => { -as => "frbnct" }; + +Tags can take the C<< -prefix >> and C<< -suffix >> options too. The +following imports C, C, and C: + + use MyUtils -colours => { -prefix => "colour_" }; + +You can also set C<< -as >> to be a coderef to generate a function +name. This imports functions called C, C, and C: + + use MyUtils -colours => { -as => sub { uc($_[0]) } }; + +Note that it doesn't make sense to use C<< -as >> with a tag unless +you're doing this coderef thing. Coderef C<< as >> also works in the +global options hash. + +=head2 DO NOT WANT! + +Sometimes you want to supply a list of functions you B<< don't >> want +to import. To do that, prefix the function with a bang. This imports +everything except "frobnicate": + + use MyUtils qw( -all !frobnicate ); + +You can add the bang prefix to tags too. This will import everything +except the colours. + + use MyUtils qw( -all !:colours ); + +Negated imports always "win", so the following will not import +"frobnicate", no matter how many times you repeat it... + + use MyUtils qw( !frobnicate frobnicate frobnicate frobnicate ); + +=head2 Importing by regexp + +Here's how you could import all functions beginning with an "f": + + use MyUtils qw( /^F/i ); + +Or import everything except functions beginning with a "z": + + use MyUtils qw( -all !/^Z/i ); + +Note that regexps are always supplied as I starting with +C<< "/" >>, and not as quoted regexp references (C<< qr/.../ >>). + +=head2 Import functions into another package + +Occasionally you need to import functions not into your own package, +but into a different package. 
You can do that like this: + + use MyUtils { into => "OtherPkg" }, "frobnicate"; + + OtherPkg::frobincate(...); + +However, L will probably provide you with a better +approach which doesn't just work with Exporter::Tiny, but B +exporters. + +=head2 Lexical subs on Perl 5.37.2 and above + +Often you want to make use of an exported function, but don't want +it to "pollute" your namespace. + +On newer versions of Perl, Exporter::Tiny can use C +from L to give you lexical versions of exports. + + { + use MyUtils -lexical, "frobnicate"; + + frobnicate(...); # ok + } + + frobnicate(...); # not ok + +This functionality should be considered B until +C is included in a stable release of Perl. + +=head2 Lexical subs on Perl older than 5.37.2 + +There is this L thing that was designed as a +plugin for L, but Exporter::Tiny's API is close enough +that it will work. Do you remember that global options hash? Just +use that to tell Exporter::Tiny to use an alternative sub installer. + + { + use Sub::Exporter::Lexical lexical_installer => { -as => "lex" }; + use MyUtils { installer => lex }, "frobnicate"; + + frobnicate(...); # ok + } + + frobnicate(...); # not ok + +Another way to do lexical functions is to import a function into a +scalar variable: + + my $func; + use MyUtils "frobnicate" => { -as => \$func }; + + $func->(...); + +You can even provide a hashref to put all imported functions into as +part of that global options hash I mentioned earlier. + + my %funcs; + use MyUtils { into => \%funcs }, "frobnicate"; + + $funcs{frobnicate}->(...); + +=head2 Unimporting + +You can unimport the functions that MyUtils added to your namespace: + + no MyUtils; + +Or just specific ones: + + no MyUtils qw(frobnicate); + +If you renamed a function when you imported it, you should unimport by +the new name: + + use MyUtils frobnicate => { -as => "frob" }; + ...; + no MyUtils "frob"; + +Unimporting using tags and regexps should mostly do what you want. + +=head1 DIAGNOSTICS + +=over + +=item B<< Overwriting existing sub '%s::%s' with sub '%s' exported by %s >> + +A warning issued if Exporter::Tiny is asked to export a symbol which +will result in an existing sub being overwritten. This warning can be +suppressed using either of the following: + + use MyUtils { replace => 1 }, "frobnicate"; + use MyUtils "frobnicate" => { -replace => 1 }; + +Or can be upgraded to a fatal error: + + use MyUtils { replace => "die" }, "frobnicate"; + use MyUtils "frobnicate" => { -replace => "die" }; + +=item B<< Refusing to overwrite existing sub '%s::%s' with sub '%s' exported by %s >> + +The fatal version of the above warning. + +=item B<< Could not find sub '%s' exported by %s >> + +You requested to import a sub which the package does not provide. + +=item B<< Cannot provide an -as option for tags >> + +Because a tag may provide more than one function, it does not make sense +to request a single name for it. Instead use C<< -prefix >> or C<< -suffix >>. + +=item B<< Passing options to unimport '%s' makes no sense >> + +When you import a sub, it occasionally makes sense to pass some options +for it. However, when unimporting, options do nothing, so this warning +is issued. + +=back + +=head1 SEE ALSO + +L, +L. + +=head1 AUTHOR + +Toby Inkster Etobyink@cpan.orgE. + +=head1 COPYRIGHT AND LICENCE + +This software is copyright (c) 2013-2014, 2017 by Toby Inkster. + +This is free software; you can redistribute it and/or modify it under +the same terms as the Perl 5 programming language system itself. 
+ +=head1 DISCLAIMER OF WARRANTIES + +THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF +MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. + diff --git a/src/modules/Exporter/Tiny/Manual/QuickStart.pod b/src/modules/Exporter/Tiny/Manual/QuickStart.pod new file mode 100644 index 0000000..13335be --- /dev/null +++ b/src/modules/Exporter/Tiny/Manual/QuickStart.pod @@ -0,0 +1,195 @@ +=pod + +=encoding utf-8 + +=for stopwords frobnicate greps regexps + +=head1 NAME + +Exporter::Tiny::Manual::QuickStart - the quickest way to get up and running with Exporter::Tiny + +=head1 SYNOPSIS + + package MyUtils; + + use Exporter::Shiny qw( frobnicate ); + + sub frobnicate { + ...; # your code here + } + + 1; + +Now people can use your module like this: + + use MyUtils "frobnicate"; + + frobnicate(42); + +Or like this: + + use MyUtils "frobnicate" => { -as => "frob" }; + + frob(42); + +=head1 DESCRIPTION + +See the synopsis. Yes, it's that simple. + +=head2 Next steps + +=head3 Default exports + +Note that the module in the synopsis doesn't export anything by default. +If people load C like this: + + use MyUtils; + +Then they haven't imported any functions. You can specify a default set +of functions to be exported like this: + + package MyUtils; + + use Exporter::Shiny qw( frobnicate ); + + our @EXPORT = qw( frobnicate ); + + sub frobnicate { ... } + + 1; + +Or, if you want to be a superstar rock god: + + package MyUtils; + + use Exporter::Shiny our @EXPORT = qw( frobnicate ); + + sub frobnicate { ... } + + 1; + +=head3 Tags + +You can provide tags for people to use: + + package MyUtils; + + use Exporter::Shiny qw( frobnicate red green blue ); + + our %EXPORT_TAGS = ( + utils => [qw/ frobnicate /], + colours => [qw/ red green blue /], + ); + + sub frobnicate { ... } + sub red { ... } + sub green { ... } + sub blue { ... } + + 1; + +And people can now import your functions like this: + + use MyUtils ":colours"; + +Or this: + + use MyUtils "-colours"; + +Or take advantage of the fact that Perl magically quotes barewords +preceded by a hyphen: + + use MyUtils -colours; + +Two tags are automatically defined for you: C<< -default >> (which is +just the same as C<< @EXPORT >>) and C<< -all >> (which is the union of +C<< @EXPORT >> and C<< @EXPORT_OK >>). If you don't like them, then you +can override them: + + our %EXPORT_TAGS = ( + default => \@some_other_stuff, + all => \@more_stuff, + ); + +=head3 Generators + +Exporting normally just works by copying a sub from your package into +your caller's package. But sometimes it's useful instead to generate +a I sub to insert into your caller's package. This is pretty +easy to do. + + package MyUtils; + + use Exporter::Shiny qw( frobnicate ); + + sub _generate_frobnicate { + my ( $me, $name, $args, $globals ) = @_; + my $caller = $globals->{into}; + + return sub { + ...; # your code here + }; + } + + 1; + +The parameter C<< $me >> here is a string containing the package name +which is being imported from; C<< $caller >> is the destination package; +C<< $name >> is the name of the sub (in this case "frobnicate"); and +C<< $args >> is a custom argument for this function. (By convention, +C<< $args >> is normally a hashref.) + + # The hashref { foo => 42 } is $args above. + # + use MyUtils "frobnicate" => { foo => 42 }; + +=head2 Avoiding Exporter::Shiny + +Exporter::Shiny is a tiny shim around Exporter::Tiny. 
It should mostly +do what you want, but you may sometimes prefer to use Exporter::Tiny +directly. + +The example in the synopsis could have been written as: + + package MyUtils; + + use parent "Exporter::Tiny"; + our @EXPORT_OK = qw( frobnicate ); + + sub frobnicate { + ...; # your code here + } + + 1; + +What Exporter::Shiny does is mostly just to set C<< @EXPORT_OK >> for +you and set up inheritance from the base class (Exporter::Tiny). + +Exporter::Shiny also sets C<< $INC{'MyUtils.pm'} >> for you, which in +usually makes little difference, but is useful in some edge cases. + +=head1 SEE ALSO + +L, +L. + +For more advanced information, see +L. + +=head1 AUTHOR + +Toby Inkster Etobyink@cpan.orgE. + +=head1 COPYRIGHT AND LICENCE + +This software is copyright (c) 2013-2014, 2017 by Toby Inkster. + +This is free software; you can redistribute it and/or modify it under +the same terms as the Perl 5 programming language system itself. + +=head1 DISCLAIMER OF WARRANTIES + +THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF +MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. + diff --git a/src/modules/List/MoreUtils.pm b/src/modules/List/MoreUtils.pm new file mode 100644 index 0000000..05152fd --- /dev/null +++ b/src/modules/List/MoreUtils.pm @@ -0,0 +1,1286 @@ +package List::MoreUtils; + +use 5.008_001; +use strict; +use warnings; + +my $have_xs; +our $VERSION = '0.430'; + +BEGIN +{ + unless (defined($have_xs)) + { + ## no critic (ErrorHandling::RequireCheckingReturnValueOfEval) + eval { require List::MoreUtils::XS; } unless $ENV{LIST_MOREUTILS_PP}; + ## no critic (ErrorHandling::RequireCarping) + die $@ if $@ && defined $ENV{LIST_MOREUTILS_PP} && $ENV{LIST_MOREUTILS_PP} == 0; + $have_xs = 0 + defined($INC{'List/MoreUtils/XS.pm'}); + } + + use List::MoreUtils::PP qw(); +} + +use Exporter::Tiny qw(); + +my @junctions = qw(any all none notall); +my @v0_22 = qw( + true false + firstidx lastidx + insert_after insert_after_string + apply indexes + after after_incl before before_incl + firstval lastval + each_array each_arrayref + pairwise natatime + mesh uniq + minmax part + _XScompiled +); +my @v0_24 = qw(bsearch); +my @v0_33 = qw(sort_by nsort_by); +my @v0_400 = qw(one any_u all_u none_u notall_u one_u + firstres onlyidx onlyval onlyres lastres + singleton bsearchidx +); +my @v0_420 = qw(arrayify duplicates minmaxstr samples zip6 reduce_0 reduce_1 reduce_u + listcmp frequency occurrences mode + binsert bremove equal_range lower_bound upper_bound qsort + slide slideatatime); + +my @all_functions = (@junctions, @v0_22, @v0_24, @v0_33, @v0_400, @v0_420); + +## no critic (TestingAndDebugging::ProhibitNoStrict) +no strict "refs"; +if ($have_xs) +{ + my $x; + for (@all_functions) + { + List::MoreUtils->can($_) or *$_ = $x if ($x = List::MoreUtils::XS->can($_)); + } +} +List::MoreUtils->can($_) or *$_ = List::MoreUtils::PP->can($_) for (@all_functions); +use strict; +## use critic (TestingAndDebugging::ProhibitNoStrict) +use parent qw(Exporter::Tiny); + +my %alias_list = ( + v0_22 => { + first_index => "firstidx", + last_index => "lastidx", + first_value => "firstval", + last_value => "lastval", + zip => "mesh", + }, + v0_33 => { + distinct => "uniq", + }, + v0_400 => { + first_result => "firstres", + only_index => "onlyidx", + only_value => "onlyval", + only_result => "onlyres", + last_result => "lastres", + bsearch_index => "bsearchidx", + }, + v0_420 => { + bsearch_insert => "binsert", + bsearch_remove => 
"bremove", + zip_unflatten => "zip6", + }, +); + +our @EXPORT_OK = (@all_functions, map { keys %$_ } values %alias_list); +our %EXPORT_TAGS = ( + all => \@EXPORT_OK, + 'like_0.22' => [ + any_u => {-as => 'any'}, + all_u => {-as => 'all'}, + none_u => {-as => 'none'}, + notall_u => {-as => 'notall'}, + @v0_22, + keys %{$alias_list{v0_22}}, + ], + 'like_0.24' => [ + any_u => {-as => 'any'}, + all_u => {-as => 'all'}, + notall_u => {-as => 'notall'}, + 'none', + @v0_22, + @v0_24, + keys %{$alias_list{v0_22}}, + ], + 'like_0.33' => [ + @junctions, + @v0_22, + # v0_24 functions were omitted + @v0_33, + keys %{$alias_list{v0_22}}, + keys %{$alias_list{v0_33}}, + ], +); + +for my $set (values %alias_list) +{ + for my $alias (keys %$set) + { + ## no critic (TestingAndDebugging::ProhibitNoStrict) + no strict qw(refs); + *$alias = __PACKAGE__->can($set->{$alias}); + ## use critic (TestingAndDebugging::ProhibitNoStrict) + } +} +use strict; + +=pod + +=head1 NAME + +List::MoreUtils - Provide the stuff missing in List::Util + +=head1 SYNOPSIS + + # import specific functions + + use List::MoreUtils qw(any uniq); + + if ( any { /foo/ } uniq @has_duplicates ) { + # do stuff + } + + # import everything + + use List::MoreUtils ':all'; + + # import by API + + # has "original" any/all/none/notall behavior + use List::MoreUtils ':like_0.22'; + # 0.22 + bsearch + use List::MoreUtils ':like_0.24'; + # has "simplified" any/all/none/notall behavior + (n)sort_by + use List::MoreUtils ':like_0.33'; + +=head1 DESCRIPTION + +B provides some trivial but commonly needed functionality on +lists which is not going to go into L. + +All of the below functions are implementable in only a couple of lines of Perl +code. Using the functions from this module however should give slightly better +performance as everything is implemented in C. The pure-Perl implementation of +these functions only serves as a fallback in case the C portions of this module +couldn't be compiled on this machine. + +=head1 EXPORTS + +=head2 Default behavior + +Nothing by default. To import all of this module's symbols use the C<:all> tag. +Otherwise functions can be imported by name as usual: + + use List::MoreUtils ':all'; + + use List::MoreUtils qw{ any firstidx }; + +Because historical changes to the API might make upgrading List::MoreUtils +difficult for some projects, the legacy API is available via special import +tags. + +=head2 Like version 0.22 (last release with original API) + +This API was available from 2006 to 2009, returning undef for empty lists on +C/C/C/C: + + use List::MoreUtils ':like_0.22'; + +This import tag will import all functions available as of version 0.22. +However, it will import C as C, C as C, C as +C, and C as C. + +=head2 Like version 0.24 (first incompatible change) + +This API was available from 2010 to 2011. It changed the return value of C +and added the C function. + + use List::MoreUtils ':like_0.24'; + +This import tag will import all functions available as of version 0.24. +However it will import C as C, C as C, and +C as C. It will import C as described in +the documentation below (true for empty list). + +=head2 Like version 0.33 (second incompatible change) + +This API was available from 2011 to 2014. It is widely used in several CPAN +modules and thus it's closest to the current API. It changed the return values +of C, C, and C. It added the C and C functions +and the C alias for C. It omitted C. + + use List::MoreUtils ':like_0.33'; + +This import tag will import all functions available as of version 0.33. 
Note: +it will not import C for consistency with the 0.33 API. + +=head1 FUNCTIONS + +=head2 Junctions + +=head3 I + +There are two schools of thought for how to evaluate a junction on an +empty list: + +=over + +=item * + +Reduction to an identity (boolean) + +=item * + +Result is undefined (three-valued) + +=back + +In the first case, the result of the junction applied to the empty list is +determined by a mathematical reduction to an identity depending on whether +the underlying comparison is "or" or "and". Conceptually: + + "any are true" "all are true" + -------------- -------------- + 2 elements: A || B || 0 A && B && 1 + 1 element: A || 0 A && 1 + 0 elements: 0 1 + +In the second case, three-value logic is desired, in which a junction +applied to an empty list returns C rather than true or false + +Junctions with a C<_u> suffix implement three-valued logic. Those +without are boolean. + +=head3 all BLOCK LIST + +=head3 all_u BLOCK LIST + +Returns a true value if all items in LIST meet the criterion given through +BLOCK. Sets C<$_> for each item in LIST in turn: + + print "All values are non-negative" + if all { $_ >= 0 } ($x, $y, $z); + +For an empty LIST, C returns true (i.e. no values failed the condition) +and C returns C. + +Thus, C<< all_u(@list) >> is equivalent to C<< @list ? all(@list) : undef >>. + +B: because Perl treats C as false, you must check the return value +of C with C or you will get the opposite result of what you +expect. + +=head3 any BLOCK LIST + +=head3 any_u BLOCK LIST + +Returns a true value if any item in LIST meets the criterion given through +BLOCK. Sets C<$_> for each item in LIST in turn: + + print "At least one non-negative value" + if any { $_ >= 0 } ($x, $y, $z); + +For an empty LIST, C returns false and C returns C. + +Thus, C<< any_u(@list) >> is equivalent to C<< @list ? any(@list) : undef >>. + +=head3 none BLOCK LIST + +=head3 none_u BLOCK LIST + +Logically the negation of C. Returns a true value if no item in LIST meets +the criterion given through BLOCK. Sets C<$_> for each item in LIST in turn: + + print "No non-negative values" + if none { $_ >= 0 } ($x, $y, $z); + +For an empty LIST, C returns true (i.e. no values failed the condition) +and C returns C. + +Thus, C<< none_u(@list) >> is equivalent to C<< @list ? none(@list) : undef >>. + +B: because Perl treats C as false, you must check the return value +of C with C or you will get the opposite result of what you +expect. + +=head3 notall BLOCK LIST + +=head3 notall_u BLOCK LIST + +Logically the negation of C. Returns a true value if not all items in LIST +meet the criterion given through BLOCK. Sets C<$_> for each item in LIST in +turn: + + print "Not all values are non-negative" + if notall { $_ >= 0 } ($x, $y, $z); + +For an empty LIST, C returns false and C returns C. + +Thus, C<< notall_u(@list) >> is equivalent to C<< @list ? notall(@list) : undef >>. + +=head3 one BLOCK LIST + +=head3 one_u BLOCK LIST + +Returns a true value if precisely one item in LIST meets the criterion +given through BLOCK. Sets C<$_> for each item in LIST in turn: + + print "Precisely one value defined" + if one { defined($_) } @list; + +Returns false otherwise. + +For an empty LIST, C returns false and C returns C. + +The expression C is almost equivalent to +C<1 == true BLOCK LIST>, except for short-cutting. +Evaluation of BLOCK will immediately stop at the second true value. 
+ +=head2 Transformation + +=head3 apply BLOCK LIST + +Applies BLOCK to each item in LIST and returns a list of the values after BLOCK +has been applied. In scalar context, the last element is returned. This +function is similar to C but will not modify the elements of the input +list: + + my @list = (1 .. 4); + my @mult = apply { $_ *= 2 } @list; + print "\@list = @list\n"; + print "\@mult = @mult\n"; + __END__ + @list = 1 2 3 4 + @mult = 2 4 6 8 + +Think of it as syntactic sugar for + + for (my @mult = @list) { $_ *= 2 } + +=head3 insert_after BLOCK VALUE LIST + +Inserts VALUE after the first item in LIST for which the criterion in BLOCK is +true. Sets C<$_> for each item in LIST in turn. + + my @list = qw/This is a list/; + insert_after { $_ eq "a" } "longer" => @list; + print "@list"; + __END__ + This is a longer list + +=head3 insert_after_string STRING VALUE LIST + +Inserts VALUE after the first item in LIST which is equal to STRING. + + my @list = qw/This is a list/; + insert_after_string "a", "longer" => @list; + print "@list"; + __END__ + This is a longer list + +=head3 pairwise BLOCK ARRAY1 ARRAY2 + +Evaluates BLOCK for each pair of elements in ARRAY1 and ARRAY2 and returns a +new list consisting of BLOCK's return values. The two elements are set to C<$a> +and C<$b>. Note that those two are aliases to the original value so changing +them will modify the input arrays. + + @a = (1 .. 5); + @b = (11 .. 15); + @x = pairwise { $a + $b } @a, @b; # returns 12, 14, 16, 18, 20 + + # mesh with pairwise + @a = qw/a b c/; + @b = qw/1 2 3/; + @x = pairwise { ($a, $b) } @a, @b; # returns a, 1, b, 2, c, 3 + +=head3 mesh ARRAY1 ARRAY2 [ ARRAY3 ... ] + +=head3 zip ARRAY1 ARRAY2 [ ARRAY3 ... ] + +Returns a list consisting of the first elements of each array, then +the second, then the third, etc, until all arrays are exhausted. + +Examples: + + @x = qw/a b c d/; + @y = qw/1 2 3 4/; + @z = mesh @x, @y; # returns a, 1, b, 2, c, 3, d, 4 + + @a = ('x'); + @b = ('1', '2'); + @c = qw/zip zap zot/; + @d = mesh @a, @b, @c; # x, 1, zip, undef, 2, zap, undef, undef, zot + +C is an alias for C. + +=head3 zip6 + +=head3 zip_unflatten + +Returns a list of arrays consisting of the first elements of each array, +then the second, then the third, etc, until all arrays are exhausted. + + @x = qw/a b c d/; + @y = qw/1 2 3 4/; + @z = zip6 @x, @y; # returns [a, 1], [b, 2], [c, 3], [d, 4] + + @a = ('x'); + @b = ('1', '2'); + @c = qw/zip zap zot/; + @d = zip6 @a, @b, @c; # [x, 1, zip], [undef, 2, zap], [undef, undef, zot] + +C is an alias for C. + +=head3 listcmp ARRAY0 ARRAY1 [ ARRAY2 ... ] + +Returns an associative list of elements and every I of the list it +was found in. Allows easy implementation of @a & @b, @a | @b, @a ^ @b and +so on. +Undefined entries in any given array are skipped. + + my @a = qw(one two three four five six seven eight nine ten eleven twelve thirteen); + my @b = qw(two three five seven eleven thirteen seventeen); + my @c = qw(one one two three five eight thirteen twentyone); + my %cmp = listcmp @a, @b, @c; # returns (one => [0, 2], two => [0, 1, 2], three => [0, 1, 2], four => [0], ...) + + my @seq = (1, 2, 3); + my @prim = (undef, 2, 3, 5); + my @fib = (1, 1, 2); + my %cmp = listcmp @seq, @prim, @fib; + # returns ( 1 => [0, 2], 2 => [0, 1, 2], 3 => [0, 1], 5 => [1] ) + +=head3 arrayify LIST[,LIST[,LIST...]] + +Returns a list consisting of each element of given arrays. Recursive arrays +are flattened, too. 
+ + @a = (1, [[2], 3], 4, [5], 6, [7], 8, 9); + @l = arrayify @a; # returns 1, 2, 3, 4, 5, 6, 7, 8, 9 + +=head3 uniq LIST + +=head3 distinct LIST + +Returns a new list by stripping duplicate values in LIST by comparing +the values as hash keys, except that undef is considered separate from ''. +The order of elements in the returned list is the same as in LIST. In +scalar context, returns the number of unique elements in LIST. + + my @x = uniq 1, 1, 2, 2, 3, 5, 3, 4; # returns 1 2 3 5 4 + my $x = uniq 1, 1, 2, 2, 3, 5, 3, 4; # returns 5 + # returns "Mike", "Michael", "Richard", "Rick" + my @n = distinct "Mike", "Michael", "Richard", "Rick", "Michael", "Rick" + # returns "A8", "", undef, "A5", "S1" + my @s = distinct "A8", "", undef, "A5", "S1", "A5", "A8" + # returns "Giulia", "Giulietta", undef, "", 156, "GTA", "GTV", 159, "Brera", "4C" + my @w = uniq "Giulia", "Giulietta", undef, "", 156, "GTA", "GTV", 159, "Brera", "4C", "Giulietta", "Giulia" + +C is an alias for C. + +B can be used to give feedback about this behavior. + +=head3 singleton LIST + +Returns a new list by stripping values in LIST occurring more than once by +comparing the values as hash keys, except that undef is considered separate +from ''. The order of elements in the returned list is the same as in LIST. +In scalar context, returns the number of elements occurring only once in LIST. + + my @x = singleton 1,1,2,2,3,4,5 # returns 3 4 5 + +=head3 duplicates LIST + +Returns a new list by stripping values in LIST occurring less than twice by +comparing the values as hash keys, except that undef is considered separate +from ''. The order of elements in the returned list is the same as in LIST. +In scalar context, returns the number of elements occurring more than once +in LIST. + + my @y = duplicates 1,1,2,4,7,2,3,4,6,9; #returns 1,2,4 + +=head3 frequency LIST + +Returns an associative list of distinct values and the corresponding frequency. + + my @f = frequency values %radio_nrw; # returns ( + # 'Deutschlandfunk (DLF)' => 9, 'WDR 3' => 10, + # 'WDR 4' => 11, 'WDR 5' => 14, 'WDR Eins Live' => 14, + # 'Deutschlandradio Kultur' => 8,...) + +=head3 occurrences LIST + +Returns a new list of frequencies and the corresponding values from LIST. + + my @o = occurrences ((1) x 3, (2) x 4, (3) x 2, (4) x 7, (5) x 2, (6) x 4); + # @o = (undef, undef, [3, 5], [1], [2, 6], undef, undef, [4]); + +=head3 mode LIST + +Returns the modal value of LIST. In scalar context, just the modal value +is returned, in list context all probes occurring I times are returned, +too. + + my @m = mode ((1) x 3, (2) x 4, (3) x 2, (4) x 7, (5) x 2, (6) x 4, (7) x 3, (8) x 7); + # @m = (7, 4, 8) - bimodal LIST + +=head3 slide BLOCK LIST + +The function C operates on pairs of list elements like: + + my @s = slide { "$a and $b" } (0..3); + # @s = ("0 and 1", "1 and 2", "2 and 3") + +The idea behind this function is a kind of magnifying glass that is moved +along a list and calls C every time the next list item is reached. + +=head2 Partitioning + +=head3 after BLOCK LIST + +Returns a list of the values of LIST after (and not including) the point +where BLOCK returns a true value. Sets C<$_> for each element in LIST in turn. + + @x = after { $_ % 5 == 0 } (1..9); # returns 6, 7, 8, 9 + +=head3 after_incl BLOCK LIST + +Same as C but also includes the element for which BLOCK is true. + +=head3 before BLOCK LIST + +Returns a list of values of LIST up to (and not including) the point where BLOCK +returns a true value. Sets C<$_> for each element in LIST in turn. 
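As an illustrative sketch, mirroring the C<after> example above:

    @x = before { $_ % 5 == 0 } (1..9); # returns 1, 2, 3, 4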
+ +=head3 before_incl BLOCK LIST + +Same as C but also includes the element for which BLOCK is true. + +=head3 part BLOCK LIST + +Partitions LIST based on the return value of BLOCK which denotes into which +partition the current value is put. + +Returns a list of the partitions thusly created. Each partition created is a +reference to an array. + + my $i = 0; + my @part = part { $i++ % 2 } 1 .. 8; # returns [1, 3, 5, 7], [2, 4, 6, 8] + +You can have a sparse list of partitions as well where non-set partitions will +be undef: + + my @part = part { 2 } 1 .. 10; # returns undef, undef, [ 1 .. 10 ] + +Be careful with negative values, though: + + my @part = part { -1 } 1 .. 10; + __END__ + Modification of non-creatable array value attempted, subscript -1 ... + +Negative values are only ok when they refer to a partition previously created: + + my @idx = ( 0, 1, -1 ); + my $i = 0; + my @part = part { $idx[$i++ % 3] } 1 .. 8; # [1, 4, 7], [2, 3, 5, 6, 8] + +=head3 samples COUNT LIST + +Returns a new list containing COUNT random samples from LIST. Is similar to +L, but stops after COUNT. + + @r = samples 10, 1..10; # same as shuffle + @r2 = samples 5, 1..10; # gives 5 values from 1..10; + +=head2 Iteration + +=head3 each_array ARRAY1 ARRAY2 ... + +Creates an array iterator to return the elements of the list of arrays ARRAY1, +ARRAY2 throughout ARRAYn in turn. That is, the first time it is called, it +returns the first element of each array. The next time, it returns the second +elements. And so on, until all elements are exhausted. + +This is useful for looping over more than one array at once: + + my $ea = each_array(@a, @b, @c); + while ( my ($a, $b, $c) = $ea->() ) { .... } + +The iterator returns the empty list when it reached the end of all arrays. + +If the iterator is passed an argument of 'C', then it returns +the index of the last fetched set of values, as a scalar. + +=head3 each_arrayref LIST + +Like each_array, but the arguments are references to arrays, not the +plain arrays. + +=head3 natatime EXPR, LIST + +Creates an array iterator, for looping over an array in chunks of +C<$n> items at a time. (n at a time, get it?). An example is +probably a better explanation than I could give in words. + +Example: + + my @x = ('a' .. 'g'); + my $it = natatime 3, @x; + while (my @vals = $it->()) + { + print "@vals\n"; + } + +This prints + + a b c + d e f + g + +=head3 slideatatime STEP, WINDOW, LIST + +Creates an array iterator, for looping over an array in chunks of +C<$windows-size> items at a time. + +The idea behind this function is a kind of magnifying glass (finer +controllable compared to L) that is moved along a list. + +Example: + + my @x = ('a' .. 'g'); + my $it = slideatatime 2, 3, @x; + while (my @vals = $it->()) + { + print "@vals\n"; + } + +This prints + + a b c + c d e + e f g + g + +=head2 Searching + +=head3 firstval BLOCK LIST + +=head3 first_value BLOCK LIST + +Returns the first element in LIST for which BLOCK evaluates to true. Each +element of LIST is set to C<$_> in turn. Returns C if no such element +has been found. + +C is an alias for C. + +=head3 onlyval BLOCK LIST + +=head3 only_value BLOCK LIST + +Returns the only element in LIST for which BLOCK evaluates to true. Sets +C<$_> for each item in LIST in turn. Returns C if no such element +has been found. + +C is an alias for C. + +=head3 lastval BLOCK LIST + +=head3 last_value BLOCK LIST + +Returns the last value in LIST for which BLOCK evaluates to true. Each element +of LIST is set to C<$_> in turn. 
Returns C<undef> if no such element has been
+found.
+
+C<last_value> is an alias for C<lastval>.
+
+=head3 firstres BLOCK LIST
+
+=head3 first_result BLOCK LIST
+
+Returns the result of BLOCK for the first element in LIST for which BLOCK
+evaluates to true. Each element of LIST is set to C<$_> in turn. Returns
+C<undef> if no such element has been found.
+
+C<first_result> is an alias for C<firstres>.
+
+=head3 onlyres BLOCK LIST
+
+=head3 only_result BLOCK LIST
+
+Returns the result of BLOCK for the first element in LIST for which BLOCK
+evaluates to true. Sets C<$_> for each item in LIST in turn. Returns
+C<undef> if no such element has been found.
+
+C<only_result> is an alias for C<onlyres>.
+
+=head3 lastres BLOCK LIST
+
+=head3 last_result BLOCK LIST
+
+Returns the result of BLOCK for the last element in LIST for which BLOCK
+evaluates to true. Each element of LIST is set to C<$_> in turn. Returns
+C<undef> if no such element has been found.
+
+C<last_result> is an alias for C<lastres>.
+
+=head3 indexes BLOCK LIST
+
+Evaluates BLOCK for each element in LIST (assigned to C<$_>) and returns a list
+of the indices of those elements for which BLOCK returned a true value. This is
+just like C<grep> except that it returns indices instead of values:
+
+  @x = indexes { $_ % 2 == 0 } (1..10); # returns 1, 3, 5, 7, 9
+
+=head3 firstidx BLOCK LIST
+
+=head3 first_index BLOCK LIST
+
+Returns the index of the first element in LIST for which the criterion in BLOCK
+is true. Sets C<$_> for each item in LIST in turn:
+
+  my @list = (1, 4, 3, 2, 4, 6);
+  printf "item with index %i in list is 4", firstidx { $_ == 4 } @list;
+  __END__
+  item with index 1 in list is 4
+
+Returns C<-1> if no such item could be found.
+
+C<first_index> is an alias for C<firstidx>.
+
+=head3 onlyidx BLOCK LIST
+
+=head3 only_index BLOCK LIST
+
+Returns the index of the only element in LIST for which the criterion
+in BLOCK is true. Sets C<$_> for each item in LIST in turn:
+
+  my @list = (1, 3, 4, 3, 2, 4);
+  printf "unique index of item 2 in list is %i", onlyidx { $_ == 2 } @list;
+  __END__
+  unique index of item 2 in list is 4
+
+Returns C<-1> if either no such item or more than one of these
+has been found.
+
+C<only_index> is an alias for C<onlyidx>.
+
+=head3 lastidx BLOCK LIST
+
+=head3 last_index BLOCK LIST
+
+Returns the index of the last element in LIST for which the criterion in BLOCK
+is true. Sets C<$_> for each item in LIST in turn:
+
+  my @list = (1, 4, 3, 2, 4, 6);
+  printf "item with index %i in list is 4", lastidx { $_ == 4 } @list;
+  __END__
+  item with index 4 in list is 4
+
+Returns C<-1> if no such item could be found.
+
+C<last_index> is an alias for C<lastidx>.
+
+=head2 Sorting
+
+=head3 sort_by BLOCK LIST
+
+Returns the list of values sorted according to the string values returned by the
+KEYFUNC block or function. A typical use of this may be to sort objects according
+to the string value of some accessor, such as
+
+  sort_by { $_->name } @people
+
+The key function is called in scalar context, being passed each value in turn as
+both $_ and the only argument in the parameters, @_. The values are then sorted
+according to string comparisons on the values returned.
+This is equivalent to
+
+  sort { $a->name cmp $b->name } @people
+
+except that it guarantees the name accessor will be executed only once per value.
+One interesting use-case is to sort strings which may have numbers embedded in them
+"naturally", rather than lexically.
+
+  sort_by { s/(\d+)/sprintf "%09d", $1/eg; $_ } @strings
+
+This sorts strings by generating sort keys which zero-pad the embedded numbers to
+some level (9 digits in this case), helping to ensure the lexical sort puts them
+in the correct order.
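+
+A variant of that idiom, sketched here, copies C<$_> into a scratch variable
+first, so that generating the sort keys does not modify the original strings
+in place:
+
+  use List::MoreUtils qw(sort_by);
+
+  my @strings = qw(file10 file2 file1);
+  my @sorted  = sort_by { (my $k = $_) =~ s/(\d+)/sprintf "%09d", $1/eg; $k } @strings;
+  # @sorted is ("file1", "file2", "file10"); @strings is left unchanged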
+
+=head3 nsort_by BLOCK LIST
+
+Similar to C<sort_by> but compares its key values numerically.
+
+=head3 qsort BLOCK ARRAY
+
+This sorts the given array B<in place> using the given compare code. Except for
+tiny compare code like C<< $a <=> $b >>, qsort is much faster than Perl's C<sort>
+depending on the version.
+
+Compared on Perl 5.8 and 5.26:
+
+  my @rl;
+  for(my $i = 0; $i < 1E6; ++$i) { push @rl, rand(1E5) }
+  my $idx;
+
+  sub ext_cmp { $_[0] <=> $_[1] }
+
+  cmpthese( -60, {
+    'qsort' => sub {
+      my @qrl = @rl;
+      qsort { ext_cmp($a, $b) } @qrl;
+      $idx = bsearchidx { ext_cmp($_, $rl[0]) } @qrl
+    },
+    'reverse qsort' => sub {
+      my @qrl = @rl;
+      qsort { ext_cmp($b, $a) } @qrl;
+      $idx = bsearchidx { ext_cmp($rl[0], $_) } @qrl
+    },
+    'sort' => sub {
+      my @srl = @rl;
+      @srl = sort { ext_cmp($a, $b) } @srl;
+      $idx = bsearchidx { ext_cmp($_, $rl[0]) } @srl
+    },
+    'reverse sort' => sub {
+      my @srl = @rl;
+      @srl = sort { ext_cmp($b, $a) } @srl;
+      $idx = bsearchidx { ext_cmp($rl[0], $_) } @srl
+    },
+  });
+
+5.8 results
+
+                s/iter reverse sort  sort reverse qsort qsort
+  reverse sort    6.21           --   -0%           -8%  -10%
+  sort            6.19           0%    --           -7%  -10%
+  reverse qsort   5.73           8%    8%            --   -2%
+  qsort           5.60          11%   11%            2%    --
+
+5.26 results
+
+                s/iter reverse sort  sort reverse qsort qsort
+  reverse sort    4.54           --   -0%          -96%  -96%
+  sort            4.52           0%    --          -96%  -96%
+  reverse qsort  0.203        2139% 2131%            --  -19%
+  qsort          0.164        2666% 2656%           24%    --
+
+Use it where external data sources might have to be compared (think of
+database "tables").
+
+C<qsort> is available from List::MoreUtils::XS only. It makes no sense to
+maintain a wrapper around Perl's C<sort>, nor to provide a pure-Perl
+implementation: one could draw a flip-book at the same speed a pure-Perl
+qsort would run.
+
+=head2 Searching in sorted Lists
+
+=head3 bsearch BLOCK LIST
+
+Performs a binary search on LIST which must be a sorted list of values. BLOCK
+must return a negative value if the current element (stored in C<$_>) is smaller,
+a positive value if it is bigger and zero if it matches.
+
+Returns a boolean value in scalar context. In list context, it returns the element
+if it was found, otherwise the empty list.
+
+=head3 bsearchidx BLOCK LIST
+
+=head3 bsearch_index BLOCK LIST
+
+Performs a binary search on LIST which must be a sorted list of values. BLOCK
+must return a negative value if the current element (stored in C<$_>) is smaller,
+a positive value if it is bigger and zero if it matches.
+
+Returns the index of the found element, otherwise C<-1>.
+
+C<bsearch_index> is an alias for C<bsearchidx>.
+
+=head3 lower_bound BLOCK LIST
+
+Returns the index of the first element in LIST which does not compare
+I<less than value>. Technically it's the first element in LIST which does
+not return a value below zero when passed to BLOCK.
+
+  @ids = (1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 6, 7, 7, 7, 8, 8, 9, 9, 9, 9, 9, 11, 13, 13, 13, 17);
+  $lb = lower_bound { $_ <=> 2 } @ids; # returns 2
+  $lb = lower_bound { $_ <=> 4 } @ids; # returns 10
+
+lower_bound has a complexity of O(log n).
+
+=head3 upper_bound BLOCK LIST
+
+Returns the index of the first element in LIST which does not compare
+I<less than or equal to value>. Technically it's the first element in LIST
+which does not return a value below or equal to zero when passed to BLOCK.
+
+  @ids = (1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 6, 7, 7, 7, 8, 8, 9, 9, 9, 9, 9, 11, 13, 13, 13, 17);
+  $lb = upper_bound { $_ <=> 2 } @ids; # returns 4
+  $lb = upper_bound { $_ <=> 4 } @ids; # returns 14
+
+upper_bound has a complexity of O(log n).
+
+=head3 equal_range BLOCK LIST
+
+Returns a pair of indices containing the lower_bound and the upper_bound.
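+
+A short sketch, using a shortened variant of the C<@ids> list from the
+C<lower_bound> and C<upper_bound> examples above:
+
+  use List::MoreUtils qw(equal_range);
+
+  my @ids = (1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5);
+  my ($lb, $ub) = equal_range { $_ <=> 3 } @ids;
+  # $lb == 4, $ub == 10: @ids[4 .. 9] are exactly the 3s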
+
+=head2 Operations on sorted Lists
+
+=head3 binsert BLOCK ITEM LIST
+
+=head3 bsearch_insert BLOCK ITEM LIST
+
+Performs a binary search on LIST which must be a sorted list of values. BLOCK
+must return a negative value if the current element (stored in C<$_>) is smaller,
+a positive value if it is bigger and zero if it matches.
+
+ITEM is inserted at the index where the ITEM should be placed (based on the above
+search). That means it's inserted before the next bigger element.
+
+  @l = (2,3,5,7);
+  binsert { $_ <=> 4 } 4, @l;  # @l = (2,3,4,5,7)
+  binsert { $_ <=> 6 } 42, @l; # @l = (2,3,4,5,42,7)
+
+It is your responsibility to ensure that the inserted element is consistent
+with the comparison done in BLOCK.
+
+=head3 bremove BLOCK LIST
+
+=head3 bsearch_remove BLOCK LIST
+
+Performs a binary search on LIST which must be a sorted list of values. BLOCK
+must return a negative value if the current element (stored in C<$_>) is smaller,
+a positive value if it is bigger and zero if it matches.
+
+The item at the found position is removed and returned.
+
+  @l = (2,3,4,5,7);
+  bremove { $_ <=> 4 } @l; # @l = (2,3,5,7);
+
+=head2 Counting and calculation
+
+=head3 true BLOCK LIST
+
+Counts the number of elements in LIST for which the criterion in BLOCK is true.
+Sets C<$_> for each item in LIST in turn:
+
+  printf "%i item(s) are defined", true { defined($_) } @list;
+
+=head3 false BLOCK LIST
+
+Counts the number of elements in LIST for which the criterion in BLOCK is false.
+Sets C<$_> for each item in LIST in turn:
+
+  printf "%i item(s) are not defined", false { defined($_) } @list;
+
+=head3 reduce_0 BLOCK LIST
+
+Reduce LIST by calling BLOCK in scalar context for each element of LIST.
+C<$a> contains the progressional result and is initialized with 0.
+C<$b> contains the current processed element of LIST and C<$_> contains the
+index of the element in C<$b>.
+
+The idea behind reduce_0 is B<summation> (addition of a sequence of numbers).
+
+=head3 reduce_1 BLOCK LIST
+
+Reduce LIST by calling BLOCK in scalar context for each element of LIST.
+C<$a> contains the progressional result and is initialized with 1.
+C<$b> contains the current processed element of LIST and C<$_> contains the
+index of the element in C<$b>.
+
+The idea behind reduce_1 is the B<product> of a sequence of numbers.
+
+=head3 reduce_u BLOCK LIST
+
+Reduce LIST by calling BLOCK in scalar context for each element of LIST.
+C<$a> contains the progressional result and is uninitialized.
+C<$b> contains the current processed element of LIST and C<$_> contains the
+index of the element in C<$b>.
+
+This function has been added for cases where one needs the extra index
+value but also an individual initialization.
+
+B<Note>: In most cases L<List::Util/reduce> will do the job better.
+
+=head3 minmax LIST
+
+Calculates the minimum and maximum of LIST and returns a two element list with
+the first element being the minimum and the second the maximum. Returns the
+empty list if LIST was empty.
+
+The C<minmax> algorithm differs from a naive iteration over the list where each
+element is compared to two values being the so far calculated min and max value
+in that it only requires 3n/2 - 2 comparisons. Thus it is the most efficient
+possible algorithm.
+
+However, the Perl implementation of it has some overhead simply due to the fact
+that there are more lines of Perl code involved. Therefore, LIST needs to be
+fairly big in order for C<minmax> to win over a naive implementation. This
+limitation does not apply to the XS version.
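+
+Two quick sketches of the calculation helpers described above - C<reduce_0>
+for a summation and C<minmax> for a single-pass minimum and maximum:
+
+  use List::MoreUtils qw(reduce_0 minmax);
+
+  my $sum = reduce_0 { $a + $b } 1 .. 5;          # 15
+  my ($min, $max) = minmax(43, 66, 77, 23, 780);  # (23, 780)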
+
+=head3 minmaxstr LIST
+
+Computes the minimum and maximum of LIST using string compare and returns a
+two element list with the first element being the minimum and the second the
+maximum. Returns the empty list if LIST was empty.
+
+The implementation is similar to C<minmax>.
+
+=head1 ENVIRONMENT
+
+When C<LIST_MOREUTILS_PP> is set, the module will always use the pure-Perl
+implementation and not the XS one. This environment variable is really just
+there for the test-suite to force testing the Perl implementation, and possibly
+for reporting of bugs. I don't see any reason to use it in a production
+environment.
+
+=head1 MAINTENANCE
+
+The maintenance goal is to preserve the documented semantics of the API;
+bug fixes that bring actual behavior in line with semantics are allowed.
+New API functions may be added over time. If a backwards incompatible
+change is unavoidable, we will attempt to provide support for the legacy
+API using the same export tag mechanism currently in place.
+
+This module attempts to use few non-core dependencies. Non-core
+configuration and testing modules will be bundled when reasonable;
+run-time dependencies will be added only if they deliver substantial
+benefit.
+
+=head1 CONTRIBUTING
+
+While contributions are appreciated, a contribution should not cause more
+effort for the maintainer than the contribution itself saves (see
+L<Open Source Contribution Etiquette|http://tirania.org/blog/archive/2010/Dec-31.html>).
+
+To get more familiar with where help could be needed - see
+L<List::MoreUtils::Contributing>.
+
+=head1 BUGS
+
+There is a problem with a bug in 5.6.x perls. It is a syntax error to write
+things like:
+
+  my @x = apply { s/foo/bar/ } qw{ foo bar baz };
+
+It has to be written as either
+
+  my @x = apply { s/foo/bar/ } 'foo', 'bar', 'baz';
+
+or
+
+  my @x = apply { s/foo/bar/ } my @dummy = qw/foo bar baz/;
+
+Perl 5.5.x and Perl 5.8.x don't suffer from this limitation.
+
+If you have a functionality that you could imagine being in this module, please
+drop me a line. This module's policy will be less strict than L<List::Util>'s
+when it comes to additions as it isn't a core module.
+
+When you report bugs, it would be nice if you could additionally give me the
+output of your program with the environment variable C<LIST_MOREUTILS_PP> set
+to a true value. That way I know where to look for the problem (in XS,
+pure-Perl or possibly both).
+
+=head1 SUPPORT
+
+Bugs should always be submitted via the CPAN bug tracker.
+
+You can find documentation for this module with the perldoc command.
+
+    perldoc List::MoreUtils
+
+You can also look for information at:
+
+=over 4
+
+=item * RT: CPAN's request tracker
+
+L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=List-MoreUtils>
+
+=item * AnnoCPAN: Annotated CPAN documentation
+
+L<http://annocpan.org/dist/List-MoreUtils>
+
+=item * CPAN Ratings
+
+L<http://cpanratings.perl.org/d/List-MoreUtils>
+
+=item * MetaCPAN
+
+L<https://metacpan.org/release/List-MoreUtils>
+
+=item * CPAN Search
+
+L<http://search.cpan.org/dist/List-MoreUtils/>
+
+=item * Git Repository
+
+L<https://github.com/perl5-utils/List-MoreUtils>
+
+=back
+
+=head2 Where can I go for help?
+
+If you have a bug report, a patch or a suggestion, please open a new
+report ticket at CPAN (but please check previous reports first in case
+your issue has already been addressed) or open an issue on GitHub.
+
+Report tickets should contain a detailed description of the bug or
+enhancement request and at least an easily verifiable way of
+reproducing the issue or fix. Patches are always welcome, too - and
+it's cheap to send pull-requests on GitHub. Please keep in mind that
+code changes are more likely to be accepted when they're bundled with an
+approving test.
+
+If you think you've found a bug then please read
+"How to Report Bugs Effectively" by Simon Tatham:
+L<https://www.chiark.greenend.org.uk/~sgtatham/bugs.html>.
+
+=head2 Where can I go for help with a concrete version?
+
+Bugs and feature requests are accepted against the latest version
+only.
To get patches for earlier versions, you need to come to an
+agreement with a developer of your choice - who may or may not report the
+issue and a suggested fix upstream (depending on the license you have
+chosen).
+
+=head2 Business support and maintenance
+
+Generally, in volunteer projects, there is no right to support.
+While every maintainer is happy to improve the provided software,
+spare time is limited.
+
+For those who have a use case which requires guaranteed support, one of
+the maintainers should be hired or contracted. For business support you
+can contact Jens via his CPAN email address rehsackATcpan.org. Please
+keep in mind that business support is neither available for free nor
+are you eligible to receive any support based on the license distributed
+with this package.
+
+=head1 THANKS
+
+=head2 Tassilo von Parseval
+
+Credits go to a number of people: Steve Purkis for giving me namespace advice
+and James Keenan and Terrence Branno for their effort of keeping the CPAN
+tidier by making L<List::Utils> obsolete.
+
+Brian McCauley suggested the inclusion of apply() and provided the pure-Perl
+implementation for it.
+
+Eric J. Roode asked me to add all functions from his module into this one.
+With minor modifications, the pure-Perl implementations of those are by him.
+
+The bunch of people who almost immediately pointed out the many problems with
+the glitchy 0.07 release (Slaven Rezic, Ron Savage, CPAN testers).
+
+A particularly nasty memory leak was spotted by Thomas A. Lowery.
+
+Lars Thegler made me aware of problems with older Perl versions.
+
+Anno Siegel de-orphaned each_arrayref().
+
+David Filmer made me aware of a problem in each_arrayref that could ultimately
+lead to a segfault.
+
+Ricardo Signes suggested the inclusion of part() and provided the
+Perl-implementation.
+
+Robin Huston kindly fixed a bug in perl's MULTICALL API to make the
+XS-implementation of part() work.
+
+=head2 Jens Rehsack
+
+Credits go to all the people contributing feedback during the v0.400
+development releases.
+
+Special thanks go to David Golden who spent a lot of effort to develop
+a design to support the current state of CPAN as well as ancient software
+somewhere in the dark. He also contributed a lot of patches to refactor
+the API frontend to welcome any user of List::MoreUtils - from the ancient
+past to the most recent.
+
+Toby Inkster provided a lot of useful feedback for sane importer code
+and was a nice sounding board for API discussions.
+
+Peter Rabbitson provided a sane git repository setup containing the entire
+package history.
+
+=head1 TODO
+
+A pile of requests from other people is still pending further processing in
+my mailbox. This includes:
+
+=over 4
+
+=item * delete_index
+
+=item * random_item
+
+=item * random_item_delete_index
+
+=item * list_diff_hash
+
+=item * list_diff_inboth
+
+=item * list_diff_infirst
+
+=item * list_diff_insecond
+
+These were all suggested by Dan Muey.
+
+=item * listify
+
+Always return a flat list when either a simple scalar value was passed or an
+array-reference. Suggested by Mark Summersault.
+
+=back
+
+=head1 SEE ALSO
+
+L<List::Util>, L<List::AllUtils>, L<List::UtilsBy>
+
+=head1 AUTHOR
+
+Jens Rehsack E<lt>rehsack AT cpan.orgE<gt>
+
+Adam Kennedy E<lt>adamk@cpan.orgE<gt>
+
+Tassilo von Parseval E<lt>tassilo.von.parseval@rwth-aachen.deE<gt>
+
+=head1 COPYRIGHT AND LICENSE
+
+Some parts copyright 2011 Aaron Crane.
+
+Copyright 2004 - 2010 by Tassilo von Parseval
+
+Copyright 2013 - 2017 by Jens Rehsack
+
+All code added with 0.417 or later is licensed under the Apache License,
+Version 2.0 (the "License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+All code until 0.416 is licensed under the same terms as Perl itself,
+either Perl version 5.8.4 or, at your option, any later version of
+Perl 5 you may have available.
+
+=cut
+
+1;
diff --git a/src/modules/List/MoreUtils/Contributing.pod b/src/modules/List/MoreUtils/Contributing.pod
new file mode 100644
index 0000000..11b7996
--- /dev/null
+++ b/src/modules/List/MoreUtils/Contributing.pod
@@ -0,0 +1,100 @@
+=head1 NAME
+
+List::MoreUtils::Contributing - Gives rough introduction into contributing to List::MoreUtils
+
+=head1 DESCRIPTION
+
+List::MoreUtils has a turbulent history and a strong approach. Before
+going further, please review the distribution's history and then
+come back.
+
+The current distribution is a balance between finishing the history and
+preparing for future requirements. Therefore some components will receive
+a rewrite on purpose - others won't.
+
+For the moment - it's not the primary goal to clean up the configuration
+stage, until the primary goals and prerequisites are done.
+
+To contribute to List::MoreUtils, one has to come to terms with the current
+situation, dig into details and ask for clarification when parts are
+incomprehensible.
+
+=head2 Primary Goals
+
+The very first primary goal is to clear the backlog. These are primarily
+the open issues, feature requests and missing infrastructure elements.
+
+As examples, see RT#93207 or RT#75672 for missing configure time checks,
+while RT#93207 radiates into the test stage - but doesn't affect runtime nor
+installation (beside test failures).
+
+=head2 Secondary Goals
+
+Secondary goals are harmonizing the function names and calling conventions
+(see RT#102673), tidying the infrastructure of the distribution and removing
+unnecessary complexity (while protecting the necessary).
+
+One example of removing unnecessary infrastructure could be to move the
+bundled configuration and test helper modules into authoring mode, when an
+improved test for RT#93207 could be reasonably done by a module which
+is recommended for test. The recommendation of such a module in the
+distribution metadata is a desirable one.
+
+=head2 Orientation Guide
+
+The List::MoreUtils configuration stage heavily depends on
+L<Config::AutoConf> and L<ExtUtils::MakeMaker>. A few prerequisites of both
+modules aren't available for Perl 5.6 - which leads to a tiny emulation layer
+at the beginning of C<Makefile.PL>.
+
+The reason for L<Config::AutoConf> is quite simple - the opportunities
+for checking the environment cover a much wider range than a simple test
+whether there is a working compiler. It requires a lot of improvements,
+since its base L<ExtUtils::CBuilder> was never designed to support
+that kind of solution - but there is progress. To finally
+solve issues such as RT#75672 even in cross-compile environments - there is
+no way around such a checking tool.
+
+The reason for the chosen test modules in combination is extensible tests
+with reasonable effort and easy figuring out which
+extra condition causes failures.
Also - missing pre-conditions should +result in failing tests i some cases - what is fully supported by the +logic behind L in combination with L. + +Finally - L glues the stuff in a bundle together to allow +people with older toolchains to use List::MoreUtils out of the box (maybe +with reduced quantity but full quality). + +=head1 SEE ALSO + +L, L, L, +L + +=head1 AUTHOR + +Jens Rehsack Erehsack AT cpan.orgE + +=head1 COPYRIGHT AND LICENSE + +Copyright 2015-2017 by Jens Rehsack + +All code added with 0.417 or later is licensed under the Apache License, +Version 2.0 (the "License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +All code until 0.416 is licensed under the same terms as Perl itself, +either Perl version 5.8.4 or, at your option, any later version of +Perl 5 you may have available. + +=cut diff --git a/src/modules/List/MoreUtils/PP.pm b/src/modules/List/MoreUtils/PP.pm new file mode 100644 index 0000000..320635b --- /dev/null +++ b/src/modules/List/MoreUtils/PP.pm @@ -0,0 +1,953 @@ +package List::MoreUtils::PP; + +use 5.008_001; +use strict; +use warnings; + +our $VERSION = '0.430'; + +=pod + +=head1 NAME + +List::MoreUtils::PP - Provide List::MoreUtils pure Perl implementation + +=head1 SYNOPSIS + + BEGIN { $ENV{LIST_MOREUTILS_PP} = 1; } + use List::MoreUtils qw(:all); + +=cut + +## no critic (Subroutines::ProhibitSubroutinePrototypes, Subroutines::RequireArgUnpacking) +## no critic (Subroutines::ProhibitManyArgs) + +sub any (&@) +{ + my $f = shift; + foreach (@_) + { + return 1 if $f->(); + } + return 0; +} + +sub all (&@) +{ + my $f = shift; + foreach (@_) + { + return 0 unless $f->(); + } + return 1; +} + +sub none (&@) +{ + my $f = shift; + foreach (@_) + { + return 0 if $f->(); + } + return 1; +} + +sub notall (&@) +{ + my $f = shift; + foreach (@_) + { + return 1 unless $f->(); + } + return 0; +} + +sub one (&@) +{ + my $f = shift; + my $found = 0; + foreach (@_) + { + $f->() and $found++ and return 0; + } + return $found; +} + +sub any_u (&@) +{ + my $f = shift; + return if !@_; + $f->() and return 1 foreach (@_); + return 0; +} + +sub all_u (&@) +{ + my $f = shift; + return if !@_; + $f->() or return 0 foreach (@_); + return 1; +} + +sub none_u (&@) +{ + my $f = shift; + return if !@_; + $f->() and return 0 foreach (@_); + return 1; +} + +sub notall_u (&@) +{ + my $f = shift; + return if !@_; + $f->() or return 1 foreach (@_); + return 0; +} + +sub one_u (&@) +{ + my $f = shift; + return if !@_; + my $found = 0; + foreach (@_) + { + $f->() and $found++ and return 0; + } + return $found; +} + +sub reduce_u(&@) +{ + my $code = shift; + + # Localise $a, $b + my ($caller_a, $caller_b) = do + { + my $pkg = caller(); + ## no critic (TestingAndDebugging::ProhibitNoStrict, ValuesAndExpressions::ProhibitCommaSeparatedStatements) + no strict 'refs'; + \*{$pkg . '::a'}, \*{$pkg . '::b'}; + }; + + ## no critic (Variables::RequireInitializationForLocalVars) + local (*$caller_a, *$caller_b); + *$caller_a = \(); + for (0 .. 
$#_) + { + *$caller_b = \$_[$_]; + *$caller_a = \($code->()); + } + + return ${*$caller_a}; +} + +sub reduce_0(&@) +{ + my $code = shift; + + # Localise $a, $b + my ($caller_a, $caller_b) = do + { + my $pkg = caller(); + ## no critic (TestingAndDebugging::ProhibitNoStrict, ValuesAndExpressions::ProhibitCommaSeparatedStatements) + no strict 'refs'; + \*{$pkg . '::a'}, \*{$pkg . '::b'}; + }; + + ## no critic (Variables::RequireInitializationForLocalVars) + local (*$caller_a, *$caller_b); + *$caller_a = \0; + for (0 .. $#_) + { + *$caller_b = \$_[$_]; + *$caller_a = \($code->()); + } + + return ${*$caller_a}; +} + +sub reduce_1(&@) +{ + my $code = shift; + + # Localise $a, $b + my ($caller_a, $caller_b) = do + { + my $pkg = caller(); + ## no critic (TestingAndDebugging::ProhibitNoStrict, ValuesAndExpressions::ProhibitCommaSeparatedStatements) + no strict 'refs'; + \*{$pkg . '::a'}, \*{$pkg . '::b'}; + }; + + ## no critic (Variables::RequireInitializationForLocalVars) + local (*$caller_a, *$caller_b); + *$caller_a = \1; + for (0 .. $#_) + { + *$caller_b = \$_[$_]; + *$caller_a = \($code->()); + } + + return ${*$caller_a}; +} + +sub true (&@) +{ + my $f = shift; + my $count = 0; + $f->() and ++$count foreach (@_); + return $count; +} + +sub false (&@) +{ + my $f = shift; + my $count = 0; + $f->() or ++$count foreach (@_); + return $count; +} + +sub firstidx (&@) +{ + my $f = shift; + foreach my $i (0 .. $#_) + { + local *_ = \$_[$i]; + return $i if $f->(); + } + return -1; +} + +sub firstval (&@) +{ + my $test = shift; + foreach (@_) + { + return $_ if $test->(); + } + ## no critic (Subroutines::ProhibitExplicitReturnUndef) + return undef; +} + +sub firstres (&@) +{ + my $test = shift; + foreach (@_) + { + my $testval = $test->(); + $testval and return $testval; + } + ## no critic (Subroutines::ProhibitExplicitReturnUndef) + return undef; +} + +sub onlyidx (&@) +{ + my $f = shift; + my $found; + foreach my $i (0 .. $#_) + { + local *_ = \$_[$i]; + $f->() or next; + defined $found and return -1; + $found = $i; + } + return defined $found ? $found : -1; +} + +sub onlyval (&@) +{ + my $test = shift; + my $result = undef; + my $found = 0; + foreach (@_) + { + $test->() or next; + $result = $_; + ## no critic (Subroutines::ProhibitExplicitReturnUndef) + $found++ and return undef; + } + return $result; +} + +sub onlyres (&@) +{ + my $test = shift; + my $result = undef; + my $found = 0; + foreach (@_) + { + my $rv = $test->() or next; + $result = $rv; + ## no critic (Subroutines::ProhibitExplicitReturnUndef) + $found++ and return undef; + } + return $found ? $result : undef; +} + +sub lastidx (&@) +{ + my $f = shift; + foreach my $i (reverse 0 .. $#_) + { + local *_ = \$_[$i]; + return $i if $f->(); + } + return -1; +} + +sub lastval (&@) +{ + my $test = shift; + my $ix; + for ($ix = $#_; $ix >= 0; $ix--) + { + local *_ = \$_[$ix]; + my $testval = $test->(); + + # Simulate $_ as alias + $_[$ix] = $_; + return $_ if $testval; + } + ## no critic (Subroutines::ProhibitExplicitReturnUndef) + return undef; +} + +sub lastres (&@) +{ + my $test = shift; + my $ix; + for ($ix = $#_; $ix >= 0; $ix--) + { + local *_ = \$_[$ix]; + my $testval = $test->(); + + # Simulate $_ as alias + $_[$ix] = $_; + return $testval if $testval; + } + ## no critic (Subroutines::ProhibitExplicitReturnUndef) + return undef; +} + +sub insert_after (&$\@) +{ + my ($f, $val, $list) = @_; + my $c = &firstidx($f, @$list); + @$list = (@{$list}[0 .. $c], $val, @{$list}[$c + 1 .. 
$#$list],) and return 1 if $c != -1; + return 0; +} + +sub insert_after_string ($$\@) +{ + my ($string, $val, $list) = @_; + my $c = firstidx { defined $_ and $string eq $_ } @$list; + @$list = (@{$list}[0 .. $c], $val, @{$list}[$c + 1 .. $#$list],) and return 1 if $c != -1; + return 0; +} + +sub apply (&@) +{ + my $action = shift; + &$action foreach my @values = @_; + return wantarray ? @values : $values[-1]; +} + +sub after (&@) +{ + my $test = shift; + my $started; + my $lag; + ## no critic (BuiltinFunctions::RequireBlockGrep) + return grep $started ||= do + { + my $x = $lag; + $lag = $test->(); + $x; + }, @_; +} + +sub after_incl (&@) +{ + my $test = shift; + my $started; + return grep { $started ||= $test->() } @_; +} + +sub before (&@) +{ + my $test = shift; + my $more = 1; + return grep { $more &&= !$test->() } @_; +} + +sub before_incl (&@) +{ + my $test = shift; + my $more = 1; + my $lag = 1; + ## no critic (BuiltinFunctions::RequireBlockGrep) + return grep $more &&= do + { + my $x = $lag; + $lag = !$test->(); + $x; + }, @_; +} + +sub indexes (&@) +{ + my $test = shift; + return grep { + local *_ = \$_[$_]; + $test->() + } 0 .. $#_; +} + +sub pairwise (&\@\@) +{ + my $op = shift; + + # Symbols for caller's input arrays + use vars qw{ @A @B }; + local (*A, *B) = @_; + + # Localise $a, $b + my ($caller_a, $caller_b) = do + { + my $pkg = caller(); + ## no critic (TestingAndDebugging::ProhibitNoStrict, ValuesAndExpressions::ProhibitCommaSeparatedStatements) + no strict 'refs'; + \*{$pkg . '::a'}, \*{$pkg . '::b'}; + }; + + # Loop iteration limit + my $limit = $#A > $#B ? $#A : $#B; + + ## no critic (Variables::RequireInitializationForLocalVars) + # This map expression is also the return value + local (*$caller_a, *$caller_b); + ## no critic (BuiltinFunctions::ProhibitComplexMappings) + return map { + # Assign to $a, $b as refs to caller's array elements + (*$caller_a, *$caller_b) = \($#A < $_ ? undef : $A[$_], $#B < $_ ? undef : $B[$_]); + + # Perform the transformation + $op->(); + } 0 .. $limit; +} + +sub each_array (\@;\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@) +{ + return each_arrayref(@_); +} + +sub each_arrayref +{ + my @list = @_; # The list of references to the arrays + my $index = 0; # Which one the caller will get next + my $max = 0; # Number of elements in longest array + + # Get the length of the longest input array + foreach (@list) + { + unless (ref $_ eq 'ARRAY') + { + require Carp; + Carp::croak("each_arrayref: argument is not an array reference\n"); + } + $max = @$_ if @$_ > $max; + } + + # Return the iterator as a closure wrt the above variables. + return sub { + if (@_) + { + my $method = shift; + unless ($method eq 'index') + { + require Carp; + Carp::croak("each_array: unknown argument '$method' passed to iterator."); + } + + ## no critic (Subroutines::ProhibitExplicitReturnUndef) + return undef if $index == 0 || $index > $max; + # Return current (last fetched) index + return $index - 1; + } + + # No more elements to return + return if $index >= $max; + my $i = $index++; + + # Return ith elements + ## no critic (BuiltinFunctions::RequireBlockMap) + return map $_->[$i], @list; + } +} + +sub natatime ($@) +{ + my $n = shift; + my @list = @_; + return sub { return splice @list, 0, $n; } +} + +# "leaks" when lexically hidden in arrayify +my $flatten; +$flatten = sub { + return map { (ref $_ and ("ARRAY" eq ref $_ or overload::Method($_, '@{}'))) ? 
($flatten->(@{$_})) : ($_) } @_; +}; + +sub arrayify +{ + return map { $flatten->($_) } @_; +} + +sub mesh (\@\@;\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@) +{ + my $max = -1; + $max < $#$_ && ($max = $#$_) foreach @_; + ## no critic (BuiltinFunctions::ProhibitComplexMappings) + return map { + my $ix = $_; + ## no critic (BuiltinFunctions::RequireBlockMap) + map $_->[$ix], @_; + } 0 .. $max; +} + +sub zip6 (\@\@;\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@) +{ + my $max = -1; + $max < $#$_ && ($max = $#$_) foreach @_; + ## no critic (BuiltinFunctions::ProhibitComplexMappings) + return map { + my $ix = $_; + ## no critic (BuiltinFunctions::RequireBlockMap) + [map $_->[$ix], @_]; + } 0 .. $max; +} + +sub listcmp (\@\@;\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@\@) +{ + my %ret; + for (my $i = 0; $i < scalar @_; ++$i) + { + my %seen; + my $k; + foreach my $w (grep { defined $_ and not $seen{$k = $_}++ } @{$_[$i]}) + { + $ret{$w} ||= []; + push @{$ret{$w}}, $i; + } + } + return %ret; +} + +sub uniq (@) +{ + my %seen = (); + my $k; + my $seen_undef; + return grep { defined $_ ? not $seen{$k = $_}++ : not $seen_undef++ } @_; +} + +sub singleton (@) +{ + my %seen = (); + my $k; + my $seen_undef; + return grep { 1 == (defined $_ ? $seen{$k = $_} : $seen_undef) } + grep { defined $_ ? not $seen{$k = $_}++ : not $seen_undef++ } @_; +} + +sub duplicates (@) +{ + my %seen = (); + my $k; + my $seen_undef; + return grep { 1 < (defined $_ ? $seen{$k = $_} : $seen_undef) } + grep { defined $_ ? not $seen{$k = $_}++ : not $seen_undef++ } @_; +} + +sub frequency (@) +{ + my %seen = (); + my $k; + my $seen_undef; + my %h = map { defined $_ ? ($_ => $seen{$k = $_}) : () } + grep { defined $_ ? not $seen{$k = $_}++ : not $seen_undef++ } @_; + wantarray or return (scalar keys %h) + ($seen_undef ? 1 : 0); + undef $k; + return (%h, $seen_undef ? (\$k => $seen_undef) : ()); +} + +sub occurrences (@) +{ + my %seen = (); + my $k; + my $seen_undef; + my @ret; + foreach my $l (map { $_ } grep { defined $_ ? not $seen{$k = $_}++ : not $seen_undef++ } @_) + { + my $n = defined $l ? $seen{$l} : $seen_undef; + defined $ret[$n] or $ret[$n] = []; + push @{$ret[$n]}, $l; + } + return @ret; +} + +sub mode (@) +{ + my %seen = (); + my ($max, $k, $seen_undef) = (1); + + foreach (@_) { defined $_ ? 
($max < ++$seen{$k = $_} and ++$max) : ($max < ++$seen_undef and ++$max) } + wantarray or return $max; + + my @ret = ($max); + foreach my $l (grep { $seen{$_} == $max } keys %seen) + { + push @ret, $l; + } + $seen_undef and $seen_undef == $max and push @ret, undef; + return @ret; +} + +sub samples ($@) +{ + my $n = shift; + if ($n > @_) + { + require Carp; + Carp::croak(sprintf("Cannot get %d samples from %d elements", $n, scalar @_)); + } + + for (my $i = @_; @_ - $i > $n;) + { + my $idx = @_ - $i; + my $swp = $idx + int(rand(--$i)); + my $xchg = $_[$swp]; + $_[$swp] = $_[$idx]; + $_[$idx] = $xchg; + } + + return splice @_, 0, $n; +} + +sub minmax (@) +{ + return unless @_; + my $min = my $max = $_[0]; + + for (my $i = 1; $i < @_; $i += 2) + { + if ($_[$i - 1] <= $_[$i]) + { + $min = $_[$i - 1] if $min > $_[$i - 1]; + $max = $_[$i] if $max < $_[$i]; + } + else + { + $min = $_[$i] if $min > $_[$i]; + $max = $_[$i - 1] if $max < $_[$i - 1]; + } + } + + if (@_ & 1) + { + my $i = $#_; + if ($_[$i - 1] <= $_[$i]) + { + $min = $_[$i - 1] if $min > $_[$i - 1]; + $max = $_[$i] if $max < $_[$i]; + } + else + { + $min = $_[$i] if $min > $_[$i]; + $max = $_[$i - 1] if $max < $_[$i - 1]; + } + } + + return ($min, $max); +} + +sub minmaxstr (@) +{ + return unless @_; + my $min = my $max = $_[0]; + + for (my $i = 1; $i < @_; $i += 2) + { + if ($_[$i - 1] le $_[$i]) + { + $min = $_[$i - 1] if $min gt $_[$i - 1]; + $max = $_[$i] if $max lt $_[$i]; + } + else + { + $min = $_[$i] if $min gt $_[$i]; + $max = $_[$i - 1] if $max lt $_[$i - 1]; + } + } + + if (@_ & 1) + { + my $i = $#_; + if ($_[$i - 1] le $_[$i]) + { + $min = $_[$i - 1] if $min gt $_[$i - 1]; + $max = $_[$i] if $max lt $_[$i]; + } + else + { + $min = $_[$i] if $min gt $_[$i]; + $max = $_[$i - 1] if $max lt $_[$i - 1]; + } + } + + return ($min, $max); +} + +sub part (&@) +{ + my ($code, @list) = @_; + my @parts; + push @{$parts[$code->($_)]}, $_ foreach @list; + return @parts; +} + +sub bsearch(&@) +{ + my $code = shift; + + my $rc; + my $i = 0; + my $j = @_; + ## no critic (ControlStructures::ProhibitNegativeExpressionsInUnlessAndUntilConditions) + do + { + my $k = int(($i + $j) / 2); + + $k >= @_ and return; + + local *_ = \$_[$k]; + $rc = $code->(); + + $rc == 0 + and return wantarray ? 
$_ : 1; + + if ($rc < 0) + { + $i = $k + 1; + } + else + { + $j = $k - 1; + } + } until $i > $j; + + return; +} + +sub bsearchidx(&@) +{ + my $code = shift; + + my $rc; + my $i = 0; + my $j = @_; + ## no critic (ControlStructures::ProhibitNegativeExpressionsInUnlessAndUntilConditions) + do + { + my $k = int(($i + $j) / 2); + + $k >= @_ and return -1; + + local *_ = \$_[$k]; + $rc = $code->(); + + $rc == 0 and return $k; + + if ($rc < 0) + { + $i = $k + 1; + } + else + { + $j = $k - 1; + } + } until $i > $j; + + return -1; +} + +sub lower_bound(&@) +{ + my $code = shift; + my $count = @_; + my $first = 0; + while ($count > 0) + { + my $step = $count >> 1; + my $it = $first + $step; + local *_ = \$_[$it]; + if ($code->() < 0) + { + $first = ++$it; + $count -= $step + 1; + } + else + { + $count = $step; + } + } + + return $first; +} + +sub upper_bound(&@) +{ + my $code = shift; + my $count = @_; + my $first = 0; + while ($count > 0) + { + my $step = $count >> 1; + my $it = $first + $step; + local *_ = \$_[$it]; + if ($code->() <= 0) + { + $first = ++$it; + $count -= $step + 1; + } + else + { + $count = $step; + } + } + + return $first; +} + +sub equal_range(&@) +{ + my $lb = &lower_bound(@_); + my $ub = &upper_bound(@_); + return ($lb, $ub); +} + +sub binsert (&$\@) +{ + my $lb = &lower_bound($_[0], @{$_[2]}); + splice @{$_[2]}, $lb, 0, $_[1]; + return $lb; +} + +sub bremove (&\@) +{ + my $lb = &lower_bound($_[0], @{$_[1]}); + return splice @{$_[1]}, $lb, 1; +} + +sub qsort(&\@) +{ + require Carp; + Carp::croak("It's insane to use a pure-perl qsort"); +} + +sub slide(&@) +{ + my $op = shift; + my @l = @_; + + ## no critic (TestingAndDebugging::ProhibitNoStrict, ValuesAndExpressions::ProhibitCommaSeparatedStatements) + # Localise $a, $b + my ($caller_a, $caller_b) = do + { + my $pkg = caller(); + no strict 'refs'; + \*{$pkg . '::a'}, \*{$pkg . '::b'}; + }; + + ## no critic (Variables::RequireInitializationForLocalVars) + # This map expression is also the return value + local (*$caller_a, *$caller_b); + ## no critic (BuiltinFunctions::ProhibitComplexMappings) + return map { + # Assign to $a, $b as refs to caller's array elements + (*$caller_a, *$caller_b) = \($l[$_], $l[$_ + 1]); + + # Perform the transformation + $op->(); + } 0 .. ($#l - 1); +} + +sub slideatatime ($$@) +{ + my ($m, $w, @list) = @_; + my $n = $w - $m - 1; + return $n >= 0 + ? sub { my @r = splice @list, 0, $m; $#list < $n and $n = $#list; @r and push @r, (@list ? @list[0 .. $n] : ()); return @r; } + : sub { return splice @list, 0, $m; }; +} + +sub sort_by(&@) +{ + my ($code, @list) = @_; + return map { $_->[0] } + sort { $a->[1] cmp $b->[1] } + map { [$_, scalar($code->())] } @list; +} + +sub nsort_by(&@) +{ + my ($code, @list) = @_; + return map { $_->[0] } + sort { $a->[1] <=> $b->[1] } + map { [$_, scalar($code->())] } @list; +} + +## no critic (Subroutines::ProhibitUnusedPrivateSubroutines) +sub _XScompiled { return 0 } + +=head1 SEE ALSO + +L + +=head1 AUTHOR + +Jens Rehsack Erehsack AT cpan.orgE + +Adam Kennedy Eadamk@cpan.orgE + +Tassilo von Parseval Etassilo.von.parseval@rwth-aachen.deE + +=head1 COPYRIGHT AND LICENSE + +Some parts copyright 2011 Aaron Crane. + +Copyright 2004 - 2010 by Tassilo von Parseval + +Copyright 2013 - 2017 by Jens Rehsack + +All code added with 0.417 or later is licensed under the Apache License, +Version 2.0 (the "License"); you may not use this file except in compliance +with the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +All code until 0.416 is licensed under the same terms as Perl itself, +either Perl version 5.8.4 or, at your option, any later version of +Perl 5 you may have available. + +=cut + +1; diff --git a/src/modules/Parallel/ForkManager.pm b/src/modules/Parallel/ForkManager.pm new file mode 100644 index 0000000..0018826 --- /dev/null +++ b/src/modules/Parallel/ForkManager.pm @@ -0,0 +1,852 @@ +package Parallel::ForkManager; +our $AUTHORITY = 'cpan:DLUX'; +# ABSTRACT: A simple parallel processing fork manager +$Parallel::ForkManager::VERSION = '1.18'; +use POSIX ":sys_wait_h"; +use Storable qw(store retrieve); +use File::Spec; +use File::Temp (); +use File::Path (); +use Carp; + +use strict; + +sub new { + my ($c,$processes,$tempdir)=@_; + + my $h={ + max_proc => $processes, + processes => {}, + in_child => 0, + parent_pid => $$, + auto_cleanup => ($tempdir ? 0 : 1), + waitpid_blocking_sleep => 1, + }; + + + # determine temporary directory for storing data structures + # add it to Parallel::ForkManager object so children can use it + # We don't let it clean up so it won't do it in the child process + # but we have our own DESTROY to do that. + if (not defined($tempdir) or not length($tempdir)) { + $tempdir = File::Temp::tempdir(CLEANUP => 0); + } + die qq|Temporary directory "$tempdir" doesn't exist or is not a directory.| unless (-e $tempdir && -d _); # ensure temp dir exists and is indeed a directory + $h->{tempdir} = $tempdir; + + return bless($h,ref($c)||$c); +}; + +sub start { + my ($s,$identification)=@_; + + die "Cannot start another process while you are in the child process" + if $s->{in_child}; + while ($s->{max_proc} && ( keys %{ $s->{processes} } ) >= $s->{max_proc}) { + $s->on_wait; + $s->wait_one_child(defined $s->{on_wait_period} ? &WNOHANG : undef); + }; + $s->wait_children; + if ($s->{max_proc}) { + my $pid=fork(); + die "Cannot fork: $!" if !defined $pid; + if ($pid) { + $s->{processes}->{$pid}=$identification; + $s->on_start($pid,$identification); + } else { + $s->{in_child}=1 if !$pid; + } + return $pid; + } else { + $s->{processes}->{$$}=$identification; + $s->on_start($$,$identification); + return 0; # Simulating the child which returns 0 + } +} + +sub finish { + my ($s, $x, $r)=@_; + + if ( $s->{in_child} ) { + if (defined($r)) { # store the child's data structure + my $storable_tempfile = File::Spec->catfile($s->{tempdir}, 'Parallel-ForkManager-' . $s->{parent_pid} . '-' . $$ . '.txt'); + my $stored = eval { return &store($r, $storable_tempfile); }; + + # handle Storables errors, IE logcarp or carp returning undef, or die (via logcroak or croak) + if (not $stored or $@) { + warn(qq|The storable module was unable to store the child's data structure to the temp file "$storable_tempfile": | . 
join(', ', $@)); + } + } + CORE::exit($x || 0); + } + if ($s->{max_proc} == 0) { # max_proc == 0 + $s->on_finish($$, $x ,$s->{processes}->{$$}, 0, 0, $r); + delete $s->{processes}->{$$}; + } + return 0; +} + +sub wait_children { + my ($s)=@_; + + return if !keys %{$s->{processes}}; + my $kid; + do { + $kid = $s->wait_one_child(&WNOHANG); + } while defined $kid and ( $kid > 0 or $kid < -1 ); # AS 5.6/Win32 returns negative PIDs +}; + +*wait_childs=*wait_children; # compatibility +*reap_finished_children=*wait_children; # behavioral synonym for clarity + +sub wait_one_child { + my ($s,$par)=@_; + + my $kid; + while (1) { + $kid = $s->_waitpid(-1,$par||=0); + + last unless defined $kid; + + last if $kid == 0 || $kid == -1; # AS 5.6/Win32 returns negative PIDs + redo if !exists $s->{processes}->{$kid}; + my $id = delete $s->{processes}->{$kid}; + + # retrieve child data structure, if any + my $retrieved = undef; + my $storable_tempfile = File::Spec->catfile($s->{tempdir}, 'Parallel-ForkManager-' . $s->{parent_pid} . '-' . $kid . '.txt'); + if (-e $storable_tempfile) { # child has option of not storing anything, so we need to see if it did or not + $retrieved = eval { return &retrieve($storable_tempfile); }; + + # handle Storables errors + if (not $retrieved or $@) { + warn(qq|The storable module was unable to retrieve the child's data structure from the temporary file "$storable_tempfile": | . join(', ', $@)); + } + + # clean up after ourselves + unlink $storable_tempfile; + } + + $s->on_finish( $kid, $? >> 8 , $id, $? & 0x7f, $? & 0x80 ? 1 : 0, $retrieved); + last; + } + $kid; +}; + +sub wait_all_children { + my ($s)=@_; + + while (keys %{ $s->{processes} }) { + $s->on_wait; + $s->wait_one_child(defined $s->{on_wait_period} ? &WNOHANG : undef); + }; +} + +*wait_all_childs=*wait_all_children; # compatibility; + +sub max_procs { $_[0]->{max_proc}; } + +sub is_child { $_[0]->{in_child} } + +sub is_parent { !$_[0]->{in_child} } + +sub running_procs { + my $self = shift; + + my @pids = keys %{ $self->{processes} }; + return @pids; +} + +sub wait_for_available_procs { + my( $self, $nbr ) = @_; + $nbr ||= 1; + + croak "nbr processes '$nbr' higher than the max nbr of processes (@{[ $self->max_procs ]})" + if $nbr > $self->max_procs; + + $self->wait_one_child until $self->max_procs - $self->running_procs >= $nbr; +} + +sub run_on_finish { + my ($s,$code,$pid)=@_; + + $s->{on_finish}->{$pid || 0}=$code; +} + +sub on_finish { + my ($s,$pid,@par)=@_; + + my $code=$s->{on_finish}->{$pid} || $s->{on_finish}->{0} or return 0; + $code->($pid,@par); +}; + +sub run_on_wait { + my ($s,$code, $period)=@_; + + $s->{on_wait}=$code; + $s->{on_wait_period} = $period; +} + +sub on_wait { + my ($s)=@_; + + if(ref($s->{on_wait}) eq 'CODE') { + $s->{on_wait}->(); + if (defined $s->{on_wait_period}) { + local $SIG{CHLD} = sub { } if ! defined $SIG{CHLD}; + select undef, undef, undef, $s->{on_wait_period} + }; + }; +}; + +sub run_on_start { + my ($s,$code)=@_; + + $s->{on_start}=$code; +} + +sub on_start { + my ($s,@par)=@_; + + $s->{on_start}->(@par) if ref($s->{on_start}) eq 'CODE'; +}; + +sub set_max_procs { + my ($s, $mp)=@_; + + $s->{max_proc} = $mp; +} + +sub set_waitpid_blocking_sleep { + my( $self, $period ) = @_; + $self->{waitpid_blocking_sleep} = $period; +} + +sub waitpid_blocking_sleep { + $_[0]->{waitpid_blocking_sleep}; +} + +sub _waitpid { # Call waitpid() in the standard Unix fashion. + my( $self, undef, $flag ) = @_; + + return $flag ? 
$self->_waitpid_non_blocking : $self->_waitpid_blocking; +} + +sub _waitpid_non_blocking { + my $self = shift; + + for my $pid ( $self->running_procs ) { + my $p = waitpid $pid, &WNOHANG or next; + + return $pid if $p != -1; + + warn "child process '$pid' disappeared. A call to `waitpid` outside of Parallel::ForkManager might have reaped it.\n"; + # it's gone. let's clean the process entry + delete $self->{processes}{$pid}; + } + + return; +} + +sub _waitpid_blocking { + my $self = shift; + + # pseudo-blocking + if( my $sleep_period = $self->{waitpid_blocking_sleep} ) { + while() { + my $pid = $self->_waitpid_non_blocking; + + return $pid if defined $pid; + + return unless $self->running_procs; + + select undef, undef, undef, $sleep_period; + } + } + + return waitpid -1, 0; +} + +sub DESTROY { + my ($self) = @_; + + if ($self->{auto_cleanup} && $self->{parent_pid} == $$ && -d $self->{tempdir}) { + File::Path::remove_tree($self->{tempdir}); + } +} + +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +Parallel::ForkManager - A simple parallel processing fork manager + +=head1 VERSION + +version 1.18 + +=head1 SYNOPSIS + + use Parallel::ForkManager; + + my $pm = Parallel::ForkManager->new($MAX_PROCESSES); + + DATA_LOOP: + foreach my $data (@all_data) { + # Forks and returns the pid for the child: + my $pid = $pm->start and next DATA_LOOP; + + ... do some work with $data in the child process ... + + $pm->finish; # Terminates the child process + } + +=head1 DESCRIPTION + +This module is intended for use in operations that can be done in parallel +where the number of processes to be forked off should be limited. Typical +use is a downloader which will be retrieving hundreds/thousands of files. + +The code for a downloader would look something like this: + + use LWP::Simple; + use Parallel::ForkManager; + + ... + + my @links=( + ["http://www.foo.bar/rulez.data","rulez_data.txt"], + ["http://new.host/more_data.doc","more_data.doc"], + ... + ); + + ... + + # Max 30 processes for parallel download + my $pm = Parallel::ForkManager->new(30); + + LINKS: + foreach my $linkarray (@links) { + $pm->start and next LINKS; # do the fork + + my ($link, $fn) = @$linkarray; + warn "Cannot get $fn from $link" + if getstore($link, $fn) != RC_OK; + + $pm->finish; # do the exit in the child process + } + $pm->wait_all_children; + +First you need to instantiate the ForkManager with the "new" constructor. +You must specify the maximum number of processes to be created. If you +specify 0, then NO fork will be done; this is good for debugging purposes. + +Next, use $pm->start to do the fork. $pm returns 0 for the child process, +and child pid for the parent process (see also L). +The "and next" skips the internal loop in the parent process. NOTE: +$pm->start dies if the fork fails. + +$pm->finish terminates the child process (assuming a fork was done in the +"start"). + +NOTE: You cannot use $pm->start if you are already in the child process. +If you want to manage another set of subprocesses in the child process, +you must instantiate another Parallel::ForkManager object! + +=head1 METHODS + +The comment letter indicates where the method should be run. P for parent, +C for child. + +=over 5 + +=item new $processes + +Instantiate a new Parallel::ForkManager object. You must specify the maximum +number of children to fork off. If you specify 0 (zero), then no children +will be forked. This is intended for debugging purposes. 
+
+The optional second parameter, $tempdir, is only used if you want the
+children to send back a reference to some data (see RETRIEVING DATASTRUCTURES
+below). If not provided, it is set via a call to L<File::Temp>::tempdir().
+
+The new method will die if the temporary directory does not exist or it is not
+a directory.
+
+=item start [ $process_identifier ]
+
+This method does the fork. It returns the pid of the child process for
+the parent, and 0 for the child process. If the $processes parameter
+for the constructor is 0 then, assuming you're in the child process,
+$pm->start simply returns 0.
+
+An optional $process_identifier can be provided to this method... It is used by
+the "run_on_finish" callback (see CALLBACKS) for identifying the finished
+process.
+
+=item finish [ $exit_code [, $data_structure_reference] ]
+
+Closes the child process by exiting and accepts an optional exit code
+(default exit code is 0) which can be retrieved in the parent via callback.
+If the second optional parameter is provided, the child attempts to send
+its contents back to the parent. If you use the program in debug mode
+($processes == 0), this method just calls the callback.
+
+If the $data_structure_reference is provided, then it is serialized and
+passed to the parent process. See RETRIEVING DATASTRUCTURES for more info.
+
+=item set_max_procs $processes
+
+Allows you to set a new maximum number of children to maintain.
+
+=item wait_all_children
+
+You can call this method to wait for all the processes which have been
+forked. This is a blocking wait.
+
+=item reap_finished_children
+
+This is a non-blocking call to reap children and execute callbacks independent
+of calls to "start" or "wait_all_children". Use this in scenarios where "start"
+is called infrequently but you would like the callbacks executed quickly.
+
+=item is_parent
+
+Returns C<true> if within the parent or C<false> if within the child.
+
+=item is_child
+
+Returns C<true> if within the child or C<false> if within the parent.
+
+=item max_procs
+
+Returns the maximal number of processes the object will fork.
+
+=item running_procs
+
+Returns the pids of the forked processes currently monitored by the
+C<Parallel::ForkManager>. Note that children are still reported as running
+until the fork manager harvests them, via the next call to
+C<start> or C<wait_all_children>.
+
+  my @pids = $pm->running_procs;
+
+  my $nbr_children = $pm->running_procs;
+
+=item wait_for_available_procs( $n )
+
+Wait until C<$n> process slots are available.
+If C<$n> is not given, defaults to I<1>.
+
+=item waitpid_blocking_sleep
+
+Returns the sleep period, in seconds, of the pseudo-blocking calls. The sleep
+period can be a fraction of a second.
+
+Returns C<0> if disabled.
+
+Defaults to 1 second.
+
+See I<BLOCKING CALLS> for more details.
+
+=item set_waitpid_blocking_sleep $seconds
+
+Sets the sleep period, in seconds, of the pseudo-blocking calls.
+Set to C<0> to disable.
+
+See I<BLOCKING CALLS> for more details.
+
+=back
+
+=head1 CALLBACKS
+
+You can define callbacks in the code, which are called on events like starting
+a process or upon finish. Declare these before the first call to start().
+
+The callbacks can be defined with the following methods:
+
+=over 4
+
+=item run_on_finish $code [, $pid ]
+
+You can define a subroutine which is called when a child is terminated. It is
+called in the parent process.
+
+The parameters of the $code are the following:
+
+  - pid of the process which is terminated
+  - exit code of the program
+  - identification of the process (if provided in the "start" method)
+  - exit signal (0-127: signal name)
+  - core dump (1 if there was a core dump at exit)
+  - datastructure reference or undef (see RETRIEVING DATASTRUCTURES)
+
+=item run_on_start $code
+
+You can define a subroutine which is called when a child is started. It is
+called after the successful startup of a child in the parent process.
+
+The parameters of the $code are the following:
+
+  - pid of the process which has been started
+  - identification of the process (if provided in the "start" method)
+
+=item run_on_wait $code, [$period]
+
+You can define a subroutine which is called when the child process needs to
+wait for the startup. If $period is not defined, then one call is done per
+child. If $period is defined, then $code is called periodically and the
+module waits for $period seconds between the two calls. Note, $period can be
+a fractional number as well. The exact "$period seconds" is not guaranteed:
+signals can shorten it and the process scheduler can make it longer (on busy
+systems).
+
+The $code is also called in the "start" and the "wait_all_children" methods.
+
+No parameters are passed to the $code on the call.
+
+=back
+
+=head1 BLOCKING CALLS
+
+When it comes to waiting for child processes to terminate,
+C<Parallel::ForkManager> is between a fork and a hard place (if you excuse
+the terrible pun). The underlying Perl C<waitpid> function that the module
+relies on can block until either one specific or any child process
+terminates, but not for a process that is part of a given group.
+
+This means that the module can do one of two things when it waits for
+one of its child processes to terminate:
+
+=over
+
+=item Only wait for its own child processes
+
+This is done via a loop using a C<waitpid> non-blocking call and a sleep
+statement. The code does something along the lines of
+
+  while(1) {
+    if ( any of the P::FM child processes terminated ) {
+      return its pid
+    }
+
+    sleep $sleep_period
+  }
+
+This is the default behavior that the module will use.
+This is not the most efficient way to wait for child processes, but it's
+the safest way to ensure that C<Parallel::ForkManager> won't interfere with
+any other part of the codebase.
+
+The sleep period is set via the method C<set_waitpid_blocking_sleep>.
+
+=item Block until any process terminates
+
+Alternatively, C<Parallel::ForkManager> can call C<waitpid> such that it will
+block until any child process terminates. If the child process was not one of
+the monitored subprocesses, the wait will resume. This is more efficient, but
+it means that C<Parallel::ForkManager> can capture (and discard) a termination
+notification that a different part of the code might be waiting for.
+
+If this is a race condition
+that doesn't apply to your codebase, you can set the
+I<waitpid_blocking_sleep> period to C<0>, which will enable blocking
+C<waitpid> calls.
+
+  my $pm = Parallel::ForkManager->new( 4 );
+
+  $pm->set_waitpid_blocking_sleep(0);  # true blocking calls enabled
+
+  for ( 1..100 ) {
+    $pm->start and next;
+
+    ...; # do work
+
+    $pm->finish;
+  }
+
+=back
+
+=head1 RETRIEVING DATASTRUCTURES from child processes
+
+The ability for the parent to retrieve data structures is new as of version
+0.7.6.
+
+Each child process may optionally send 1 data structure back to the parent.
+By data structure, we mean a reference to a string, hash or array. The
+contents of the data structure are written out to temporary files on disc
+using the L<Storable> module's store() method.
The reference is then
+retrieved from within the code you send to the run_on_finish callback.
+
+The data structure can be any scalar perl data structure which makes sense:
+string, numeric value or a reference to an array, hash or object.
+
+There are 2 steps involved in retrieving data structures:
+
+1) A reference to the data structure the child wishes to send back to the
+parent is provided as the second argument to the finish() call. It is up
+to the child to decide whether or not to send anything back to the parent.
+
+2) The data structure reference is retrieved using the callback provided in
+the run_on_finish() method.
+
+Keep in mind that data structure retrieval is not the same as returning a
+data structure from a method call. That is not what actually occurs. The
+data structure referenced in a given child process is serialized and
+written out to a file by L<Storable>. The file is subsequently read back
+into memory and a new data structure belonging to the parent process is
+created. Please consider the performance penalty this can imply, and try to
+keep the returned structure small.
+
+=head1 EXAMPLES
+
+=head2 Parallel get
+
+This small example can be used to get URLs in parallel.
+
+  use Parallel::ForkManager;
+  use LWP::Simple;
+
+  my $pm = Parallel::ForkManager->new(10);
+
+  LINKS:
+  for my $link (@ARGV) {
+    $pm->start and next LINKS;
+    my ($fn) = $link =~ /^.*\/(.*?)$/;
+    if (!$fn) {
+      warn "Cannot determine filename from $fn\n";
+    } else {
+      $0 .= " " . $fn;
+      print "Getting $fn from $link\n";
+      my $rc = getstore($link, $fn);
+      print "$link downloaded. response code: $rc\n";
+    };
+    $pm->finish;
+  };
+
+=head2 Callbacks
+
+Example of a program using callbacks to get child exit codes:
+
+  use strict;
+  use Parallel::ForkManager;
+
+  my $max_procs = 5;
+  my @names = qw( Fred Jim Lily Steve Jessica Bob Dave Christine Rico Sara );
+  # hash to resolve PID's back to child specific information
+
+  my $pm = Parallel::ForkManager->new($max_procs);
+
+  # Setup a callback for when a child finishes up so we can
+  # get its exit code
+  $pm->run_on_finish( sub {
+      my ($pid, $exit_code, $ident) = @_;
+      print "** $ident just got out of the pool ".
+        "with PID $pid and exit code: $exit_code\n";
+  });
+
+  $pm->run_on_start( sub {
+      my ($pid, $ident)=@_;
+      print "** $ident started, pid: $pid\n";
+  });
+
+  $pm->run_on_wait( sub {
+      print "** Have to wait for one child ...\n"
+    },
+    0.5
+  );
+
+  NAMES:
+  foreach my $child ( 0 .. $#names ) {
+    my $pid = $pm->start($names[$child]) and next NAMES;
+
+    # This code is the child process
+    print "This is $names[$child], Child number $child\n";
+    sleep ( 2 * $child );
+    print "$names[$child], Child $child is about to get out...\n";
+    sleep 1;
+    $pm->finish($child); # pass an exit code to finish
+  }
+
+  print "Waiting for Children...\n";
+  $pm->wait_all_children;
+  print "Everybody is out of the pool!\n";
+
+=head2 Data structure retrieval
+
+In this simple example, each child sends back a string reference.
+
+=head1 EXAMPLES
+
+=head2 Parallel get
+
+This small example can be used to get URLs in parallel.
+
+  use Parallel::ForkManager;
+  use LWP::Simple;
+
+  my $pm = Parallel::ForkManager->new(10);
+
+  LINKS:
+  for my $link (@ARGV) {
+    $pm->start and next LINKS;
+    my ($fn) = $link =~ /^.*\/(.*?)$/;
+    if (!$fn) {
+      warn "Cannot determine filename from $link\n";
+    } else {
+      $0 .= " " . $fn;
+      print "Getting $fn from $link\n";
+      my $rc = getstore($link, $fn);
+      print "$link downloaded. response code: $rc\n";
+    };
+    $pm->finish;
+  };
+
+=head2 Callbacks
+
+Example of a program using callbacks to get child exit codes:
+
+  use strict;
+  use Parallel::ForkManager;
+
+  my $max_procs = 5;
+  my @names = qw( Fred Jim Lily Steve Jessica Bob Dave Christine Rico Sara );
+
+  my $pm = Parallel::ForkManager->new($max_procs);
+
+  # Setup a callback for when a child finishes up so we can
+  # get its exit code
+  $pm->run_on_finish( sub {
+      my ($pid, $exit_code, $ident) = @_;
+      print "** $ident just got out of the pool ".
+        "with PID $pid and exit code: $exit_code\n";
+  });
+
+  $pm->run_on_start( sub {
+      my ($pid, $ident)=@_;
+      print "** $ident started, pid: $pid\n";
+  });
+
+  $pm->run_on_wait( sub {
+      print "** Have to wait for a child to finish ...\n"
+    },
+    0.5
+  );
+
+  NAMES:
+  foreach my $child ( 0 .. $#names ) {
+    my $pid = $pm->start($names[$child]) and next NAMES;
+
+    # This code is the child process
+    print "This is $names[$child], Child number $child\n";
+    sleep ( 2 * $child );
+    print "$names[$child], Child $child is about to get out...\n";
+    sleep 1;
+    $pm->finish($child); # pass an exit code to finish
+  }
+
+  print "Waiting for Children...\n";
+  $pm->wait_all_children;
+  print "Everybody is out of the pool!\n";
+
+=head2 Data structure retrieval
+
+In this simple example, each child sends back a string reference.
+
+  use Parallel::ForkManager 0.7.6;
+  use strict;
+
+  my $pm = Parallel::ForkManager->new(2, '/server/path/to/temp/dir/');
+
+  # data structure retrieval and handling
+  $pm->run_on_finish ( # called BEFORE the first call to start()
+    sub {
+      my ($pid, $exit_code, $ident, $exit_signal, $core_dump, $data_structure_reference) = @_;
+
+      # retrieve data structure from child
+      if (defined($data_structure_reference)) {  # children are not forced to send anything
+        my $string = ${$data_structure_reference};  # child passed a string reference
+        print "$string\n";
+      }
+      else {  # problems occurring during storage or retrieval will throw a warning
+        print qq|No message received from child process $pid!\n|;
+      }
+    }
+  );
+
+  # prep random statement components
+  my @foods = ('chocolate', 'ice cream', 'peanut butter', 'pickles', 'pizza', 'bacon', 'pancakes', 'spaghetti', 'cookies');
+  my @preferences = ('loves', q|can't stand|, 'always wants more', 'will walk 100 miles for', 'only eats', 'would starve rather than eat');
+
+  # run the parallel processes
+  PERSONS:
+  foreach my $person (qw(Fred Wilma Ernie Bert Lucy Ethel Curly Moe Larry)) {
+    $pm->start() and next PERSONS;
+
+    # generate a random statement about food preferences
+    my $statement = $person . ' ' . $preferences[int(rand @preferences)] . ' ' . $foods[int(rand @foods)];
+
+    # send it back to the parent process
+    $pm->finish(0, \$statement);  # note that it's a scalar REFERENCE, not the scalar itself
+  }
+  $pm->wait_all_children;
+
+A second datastructure retrieval example demonstrates how children decide
+whether or not to send anything back, what to send and how the parent should
+process whatever is retrieved.
+
+=for example begin
+
+  use Parallel::ForkManager 0.7.6;
+  use Data::Dumper;  # to display the data structures retrieved.
+  use strict;
+
+  my $pm = Parallel::ForkManager->new(20);  # using the system temp dir
+
+  my %retrieved_responses = ();  # for collecting responses
+
+  # data structure retrieval and handling
+  $pm->run_on_finish (
+    sub {
+      my ($pid, $exit_code, $ident, $exit_signal, $core_dump, $data_structure_reference) = @_;
+
+      # see what the child sent us, if anything
+      if (defined($data_structure_reference)) {  # test rather than assume child sent anything
+        my $reftype = ref($data_structure_reference);
+        print qq|ident "$ident" returned a "$reftype" reference.\n\n|;
+        if (1) {  # simple on/off switch to display the contents
+          print &Dumper($data_structure_reference) . qq|end of "$ident" sent structure\n\n|;
+        }
+
+        # we can also collect retrieved data structures for processing after all children have exited
+        $retrieved_responses{$ident} = $data_structure_reference;
+      } else {
+        print qq|ident "$ident" did not send anything.\n\n|;
+      }
+    }
+  );
+
+  # generate a list of instructions
+  my @instructions = (  # a unique identifier and what the child process should send
+    {'name' => '%ENV keys as a string', 'send' => 'keys'},
+    {'name' => 'Send Nothing'},  # not instructing the child to send anything back to the parent
+    {'name' => 'Childs %ENV', 'send' => 'all'},
+    {'name' => 'Child chooses randomly', 'send' => 'random'},
+    {'name' => 'Invalid send instructions', 'send' => 'Na Na Nana Na'},
+    {'name' => 'ENV values in an array', 'send' => 'values'},
+  );
+
+  INSTRUCTS:
+  foreach my $instruction (@instructions) {
+    $pm->start($instruction->{'name'}) and next INSTRUCTS;  # this time we are using an explicit, unique child process identifier
+
+    # last step in child processing
+    $pm->finish(0) unless $instruction->{'send'};  # no data structure is sent unless this child is told what to send.
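+    # NB: in a child, finish() never returns (it terminates the child
+    # process), so at most one of the finish() calls below runs per child;
+    # the line above therefore acts as an early return when there is
+    # nothing to send.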
+
+    if ($instruction->{'send'} eq 'keys') {
+      $pm->finish(0, \join(', ', keys %ENV));
+
+    } elsif ($instruction->{'send'} eq 'values') {
+      $pm->finish(0, [values %ENV]);  # kinda useless without knowing which keys they belong to...
+
+    } elsif ($instruction->{'send'} eq 'all') {
+      $pm->finish(0, \%ENV);  # remember, we are not "returning" anything, just copying the hash to disc
+
+    # demonstrate clearly that the child determines what type of reference to send
+    } elsif ($instruction->{'send'} eq 'random') {
+      my $string = q|I'm just a string.|;
+      my @array = qw(I am an array);
+      my %hash = (type => 'associative array', synonym => 'hash', cool => 'very :)');
+      my $return_choice = ('string', 'array', 'hash')[int(rand 3)];  # randomly choose return data type
+      $pm->finish(0, \$string) if ($return_choice eq 'string');
+      $pm->finish(0, \@array) if ($return_choice eq 'array');
+      $pm->finish(0, \%hash) if ($return_choice eq 'hash');
+
+    # as a responsible child, inform parent that their instruction was invalid
+    } else {
+      $pm->finish(0, \qq|Invalid instructions: "$instruction->{'send'}".|);  # ordinarily I wouldn't include invalid input in a response...
+    }
+  }
+  $pm->wait_all_children;  # blocks until all forked processes have exited
+
+  # post fork processing of returned data structures
+  for (sort keys %retrieved_responses) {
+    print qq|Post processing "$_"...\n|;
+  }
+
+=for example end
+
+=head1 BUGS AND LIMITATIONS
+
+Do not use Parallel::ForkManager in an environment where other child
+processes can affect the run of the main program; in particular, using this
+module is not recommended in an environment where fork() / wait() is already
+used.
+
+If you want to use more than one copy of Parallel::ForkManager, then
+you have to make sure that all child processes of one have terminated before
+you use the second object in the main program.
+
+You are free to use a new copy of Parallel::ForkManager in the child
+processes, although I don't think it makes sense.
+
+=head1 CREDITS
+
+  Michael Gang (bug report)
+  Noah Robin (documentation tweaks)
+  Chuck Hirstius (callback exit status, example)
+  Grant Hopwood (win32 port)
+  Mark Southern (bugfix)
+  Ken Clarke (datastructure retrieval)
+
+=head1 AUTHORS
+
+=over 4
+
+=item *
+
+dLux (Szabó, Balázs)
+
+=item *
+
+Yanick Champoux
+
+=item *
+
+Gabor Szabo
+
+=back
+
+=head1 COPYRIGHT AND LICENSE
+
+This software is copyright (c) 2000 by Balázs Szabó.
+
+This is free software; you can redistribute it and/or modify it under
+the same terms as the Perl 5 programming language system itself.
+
+=cut
diff --git a/src/modules/Switch.pm b/src/modules/Switch.pm
new file mode 100644
index 0000000..8c2544e
--- /dev/null
+++ b/src/modules/Switch.pm
@@ -0,0 +1,888 @@
+package Switch;
+
+use 5.005;
+use strict;
+use vars qw($VERSION);
+use Carp;
+
+use if $] >= 5.011, 'deprecate';
+
+$VERSION = '2.17';
+
+
+# LOAD FILTERING MODULE...
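+# (Switch is a source filter: it rewrites switch/case statements into plain
+# Perl at compile time, so the filter must be installed before any code that
+# uses the new syntax is parsed.)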
+use Filter::Util::Call; + +sub __(); + +# CATCH ATTEMPTS TO CALL case OUTSIDE THE SCOPE OF ANY switch + +$::_S_W_I_T_C_H = sub { croak "case/when statement not in switch/given block" }; + +my $offset; +my $fallthrough; +my ($Perl5, $Perl6) = (0,0); + +sub import +{ + $fallthrough = grep /\bfallthrough\b/, @_; + $offset = (caller)[2]+1; + filter_add({}) unless @_>1 && $_[1] eq 'noimport'; + my $pkg = caller; + no strict 'refs'; + for ( qw( on_defined on_exists ) ) + { + *{"${pkg}::$_"} = \&$_; + } + *{"${pkg}::__"} = \&__ if grep /__/, @_; + $Perl6 = 1 if grep(/Perl\s*6/i, @_); + $Perl5 = 1 if grep(/Perl\s*5/i, @_) || !grep(/Perl\s*6/i, @_); + 1; +} + +sub unimport +{ + filter_del() +} + +sub filter +{ + my($self) = @_ ; + local $Switch::file = (caller)[1]; + + my $status = 1; + $status = filter_read(1_000_000); + return $status if $status<0; + $_ = filter_blocks($_,$offset); + $_ = "# line $offset\n" . $_ if $offset; undef $offset; + return $status; +} + +use Text::Balanced ':ALL'; + +sub line +{ + my ($pretext,$offset) = @_; + ($pretext=~tr/\n/\n/)+($offset||0); +} + +sub is_block +{ + local $SIG{__WARN__}=sub{die$@}; + local $^W=1; + my $ishash = defined eval 'my $hr='.$_[0]; + undef $@; + return !$ishash; +} + +my $pod_or_DATA = qr/ ^=[A-Za-z] .*? ^=cut (?![A-Za-z]) .*? $ + | ^__(DATA|END)__\n.* + /smx; + +my $casecounter = 1; +sub filter_blocks +{ + my ($source, $line) = @_; + return $source unless $Perl5 && $source =~ /case|switch/ + || $Perl6 && $source =~ /when|given|default/; + pos $source = 0; + my $text = ""; + component: while (pos $source < length $source) + { + if ($source =~ m/(\G\s*use\s+Switch\b)/gc) + { + $text .= q{use Switch 'noimport'}; + next component; + } + my @pos = Text::Balanced::_match_quotelike(\$source,qr/\s*/,1,0); + if (defined $pos[0]) + { + my $pre = substr($source,$pos[0],$pos[1]); # matched prefix + my $iEol; + if( substr($source,$pos[4],$pos[5]) eq '/' && # 1st delimiter + substr($source,$pos[2],$pos[3]) eq '' && # no op like 'm' + index( substr($source,$pos[16],$pos[17]), 'x' ) == -1 && # no //x + ($iEol = index( $source, "\n", $pos[4] )) > 0 && + $iEol < $pos[8] ){ # embedded newlines + # If this is a pattern, it isn't compatible with Switch. Backup past 1st '/'. + pos( $source ) = $pos[6]; + $text .= $pre . substr($source,$pos[2],$pos[6]-$pos[2]); + } else { + $text .= $pre . substr($source,$pos[2],$pos[18]-$pos[2]); + } + next component; + } + if ($source =~ m/(\G\s*$pod_or_DATA)/gc) { + $text .= $1; + next component; + } + @pos = Text::Balanced::_match_variable(\$source,qr/\s*/); + if (defined $pos[0]) + { + $text .= " " if $pos[0] < $pos[2]; + $text .= substr($source,$pos[0],$pos[4]-$pos[0]); + next component; + } + + if ($Perl5 && $source =~ m/\G(\n*)(\s*)(switch)\b(?=\s*[(])/gc + || $Perl6 && $source =~ m/\G(\n*)(\s*)(given)\b(?=\s*[(])/gc + || $Perl6 && $source =~ m/\G(\n*)(\s*)(given)\b(.*)(?=\{)/gc) + { + my $keyword = $3; + my $arg = $4; + $text .= $1.$2.'S_W_I_T_C_H: while (1) '; + unless ($arg) { + @pos = Text::Balanced::_match_codeblock(\$source,qr/\s*/,qr/\(/,qr/\)/,qr/[[{(<]/,qr/[]})>]/,undef) + or do { + die "Bad $keyword statement (problem in the parentheses?) 
near $Switch::file line ", line(substr($source,0,pos $source),$line), "\n"; + }; + $arg = filter_blocks(substr($source,$pos[0],$pos[4]-$pos[0]),line(substr($source,0,$pos[0]),$line)); + } + $arg =~ s {^\s*[(]\s*%} { ( \\\%} || + $arg =~ s {^\s*[(]\s*m\b} { ( qr} || + $arg =~ s {^\s*[(]\s*/} { ( qr/} || + $arg =~ s {^\s*[(]\s*qw} { ( \\qw}; + @pos = Text::Balanced::_match_codeblock(\$source,qr/\s*/,qr/\{/,qr/\}/,qr/\{/,qr/\}/,undef) + or do { + die "Bad $keyword statement (problem in the code block?) near $Switch::file line ", line(substr($source,0, pos $source), $line), "\n"; + }; + my $code = filter_blocks(substr($source,$pos[0],$pos[4]-$pos[0]),line(substr($source,0,$pos[0]),$line)); + $code =~ s/{/{ local \$::_S_W_I_T_C_H; Switch::switch($arg);/; + $text .= $code . 'continue {last}'; + next component; + } + elsif ($Perl5 && $source =~ m/\G(\s*)(case\b)(?!\s*=>)/gc + || $Perl6 && $source =~ m/\G(\s*)(when\b)(?!\s*=>)/gc + || $Perl6 && $source =~ m/\G(\s*)(default\b)(?=\s*\{)/gc) + { + my $keyword = $2; + $text .= $1 . ($keyword eq "default" + ? "if (1)" + : "if (Switch::case"); + + if ($keyword eq "default") { + # Nothing to do + } + elsif (@pos = Text::Balanced::_match_codeblock(\$source,qr/\s*/,qr/\{/,qr/\}/,qr/\{/,qr/\}/,undef)) { + my $code = substr($source,$pos[0],$pos[4]-$pos[0]); + $text .= " " if $pos[0] < $pos[2]; + $text .= "sub " if is_block $code; + $text .= filter_blocks($code,line(substr($source,0,$pos[0]),$line)) . ")"; + } + elsif (@pos = Text::Balanced::_match_codeblock(\$source,qr/\s*/,qr/[[(]/,qr/[])]/,qr/[[({]/,qr/[])}]/,undef)) { + my $code = filter_blocks(substr($source,$pos[0],$pos[4]-$pos[0]),line(substr($source,0,$pos[0]),$line)); + $code =~ s {^\s*[(]\s*%} { ( \\\%} || + $code =~ s {^\s*[(]\s*m\b} { ( qr} || + $code =~ s {^\s*[(]\s*/} { ( qr/} || + $code =~ s {^\s*[(]\s*qw} { ( \\qw}; + $text .= " " if $pos[0] < $pos[2]; + $text .= "$code)"; + } + elsif ($Perl6 && do{@pos = Text::Balanced::_match_variable(\$source,qr/\s*/)}) { + my $code = filter_blocks(substr($source,$pos[0],$pos[4]-$pos[0]),line(substr($source,0,$pos[0]),$line)); + $code =~ s {^\s*%} { \%} || + $code =~ s {^\s*@} { \@}; + $text .= " " if $pos[0] < $pos[2]; + $text .= "$code)"; + } + elsif ( @pos = Text::Balanced::_match_quotelike(\$source,qr/\s*/,1,0)) { + my $code = substr($source,$pos[2],$pos[18]-$pos[2]); + $code = filter_blocks($code,line(substr($source,0,$pos[2]),$line)); + $code =~ s {^\s*m} { qr} || + $code =~ s {^\s*/} { qr/} || + $code =~ s {^\s*qw} { \\qw}; + $text .= " " if $pos[0] < $pos[2]; + $text .= "$code)"; + } + elsif ($Perl5 && $source =~ m/\G\s*(([^\$\@{])[^\$\@{]*)(?=\s*{)/gc + || $Perl6 && $source =~ m/\G\s*([^;{]*)()/gc) { + my $code = filter_blocks($1,line(substr($source,0,pos $source),$line)); + $text .= ' \\' if $2 eq '%'; + $text .= " $code)"; + } + else { + die "Bad $keyword statement (invalid $keyword value?) near $Switch::file line ", line(substr($source,0,pos $source), $line), "\n"; + } + + die "Missing opening brace or semi-colon after 'when' value near $Switch::file line ", line(substr($source,0,pos $source), $line), "\n" + unless !$Perl6 || $source =~ m/\G(\s*)(?=;|\{)/gc; + + do{@pos = Text::Balanced::_match_codeblock(\$source,qr/\s*/,qr/\{/,qr/\}/,qr/\{/,qr/\}/,undef)} + or do { + if ($source =~ m/\G\s*(?=([};]|\Z))/gc) { + $casecounter++; + next component; + } + die "Bad $keyword statement (problem in the code block?) 
near $Switch::file line ", line(substr($source,0,pos $source),$line), "\n";
+            };
+        my $code = filter_blocks(substr($source,$pos[0],$pos[4]-$pos[0]),line(substr($source,0,$pos[0]),$line));
+        $code =~ s/}(?=\s*\Z)/;last S_W_I_T_C_H }/
+            unless $fallthrough;
+        $text .= "{ while (1) $code continue { goto C_A_S_E_$casecounter } last S_W_I_T_C_H; C_A_S_E_$casecounter: }";
+        $casecounter++;
+        next component;
+    }
+
+    $source =~ m/\G(\s*(-[sm]\s+|\w+|#.*\n|\W))/gc;
+    $text .= $1;
+    }
+    $text;
+}
+
+
+
+sub in
+{
+    my ($x,$y) = @_;
+    my @numy;
+    for my $nextx ( @$x )
+    {
+        my $numx = ref($nextx) || defined $nextx && (~$nextx&$nextx) eq 0;
+        for my $j ( 0..$#$y )
+        {
+            my $nexty = $y->[$j];
+            push @numy, ref($nexty) || defined $nexty && (~$nexty&$nexty) eq 0
+                if @numy <= $j;
+            return 1 if $numx && $numy[$j] && $nextx==$nexty
+                 || $nextx eq $nexty;
+
+        }
+    }
+    return "";
+}
+
+sub on_exists
+{
+    my $ref = @_==1 && ref($_[0]) eq 'HASH' ? $_[0] : { @_ };
+    [ keys %$ref ]
+}
+
+sub on_defined
+{
+    my $ref = @_==1 && ref($_[0]) eq 'HASH' ? $_[0] : { @_ };
+    [ grep { defined $ref->{$_} } keys %$ref ]
+}
+
+sub switch(;$)
+{
+    my ($s_val) = @_ ? $_[0] : $_;
+    my $s_ref = ref $s_val;
+
+    if ($s_ref eq 'CODE')
+    {
+        $::_S_W_I_T_C_H =
+              sub { my $c_val = $_[0];
+                    return $s_val == $c_val  if ref $c_val eq 'CODE';
+                    return $s_val->(@$c_val) if ref $c_val eq 'ARRAY';
+                    return $s_val->($c_val);
+                  };
+    }
+    elsif ($s_ref eq "" && defined $s_val && (~$s_val&$s_val) eq 0) # NUMERIC SCALAR
+    {
+        $::_S_W_I_T_C_H =
+              sub { my $c_val = $_[0];
+                    my $c_ref = ref $c_val;
+                    return $s_val == $c_val     if $c_ref eq ""
+                                                && defined $c_val
+                                                && (~$c_val&$c_val) eq 0;
+                    return $s_val eq $c_val     if $c_ref eq "";
+                    return in([$s_val],$c_val)  if $c_ref eq 'ARRAY';
+                    return $c_val->($s_val)     if $c_ref eq 'CODE';
+                    return $c_val->call($s_val) if $c_ref eq 'Switch';
+                    return scalar $s_val=~/$c_val/
+                                                if $c_ref eq 'Regexp';
+                    return scalar $c_val->{$s_val}
+                                                if $c_ref eq 'HASH';
+                    return;
+                  };
+    }
+    elsif ($s_ref eq "")                        # STRING SCALAR
+    {
+        $::_S_W_I_T_C_H =
+              sub { my $c_val = $_[0];
+                    my $c_ref = ref $c_val;
+                    return $s_val eq $c_val     if $c_ref eq "";
+                    return in([$s_val],$c_val)  if $c_ref eq 'ARRAY';
+                    return $c_val->($s_val)     if $c_ref eq 'CODE';
+                    return $c_val->call($s_val) if $c_ref eq 'Switch';
+                    return scalar $s_val=~/$c_val/
+                                                if $c_ref eq 'Regexp';
+                    return scalar $c_val->{$s_val}
+                                                if $c_ref eq 'HASH';
+                    return;
+                  };
+    }
+    elsif ($s_ref eq 'ARRAY')
+    {
+        $::_S_W_I_T_C_H =
+              sub { my $c_val = $_[0];
+                    my $c_ref = ref $c_val;
+                    return in($s_val,[$c_val])  if $c_ref eq "";
+                    return in($s_val,$c_val)    if $c_ref eq 'ARRAY';
+                    return $c_val->(@$s_val)    if $c_ref eq 'CODE';
+                    return $c_val->call(@$s_val)
+                                                if $c_ref eq 'Switch';
+                    return scalar grep {$_=~/$c_val/} @$s_val
+                                                if $c_ref eq 'Regexp';
+                    return scalar grep {$c_val->{$_}} @$s_val
+                                                if $c_ref eq 'HASH';
+                    return;
+                  };
+    }
+    elsif ($s_ref eq 'Regexp')
+    {
+        $::_S_W_I_T_C_H =
+              sub { my $c_val = $_[0];
+                    my $c_ref = ref $c_val;
+                    return $c_val=~/$s_val/     if $c_ref eq "";
+                    return scalar grep {$_=~/$s_val/} @$c_val
+                                                if $c_ref eq 'ARRAY';
+                    return $c_val->($s_val)     if $c_ref eq 'CODE';
+                    return $c_val->call($s_val) if $c_ref eq 'Switch';
+                    return $s_val eq $c_val     if $c_ref eq 'Regexp';
+                    return grep {$_=~/$s_val/ && $c_val->{$_}} keys %$c_val
+                                                if $c_ref eq 'HASH';
+                    return;
+                  };
+    }
+    elsif ($s_ref eq 'HASH')
+    {
+        $::_S_W_I_T_C_H =
+              sub { my $c_val = $_[0];
+                    my $c_ref = ref $c_val;
+                    return $s_val->{$c_val}     if $c_ref eq "";
+                    return scalar grep {$s_val->{$_}} @$c_val
+                                                if $c_ref eq 
'ARRAY'; + return $c_val->($s_val) if $c_ref eq 'CODE'; + return $c_val->call($s_val) if $c_ref eq 'Switch'; + return grep {$_=~/$c_val/ && $s_val->{"$_"}} keys %$s_val + if $c_ref eq 'Regexp'; + return $s_val==$c_val if $c_ref eq 'HASH'; + return; + }; + } + elsif ($s_ref eq 'Switch') + { + $::_S_W_I_T_C_H = + sub { my $c_val = $_[0]; + return $s_val == $c_val if ref $c_val eq 'Switch'; + return $s_val->call(@$c_val) + if ref $c_val eq 'ARRAY'; + return $s_val->call($c_val); + }; + } + else + { + croak "Cannot switch on $s_ref"; + } + return 1; +} + +sub case($) { local $SIG{__WARN__} = \&carp; + $::_S_W_I_T_C_H->(@_); } + +# IMPLEMENT __ + +my $placeholder = bless { arity=>1, impl=>sub{$_[1+$_[0]]} }; + +sub __() { $placeholder } + +sub __arg($) +{ + my $index = $_[0]+1; + bless { arity=>0, impl=>sub{$_[$index]} }; +} + +sub hosub(&@) +{ + # WRITE THIS +} + +sub call +{ + my ($self,@args) = @_; + return $self->{impl}->(0,@args); +} + +sub meta_bop(&) +{ + my ($op) = @_; + sub + { + my ($left, $right, $reversed) = @_; + ($right,$left) = @_ if $reversed; + + my $rop = ref $right eq 'Switch' + ? $right + : bless { arity=>0, impl=>sub{$right} }; + + my $lop = ref $left eq 'Switch' + ? $left + : bless { arity=>0, impl=>sub{$left} }; + + my $arity = $lop->{arity} + $rop->{arity}; + + return bless { + arity => $arity, + impl => sub { my $start = shift; + return $op->($lop->{impl}->($start,@_), + $rop->{impl}->($start+$lop->{arity},@_)); + } + }; + }; +} + +sub meta_uop(&) +{ + my ($op) = @_; + sub + { + my ($left) = @_; + + my $lop = ref $left eq 'Switch' + ? $left + : bless { arity=>0, impl=>sub{$left} }; + + my $arity = $lop->{arity}; + + return bless { + arity => $arity, + impl => sub { $op->($lop->{impl}->(@_)) } + }; + }; +} + + +use overload + "+" => meta_bop {$_[0] + $_[1]}, + "-" => meta_bop {$_[0] - $_[1]}, + "*" => meta_bop {$_[0] * $_[1]}, + "/" => meta_bop {$_[0] / $_[1]}, + "%" => meta_bop {$_[0] % $_[1]}, + "**" => meta_bop {$_[0] ** $_[1]}, + "<<" => meta_bop {$_[0] << $_[1]}, + ">>" => meta_bop {$_[0] >> $_[1]}, + "x" => meta_bop {$_[0] x $_[1]}, + "." => meta_bop {$_[0] . $_[1]}, + "<" => meta_bop {$_[0] < $_[1]}, + "<=" => meta_bop {$_[0] <= $_[1]}, + ">" => meta_bop {$_[0] > $_[1]}, + ">=" => meta_bop {$_[0] >= $_[1]}, + "==" => meta_bop {$_[0] == $_[1]}, + "!=" => meta_bop {$_[0] != $_[1]}, + "<=>" => meta_bop {$_[0] <=> $_[1]}, + "lt" => meta_bop {$_[0] lt $_[1]}, + "le" => meta_bop {$_[0] le $_[1]}, + "gt" => meta_bop {$_[0] gt $_[1]}, + "ge" => meta_bop {$_[0] ge $_[1]}, + "eq" => meta_bop {$_[0] eq $_[1]}, + "ne" => meta_bop {$_[0] ne $_[1]}, + "cmp" => meta_bop {$_[0] cmp $_[1]}, + "\&" => meta_bop {$_[0] & $_[1]}, + "^" => meta_bop {$_[0] ^ $_[1]}, + "|" => meta_bop {$_[0] | $_[1]}, + "atan2" => meta_bop {atan2 $_[0], $_[1]}, + + "neg" => meta_uop {-$_[0]}, + "!" 
=> meta_uop {!$_[0]},
+    "~"     =>  meta_uop {~$_[0]},
+    "cos"   =>  meta_uop {cos $_[0]},
+    "sin"   =>  meta_uop {sin $_[0]},
+    "exp"   =>  meta_uop {exp $_[0]},
+    "abs"   =>  meta_uop {abs $_[0]},
+    "log"   =>  meta_uop {log $_[0]},
+    "sqrt"  =>  meta_uop {sqrt $_[0]},
+    "bool"  => sub { croak "Can't use && or || in expression containing __" },
+
+    #   "&()"   =>  sub { $_[0]->{impl} },
+
+    #   "||"    =>  meta_bop {$_[0] || $_[1]},
+    #   "&&"    =>  meta_bop {$_[0] && $_[1]},
+    # fallback => 1,
+    ;
+1;
+
+__END__
+
+
+=head1 NAME
+
+Switch - A switch statement for Perl, do not use if you can use given/when
+
+=head1 SYNOPSIS
+
+    use Switch;
+
+    switch ($val) {
+        case 1          { print "number 1" }
+        case "a"        { print "string a" }
+        case [1..10,42] { print "number in list" }
+        case (\@array)  { print "number in list" }
+        case /\w+/      { print "pattern" }
+        case qr/\w+/    { print "pattern" }
+        case (\%hash)   { print "entry in hash" }
+        case (\&sub)    { print "arg to subroutine" }
+        else            { print "previous case not true" }
+    }
+
+=head1 BACKGROUND
+
+[Skip ahead to L<"DESCRIPTION"> if you don't care about the whys
+and wherefores of this control structure]
+
+In seeking to devise a "Swiss Army" case mechanism suitable for Perl,
+it is useful to generalize this notion of distributed conditional
+testing as far as possible. Specifically, the concept of "matching"
+between the switch value and the various case values need not be
+restricted to numeric (or string or referential) equality, as it is in other
+languages. Indeed, as Table 1 illustrates, Perl
+offers at least eighteen different ways in which two values could
+generate a match.
+
+    Table 1: Matching a switch value ($s) with a case value ($c)
+
+        Switch  Case    Type of Match            Implied Matching Code
+        Value   Value
+        ======  =====   =====================    =============
+
+        number  same    numeric or referential   match if $s == $c;
+        or ref          equality
+
+        object  method  result of method call    match if $s->$c();
+        ref     name                             match if defined $s->$c();
+                or ref
+
+        other   other   string equality          match if $s eq $c;
+        non-ref non-ref
+        scalar  scalar
+
+        string  regexp  pattern match            match if $s =~ /$c/;
+
+        array   scalar  array entry existence    match if 0<=$c && $c<@$s;
+        ref             array entry definition   match if defined $s->[$c];
+                        array entry truth        match if $s->[$c];
+
+        array   array   array intersection       match if intersects(@$s, @$c);
+        ref     ref     (apply this table to
+                        all pairs of elements
+                        $s->[$i] and
+                        $c->[$j])
+
+        array   regexp  array grep               match if grep /$c/, @$s;
+        ref
+
+        hash    scalar  hash entry existence     match if exists $s->{$c};
+        ref             hash entry definition    match if defined $s->{$c};
+                        hash entry truth         match if $s->{$c};
+
+        hash    regexp  hash grep                match if grep /$c/, keys %$s;
+        ref
+
+        sub     scalar  return value defn        match if defined $s->($c);
+        ref             return value truth       match if $s->($c);
+
+        sub     array   return value defn        match if defined $s->(@$c);
+        ref     ref     return value truth       match if $s->(@$c);
+
+
+In reality, Table 1 covers 31 alternatives, because only the equality and
+intersection tests are commutative; in all other cases, the roles of
+the C<$s> and C<$c> variables could be reversed to produce a
+different test. For example, instead of testing a single hash for
+the existence of a series of keys (C<< exists $s->{$c} >>),
+one could test for the existence of a single key in a series of hashes
+(C<< exists $c->{$s} >>).
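+
+For example (a sketch; the hash and its keys are illustrative), the same
+membership test can be phrased in either direction:
+
+    my %prefs = ( beer => 1, pizza => 1 );
+
+    switch (\%prefs) { case "beer"    { print "match\n" } }  # tests $s->{$c}
+    switch ("beer")  { case (\%prefs) { print "match\n" } }  # tests $c->{$s}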
+
+=head1 DESCRIPTION
+
+The Switch.pm module implements a generalized case mechanism that covers
+most (but not all) of the numerous possible combinations of switch and case
+values described above.
+
+The module augments the standard Perl syntax with two new control
+statements: C<switch> and C<case>. The C<switch> statement takes a
+single scalar argument of any type, specified in parentheses.
+C<switch> stores this value as the
+current switch value in a (localized) control variable.
+The value is followed by a block which may contain one or more
+Perl statements (including the C<case> statement described below).
+The block is unconditionally executed once the switch value has
+been cached.
+
+A C<case> statement takes a single scalar argument (in mandatory
+parentheses if it's a variable; otherwise the parens are optional) and
+selects the appropriate type of matching between that argument and the
+current switch value. The type of matching used is determined by the
+respective types of the switch value and the C<case> argument, as
+specified in Table 1. If the match is successful, the mandatory
+block associated with the C<case> statement is executed.
+
+In most other respects, the C<case> statement is semantically identical
+to an C<if> statement. For example, it can be followed by an C<else>
+clause, and can be used as a postfix statement qualifier.
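+
+For instance (a small sketch of both forms):
+
+    switch ($val) {
+        case /\d+/ { print "digits\n" } else { print "no digits\n" }
+        print "punctuation\n" case /\W/;
+    }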
+
+However, when a C<case> block has been executed control is automatically
+transferred to the statement after the immediately enclosing C<switch>
+block, rather than to the next statement within the block. In other
+words, the success of any C<case> statement prevents other cases in the
+same scope from executing. But see L<"Allowing fall-through"> below.
+
+Together these two new statements provide a fully generalized case
+mechanism:
+
+    use Switch;
+
+    # AND LATER...
+
+    %special = ( woohoo => 1,  d'oh => 1 );
+
+    while (<>) {
+        chomp;
+        switch ($_) {
+            case (%special) { print "homer\n"; }          # if $special{$_}
+            case /[a-z]/i   { print "alpha\n"; }          # if $_ =~ /[a-z]/i
+            case [1..9]     { print "small num\n"; }      # if $_ in [1..9]
+            case { $_[0] >= 10 } { print "big num\n"; }   # if $_ >= 10
+            print "must be punctuation\n" case /\W/;      # if $_ =~ /\W/
+        }
+    }
+
+Note that C<case>s can be nested within C<case> (or any other) blocks,
+and a series of C<case> statements can try different types of matches
+-- hash membership, pattern match, array intersection, simple equality,
+etc. -- against the same switch value.
+
+The use of intersection tests against an array reference is particularly
+useful for aggregating integral cases:
+
+    sub classify_digit
+    {
+        switch ($_[0]) { case 0            { return 'zero' }
+                         case [2,4,6,8]    { return 'even' }
+                         case [1,3,5,7,9]  { return 'odd' }
+                         case /[A-F]/i     { return 'hex' }
+                       }
+    }
+
+
+=head2 Allowing fall-through
+
+Fall-through (trying another case after one has already succeeded)
+is usually a Bad Idea in a switch statement. However, this
+is Perl, not a police state, so there I<is> a way to do it, if you must.
+
+If a C<case> block executes an untargeted C<next>, control is
+immediately transferred to the statement I<after> the C<case> statement
+(i.e. usually another case), rather than out of the surrounding
+C<switch> block.
+
+For example:
+
+    switch ($val) {
+        case 1      { handle_num_1(); next }    # and try next case...
+        case "1"    { handle_str_1(); next }    # and try next case...
+        case [0..9] { handle_num_any(); }       # and we're done
+        case /\d/   { handle_dig_any(); next }  # and try next case...
+        case /.*/   { handle_str_any(); next }  # and try next case...
+    }
+
+If $val held the number C<1>, the above C<switch> block would call the
+first three C<handle_...> subroutines, jumping to the next case test
+each time it encountered a C<next>. After the third C<case> block
+was executed, control would jump to the end of the enclosing
+C<switch> block.
+
+On the other hand, if $val held C<10>, then only the last two C<handle_...>
+subroutines would be called.
+
+Note that this mechanism allows the notion of I<conditional fall-through>.
+For example:
+
+    switch ($val) {
+        case [0..9] { handle_num_any(); next if $val < 7; }
+        case /\d/   { handle_dig_any(); }
+    }
+
+If an untargeted C<last> statement is executed in a case block, this
+immediately transfers control out of the enclosing C<switch> block
+(in other words, there is an implicit C<last> at the end of each
+normal C<case> block). Thus the previous example could also have been
+written:
+
+    switch ($val) {
+        case [0..9] { handle_num_any(); last if $val >= 7; next; }
+        case /\d/   { handle_dig_any(); }
+    }
+
+
+=head2 Automating fall-through
+
+In situations where case fall-through should be the norm, rather than an
+exception, an endless succession of terminal C<next>s is tedious and ugly.
+Hence, it is possible to reverse the default behaviour by specifying
+the string "fallthrough" when importing the module. For example, the
+following code is equivalent to the first example in L<"Allowing fall-through">:
+
+    use Switch 'fallthrough';
+
+    switch ($val) {
+        case 1      { handle_num_1(); }
+        case "1"    { handle_str_1(); }
+        case [0..9] { handle_num_any(); last }
+        case /\d/   { handle_dig_any(); }
+        case /.*/   { handle_str_any(); }
+    }
+
+Note the explicit use of a C<last> to preserve the non-fall-through
+behaviour of the third case.
+
+
+
+=head2 Alternative syntax
+
+Perl 6 will provide a built-in switch statement with essentially the
+same semantics as those offered by Switch.pm, but with a different
+pair of keywords. In Perl 6 C<switch> will be spelled C<given>, and
+C<case> will be pronounced C<when>. In addition, the C<when> statement
+will not require switch or case values to be parenthesized.
+
+This future syntax is also (largely) available via the Switch.pm module, by
+importing it with the argument C<"Perl6">. For example:
+
+    use Switch 'Perl6';
+
+    given ($val) {
+        when 1       { handle_num_1(); }
+        when ($str1) { handle_str_1(); }
+        when [0..9]  { handle_num_any(); last }
+        when /\d/    { handle_dig_any(); }
+        when /.*/    { handle_str_any(); }
+        default      { handle anything else; }
+    }
+
+Note that scalars still need to be parenthesized, since they would be
+ambiguous in Perl 5.
+
+Note too that you can mix and match both syntaxes by importing the module
+with:
+
+    use Switch 'Perl5', 'Perl6';
+
+
+=head2 Higher-order Operations
+
+One situation in which C<switch> and C<case> do not provide a good
+substitute for a cascaded C<if>, is where a switch value needs to
+be tested against a series of conditions. For example:
+
+    sub beverage {
+        switch (shift) {
+            case { $_[0] < 10 } { return 'milk' }
+            case { $_[0] < 20 } { return 'coke' }
+            case { $_[0] < 30 } { return 'beer' }
+            case { $_[0] < 40 } { return 'wine' }
+            case { $_[0] < 50 } { return 'malt' }
+            case { $_[0] < 60 } { return 'Moet' }
+            else                { return 'milk' }
+        }
+    }
+
+(This is equivalent to writing C<< if ($_[0] < 10) { return 'milk' } >>,
+etc.; C<$_[0]> is the argument to the anonymous subroutine.)
+
+The need to specify each condition as a subroutine block is tiresome. To
+overcome this, when importing Switch.pm, a special "placeholder"
+subroutine named C<__> [sic] may also be imported. This subroutine
+converts (almost) any expression in which it appears to a reference to a
+higher-order function. 
That is, the expression:
+
+    use Switch '__';
+
+    __ < 2
+
+is equivalent to:
+
+    sub { $_[0] < 2 }
+
+With C<__>, the previous ugly case statements can be rewritten:
+
+    case  __ < 10  { return 'milk' }
+    case  __ < 20  { return 'coke' }
+    case  __ < 30  { return 'beer' }
+    case  __ < 40  { return 'wine' }
+    case  __ < 50  { return 'malt' }
+    case  __ < 60  { return 'Moet' }
+    else           { return 'milk' }
+
+The C<__> subroutine makes extensive use of operator overloading to
+perform its magic. All operations involving __ are overloaded to
+produce an anonymous subroutine that implements a lazy version
+of the original operation.
+
+The only problem is that operator overloading does not allow the
+boolean operators C<&&> and C<||> to be overloaded. So a case statement
+like this:
+
+    case  0 <= __ && __ < 10  { return 'digit' }
+
+doesn't act as expected, because when it is
+executed, it constructs two higher order subroutines
+and then treats the two resulting references as arguments to C<&&>:
+
+    sub { 0 <= $_[0] } && sub { $_[0] < 10 }
+
+This boolean expression is inevitably true, since both references are
+non-false. Fortunately, the overloaded C<'bool'> operator catches this
+situation and flags it as an error.
+
+=head1 DEPENDENCIES
+
+The module is implemented using Filter::Util::Call and Text::Balanced
+and requires both these modules to be installed.
+
+=head1 AUTHOR
+
+Damian Conway (damian@conway.org). This module is now maintained by
+Alexandr Ciornii (alexchorny@gmail.com). It was previously maintained by
+Rafael Garcia-Suarez and the perl5 porters.
+
+=head1 BUGS
+
+There are undoubtedly serious bugs lurking somewhere in code this funky :-)
+Bug reports and other feedback are most welcome.
+
+May create syntax errors in other parts of code.
+
+On perl 5.10.x, may cause a syntax error if "case" is present inside a heredoc.
+
+In general, use given/when instead. It was introduced in perl 5.10.0,
+which was released in 2007.
+
+=head1 LIMITATIONS
+
+Due to the heuristic nature of Switch.pm's source parsing, the presence of
+regexes with embedded newlines that are specified with raw C</.../>
+delimiters and don't have a modifier C</x> are indistinguishable from
+code chunks beginning with the division operator C</>. As a workaround
+you must use C<m/.../> or C<m?...?> for such patterns. Also, the presence
+of regexes specified with raw C<?...?> delimiters may cause mysterious
+errors. The workaround is to use C<m?...?> instead.
+
+Due to the way source filters work in Perl, you can't use Switch inside
+a string C<eval>.
+
+May not work if sub prototypes are used (RT#33988).
+
+Regex captures in C<when> are not available to the code block.
+
+If your source file is longer than 1 million characters and you have a
+switch statement that crosses the 1 million (or 2 million, etc.)
+character boundary you will get mysterious errors. The workaround is to
+use smaller source files.
+
+=head1 COPYRIGHT
+
+    Copyright (c) 1997-2008, Damian Conway. All Rights Reserved.
+    This module is free software. It may be used, redistributed
+    and/or modified under the same terms as Perl itself.
+ diff --git a/src/processfiles.py b/src/processfiles.py new file mode 100644 index 0000000..b11fc41 --- /dev/null +++ b/src/processfiles.py @@ -0,0 +1,1530 @@ +#!/usr/bin/python + +import os +import re +import subprocess +import pdb +import xml.etree.ElementTree as ET +import gzip +import wget +import pandas as pd +import itertools +import zipfile +from io import StringIO +import progressbar +import json +import tarfile +import scipy +from scipy import stats +import multiprocessing +from tqdm import tqdm +import numpy as np +import glob + +#Set to the established download directory +dwn_dir = 'downloads/' + +#Set to the desired processed files directory +pro_dir = 'processed/' + +def process_DGIdb(): + + print('Formating DGIdb file...') + + dgidb = 'DGIdb_interactions.tsv' + dgidb_dwn = 'DGIdb_interactions_dwn.tsv' + + #Clean in the output file those lines that are matches not found and header lines introduced in each access + outputh = open (pro_dir+dgidb, 'w') + inputh = open (dwn_dir+dgidb_dwn, 'r') + outputh.write('\t'.join(['gene_name','drug_name','interaction_type','source','gene_categories'])+'\n') + for line in inputh: + line = line.rstrip('\n') + line_a = line.split('\t') + if re.search('Possible suggestions:',line_a[0]) == None and re.search('Unmatched search term:',line_a[0]) == None and re.search('gene_name',line_a[0]) == None: + if line_a[3] not in ['CIViC', 'OncoKB']: + outputh.write(line+'\n') + inputh.close() + outputh.close() + +def process_sabdab(): + + print('Formating SAbDab file...') + + sabdab_file = 'sabdab.tsv' + sabdab_dwn = 'TheraSAbDab_SeqStruc_OnlineDownload.csv' + + sabdab_targets = {'Anthrax Protective Antigen': '', 'APP Abeta 1-40/42;TFRC': 'APP;TFRC', 'AR-C124910XX': '', 'CALCA&CALCB': 'CALCA;CALCB', 'Canine NGFB': '', 'CD41_CD61': 'CD41;CD61', 'CEACAM5&CD3E;CD3E': 'CEACAM5;CD3E', 'Dabigatran': '', 'DNA/H1 Complex': '', 'ERBB2 (Domain II);ERBB2 (Domain IV)': 'ERBB2', 'FN extra domain B': 'FN1', 'FZD Family': 'FZD1;FZD2;FZD3;FZD4;FZD5;FZD6;FZD7;FZD8;FZD9;FZD10', 'Ganglioside GD2': 'B4GALNT1', 'Heat Shock Protein 90 Homolog': 'HSP90', 'HHV gB AD-1': '', 'HIV-1 gp120': '', 'HIV-1 gp120 CD4bs': '', 'HIV-1 gp120 V3': '', 'Idiotope of anti-NeuGc-ganliosides': '', 'IGF1&IGF2': 'IGF1;IGF2', 'IL31 (Canine)': '', 'IL31 (Feline)': '', 'Influenza A HA': '', 'Influenza A HA2': '', 'Influenza B Virus': '', 'ITGA2B_ITGB3': 'ITGA2B;ITGB3', 'ITGA4_ITGB7': 'ITGA4;ITGB7', 'ITGA4_ITGB7&ITGAE_ITGB7': 'ITGA4;ITGAE;ITGB7', 'ITGAV_ITGB3': 'ITGAV;ITGB3', 'MRSV envelope protein': '', 'NGFB (Canine)': '', 'Non-Binding': '', 'NOTCH2&3': 'NOTCH2;NOTCH3', 'PcrV type III secretion system': '', 'PcrV type III secretion system;Polysaccharide synthesis locus (Pseudomonas)': '', 'PDCD1 (Canine)': '', 'Phosphatidylserine': 'PTDSS1', 'pro-GDF-8': 'MSTN', 'Rabies Virus Spike Glycoprotein G': '', 'Rabies Virus Strain ERA GP Ectodomain Epitope G-II': '', 'Rabies Virus Strain ERA GP Ectodomain Epitope G-III': '', 'Rabies Virus Surface Glycoprotein 4 (gp4) Epitope 1': '', 'RhD': 'RHD', 'RSV gpF': '', 'RSV gpF;RSV gpF': '', 'RV Antigenic Site III': '', 'SARS-CoV-2 Spike': '', 'SARS-CoV-2 Spike RBD': '', 'Serotype IATS O11': '', 'Shiga Toxin Type 1': '', 'Shiga Toxin Type 2': '', 'SIRPα': 'SIRPA', 'TGFB1 (Canine)': '', 'Toxin A': '', 'VP2 (Canine)': '', 'Zaire Ebolavirus GP': '', 'Zaire Ebolavirus GP1': '', 'α4β7': 'ITGB7', 'RSV': '', 'SpA': ''} + + sabdab = pd.read_csv(dwn_dir+sabdab_dwn, low_memory=False) + sabdab.fillna('', inplace=True) + + for index, row in sabdab.iterrows(): + if row['Target'] 
in sabdab_targets.keys(): + sabdab.loc[index, 'Target'] = sabdab_targets[row['Target']] + + sabdab = sabdab.rename(columns={'Therapeutic': 'drug_name', 'Target': 'gene_name'}) + sabdab = sabdab.drop(sabdab.loc[sabdab['gene_name'] == ''].index) + sabdab.to_csv(pro_dir+sabdab_file, index=False, sep='\t', header=True) + +def process_moalmanac(): + + print('Formating MOAlmanac file...') + + moalmanac_file = 'moalmanac.tsv' + moalmanac_dwn = 'moalmanac_dwn.tsv' + + def process_alterations(selection): + alterations = [] + for ele in list(set(selection['feature_type'].tolist())): + alterations.append(ele+' ('+','.join(list(set(selection[selection['feature_type'] == ele]['protein_change'].dropna().tolist())))+')') + return('; '.join(alterations).replace(' ()', '')) + + inputf = pd.read_csv(dwn_dir+moalmanac_dwn, sep ='\t', header=None, low_memory=False) + inputf.columns = ['assertion_id', 'feature_type', 'gene', 'gene1', 'gene2', 'drug', 'resistance', 'sensitivity', 'therapy_type', 'protein_change', 'predictive_implication', 'validated'] + inputf = inputf[inputf['validated'] == True] + inputf = inputf[((inputf['drug'].str.contains('\+') == False) & (inputf['predictive_implication'].isin(['FDA-Approved', 'Guideline', 'Clinical trial', 'Clinical evidence']))) | ((inputf['drug'].str.contains('\+')) & (inputf['predictive_implication'].isin(['FDA-Approved', 'Guideline'])))] + + subset_add = pd.DataFrame() + for i in ('gene', 'gene1', 'gene2'): + subset_add1 = inputf[[i,'drug']] + subset_add1.columns = ['gene', 'drug'] + subset_add = subset_add.append(subset_add1[~subset_add1.isnull().any(axis=1)]) + + subset_add = subset_add.drop_duplicates() + + moalmanac = pd.DataFrame(columns=['gene_name','drug_name','source','response','alteration','therapy_type']) + source = 'MOAlmanac' + for index, row in subset_add.iterrows(): + selection = inputf[((inputf['gene'] == row['gene']) | (inputf['gene1'] == row['gene']) | (inputf['gene2'] == row['gene'])) & (inputf['drug'] == row['drug'])] + + if len(selection) > 0: + drug = row['drug'] + if re.search('\+', row['drug']): + drug = ' + '.join(sorted(drug.split(' + '))) + + response = [] + if True in list(set(selection['sensitivity'].tolist())): + response.append('sensitivity') + alteration_sen = process_alterations(selection[selection['sensitivity'] == True]) + + if True in list(set(selection['resistance'].tolist())): + response.append('resistance') + alteration_res = process_alterations(selection[selection['resistance'] == True]) + + if len(response) > 0: + if len(response) == 2: + alteration = alteration_sen+' / '+alteration_res + elif response[0] == 'sensitivity': + alteration = alteration_sen + elif response[0] == 'resistance': + alteration = alteration_res + + therapy = list(set(selection['therapy_type'].tolist()))[0] + moalmanac = moalmanac.append({'gene_name' : row['gene'], 'drug_name' : drug, 'source' : source, 'response' : ' / '.join(response), 'alteration' : alteration, 'therapy_type' : therapy} , ignore_index=True) + + moalmanac.to_csv(pro_dir+moalmanac_file, index=False, sep='\t', header=True) + +def process_GDSC(): + + print('Formating GDSC...') + + GDSC_int = 'GDSC_intermediate.tsv' + GDSC = 'GDSC.tsv' + GDSC_dwn = 'GDSC_features.csv' + GDSC_tsv = 'GDSC_features.tsv' + + #Obtain the name of the ANOVA downloaded file from GDSC + ANOVA_dwn = '' + listdir = os.listdir(dwn_dir) + for efile in listdir: + if re.search('^ANOVA', efile) != None: ANOVA_dwn = efile + + filei = open(dwn_dir+GDSC_dwn,'r') + fileo = open(pro_dir+GDSC_tsv,'w') + + for line in filei: + 
line = line.strip('\n')
+        line_a = line.split(',')
+        if line_a[0] == 'cell_line_name':
+            fileo.write('\t'.join(line_a)+'\n')
+        else:
+            fileo.write('\t'.join(line_a[0:8])+'\t'+','.join(line_a[8:])+'\n')
+
+    filei.close()
+    fileo.close()
+
+    def label_sens(row):
+        if row['feature_delta_mean_ic50'] < 0: return 'sensitivity'
+        else: return 'resistance'
+
+    def obtain_features(row):
+        if re.search('_mut',row['feature_name']) != None:
+            return row['feature_name'].replace('_mut','').replace('.',',')
+        elif re.search('loss.|gain.',row['feature_name']) != None and re.search('\.\.',row['feature_name']) != None:
+            field_a = row['feature_name'].split('.')
+
+            if re.search('loss|gain',field_a[3]) != None:
+                pan_list = []
+                for ele in row['feature_name'].split('.'):
+                    if re.search('cnaPANCAN',ele): pan_list.append(ele)
+                field_a = list(set([item for sublist in [y.split(',') for y in [str(x) for x in gfeatures[(gfeatures['genetic_feature'].isin(pan_list)) & (gfeatures['is_mutated'] == 1)]['genes_in_segment']]] for item in sublist]))
+                return ','.join(field_a)
+            else:
+                #Correction of NKX2.1 value
+                if row['feature_name'] == "gain.cnaPANCAN393..FOXA1.NKX2.1.PSMA6.":
+                    field_a[field_a.index('1')-1:field_a.index('1')+1] = ['-'.join(field_a[field_a.index('1')-1:field_a.index('1')+1])]
+                return ','.join(field_a[3:len(field_a)-1])
+
+        elif re.search('loss.|gain.',row['feature_name']) != None and re.search('\.\.',row['feature_name']) == None:
+            field_a = list(set([item for sublist in [y.split(',') for y in [str(x) for x in gfeatures[(gfeatures['genetic_feature'] == row['feature_name'].split('.')[1]) & (gfeatures['is_mutated'] == 1)]['genes_in_segment']]] for item in sublist]))
+
+            while 'UNCLASSIFIED' in field_a: field_a.remove('UNCLASSIFIED')
+            return ','.join(field_a)
+
+    def obtain_alt(row):
+        if re.search('loss.',row['feature_name']) != None: return 'deletion'
+        elif re.search('gain.',row['feature_name']) != None: return 'amplification'
+        elif re.search('_mut',row['feature_name']) != None: return 'mutation'
+
+    gfeatures = pd.read_csv(pro_dir+'GDSC_features.tsv', sep='\t', header = 0, dtype={'cell_line_name': str, 'cosmic_sample_id': int, 'gdsc_desc1': str, 'gdsc_desc2': str, 'tcga_desc': str, 'genetic_feature': str, 'is_mutated': int, 'recurrent_gain_loss': str, 'genes_in_segment': str})
+
+    filei = pd.read_excel(dwn_dir+ANOVA_dwn, sheet_name = 'PANCANCER_ANOVA', header = 0, dtype={'drug_name': str, 'drug_id': int, 'target': str, 'target_pathway': str, 'feature_name': str, 'n_feature_pos': int, 'n_feature_neg': int, 'ic50_effect_size': float, 'log_ic50_mean_pos': float, 'log_ic50_mean_neg': float, 'log_max_conc_tested': float, 'feature_ic50_t_pval': float, 'feature_delta_mean_ic50': float, 'feature_pos_ic50_var': float, 'feature_neg_ic50_var': float, 'feature_pval': float, 'tissue_pval': float, 'msi_pval': float, 'fdr': float, 'tissue_type': str, 'dataset_version': float})
+
+    filei = filei[(filei['feature_pval'] < 0.001) & (filei['fdr'] < 25)]
+
+    filei['response'] = filei.apply(lambda row: label_sens(row), axis = 1)
+    filei['gene'] = filei.apply(lambda row: obtain_features(row), axis = 1)
+    filei['alteration'] = filei.apply(lambda row: obtain_alt(row), axis = 1)
+    filei['source'] = 'GDSC'
+
+    filei['gene'] = filei['gene'].apply(lambda x: x.split(','))
+    filei = filei.explode('gene')
+    filei = filei.reset_index()
+
+    filei.rename(columns={'gene': 'gene_name'}, inplace=True)
+
+    filei.to_csv(pro_dir+GDSC_int, sep='\t', index=False, header=True)
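+
+    #Collapse the per-feature association rows into a single row per
+    #(drug, gene) pair, recording whether the evidence supports sensitivity,
+    #resistance or both, and joining the corresponding alterations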
+    fileo = pd.DataFrame(columns=['gene_name','drug_name','source','response','alteration'])
+    source = 'GDSC'
+
+    for index, row in filei[['drug_name', 'gene_name']].drop_duplicates().iterrows():
+        selection = filei[(filei['drug_name'] == row['drug_name']) & (filei['gene_name'] == row['gene_name'])]
+
+        if len(selection) > 0:
+
+            response = []
+            (alteration_sen, alteration_res) = ('', '')
+
+            if len(selection.loc[selection['response'] == 'sensitivity']) > 0:
+                response.append('sensitivity')
+                alteration_sen = '; '.join(list(set(selection.loc[selection['response'] == 'sensitivity']['alteration'].tolist())))
+            if len(selection.loc[selection['response'] == 'resistance']) > 0:
+                response.append('resistance')
+                alteration_res = '; '.join(list(set(selection.loc[selection['response'] == 'resistance']['alteration'].tolist())))
+
+            if len(response) > 0:
+                if len(response) == 2:
+                    alteration = alteration_sen+' / '+alteration_res
+                elif response[0] == 'sensitivity':
+                    alteration = alteration_sen
+                elif response[0] == 'resistance':
+                    alteration = alteration_res
+
+                fileo = fileo.append({'gene_name' : row['gene_name'], 'drug_name' : row['drug_name'], 'source' : source, 'response' : ' / '.join(response), 'alteration' : alteration} , ignore_index=True)
+
+    fileo.to_csv(pro_dir+GDSC, index=False, sep='\t', header=True)
+
+def process_KEGG_ATC():
+
+    print('Formatting KEGG ATC...')
+
+    KEGG_ATC = 'TargetBasedClassificationKEGG_formated.tsv'
+    KEGG_ATC_dwn = 'br08310.keg'
+
+    filei = open(dwn_dir+KEGG_ATC_dwn, 'r')
+    fileo = open(pro_dir+KEGG_ATC, 'w')
+
+    fileo.write('\t'.join(['drug', 'family1', 'family2', 'family3', 'family4'])+'\n')
+
+    elementA = ''
+    elementB = ''
+    elementC = ''
+    elementD = ''
+
+    for line in filei:
+        line = line.rstrip()
+        line = line.replace('<b>', ' ')
+        line = line.replace('</b>', ' ')
+        line_a = line.split('\t')
+
+        if re.search('^[ABCDEF] *D[\d]{5}', line_a[0]):
+            pattern = re.compile('^[ABCDEF] *D[\d]{5} +(.+?) \(')
+            name_s = pattern.search(line_a[0])
+            if name_s == None:
+                pattern = re.compile('^[ABCDEF] *D[\d]{5} +(.+?)$')
+                name_s = pattern.search(line_a[0])
+            name = name_s.group(1)
+            tipo = ''
+            if len(line_a) > 1:
+                elementA_w = elementA + '_'+line_a[1]
+                elementB_w = elementB + '_'+line_a[1]
+                elementC_w = elementC + '_'+line_a[1]
+                elementD_w = elementD + '_'+line_a[1]
+            else:
+                (elementA_w, elementB_w, elementC_w, elementD_w) = (elementA, elementB, elementC, elementD)
+
+            if line_a[0][0] == 'C': elementB_w = ''
+            if line_a[0][0] == 'D': elementC_w = ''
+            if line_a[0][0] == 'E': elementD_w = ''
+
+
+            fileo.write('\t'.join([name,elementA_w,elementB_w,elementC_w,elementD_w])+'\n')
+        else:
+            if re.search('^A', line_a[0]):
+
+                pattern = re.compile('^A (.+)')
+                elementA_s = pattern.search(line_a[0])
+                elementA = elementA_s.group(1)
+                elementB = ''
+                elementC = ''
+                elementD = ''
+            elif re.search('^B', line_a[0]):
+
+                pattern = re.compile('^B (.+) \[')
+                elementB_s = pattern.search(line_a[0])
+                if elementB_s == None:
+                    pattern = re.compile('^B (.+)$')
+                    elementB_s = pattern.search(line_a[0])
+                elementB = elementB_s.group(1)
+                elementC = ''
+                elementD = ''
+            elif re.search('^C', line_a[0]):
+
+                pattern = re.compile('^C (.+?) \[')
+                elementC_s = pattern.search(line_a[0])
+                if elementC_s == None:
+                    pattern = re.compile('^C (.+?)$')
+                    elementC_s = pattern.search(line_a[0])
+                elementC = elementC_s.group(1)
+                elementD = ''
+            elif re.search('^D', line_a[0]):
+
+                pattern = re.compile('^D (.+?) 
\[') + elementD_s = pattern.search(line_a[0]) + if elementD_s == None: + pattern = re.compile('^D (.+?)$') + elementD_s = pattern.search(line_a[0]) + elementD = elementD_s.group(1) + + filei.close() + fileo.close() + +def process_cmap(): + + print('Formating CMAP MOA...') + + CMAP_MOA = 'cmap_moa.tsv' + + fileo = open(pro_dir+CMAP_MOA ,'w') + fileo.write('\t'.join(['drug', 'moa'])+'\n') + + with open(dwn_dir+CMAP_MOA) as f: + for line in f: + data = json.loads(line) + for e in data: + fileo.write('\t'.join([e['pert_iname'], e['name']])+'\n') + fileo.close() + +def process_FDA(): + + print('Formating FDA file...') + + fda_status_file = 'fda_status.tsv' + #Obtain the name of the FDA zip downloaded + fda_dwn = '' + listdir = os.listdir(dwn_dir) + for efile in listdir: + if re.search('^drugsatfda', efile) != None: fda_dwn = efile + + df_dict = dict() + with zipfile.ZipFile(dwn_dir+fda_dwn) as z: + for filename in z.namelist(): + if filename == 'MarketingStatus.txt': + # read the file + with z.open(filename) as f: + data = pd.read_csv(f, sep ='\t', header=0, low_memory=False) + df_dict['markstat'] = data + elif filename == 'Products.txt': + with z.open(filename) as f: + content = f.read() + content = str(content, 'utf-8').replace('\t\r\n', '\r\n') + data = pd.read_csv(StringIO(content), sep ='\t', header=0, low_memory=False) + df_dict['products'] = data + elif filename == 'MarketingStatus_Lookup.txt': + with z.open(filename) as f: + data = pd.read_csv(f, sep ='\t', header=0, low_memory=False) + df_dict['markstatlook'] = data + + #create the output file + fda_drug_list = {} + for index, row in df_dict['products'].iterrows(): + compound = row['ActiveIngredient'].split('; ') + applno = row['ApplNo'] + for comp in compound: + status = df_dict['markstatlook'][df_dict['markstatlook']['MarketingStatusID'] == df_dict['markstat'][df_dict['markstat']['ApplNo'] == applno]['MarketingStatusID'].tolist()[0]]['MarketingStatusDescription'].tolist() + if comp in fda_drug_list.keys(): + fda_drug_list[comp][0] = fda_drug_list[comp][0]+status + fda_drug_list[comp][1] = fda_drug_list[comp][1]+[applno] + else: + fda_drug_list[comp] = [status, [applno]] + + for comp in fda_drug_list: + if 'Prescription' in fda_drug_list[comp][0] or 'Over-the-counter' in fda_drug_list[comp][0]: fda_drug_list[comp][0] = 'Approved' + elif 'Discontinued' in fda_drug_list[comp][0]: fda_drug_list[comp][0] = 'Withdrawn' + else: fda_drug_list[comp][0] = '' + fda_drug_list[comp][1] = ';'.join([str(x) for x in list(set(fda_drug_list[comp][1]))]) + + fda_status = pd.DataFrame(list(fda_drug_list.items()), columns=['drug','data']) + fda_status = pd.concat([fda_status, pd.DataFrame(fda_status['data'].to_list(), columns = ['status', 'ApplNo'])], axis=1) + fda_status = fda_status.drop('data', axis=1) + fda_status.to_csv(pro_dir+fda_status_file, index=False, sep='\t') + +def process_FDA_label(): + + print('Formating FDA label file...') + + FDA_label = 'fda_labels.tsv' + + fileo = open(pro_dir+FDA_label,'w') + fileo.write("\t".join(['indication', 'ApplNo'])+'\n') + + #Obtain the name of the FDA zip downloaded + listdir = os.listdir(dwn_dir) + + for efile in listdir: + + if re.search('^drug\-label', efile) != None: + + with zipfile.ZipFile(dwn_dir+efile) as z: + for filename in z.namelist(): + # read the file + with z.open(filename) as f: + data = json.load(f) + for e in data['results']: + if 'indications_and_usage' in e.keys() and 'application_number' in e['openfda'].keys(): + fileo.write("\t".join([e['indications_and_usage'][0], 
e['openfda']['application_number'][0]])+'\n') + + fileo.close() + +def process_EMA(): + + print('Formating EMA file...') + + ema_file = 'ema_status.tsv' + EMA_dwn = 'Medicines_output_european_public_assessment_reports.xlsx' + + filei = pd.read_excel(dwn_dir+EMA_dwn) + filei.columns = filei.loc[filei.iloc[:,0] == 'Category'].values.tolist()[0] + selection = filei.loc[filei['Category'] == 'Human'][['Medicine name', 'Therapeutic area', 'International non-proprietary name (INN) / common name', 'Active substance', 'Authorisation status', 'Condition / indication']] + + selection.to_csv(pro_dir+ema_file, index=False, sep='\t') + +def process_ct(): + + print('Formating Clinical Trial file...') + + ct = 'clinicaltrials.tsv' + ct_dwn = 'AllPublicXML.zip' + + fileo = open(pro_dir+ct,'w') + fileo.write("\t".join(['condition', 'title', 'status', 'drug'])+'\n') + + with zipfile.ZipFile(dwn_dir+ct_dwn) as z: + for i in progressbar.progressbar(range(len(z.infolist()))): + if re.search('.xml$', z.infolist()[i].filename): + with z.open(z.infolist()[i].filename) as f: + [title,status,condition,drug] = ['','','',''] + tree = ET.ElementTree(file=f) + for elem in tree.iterfind('official_title'): + title = elem.text + for elem in tree.iterfind('overall_status'): + status = elem.text + for elem in tree.iterfind('condition'): + condition = elem.text + for elem in tree.iterfind('intervention/intervention_type'): + if elem.text == 'Drug': + for elem in tree.iterfind('intervention/intervention_name'): + drug = elem.text + + if drug != '': fileo.write('\t'.join([condition,title,status,drug])+'\n') + + fileo.close() + +def process_cgc_oncovar(): + + print('Creating file oncovar scores...') + + oncovar_file = 'oncovar_scores.tsv' + oncovar_dwn = 'TCGA.PanCancer.all.genes.OncoVar.tsv.gz' + + oncovar_tumor_dir = 'All_genes_OncoVar_TCGA.tar.gz' + oncovar = '' + + tar = tarfile.open(name=dwn_dir+oncovar_tumor_dir) + + output = open(pro_dir+oncovar_file, 'w') + output.write('\t'.join(['gene', 'cancer_type', 'P_value', 'FDR', 'OncoVar_Score', 'Consensus_Score', 'Driver_Level'])+'\n') + + for member in tar.getnames(): + if re.search('.gz$', member): + f=tar.extractfile(member) + data = pd.read_csv(f, compression='gzip', sep='\t', low_memory=False, header=0, encoding = 'ISO-8859-1') + for index, row in data.iterrows(): + if row['Cancer'] == 'PanCancer': + cancer = row['Cancer'] + else: + cancer = row['Cancer'].split('[')[1] + cancer = cancer.replace(']', '') + output.write('\t'.join([row['Gene_symbol'], cancer, str(row['P_value']), str(row['FDR']), str(row['OncoVar_Score']), str(row['Consensus_Score']), str(row['Driver Level'])])+'\n') + if re.search(oncovar_dwn, member): + oncovar = data + + print('Creating file with ONC and TSG classification...') + + cgc_file = 'cancer_gene_census.csv' + cgc = pd.read_csv(dwn_dir+cgc_file, low_memory=False, header=0, encoding = 'ISO-8859-1') + cgc = cgc.fillna('') + + fileo = open(pro_dir+'generole.tsv','w') + fileo.write("\t".join(['gene', 'CGC', 'OncoKB', '2020Rule', 'CTAT', 'Oncogene', 'TSgene']) + '\n') + + genes = list(set(cgc['Gene Symbol'].tolist() + oncovar['Gene_symbol'].tolist())) + + for gene in genes: + + categories = [cgc.loc[cgc['Gene Symbol'] == gene]['Role in Cancer'], oncovar.loc[oncovar['Gene_symbol'] == gene]['OncoKB'], oncovar.loc[oncovar['Gene_symbol'] == gene]['2020Rule'], oncovar.loc[oncovar['Gene_symbol'] == gene]['CTAT'], oncovar.loc[oncovar['Gene_symbol'] == gene]['Oncogene'], oncovar.loc[oncovar['Gene_symbol'] == gene]['TSgene']] + + for idx, cat in 
enumerate(categories):
+            list_cat = []
+            if not cat.empty:
+                value = cat.tolist()[0]
+                if isinstance(value, str):
+                    subcats = value.split(', ')
+                    subcats = [x.split('/') for x in subcats]
+                    subcats = [item for sublist in subcats for item in sublist]
+
+                    for subcat in subcats:
+                        if subcat not in list_cat: list_cat.append(subcat.upper())
+
+                    if idx == 4:
+                        if list_cat[0] == 'Y': list_cat[0] = 'ONC'
+                    elif idx == 5:
+                        if list_cat[0] == 'Y': list_cat[0] = 'TSG'
+
+                    list_cat = ['ONC' if x in ['FUSION', 'ONCOGENE'] else x for x in list_cat]
+                    list_cat = ['' if x in ['N', 'Y'] else x for x in list_cat]
+
+            categories[idx] = '/'.join(list(set(list_cat)))
+
+        fileo.write("\t".join([gene]+categories) + '\n')
+
+    fileo.close()
+
+    print('Creating file with CGC info for scores...')
+
+    #use a separate variable for the output filename so the 'cgc' dataframe
+    #loaded above is not shadowed
+    cgc_scores_file = 'cgc_scores.tsv'
+
+    fileo = open(pro_dir+cgc_scores_file,'w')
+    fileo.write("\t".join(['gene', 'cancer_type_source', 'Tier']) + '\n')
+
+    for index, row in cgc.iterrows():
+        tumors = row['Tumour Types(Somatic)'].split(', ')+row['Tumour Types(Germline)'].split(', ')
+        tumors = [x for x in tumors if x != '']
+        for t in list(set(tumors)):
+            fileo.write("\t".join([row['Gene Symbol'], t, str(row['Tier'])]) + '\n')
+
+    fileo.close()
+
+def process_KEGG_ind():
+
+    print('Creating pathway member information...')
+
+    paths = ['hsa03320', 'hsa04010', 'hsa04012', 'hsa04014', 'hsa04015', 'hsa04020', 'hsa04022', 'hsa04024', 'hsa04062', 'hsa04064', 'hsa04066', 'hsa04068', 'hsa04071', 'hsa04110', 'hsa04114', 'hsa04115', 'hsa04150', 'hsa04151', 'hsa04152', 'hsa04210', 'hsa04261', 'hsa04270', 'hsa04310', 'hsa04330', 'hsa04340', 'hsa04350', 'hsa04370', 'hsa04390', 'hsa04510', 'hsa04520', 'hsa04530', 'hsa04540', 'hsa04611', 'hsa04620', 'hsa04621', 'hsa04622', 'hsa04630', 'hsa04650', 'hsa04660', 'hsa04662', 'hsa04664', 'hsa04666', 'hsa04668', 'hsa04670', 'hsa04722', 'hsa04910', 'hsa04912', 'hsa04914', 'hsa04915', 'hsa04916', 'hsa04919', 'hsa04920', 'hsa04921', 'hsa04922', 'hsa04971', 'hsa05010', 'hsa05012', 'hsa05160', 'hsa05200', 'hsa05205', 'hsa05212', 'hsa05214', 'hsa05218', 'hsa05231']
+
+    global dwn_dir
+    global pro_dir
+
+    dwn_dir_KEGG = dwn_dir+'KEGGmodeled/'
+    pro_dir_KEGG = pro_dir+'KEGGmodeled'
+
+    if not os.path.exists(pro_dir_KEGG): os.makedirs(pro_dir_KEGG)
+
+    if not os.path.exists(pro_dir_KEGG+'/'+'components'): os.makedirs(pro_dir_KEGG+'/'+'components')
+    if not os.path.exists(pro_dir_KEGG+'/'+'relations'): os.makedirs(pro_dir_KEGG+'/'+'relations')
+    if not os.path.exists(pro_dir_KEGG+'/'+'subpaths'): os.makedirs(pro_dir_KEGG+'/'+'subpaths')
+    if not os.path.exists(pro_dir_KEGG+'/'+'dictionaries'): os.makedirs(pro_dir_KEGG+'/'+'dictionaries')
+
+    pro_dir_KEGG = pro_dir_KEGG+'/'
+
+    gene_symbol_dic = {}
+
+    def retrieve_symbol(gene):
+
+        if gene not in gene_symbol_dic.keys():
+
+            import sys
+            from Bio import Entrez
+
+            Entrez.email = "" #place email
+
+            request = Entrez.epost("gene", id=gene)
+
+            result = Entrez.read(request)
+
+            webEnv = result["WebEnv"]
+            queryKey = result["QueryKey"]
+            data = Entrez.esummary(db="gene", webenv=webEnv, query_key=queryKey)
+            annotations = Entrez.read(data)
+
+            gene_symbol = annotations["DocumentSummarySet"]["DocumentSummary"][0]["NomenclatureSymbol"]
+
+            gene_symbol_dic[gene] = gene_symbol
+
+        else: gene_symbol = gene_symbol_dic[gene]
+
+        return gene_symbol
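+
+    #Parse each KGML pathway file into two TSVs: 'components' maps each entry
+    #node to its member ids (genes, compounds, drugs, glycans) and 'relations'
+    #lists the pairwise links between nodes, expanding group entries so that
+    #every member inherits the group's relations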
+
+    def parse_kegg_xml():
+
+        for path in paths:
+
+            output_components = open(pro_dir_KEGG+'components/'+path+'.components.tsv','w')
+            output_relations = open(pro_dir_KEGG+'relations/'+path+'.relations.tsv','w')
+
+            tree = ET.parse(dwn_dir_KEGG+path+'.kgml.xml')
+            root = tree.getroot()
+
+            group = {}
+            for entry in root.iter('entry'):
+                if entry.attrib['type'] not in ['ortholog', 'map']:
+                    ID = entry.attrib['id']
+                    if entry.attrib['type'] == 'group':
+                        group[ID] = []
+                        for component in entry.iter('component'):
+                            group[ID].append(component.attrib['id'])
+                    else:
+                        components = entry.attrib['name'].replace('hsa:','')
+                        components = components.replace('cpd:','')
+                        components = components.replace('dr:','')
+                        components = components.replace('gl:','').split(' ')
+                        output_components.write('\t'.join([ID,':'.join(components)])+'\n')
+
+            for relation in root.iter('relation'):
+                entry1 = [relation.attrib['entry1']]
+                entry2 = [relation.attrib['entry2']]
+                TYPE = relation.attrib['type']
+
+                #if an ID belongs to a group, the relation is repeated for each member, pairing every component of one side with every component of the other
+                if entry1[0] in group.keys():
+                    entry1 = group[entry1[0]]
+
+                if entry2[0] in group.keys():
+                    entry2 = group[entry2[0]]
+
+                r_product = list(itertools.product(entry1, entry2))
+
+                (name, value) = ([], [])
+                for subtype in relation.iter('subtype'):
+                    name.append(subtype.attrib['name'])
+                    value.append(subtype.attrib['value'])
+
+                for r in r_product:
+                    output_relations.write('\t'.join([r[0],r[1],TYPE,','.join(name),','.join(value)])+'\n')
+
+            output_components.close()
+            output_relations.close()
+
+    def nodes_ids_and_symbols():
+        for i in progressbar.progressbar(range(len(paths))):
+            relations = pd.read_csv(pro_dir_KEGG+'relations/'+paths[i]+'.relations.tsv', sep='\t', low_memory=False, header=None)
+            relations.columns = ['entry1', 'entry2', 'type', 'name', 'value']
+            components = pd.read_csv(pro_dir_KEGG+'components/'+paths[i]+'.components.tsv', sep='\t', low_memory=False, header=None)
+            components.columns = ['ID', 'components']
+
+            fileo = open(pro_dir_KEGG+'dictionaries/'+paths[i]+'.codes.tsv', 'w')
+            fileo.write('\t'.join(['node', 'id', 'symbol'])+'\n')
+
+            nodes = list(set(relations['entry1'].tolist() + relations['entry2'].tolist()))
+
+            for node in nodes:
+                components_id = components.loc[components['ID'] == node]['components'].tolist()[0]
+                if re.search('\.', str(components_id)):
+                    #group node: translate each member node ID into its own components
+                    ids = str(components_id).split('.')
+                    for j, t in enumerate(ids):
+                        ids[j] = components.loc[components['ID'] == int(t)]['components'].tolist()[0]
+                else:
+                    ids = components.loc[components['ID'] == node]['components'].tolist()
+
+                ids = [x for ele in ids for x in str(ele).split(':')]
+
+                symbols = []
+                for el in ids:
+                    #skip compound (C) and glycan (G) identifiers, which are not genes
+                    if re.search('(C|G)', el) == None:
+                        symbol = retrieve_symbol(el)
+                        if symbol != '': symbols.append(symbol)
+
+                fileo.write('\t'.join([str(node), ':'.join(ids),':'.join(symbols)])+'\n')
+
+            fileo.close()
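+
+    # Each row of dictionaries/<path>.codes.tsv maps a KGML node to its member
+    # gene IDs and the resolved symbols, e.g. (illustrative):
+    #   23<TAB>5594:5595<TAB>MAPK1:MAPK3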
+
+    #translates node-ID relations into gene-symbol relations (not invoked by the current pipeline)
+    def translate_codes_to_symbols():
+        for i in progressbar.progressbar(range(len(paths))):
+            relations = pd.read_csv(pro_dir_KEGG+'relations/'+paths[i]+'.elemcode.tsv', sep='\t', low_memory=False, header=None)
+            relations.columns = ['entry1', 'entry2', 'type', 'name', 'value']
+            fileo = open(pro_dir_KEGG+'relations/'+paths[i]+'.symbol.tsv', 'w')
+
+            for index, row in relations.iterrows():
+                entries = [row['entry1'], row['entry2']]
+                for idx, entry in enumerate(entries):
+                    transl = []
+                    for el in entry.split(':'):
+                        if re.search('(C|G)', el) == None:
+                            symbol = retrieve_symbol(el)
+                            transl.append(symbol)
+                        else:
+                            transl.append('')
+
+                    entries[idx] = ':'.join(transl)
+
+                fileo.write('\t'.join(entries+[str(x) for x in row[2:].tolist()])+'\n')
+
+            fileo.close()
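+
+    # obtain_subroutes() enumerates candidate regulatory chains over each pathway
+    # graph: nx.all_simple_paths with cutoff=3 yields node paths of at most four
+    # nodes, and every consecutive node pair is then validated against the KGML
+    # relations. A pair survives only if a direct (non-'indirect effect') relation
+    # exists and the edge type is compatible with the source gene's role: for
+    # example, a gene whose only role is ONC is dropped when the edge into its
+    # target carries only 'inhibition'/'repression', while a TSG-only gene is
+    # dropped under 'activation'/'expression'.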
+
+    def obtain_subroutes():
+        import networkx as nx
+        import more_itertools
+
+        for i in progressbar.progressbar(range(len(paths))):
+            fileo = open(pro_dir_KEGG+'subpaths/'+paths[i]+'.symbol.tsv','w')
+            filei = pd.read_csv(pro_dir_KEGG+'relations/'+paths[i]+'.relations.tsv', sep='\t', low_memory=False, header=None)
+            filei.columns = ['entry1', 'entry2', 'type', 'name', 'value']
+            dictionary = pd.read_csv(pro_dir_KEGG+'dictionaries/'+paths[i]+'.codes.tsv', sep='\t', low_memory=False, header=0)
+            generole = pd.read_csv(pro_dir+'generole.tsv', sep='\t', low_memory=False, header=0)
+
+            entry_list = [list(a) for a in zip(filei.entry1, filei.entry2)]
+
+            # Create graph
+            G = nx.Graph()
+            # Fill graph with data
+            G.add_edges_from(entry_list)
+
+            #obtain all node-to-node paths of at most 4 components
+            all_paths = []
+            for start in G.nodes:
+                for end in G.nodes:
+                    if start != end:
+                        all_paths = all_paths + list(nx.all_simple_paths(G, start, end, 3))
+
+            #remove duplicates
+            all_paths.sort()
+            all_paths_dedup = [list(a) for a in (all_paths for all_paths,_ in itertools.groupby(all_paths))]
+
+            #discard elements after a non-valid association; discarded valid elements will appear at the beginning of another path retrieved by all_simple_paths,
+            #which also removes the nodes linked in the backward direction by all_simple_paths
+            all_paths_checked = []
+            for path in all_paths_dedup:
+                path_it = []
+                for pair in list(more_itertools.windowed(path, n=2, step=1)):
+                    associations = [item for sublist in [x.split(',') for x in filei.loc[(filei['entry1'] == pair[0]) & (filei['entry2'] == pair[1])]['name'].tolist()] for item in sublist]
+
+                    if len(associations) > 0: #valid: association is in the relations file
+                        if len(set(associations) & set(['indirect effect'])) == 0: #valid: association is not indirect
+                            if str(dictionary.loc[dictionary['node'] == pair[0]]['symbol'].tolist()[0]) != 'nan' and str(dictionary.loc[dictionary['node'] == pair[1]]['symbol'].tolist()[0]) != 'nan': #valid: both nodes have a gene symbol attached
+                                #update the source node
+                                genes = []
+                                for gene in dictionary.loc[dictionary['node'] == pair[0]]['symbol'].tolist()[0].split(':'):
+                                    #decide the role of the gene
+                                    roles = ':'.join(list(set([x for x in generole.loc[generole['gene'] == gene].values.flatten().tolist()[1:] if str(x) != 'nan'])))
+                                    #discard the gene when the pair carries an association that is restrictive for its role
+                                    if (roles in ['ONC', ''] and len(set(associations) & set(['activation', 'expression'])) > 0) or (roles == 'TSG' and len(set(associations) & set(['inhibition', 'repression'])) > 0) or (roles not in ['ONC', '', 'TSG']) or (len(set(['activation', 'expression', 'inhibition', 'repression']) & set(associations)) == 0):
+                                        genes.append(gene)
+
+                                gene_names = ':'.join(genes)
+                                if gene_names != '': #the association is only saved if the node keeps genes
+                                    if len(path_it) == 0: path_it = [gene_names]
+                                    else: path_it[-1] = gene_names
+
+                                    #report the destination node
+                                    genes = []
+                                    for gene in dictionary.loc[dictionary['node'] == pair[1]]['symbol'].tolist()[0].split(':'):
+                                        genes.append(gene)
+                                    gene_names = ':'.join(genes)
+                                    path_it.append(gene_names)
+
+                            else:
+                                break
+                        else:
+                            break
+                    else:
+                        break
+
+                all_paths_checked.append(path_it)
+
+            #remove duplicates
+            all_paths_checked = [x for x in all_paths_checked if len(x) > 1]
+            all_paths_checked.sort()
+            all_paths_checked_dedup = [list(a) for a in (all_paths_checked for all_paths_checked,_ in itertools.groupby(all_paths_checked))]
+
+            #remove subpaths fully contained in longer ones
+            all_paths_unified = []
+            for path in all_paths_checked_dedup:
+                found = False
+                for path_search in all_paths_checked_dedup:
+                    #when both paths in a match have the same length, the hit is the path itself, because duplicates have already been removed
+                    if any(path == path_search[i:i+len(path)] for i in range(len(path_search))) and len(path) != len(path_search):
+                        found = True
+                        break
+                if not found:
+                    all_paths_unified.append(path)
+
+            for path in all_paths_unified:
+                fileo.write('\t'.join([str(x) for x in path])+'\n')
+            fileo.close()
+
+    def obtain_upstream_genes():
+        upstream_genes = {}
+        for i in progressbar.progressbar(range(len(paths))):
+            filei = open(pro_dir_KEGG+'subpaths/'+paths[i]+'.symbol.tsv', 'r')
+            for line in filei:
+                line = line.rstrip('\n')
+                line_a = line.split('\t')
+                for idx, col in enumerate(line_a[1:]):
+                    for gene in col.split(':'):
+                        if gene in upstream_genes.keys():
+                            upstream_genes[gene] = upstream_genes[gene] + [x for ele in line_a[:idx+1] for x in ele.split(':')]
+                        else:
+                            upstream_genes[gene] = [x for ele in line_a[:idx+1] for x in ele.split(':')]
+            filei.close()
+        fileo = open(pro_dir_KEGG+'upstream_genes.tsv', 'w')
+        fileo.write('\t'.join(['gene', 'upstream_genes'])+'\n')
+        for gene in upstream_genes:
+            if gene != '':
+                upstream_genes_list = list(set(upstream_genes[gene]))
+                upstream_genes_list = list(filter(None, upstream_genes_list))
+                if len(upstream_genes_list) > 0:
+                    fileo.write('\t'.join([gene, '|'.join(upstream_genes_list)])+'\n')
+        fileo.close()
+
+    parse_kegg_xml()
+    nodes_ids_and_symbols()
+    obtain_subroutes()
+    obtain_upstream_genes()
+
+def process_gene_names():
+
+    print('Creating file with checked gene symbols...')
+
+    genes_chk = 'genes_checked.tsv'
+
+    genes = []
+
+    dgidb = 'DGIdb_interactions.tsv'
+    sabdab_file = 'sabdab.tsv'
+    moalmanac_file = 'moalmanac.tsv'
+    GDSC = 'GDSC.tsv'
+    drugbank_file = 'DrugBank.tsv'
+
+    files = [dgidb, sabdab_file, moalmanac_file, GDSC, drugbank_file]
+
+    for f in files:
+        inputf = open(pro_dir+f,'r')
+        for line in inputf:
+            line = line.rstrip('\n')
+            line_a = line.split('\t')
+            if 'gene_name' in line_a:
+                gene_index = line_a.index('gene_name')
+                continue
+            if f == GDSC: genes = genes+line_a[gene_index].split(',')
+            elif f == sabdab_file: genes = genes+line_a[gene_index].split(';')
+            else: genes.append(line_a[gene_index])
+        inputf.close()
+
+    outputf = open(pro_dir+genes_chk, 'w')
+    outputf.write('\t'.join(['gene_name', 'checked_gene_symbol'])+'\n')
+
+    import httplib2 as http
+    import json
+
+    try:
+        from urlparse import urlparse
+    except ImportError:
+        from urllib.parse import urlparse
+
+    headers = {
+        'Accept': 'application/json',
+    }
+
+    uri = 'http://rest.genenames.org'
+    method = 'GET'
+    body = ''
+    h = http.Http()
+
+    genes = list(set(genes))
+
+    for i in progressbar.progressbar(range(len(genes))):
+        found = False
+
+        path = '/search/symbol/'+genes[i]
+
+        target = urlparse(uri+path)
+
+        response, content = h.request(target.geturl(), method, body, headers)
+
+        if response['status'] == '200':
+            # assume that content is a json reply
+            # parse content with the json module
+            data = json.loads(content)
+            if len(data['response']['docs']) > 0:
+                outputf.write('\t'.join([genes[i],
data['response']['docs'][0]['symbol']])+'\n') + found = True + else: + print ('Error detected: ' + response['status']) + + if not found: + path = '/search/alias_symbol/'+genes[i] + target = urlparse(uri+path) + + response, content = h.request(target.geturl(), method, body, headers) + + if response['status'] == '200': + data = json.loads(content) + if len(data['response']['docs']) > 0: + outputf.write('\t'.join([genes[i], data['response']['docs'][0]['symbol']])+'\n') + found = True + else: + print ('Error detected: ' + response['status']) + + if not found: + path = '/search/prev_symbol/'+genes[i] + target = urlparse(uri+path) + + response, content = h.request(target.geturl(), method, body, headers) + + if response['status'] == '200': + data = json.loads(content) + if len(data['response']['docs']) > 0: + outputf.write('\t'.join([genes[i], data['response']['docs'][0]['symbol']])+'\n') + found = True + else: + print ('Error detected: ' + response['status']) + + if not found: + print('Not found:'+genes[i]) + outputf.write('\t'.join([genes[i], genes[i]])+'\n') + + outputf.close() + +def process_civic(): + + print('Processing civic file...') + + civic_file = 'civic.tsv' + civic_dwn = 'civic_evidence.tsv' + + output = open(pro_dir+civic_file, 'w') + output.write('\t'.join(['drug_name', 'gene_name', 'variation', 'response', 'source'])+'\n') + civic = pd.read_csv(dwn_dir+civic_dwn, sep ='\t', low_memory=False) + + civic_select = civic.loc[(civic['status'] == 'ACCEPTED') & (civic['evidence_direction'] == 'SUPPORTS') & (civic['clinical_significance'].isin(['RESISTANCE', 'SENSITIVITYRESPONSE'])) & (civic['evidenceLevel'].isin(['A']))] # A: Validated association + + drug_gene = civic_select.drop_duplicates(subset=['drug', 'gene']) + + for index, row in drug_gene.iterrows(): + alteration_sen = [] + alteration_res = [] + civic_subset = civic_select.loc[(civic_select['drug'] == row['drug']) & (civic_select['gene'] == row['gene'])] + + for index2, row2 in civic_subset.iterrows(): + if row2['clinical_significance'] == 'SENSITIVITYRESPONSE': + alteration_sen.append(row2['variant']) + else: + alteration_res.append(row2['variant']) + + if len(alteration_sen) > 0 and len(alteration_res) > 0: + response = 'sensitivity / resistance' + variation = '; '.join(list(set(alteration_sen)))+' / '+'; '.join(list(set(alteration_res))) + elif len(alteration_sen) > 0: + response = 'sensitivity' + variation = '; '.join(list(set(alteration_sen))) + else: + response = 'resistance' + variation = '; '.join(list(set(alteration_res))) + + + output.write('\t'.join([row['drug'], row['gene'], variation, response, 'CIViC'])+'\n') + output.close() + +def process_oncoKB(): + + print('Processing oncoKB file...') + + oncokb_file = 'oncokb.tsv' + oncokb_dwn = 'oncokb_biomarker_drug_associations.tsv' + + output = open(pro_dir+oncokb_file, 'w') + output.write('\t'.join(['drug_name', 'gene_name', 'variation', 'response', 'source'])+'\n') + oncokb = pd.read_csv(dwn_dir+oncokb_dwn, sep ='\t', low_memory=False) + + oncokb_select = oncokb.loc[(oncokb['Level'].isin(['1', '2', 'R1'])) & (oncokb['Gene'] != 'Other Biomarkers')] # 1, 2 and R1 (top evidence) + oncokb_select = oncokb_select.rename(columns={'Drugs (for therapeutic implications only)':'Drugs'}) + oncokb_select['Drugs'] = oncokb_select['Drugs'].apply(lambda x: x.split(', ')) + oncokb_select = oncokb_select.explode('Drugs') + oncokb_select = oncokb_select.reset_index() + + for index, row in oncokb_select.iterrows(): + d = row['Drugs'] + if re.search('\+', d): + d = ' + '.join(sorted(d.split(' 
+ '))) + oncokb_select.loc[index, ['Drugs']] = d + + drug_gene = oncokb_select.drop_duplicates(subset=['Drugs', 'Gene']) + + for index, row in drug_gene.iterrows(): + alteration_sen = [] + alteration_res = [] + oncokb_subset = oncokb_select.loc[(oncokb_select['Drugs'] == row['Drugs']) & (oncokb_select['Gene'] == row['Gene'])] + + for index2, row2 in oncokb_subset.iterrows(): + if re.search('excluding', row2['Alterations']) != None: + alt = [row2['Alterations']] + else: + alt = row2['Alterations'].split(', ') + + if row2['Level'] in ['1', '2', '3A']: + alteration_sen = alteration_sen + alt + else: + alteration_res = alteration_res + alt + + if len(alteration_sen) > 0 and len(alteration_res) > 0: + response = 'sensitivity / resistance' + variation = '; '.join(sorted(list(set(alteration_sen))))+' / '+'; '.join(sorted(list(set(alteration_res)))) + elif len(alteration_sen) > 0: + response = 'sensitivity' + variation = '; '.join(sorted(list(set(alteration_sen)))) + else: + response = 'resistance' + variation = '; '.join(sorted(list(set(alteration_res)))) + + output.write('\t'.join([row['Drugs'], row['Gene'], variation, response, 'OncoKB'])+'\n') + output.close() + +def process_intogen(): + + print('Processing intogen file...') + + intogen = 'intogen.tsv' + intogen_dwn = 'download' + + output = open(pro_dir+intogen, 'w') + output.write('\t'.join(['gene_name', 'cohort', 'cancer_type', 'qvalue_combination'])+'\n') + + with zipfile.ZipFile(dwn_dir+intogen_dwn) as z: + for i in z.infolist(): + if re.search('Compendium_Cancer_Genes.tsv', i.filename): + with z.open(i.filename) as f: + data = pd.read_csv(f, sep ='\t', header=0, low_memory=False) + for index, row in data.iterrows(): + output.write('\t'.join([row['SYMBOL'], row['COHORT'], row['CANCER_TYPE'], str(row['QVALUE_COMBINATION'])])+'\n') + + output.close() + +def process_depmap(): + + print('Processing DepMap public score + Chronos...') + + depmap_dwn = 'CRISPR_gene_effect.csv' + depmap_pro = 'chronos_skew.tsv' + + matrix = pd.read_csv(dwn_dir+depmap_dwn, low_memory=False) + matrix = matrix.set_index('DepMap_ID') + matrix.columns = [x.split(' ')[0] for x in matrix.columns.tolist()] + skewness = matrix.apply(lambda x : scipy.stats.skew(x, nan_policy='omit')) + skewness_min = pd.DataFrame(skewness) + skewness_min['min'] = skewness[skewness > -0.5].min() + + skewness_min.to_csv(pro_dir+depmap_pro, index=True, sep='\t', header=False) + +def process_KEGG_pathways(): + + print('Processing KEGG gene pathway file...') + + genepathway_file = 'gene_pathway.tsv' + + inputf = pd.read_csv(dwn_dir+genepathway_file, sep ="\t", header=None, low_memory=False) + + inputf.columns =['KEGG Gene ID', 'KEGG Pathway ID'] + inputf['KEGG Gene ID'] = inputf['KEGG Gene ID'].str.replace('hsa:','') + inputf['KEGG Pathway ID'] = inputf['KEGG Pathway ID'].str.replace('path:','') + + array_agg = lambda x: '|'.join(x.astype(str)) + inputf = inputf.groupby('KEGG Gene ID').agg({'KEGG Pathway ID': array_agg}) + + inputf.to_csv(pro_dir+genepathway_file, sep = "\t") + + print('Processing KEGG pathway descriptions file...') + + pathwaydesc_file = 'pathway_desc.tsv' + + inputf = pd.read_csv(dwn_dir+pathwaydesc_file, sep ="\t", header=None, low_memory=False) + + inputf.columns =['KEGG Pathway ID', 'KEGG Pathway desc'] + inputf['KEGG Pathway ID'] = inputf['KEGG Pathway ID'].str.replace('path:','') + inputf['KEGG Pathway desc'] = inputf['KEGG Pathway desc'].str.replace(' \- Homo sapiens \(human\)','') + + inputf.to_csv(pro_dir+pathwaydesc_file, sep = "\t", index=False) + +def process_SL(): 
+ + print('Processing SL dependencies file...') + + sl_file = 'all_genetic_dependencies.tsv' + + inputf = pd.read_csv(dwn_dir+sl_file, sep ="\t", low_memory=False) + outputf = open(pro_dir+'genetic_dependencies.tsv','w') + outputf.write('\t'.join(['gene','genetic_dependency'])+'\n') + + sel = inputf.loc[inputf['padj'] < 0.25].groupby("dependency") + + for dependency, frame in sel: + dep_genes = [] + for index, row in sel.get_group(dependency).iterrows(): + if not row['gene'] == dependency: + dep_genes.append(row['gene']+'('+row['alteration']+')') + if len(dep_genes) > 0: + outputf.write('\t'.join([dependency,'|'.join(dep_genes)])+'\n') + + outputf.close() + +def process_file(filename): + print(filename) + cosmic = {} + gene_freq = {} + mut_freq = {} + + inputf = pd.read_csv(pro_dir+'temp/'+filename, sep ="\t", header=0, low_memory=False) + + if not inputf.empty: + inputf[['Gene name']] = inputf['Gene name'].str.split('_',expand=True)[[0]] + inputf[['HGVSC_Transcript','HGVSC']] = inputf['HGVSC'].str.split(':',expand=True) + inputf[['HGVSC_Transcript']] = inputf['HGVSC_Transcript'].str.split('.',expand=True)[[0]] + outputf = open(pro_dir+'temp/'+filename.replace('.tsv', '_prc.tsv'), 'w') + + gene_list = inputf['Gene name'].unique().tolist() + for gene in gene_list: + samples_gene = len(inputf.loc[inputf['Gene name'] == gene]['ID_sample'].unique().tolist()) + mut_list = inputf.loc[inputf['Gene name'] == gene]['GENOMIC_MUTATION_ID'].unique().tolist() + for mut in mut_list: + samples_mut = len(inputf.loc[inputf['GENOMIC_MUTATION_ID'] == mut]['ID_sample'].unique().tolist()) + transcript_list = inputf.loc[(inputf['Gene name'] == gene) & (inputf['GENOMIC_MUTATION_ID'] == mut)]['HGVSC_Transcript'].unique().tolist() + for trans in transcript_list: + FATHMM = ['' if x is np.nan else x for x in inputf.loc[(inputf['Gene name'] == gene) & (inputf['GENOMIC_MUTATION_ID'] == mut) & (inputf['HGVSC_Transcript'] == trans)]['FATHMM prediction'].tolist()][0] + HGVSc = inputf.loc[(inputf['Gene name'] == gene) & (inputf['GENOMIC_MUTATION_ID'] == mut) & (inputf['HGVSC_Transcript'] == trans)]['HGVSC'].tolist()[0] + + outputf.write('\t'.join([':'.join([gene, trans, HGVSc]), mut, FATHMM, str(samples_gene), str(samples_mut), str(total_cosmic_rec)])+'\n') + outputf.close() + +def create_cosmic_temp_files(idx): + outputn = cosmic_file.replace('.tsv.gz','.tsv.filtered.gz') + output_file = pro_dir+'temp/'+'COSMIC_'+str(idx)+'.tsv' + select_column = ['Gene name', 'ID_sample', 'GENOMIC_MUTATION_ID', 'GRCh', 'Mutation genome position', 'FATHMM prediction', 'Mutation somatic status', 'HGVSC'] + outputf = open(output_file,'w') + outputf.write('\t'.join(select_column)+'\n') + outputf.close() + + cleaned_genes_group = [x for x in genes_group[idx] if str(x) != 'None'] + gene_list = ['^'+x+'_' for x in cleaned_genes_group]+['^'+x+'\t' for x in cleaned_genes_group] + + command = 'zcat '+pro_dir+outputn+' | egrep \''+'|'.join(gene_list)+'\' >> '+ output_file + subprocess.call(command,shell=True) + +def process_cosmic(): + + global cosmic_file + cosmic_file = 'CosmicMutantExport.tsv.gz' + + print('Processing COSMIC data...') + + print('-->Filtering COSMIC...') + #filtering columns + inputf = gzip.open(dwn_dir+cosmic_file,'r') + first_line = inputf.readline().rstrip().decode().split('\t') + global outputn + outputn = cosmic_file.replace('.tsv.gz','.tsv.filtered.gz') + select_column = ['Gene name', 'ID_sample', 'GENOMIC_MUTATION_ID', 'GRCh', 'Mutation genome position', 'FATHMM prediction', 'Mutation somatic status', 'HGVSC'] + 
inputf.close() + + #filtering records + idx = [first_line.index(i) for i in select_column] + cols = [str(i + 1) for i in idx] + #some records have an empty ID (not counted) + command = 'zcat '+dwn_dir+cosmic_file+' | cut -f '+','.join(cols)+' | awk -F"\t" \'($3 != "" && $4 == "38") || $1 == "Gene name" {print}\' | gzip > '+pro_dir+outputn + subprocess.call(command,shell=True) + + #obtaining list of genes + gfile = pro_dir+'gene_list.txt' + command = 'zcat '+pro_dir+outputn+' | cut -f 1 | cut -d\'_\' -f 1 | grep -v Gene | sort | uniq > '+gfile + subprocess.call(command,shell=True) + + print('-->Creating COSMIC file...') + genes_file = pd.read_csv(gfile, sep ="\t", header=None, low_memory=False) + gene_list = genes_file[0].tolist() + + def grouper(n, iterable, fillvalue=None): + import itertools + args = [iter(iterable)] * n + return itertools.zip_longest(*args, fillvalue=fillvalue) + + global genes_group + genes_group = list(grouper(10, gene_list)) + + os.mkdir(pro_dir+'temp/') + a_pool = multiprocessing.Pool(processes=9) + + result_list_tqdm = [] + for result in tqdm(a_pool.imap(create_cosmic_temp_files, range(0,len(genes_group))), total=len(genes_group)): + result_list_tqdm.append(result) + + with gzip.open(pro_dir+outputn,'r') as f: + global total_cosmic_rec + total_cosmic_rec = len(f.readlines()) - 1 #removing the header for the count + + gfreq = open(pro_dir+'cosmic_gene_freq.tsv','w') + gfreq.write('\t'.join(['Gene name', 'Gene_freq', 'Total'])+'\n') + gfreq.close() + gfreq = open(pro_dir+'cosmic_gene_freq.tsv','a') + listdir = os.listdir(pro_dir+'temp/') + for efile in listdir: + inputf = pd.read_csv(pro_dir+'temp/'+efile, sep ="\t", header=0, low_memory=False) + inputf[['Gene name']] = inputf['Gene name'].str.split('_',expand=True)[[0]] + gene_list = inputf['Gene name'].unique().tolist() + for gene in gene_list: + samples_gene = len(inputf.loc[inputf['Gene name'] == gene]['ID_sample'].unique().tolist()) + + gfreq.write('\t'.join([gene, str(samples_gene), str(total_cosmic_rec)])+'\n') + gfreq.close() + + a_pool = multiprocessing.Pool(processes=9) + + result_list_tqdm = [] + for result in tqdm(a_pool.imap(process_file, os.listdir(pro_dir+'temp/')), total=len(os.listdir(pro_dir+'temp/'))): + result_list_tqdm.append(result) + + read_files = glob.glob(pro_dir+'temp/'+"*_prc.tsv") + with open(pro_dir+'COSMIC.tsv', 'ab') as outfile: + outfile.write(bytes('\t'.join(['Cosmic_key', 'cosmic_id', 'FATHMM', 'Gene_freq', 'Mut_freq', 'Total'])+'\n','utf-8')) + for f in read_files: + with open(f, "rb") as infile: + outfile.write(infile.read()) + + shutil. 
rmtree(pro_dir+'temp/')
+
+def process_xml_clinvar(ofile):
+
+    outputf = open(ofile,'a')
+
+    tree = ET.parse(xml_section)
+    root = tree.getroot()
+
+    for ClinVarSet in root.iter('ClinVarSet'):
+        (ACC, ASSEMBLY, CHR, VCF_POS, VCF_REF, VCF_ALT, SYMBOL, TRAIT, SIG) = ([], [], [], [], [], [], [], [], [])
+        for ReferenceClinVarAssertion in ClinVarSet.iter('ReferenceClinVarAssertion'):
+            for ClinVarAccession in ReferenceClinVarAssertion.iter('ClinVarAccession'):
+                ACC.append(ClinVarAccession.attrib['Acc'])
+            for MeasureSet in ReferenceClinVarAssertion.iter('MeasureSet'):
+                for Measure in MeasureSet.iter('Measure'):
+                    for SequenceLocation in Measure.iter('SequenceLocation'):
+                        if 'positionVCF' in SequenceLocation.attrib.keys():
+                            ASSEMBLY.append(SequenceLocation.attrib['Assembly'])
+                            CHR.append(SequenceLocation.attrib['Chr'])
+                            VCF_POS.append(SequenceLocation.attrib['positionVCF'])
+                            VCF_REF.append(SequenceLocation.attrib['referenceAlleleVCF'])
+                            VCF_ALT.append(SequenceLocation.attrib['alternateAlleleVCF'])
+                    for Symbol in Measure.iter('Symbol'):
+                        for ElementValue in Symbol.iter('ElementValue'):
+                            if ElementValue.attrib['Type'] == 'Preferred': SYMBOL.append(ElementValue.text)
+            for TraitSet in ReferenceClinVarAssertion.iter('TraitSet'):
+                for Trait in TraitSet:
+                    for Name in Trait.iter('Name'):
+                        for ElementValue in Name.iter('ElementValue'):
+                            if ElementValue.attrib['Type'] == 'Preferred': TRAIT.append(ElementValue.text)
+            for ClinicalSignificance in ReferenceClinVarAssertion.iter('ClinicalSignificance'):
+                for Description in ClinicalSignificance.iter('Description'):
+                    SIG.append(Description.text)
+
+        for idx in range(len(ASSEMBLY)):
+            outputf.write('\t'.join(['::'.join(list(set(ACC))),ASSEMBLY[idx],CHR[idx],VCF_POS[idx],VCF_REF[idx],VCF_ALT[idx],'::'.join(list(set(SYMBOL))),'::'.join(list(set(TRAIT))),'::'.join(list(set(SIG)))])+'\n')
+
+    outputf.close()
+
+def process_clinvar():
+
+    print('Processing ClinVar data...')
+
+    clinvar_file = 'ClinVarFullRelease_00-latest.xml.gz'
+
+    print('-->Filtering ClinVar...')
+
+    #filter records, keeping only the tags consumed by process_xml_clinvar
+    inputf = gzip.open(dwn_dir+clinvar_file,'rb')
+    outputn = clinvar_file.replace('.xml.gz','tagfiltered.xml.gz')
+    outputf = gzip.open(pro_dir+outputn,'wb')
+
+    for l in inputf:
+        if re.search('xml version|ReleaseSet|ClinVarSet|ReferenceClinVarAssertion|ClinVarAccession|MeasureSet|Measure|SequenceLocation|Symbol|ElementValue|TraitSet|Trait|Name|ClinicalSignificance|Description', l.decode()):
+            outputf.write(l)
+
+    inputf.close()
+    outputf.close()
+
+    print('-->Creating ClinVar file...')
+
+    #create tsv file
+    global xml_section
+    xml_section = pro_dir+'xml_temp.xml'
+    outputf = open(pro_dir+'Clinvar.tsv','w')
+    outputf.write('\t'.join(['Acc','Assembly','Chr','VCF_pos','VCF_ref','VCF_alt','Gene','Trait','Significance'])+'\n')
+    outputf.close()
+
+    #counter flags whether a ClinVarSet section is currently open
+    counter = 0
+    xml = gzip.open(pro_dir+outputn, 'rb')
+
+    for line in xml:
+        line = line.decode()
+        if re.search('^<ClinVarSet', line):
+            xml_section_file = open(xml_section, 'w')
+            xml_section_file.write('<?xml version="1.0" encoding="UTF-8"?>'+'\n')
+            xml_section_file.write(line)
+            counter = 1
+        elif re.search('^</ClinVarSet>', line):
+            xml_section_file.write(line)
+            xml_section_file.close()
+            counter = 0
+            process_xml_clinvar(pro_dir+'Clinvar.tsv')
+        elif counter == 1:
+            xml_section_file.write(line)
+
+    xml.close()
+    os.remove(xml_section)
+    os.remove(pro_dir+outputn)
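+
+# Pfam-A.full is distributed in Stockholm format; process_pfam() keeps the
+# per-family header lines and the human sequence lines, which look roughly like
+# (illustrative):
+#   #=GF ID   Pkinase
+#   #=GF AC   PF00069.28
+#   #=GF DE   Protein kinase domain
+#   #=GS BRAF_HUMAN/457-714  AC P15056.4
+# and rewrites them as one domain/protein/coordinates row per #=GS line.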
+
+def process_pfam():
+
+    print('Processing Pfam data...')
+
+    pfam_file = 'Pfam-A.full.gz'
+
+    print('-->Filtering Pfam...')
+    #filter records
+    inputf = gzip.open(dwn_dir+pfam_file,'rb')
+    outputn = pfam_file.replace('.full.gz','.full.filtered.gz')
+    outputf = gzip.open(pro_dir+outputn,'wb')
+
+    for l in inputf:
+        if re.search('^#=GF ID|^#=GF AC|^#=GF DE|_HUMAN|^//',l.decode('latin-1')):
+            outputf.write(l)
+
+    inputf.close()
+    outputf.close()
+
+    print('-->Creating Pfam file...')
+    #create tsv file
+    outputf = open(pro_dir+'Pfam-A.full.tsv','w')
+    outputf.write('\t'.join(['DOMAIN_ID','PFAM_ACC','DOMAIN_DESCRIPT','PROTEIN_NAME','PROTEIN_ACC','START','END'])+'\n')
+
+    filei = gzip.open(pro_dir+outputn,'rb')
+
+    (domain_id, pfam_acc, domain_des, protein_name, protein_acc, start, end) = ('', '', '', '', '', '', '')
+
+    for line in filei:
+        line = line.decode('latin-1')
+        line = line.strip('\n')
+        line_a = line.split(' ')
+        if line_a[0] == '//':
+            (domain_id, pfam_acc, domain_des, protein_name, protein_acc, start, end) = ('', '', '', '', '', '', '')
+        else:
+            if line_a[1] == 'ID': domain_id = ' '.join(line_a[4:])
+            if line_a[1] == 'AC': pfam_acc = ' '.join(line_a[4:])
+            if line_a[1] == 'DE': domain_des = ' '.join(line_a[4:])
+            if line_a[0] == '#=GS' and len(line_a) > 10:
+                if 'AC' in line_a:
+                    protein_name = line_a[1].split('_')[0]
+                    protein_acc = line_a[line_a.index('AC')+1].split('.')[0]
+                    start = line_a[1].split('/')[1].split('-')[0]
+                    end = line_a[1].split('/')[1].split('-')[1]
+
+                    outputf.write('\t'.join([domain_id, pfam_acc, domain_des, protein_name, protein_acc, start, end])+'\n')
+
+    filei.close()
+    outputf.close()
+    os.remove(pro_dir+outputn)
+
+def process_interpro():
+
+    print('Processing Interpro data...')
+
+    interpro_file = 'match_complete.xml.gz'
+
+    #filter records, keeping the XML skeleton and the human protein blocks
+    print('-->Filtering Interpro...')
+    inputf = gzip.open(dwn_dir+interpro_file,'rb')
+    outputn = interpro_file.replace('.xml.gz','.xml.filtered.gz')
+    outputf = gzip.open(pro_dir+outputn,'wb')
+
+    for line in inputf:
+        line = line.decode().rstrip()
+        if re.search('^<\?xml|^<!DOCTYPE|^<interpromatch|^</interpromatch', line):
+            outputf.write(bytes(line+'\n','utf-8'))
+        elif re.search('^<protein', line) and re.search('_HUMAN', line):
+            outputf.write(bytes(line+'\n','utf-8'))
+            line = next(inputf).decode().rstrip()
+            while not re.search('^</protein>', line):
+                outputf.write(bytes(line+'\n','utf-8'))
+                line = next(inputf).decode().rstrip()
+            outputf.write(bytes(line+'\n','utf-8'))
+
+    inputf.close()
+    outputf.close()
+
+    print('-->Creating Interpro file...')
+
+    outputf = open(pro_dir+'Interpro.tsv','w')
+    outputf.write('\t'.join(['DOMAIN_ID','DOMAIN_DESCRIPT','PROTEIN_NAME','PROTEIN_ACC','START','END'])+'\n')
+
+    inputf = gzip.open(pro_dir+outputn,'rb')
+    tree = ET.parse(inputf)
+    root = tree.getroot()
+
+    for protein in root.iter('protein'):
+        (domain_id, domain_des, protein_name, protein_acc, start, end) = ('', '', '', '', '', '')
+        protein_acc = protein.attrib['id']
+        protein_name = protein.attrib['name'].split('_')[0]
+
+        for match in protein.iter('match'):
+            for ipr in match.iter('ipr'):
+                domain_id = ipr.attrib['id']
+                domain_des = ipr.attrib['name']
+            for lcn in match.iter('lcn'):
+                start = lcn.attrib['start']
+                end = lcn.attrib['end']
+                outputf.write('\t'.join([domain_id, domain_des, protein_name, protein_acc, start, end])+'\n')
+
+    outputf.close()
+    inputf.close()
+    os.remove(pro_dir+outputn)
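+
+# uniprot_sprot.xml stores one <entry> per protein with <accession>,
+# <gene>/<name> and <organism>/<name> children, roughly (illustrative):
+#   <entry>
+#     <accession>P15056</accession>
+#     <gene><name type="primary">BRAF</name></gene>
+#     <organism><name type="scientific">Homo sapiens</name><name type="common">Human</name></organism>
+#   </entry>
+# process_uniprot() keeps the entries whose last organism <name> is 'Human'.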
+
+def process_uniprot():
+
+    print('Processing Uniprot data...')
+
+    uniprot_file = 'uniprot_sprot.xml.gz'
+
+    print('-->Filtering Uniprot...')
+    #filter records
+    inputf = gzip.open(dwn_dir+uniprot_file,'rb')
+    outputn = uniprot_file.replace('.xml.gz','.xml.filtered.gz')
+    outputf = gzip.open(pro_dir+outputn,'wb')
+
+    for l in inputf:
+        l = l.decode()
+        if re.search('\?xml|<uniprot|</uniprot|<entry|</entry|<accession|<gene|</gene|<name|<organism|</organism', l):
+            outputf.write(bytes(l,'utf-8'))
+
+    inputf.close()
+    outputf.close()
+
+    print('-->Creating Uniprot file...')
+
+    outputf = open(pro_dir+'Uniprot.tsv','w')
+    outputf.write('\t'.join(['PROTEIN_ID','GENE_NAME','PUBMED_ID','dbSNP_ref'])+'\n')
+
+    inputf = gzip.open(pro_dir+outputn,'rb')
+    tree = ET.parse(inputf)
+    root = tree.getroot()
+
+    for entry in root.iter('entry'):
+        (protein_id, gene_name, organism_name) = ([], '', '')
+        for accession in entry.iter('accession'):
+            protein_id.append(accession.text)
+        for gene in entry.iter('gene'):
+            for name in gene.iter('name'):
+                gene_name = name.text
+        for organism in entry.iter('organism'):
+            for name in organism.iter('name'):
+                organism_name = name.text
+        if organism_name == 'Human':
+            outputf.write('\t'.join([';'.join(protein_id),gene_name,'',''])+'\n')
+
+    outputf.close()
+    inputf.close()
+    os.remove(pro_dir+outputn)
+
+def process_hallmarks():
+
+    print('Processing Hallmarks data...')
+
+    hallmarks_dwn = "hallmarks.xlsx"
+    hallmarks_file = "hallmarks.tsv"
+
+    filei = pd.read_excel(dwn_dir+hallmarks_dwn)
+    filei['Genes_list'] = filei['Genes'].apply(lambda x: x.split(', '))
+    filei = filei.explode('Genes_list')
+    filei.to_csv(pro_dir+hallmarks_file, index=False, sep='\t', header=True)
+
+process_DGIdb()
+process_sabdab()
+process_moalmanac()
+process_GDSC()
+process_KEGG_ATC()
+process_cmap()
+process_FDA()
+process_FDA_label()
+process_EMA()
+process_ct()
+process_cgc_oncovar()
+process_KEGG_ind()
+process_gene_names()
+#manually correct NKX2.1, which the symbol check interprets as the two tokens NKX2 and 1
+process_civic()
+process_oncoKB()
+process_intogen()
+process_depmap()
+process_KEGG_pathways()
+process_SL()
+process_hallmarks()
+
+#exclusive for genomic annotation
+process_cosmic()
+process_clinvar()
+process_pfam()
+process_interpro()
+process_uniprot()
diff --git a/src/python_example.py b/src/python_example.py
new file mode 100644
index 0000000..d29099a
--- /dev/null
+++ b/src/python_example.py
@@ -0,0 +1,100 @@
+#!/usr/bin/env python
+
+#This example uses the 'requests' library
+#To install requests:
+#  sudo pip install requests
+#If you don't have pip:
+#  see http://www.pip-installer.org/en/latest/installing.html
+#  or
+#  wget https://raw.github.com/pypa/pip/master/contrib/get-pip.py
+#  sudo python get-pip.py
+
+import sys
+import argparse
+import json
+import requests
+
+def usage():
+    print("Usage Examples:")
+    print("python python_example.py --help")
+    print("python python_example.py --genes='FLT3'")
+    print("python python_example.py --genes='FLT3,EGFR,KRAS'")
+    print("python python_example.py --genes='FLT3,EGFR' --interaction_sources='TALC,TEND'")
+    print("python python_example.py --genes='FLT3,EGFR' --gene_categories='KINASE'")
+    print("python python_example.py --genes='FLT3,EGFR' --interaction_types='inhibitor'")
+    print("python python_example.py --genes='FLT3,EGFR' --source_trust_levels='Expert curated'")
+    print("python python_example.py --genes='FLT3,EGFR' --antineoplastic_only")
+    print("python python_example.py --genes='FLT3,EGFR,KRAS' --interaction_sources='TALC,TEND,MyCancerGenome' --gene_categories='KINASE' --interaction_types='inhibitor' --antineoplastic_only")
+    sys.exit(0)
+
+def parse_args():
+    parser = argparse.ArgumentParser(description = "A Python example for using the DGIdb API", epilog = "For complete API documentation refer to http://dgidb.org/api")
+    parser.add_argument("-g", "--genes", help="list of gene symbols (REQUIRED). Use official Entrez symbols for best results", dest="genes")
+    parser.add_argument("-is", "--interaction_sources", help="Limit results to those from particular data sources. e.g. 'DrugBank', 'PharmGKB', 'TALC', 'TEND', 'TTD', 'MyCancerGenome'", dest="interaction_sources")
+    parser.add_argument("-it", "--interaction_types", help="Limit results to interactions with drugs that have a particular mechanism of action. e.g. 'inhibitor', 'antibody', etc", dest="interaction_types")
+    parser.add_argument("-gc", "--gene_categories", help="Limit results to genes with a particular druggable gene type. e.g. 'KINASE', 'ION CHANNEL', etc", dest="gene_categories")
+    parser.add_argument("-stl", "--source_trust_levels", help="Limit results based on trust level of the interaction source. e.g. 'Expert curated' or 'Non-curated'", dest = "source_trust_levels")
+    parser.add_argument("-ano", "--antineoplastic_only", help="Limit results to anti-cancer drugs only", dest="antineoplastic_only", action = 'store_true')
+    parser.add_argument("-u", "--usage", help="Usage examples", dest="usage", action = 'store_true')
+    return parser.parse_args()
+
+class DGIAPI:
+    'API Example class for DGI API.'
+    #modified following https://github.com/dgidb/dgidb/issues/324
+    domain = 'https://dgidb.org'
+    api_path = '/api/v1/interactions.json'
+    def __init__(self, args):
+        self.genes = args.genes
+        self.interaction_sources = args.interaction_sources
+        self.interaction_types = args.interaction_types
+        self.gene_categories = args.gene_categories
+        self.source_trust_levels = args.source_trust_levels
+        self.antineoplastic_only = args.antineoplastic_only
+    def run_workflow(self):
+        self.create_request()
+        self.post_request()
+        self.print_response()
+    def create_request(self):
+        self.payload = {}
+        if(self.genes):
+            self.payload['genes'] = self.genes
+        if(self.interaction_sources):
+            self.payload['interaction_sources'] = self.interaction_sources
+        if(self.gene_categories):
+            self.payload['gene_categories'] = self.gene_categories
+        if(self.interaction_types):
+            self.payload['interaction_types'] = self.interaction_types
+        if(self.source_trust_levels):
+            self.payload['source_trust_levels'] = self.source_trust_levels
+        if(self.antineoplastic_only):
+            self.payload['drug_types'] = 'antineoplastic'
+    def post_request(self):
+        self.request = DGIAPI.domain + DGIAPI.api_path
+        self.response = requests.post(self.request, data = self.payload)
+    def print_response(self):
+        response = json.loads(self.response.content)
+        matches = response['matchedTerms']
+        if(matches):
+            print("gene_name\tdrug_name\tinteraction_type\tsource\tgene_categories")
+            for match in matches:
+                gene = match['geneName']
+                categories = match['geneCategories']
+                categories.sort()
+                joined_categories = ",".join(categories)
+                for interaction in match['interactions']:
+                    source = interaction['source']
+                    drug = interaction['drugName']
+                    interaction_type = interaction['interactionType']
+                    print(gene + "\t" + drug + "\t" + interaction_type + "\t" + source + "\t" + joined_categories.lower())
+        for unmatched in response['unmatchedTerms']:
+            print("Unmatched search term: " + unmatched['searchTerm'])
+            print("Possible suggestions: " + ",".join(unmatched['suggestions']))
+
+if __name__ == '__main__':
+    args = parse_args()
+    if(not args.genes or args.usage):
+        usage()
+    da = DGIAPI(args)
+    da.run_workflow()