Skip to content

Commit

Permalink
Merge pull request #21 from tokebe/load-delimitation
Browse files Browse the repository at this point in the history
Load delimitation
  • Loading branch information
tokebe authored Jul 12, 2021
2 parents d8fdef3 + 3fdf37f commit 6d0b36e
Show file tree
Hide file tree
Showing 17 changed files with 538 additions and 2,670 deletions.
8 changes: 4 additions & 4 deletions niclassify/core/StandardProgram.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ def mp_delim(arg):
# rename delims with order prefix
delim = utilities.get_data(arg[3])
delim["Delim_spec"] = arg[0] + delim["Delim_spec"].astype(str)
delim.to_csv(arg[3], sep="\t", index=False)
delim.to_csv(arg[3], index=False)


def mp_ftgen(arg):
Expand Down Expand Up @@ -832,7 +832,7 @@ def split_by_taxon(self, taxon_split=None, create_align=False):
delim_file = tempfile.NamedTemporaryFile(
mode="w+",
prefix="delim_{}_".format(taxon),
suffix=".tsv",
suffix=".csv",
delete=False,
dir=pool_dir.name
)
Expand All @@ -841,7 +841,7 @@ def split_by_taxon(self, taxon_split=None, create_align=False):
if delims is not None:
print(delims[delims["sample_name"].isin(pids)])
delims[delims["sample_name"].isin(pids)].to_csv(
delim_file.name, sep="\t", index=False)
delim_file.name, index=False)

tree_file = tempfile.NamedTemporaryFile(
mode="w+",
Expand Down Expand Up @@ -920,7 +920,7 @@ def delimit_species(self, method="bPTP", tax=None, debug=False):
ignore_index=True,
sort=False
)
delim_merge.to_csv(self.delim_fname, sep="\t", index=False)
delim_merge.to_csv(self.delim_fname, index=False)

pool_dir.cleanup()

Expand Down
2 changes: 1 addition & 1 deletion niclassify/core/scripts/create_measures.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ args <- commandArgs(trailingOnly = TRUE)
seq_alignment<-read.FASTA(args[1], type="DNA")

## Pull in Species Assignments from FNAME_SPECIES ##
speciesNames <- read_tsv(args[2])
speciesNames <- read_csv(args[2])

print(speciesNames)

Expand Down
2 changes: 1 addition & 1 deletion niclassify/core/scripts/delim_tree.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,5 @@ if (length(args) > 2) {
# Run splits to delimit species
GMYC <- gmyc(UPGMA)
# Save results to given file
write_delim(spec.list(GMYC), args[3], delim = "\t")
write_csv(spec.list(GMYC), args[3])
}
8 changes: 6 additions & 2 deletions niclassify/core/utilities/ftprep_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,9 @@
"dnaSim_med",
"dnaSim_std",
"dnaSim_min",
"dnaSim_max"
"dnaSim_max",
"index",
"level_0"
]


Expand Down Expand Up @@ -264,7 +266,7 @@ def delimit_species_bPTP(infname, outtreefname, outfname, debug=False):
pd.DataFrame({
"Delim_spec": species_expanded,
"sample_name": samples_expanded
}).to_csv(outfname, sep="\t", index=False)
}).to_csv(outfname, index=False)


def delimit_species_GMYC(infname, outtreefname, outfname, debug=False):
Expand Down Expand Up @@ -359,6 +361,8 @@ def generate_measures(fastafname, delimfname, outfname, debug=False):
else:
proc = subprocess.run(
ftgen_call,
stdout=logfile,
stderr=logfile,
env=os.environ.copy(),
creationflags=(
0 if PLATFORM != 'Windows' else subprocess.CREATE_NO_WINDOW)
Expand Down
23 changes: 4 additions & 19 deletions niclassify/core/utilities/general_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,32 +174,17 @@ def get_data(filename, excel_sheet=None):
# engine="python"
)

elif ".csv" in os.path.splitext(filename)[1]: # using csv
raw_data = pd.read_csv(
filename,
na_values=NANS,
keep_default_na=True,
engine="python"
)

elif ".tsv" in os.path.splitext(filename)[1]: # using tsv
raw_data = pd.read_csv(
filename,
na_values=NANS,
keep_default_na=True,
sep="\t",
engine="python"
)

# using txt; must figure out delimiter
elif ".txt" in os.path.splitext(filename)[1]:
elif (os.path.splitext(filename)[1] in [".csv", ".tsv", ".txt"]):
# use python engine to guess separator each time
# because who trusts file extensions?
raw_data = pd.read_csv(
filename,
na_values=NANS,
keep_default_na=True,
sep=None,
engine="python"
)

else: # invalid extension
raise TypeError(
"data file type is unsupported, or file extension not included")
Expand Down
Loading

0 comments on commit 6d0b36e

Please sign in to comment.