From 2958d58f65fb4b0e256868317d8901143e1a30a4 Mon Sep 17 00:00:00 2001 From: thomasyu888 Date: Mon, 7 Oct 2019 11:00:09 -0700 Subject: [PATCH 1/4] Rename bed file --- genie/database_to_staging.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/genie/database_to_staging.py b/genie/database_to_staging.py index e511e06e..5c417419 100644 --- a/genie/database_to_staging.py +++ b/genie/database_to_staging.py @@ -473,7 +473,7 @@ def stagingToCbio( SEG_PATH = os.path.join( GENIE_RELEASE_DIR, 'genie_private_data_cna_hg19_%s.seg' % genieVersion) COMBINED_BED_PATH = os.path.join( - GENIE_RELEASE_DIR, 'genie_combined_%s.bed' % genieVersion) + GENIE_RELEASE_DIR, 'genomic_information_%s.txt' % genieVersion) consortiumReleaseSynId = databaseSynIdMappingDf['Id'][ databaseSynIdMappingDf['Database'] == "consortium"][0] variant_filtering_synId = databaseSynIdMappingDf['Id'][ @@ -1110,7 +1110,7 @@ def stagingToCbio( syn, COMBINED_BED_PATH, parent=consortiumReleaseSynId, genieVersion=genieVersion, - name="genie_combined.bed", + name="genomic_information.txt", staging=current_release_staging) return(genePanelEntities) From 3b38b69814b291dd6afa58b2982ab947bee48f5f Mon Sep 17 00:00:00 2001 From: thomasyu888 Date: Mon, 7 Oct 2019 11:05:17 -0700 Subject: [PATCH 2/4] Switch from bed to genomic_information --- genie/consortium_to_public.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/genie/consortium_to_public.py b/genie/consortium_to_public.py index b22c3351..84fae6e9 100644 --- a/genie/consortium_to_public.py +++ b/genie/consortium_to_public.py @@ -45,7 +45,7 @@ def consortiumToPublic(syn, processingDate, genie_version, releaseId, databaseSy MUTATIONS_PATH = os.path.join(dbTostaging.GENIE_RELEASE_DIR,'data_mutations_extended_%s.txt' % genie_version) FUSIONS_PATH = os.path.join(dbTostaging.GENIE_RELEASE_DIR,'data_fusions_%s.txt' % genie_version) SEG_PATH = os.path.join(dbTostaging.GENIE_RELEASE_DIR,'genie_public_data_cna_hg19_%s.seg' % genie_version) - COMBINED_BED_PATH = os.path.join(dbTostaging.GENIE_RELEASE_DIR,'genie_combined_%s.bed' % genie_version) + COMBINED_BED_PATH = os.path.join(dbTostaging.GENIE_RELEASE_DIR,'genomic_information_%s.txt' % genie_version) if not os.path.exists(dbTostaging.GENIE_RELEASE_DIR): os.mkdir(dbTostaging.GENIE_RELEASE_DIR) @@ -186,12 +186,12 @@ def consortiumToPublic(syn, processingDate, genie_version, releaseId, databaseSy # panelDf = panelDf[panelDf['SAMPLE_ID'].isin(publicReleaseSamples)] # panelDf.to_csv(DATA_GENE_PANEL_PATH,sep="\t",index=False) # storeFile(syn, DATA_GENE_PANEL_PATH, PUBLIC_RELEASE_PREVIEW, ANONYMIZE_CENTER_DF, genie_version, name="data_gene_matrix.txt") - elif entName == "genie_combined.bed": + elif entName == "genomic_information.txt": bed = syn.get(entId, followLink=True) bedDf = pd.read_csv(bed.path, sep="\t") bedDf = bedDf[bedDf.SEQ_ASSAY_ID.isin(allClin.SEQ_ASSAY_ID)] bedDf.to_csv(COMBINED_BED_PATH,sep="\t",index=False) - storeFile(syn, COMBINED_BED_PATH, PUBLIC_RELEASE_PREVIEW, ANONYMIZE_CENTER_DF, genie_version, name="genie_combined.bed") + storeFile(syn, COMBINED_BED_PATH, PUBLIC_RELEASE_PREVIEW, ANONYMIZE_CENTER_DF, genie_version, name="genomic_information.txt") elif entName in ["data_clinical_sample.txt", "data_clinical_patient.txt"] or entName.endswith(".html"): continue elif entName.startswith("data_gene_panel"): From eebcd070c8d9576301d29d961aa0967c236ccb61 Mon Sep 17 00:00:00 2001 From: Thomas Yu Date: Tue, 22 Oct 2019 14:51:27 -0700 Subject: [PATCH 3/4] Switch dashboard to use genomic_information.txt --- genie/dashboardTemplate.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/genie/dashboardTemplate.Rmd b/genie/dashboardTemplate.Rmd index 44f49a80..243e06bd 100644 --- a/genie/dashboardTemplate.Rmd +++ b/genie/dashboardTemplate.Rmd @@ -337,7 +337,7 @@ if (is.null(this_samples)) { this_patient <- this_samples this_patient <- this_patient[!duplicated(this_patient$PATIENT_ID),] } -this_bed <- getFileDf("genie_combined.bed", releaseFiles) +this_bed <- getFileDf("genomic_information.txt", releaseFiles) this_assays = as.character(unique(this_samples$SEQ_ASSAY_ID)) this_mut <- getFileDf("data_mutations_extended.txt", releaseFiles) black_list_variants <- synTableQuery("select * from syn18459663", From d84359b84e2f85abb5bcb209eae9049a3c84a202 Mon Sep 17 00:00:00 2001 From: Thomas Yu Date: Wed, 23 Oct 2019 13:31:49 -0700 Subject: [PATCH 4/4] Fix utf-8 error --- genie/maf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/genie/maf.py b/genie/maf.py index e9e858f7..90c104dc 100644 --- a/genie/maf.py +++ b/genie/maf.py @@ -76,10 +76,10 @@ def createFinalMaf(self, mafDf, filePath, maf=False): mafset = mafDf.to_csv(sep="\t", index=False) else: mafset = mafDf.to_csv(sep="\t", index=False, header=None) - write_or_append = "w" if maf else "a" + write_or_append = "wb" if maf else "ab" with open(filePath, write_or_append) as maffile: mafSet = process_functions.removeStringFloat(mafset) - maffile.write(mafSet) + maffile.write(mafSet.encode("utf-8")) def storeProcessedMaf( self, filePath, mafSynId, centerMafSynId, isNarrow=False):