From 5dfe4cfdbb9da428267d419b3b4a5993088ab3b3 Mon Sep 17 00:00:00 2001 From: Ganten-Hornby <65263129+Ganten-Hornby@users.noreply.github.com> Date: Mon, 9 Dec 2024 13:39:47 +0800 Subject: [PATCH] fix the SyntaxWarning --- src/gsMap/diagnosis.py | 12 +----------- src/gsMap/utils/generate_r2_matrix.py | 2 +- src/gsMap/utils/regression_read.py | 2 +- src/gsMap/visualize.py | 10 +++++++--- 4 files changed, 10 insertions(+), 16 deletions(-) diff --git a/src/gsMap/diagnosis.py b/src/gsMap/diagnosis.py index 2ebe7a6..fe689f7 100644 --- a/src/gsMap/diagnosis.py +++ b/src/gsMap/diagnosis.py @@ -9,7 +9,7 @@ from gsMap.config import DiagnosisConfig from gsMap.utils.manhattan_plot import ManhattanPlot -from gsMap.visualize import draw_scatter, load_st_coord, estimate_point_size_for_plot +from gsMap.visualize import draw_scatter, load_st_coord, estimate_point_size_for_plot,load_ldsc warnings.filterwarnings("ignore", category=FutureWarning) @@ -23,16 +23,6 @@ def convert_z_to_p(gwas_data): gwas_data['P'] = gwas_data['P'].clip(lower=min_p_value) return gwas_data - -def load_ldsc(ldsc_input_file): - """Load LDSC data and calculate logp.""" - ldsc = pd.read_csv(ldsc_input_file, compression='gzip') - ldsc['spot'] = ldsc['spot'].astype(str).replace('\.0', '', regex=True) - ldsc.set_index('spot', inplace=True) - ldsc['logp'] = -np.log10(ldsc['p']) - return ldsc - - def load_gene_diagnostic_info(config:DiagnosisConfig): """Load or compute gene diagnostic info.""" gene_diagnostic_info_save_path = config.get_gene_diagnostic_info_save_path(config.trait_name) diff --git a/src/gsMap/utils/generate_r2_matrix.py b/src/gsMap/utils/generate_r2_matrix.py index e864d6b..150cf06 100644 --- a/src/gsMap/utils/generate_r2_matrix.py +++ b/src/gsMap/utils/generate_r2_matrix.py @@ -69,7 +69,7 @@ def read(self, fname): raise ValueError('{f} filename must end in {f}'.format(f=end)) comp = get_compression(fname) self.df = pd.read_csv(fname, header=self.header, usecols=self.usecols, - sep='\s+', compression=comp) + sep=r'\s+', compression=comp) if self.colnames: self.df.columns = self.colnames if self.keepcol is not None: diff --git a/src/gsMap/utils/regression_read.py b/src/gsMap/utils/regression_read.py index 389a5a0..1d10030 100644 --- a/src/gsMap/utils/regression_read.py +++ b/src/gsMap/utils/regression_read.py @@ -60,7 +60,7 @@ def read_csv(fh, **kwargs): ''' Read the csv data ''' - return pd.read_csv(fh, sep='\s+', na_values='.', **kwargs) + return pd.read_csv(fh, sep=r'\s+', na_values='.', **kwargs) # Fun for reading loading LD scores diff --git a/src/gsMap/visualize.py b/src/gsMap/visualize.py index b2b14fe..d701662 100644 --- a/src/gsMap/visualize.py +++ b/src/gsMap/visualize.py @@ -11,9 +11,13 @@ def load_ldsc(ldsc_input_file): - ldsc = pd.read_csv(ldsc_input_file, compression='gzip') - ldsc.spot = ldsc.spot.astype(str).replace('\.0', '', regex=True) - ldsc.index = ldsc.spot + ldsc = pd.read_csv( + ldsc_input_file, + compression='gzip', + dtype={'spot': str, 'p': float}, + index_col='spot', + usecols=['spot', 'p'] + ) ldsc['logp'] = -np.log10(ldsc.p) return ldsc