-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmatch_gene_counts.py~
41 lines (31 loc) · 1.75 KB
/
match_gene_counts.py~
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#####################################
###### ABOUT THIS SCRIPT #
#####################################
# This script divides the geneSequenceResults database into NT rows and TN rows
# and then merges matching sample/gene pairs
# so the normalized counts of TN and NT pairs can be compared side-by-side
#####################################
###### INSTRUCTIONS FOR USE: #
#####################################
# In the shell, go to this directory and type 'python match_gene_counts.py' to run the script.
# -- In my experience, it runs much faster this way than when run from within Sublime Text 2.
# Check out the README for more information.
#####################################
# Import necessary libraries
import pandas
import sqlite3 as lite
# Open a connection to geneSequenceResults.db.
# NOTE(review): this absolute path is machine-specific -- adjust it before running elsewhere.
con = lite.connect('/Users/elenka/Documents/larriscku/geneSequenceResults.db')
try:
    # Read the TN (tumor) rows into one dataframe and the NT (benign) rows into another.
    TN_table = pandas.read_sql_query("select * from genes where tissuetype like 'TN'", con)
    NT_table = pandas.read_sql_query("select * from genes where tissuetype like 'NT'", con)
finally:
    # Both tables are fully loaded into memory at this point, so the database
    # can be released right away -- and it is released even if a query fails.
    con.close()

# Derive a 'sample' key from the first 12 characters of the barcode so that
# TN and NT rows can be matched to each other.
# NOTE(review): presumably the 12-character prefix identifies the patient/sample -- confirm.
NT_table['sample'] = NT_table['barcode'].apply(lambda x: x[:12])
TN_table['sample'] = TN_table['barcode'].apply(lambda x: x[:12])

# Merge the two dataframes where they share the same geneid and sample.
# The left frame must be NT and the right frame TN so that the suffixes line up:
# overlapping columns from NT_table get '_NT' and those from TN_table get '_TN'.
# (Merging TN_table with itself would label tumor counts as benign counts.)
pairs = pandas.merge(
    NT_table,
    TN_table,
    on=['geneid', 'sample'],
    how='inner',
    suffixes=('_NT', '_TN'),
)

# Save the relevant columns to a tab-delimited .dat file so the normalized
# counts of each TN/NT pair sit side by side.
pairs[['geneid', 'sample', 'normcount_TN', 'normcount_NT']].to_csv(
    'gene_pairs_table.dat', sep='\t', index=False
)