-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpaired_t_test.py
49 lines (37 loc) · 1.83 KB
/
paired_t_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#####################################
###### ABOUT THIS SCRIPT #
#####################################
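# This script runs a two-tailed paired t-test (normcount_TN vs. normcount_NT)
# on the samples of each gene in the gene_pairs table of geneSequenceResults.db
# and stores each gene's t-value and p-value in the gene_paired_ttest table.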
#####################################
###### INSTRUCTIONS FOR USE: #
#####################################
# In the shell, go to this directory and type 'python paired_t_test.py' to run the script.
#####################################
# Import necessary libraries
import pandas
from pandas.io import sql
import sqlite3 as lite
from scipy import stats
import numpy
def paired_t_tests(con):
    """Run a two-tailed paired t-test for every gene in ``gene_pairs``.

    All rows are fetched with a single query and grouped by their exact
    ``geneid`` value.  (Matching genes with SQL ``LIKE`` would treat ``_`` and
    ``%`` inside a gene id as wildcards and ignore case, so one gene's test
    could silently include another gene's samples.)

    Args:
        con: an open DB-API connection to a database containing a
            ``gene_pairs`` table with the columns ``geneid``,
            ``normcount_TN`` and ``normcount_NT``.

    Returns:
        A list of ``(geneid, t_value, p_value)`` tuples, one per distinct
        gene id, in order of first appearance in the query result.  Rows whose
        ``geneid`` is NULL are skipped (pandas ``groupby`` drops missing keys).

    Raises:
        ValueError: if a count cannot be converted to a float.
    """
    pairs = pandas.read_sql_query(
        "select geneid, normcount_TN, normcount_NT from gene_pairs", con)
    # The counts are converted explicitly because they may be stored as text.
    pairs = pairs.astype({'normcount_TN': float, 'normcount_NT': float})

    results = []
    for gene_id, samples in pairs.groupby('geneid', sort=False):
        t_value, p_value = stats.ttest_rel(samples['normcount_TN'],
                                           samples['normcount_NT'])
        # Plain floats bind cleanly as SQLite REAL values (NaN is stored as NULL).
        results.append((gene_id, float(t_value), float(p_value)))
    return results


def main(db_path='geneSequenceResults.db'):
    """Compute the per-gene paired t-tests and store them in the database.

    The ``gene_paired_ttest`` table (geneid, tvalue, pvalue) is dropped and
    recreated on every run.  The statistics are computed *before* the old
    table is dropped, so a failure while reading or converting the input
    leaves the previous results untouched.

    Args:
        db_path: path of the SQLite database file to read from and write to.
    """
    con = lite.connect(db_path)
    try:
        results = paired_t_tests(con)
        cur = con.cursor()
        cur.execute('DROP TABLE IF EXISTS gene_paired_ttest')
        cur.execute('CREATE TABLE gene_paired_ttest (geneid TEXT, tvalue REAL, pvalue REAL)')
        cur.executemany('INSERT INTO gene_paired_ttest VALUES (?,?,?)', results)
        con.commit()
    finally:
        # Always release the database file, even if the analysis fails.
        con.close()


if __name__ == '__main__':
    main()
#####################################
# END OF FILE
#####################################