Skip to content

Commit

Permalink
Merge branch 'release/2.1.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
rpshep committed Jan 17, 2020
2 parents 434d0d1 + eb1537c commit 80ab45e
Show file tree
Hide file tree
Showing 9 changed files with 138 additions and 103 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ addons:

before_install: # installs R3.3
- sudo add-apt-repository "deb https://cran.ma.imperial.ac.uk/bin/linux/ubuntu trusty/"
- sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E084DAB9
- sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9
- sudo apt-get update
- sudo apt-get install r-base

Expand Down
3 changes: 3 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
# CHANGES
## 2.1.0
* Fixed so that plots aren't generated when there is not enough data

## 2.0.8
* added version tag in write_results def to avoid tests failing

Expand Down
8 changes: 4 additions & 4 deletions pyCRISPRcleanR/config/results.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@
"#results tr:nth-child(even){{background-color: #f2f2f2;}}",
"#results tr:hover {{background-color: #ddd;}}",
"#results th {{padding-top: 12px; padding-bottom: 12px; text-align: left; background-color: lightblue; color: black;}} </style></head>",
"<body> <h2>CRISPRcleanR v{version} Analysis Results [<a href=\" ../{outdir}/{file_name}\">results.tar.bz2 </a> (download link suppressed in Docker)]</h2> <table style=\"width:100%\" id=\"results\">"],
"<body> <h2>CRISPRcleanR v{version} Analysis Results [<a href=\" {outdir}/{file_name}\">results.tar.bz2 </a> (download link suppressed in Docker)]</h2> <table style=\"width:100%\" id=\"results\">"],
"table_header": "<tr><th>{}</th><th>{}</th><th>Description</th></tr>",

"table_row_images" : "<tr><td>{count}</td> <td><a href=\" ../{outdir}/{file_name}\" target=\"_blank\" >{file_name}</a></td><td>{description}</td></tr>",
"table_row_files" : "<tr><td>{count}</td> <td><a href=\" ../{outdir}/{file_name}\">{file_name}</a></td><td>{description}</td></tr>",
"table_row_folders" : "<tr><td>{count}</td> <td><a href=\" ../{outdir}/{file_name}\">{file_name}</a></td><td>{description}</td></tr>",
"table_row_images" : "<tr><td>{count}</td> <td><a href=\" {outdir}/{file_name}\" target=\"_blank\" >{file_name}</a></td><td>{description}</td></tr>",
"table_row_files" : "<tr><td>{count}</td> <td><a href=\" {outdir}/{file_name}\">{file_name}</a></td><td>{description}</td></tr>",
"table_row_folders" : "<tr><td>{count}</td> <td><a href=\" {outdir}/{file_name}\">{file_name}</a></td><td>{description}</td></tr>",

"intermediate_row" : "<tr><th>{}</th></tr>",

Expand Down
4 changes: 4 additions & 0 deletions pyCRISPRcleanR/formatInput.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@ def run_analysis(self):
ref_gene_list_dict = SM.load_signature_files(gene_sig_dir, cldf)
if self.run_bagel:
log.info("Running Bagel on normalised fold changes .....")
cr = ','.join(map(lambda x: "%d" % x, range(1, fc.shape[1] - 1, 1)))
log.info("python BAGEL.py -i "+outdir+"/02_normalised_fold_changes.tsv -o "+outdir+"/bagelOut/normalised_FC_bagel.out -e "+gene_sig_dir+"/essential.txt -n "+gene_sig_dir+"/non_essential.txt -c "+str(cr)+" --numiter "+str(iter))
SM.run_bagel(fcfile, ref_gene_list_dict['essential_genes'],
ref_gene_list_dict['non_essential_genes'], cpus,
column_list=list(range(1, fc.shape[1] - 1, 1)), NUM_BOOTSTRAPS=iter,
Expand Down Expand Up @@ -118,6 +120,8 @@ def run_analysis(self):

if self.run_bagel:
log.info("Running Bagel on crisprcleanr corrected fold changes .....")
cr = ','.join(map(lambda x: "%d" % x, range(1, crispr_fc.shape[1] - 1, 1)))
log.info("python BAGEL.py -i "+outdir+"/04_crispr_cleanr_fold_changes.tsv -o "+outdir+"/bagelOut/CRISPRcleanR_FC_bagel.out -e "+gene_sig_dir+"/essential.txt -n "+gene_sig_dir+"/non_essential.txt -c "+str(cr)+" --numiter "+str(iter))
SM.run_bagel(crispr_fc_file, ref_gene_list_dict['essential_genes'],
ref_gene_list_dict['non_essential_genes'], cpus,
column_list=list(range(1, crispr_fc.shape[1] - 1, 1)), NUM_BOOTSTRAPS=iter,
Expand Down
207 changes: 113 additions & 94 deletions pyCRISPRcleanR/plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,13 @@ def correlation_plot_ly(df, title='mytitle', saveto='./myfile', ylabel='ylabel',
dimensions.append(d1)

trace1 = go.Splom(dimensions=dimensions, diagonal=dict(visible=False))
trace1['dimensions'][1].update(visible=True)
trace1['showupperhalf'] = False

t_len = len(trace1['dimensions'])

if t_len > 1:
trace1['dimensions'][1].update(visible=True)
if t_len > 2:
trace1['showupperhalf'] = False
annotation_list = []
yaxis_val = 1
xcounter = 0
Expand All @@ -132,6 +137,7 @@ def correlation_plot_ly(df, title='mytitle', saveto='./myfile', ylabel='ylabel',
continue
if xcounter < ycounter:
continue

slope, intercept, r_value, p_value, std_err = stats.linregress(df[col1], df[col2])
# line = slope * df[col1] + intercept
format_r_value = "<i>{}_vs_{}: R<sup>2</sup>={:02.2f}</i>".format(col1, col2, r_value)
Expand Down Expand Up @@ -194,39 +200,40 @@ def roc_curve(df, data_type='', fdrth=0.05, saveto='./roc_curve'):
:param saveto:
:return:
"""
df, roc_auc, sens, _ = PlotData._roc_curve_r(df.tf.values, df.avgFC.values)

recall = df.sensitivity.values
tnr = df.specificity.values
df, roc_auc, sens, FDR5percTh = PlotData._roc_curve_r(df.tf.values, df.avgFC.values)
if not np.isnan(FDR5percTh):
recall = df.sensitivity.values
tnr = df.specificity.values

lw = 2
lw = 2

trace1 = go.Scatter(x=tnr, y=recall,
trace1 = go.Scatter(x=tnr, y=recall,
mode='lines',
line=dict(color='darkorange', width=lw),
showlegend=False
)

trace2 = go.Scatter(x=[1, 0], y=[0, 1],
trace2 = go.Scatter(x=[1, 0], y=[0, 1],
mode='lines',
line=dict(color='black', width=0.5),
showlegend=False
)

abline = go.Scatter(x=[0, 1], y=[sens, sens],
abline = go.Scatter(x=[0, 1], y=[sens, sens],
mode='lines',
line=dict(color='black', width=0.5, dash='dash'),
showlegend=False
)
labels = {
labels = {
'x': [0],
'y': [0],
'legendgroup': 'group', # this can be any string, not just "group"
'name': "Recall {}%FDR={:04.2f} <br></br> AUC={:04.2f}".format(100 * fdrth, sens, roc_auc),
'opacity': 0,

}
layout = go.Layout(title='Receiver operating characteristic({})'.format(data_type),
}
layout = go.Layout(title='Receiver operating characteristic({})'.format(data_type),
xaxis=dict(title='TNR',
autorange='reversed',
showline=False,
Expand All @@ -240,9 +247,9 @@ def roc_curve(df, data_type='', fdrth=0.05, saveto='./roc_curve'):
)
)

figure = go.Figure(data=[trace1, trace2, abline, labels], layout=layout)
figure = go.Figure(data=[trace1, trace2, abline, labels], layout=layout)

py.plot(figure, filename=saveto + '_' + data_type + '.html', auto_open=False,
py.plot(figure, filename=saveto + '_' + data_type + '.html', auto_open=False,
config=PlotData.plotly_conf())

return None
Expand All @@ -266,10 +273,15 @@ def _roc_curve_r(observations, predictions, FDRth=0.05):
cords = numpy2ri.ri2py(COORS)
df = pd.DataFrame(cords.T, columns=columns)
FDR5percTh = (df[df.ppv >= (1 - FDRth)])['threshold'].max()
index_min = min(df[df.threshold <= FDR5percTh].index.tolist())
if not np.isnan(FDR5percTh):
index_min = min(df[df.threshold <= FDR5percTh].index.tolist())
else:
index_min = 0

threshold = df.at[index_min, 'threshold']
SENS = df.at[index_min, 'sensitivity']
SPEC = df.at[index_min, 'specificity']

return df, auc, SENS, FDR5percTh

@staticmethod
Expand All @@ -283,42 +295,44 @@ def pr_rc_curve(df, data_type='sgRNA', FDRth=0.05, saveto='./pr_rc_curve'):
"""
observations = df.tf.values
predictions = -df.avgFC.values
df, auc, SENS = PlotData._pr_rc_curve_r(observations, predictions, FDRth=FDRth)
h1abline = 1 - FDRth
h2abline = sum(observations) / observations.size
df, auc, SENS, FDR5percTh = PlotData._pr_rc_curve_r(observations, predictions, FDRth=FDRth)

trace1 = go.Scatter(x=df.recall.values, y=df.precision.values,
if not np.isnan(FDR5percTh):
h1abline = 1 - FDRth
h2abline = sum(observations) / observations.size

trace1 = go.Scatter(x=df.recall.values, y=df.precision.values,
mode='lines',
line=dict(color='navy', width=2),
showlegend=False
)

ablineh1 = go.Scatter(x=[0, 1], y=[h1abline, h1abline],
ablineh1 = go.Scatter(x=[0, 1], y=[h1abline, h1abline],
mode='lines',
line=dict(color='black', width=0.5, dash='dash'),
showlegend=False
)
ablineh2 = go.Scatter(x=[0, 1], y=[h2abline, h2abline],
ablineh2 = go.Scatter(x=[0, 1], y=[h2abline, h2abline],
mode='lines',
line=dict(color='black', width=0.5),
showlegend=False
)

ablinev1 = go.Scatter(x=[SENS, SENS], y=[0, 1],
ablinev1 = go.Scatter(x=[SENS, SENS], y=[0, 1],
mode='lines',
line=dict(color='black', width=0.5),
showlegend=False
)

labels = {
'x': [0],
'y': [0],
'legendgroup': 'group', # this can be any string, not just "group"
'name': "Recall {}%FDR={:04.2f} <br></br> AUC={:04.2f}".format(100 * FDRth, SENS, auc),
'opacity': 0,
labels = {
'x': [0],
'y': [0],
'legendgroup': 'group', # this can be any string, not just "group"
'name': "Recall {}%FDR={:04.2f} <br></br> AUC={:04.2f}".format(100 * FDRth, SENS, auc),
'opacity': 0,

}
layout = go.Layout(title='Precision-Recall({}): AUC={:.2f}'.format(data_type, auc),
}
layout = go.Layout(title='Precision-Recall({}): AUC={:.2f}'.format(data_type, auc),
xaxis=dict(title='Recall'
),
yaxis=dict(title='Precision'),
Expand All @@ -328,9 +342,9 @@ def pr_rc_curve(df, data_type='sgRNA', FDRth=0.05, saveto='./pr_rc_curve'):
)
)

figure = go.Figure(data=[trace1, ablineh1, ablineh2, ablinev1, labels], layout=layout)
figure = go.Figure(data=[trace1, ablineh1, ablineh2, ablinev1, labels], layout=layout)

py.plot(figure, filename=saveto + '_' + data_type + '.html', auto_open=False,
py.plot(figure, filename=saveto + '_' + data_type + '.html', auto_open=False,
config=PlotData.plotly_conf())

return None
Expand All @@ -352,11 +366,15 @@ def _pr_rc_curve_r(observations, predictions, FDRth=0.05):
cols = ['recall', 'precision', 'threshold']
df = pd.DataFrame(curve, columns=cols)
FDR5percTh = - (df[df.precision >= (1 - FDRth)])['threshold'].min()
index_min = min(df[df.precision >= (1 - FDRth)].index.tolist())
if not np.isnan(FDR5percTh):
index_min = min(df[df.precision >= (1 - FDRth)].index.tolist())
else:
index_min = 0

SENS = df.at[index_min, 'recall']
threshold = -FDR5percTh

return df, auc, SENS
return df, auc, SENS, FDR5percTh

@staticmethod
def depletion_profile_with_gene_signature(FCsprofile, signatures, df, fdrth=0.05, data_type='genes',
Expand Down Expand Up @@ -451,59 +469,60 @@ def depletion_profile_with_gene_signature(FCsprofile, signatures, df, fdrth=0.05
)
trace_list.append(ablineh1)
# axis and labels
ypos = int(round(log10(FDRpercRANK)))
fdr_label = dict(
x=max_log - 1, y=ypos + 0.12,
xref='x',
yref='y',
text="{}%FDR".format(100 * fdrth),
showarrow=False,
font=dict(
size=16,
color='red'
),
align='center'
)

x_label1 = dict(
x=0.25,
y=-0.075,
showarrow=False,
text="LogFC",
xref='paper',
yref='paper',
font=dict(size=14)
)
if FDRpercRANK > 0:
ypos = int(round(log10(FDRpercRANK)))
fdr_label = dict(
x=max_log - 1, y=ypos + 0.12,
xref='x',
yref='y',
text="{}%FDR".format(100 * fdrth),
showarrow=False,
font=dict(
size=16,
color='red'
),
align='center'
)

x_label2 = dict(
x=0.85,
y=-0.075,
showarrow=False,
text="% of genes below {}%FDR cutoff".format(100 * fdrth),
xref='paper',
yref='paper',
font=dict(size=14)
)
x_label1 = dict(
x=0.25,
y=-0.075,
showarrow=False,
text="LogFC",
xref='paper',
yref='paper',
font=dict(size=14)
)

annotations_list.append(fdr_label)
annotations_list.append(x_label1)
annotations_list.append(x_label2)
x_label2 = dict(
x=0.85,
y=-0.075,
showarrow=False,
text="% of genes below {}%FDR cutoff".format(100 * fdrth),
xref='paper',
yref='paper',
font=dict(size=14)
)

layout = go.Layout(
title='Depletion Profile: {}'.format(data_type),
xaxis=dict(
title="",
showline=True,
showgrid=False,
zeroline=True,
range=[min_log, count],
ticks="",
showticklabels=True,
ticktext=labels,
tickvals=tickvalue,
dtick=1
),
yaxis=dict(title='Depeltion Rank',
annotations_list.append(fdr_label)
annotations_list.append(x_label1)
annotations_list.append(x_label2)

layout = go.Layout(
title='Depletion Profile: {}'.format(data_type),
xaxis=dict(
title="",
showline=True,
showgrid=False,
zeroline=True,
range=[min_log, count],
ticks="",
showticklabels=True,
ticktext=labels,
tickvals=tickvalue,
dtick=1
),
yaxis=dict(title='Depeltion Rank',
autorange='reversed',
showgrid=False,
range=[1, y_fc],
Expand All @@ -515,19 +534,19 @@ def depletion_profile_with_gene_signature(FCsprofile, signatures, df, fdrth=0.05
domain=[1, 1],
dtick=1,
),
annotations=annotations_list
annotations=annotations_list

)
)

figure = go.Figure(data=trace_list, layout=layout)
figure = go.Figure(data=trace_list, layout=layout)

if save_image:
py.plot(figure, filename=saveto + '_' + data_type + '.html', auto_open=False,
config=PlotData.plotly_conf(),
image='jpeg', image_filename=saveto, image_width=1200, image_height=800)
else:
py.plot(figure, filename=saveto + '_' + data_type + '.html', auto_open=False,
config=PlotData.plotly_conf())
if save_image:
py.plot(figure, filename=saveto + '_' + data_type + '.html', auto_open=False,
config=PlotData.plotly_conf(),
image='jpeg', image_filename=saveto, image_width=1200, image_height=800)
else:
py.plot(figure, filename=saveto + '_' + data_type + '.html', auto_open=False,
config=PlotData.plotly_conf())

return None

Expand Down
Loading

0 comments on commit 80ab45e

Please sign in to comment.