Merge branch 'release/2.1.0'

cancerit · Jan 17, 2020 · 80ab45e · 80ab45e
2 parents 434d0d1 + eb1537c
commit 80ab45e
Show file tree

Hide file tree

Showing 9 changed files with 138 additions and 103 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -21,7 +21,7 @@ addons:
 
 before_install: # installs R3.3
     - sudo add-apt-repository "deb https://cran.ma.imperial.ac.uk/bin/linux/ubuntu trusty/"
-    - sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E084DAB9
+    - sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9
     - sudo apt-get update
     - sudo apt-get install r-base
 

diff --git a/CHANGES.md b/CHANGES.md
@@ -1,4 +1,7 @@
 # CHANGES
+## 2.1.0
+* Fixed so that plots aren't generated when there is not enough data
+
 ## 2.0.8
 * added version tag in write_results def to avoid tests failing
 

diff --git a/pyCRISPRcleanR/config/results.json b/pyCRISPRcleanR/config/results.json
@@ -5,12 +5,12 @@
     "#results tr:nth-child(even){{background-color: #f2f2f2;}}",
     "#results tr:hover {{background-color: #ddd;}}",
     "#results th {{padding-top: 12px; padding-bottom: 12px; text-align: left; background-color: lightblue; color: black;}} </style></head>",
-              "<body> <h2>CRISPRcleanR v{version} Analysis Results [<a href=\" ../{outdir}/{file_name}\">results.tar.bz2 </a> (download link suppressed in Docker)]</h2> <table style=\"width:100%\" id=\"results\">"],
+              "<body> <h2>CRISPRcleanR v{version} Analysis Results [<a href=\" {outdir}/{file_name}\">results.tar.bz2 </a> (download link suppressed in Docker)]</h2> <table style=\"width:100%\" id=\"results\">"],
   "table_header": "<tr><th>{}</th><th>{}</th><th>Description</th></tr>",
 
-  "table_row_images" : "<tr><td>{count}</td> <td><a href=\" ../{outdir}/{file_name}\"  target=\"_blank\" >{file_name}</a></td><td>{description}</td></tr>",
-  "table_row_files" : "<tr><td>{count}</td> <td><a href=\" ../{outdir}/{file_name}\">{file_name}</a></td><td>{description}</td></tr>",
-  "table_row_folders" : "<tr><td>{count}</td> <td><a href=\" ../{outdir}/{file_name}\">{file_name}</a></td><td>{description}</td></tr>",
+  "table_row_images" : "<tr><td>{count}</td> <td><a href=\" {outdir}/{file_name}\"  target=\"_blank\" >{file_name}</a></td><td>{description}</td></tr>",
+  "table_row_files" : "<tr><td>{count}</td> <td><a href=\" {outdir}/{file_name}\">{file_name}</a></td><td>{description}</td></tr>",
+  "table_row_folders" : "<tr><td>{count}</td> <td><a href=\" {outdir}/{file_name}\">{file_name}</a></td><td>{description}</td></tr>",
 
   "intermediate_row" : "<tr><th>{}</th></tr>",
 

diff --git a/pyCRISPRcleanR/formatInput.py b/pyCRISPRcleanR/formatInput.py
@@ -80,6 +80,8 @@ def run_analysis(self):
                 ref_gene_list_dict = SM.load_signature_files(gene_sig_dir, cldf)
                 if self.run_bagel:
                     log.info("Running Bagel on normalised fold changes .....")
+                    cr = ','.join(map(lambda x: "%d" % x, range(1, fc.shape[1] - 1, 1)))
+                    log.info("python BAGEL.py -i "+outdir+"/02_normalised_fold_changes.tsv -o "+outdir+"/bagelOut/normalised_FC_bagel.out -e "+gene_sig_dir+"/essential.txt -n "+gene_sig_dir+"/non_essential.txt -c "+str(cr)+" --numiter "+str(iter))
                     SM.run_bagel(fcfile, ref_gene_list_dict['essential_genes'],
                             ref_gene_list_dict['non_essential_genes'], cpus,
                             column_list=list(range(1, fc.shape[1] - 1, 1)), NUM_BOOTSTRAPS=iter,
@@ -118,6 +120,8 @@ def run_analysis(self):
 
                     if self.run_bagel:
                         log.info("Running Bagel on crisprcleanr corrected fold changes .....")
+                        cr = ','.join(map(lambda x: "%d" % x, range(1, crispr_fc.shape[1] - 1, 1)))
+                        log.info("python BAGEL.py -i "+outdir+"/04_crispr_cleanr_fold_changes.tsv -o "+outdir+"/bagelOut/CRISPRcleanR_FC_bagel.out -e "+gene_sig_dir+"/essential.txt -n "+gene_sig_dir+"/non_essential.txt -c "+str(cr)+" --numiter "+str(iter))
                         SM.run_bagel(crispr_fc_file, ref_gene_list_dict['essential_genes'],
                                 ref_gene_list_dict['non_essential_genes'], cpus,
                                 column_list=list(range(1, crispr_fc.shape[1] - 1, 1)), NUM_BOOTSTRAPS=iter,

diff --git a/pyCRISPRcleanR/plots.py b/pyCRISPRcleanR/plots.py
@@ -115,8 +115,13 @@ def correlation_plot_ly(df, title='mytitle', saveto='./myfile', ylabel='ylabel',
             dimensions.append(d1)
 
         trace1 = go.Splom(dimensions=dimensions, diagonal=dict(visible=False))
-        trace1['dimensions'][1].update(visible=True)
-        trace1['showupperhalf'] = False
+
+        t_len = len(trace1['dimensions'])
+
+        if t_len > 1:
+            trace1['dimensions'][1].update(visible=True)
+        if t_len > 2:
+            trace1['showupperhalf'] = False
         annotation_list = []
         yaxis_val = 1
         xcounter = 0
@@ -132,6 +137,7 @@ def correlation_plot_ly(df, title='mytitle', saveto='./myfile', ylabel='ylabel',
                     continue
                 if xcounter < ycounter:
                     continue
+
                 slope, intercept, r_value, p_value, std_err = stats.linregress(df[col1], df[col2])
                 # line = slope * df[col1] + intercept
                 format_r_value = "<i>{}_vs_{}: R<sup>2</sup>={:02.2f}</i>".format(col1, col2, r_value)
@@ -194,39 +200,40 @@ def roc_curve(df, data_type='', fdrth=0.05, saveto='./roc_curve'):
         :param saveto:
         :return:
         """
-        df, roc_auc, sens, _ = PlotData._roc_curve_r(df.tf.values, df.avgFC.values)
 
-        recall = df.sensitivity.values
-        tnr = df.specificity.values
+        df, roc_auc, sens, FDR5percTh = PlotData._roc_curve_r(df.tf.values, df.avgFC.values)
+        if not np.isnan(FDR5percTh):
+          recall = df.sensitivity.values
+          tnr = df.specificity.values
 
-        lw = 2
+          lw = 2
 
-        trace1 = go.Scatter(x=tnr, y=recall,
+          trace1 = go.Scatter(x=tnr, y=recall,
                             mode='lines',
                             line=dict(color='darkorange', width=lw),
                             showlegend=False
                             )
 
-        trace2 = go.Scatter(x=[1, 0], y=[0, 1],
+          trace2 = go.Scatter(x=[1, 0], y=[0, 1],
                             mode='lines',
                             line=dict(color='black', width=0.5),
                             showlegend=False
                             )
 
-        abline = go.Scatter(x=[0, 1], y=[sens, sens],
+          abline = go.Scatter(x=[0, 1], y=[sens, sens],
                             mode='lines',
                             line=dict(color='black', width=0.5, dash='dash'),
                             showlegend=False
                             )
-        labels = {
+          labels = {
             'x': [0],
             'y': [0],
             'legendgroup': 'group',  # this can be any string, not just "group"
             'name': "Recall {}%FDR={:04.2f} <br></br> AUC={:04.2f}".format(100 * fdrth, sens, roc_auc),
             'opacity': 0,
 
-        }
-        layout = go.Layout(title='Receiver operating characteristic({})'.format(data_type),
+          }
+          layout = go.Layout(title='Receiver operating characteristic({})'.format(data_type),
                            xaxis=dict(title='TNR',
                                       autorange='reversed',
                                       showline=False,
@@ -240,9 +247,9 @@ def roc_curve(df, data_type='', fdrth=0.05, saveto='./roc_curve'):
                                        )
                            )
 
-        figure = go.Figure(data=[trace1, trace2, abline, labels], layout=layout)
+          figure = go.Figure(data=[trace1, trace2, abline, labels], layout=layout)
 
-        py.plot(figure, filename=saveto + '_' + data_type + '.html', auto_open=False,
+          py.plot(figure, filename=saveto + '_' + data_type + '.html', auto_open=False,
                 config=PlotData.plotly_conf())
 
         return None
@@ -266,10 +273,15 @@ def _roc_curve_r(observations, predictions, FDRth=0.05):
         cords = numpy2ri.ri2py(COORS)
         df = pd.DataFrame(cords.T, columns=columns)
         FDR5percTh = (df[df.ppv >= (1 - FDRth)])['threshold'].max()
-        index_min = min(df[df.threshold <= FDR5percTh].index.tolist())
+        if not np.isnan(FDR5percTh):
+            index_min = min(df[df.threshold <= FDR5percTh].index.tolist())
+        else:
+            index_min = 0
+
         threshold = df.at[index_min, 'threshold']
         SENS = df.at[index_min, 'sensitivity']
         SPEC = df.at[index_min, 'specificity']
+
         return df, auc, SENS, FDR5percTh
 
     @staticmethod
@@ -283,42 +295,44 @@ def pr_rc_curve(df, data_type='sgRNA', FDRth=0.05, saveto='./pr_rc_curve'):
         """
         observations = df.tf.values
         predictions = -df.avgFC.values
-        df, auc, SENS = PlotData._pr_rc_curve_r(observations, predictions, FDRth=FDRth)
-        h1abline = 1 - FDRth
-        h2abline = sum(observations) / observations.size
+        df, auc, SENS, FDR5percTh = PlotData._pr_rc_curve_r(observations, predictions, FDRth=FDRth)
 
-        trace1 = go.Scatter(x=df.recall.values, y=df.precision.values,
+        if not np.isnan(FDR5percTh):
+            h1abline = 1 - FDRth
+            h2abline = sum(observations) / observations.size
+
+            trace1 = go.Scatter(x=df.recall.values, y=df.precision.values,
                             mode='lines',
                             line=dict(color='navy', width=2),
                             showlegend=False
                             )
 
-        ablineh1 = go.Scatter(x=[0, 1], y=[h1abline, h1abline],
+            ablineh1 = go.Scatter(x=[0, 1], y=[h1abline, h1abline],
                               mode='lines',
                               line=dict(color='black', width=0.5, dash='dash'),
                               showlegend=False
                               )
-        ablineh2 = go.Scatter(x=[0, 1], y=[h2abline, h2abline],
+            ablineh2 = go.Scatter(x=[0, 1], y=[h2abline, h2abline],
                               mode='lines',
                               line=dict(color='black', width=0.5),
                               showlegend=False
                               )
 
-        ablinev1 = go.Scatter(x=[SENS, SENS], y=[0, 1],
+            ablinev1 = go.Scatter(x=[SENS, SENS], y=[0, 1],
                               mode='lines',
                               line=dict(color='black', width=0.5),
                               showlegend=False
                               )
 
-        labels = {
-            'x': [0],
-            'y': [0],
-            'legendgroup': 'group',  # this can be any string, not just "group"
-            'name': "Recall {}%FDR={:04.2f} <br></br> AUC={:04.2f}".format(100 * FDRth, SENS, auc),
-            'opacity': 0,
+            labels = {
+              'x': [0],
+              'y': [0],
+              'legendgroup': 'group',  # this can be any string, not just "group"
+              'name': "Recall {}%FDR={:04.2f} <br></br> AUC={:04.2f}".format(100 * FDRth, SENS, auc),
+              'opacity': 0,
 
-        }
-        layout = go.Layout(title='Precision-Recall({}): AUC={:.2f}'.format(data_type, auc),
+            }
+            layout = go.Layout(title='Precision-Recall({}): AUC={:.2f}'.format(data_type, auc),
                            xaxis=dict(title='Recall'
                                       ),
                            yaxis=dict(title='Precision'),
@@ -328,9 +342,9 @@ def pr_rc_curve(df, data_type='sgRNA', FDRth=0.05, saveto='./pr_rc_curve'):
                                        )
                            )
 
-        figure = go.Figure(data=[trace1, ablineh1, ablineh2, ablinev1, labels], layout=layout)
+            figure = go.Figure(data=[trace1, ablineh1, ablineh2, ablinev1, labels], layout=layout)
 
-        py.plot(figure, filename=saveto + '_' + data_type + '.html', auto_open=False,
+            py.plot(figure, filename=saveto + '_' + data_type + '.html', auto_open=False,
                 config=PlotData.plotly_conf())
 
         return None
@@ -352,11 +366,15 @@ def _pr_rc_curve_r(observations, predictions, FDRth=0.05):
         cols = ['recall', 'precision', 'threshold']
         df = pd.DataFrame(curve, columns=cols)
         FDR5percTh = - (df[df.precision >= (1 - FDRth)])['threshold'].min()
-        index_min = min(df[df.precision >= (1 - FDRth)].index.tolist())
+        if not np.isnan(FDR5percTh):
+            index_min = min(df[df.precision >= (1 - FDRth)].index.tolist())
+        else:
+            index_min = 0
+
         SENS = df.at[index_min, 'recall']
         threshold = -FDR5percTh
 
-        return df, auc, SENS
+        return df, auc, SENS, FDR5percTh
 
     @staticmethod
     def depletion_profile_with_gene_signature(FCsprofile, signatures, df, fdrth=0.05, data_type='genes',
@@ -451,59 +469,60 @@ def depletion_profile_with_gene_signature(FCsprofile, signatures, df, fdrth=0.05
                               )
         trace_list.append(ablineh1)
         # axis and labels
-        ypos = int(round(log10(FDRpercRANK)))
-        fdr_label = dict(
-            x=max_log - 1, y=ypos + 0.12,
-            xref='x',
-            yref='y',
-            text="{}%FDR".format(100 * fdrth),
-            showarrow=False,
-            font=dict(
-                size=16,
-                color='red'
-            ),
-            align='center'
-        )
-
-        x_label1 = dict(
-            x=0.25,
-            y=-0.075,
-            showarrow=False,
-            text="LogFC",
-            xref='paper',
-            yref='paper',
-            font=dict(size=14)
-        )
+        if FDRpercRANK > 0:
+            ypos = int(round(log10(FDRpercRANK)))
+            fdr_label = dict(
+                x=max_log - 1, y=ypos + 0.12,
+                xref='x',
+                yref='y',
+                text="{}%FDR".format(100 * fdrth),
+                showarrow=False,
+                font=dict(
+                    size=16,
+                    color='red'
+                ),
+                align='center'
+            )
 
-        x_label2 = dict(
-            x=0.85,
-            y=-0.075,
-            showarrow=False,
-            text="% of genes below {}%FDR cutoff".format(100 * fdrth),
-            xref='paper',
-            yref='paper',
-            font=dict(size=14)
-        )
+            x_label1 = dict(
+                x=0.25,
+                y=-0.075,
+                showarrow=False,
+                text="LogFC",
+                xref='paper',
+                yref='paper',
+                font=dict(size=14)
+            )
 
-        annotations_list.append(fdr_label)
-        annotations_list.append(x_label1)
-        annotations_list.append(x_label2)
+            x_label2 = dict(
+                x=0.85,
+                y=-0.075,
+                showarrow=False,
+                text="% of genes below {}%FDR cutoff".format(100 * fdrth),
+                xref='paper',
+                yref='paper',
+                font=dict(size=14)
+            )
 
-        layout = go.Layout(
-            title='Depletion Profile: {}'.format(data_type),
-            xaxis=dict(
-                title="",
-                showline=True,
-                showgrid=False,
-                zeroline=True,
-                range=[min_log, count],
-                ticks="",
-                showticklabels=True,
-                ticktext=labels,
-                tickvals=tickvalue,
-                dtick=1
-            ),
-            yaxis=dict(title='Depeltion Rank',
+            annotations_list.append(fdr_label)
+            annotations_list.append(x_label1)
+            annotations_list.append(x_label2)
+
+            layout = go.Layout(
+                title='Depletion Profile: {}'.format(data_type),
+                xaxis=dict(
+                    title="",
+                    showline=True,
+                    showgrid=False,
+                    zeroline=True,
+                    range=[min_log, count],
+                    ticks="",
+                    showticklabels=True,
+                    ticktext=labels,
+                    tickvals=tickvalue,
+                    dtick=1
+                ),
+                yaxis=dict(title='Depeltion Rank',
                        autorange='reversed',
                        showgrid=False,
                        range=[1, y_fc],
@@ -515,19 +534,19 @@ def depletion_profile_with_gene_signature(FCsprofile, signatures, df, fdrth=0.05
                        domain=[1, 1],
                        dtick=1,
                        ),
-            annotations=annotations_list
+                annotations=annotations_list
 
-        )
+            )
 
-        figure = go.Figure(data=trace_list, layout=layout)
+            figure = go.Figure(data=trace_list, layout=layout)
 
-        if save_image:
-            py.plot(figure, filename=saveto + '_' + data_type + '.html', auto_open=False,
-                    config=PlotData.plotly_conf(),
-                    image='jpeg', image_filename=saveto, image_width=1200, image_height=800)
-        else:
-            py.plot(figure, filename=saveto + '_' + data_type + '.html', auto_open=False,
-                    config=PlotData.plotly_conf())
+            if save_image:
+                py.plot(figure, filename=saveto + '_' + data_type + '.html', auto_open=False,
+                        config=PlotData.plotly_conf(),
+                        image='jpeg', image_filename=saveto, image_width=1200, image_height=800)
+            else:
+                py.plot(figure, filename=saveto + '_' + data_type + '.html', auto_open=False,
+                        config=PlotData.plotly_conf())
 
         return None