Compiled all labs

NBISweden · Feb 7, 2024 · 2bf2751 · 2bf2751
1 parent f201076
commit 2bf2751
Show file tree

Hide file tree

Showing 5 changed files with 141 additions and 79 deletions.
diff --git a/compiled/labs/scanpy/scanpy_08_spatial.ipynb b/compiled/labs/scanpy/scanpy_08_spatial.ipynb
@@ -1028,58 +1028,21 @@
       "execution_count": null,
       "outputs": []
     },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "<div>\n",
-        "\n",
-        "> **Caution**\n",
-        ">\n",
-        "> This is a slow compute intensive step, we will not run this now and\n",
-        "> instead use a pre-computed file in the step below.\n",
-        ">\n",
-        "> ``` {python}\n",
-        "> # the model is saved to a file, so if is slow to run, you can simply reload it from disk\n",
-        "> if not fetch_data:\n",
-        ">     sc_model = RNAStereoscope(sc_adata)\n",
-        ">     sc_model.train(max_epochs=300)\n",
-        ">     sc_model.history[\"elbo_train\"][10:].plot()\n",
-        ">     sc_model.save(\"./data/spatial/visium/scmodel\", overwrite=True)\n",
-        "> ```\n",
-        "\n",
-        "</div>\n",
-        "\n",
-        "Download precomputed data."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {},
-      "source": [
-        "path_folder = \"data/spatial/visium/scmodel\"\n",
-        "if fetch_data and not os.path.exists(path_folder):\n",
-        "    import shutil\n",
-        "    folder_url = os.path.join(\n",
-        "        path_data, \"spatial/visium/results/scmodel\")\n",
-        "    shutil.copytree(folder_url, path_folder)"
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "Read data"
-      ]
-    },
     {
       "cell_type": "code",
       "metadata": {},
       "source": [
-        "sc_model = RNAStereoscope.load(path_folder, sc_adata)\n",
-        "print(\"Loaded RNA model from file!\")"
+        "# the model is saved to a file, so if is slow to run, you can simply reload it from disk by setting train = False\n",
+        "\n",
+        "train = True\n",
+        "if train:\n",
+        "    sc_model = RNAStereoscope(sc_adata)\n",
+        "    sc_model.train(max_epochs=300)\n",
+        "    sc_model.history[\"elbo_train\"][10:].plot()\n",
+        "    sc_model.save(\"./data/spatial/visium/scanpy_scmodel\", overwrite=True)\n",
+        "else:\n",
+        "    sc_model = RNAStereoscope.load(\"./data/spatial/visium/scanpy_scmodel\", sc_adata)\n",
+        "    print(\"Loaded RNA model from file!\")"
       ],
       "execution_count": null,
       "outputs": []
@@ -1112,14 +1075,14 @@
       "cell_type": "code",
       "metadata": {},
       "source": [
-        "train=True\n",
+        "train = True\n",
         "if train:\n",
         "    spatial_model = SpatialStereoscope.from_rna_model(st_adata, sc_model)\n",
         "    spatial_model.train(max_epochs = 3000)\n",
         "    spatial_model.history[\"elbo_train\"][10:].plot()\n",
-        "    spatial_model.save(\"./data/spatial/stmodel\", overwrite = True)\n",
+        "    spatial_model.save(\"./data/spatial/visium/scanpy_stmodel\", overwrite = True)\n",
         "else:\n",
-        "    spatial_model = SpatialStereoscope.load(\"./data/spatial/stmodel\", st_adata)\n",
+        "    spatial_model = SpatialStereoscope.load(\"./data/spatial/visium/scanpy_stmodel\", st_adata)\n",
         "    print(\"Loaded Spatial model from file!\")"
       ],
       "execution_count": null,

diff --git a/compiled/labs/seurat/seurat_03_integration.qmd b/compiled/labs/seurat/seurat_03_integration.qmd
@@ -232,12 +232,11 @@ FeaturePlot(alldata.int, reduction = "umap", dims = 1:2, features = myfeatures,
 ## Harmony
 
 An alternative method for integration is Harmony, for more details on
-the method, please see their paper [Nat.
-Methods](https://www.nature.com/articles/s41592-019-0619-0).
-
-This method runs integration on a dimensionality reduction, in most
-applications the PCA. So first, we will rerun scaling and PCA with the
-same set of genes that were used for the CCA integration.
+the method, please se their paper [Nat.
+Methods](https://www.nature.com/articles/s41592-019-0619-0). This method
+runs the integration on a dimensionality reduction, in most applications
+the PCA. So first, we will rerun scaling and PCA with the same set of
+genes that were used for the CCA integration.
 
 OBS! Make sure to revert back to the `RNA` assay.
 
@@ -281,9 +280,8 @@ Biotech.](https://www.nature.com/articles/s41587-019-0113-3)). This
 method is implemented in python, but we can run it through the
 Reticulate package.
 
-We will use the split list of samples and transpose the matrix as
-scanorama requires rows as samples and columns as genes. We also create
-a list with the highly variables genes for each sample.
+We will run it with the same set of variable genes, but first we have to
+create a list of all the objects per sample.
 
 ``` {r}
 #| fig-height: 5

diff --git a/compiled/labs/seurat/seurat_05_dge.qmd b/compiled/labs/seurat/seurat_05_dge.qmd
@@ -169,6 +169,59 @@ VlnPlot(alldata, features = as.character(unique(top3$gene)), ncol = 5, group.by
 
 </div>
 
+### DGE with equal amount of cells
+
+The number of cells per cluster differ quite a bit in this data
+
+``` {r}
+table(alldata@active.ident)
+```
+
+Hence when we run `FindAllMarkers` one cluster vs rest, the largest
+cluster (cluster 0) will dominate the "rest" and influence the results
+the most. So it is often a good idea to subsample the clusters to an
+equal number of cells before running differential expression for one vs
+rest. So lets select 300 cells per cluster:
+
+``` {r}
+sub <- subset(alldata, cells = WhichCells(alldata, downsample = 300))
+table(sub@active.ident)
+```
+
+Now rerun `FindAllMarkers` with this set and compare the results.
+
+``` {r}
+markers_genes_sub <- FindAllMarkers(
+    sub,
+    log2FC.threshold = 0.2,
+    test.use = "wilcox",
+    min.pct = 0.1,
+    min.diff.pct = 0.2,
+    only.pos = TRUE,
+    max.cells.per.ident = 50,
+    assay = "RNA"
+)
+```
+
+The number of significant genes per cluster has changed, with more for
+some clusters and less for others.
+
+``` {r}
+table(markers_genes$cluster)
+table(markers_genes_sub$cluster)
+```
+
+``` {r}
+#| fig-height: 8
+#| fig-width: 8
+
+markers_genes_sub %>%
+    group_by(cluster) %>%
+    slice_min(p_val_adj, n = 5, with_ties = FALSE) -> top5_sub
+
+DotPlot(alldata, features = rev(as.character(unique(top5_sub$gene))), group.by = sel.clust, assay = "RNA") + coord_flip()
+```
+
 ## DGE across conditions
 
 The second way of computing differential expression is to answer which

diff --git a/compiled/labs/seurat/seurat_06_celltyping.qmd b/compiled/labs/seurat/seurat_06_celltyping.qmd
@@ -214,6 +214,42 @@ ggplot(ctrl@meta.data, aes(x = CCA_snn_res.0.5, fill = scpred_prediction)) +
     theme_classic()
 ```
 
+## Azimuth
+
+There are multiple online resources with large curated datasets with
+methods to integrate and do label transfer. One such resource is
+[Azimuth](https://azimuth.hubmapconsortium.org/) another one is
+[Disco](https://www.immunesinglecell.org/).
+
+Azimuth is also possible to install and run locally, but in this case we
+have used the online app and ran the predictions for you. So you just
+have to download the prediction file.
+
+This is how to save the count matrix:
+
+``` {r}
+#| eval: false
+
+# No need to run now.
+C = ctrl@assays$RNA@counts
+saveRDS(C, file = "data/covid/results/ctrl13_count_matrix.rds")
+```
+
+Instead load the results and visualize:
+
+``` {r}
+path_data <- "https://export.uppmax.uu.se/naiss2023-23-3/workshops/workshop-scrnaseq"
+path_file <- "data/covid/results/azimuth_pred.tsv"
+if (!dir.exists(dirname(path_file))) dir.create(dirname(path_file), recursive = TRUE)
+if (fetch_data && !file.exists(path_file)) download.file(url = file.path(path_data, "covid/results/azimuth_pred.tsv"), destfile = path_file)
+
+azimuth_pred <- read.table(path_file, sep = "\t", header = T)
+
+# add predictions to the seurat object
+ctrl$azimuth = azimuth_pred$predicted.celltype.l2
+DimPlot(ctrl, group.by = "azimuth", label = T, repel = T) + NoAxes()
+```
+
 ## Compare results
 
 Now we will compare the output of the two methods using the convenient
@@ -224,6 +260,17 @@ metadata slots.
 crossTab(ctrl, "predicted.id", "scpred_prediction")
 ```
 
+We can also plot all the different predictions side by side
+
+``` {r}
+wrap_plots(
+    DimPlot(ctrl, label = T, group.by = "predicted.id") + NoAxes(),
+    DimPlot(ctrl, label = T, group.by = "scpred_prediction") + NoAxes(),
+    DimPlot(ctrl, label = T, group.by = "azimuth") + NoAxes(),
+    ncol = 2
+)
+```
+
 ## GSEA with celltype markers
 
 Another option, where celltype can be classified on cluster level is to

diff --git a/compiled/labs/seurat/seurat_08_spatial.qmd b/compiled/labs/seurat/seurat_08_spatial.qmd
@@ -58,7 +58,7 @@ kidney. Here we will download and process sections from the mouse brain.
 
 ``` {r load}
 # download pre-computed data if missing or long compute
-fetch_data <- FALSE
+fetch_data <- TRUE
 
 # url for source and intermediate data
 path_data <- "https://export.uppmax.uu.se/naiss2023-23-3/workshops/workshop-scrnaseq"
@@ -399,7 +399,7 @@ generated with the SMART-Seq2 protocol.
 First download the seurat data:
 
 ``` {r}
-if (!dir.exists("data/spatial/visium")) dir.create("data/spatial/visium")
+if (!dir.exists("data/spatial/visium")) dir.create("data/spatial/visium", recursive = TRUE)
 path_file <- "data/spatial/visium/allen_cortex.rds"
 if (!file.exists(path_file)) download.file(url = file.path(path_data, "spatial/visium/allen_cortex.rds"), destfile = path_file)
 ```
@@ -570,31 +570,32 @@ We then run the deconvolution defining the celltype of interest as
 >
 > This is a slow compute intensive step, we will not run this now and
 > instead use a pre-computed file in the step below.
->
-> ``` {r}
-> #| results: hide
-> #| eval: false
->
-> # fetch_data is defined at the top of this document
-> if (!fetch_data) {
->   deconvolution_crc <- SCDC::SCDC_prop(
->     bulk.eset = eset_ST,
->     sc.eset = eset_SC,
->     ct.varname = "subclass",
->     ct.sub = as.character(unique(eset_SC$subclass))
->   )
->   saveRDS(deconvolution_crc, "data/spatial/visium/seurat_scdc.rds")
-> }
-> ```
 
 </div>
 
+``` {r}
+#| results: hide
+#| eval: false
+# this code block is not executed
+
+# fetch_data is defined at the top of this document
+if (!fetch_data) {
+  deconvolution_crc <- SCDC::SCDC_prop(
+    bulk.eset = eset_ST,
+    sc.eset = eset_SC,
+    ct.varname = "subclass",
+    ct.sub = as.character(unique(eset_SC$subclass))
+  )
+  saveRDS(deconvolution_crc, "data/spatial/visium/seurat_scdc.rds")
+}
+```
+
 Download the precomputed file.
 
 ``` {r}
 # fetch_data is defined at the top of this document
+path_file <- "data/spatial/visium/seurat_scdc.rds"
 if (fetch_data) {
-  path_file <- "data/spatial/visium/seurat_scdc.rds"
   if (!file.exists(path_file)) download.file(url = file.path(path_data, "spatial/visium/results/seurat_scdc.rds"), destfile = path_file)
 }
 ```