scverse · grst · Feb 22, 2024 · Feb 22, 2024 · Feb 22, 2024 · Feb 22, 2024
diff --git a/.cruft.json b/.cruft.json
@@ -1,7 +1,7 @@
 {
   "template": "https://github.com/scverse/cookiecutter-scverse",
-  "commit": "8e96abb5c3e2d5078c44713958da672711cf2a48",
-  "checkout": "v0.3.0",
+  "commit": "586b1652162ff7994b0070a034023d64289ae416",
+  "checkout": "v0.3.1",
   "context": {
     "cookiecutter": {
       "project_name": "scirpy",
@@ -13,7 +13,8 @@
       "project_repo": "https://github.com/scverse/scirpy",
       "license": "BSD 3-Clause License",
       "_copy_without_render": [
-        ".github/workflows/**.yaml",
+        ".github/workflows/build.yaml",
+        ".github/workflows/test.yaml",
         "docs/_templates/autosummary/**.rst"
       ],
       "_render_devdocs": false,

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -6,30 +6,19 @@ default_stages:
   - push
 minimum_pre_commit_version: 2.16.0
 repos:
-  - repo: https://github.com/psf/black
-    rev: "23.12.1"
-    hooks:
-      - id: black
-  - repo: https://github.com/asottile/blacken-docs
-    rev: 1.16.0
-    hooks:
-      - id: blacken-docs
   - repo: https://github.com/pre-commit/mirrors-prettier
     rev: v3.0.3
     hooks:
       - id: prettier
-        # Newer versions of node don't work on systems that have an older version of GLIBC
-        # (in particular Ubuntu 18.04 and Centos 7)
-        # EOL of Centos 7 is in 2024-06, we can probably get rid of this then.
-        # See https://github.com/scverse/cookiecutter-scverse/issues/143 and
-        # https://github.com/jupyterlab/jupyterlab/issues/12675
-        language_version: "17.9.1"
         exclude: '^\.conda'
   - repo: https://github.com/astral-sh/ruff-pre-commit
     rev: v0.1.14
     hooks:
       - id: ruff
+        types_or: [python, pyi, jupyter]
         args: [--fix, --exit-non-zero-on-fix]
+      - id: ruff-format
+        types_or: [python, pyi, jupyter]
   - repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v4.5.0
     hooks:

diff --git a/docs/tutorials/tutorial_3k_tcr.ipynb b/docs/tutorials/tutorial_3k_tcr.ipynb
@@ -49,7 +49,6 @@
    "source": [
     "import muon as mu\n",
     "import numpy as np\n",
-    "import pandas as pd\n",
     "import scanpy as sc\n",
     "import scirpy as ir\n",
     "from cycler import cycler\n",
@@ -581,9 +580,7 @@
     }
    ],
    "source": [
-    "_ = ir.pl.group_abundance(\n",
-    "    mdata, groupby=\"airr:receptor_subtype\", target_col=\"gex:source\"\n",
-    ")"
+    "_ = ir.pl.group_abundance(mdata, groupby=\"airr:receptor_subtype\", target_col=\"gex:source\")"
    ]
   },
   {
@@ -639,11 +636,7 @@
    "source": [
     "print(\n",
     "    \"Fraction of cells with more than one pair of TCRs: {:.2f}\".format(\n",
-    "        np.sum(\n",
-    "            mdata.obs[\"airr:chain_pairing\"].isin(\n",
-    "                [\"extra VJ\", \"extra VDJ\", \"two full chains\", \"multichain\"]\n",
-    "            )\n",
-    "        )\n",
+    "        np.sum(mdata.obs[\"airr:chain_pairing\"].isin([\"extra VJ\", \"extra VDJ\", \"two full chains\", \"multichain\"]))\n",
     "        / mdata[\"airr\"].n_obs\n",
     "    )\n",
     ")"
@@ -692,9 +685,7 @@
     }
    ],
    "source": [
-    "mu.pl.embedding(\n",
-    "    mdata, basis=\"gex:umap\", color=\"airr:chain_pairing\", groups=\"multichain\"\n",
-    ")"
+    "mu.pl.embedding(mdata, basis=\"gex:umap\", color=\"airr:chain_pairing\", groups=\"multichain\")"
    ]
   },
   {
@@ -724,9 +715,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "mu.pp.filter_obs(\n",
-    "    mdata, \"airr:chain_pairing\", lambda x: ~np.isin(x, [\"orphan VDJ\", \"orphan VJ\"])\n",
-    ")"
+    "mu.pp.filter_obs(mdata, \"airr:chain_pairing\", lambda x: ~np.isin(x, [\"orphan VDJ\", \"orphan VJ\"]))"
    ]
   },
   {
@@ -1031,9 +1020,7 @@
     }
    ],
    "source": [
-    "_ = ir.pl.clonotype_network(\n",
-    "    mdata, color=\"gex:source\", base_size=20, label_fontsize=9, panel_size=(7, 7)\n",
-    ")"
+    "_ = ir.pl.clonotype_network(mdata, color=\"gex:source\", base_size=20, label_fontsize=9, panel_size=(7, 7))"
    ]
   },
   {
@@ -1163,9 +1150,7 @@
     }
    ],
    "source": [
-    "ir.tl.define_clonotype_clusters(\n",
-    "    mdata, sequence=\"aa\", metric=\"alignment\", receptor_arms=\"all\", dual_ir=\"any\"\n",
-    ")"
+    "ir.tl.define_clonotype_clusters(mdata, sequence=\"aa\", metric=\"alignment\", receptor_arms=\"all\", dual_ir=\"any\")"
    ]
   },
   {
@@ -1215,9 +1200,7 @@
     }
    ],
    "source": [
-    "_ = ir.pl.clonotype_network(\n",
-    "    mdata, color=\"gex:patient\", label_fontsize=9, panel_size=(7, 7), base_size=20\n",
-    ")"
+    "_ = ir.pl.clonotype_network(mdata, color=\"gex:patient\", label_fontsize=9, panel_size=(7, 7), base_size=20)"
    ]
   },
   {
@@ -1664,9 +1647,7 @@
     }
    ],
    "source": [
-    "mu.pl.embedding(\n",
-    "    mdata, basis=\"gex:umap\", color=[\"airr:clonal_expansion\", \"airr:clone_id_size\"]\n",
-    ")"
+    "mu.pl.embedding(mdata, basis=\"gex:umap\", color=[\"airr:clonal_expansion\", \"airr:clone_id_size\"])"
    ]
   },
   {
@@ -1705,9 +1686,7 @@
     }
    ],
    "source": [
-    "_ = ir.pl.clonal_expansion(\n",
-    "    mdata, target_col=\"clone_id\", groupby=\"gex:cluster\", breakpoints=(1, 2, 5), normalize=False\n",
-    ")"
+    "_ = ir.pl.clonal_expansion(mdata, target_col=\"clone_id\", groupby=\"gex:cluster\", breakpoints=(1, 2, 5), normalize=False)"
    ]
   },
   {
@@ -1794,9 +1773,7 @@
     }
    ],
    "source": [
-    "_ = ir.pl.alpha_diversity(\n",
-    "    mdata, metric=\"normalized_shannon_entropy\", groupby=\"gex:cluster\"\n",
-    ")"
+    "_ = ir.pl.alpha_diversity(mdata, metric=\"normalized_shannon_entropy\", groupby=\"gex:cluster\")"
    ]
   },
   {
@@ -1845,9 +1822,7 @@
     }
    ],
    "source": [
-    "_ = ir.pl.group_abundance(\n",
-    "    mdata, groupby=\"airr:clone_id\", target_col=\"gex:cluster\", max_cols=10\n",
-    ")"
+    "_ = ir.pl.group_abundance(mdata, groupby=\"airr:clone_id\", target_col=\"gex:cluster\", max_cols=10)"
    ]
   },
   {
@@ -1926,9 +1901,7 @@
     }
    ],
    "source": [
-    "_ = ir.pl.group_abundance(\n",
-    "    mdata, groupby=\"airr:clone_id\", target_col=\"gex:source\", max_cols=15, figsize=(5, 3)\n",
-    ")"
+    "_ = ir.pl.group_abundance(mdata, groupby=\"airr:clone_id\", target_col=\"gex:source\", max_cols=15, figsize=(5, 3))"
    ]
   },
   {
@@ -2143,9 +2116,7 @@
     "with ir.get.airr_context(mdata, \"v_call\"):\n",
     "    ir.pl.group_abundance(\n",
     "        mdata[\n",
-    "            mdata.obs[\"VDJ_1_v_call\"].isin(\n",
-    "                [\"TRBV20-1\", \"TRBV7-2\", \"TRBV28\", \"TRBV5-1\", \"TRBV7-9\"]\n",
-    "            ),\n",
+    "            mdata.obs[\"VDJ_1_v_call\"].isin([\"TRBV20-1\", \"TRBV7-2\", \"TRBV28\", \"TRBV5-1\", \"TRBV7-9\"]),\n",
     "            :,\n",
     "        ],\n",
     "        groupby=\"gex:cluster\",\n",
@@ -2399,9 +2370,7 @@
     "with ir.get.airr_context(mdata, \"v_call\"):\n",
     "    ir.pl.spectratype(\n",
     "        mdata[\n",
-    "            mdata.obs[\"VDJ_1_v_call\"].isin(\n",
-    "                [\"TRBV20-1\", \"TRBV7-2\", \"TRBV28\", \"TRBV5-1\", \"TRBV7-9\"]\n",
-    "            ),\n",
+    "            mdata.obs[\"VDJ_1_v_call\"].isin([\"TRBV20-1\", \"TRBV7-2\", \"TRBV28\", \"TRBV5-1\", \"TRBV7-9\"]),\n",
     "            :,\n",
     "        ],\n",
     "        chain=\"VDJ_1\",\n",
@@ -2755,9 +2724,7 @@
     }
    ],
    "source": [
-    "ir.pl.repertoire_overlap(\n",
-    "    mdata, \"gex:sample\", pair_to_plot=[\"LN2\", \"LT2\"], fig_kws={\"dpi\": 120}\n",
-    ")"
+    "ir.pl.repertoire_overlap(mdata, \"gex:sample\", pair_to_plot=[\"LN2\", \"LT2\"], fig_kws={\"dpi\": 120})"
    ]
   },
   {
@@ -3066,9 +3033,7 @@
    "source": [
     "# Since sc.tl.rank_genes_group does not support MuData, we need to temporarily add\n",
     "# the AIRR columns to the gene expression AnnData object\n",
-    "with ir.get.obs_context(\n",
-    "    mdata[\"gex\"], {\"cc_aa_alignment\": mdata.obs[\"airr:cc_aa_alignment\"]}\n",
-    ") as tmp_ad:\n",
+    "with ir.get.obs_context(mdata[\"gex\"], {\"cc_aa_alignment\": mdata.obs[\"airr:cc_aa_alignment\"]}) as tmp_ad:\n",
     "    sc.tl.rank_genes_groups(\n",
     "        tmp_ad,\n",
     "        \"cc_aa_alignment\",\n",
@@ -3078,9 +3043,7 @@
     "    )\n",
     "    fig, axs = plt.subplots(1, 2, figsize=(8, 4))\n",
     "    for ct, ax in zip(clonotypes_top_modularity, axs):\n",
-    "        sc.pl.rank_genes_groups_violin(\n",
-    "            tmp_ad, groups=[ct], n_genes=15, ax=ax, show=False, strip=False\n",
-    "        )"
+    "        sc.pl.rank_genes_groups_violin(tmp_ad, groups=[ct], n_genes=15, ax=ax, show=False, strip=False)"
    ]
   },
   {
@@ -3182,8 +3145,7 @@
     "    ax=ax2,\n",
     "    # increase size of highlighted dots\n",
     "    size=[\n",
-    "        80 if c in top_differential_clonotypes else 30\n",
-    "        for c in mdata.obs[\"airr:clone_id\"][mdata.mod[\"gex\"].obs_names]\n",
+    "        80 if c in top_differential_clonotypes else 30 for c in mdata.obs[\"airr:clone_id\"][mdata.mod[\"gex\"].obs_names]\n",
     "    ],\n",
     "    palette=cycler(color=mpl_cm.Dark2_r.colors),\n",
     ")"
@@ -3231,9 +3193,7 @@
    ],
    "source": [
     "# ir.tl.repertoire_overlap(mdata, \"gex:cluster\")\n",
-    "_ = ir.pl.repertoire_overlap(\n",
-    "    mdata, \"gex:cluster\", pair_to_plot=[\"CD8_Teff\", \"CD8_Trm\"], fig_kws={\"dpi\": 120}\n",
-    ")"
+    "_ = ir.pl.repertoire_overlap(mdata, \"gex:cluster\", pair_to_plot=[\"CD8_Teff\", \"CD8_Trm\"], fig_kws={\"dpi\": 120})"
    ]
   },
   {
@@ -3284,12 +3244,8 @@
     }
    ],
    "source": [
-    "with ir.get.obs_context(\n",
-    "    mdata[\"gex\"], {\"clone_id\": mdata.obs[\"airr:clone_id\"]}\n",
-    ") as tmp_ad:\n",
-    "    sc.tl.rank_genes_groups(\n",
-    "        tmp_ad, \"clone_id\", groups=[\"101\"], reference=\"68\", method=\"wilcoxon\"\n",
-    "    )\n",
+    "with ir.get.obs_context(mdata[\"gex\"], {\"clone_id\": mdata.obs[\"airr:clone_id\"]}) as tmp_ad:\n",
+    "    sc.tl.rank_genes_groups(tmp_ad, \"clone_id\", groups=[\"101\"], reference=\"68\", method=\"wilcoxon\")\n",
     "    sc.pl.rank_genes_groups_violin(tmp_ad, groups=\"101\", n_genes=15)"
    ]
   },

diff --git a/docs/tutorials/tutorial_io.ipynb b/docs/tutorials/tutorial_io.ipynb
diff --git a/pyproject.toml b/pyproject.toml
@@ -110,12 +110,16 @@ markers = [
 minversion = 6.0
 norecursedirs = [ '.*', 'build', 'dist', '*.egg', 'data', '__pycache__']
 
-[tool.black]
-line-length = 120
 
 [tool.ruff]
-src = ["src"]
 line-length = 120
+src = ["src"]
+extend-include = ["*.ipynb"]
+
+[tool.ruff.format]
+docstring-code-format = true
+
+[tool.ruff.lint]
 select = [
     "F",  # Errors detected by Pyflakes
     "E",  # Error detected by Pycodestyle
@@ -130,7 +134,7 @@ select = [
     "RUF100",  # Report unused noqa directives
 ]
 ignore = [
-    # line too long -> we accept long comment lines; black gets rid of long code lines
+    # line too long -> we accept long comment lines; formatter gets rid of long code lines
     "E501",
     # Do not assign a lambda expression, use a def -> lambda expression assignments are convenient
     "E731",
@@ -165,14 +169,15 @@ ignore = [
     "D301"
 ]
 
-[tool.ruff.pydocstyle]
+[tool.ruff.lint.pydocstyle]
 convention = "numpy"
 
-[tool.ruff.per-file-ignores]
+[tool.ruff.lint.per-file-ignores]
 "docs/*" = ["I"]
 "tests/*" = ["D"]
 "*/__init__.py" = ["F401"]
 "src/scirpy/datasets/_processing_scripts/*" = ["B018", "E402"]
+"*.ipynb" = ["E402"] # Module level import not at top of cell
 
 [tool.cruft]
 skip = [
@@ -183,5 +188,5 @@ skip = [
     "docs/changelog.md",
     "docs/references.bib",
     "docs/references.md",
-    "docs/notebooks/example.ipynb"
+    "docs/notebooks/example.ipynb",
 ]
diff --git a/src/scirpy/get/__init__.py b/src/scirpy/get/__init__.py
@@ -127,9 +127,7 @@ def obs_context(data: Union[AnnData, MuData], temp_cols: Union[pd.DataFrame, Map
                 "v_gene_primary_vj_chain": ir.get.airr(mdata, "v_call", "VJ_1"),
             },
         ) as m:
-            ir.pl.group_abundance(
-                m, groupby="v_gene_primary_vj_chain", target_col="new_col_with_constant_value"
-            )
+            ir.pl.group_abundance(m, groupby="v_gene_primary_vj_chain", target_col="new_col_with_constant_value")
 
 
     Parameters

diff --git a/src/scirpy/io/_datastructures.py b/src/scirpy/io/_datastructures.py
@@ -141,9 +141,7 @@ def add_chain(self, chain: Mapping) -> None:
             self._chain_fields = list(chain.keys())
 
         if "locus" not in chain:
-            self._logger.warning(
-                "`locus` field not specified, but required for most scirpy functionality. "
-            )  # type: ignore
+            self._logger.warning("`locus` field not specified, but required for most scirpy functionality. ")  # type: ignore
         elif chain["locus"] not in self.VALID_LOCI:
             self._logger.warning(f"Non-standard locus name: {chain['locus']} ")  # type: ignore
 

diff --git a/src/scirpy/ir_dist/_clonotype_neighbors.py b/src/scirpy/ir_dist/_clonotype_neighbors.py
@@ -160,7 +160,9 @@ def _add_distance_matrices(self) -> None:
                 )
 
             self.neighbor_finder.add_distance_matrix(
-                "v_gene", sp.identity(len(v_genes), dtype=bool, format="csr"), v_genes  # type: ignore
+                "v_gene",
+                sp.identity(len(v_genes), dtype=bool, format="csr"),
+                v_genes,  # type: ignore
             )
 
         if self.match_columns is not None:

diff --git a/src/scirpy/pl/_vdj_usage.py b/src/scirpy/pl/_vdj_usage.py
@@ -106,9 +106,7 @@ def vdj_usage(
     )
     df = get_airr(params, airr_variables, chains).assign(
         # make sure this also works with mudata columns:
-        cell_weights=_normalize_counts(tmp_obs, normalize_to)
-        if isinstance(normalize_to, (bool, str))
-        else normalize_to
+        cell_weights=_normalize_counts(tmp_obs, normalize_to) if isinstance(normalize_to, (bool, str)) else normalize_to
     )
     for col in df.columns:
         if col.startswith("VJ") or col.startswith("VDJ"):