diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml
index 0fc30cc6b3a..2e615f3d429 100644
--- a/.github/workflows/ci-additional.yaml
+++ b/.github/workflows/ci-additional.yaml
@@ -35,14 +35,13 @@ jobs:
     runs-on: "ubuntu-latest"
     needs: detect-ci-trigger
     if: needs.detect-ci-trigger.outputs.triggered == 'false'
+
     defaults:
       run:
         shell: bash -l {0}
-
     env:
       CONDA_ENV_FILE: ci/requirements/environment.yml
       PYTHON_VERSION: "3.11"
-
     steps:
       - uses: actions/checkout@v4
         with:
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index f6cc2bbb834..da7402a0708 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -127,6 +127,14 @@ jobs:
         run: |
           python -c "import xarray"

+      - name: Restore cached hypothesis directory
+        uses: actions/cache@v4
+        with:
+          path: .hypothesis/
+          key: cache-hypothesis
+          enableCrossOsArchive: true
+          save-always: true
+
       - name: Run tests
         run: python -m pytest -n 4
           --timeout 180
diff --git a/.github/workflows/hypothesis.yaml b/.github/workflows/hypothesis.yaml
new file mode 100644
index 00000000000..7e9459c6598
--- /dev/null
+++ b/.github/workflows/hypothesis.yaml
@@ -0,0 +1,100 @@
+name: Slow Hypothesis CI
+on:
+  push:
+    branches:
+      - "main"
+  pull_request:
+    branches:
+      - "main"
+    types: [opened, reopened, synchronize, labeled]
+  workflow_dispatch: # allows you to trigger manually
+
+jobs:
+  detect-ci-trigger:
+    name: detect ci trigger
+    runs-on: ubuntu-latest
+    if: |
+      github.repository == 'pydata/xarray'
+      && (github.event_name == 'push' || github.event_name == 'pull_request')
+    outputs:
+      triggered: ${{ steps.detect-trigger.outputs.trigger-found }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 2
+      - uses: xarray-contrib/ci-trigger@v1
+        id: detect-trigger
+        with:
+          keyword: "[skip-ci]"
+
+  hypothesis:
+    name: Slow Hypothesis Tests
+    runs-on: "ubuntu-latest"
+    needs: detect-ci-trigger
+    if: |
+      always()
+      && (
+        (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
+        || needs.detect-ci-trigger.outputs.triggered == 'true'
+        || contains( github.event.pull_request.labels.*.name, 'run-slow-hypothesis')
+      )
+    defaults:
+      run:
+        shell: bash -l {0}
+
+    env:
+      CONDA_ENV_FILE: ci/requirements/environment.yml
+      PYTHON_VERSION: "3.12"
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0 # Fetch all history for all branches and tags.
+
+      - name: set environment variables
+        run: |
+          echo "TODAY=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
+
+      - name: Setup micromamba
+        uses: mamba-org/setup-micromamba@v1
+        with:
+          environment-file: ci/requirements/environment.yml
+          environment-name: xarray-tests
+          create-args: >-
+            python=${{env.PYTHON_VERSION}}
+            pytest-reportlog
+          cache-environment: true
+          cache-environment-key: "${{runner.os}}-${{runner.arch}}-py${{env.PYTHON_VERSION}}-${{env.TODAY}}-${{hashFiles(env.CONDA_ENV_FILE)}}"
+
+      - name: Install xarray
+        run: |
+          python -m pip install --no-deps -e .
+
+      - name: Version info
+        run: |
+          conda info -a
+          conda list
+          python xarray/util/print_versions.py
+
+      - name: Restore cached hypothesis directory
+        uses: actions/cache@v4
+        with:
+          path: .hypothesis/
+          key: cache-hypothesis
+          enableCrossOsArchive: true
+          save-always: true
+
+      - name: Run slow Hypothesis tests
+        if: success()
+        id: status
+        run: |
+          python -m pytest --hypothesis-show-statistics --run-slow-hypothesis properties/*.py \
+            --report-log output-${{ matrix.python-version }}-log.jsonl
+
+      - name: Generate and publish the report
+        if: |
+          failure()
+          && steps.status.outcome == 'failure'
+          && github.event_name == 'schedule'
+          && github.repository_owner == 'pydata'
+        uses: xarray-contrib/issue-from-pytest-log@v1
+        with:
+          log-path: output-${{ matrix.python-version }}-log.jsonl
+          issue-title: "Nightly Hypothesis tests failed"
+          issue-label: "topic-hypothesis"
diff --git a/properties/conftest.py b/properties/conftest.py
index 0a66d92ebc6..30e638161a1 100644
--- a/properties/conftest.py
+++ b/properties/conftest.py
@@ -1,3 +1,24 @@
+import pytest
+
+
+def pytest_addoption(parser):
+    parser.addoption(
+        "--run-slow-hypothesis",
+        action="store_true",
+        default=False,
+        help="run slow hypothesis tests",
+    )
+
+
+def pytest_collection_modifyitems(config, items):
+    if config.getoption("--run-slow-hypothesis"):
+        return
+    skip_slow_hyp = pytest.mark.skip(reason="need --run-slow-hypothesis option to run")
+    for item in items:
+        if "slow_hypothesis" in item.keywords:
+            item.add_marker(skip_slow_hyp)
+
+
 try:
     from hypothesis import settings
 except ImportError:
diff --git a/properties/test_index_manipulation.py b/properties/test_index_manipulation.py
new file mode 100644
index 00000000000..77b7fcbcd99
--- /dev/null
+++ b/properties/test_index_manipulation.py
@@ -0,0 +1,273 @@
+import itertools
+
+import numpy as np
+import pytest
+
+import xarray as xr
+from xarray import Dataset
+from xarray.testing import _assert_internal_invariants
+
+pytest.importorskip("hypothesis")
+pytestmark = pytest.mark.slow_hypothesis
+
+import hypothesis.extra.numpy as npst
+import hypothesis.strategies as st
+from hypothesis import note, settings
+from hypothesis.stateful import (
+    RuleBasedStateMachine,
+    initialize,
+    invariant,
+    precondition,
+    rule,
+)
+
+import xarray.testing.strategies as xrst
+
+
+@st.composite
+def unique(draw, strategy):
+    # https://stackoverflow.com/questions/73737073/create-hypothesis-strategy-that-returns-unique-values
+    seen = draw(st.shared(st.builds(set), key="key-for-unique-elems"))
+    return draw(
+        strategy.filter(lambda x: x not in seen).map(lambda x: seen.add(x) or x)
+    )
+
+
+# Share to ensure we get unique names on each draw,
+# so we don't try to add two variables with the same name
+# or stack to a dimension with a name that already exists in the Dataset.
+UNIQUE_NAME = unique(strategy=xrst.names())
+DIM_NAME = xrst.dimension_names(name_strategy=UNIQUE_NAME, min_dims=1, max_dims=1)
+index_variables = st.builds(
+    xr.Variable,
+    data=npst.arrays(
+        dtype=xrst.pandas_index_dtypes(),
+        shape=npst.array_shapes(min_dims=1, max_dims=1),
+        elements=dict(allow_nan=False, allow_infinity=False, allow_subnormal=False),
+        unique=True,
+    ),
+    dims=DIM_NAME,
+    attrs=xrst.attrs(),
+)
+
+
+def add_dim_coord_and_data_var(ds, var):
+    (name,) = var.dims
+    # dim coord
+    ds[name] = var
+    # non-dim coord of same size; this allows renaming
+    ds[name + "_"] = var
+
+
+class DatasetStateMachine(RuleBasedStateMachine):
+    # Can't use bundles because we'd need pre-conditions on consumes(bundle)
+    # indexed_dims = Bundle("indexed_dims")
+    # multi_indexed_dims = Bundle("multi_indexed_dims")
+
+    def __init__(self):
+        super().__init__()
+        self.dataset = Dataset()
+        self.check_default_indexes = True
+
+        # We track these separately as lists so we can guarantee order of iteration over them.
+        # Order of iteration over Dataset.dims is not guaranteed
+        self.indexed_dims = []
+        self.multi_indexed_dims = []
+
+    @initialize(var=index_variables)
+    def init_ds(self, var):
+        """Initialize the Dataset so that at least one rule will always fire."""
+        (name,) = var.dims
+        add_dim_coord_and_data_var(self.dataset, var)
+
+        self.indexed_dims.append(name)
+
+    # TODO: stacking with a timedelta64 index and unstacking converts it to object
+    @rule(var=index_variables)
+    def add_dim_coord(self, var):
+        (name,) = var.dims
+        note(f"adding dimension coordinate {name}")
+        add_dim_coord_and_data_var(self.dataset, var)
+
+        self.indexed_dims.append(name)
+
+    @rule(var=index_variables)
+    def assign_coords(self, var):
+        (name,) = var.dims
+        note(f"assign_coords: {name}")
+        self.dataset = self.dataset.assign_coords({name: var})
+
+        self.indexed_dims.append(name)
+
+    @property
+    def has_indexed_dims(self) -> bool:
+        return bool(self.indexed_dims + self.multi_indexed_dims)
+
+    @rule(data=st.data())
+    @precondition(lambda self: self.has_indexed_dims)
+    def reset_index(self, data):
+        dim = data.draw(st.sampled_from(self.indexed_dims + self.multi_indexed_dims))
+        self.check_default_indexes = False
+        note(f"> resetting {dim}")
+        self.dataset = self.dataset.reset_index(dim)
+
+        if dim in self.indexed_dims:
+            del self.indexed_dims[self.indexed_dims.index(dim)]
+        elif dim in self.multi_indexed_dims:
+            del self.multi_indexed_dims[self.multi_indexed_dims.index(dim)]
+
+    @rule(newname=UNIQUE_NAME, data=st.data(), create_index=st.booleans())
+    @precondition(lambda self: bool(self.indexed_dims))
+    def stack(self, newname, data, create_index):
+        oldnames = data.draw(
+            st.lists(
+                st.sampled_from(self.indexed_dims),
+                min_size=1,
+                max_size=3 if create_index else None,
+                unique=True,
+            )
+        )
+        note(f"> stacking {oldnames} as {newname}")
+        self.dataset = self.dataset.stack(
+            {newname: oldnames}, create_index=create_index
+        )
+
+        if create_index:
+            self.multi_indexed_dims += [newname]
+
+        # if create_index is False, then we just drop these
+        for dim in oldnames:
+            del self.indexed_dims[self.indexed_dims.index(dim)]
+
+    @rule(data=st.data())
+    @precondition(lambda self: bool(self.multi_indexed_dims))
+    def unstack(self, data):
+        # TODO: add None
+        dim = data.draw(st.sampled_from(self.multi_indexed_dims))
+        note(f"> unstacking {dim}")
+        if dim is not None:
+            pd_index = self.dataset.xindexes[dim].index
+        self.dataset = self.dataset.unstack(dim)
+
+        del self.multi_indexed_dims[self.multi_indexed_dims.index(dim)]
+
+        if dim is not None:
+            self.indexed_dims.extend(pd_index.names)
+        else:
+            # TODO: fix this
+            pass
+
+    @rule(newname=UNIQUE_NAME, data=st.data())
+    @precondition(lambda self: bool(self.dataset.variables))
+    def rename_vars(self, newname, data):
+        dim = data.draw(st.sampled_from(sorted(self.dataset.variables)))
+        # benbovy: "skip the default indexes invariant test when the name of an
+        # existing dimension coordinate is passed as input kwarg or dict key
+        # to .rename_vars()."
+        self.check_default_indexes = False
+        note(f"> renaming {dim} to {newname}")
+        self.dataset = self.dataset.rename_vars({dim: newname})
+
+        if dim in self.indexed_dims:
+            del self.indexed_dims[self.indexed_dims.index(dim)]
+        elif dim in self.multi_indexed_dims:
+            del self.multi_indexed_dims[self.multi_indexed_dims.index(dim)]
+
+    @precondition(lambda self: bool(self.dataset.dims))
+    @rule(data=st.data())
+    def drop_dims(self, data):
+        dims = data.draw(
+            st.lists(
+                st.sampled_from(sorted(tuple(self.dataset.dims))),
+                min_size=1,
+                unique=True,
+            )
+        )
+        note(f"> drop_dims: {dims}")
+        self.dataset = self.dataset.drop_dims(dims)
+
+        for dim in dims:
+            if dim in self.indexed_dims:
+                del self.indexed_dims[self.indexed_dims.index(dim)]
+            elif dim in self.multi_indexed_dims:
+                del self.multi_indexed_dims[self.multi_indexed_dims.index(dim)]
+
+    @precondition(lambda self: bool(self.indexed_dims))
+    @rule(data=st.data())
+    def drop_indexes(self, data):
+        self.check_default_indexes = False
+
+        dims = data.draw(
+            st.lists(st.sampled_from(self.indexed_dims), min_size=1, unique=True)
+        )
+        note(f"> drop_indexes: {dims}")
+        self.dataset = self.dataset.drop_indexes(dims)
+
+        for dim in dims:
+            if dim in self.indexed_dims:
+                del self.indexed_dims[self.indexed_dims.index(dim)]
+            elif dim in self.multi_indexed_dims:
+                del self.multi_indexed_dims[self.multi_indexed_dims.index(dim)]
+
+    @property
+    def swappable_dims(self):
+        ds = self.dataset
+        options = []
+        for dim in self.indexed_dims:
+            choices = [
+                name
+                for name, var in ds._variables.items()
+                if var.dims == (dim,)
+                # TODO: Avoid swapping a dimension to itself
+                and name != dim
+            ]
+            options.extend(
+                (a, b) for a, b in itertools.zip_longest((dim,), choices, fillvalue=dim)
+            )
+        return options

+    @rule(data=st.data())
+    # TODO: swap_dims is basically all broken if a multiindex is present
+    # TODO: Avoid swapping from Index to a MultiIndex level
+    # TODO: Avoid swapping from MultiIndex to a level of the same MultiIndex
+    # TODO: Avoid swapping when a MultiIndex is present
+    @precondition(lambda self: not bool(self.multi_indexed_dims))
+    @precondition(lambda self: bool(self.swappable_dims))
+    def swap_dims(self, data):
+        ds = self.dataset
+        options = self.swappable_dims
+        dim, to = data.draw(st.sampled_from(options))
+        note(
+            f"> swapping {dim} to {to}, found swappable dims: {options}, all_dims: {tuple(self.dataset.dims)}"
+        )
+        self.dataset = ds.swap_dims({dim: to})
+
+        del self.indexed_dims[self.indexed_dims.index(dim)]
+        self.indexed_dims += [to]
+
+    @invariant()
+    def assert_invariants(self):
+        # note(f"> ===\n\n {self.dataset!r} \n===\n\n")
+        _assert_internal_invariants(self.dataset, self.check_default_indexes)
+
+
+DatasetStateMachine.TestCase.settings = settings(max_examples=300, deadline=None)
+DatasetTest = DatasetStateMachine.TestCase
+
+
+@pytest.mark.skip(reason="failure detected by hypothesis")
+def test_unstack_object():
+    import xarray as xr
+
+    ds = xr.Dataset()
+    ds["0"] = np.array(["", "\x000"], dtype=object)
+    ds.stack({"1": ["0"]}).unstack()
+
+
+@pytest.mark.skip(reason="failure detected by hypothesis")
+def test_unstack_timedelta_index():
+    import xarray as xr
+
+    ds = xr.Dataset()
+    ds["0"] = np.array([0, 1, 2, 3], dtype="timedelta64[ns]")
+    ds.stack({"1": ["0"]}).unstack()
diff --git a/pyproject.toml b/pyproject.toml
index 7836cba40d4..751c9085ec8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -294,6 +294,7 @@ markers = [
   "flaky: flaky tests",
   "network: tests requiring a network connection",
   "slow: slow tests",
+  "slow_hypothesis: slow hypothesis tests",
 ]
 minversion = "7"
 python_files = "test_*.py"
diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py
index c5a7afdf54e..d2503dfd535 100644
--- a/xarray/testing/strategies.py
+++ b/xarray/testing/strategies.py
@@ -21,6 +21,7 @@

 __all__ = [
     "supported_dtypes",
+    "pandas_index_dtypes",
     "names",
     "dimension_names",
     "dimension_sizes",
@@ -59,6 +60,26 @@ def supported_dtypes() -> st.SearchStrategy[np.dtype]:
         | npst.unsigned_integer_dtypes()
         | npst.floating_dtypes()
         | npst.complex_number_dtypes()
+        # | npst.datetime64_dtypes()
+        # | npst.timedelta64_dtypes()
+        # | npst.unicode_string_dtypes()
+    )
+
+
+def pandas_index_dtypes() -> st.SearchStrategy[np.dtype]:
+    """
+    Dtypes supported by pandas indexes.
+    Restrict datetime64 and timedelta64 to ns frequency till Xarray relaxes that.
+    """
+    return (
+        npst.integer_dtypes(endianness="=", sizes=(32, 64))
+        | npst.unsigned_integer_dtypes(endianness="=", sizes=(32, 64))
+        | npst.floating_dtypes(endianness="=", sizes=(32, 64))
+        # TODO: unset max_period
+        | npst.datetime64_dtypes(endianness="=", max_period="ns")
+        # TODO: set max_period="D"
+        | npst.timedelta64_dtypes(endianness="=", max_period="ns")
+        | npst.unicode_string_dtypes(endianness="=")
     )


@@ -87,6 +108,7 @@ def names() -> st.SearchStrategy[str]:

 def dimension_names(
     *,
+    name_strategy=names(),
     min_dims: int = 0,
     max_dims: int = 3,
 ) -> st.SearchStrategy[list[Hashable]]:
@@ -97,6 +119,8 @@ def dimension_names(

     Parameters
     ----------
+    name_strategy
+        Strategy for making names. Useful if we need to share this.
     min_dims
         Minimum number of dimensions in generated list.
     max_dims
@@ -104,7 +128,7 @@ def dimension_names(
     """

     return st.lists(
-        elements=names(),
+        elements=name_strategy,
         min_size=min_dims,
         max_size=max_dims,
         unique=True,
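
Note (not part of the diff): a minimal sketch of the shared-name pattern the state machine above relies on, i.e. the new `name_strategy` argument to `xarray.testing.strategies.dimension_names` combined with a `unique`-style composite strategy. The strategy key and the test function name below are hypothetical illustrations, not code from this PR.

import hypothesis.strategies as st
from hypothesis import given

import xarray.testing.strategies as xrst


@st.composite
def unique_names(draw, strategy=xrst.names()):
    # the seen-set lives in a shared strategy, so every draw within one
    # example mutates the same set and never repeats a name
    seen = draw(st.shared(st.builds(set), key="unique-names-example"))
    return draw(
        strategy.filter(lambda x: x not in seen).map(lambda x: seen.add(x) or x)
    )


@given(
    # `name_strategy` is the keyword this PR adds to dimension_names()
    first=xrst.dimension_names(name_strategy=unique_names(), min_dims=1, max_dims=1),
    second=xrst.dimension_names(name_strategy=unique_names(), min_dims=1, max_dims=1),
)
def test_shared_name_strategy_never_collides(first, second):
    # hypothetical check mirroring why DatasetStateMachine shares one name
    # strategy between UNIQUE_NAME and DIM_NAME: two separate draws cannot
    # produce the same name within a single example
    assert set(first).isdisjoint(second)

Sharing the seen-set via st.shared is what keeps newly stacked dimension names from clashing with dimensions already present in the Dataset during a state-machine run.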