diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 000000000..79b57dafd
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,211 @@
+name: CI
+
+on:
+  pull_request:
+    branches:
+      - master
+  push:
+    branches:
+      - master
+  schedule:
+    - cron: '0 2 * * 0'  # weekly: 02:00 UTC every Sunday
+
+env:
+  RUSTFLAGS: -Dwarnings  # deny all compiler warnings
+  RUST_BACKTRACE: 1
+  nightly: nightly-2024-09-15  # pinned nightly toolchain, read as $nightly by the jobs below
+
+defaults:
+  run:
+    shell: bash
+
+jobs:
+  # Check formatting: fails when `cargo fmt --all --check` finds unformatted code
+  rustfmt:
+    name: rustfmt
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install Rust
+        run: rustup update stable
+      - name: Check formatting
+        run: cargo fmt --all --check
+
+  # TODO: enable the clippy job below (and its entry in publish_docs `needs`)
+  # # Apply clippy lints
+  # clippy:
+  #   name: clippy
+  #   runs-on: ubuntu-latest
+  #   steps:
+  #   - uses: actions/checkout@v4
+  #   - name: Apply clippy lints
+  #     run: cargo clippy --all-features
+
+  # Check against the minimum Rust version supported by Bytes: cargo-hack's
+  # `--rust-version` takes it from `rust-version` in Cargo.toml. Updating this
+  # should be done in a dedicated PR.
+  #
+  # Only `check` is run, as tests may require newer versions of rust.
+  minrust:
+    name: minrust
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install cargo-hack
+        uses: taiki-e/install-action@cargo-hack
+      - name: Check
+        run: cargo hack check --feature-powerset --optional-deps --rust-version
+
+  # Stable: run ci/test-stable.sh with the stable toolchain on every runner image in the matrix
+  stable:
+    name: stable
+    strategy:
+      matrix:
+        os:
+          - ubuntu-latest
+          - ubuntu-22.04-arm # TODO: update to 24.04 once https://github.com/rust-lang/rust/issues/135867 is resolved
+          - macos-latest
+          - windows-latest
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install Rust
+        # --no-self-update is necessary because the Windows environment cannot self-update rustup.exe.
+        run: rustup update stable --no-self-update
+      - name: Install cargo-hack
+        uses: taiki-e/install-action@cargo-hack
+      - name: Test
+        run: ci/test-stable.sh test
+
+  # Nightly: same test script as the stable job, run with the pinned $nightly toolchain
+  nightly:
+    name: nightly
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install Rust
+        run: rustup update $nightly && rustup default $nightly
+      - name: Install cargo-hack
+        uses: taiki-e/install-action@cargo-hack
+      - name: Test
+        run: ci/test-stable.sh test
+
+  panic-abort:  # runs ci/panic-abort.sh with the pinned $nightly toolchain
+    name: panic=abort tests
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install Rust
+        run: rustup update $nightly && rustup default $nightly
+      - name: Run tests with -Cpanic=abort
+        run: ci/panic-abort.sh
+
+  # Run `cargo test --target <target>` for each extra platform in the matrix (stable toolchain)
+  cross:
+    name: cross
+    strategy:
+      matrix:
+        include:
+          - target: i686-unknown-linux-gnu
+            os: ubuntu-latest
+          - target: armv7-unknown-linux-gnueabihf
+            os: ubuntu-22.04-arm # TODO: update to 24.04 once https://github.com/rust-lang/rust/issues/135867 is resolved
+          - target: powerpc-unknown-linux-gnu
+            os: ubuntu-latest
+          - target: powerpc64-unknown-linux-gnu
+            os: ubuntu-latest
+          - target: wasm32-wasip1
+            os: ubuntu-latest
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install Rust
+        run: rustup update stable
+      - uses: taiki-e/setup-cross-toolchain-action@v1
+        with:
+          target: ${{ matrix.target }}
+      - name: Test
+        run: cargo test --target ${{ matrix.target }}
+
+  # Build (no tests) for no_std targets, skipping the `std` and `default` features.
+  no-std:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install Rust
+        run: rustup update stable
+      - name: Install cargo-hack
+        uses: taiki-e/install-action@cargo-hack
+      # thumbv6m-none-eabi supports atomics, but not atomic CAS.
+      # thumbv7m-none-eabi supports atomic CAS.
+      - run: rustup target add thumbv6m-none-eabi thumbv7m-none-eabi
+      # * --optional-deps is needed for the serde feature
+      # * --no-dev-deps is needed to avoid https://github.com/rust-lang/cargo/issues/4866
+      - run: cargo hack build --target thumbv7m-none-eabi --feature-powerset --skip std,default --optional-deps --no-dev-deps
+      # A sound way to provide atomic CAS on platforms without native atomic CAS is system-dependent.
+      # portable-atomic provides the major ways via cfgs and accepts user-defined implementations via its critical-section feature.
+      - run: cargo hack build --target thumbv6m-none-eabi --feature-powerset --skip std,default --optional-deps --no-dev-deps --features extra-platforms,extra-platforms/critical-section
+
+  # Sanitizers: run ci/tsan.sh with the pinned $nightly toolchain (rust-src is installed first)
+  tsan:
+    name: tsan
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install Rust
+        run: rustup update $nightly && rustup default $nightly
+      - name: Install rust-src
+        run: rustup component add rust-src
+      - name: ASAN / TSAN
+        run: ci/tsan.sh
+
+  miri:  # runs ci/miri.sh with the pinned $nightly toolchain
+    name: miri
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install Rust
+        run: rustup update $nightly && rustup default $nightly
+      - name: Miri
+        run: ci/miri.sh
+
+  # Loom: run the library tests with `--cfg loom`; -Dwarnings is repeated because the inline RUSTFLAGS replaces the workflow-level value
+  loom:
+    name: loom
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install Rust
+        run: rustup update $nightly && rustup default $nightly
+      - name: Loom tests
+        run: RUSTFLAGS="--cfg loom -Dwarnings" cargo test --lib
+
+  publish_docs:  # builds docs once the `needs` jobs pass; pushes to gh-pages only for master pushes in tokio-rs/bytes
+    name: Publish Documentation
+    needs:
+      - rustfmt
+      # - clippy
+      - stable
+      - nightly
+      - minrust
+      - cross
+      - tsan
+      - miri
+      - loom
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install Rust
+        run: rustup update $nightly && rustup default $nightly
+      - name: Build documentation
+        run: cargo doc --no-deps --all-features
+        env:
+          RUSTDOCFLAGS: --cfg docsrs
+      - name: Publish documentation
+        run: |
+          cd target/doc
+          git init
+          git add .
+          git -c user.name='ci' -c user.email='ci' commit -m 'Deploy Bytes API documentation'
+          git push -f -q https://git:${{ secrets.github_token }}@github.com/${{ github.repository }} HEAD:gh-pages
+        if: github.event_name == 'push' && github.event.ref == 'refs/heads/master' && github.repository == 'tokio-rs/bytes'
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 21a51ab0c..a98a5c981 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,279 @@
+# 1.10.1 (March 5th, 2025)
+
+### Fixed
+
+- Fix memory leak when using `to_vec` with `Bytes::from_owner` (#773)
+
+# 1.10.0 (February 3rd, 2025)
+
+### Added
+
+- Add feature to support platforms without atomic CAS (#467)
+- `try_get_*` methods for `Buf` trait (#753)
+- Implement `Buf::chunks_vectored` for `Take` (#617)
+- Implement `Buf::chunks_vectored` for `VecDeque<u8>` (#708)
+
+### Fixed
+
+- Remove incorrect guarantee for `chunks_vectored` (#754)
+- Ensure that tests pass under `panic=abort` (#749)
+
+# 1.9.0 (November 27, 2024)
+
+### Added
+
+- Add `Bytes::from_owner` to enable externally-allocated memory (#742)
+
+### Documented
+
+- Fix typo in Buf::chunk() comment (#744)
+
+### Internal changes
+
+- Replace BufMut::put with BufMut::put_slice in Writer impl (#745)
+- Rename hex_impl! to fmt_impl! and reuse it for fmt::Debug (#743)
+
+# 1.8.0 (October 21, 2024)
+
+- Guarantee address in `split_off`/`split_to` for empty slices (#740)
+
+# 1.7.2 (September 17, 2024)
+
+### Fixed
+
+- Fix default impl of `Buf::{get_int, get_int_le}` (#732)
+
+### Documented
+
+- Fix double spaces in comments and doc comments (#731)
+
+### Internal changes
+
+- Ensure BytesMut::advance reduces capacity (#728)
+
+# 1.7.1 (August 1, 2024)
+
+This release reverts the following change due to a regression:
+
+- Reuse capacity when possible in `<BytesMut as Buf>::advance` impl (#698)
+
+The revert can be found at #726.
+
+# 1.7.0 (July 31, 2024)
+
+### Added
+
+- Add conversion from `Bytes` to `BytesMut` (#695, #710)
+- Add reclaim method without additional allocation (#686)
+
+### Documented
+
+- Clarify how `BytesMut::zeroed` works (#714)
+- Clarify the behavior of `Buf::chunk` (#717)
+
+### Changed
+
+- Change length condition of `BytesMut::truncate`
+- Reuse capacity when possible in `<BytesMut as Buf>::advance` impl (#698)
+- Improve `must_use` suggestion of `BytesMut::split` (#699)
+
+### Internal changes
+
+- Use `ManuallyDrop` instead of `mem::forget` (#678)
+- Don't set `len` in `BytesMut::reserve` (#682)
+- Optimize `Bytes::copy_to_bytes` (#688)
+- Refactor `BytesMut::truncate` (#694)
+- Refactor `BytesMut::resize` (#696)
+- Reorder assertion in `Bytes::split_to`, `Bytes::split_off` (#689, #693)
+- Use `offset_from` in more places (#705)
+- Correct the wrong usage of `IntoIter` (#707)
+
+# 1.6.1 (July 13, 2024)
+
+This release fixes a bug where `Bytes::is_unique` returns incorrect values when
+the `Bytes` originates from a shared `BytesMut`. (#718)
+
+# 1.6.0 (March 22, 2024)
+
+### Added
+
+- Add `Bytes::is_unique` (#643)
+
+### Documented
+
+- Fix changelog typo (#628)
+- Fix some spelling mistakes (#633)
+- Typo fix (#637)
+- Fix broken links (#639)
+- Add security policy (#649)
+
+### Internal changes
+
+- Move comment to correct constant (#629)
+- Various cleanup (#635)
+- Simplify `UninitSlice::as_uninit_slice_mut()` logic (#644)
+- Use `self.` instead of `Self::` (#642)
+- `BytesMut`: Assert alignment of `Shared` (#652)
+- Remove unnecessary namespace qualifier (#660)
+- Remove an unnecessary else branch (#662)
+- Remove unreachable else branch (#661)
+- make parameter mut in `From<Vec>` (#667)
+- Restore commented tests (#665)
+- Use `sub` instead of `offset` (#668)
+- Calculate original capacity only if necessary (#666)
+- `set_vec_pos` does not need a second parameter (#672)
+- `get_vec_pos`: use `&self` instead of `&mut self` (#670)
+- Refactor `split_at`/`split_to` (#663)
+- Use `Iterator` from the prelude (#673)
+- `copy_to_bytes`: Add panic section to docs (#676)
+- Remove redundant reserve call (#674)
+- Use `ManuallyDrop` instead of `mem::forget` (#675)
+
+
+# 1.5.0 (September 7, 2023)
+
+### Added
+
+- Add `UninitSlice::{new,uninit}` (#598, #599)
+- Implement `BufMut` for `&mut [MaybeUninit<u8>]` (#597)
+
+### Changed
+
+- Mark `BytesMut::extend_from_slice` as inline (#595)
+
+# 1.4.0 (January 31, 2023)
+
+### Added
+
+- Make `IntoIter` constructor public (#581)
+
+### Fixed
+
+- Avoid large reallocations when freezing `BytesMut` (#592)
+
+### Documented
+
+- Document which functions require `std` (#591)
+- Fix duplicate "the the" typos (#585)
+
+# 1.3.0 (November 20, 2022)
+
+### Added
+
+- Rename and expose `BytesMut::spare_capacity_mut` (#572)
+- Implement native-endian get and put functions for `Buf` and `BufMut` (#576)
+
+### Fixed
+
+- Don't have important data in unused capacity when calling reserve (#563)
+
+### Documented
+
+- `Bytes::new` etc should return `Self` not `Bytes` (#568)
+
+# 1.2.1 (July 30, 2022)
+
+### Fixed
+
+- Fix unbounded memory growth when using `reserve` (#560)
+
+# 1.2.0 (July 19, 2022)
+
+### Added
+
+- Add `BytesMut::zeroed` (#517)
+- Implement `Extend<Bytes>` for `BytesMut` (#527)
+- Add conversion from `BytesMut` to `Vec<u8>` (#543, #554)
+- Add conversion from `Bytes` to `Vec<u8>` (#547)
+- Add `UninitSlice::as_uninit_slice_mut()` (#548)
+- Add const to `Bytes::{len,is_empty}` (#514)
+
+### Changed
+
+- Reuse vector in `BytesMut::reserve` (#539, #544)
+
+### Fixed
+
+- Make miri happy (#515, #523, #542, #545, #553)
+- Make tsan happy (#541)
+- Fix `remaining_mut()` on chain (#488)
+- Fix amortized asymptotics of `BytesMut` (#555)
+
+### Documented
+
+- Redraw layout diagram with box drawing characters (#539)
+- Clarify `BytesMut::unsplit` docs (#535)
+
+# 1.1.0 (August 25, 2021)
+
+### Added
+
+- `BufMut::put_bytes(self, val, cnt)` (#487)
+- Implement `From<Box<[u8]>>` for `Bytes` (#504)
+
+### Changed
+
+- Override `put_slice` for `&mut [u8]` (#483)
+- Panic on integer overflow in `Chain::remaining` (#482)
+- Add inline tags to `UninitSlice` methods (#443)
+- Override `copy_to_bytes` for Chain and Take (#481)
+- Keep capacity when unsplit on empty other buf (#502)
+
+### Documented
+
+- Clarify `BufMut` allocation guarantees (#501)
+- Clarify `BufMut::put_int` behavior (#486)
+- Clarify actions of `clear` and `truncate`. (#508)
+
+# 1.0.1 (January 11, 2021)
+
+### Changed
+- mark `Vec::put_slice` with `#[inline]` (#459)
+
+### Fixed
+- Fix deprecation warning (#457)
+- use `Box::into_raw` instead of `mem::forget`-in-disguise (#458)
+
+# 1.0.0 (December 22, 2020)
+
+### Changed
+- Rename `Buf`/`BufMut` methods `bytes()` and `bytes_mut()` to `chunk()` and `chunk_mut()` (#450)
+
+### Removed
+- remove unused Buf implementation. (#449)
+
+# 0.6.0 (October 21, 2020)
+
+API polish in preparation for a 1.0 release.
+
+### Changed
+- `BufMut` is now an `unsafe` trait (#432).
+- `BufMut::bytes_mut()` returns `&mut UninitSlice`, a type owned by `bytes` to
+  avoid undefined behavior (#433).
+- `Buf::copy_to_bytes(len)` replaces `Buf::into_bytes()` (#439).
+- `Buf`/`BufMut` utility methods are moved onto the trait and `*Ext` traits are
+  removed (#431).
+
+### Removed
+- `BufMut::bytes_vectored_mut()` (#430).
+- `new` methods on combinator types (#434).
+
+# 0.5.6 (July 13, 2020)
+
+- Improve `BytesMut` to reuse buffer when fully `advance`d.
+- Mark `BytesMut::{as_mut, set_len}` with `#[inline]`.
+- Relax synchronization when cloning in shared vtable of `Bytes`.
+- Move `loom` to `dev-dependencies`.
+
+# 0.5.5 (June 18, 2020)
+
+### Added
+- Allow using the `serde` feature in `no_std` environments (#385).
+
+### Fixed
+- Fix `BufMut::advance_mut` to panic if advanced past the capacity (#354).
+- Fix `BytesMut::freeze` ignoring amount previously `advance`d (#352).
+
 # 0.5.4 (January 23, 2020)
 
 ### Added
diff --git a/Cargo.toml b/Cargo.toml
index 257e78858..7ef12eafc 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -2,35 +2,44 @@
 
 name = "bytes"
 # When releasing to crates.io:
-# - Update html_root_url.
 # - Update CHANGELOG.md.
-# - Update doc URL.
-# - Create "v0.5.x" git tag.
-version = "0.5.4"
+# - Create "v1.x.y" git tag.
+version = "1.10.1"
+edition = "2018"
+rust-version = "1.39"
 license = "MIT"
 authors = [
     "Carl Lerche <me@carllerche.com>",
     "Sean McArthur <sean@seanmonstar.com>",
 ]
 description = "Types and traits for working with bytes"
-documentation = "https://docs.rs/bytes"
 repository = "https://github.com/tokio-rs/bytes"
 readme = "README.md"
 keywords = ["buffers", "zero-copy", "io"]
 categories = ["network-programming", "data-structures"]
-edition = "2018"
 
 [features]
 default = ["std"]
 std = []
 
 [dependencies]
-serde = { version = "1.0", optional = true }
+serde = { version = "1.0.60", optional = true, default-features = false, features = ["alloc"] }
+# Use portable-atomic crate to support platforms without atomic CAS.
+# See "no_std support" section in readme for more information.
+#
+# Enable require-cas feature to provide a better error message if the end user forgets to use the cfg or feature.
+extra-platforms = { package = "portable-atomic", version = "1.3", optional = true, default-features = false, features = ["require-cas"] }
 
 [dev-dependencies]
 serde_test = "1.0"
 
-# loom is currently not compiling on windows.
-# See: https://github.com/Xudong-Huang/generator-rs/issues/19
-[target.'cfg(not(windows))'.dev-dependencies]
-loom = "0.2.13"
+[target.'cfg(loom)'.dev-dependencies]
+loom = "0.7"
+
+[package.metadata.docs.rs]
+rustdoc-args = ["--cfg", "docsrs"]
+
+[lints.rust]
+unexpected_cfgs = { level = "warn", check-cfg = [
+  'cfg(loom)',
+] }
diff --git a/README.md b/README.md
index afc2ed21c..90d631c97 100644
--- a/README.md
+++ b/README.md
@@ -3,12 +3,12 @@
 A utility library for working with bytes.
 
 [![Crates.io][crates-badge]][crates-url]
-[![Build Status][azure-badge]][azure-url]
+[![Build Status][ci-badge]][ci-url]
 
 [crates-badge]: https://img.shields.io/crates/v/bytes.svg
 [crates-url]: https://crates.io/crates/bytes
-[azure-badge]: https://dev.azure.com/tokio-rs/bytes/_apis/build/status/tokio-rs.bytes?branchName=master
-[azure-url]: https://dev.azure.com/tokio-rs/bytes/_build/latest?definitionId=3&branchName=master
+[ci-badge]: https://github.com/tokio-rs/bytes/workflows/CI/badge.svg
+[ci-url]: https://github.com/tokio-rs/bytes/actions
 
 [Documentation](https://docs.rs/bytes)
 
@@ -18,7 +18,7 @@ To use `bytes`, first add this to your `Cargo.toml`:
 
 ```toml
 [dependencies]
-bytes = "0.5"
+bytes = "1"
 ```
 
 Next, add this to your crate:
@@ -27,13 +27,39 @@ Next, add this to your crate:
 use bytes::{Bytes, BytesMut, Buf, BufMut};
 ```
 
+## no_std support
+
+To use `bytes` in a `no_std` environment, disable the `std` feature (enabled by default).
+
+```toml
+[dependencies]
+bytes = { version = "1", default-features = false }
+```
+
+To use `bytes` in a `no_std` environment without atomic CAS, such as thumbv6m, you also need to enable
+the `extra-platforms` feature. See the [documentation for the `portable-atomic`
+crate](https://docs.rs/portable-atomic) for more information.
+
+The MSRV when the `extra-platforms` feature is enabled depends on the MSRV of `portable-atomic`.
+
 ## Serde support
 
 Serde support is optional and disabled by default. To enable use the feature `serde`.
 
 ```toml
 [dependencies]
-bytes = { version = "0.5", features = ["serde"] }
+bytes = { version = "1", features = ["serde"] }
+```
+
+The MSRV when the `serde` feature is enabled depends on the MSRV of `serde`.
+
+## Building documentation
+
+When building the `bytes` documentation, the `docsrs` option should be used; otherwise,
+feature gates will not be shown. This requires a nightly toolchain:
+
+```
+RUSTDOCFLAGS="--cfg docsrs" cargo +nightly doc
 ```
 
 ## License
@@ -45,4 +71,3 @@ This project is licensed under the [MIT license](LICENSE).
 Unless you explicitly state otherwise, any contribution intentionally submitted
 for inclusion in `bytes` by you, shall be licensed as MIT, without any additional
 terms or conditions.
-
diff --git a/SECURITY.md b/SECURITY.md
new file mode 100644
index 000000000..b74a831cb
--- /dev/null
+++ b/SECURITY.md
@@ -0,0 +1,9 @@
+# Security Policy
+
+Bytes is part of the Tokio project and uses the same security policy as [Tokio][tokio-security].
+
+## Report a security issue
+
+The process for reporting an issue is the same as for [Tokio][tokio-security]. This includes private reporting via security@tokio.rs.
+
+[tokio-security]: https://github.com/tokio-rs/tokio/security/policy
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
deleted file mode 100644
index 18b59745d..000000000
--- a/azure-pipelines.yml
+++ /dev/null
@@ -1,68 +0,0 @@
-trigger: ["master"]
-pr: ["master"]
-
-jobs:
-# Check formatting
-# - template: ci/azure-rustfmt.yml
-#   parameters:
-#     name: rustfmt
-
-# Apply clippy lints
-# - template: ci/azure-clippy.yml
-#   parameters:
-#     name: clippy
-
-# This represents the minimum Rust version supported by
-# Bytes. Updating this should be done in a dedicated PR.
-#
-# Tests are not run as tests may require newer versions of
-# rust.
-- template: ci/azure-test-stable.yml
-  parameters:
-    name: minrust
-    rust_version: 1.39.0
-    cmd: check
-
-# Stable
-- template: ci/azure-test-stable.yml
-  parameters:
-    name: stable
-    cross: true
-    features:
-      - serde
-
-# Nightly
-- template: ci/azure-test-stable.yml
-  parameters:
-    name: nightly
-    # Pin nightly to avoid being impacted by breakage
-    rust_version: nightly-2019-09-25
-    benches: true
-
-# Run tests on some extra platforms
-- template: ci/azure-cross-compile.yml
-  parameters:
-    name: cross
-
-# Sanitizers
-- template: ci/azure-tsan.yml
-  parameters:
-    name: tsan
-    rust_version: nightly
-
-# Loom
-- template: ci/azure-loom.yml
-  parameters:
-    name: loom
-    rust_version: stable
-
-
-- template: ci/azure-deploy-docs.yml
-  parameters:
-    dependsOn:
-      # - rustfmt
-      # - clippy
-      - stable
-      - nightly
-      - minrust
-      - cross
diff --git a/benches/buf.rs b/benches/buf.rs
index 0c9a1d955..616d18748 100644
--- a/benches/buf.rs
+++ b/benches/buf.rs
@@ -1,10 +1,10 @@
 #![feature(test)]
-#![deny(warnings, rust_2018_idioms)]
+#![warn(rust_2018_idioms)]
 
 extern crate test;
 
-use test::Bencher;
 use bytes::Buf;
+use test::Bencher;
 
 /// Dummy Buf implementation
 struct TestBuf {
@@ -46,14 +46,14 @@ impl TestBuf {
 }
 impl Buf for TestBuf {
     fn remaining(&self) -> usize {
-        return self.buf.len() - self.pos;
+        self.buf.len() - self.pos
     }
     fn advance(&mut self, cnt: usize) {
         self.pos += cnt;
         assert!(self.pos <= self.buf.len());
         self.next_readlen();
     }
-    fn bytes(&self) -> &[u8] {
+    fn chunk(&self) -> &[u8] {
         if self.readlen == 0 {
             Default::default()
         } else {
@@ -87,8 +87,8 @@ impl Buf for TestBufC {
         self.inner.advance(cnt)
     }
     #[inline(never)]
-    fn bytes(&self) -> &[u8] {
-        self.inner.bytes()
+    fn chunk(&self) -> &[u8] {
+        self.inner.chunk()
     }
 }
 
@@ -159,7 +159,6 @@ macro_rules! bench_group {
 mod get_u8 {
     use super::*;
     bench_group!(get_u8);
-    bench!(option, option);
 }
 mod get_u16 {
     use super::*;
diff --git a/benches/bytes.rs b/benches/bytes.rs
index 9c36e6081..8782d0066 100644
--- a/benches/bytes.rs
+++ b/benches/bytes.rs
@@ -1,10 +1,10 @@
 #![feature(test)]
-#![deny(warnings, rust_2018_idioms)]
+#![warn(rust_2018_idioms)]
 
 extern crate test;
 
-use test::Bencher;
 use bytes::Bytes;
+use test::Bencher;
 
 #[bench]
 fn deref_unique(b: &mut Bencher) {
@@ -42,11 +42,12 @@ fn deref_static(b: &mut Bencher) {
 
 #[bench]
 fn clone_static(b: &mut Bencher) {
-    let bytes = Bytes::from_static("hello world 1234567890 and have a good byte 0987654321".as_bytes());
+    let bytes =
+        Bytes::from_static("hello world 1234567890 and have a good byte 0987654321".as_bytes());
 
     b.iter(|| {
         for _ in 0..1024 {
-            test::black_box(&bytes.clone());
+            test::black_box(test::black_box(&bytes).clone());
         }
     })
 }
@@ -57,7 +58,7 @@ fn clone_shared(b: &mut Bencher) {
 
     b.iter(|| {
         for _ in 0..1024 {
-            test::black_box(&bytes.clone());
+            test::black_box(test::black_box(&bytes).clone());
         }
     })
 }
@@ -69,7 +70,7 @@ fn clone_arc_vec(b: &mut Bencher) {
 
     b.iter(|| {
         for _ in 0..1024 {
-            test::black_box(&bytes.clone());
+            test::black_box(test::black_box(&bytes).clone());
         }
     })
 }
@@ -87,6 +88,7 @@ fn from_long_slice(b: &mut Bencher) {
 #[bench]
 fn slice_empty(b: &mut Bencher) {
     b.iter(|| {
+        // `clone` is to convert to ARC
         let b = Bytes::from(vec![17; 1024]).clone();
         for i in 0..1000 {
             test::black_box(b.slice(i % 100..i % 100));
diff --git a/benches/bytes_mut.rs b/benches/bytes_mut.rs
index ded1d1486..b06943621 100644
--- a/benches/bytes_mut.rs
+++ b/benches/bytes_mut.rs
@@ -1,10 +1,10 @@
 #![feature(test)]
-#![deny(warnings, rust_2018_idioms)]
+#![warn(rust_2018_idioms)]
 
 extern crate test;
 
-use test::Bencher;
 use bytes::{BufMut, BytesMut};
+use test::Bencher;
 
 #[bench]
 fn alloc_small(b: &mut Bencher) {
@@ -29,7 +29,6 @@ fn alloc_big(b: &mut Bencher) {
     })
 }
 
-
 #[bench]
 fn deref_unique(b: &mut Bencher) {
     let mut buf = BytesMut::with_capacity(4096);
@@ -92,7 +91,9 @@ fn deref_two(b: &mut Bencher) {
 
 #[bench]
 fn clone_frozen(b: &mut Bencher) {
-    let bytes = BytesMut::from(&b"hello world 1234567890 and have a good byte 0987654321"[..]).split().freeze();
+    let bytes = BytesMut::from(&b"hello world 1234567890 and have a good byte 0987654321"[..])
+        .split()
+        .freeze();
 
     b.iter(|| {
         for _ in 0..1024 {
@@ -137,7 +138,9 @@ fn fmt_write(b: &mut Bencher) {
     b.iter(|| {
         let _ = write!(buf, "{}", s);
         test::black_box(&buf);
-        unsafe { buf.set_len(0); }
+        unsafe {
+            buf.set_len(0);
+        }
     })
 }
 
@@ -152,7 +155,9 @@ fn bytes_mut_extend(b: &mut Bencher) {
             buf.extend(&data);
         }
         test::black_box(&buf);
-        unsafe { buf.set_len(0); }
+        unsafe {
+            buf.set_len(0);
+        }
     });
 }
 
@@ -169,7 +174,9 @@ fn put_slice_bytes_mut(b: &mut Bencher) {
             buf.put_slice(&data);
         }
         test::black_box(&buf);
-        unsafe { buf.set_len(0); }
+        unsafe {
+            buf.set_len(0);
+        }
     });
 }
 
@@ -184,7 +191,9 @@ fn put_u8_bytes_mut(b: &mut Bencher) {
             buf.put_u8(b'x');
         }
         test::black_box(&buf);
-        unsafe { buf.set_len(0); }
+        unsafe {
+            buf.set_len(0);
+        }
     });
 }
 
@@ -199,7 +208,9 @@ fn put_slice_vec(b: &mut Bencher) {
             buf.put_slice(&data);
         }
         test::black_box(&buf);
-        unsafe { buf.set_len(0); }
+        unsafe {
+            buf.set_len(0);
+        }
     });
 }
 
@@ -214,7 +225,9 @@ fn put_u8_vec(b: &mut Bencher) {
             buf.put_u8(b'x');
         }
         test::black_box(&buf);
-        unsafe { buf.set_len(0); }
+        unsafe {
+            buf.set_len(0);
+        }
     });
 }
 
@@ -229,7 +242,9 @@ fn put_slice_vec_extend(b: &mut Bencher) {
             buf.extend_from_slice(&data);
         }
         test::black_box(&buf);
-        unsafe { buf.set_len(0); }
+        unsafe {
+            buf.set_len(0);
+        }
     });
 }
 
@@ -244,6 +259,8 @@ fn put_u8_vec_push(b: &mut Bencher) {
             buf.push(b'x');
         }
         test::black_box(&buf);
-        unsafe { buf.set_len(0); }
+        unsafe {
+            buf.set_len(0);
+        }
     });
 }
diff --git a/ci/azure-cross-compile.yml b/ci/azure-cross-compile.yml
deleted file mode 100644
index be46ca346..000000000
--- a/ci/azure-cross-compile.yml
+++ /dev/null
@@ -1,46 +0,0 @@
-parameters:
-  cmd: build
-  rust_version: stable
-
-jobs:
-- job: ${{ parameters.name }}
-  displayName: Cross
-  strategy:
-    matrix:
-      i686:
-        vmImage: ubuntu-16.04
-        target: i686-unknown-linux-gnu
-      armv7:
-        vmImage: ubuntu-16.04
-        target: armv7-unknown-linux-gnueabihf
-      powerpc:
-        vmImage: ubuntu-16.04
-        target: powerpc-unknown-linux-gnu
-      powerpc64:
-        vmImage: ubuntu-16.04
-        target: powerpc64-unknown-linux-gnu
-      wasm:
-        vmImage: ubuntu-16.04
-        target: wasm32-unknown-unknown
-  pool:
-    vmImage: $(vmImage)
-
-  steps:
-    - template: azure-install-rust.yml
-      parameters:
-        rust_version: ${{parameters.rust_version}}
-
-    - script: cargo install cross
-      displayName: Install cross
-      condition: not(eq(variables['target'], 'wasm32-unknown-unknown'))
-
-    - script: cross ${{ parameters.cmd }} --target $(target)
-      displayName: cross ${{ parameters.cmd }} --target $(target)
-      condition: not(eq(variables['target'], 'wasm32-unknown-unknown'))
-
-    # WASM support
-    - script: |
-        rustup target add $(target)
-        cargo build --target $(target)
-      displayName: cargo build --target $(target)
-      condition: eq(variables['target'], 'wasm32-unknown-unknown')
diff --git a/ci/azure-deploy-docs.yml b/ci/azure-deploy-docs.yml
deleted file mode 100644
index 52ac48fcd..000000000
--- a/ci/azure-deploy-docs.yml
+++ /dev/null
@@ -1,39 +0,0 @@
-parameters:
-  dependsOn: []
-
-jobs:
-- job: documentation
-  displayName: 'Deploy API Documentation'
-  condition: and(succeeded(), eq(variables['Build.SourceBranch'], 'refs/heads/master'))
-  pool:
-    vmImage: 'Ubuntu 16.04'
-  dependsOn:
-    - ${{ parameters.dependsOn }}
-  steps:
-  - template: azure-install-rust.yml
-    parameters:
-      rust_version: stable
-  - script: |
-      cargo doc --no-deps
-      cp -R target/doc '$(Build.BinariesDirectory)'
-    displayName: 'Generate Documentation'
-  - script: |
-      set -e
-
-      git --version
-      ls -la
-      git init
-      git config user.name 'Deployment Bot (from Azure Pipelines)'
-      git config user.email 'deploy@tokio-rs.com'
-      git config --global credential.helper 'store --file ~/.my-credentials'
-      printf "protocol=https\nhost=github.com\nusername=carllerche\npassword=%s\n\n" "$GITHUB_TOKEN" | git credential-store --file ~/.my-credentials store
-      git remote add origin https://github.com/tokio-rs/bytes
-      git checkout -b gh-pages
-      git add .
-      git commit -m 'Deploy Bytes API documentation'
-      git push -f origin gh-pages
-    env:
-      GITHUB_TOKEN: $(githubPersonalToken)
-    workingDirectory: '$(Build.BinariesDirectory)'
-    displayName: 'Deploy Documentation'
-
diff --git a/ci/azure-install-rust.yml b/ci/azure-install-rust.yml
deleted file mode 100644
index 02176592a..000000000
--- a/ci/azure-install-rust.yml
+++ /dev/null
@@ -1,33 +0,0 @@
-steps:
-  # Linux and macOS.
-  - script: |
-      set -e
-      curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain none
-      export PATH=$PATH:$HOME/.cargo/bin
-      rustup toolchain install $RUSTUP_TOOLCHAIN
-      rustup default $RUSTUP_TOOLCHAIN
-      echo "##vso[task.setvariable variable=PATH;]$PATH:$HOME/.cargo/bin"
-    env:
-      RUSTUP_TOOLCHAIN: ${{parameters.rust_version}}
-    displayName: "Install rust (*nix)"
-    condition: not(eq(variables['Agent.OS'], 'Windows_NT'))
-
-  # Windows.
-  - script: |
-      curl -sSf -o rustup-init.exe https://win.rustup.rs
-      rustup-init.exe -y --default-toolchain none
-      set PATH=%PATH%;%USERPROFILE%\.cargo\bin
-      rustup toolchain install %RUSTUP_TOOLCHAIN%
-      rustup default %RUSTUP_TOOLCHAIN%
-      echo "##vso[task.setvariable variable=PATH;]%PATH%;%USERPROFILE%\.cargo\bin"
-    env:
-      RUSTUP_TOOLCHAIN: ${{parameters.rust_version}}
-    displayName: "Install rust (windows)"
-    condition: eq(variables['Agent.OS'], 'Windows_NT')
-
-  # All platforms.
-  - script: |
-        rustup toolchain list
-        rustc -Vv
-        cargo -V
-    displayName: Query rust and cargo versions
diff --git a/ci/azure-loom.yml b/ci/azure-loom.yml
deleted file mode 100644
index 1db9c3afe..000000000
--- a/ci/azure-loom.yml
+++ /dev/null
@@ -1,15 +0,0 @@
-jobs:
-- job: ${{parameters.name}}
-  displayName: Loom tests
-  pool:
-    vmImage: ubuntu-16.04
-
-  steps:
-  - template: azure-install-rust.yml
-    parameters:
-      rust_version: ${{parameters.rust_version}}
-
-  - script: RUSTFLAGS="--cfg loom" cargo test --lib
-    displayName: RUSTFLAGS="--cfg loom" cargo test --lib
-
-
diff --git a/ci/azure-test-stable.yml b/ci/azure-test-stable.yml
deleted file mode 100644
index e543eeeb4..000000000
--- a/ci/azure-test-stable.yml
+++ /dev/null
@@ -1,50 +0,0 @@
-parameters:
-  cmd: test
-  rust_version: stable
-  features: []
-
-jobs:
-- job: ${{ parameters.name }}
-  displayName: ${{ parameters.displayName }}
-  strategy:
-    matrix:
-      Linux:
-        vmImage: ubuntu-16.04
-
-      ${{ if parameters.cross }}:
-        MacOS:
-          vmImage: macOS-10.13
-        Windows:
-          vmImage: vs2017-win2016
-  pool:
-    vmImage: $(vmImage)
-
-  steps:
-  - template: azure-install-rust.yml
-    parameters:
-      rust_version: ${{parameters.rust_version}}
-
-  # Run with default crate features
-  - script: cargo ${{ parameters.cmd }}
-    displayName: cargo ${{ parameters.cmd }}
-
-  # Run with each specified feature
-  - ${{ each feature in parameters.features }}:
-    - script: cargo ${{ parameters.cmd }} --features ${{ feature }}
-      displayName: cargo ${{ parameters.cmd }} --features ${{ feature }}
-
-  - ${{ if eq(parameters.cmd, 'test') }}:
-    - script: cargo doc --no-deps
-      displayName: cargo doc --no-deps
-
-  - ${{ if parameters.benches }}:
-    - script: cargo check --benches
-      displayName: Check benchmarks
-
-  # Run with all features
-  - script: cargo ${{ parameters.cmd }} --all-features
-    displayName: cargo ${{ parameters.cmd }} --all-features
-
-  # Run with no default features
-  - script: cargo check --no-default-features
-    displayName: cargo check --no-default-features
diff --git a/ci/azure-tsan.yml b/ci/azure-tsan.yml
deleted file mode 100644
index 198b187ab..000000000
--- a/ci/azure-tsan.yml
+++ /dev/null
@@ -1,26 +0,0 @@
-jobs:
-- job: ${{ parameters.name }}
-  displayName: TSAN
-  pool:
-    vmImage: ubuntu-16.04
-
-  steps:
-  - template: azure-install-rust.yml
-    parameters:
-      rust_version: ${{ parameters.rust_version }}
-
-  - script: |
-      set -e
-
-      export RUST_TEST_THREADS=1
-      export ASAN_OPTIONS="detect_odr_violation=0 detect_leaks=0"
-      export TSAN_OPTIONS="suppressions=`pwd`/ci/tsan"
-
-      # Run address sanitizer
-      RUSTFLAGS="-Z sanitizer=address" \
-      cargo test --target x86_64-unknown-linux-gnu --test test_bytes --test test_buf --test test_buf_mut
-
-      # Run thread sanitizer
-      RUSTFLAGS="-Z sanitizer=thread" \
-      cargo test --target x86_64-unknown-linux-gnu --test test_bytes --test test_buf --test test_buf_mut
-    displayName: TSAN / MSAN
diff --git a/ci/miri.sh b/ci/miri.sh
new file mode 100755
index 000000000..7df29f360
--- /dev/null
+++ b/ci/miri.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+# Runs the test suite under Miri: once for the default target, once for mips64.
+set -e
+
+rustup component add miri
+cargo miri setup
+
+export MIRIFLAGS="-Zmiri-strict-provenance"
+cargo miri test
+cargo miri test --target mips64-unknown-linux-gnuabi64
diff --git a/ci/panic-abort.sh b/ci/panic-abort.sh
new file mode 100755
index 000000000..4284791f1
--- /dev/null
+++ b/ci/panic-abort.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+# Runs every integration test target (--test '*') with panic=abort appended to RUSTFLAGS.
+set -ex
+RUSTFLAGS="${RUSTFLAGS} -Cpanic=abort -Zpanic-abort-tests" cargo test --all-features --test '*'
diff --git a/ci/test-stable.sh b/ci/test-stable.sh
new file mode 100755
index 000000000..ad9757449
--- /dev/null
+++ b/ci/test-stable.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# Usage: ci/test-stable.sh [cargo-subcommand]; the subcommand defaults to `test`.
+set -ex
+
+subcommand="${1:-test}"
+
+# Exercise every feature on its own:
+# * --each-feature covers both the default and the no-default-features cases
+# * --optional-deps is required for the serde feature
+cargo hack "${subcommand}" --each-feature --optional-deps
+# Then once more with every feature switched on together
+cargo "${subcommand}" --all-features
+
+if [[ "${RUST_VERSION}" == "nightly"* ]]; then
+    # Make sure the benchmarks still compile
+    cargo check --benches
+
+    # Minimal-versions check.
+    # Strip dev-dependencies from Cargo.toml first so that the following
+    # `cargo update` does not derive minimal versions from them.
+    cargo hack --remove-dev-deps --workspace
+    # Rewrite Cargo.lock so that it pins the minimal dependency versions.
+    cargo update -Z minimal-versions
+    cargo check --all-features
+fi
diff --git a/ci/tsan b/ci/tsan
deleted file mode 100644
index e53f9b893..000000000
--- a/ci/tsan
+++ /dev/null
@@ -1,24 +0,0 @@
-# TSAN suppressions file for `bytes`
-
-# TSAN does not understand fences and `Arc::drop` is implemented using a fence.
-# This causes many false positives.
-race:Arc*drop
-race:arc*Weak*drop
-
-# `std` mpsc is not used in any Bytes code base. This race is triggered by some
-# rust runtime logic.
-race:std*mpsc_queue
-
-# Some test runtime races. Allocation should be race free
-race:alloc::alloc
-
-# Not sure why this is warning, but it is in the test harness and not the library.
-race:TestEvent*clone
-race:test::run_tests_console::*closure
-
-# Probably more fences in std.
-race:__call_tls_dtors
-
-# This ignores a false positive caused by `thread::park()`/`thread::unpark()`.
-# See: https://github.com/rust-lang/rust/pull/54806#issuecomment-436193353
-race:pthread_cond_destroy
diff --git a/ci/tsan.sh b/ci/tsan.sh
new file mode 100755
index 000000000..ca520bd7f
--- /dev/null
+++ b/ci/tsan.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+# Runs the selected integration tests under the address and thread sanitizers.
+set -ex
+
+export ASAN_OPTIONS="detect_odr_violation=0 detect_leaks=0"
+
+# Target and test selection shared by both sanitizer runs.
+test_args=(--target x86_64-unknown-linux-gnu --test test_bytes --test test_buf --test test_buf_mut)
+
+# Address sanitizer pass
+RUSTFLAGS="-Z sanitizer=address" cargo test "${test_args[@]}"
+# Thread sanitizer pass (additionally passes -Zbuild-std to cargo)
+RUSTFLAGS="-Z sanitizer=thread" cargo -Zbuild-std test "${test_args[@]}"
diff --git a/clippy.toml b/clippy.toml
new file mode 100644
index 000000000..53095b15d
--- /dev/null
+++ b/clippy.toml
@@ -0,0 +1 @@
+msrv = "1.39"
diff --git a/src/buf/buf_impl.rs b/src/buf/buf_impl.rs
index 843db718f..192034fbe 100644
--- a/src/buf/buf_impl.rs
+++ b/src/buf/buf_impl.rs
@@ -1,47 +1,93 @@
-use core::{cmp, ptr, mem};
+#[cfg(feature = "std")]
+use crate::buf::{reader, Reader};
+use crate::buf::{take, Chain, Take};
+#[cfg(feature = "std")]
+use crate::{min_u64_usize, saturating_sub_usize_u64};
+use crate::{panic_advance, panic_does_not_fit, TryGetError};
 
 #[cfg(feature = "std")]
 use std::io::IoSlice;
 
-use alloc::{boxed::Box};
+use alloc::boxed::Box;
 
-macro_rules! buf_get_impl {
-    ($this:ident, $typ:tt::$conv:tt) => ({
-        const SIZE: usize = mem::size_of::<$typ>();
-         // try to convert directly from the bytes
-         // this Option<ret> trick is to avoid keeping a borrow on self
-         // when advance() is called (mut borrow) and to call bytes() only once
-        let ret =  $this.bytes().get(..SIZE).map(|src| unsafe {
-            $typ::$conv(*(src as *const _ as *const [_; SIZE]))
-        });
+macro_rules! buf_try_get_impl {
+    ($this:ident, $typ:tt::$conv:tt) => {{
+        const SIZE: usize = core::mem::size_of::<$typ>();
+
+        if $this.remaining() < SIZE {
+            return Err(TryGetError {
+                requested: SIZE,
+                available: $this.remaining(),
+            });
+        }
+
+        // try to convert directly from the bytes
+        // this Option<ret> trick is to avoid keeping a borrow on self
+        // when advance() is called (mut borrow) and to call chunk() only once
+        let ret = $this
+            .chunk()
+            .get(..SIZE)
+            .map(|src| unsafe { $typ::$conv(*(src as *const _ as *const [_; SIZE])) });
 
         if let Some(ret) = ret {
-             // if the direct conversion was possible, advance and return
+            // if the direct conversion was possible, advance and return
             $this.advance(SIZE);
-            return ret;
+            return Ok(ret);
         } else {
             // if not we copy the bytes in a temp buffer then convert
             let mut buf = [0; SIZE];
             $this.copy_to_slice(&mut buf); // (do the advance)
-            return $typ::$conv(buf);
+            return Ok($typ::$conv(buf));
         }
-    });
-    (le => $this:ident, $typ:tt, $len_to_read:expr) => ({
-        debug_assert!(mem::size_of::<$typ>() >= $len_to_read);
+    }};
+    (le => $this:ident, $typ:tt, $len_to_read:expr) => {{
+        const SIZE: usize = core::mem::size_of::<$typ>();
 
         // The same trick as above does not improve the best case speed.
         // It seems to be linked to the way the method is optimised by the compiler
-        let mut buf = [0; (mem::size_of::<$typ>())];
-        $this.copy_to_slice(&mut buf[..($len_to_read)]);
-        return $typ::from_le_bytes(buf);
-    });
+        let mut buf = [0; SIZE];
+
+        let subslice = match buf.get_mut(..$len_to_read) {
+            Some(subslice) => subslice,
+            None => panic_does_not_fit(SIZE, $len_to_read),
+        };
+
+        $this.try_copy_to_slice(subslice)?;
+        return Ok($typ::from_le_bytes(buf));
+    }};
     (be => $this:ident, $typ:tt, $len_to_read:expr) => {{
-        debug_assert!(mem::size_of::<$typ>() >= $len_to_read);
+        const SIZE: usize = core::mem::size_of::<$typ>();
+
+        let slice_at = match SIZE.checked_sub($len_to_read) {
+            Some(slice_at) => slice_at,
+            None => panic_does_not_fit(SIZE, $len_to_read),
+        };
+
+        let mut buf = [0; SIZE];
+        $this.try_copy_to_slice(&mut buf[slice_at..])?;
+        return Ok($typ::from_be_bytes(buf));
+    }};
+}
 
-        let mut buf = [0; (mem::size_of::<$typ>())];
-        $this.copy_to_slice(&mut buf[mem::size_of::<$typ>()-($len_to_read)..]);
-        return $typ::from_be_bytes(buf);
+macro_rules! buf_get_impl {
+    ($this:ident, $typ:tt::$conv:tt) => {{
+        return (|| buf_try_get_impl!($this, $typ::$conv))()
+            .unwrap_or_else(|error| panic_advance(&error));
+    }};
+    (le => $this:ident, $typ:tt, $len_to_read:expr) => {{
+        return (|| buf_try_get_impl!(le => $this, $typ, $len_to_read))()
+            .unwrap_or_else(|error| panic_advance(&error));
     }};
+    (be => $this:ident, $typ:tt, $len_to_read:expr) => {{
+        return (|| buf_try_get_impl!(be => $this, $typ, $len_to_read))()
+            .unwrap_or_else(|error| panic_advance(&error));
+    }};
+}
+
+// Sign-extends the low `nbytes` bytes of `val` (0 when `nbytes == 0`): https://en.wikipedia.org/wiki/Sign_extension
+fn sign_extend(val: u64, nbytes: usize) -> i64 {
+    let shift = ((8 - nbytes) * 8) as u32;
+    val.checked_shl(shift).map_or(0, |v| v as i64 >> shift)
+}
 
 /// Read bytes from a buffer.
@@ -73,7 +119,7 @@ pub trait Buf {
     /// the buffer.
     ///
     /// This value is greater than or equal to the length of the slice returned
-    /// by `bytes`.
+    /// by `chunk()`.
     ///
     /// # Examples
     ///
@@ -97,8 +143,8 @@ pub trait Buf {
     fn remaining(&self) -> usize;
 
     /// Returns a slice starting at the current position and of length between 0
-    /// and `Buf::remaining()`. Note that this *can* return shorter slice (this allows
-    /// non-continuous internal representation).
+    /// and `Buf::remaining()`. Note that this *can* return a shorter slice (this
+    /// allows non-continuous internal representation).
     ///
     /// This is a lower level function. Most operations are done with other
     /// functions.
@@ -110,33 +156,38 @@ pub trait Buf {
     ///
     /// let mut buf = &b"hello world"[..];
     ///
-    /// assert_eq!(buf.bytes(), &b"hello world"[..]);
+    /// assert_eq!(buf.chunk(), &b"hello world"[..]);
     ///
     /// buf.advance(6);
     ///
-    /// assert_eq!(buf.bytes(), &b"world"[..]);
+    /// assert_eq!(buf.chunk(), &b"world"[..]);
     /// ```
     ///
     /// # Implementer notes
     ///
-    /// This function should never panic. Once the end of the buffer is reached,
-    /// i.e., `Buf::remaining` returns 0, calls to `bytes` should return an
-    /// empty slice.
-    fn bytes(&self) -> &[u8];
+    /// This function should never panic. `chunk()` should return an empty
+    /// slice **if and only if** `remaining()` returns 0. In other words,
+    /// `chunk()` returning an empty slice implies that `remaining()` will
+    /// return 0 and `remaining()` returning 0 implies that `chunk()` will
+    /// return an empty slice.
+    // The `chunk` method was previously called `bytes`. This alias makes the rename
+    // more easily discoverable.
+    #[cfg_attr(docsrs, doc(alias = "bytes"))]
+    fn chunk(&self) -> &[u8];
 
     /// Fills `dst` with potentially multiple slices starting at `self`'s
     /// current position.
     ///
-    /// If the `Buf` is backed by disjoint slices of bytes, `bytes_vectored` enables
+    /// If the `Buf` is backed by disjoint slices of bytes, `chunks_vectored` enables
     /// fetching more than one slice at once. `dst` is a slice of `IoSlice`
     /// references, enabling the slice to be directly used with [`writev`]
     /// without any further conversion. The sum of the lengths of all the
-    /// buffers in `dst` will be less than or equal to `Buf::remaining()`.
+    /// buffers written to `dst` will be less than or equal to `Buf::remaining()`.
     ///
     /// The entries in `dst` will be overwritten, but the data **contained** by
-    /// the slices **will not** be modified. If `bytes_vectored` does not fill every
-    /// entry in `dst`, then `dst` is guaranteed to contain all remaining slices
-    /// in `self.
+    /// the slices **will not** be modified. The return value is the number of
+    /// slices written to `dst`. If `Buf::remaining()` is non-zero, then this
+    /// writes at least one non-empty slice to `dst`.
     ///
     /// This is a lower level function. Most operations are done with other
     /// functions.
@@ -144,7 +195,7 @@ pub trait Buf {
     /// # Implementer notes
     ///
     /// This function should never panic. Once the end of the buffer is reached,
-    /// i.e., `Buf::remaining` returns 0, calls to `bytes_vectored` must return 0
+    /// i.e., `Buf::remaining` returns 0, calls to `chunks_vectored` must return 0
     /// without mutating `dst`.
     ///
     /// Implementations should also take care to properly handle being called
@@ -152,13 +203,14 @@ pub trait Buf {
     ///
     /// [`writev`]: http://man7.org/linux/man-pages/man2/readv.2.html
     #[cfg(feature = "std")]
-    fn bytes_vectored<'a>(&'a self, dst: &mut [IoSlice<'a>]) -> usize {
+    #[cfg_attr(docsrs, doc(cfg(feature = "std")))]
+    fn chunks_vectored<'a>(&'a self, dst: &mut [IoSlice<'a>]) -> usize {
         if dst.is_empty() {
             return 0;
         }
 
         if self.has_remaining() {
-            dst[0] = IoSlice::new(self.bytes());
+            dst[0] = IoSlice::new(self.chunk());
             1
         } else {
             0
@@ -167,7 +219,7 @@ pub trait Buf {
 
     /// Advance the internal cursor of the Buf
     ///
-    /// The next call to `bytes` will return a slice starting `cnt` bytes
+    /// The next call to `chunk()` will return a slice starting `cnt` bytes
     /// further into the underlying buffer.
     ///
     /// # Examples
@@ -177,11 +229,11 @@ pub trait Buf {
     ///
     /// let mut buf = &b"hello world"[..];
     ///
-    /// assert_eq!(buf.bytes(), &b"hello world"[..]);
+    /// assert_eq!(buf.chunk(), &b"hello world"[..]);
     ///
     /// buf.advance(6);
     ///
-    /// assert_eq!(buf.bytes(), &b"world"[..]);
+    /// assert_eq!(buf.chunk(), &b"world"[..]);
     /// ```
     ///
     /// # Panics
@@ -238,27 +290,10 @@ pub trait Buf {
     ///
     /// # Panics
     ///
-    /// This function panics if `self.remaining() < dst.len()`
+    /// This function panics if `self.remaining() < dst.len()`.
     fn copy_to_slice(&mut self, dst: &mut [u8]) {
-        let mut off = 0;
-
-        assert!(self.remaining() >= dst.len());
-
-        while off < dst.len() {
-            let cnt;
-
-            unsafe {
-                let src = self.bytes();
-                cnt = cmp::min(src.len(), dst.len() - off);
-
-                ptr::copy_nonoverlapping(
-                    src.as_ptr(), dst[off..].as_mut_ptr(), cnt);
-
-                off += cnt;
-            }
-
-            self.advance(cnt);
-        }
+        self.try_copy_to_slice(dst)
+            .unwrap_or_else(|error| panic_advance(&error));
     }
 
     /// Gets an unsigned 8 bit integer from `self`.
@@ -278,8 +313,13 @@ pub trait Buf {
     ///
     /// This function panics if there is no more remaining data in `self`.
     fn get_u8(&mut self) -> u8 {
-        assert!(self.remaining() >= 1);
-        let ret = self.bytes()[0];
+        if self.remaining() < 1 {
+            panic_advance(&TryGetError {
+                requested: 1,
+                available: 0,
+            })
+        }
+        let ret = self.chunk()[0];
         self.advance(1);
         ret
     }
@@ -301,8 +341,13 @@ pub trait Buf {
     ///
     /// This function panics if there is no more remaining data in `self`.
     fn get_i8(&mut self) -> i8 {
-        assert!(self.remaining() >= 1);
-        let ret = self.bytes()[0] as i8;
+        if self.remaining() < 1 {
+            panic_advance(&TryGetError {
+                requested: 1,
+                available: 0,
+            });
+        }
+        let ret = self.chunk()[0] as i8;
         self.advance(1);
         ret
     }
@@ -347,6 +392,29 @@ pub trait Buf {
         buf_get_impl!(self, u16::from_le_bytes);
     }
 
+    /// Gets an unsigned 16 bit integer from `self` in native-endian byte order.
+    ///
+    /// The current position is advanced by 2.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf: &[u8] = match cfg!(target_endian = "big") {
+    ///     true => b"\x08\x09 hello",
+    ///     false => b"\x09\x08 hello",
+    /// };
+    /// assert_eq!(0x0809, buf.get_u16_ne());
+    /// ```
+    ///
+    /// # Panics
+    ///
+    /// This function panics if there is not enough remaining data in `self`.
+    fn get_u16_ne(&mut self) -> u16 {
+        buf_get_impl!(self, u16::from_ne_bytes);
+    }
+
     /// Gets a signed 16 bit integer from `self` in big-endian byte order.
     ///
     /// The current position is advanced by 2.
@@ -387,6 +455,29 @@ pub trait Buf {
         buf_get_impl!(self, i16::from_le_bytes);
     }
 
+    /// Gets a signed 16 bit integer from `self` in native-endian byte order.
+    ///
+    /// The current position is advanced by 2.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf: &[u8] = match cfg!(target_endian = "big") {
+    ///     true => b"\x08\x09 hello",
+    ///     false => b"\x09\x08 hello",
+    /// };
+    /// assert_eq!(0x0809, buf.get_i16_ne());
+    /// ```
+    ///
+    /// # Panics
+    ///
+    /// This function panics if there is not enough remaining data in `self`.
+    fn get_i16_ne(&mut self) -> i16 {
+        buf_get_impl!(self, i16::from_ne_bytes);
+    }
+
     /// Gets an unsigned 32 bit integer from `self` in the big-endian byte order.
     ///
     /// The current position is advanced by 4.
@@ -427,6 +518,29 @@ pub trait Buf {
         buf_get_impl!(self, u32::from_le_bytes);
     }
 
+    /// Gets an unsigned 32 bit integer from `self` in native-endian byte order.
+    ///
+    /// The current position is advanced by 4.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf: &[u8] = match cfg!(target_endian = "big") {
+    ///     true => b"\x08\x09\xA0\xA1 hello",
+    ///     false => b"\xA1\xA0\x09\x08 hello",
+    /// };
+    /// assert_eq!(0x0809A0A1, buf.get_u32_ne());
+    /// ```
+    ///
+    /// # Panics
+    ///
+    /// This function panics if there is not enough remaining data in `self`.
+    fn get_u32_ne(&mut self) -> u32 {
+        buf_get_impl!(self, u32::from_ne_bytes);
+    }
+
     /// Gets a signed 32 bit integer from `self` in big-endian byte order.
     ///
     /// The current position is advanced by 4.
@@ -467,6 +581,29 @@ pub trait Buf {
         buf_get_impl!(self, i32::from_le_bytes);
     }
 
+    /// Gets a signed 32 bit integer from `self` in native-endian byte order.
+    ///
+    /// The current position is advanced by 4.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf: &[u8] = match cfg!(target_endian = "big") {
+    ///     true => b"\x08\x09\xA0\xA1 hello",
+    ///     false => b"\xA1\xA0\x09\x08 hello",
+    /// };
+    /// assert_eq!(0x0809A0A1, buf.get_i32_ne());
+    /// ```
+    ///
+    /// # Panics
+    ///
+    /// This function panics if there is not enough remaining data in `self`.
+    fn get_i32_ne(&mut self) -> i32 {
+        buf_get_impl!(self, i32::from_ne_bytes);
+    }
+
     /// Gets an unsigned 64 bit integer from `self` in big-endian byte order.
     ///
     /// The current position is advanced by 8.
@@ -507,6 +644,29 @@ pub trait Buf {
         buf_get_impl!(self, u64::from_le_bytes);
     }
 
+    /// Gets an unsigned 64 bit integer from `self` in native-endian byte order.
+    ///
+    /// The current position is advanced by 8.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf: &[u8] = match cfg!(target_endian = "big") {
+    ///     true => b"\x01\x02\x03\x04\x05\x06\x07\x08 hello",
+    ///     false => b"\x08\x07\x06\x05\x04\x03\x02\x01 hello",
+    /// };
+    /// assert_eq!(0x0102030405060708, buf.get_u64_ne());
+    /// ```
+    ///
+    /// # Panics
+    ///
+    /// This function panics if there is not enough remaining data in `self`.
+    fn get_u64_ne(&mut self) -> u64 {
+        buf_get_impl!(self, u64::from_ne_bytes);
+    }
+
     /// Gets a signed 64 bit integer from `self` in big-endian byte order.
     ///
     /// The current position is advanced by 8.
@@ -547,6 +707,29 @@ pub trait Buf {
         buf_get_impl!(self, i64::from_le_bytes);
     }
 
+    /// Gets a signed 64 bit integer from `self` in native-endian byte order.
+    ///
+    /// The current position is advanced by 8.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf: &[u8] = match cfg!(target_endian = "big") {
+    ///     true => b"\x01\x02\x03\x04\x05\x06\x07\x08 hello",
+    ///     false => b"\x08\x07\x06\x05\x04\x03\x02\x01 hello",
+    /// };
+    /// assert_eq!(0x0102030405060708, buf.get_i64_ne());
+    /// ```
+    ///
+    /// # Panics
+    ///
+    /// This function panics if there is not enough remaining data in `self`.
+    fn get_i64_ne(&mut self) -> i64 {
+        buf_get_impl!(self, i64::from_ne_bytes);
+    }
+
     /// Gets an unsigned 128 bit integer from `self` in big-endian byte order.
     ///
     /// The current position is advanced by 16.
@@ -587,6 +770,29 @@ pub trait Buf {
         buf_get_impl!(self, u128::from_le_bytes);
     }
 
+    /// Gets an unsigned 128 bit integer from `self` in native-endian byte order.
+    ///
+    /// The current position is advanced by 16.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf: &[u8] = match cfg!(target_endian = "big") {
+    ///     true => b"\x01\x02\x03\x04\x05\x06\x07\x08\x09\x10\x11\x12\x13\x14\x15\x16 hello",
+    ///     false => b"\x16\x15\x14\x13\x12\x11\x10\x09\x08\x07\x06\x05\x04\x03\x02\x01 hello",
+    /// };
+    /// assert_eq!(0x01020304050607080910111213141516, buf.get_u128_ne());
+    /// ```
+    ///
+    /// # Panics
+    ///
+    /// This function panics if there is not enough remaining data in `self`.
+    fn get_u128_ne(&mut self) -> u128 {
+        buf_get_impl!(self, u128::from_ne_bytes);
+    }
+
     /// Gets a signed 128 bit integer from `self` in big-endian byte order.
     ///
     /// The current position is advanced by 16.
@@ -627,6 +833,29 @@ pub trait Buf {
         buf_get_impl!(self, i128::from_le_bytes);
     }
 
+    /// Gets a signed 128 bit integer from `self` in native-endian byte order.
+    ///
+    /// The current position is advanced by 16.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf: &[u8] = match cfg!(target_endian = "big") {
+    ///     true => b"\x01\x02\x03\x04\x05\x06\x07\x08\x09\x10\x11\x12\x13\x14\x15\x16 hello",
+    ///     false => b"\x16\x15\x14\x13\x12\x11\x10\x09\x08\x07\x06\x05\x04\x03\x02\x01 hello",
+    /// };
+    /// assert_eq!(0x01020304050607080910111213141516, buf.get_i128_ne());
+    /// ```
+    ///
+    /// # Panics
+    ///
+    /// This function panics if there is not enough remaining data in `self`.
+    fn get_i128_ne(&mut self) -> i128 {
+        buf_get_impl!(self, i128::from_ne_bytes);
+    }
+
     /// Gets an unsigned n-byte integer from `self` in big-endian byte order.
     ///
     /// The current position is advanced by `nbytes`.
@@ -642,7 +871,8 @@ pub trait Buf {
     ///
     /// # Panics
     ///
-    /// This function panics if there is not enough remaining data in `self`.
+    /// This function panics if there is not enough remaining data in `self`, or
+    /// if `nbytes` is greater than 8.
     fn get_uint(&mut self, nbytes: usize) -> u64 {
         buf_get_impl!(be => self, u64, nbytes);
     }
@@ -662,11 +892,40 @@ pub trait Buf {
     ///
     /// # Panics
     ///
-    /// This function panics if there is not enough remaining data in `self`.
+    /// This function panics if there is not enough remaining data in `self`, or
+    /// if `nbytes` is greater than 8.
     fn get_uint_le(&mut self, nbytes: usize) -> u64 {
         buf_get_impl!(le => self, u64, nbytes);
     }
 
+    /// Gets an unsigned n-byte integer from `self` in native-endian byte order.
+    ///
+    /// The current position is advanced by `nbytes`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf: &[u8] = match cfg!(target_endian = "big") {
+    ///     true => b"\x01\x02\x03 hello",
+    ///     false => b"\x03\x02\x01 hello",
+    /// };
+    /// assert_eq!(0x010203, buf.get_uint_ne(3));
+    /// ```
+    ///
+    /// # Panics
+    ///
+    /// This function panics if there is not enough remaining data in `self`, or
+    /// if `nbytes` is greater than 8.
+    fn get_uint_ne(&mut self, nbytes: usize) -> u64 {
+        if cfg!(target_endian = "big") {
+            self.get_uint(nbytes)
+        } else {
+            self.get_uint_le(nbytes)
+        }
+    }
+
     /// Gets a signed n-byte integer from `self` in big-endian byte order.
     ///
     /// The current position is advanced by `nbytes`.
@@ -682,9 +941,10 @@ pub trait Buf {
     ///
     /// # Panics
     ///
-    /// This function panics if there is not enough remaining data in `self`.
+    /// This function panics if there is not enough remaining data in `self`, or
+    /// if `nbytes` is greater than 8.
     fn get_int(&mut self, nbytes: usize) -> i64 {
-        buf_get_impl!(be => self, i64, nbytes);
+        sign_extend(self.get_uint(nbytes), nbytes)
     }
 
     /// Gets a signed n-byte integer from `self` in little-endian byte order.
@@ -702,9 +962,38 @@ pub trait Buf {
     ///
     /// # Panics
     ///
-    /// This function panics if there is not enough remaining data in `self`.
+    /// This function panics if there is not enough remaining data in `self`, or
+    /// if `nbytes` is greater than 8.
     fn get_int_le(&mut self, nbytes: usize) -> i64 {
-        buf_get_impl!(le => self, i64, nbytes);
+        sign_extend(self.get_uint_le(nbytes), nbytes)
+    }
+
+    /// Gets a signed n-byte integer from `self` in native-endian byte order.
+    ///
+    /// The current position is advanced by `nbytes`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf: &[u8] = match cfg!(target_endian = "big") {
+    ///     true => b"\x01\x02\x03 hello",
+    ///     false => b"\x03\x02\x01 hello",
+    /// };
+    /// assert_eq!(0x010203, buf.get_int_ne(3));
+    /// ```
+    ///
+    /// # Panics
+    ///
+    /// This function panics if there is not enough remaining data in `self`, or
+    /// if `nbytes` is greater than 8.
+    fn get_int_ne(&mut self, nbytes: usize) -> i64 {
+        if cfg!(target_endian = "big") {
+            self.get_int(nbytes)
+        } else {
+            self.get_int_le(nbytes)
+        }
     }
 
     /// Gets an IEEE754 single-precision (4 bytes) floating point number from
@@ -725,7 +1014,7 @@ pub trait Buf {
     ///
     /// This function panics if there is not enough remaining data in `self`.
     fn get_f32(&mut self) -> f32 {
-        f32::from_bits(Self::get_u32(self))
+        f32::from_bits(self.get_u32())
     }
 
     /// Gets an IEEE754 single-precision (4 bytes) floating point number from
@@ -746,7 +1035,31 @@ pub trait Buf {
     ///
     /// This function panics if there is not enough remaining data in `self`.
     fn get_f32_le(&mut self) -> f32 {
-        f32::from_bits(Self::get_u32_le(self))
+        f32::from_bits(self.get_u32_le())
+    }
+
+    /// Gets an IEEE754 single-precision (4 bytes) floating point number from
+    /// `self` in native-endian byte order.
+    ///
+    /// The current position is advanced by 4.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf: &[u8] = match cfg!(target_endian = "big") {
+    ///     true => b"\x3F\x99\x99\x9A hello",
+    ///     false => b"\x9A\x99\x99\x3F hello",
+    /// };
+    /// assert_eq!(1.2f32, buf.get_f32_ne());
+    /// ```
+    ///
+    /// # Panics
+    ///
+    /// This function panics if there is not enough remaining data in `self`.
+    fn get_f32_ne(&mut self) -> f32 {
+        f32::from_bits(self.get_u32_ne())
     }
 
     /// Gets an IEEE754 double-precision (8 bytes) floating point number from
@@ -767,7 +1080,7 @@ pub trait Buf {
     ///
     /// This function panics if there is not enough remaining data in `self`.
     fn get_f64(&mut self) -> f64 {
-        f64::from_bits(Self::get_u64(self))
+        f64::from_bits(self.get_u64())
     }
 
     /// Gets an IEEE754 double-precision (8 bytes) floating point number from
@@ -788,131 +1101,1781 @@ pub trait Buf {
     ///
     /// This function panics if there is not enough remaining data in `self`.
     fn get_f64_le(&mut self) -> f64 {
-        f64::from_bits(Self::get_u64_le(self))
+        f64::from_bits(self.get_u64_le())
     }
 
-    /// Consumes remaining bytes inside self and returns new instance of `Bytes`
+    /// Gets an IEEE754 double-precision (8 bytes) floating point number from
+    /// `self` in native-endian byte order.
+    ///
+    /// The current position is advanced by 8.
     ///
     /// # Examples
     ///
     /// ```
     /// use bytes::Buf;
     ///
-    /// let bytes = (&b"hello world"[..]).to_bytes();
-    /// assert_eq!(&bytes[..], &b"hello world"[..]);
+    /// let mut buf: &[u8] = match cfg!(target_endian = "big") {
+    ///     true => b"\x3F\xF3\x33\x33\x33\x33\x33\x33 hello",
+    ///     false => b"\x33\x33\x33\x33\x33\x33\xF3\x3F hello",
+    /// };
+    /// assert_eq!(1.2f64, buf.get_f64_ne());
     /// ```
-    fn to_bytes(&mut self) -> crate::Bytes {
-        use super::BufMut;
-        let mut ret = crate::BytesMut::with_capacity(self.remaining());
-        ret.put(self);
-        ret.freeze()
-    }
-}
-
-macro_rules! deref_forward_buf {
-    () => (
-    fn remaining(&self) -> usize {
-        (**self).remaining()
-    }
-
-    fn bytes(&self) -> &[u8] {
-        (**self).bytes()
-    }
-
-    #[cfg(feature = "std")]
-    fn bytes_vectored<'b>(&'b self, dst: &mut [IoSlice<'b>]) -> usize {
-        (**self).bytes_vectored(dst)
+    ///
+    /// # Panics
+    ///
+    /// This function panics if there is not enough remaining data in `self`.
+    fn get_f64_ne(&mut self) -> f64 {
+        f64::from_bits(self.get_u64_ne())
     }
 
-    fn advance(&mut self, cnt: usize) {
-        (**self).advance(cnt)
-    }
+    /// Copies bytes from `self` into `dst`.
+    ///
+    /// The cursor is advanced by the number of bytes copied. If `self` does not
+    /// have enough remaining bytes to fill `dst`, nothing is copied.
+    ///
+    /// Returns `Err(TryGetError)` when there are not enough
+    /// remaining bytes to fill `dst`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf = &b"hello world"[..];
+    /// let mut dst = [0; 5];
+    ///
+    /// assert_eq!(Ok(()), buf.try_copy_to_slice(&mut dst));
+    /// assert_eq!(&b"hello"[..], &dst);
+    /// assert_eq!(6, buf.remaining());
+    /// ```
+    ///
+    /// ```
+    /// use bytes::{Buf, TryGetError};
+    ///
+    /// let mut buf = &b"hello world"[..];
+    /// let mut dst = [0; 12];
+    ///
+    /// assert_eq!(Err(TryGetError{requested: 12, available: 11}), buf.try_copy_to_slice(&mut dst));
+    /// assert_eq!(11, buf.remaining());
+    /// ```
+    fn try_copy_to_slice(&mut self, mut dst: &mut [u8]) -> Result<(), TryGetError> {
+        if self.remaining() < dst.len() {
+            return Err(TryGetError {
+                requested: dst.len(),
+                available: self.remaining(),
+            });
+        }
 
-    fn has_remaining(&self) -> bool {
-        (**self).has_remaining()
-    }
+        while !dst.is_empty() {
+            let src = self.chunk();
+            let cnt = usize::min(src.len(), dst.len());
 
-    fn copy_to_slice(&mut self, dst: &mut [u8]) {
-        (**self).copy_to_slice(dst)
-    }
+            dst[..cnt].copy_from_slice(&src[..cnt]);
+            dst = &mut dst[cnt..];
 
-    fn get_u8(&mut self) -> u8 {
-        (**self).get_u8()
+            self.advance(cnt);
+        }
+        Ok(())
     }
 
-    fn get_i8(&mut self) -> i8 {
-        (**self).get_i8()
+    /// Reads a single byte from `self` as an unsigned 8 bit integer.
+    ///
+    /// On success the current position is advanced by 1.
+    ///
+    /// Returns `Err(TryGetError)` when `self` has no remaining bytes;
+    /// in that case the current position is left unchanged.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf = &b"\x08 hello"[..];
+    /// assert_eq!(Ok(0x08_u8), buf.try_get_u8());
+    /// assert_eq!(6, buf.remaining());
+    /// ```
+    ///
+    /// ```
+    /// use bytes::{Buf, TryGetError};
+    ///
+    /// let mut buf = &b""[..];
+    /// assert_eq!(Err(TryGetError{requested: 1, available: 0}), buf.try_get_u8());
+    /// ```
+    fn try_get_u8(&mut self) -> Result<u8, TryGetError> {
+        if self.remaining() >= 1 {
+            let byte = self.chunk()[0];
+            self.advance(1);
+            return Ok(byte);
+        }
+        Err(TryGetError {
+            requested: 1,
+            available: self.remaining(),
+        })
+    }
+
+    /// Reads a single byte from `self` as a signed 8 bit integer.
+    ///
+    /// On success the current position is advanced by 1.
+    ///
+    /// Returns `Err(TryGetError)` when `self` has no remaining bytes;
+    /// in that case the current position is left unchanged.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf = &b"\x08 hello"[..];
+    /// assert_eq!(Ok(0x08_i8), buf.try_get_i8());
+    /// assert_eq!(6, buf.remaining());
+    /// ```
+    ///
+    /// ```
+    /// use bytes::{Buf, TryGetError};
+    ///
+    /// let mut buf = &b""[..];
+    /// assert_eq!(Err(TryGetError{requested: 1, available: 0}), buf.try_get_i8());
+    /// ```
+    fn try_get_i8(&mut self) -> Result<i8, TryGetError> {
+        if self.remaining() >= 1 {
+            let byte = self.chunk()[0] as i8;
+            self.advance(1);
+            return Ok(byte);
+        }
+        Err(TryGetError {
+            requested: 1,
+            available: self.remaining(),
+        })
+    }
+
+    /// Gets an unsigned 16 bit integer from `self` in big-endian byte order.
+    ///
+    /// The current position is advanced by 2.
+    ///
+    /// Returns `Err(TryGetError)` when there are not enough
+    /// remaining bytes to read the value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf = &b"\x08\x09 hello"[..];
+    /// assert_eq!(Ok(0x0809_u16), buf.try_get_u16());
+    /// assert_eq!(6, buf.remaining());
+    /// ```
+    ///
+    /// ```
+    /// use bytes::{Buf, TryGetError};
+    ///
+    /// let mut buf = &b"\x08"[..];
+    /// assert_eq!(Err(TryGetError{requested: 2, available: 1}), buf.try_get_u16());
+    /// assert_eq!(1, buf.remaining());
+    /// ```
+    fn try_get_u16(&mut self) -> Result<u16, TryGetError> {
+        buf_try_get_impl!(self, u16::from_be_bytes)
+    }
+
+    /// Gets an unsigned 16 bit integer from `self` in little-endian byte order.
+    ///
+    /// The current position is advanced by 2.
+    ///
+    /// Returns `Err(TryGetError)` when there are not enough
+    /// remaining bytes to read the value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf = &b"\x09\x08 hello"[..];
+    /// assert_eq!(Ok(0x0809_u16), buf.try_get_u16_le());
+    /// assert_eq!(6, buf.remaining());
+    /// ```
+    ///
+    /// ```
+    /// use bytes::{Buf, TryGetError};
+    ///
+    /// let mut buf = &b"\x08"[..];
+    /// assert_eq!(Err(TryGetError{requested: 2, available: 1}), buf.try_get_u16_le());
+    /// assert_eq!(1, buf.remaining());
+    /// ```
+    fn try_get_u16_le(&mut self) -> Result<u16, TryGetError> {
+        buf_try_get_impl!(self, u16::from_le_bytes)
+    }
+
+    /// Gets an unsigned 16 bit integer from `self` in native-endian byte order.
+    ///
+    /// The current position is advanced by 2.
+    ///
+    /// Returns `Err(TryGetError)` when there are not enough
+    /// remaining bytes to read the value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf: &[u8] = match cfg!(target_endian = "big") {
+    ///     true => b"\x08\x09 hello",
+    ///     false => b"\x09\x08 hello",
+    /// };
+    /// assert_eq!(Ok(0x0809_u16), buf.try_get_u16_ne());
+    /// assert_eq!(6, buf.remaining());
+    /// ```
+    ///
+    /// ```
+    /// use bytes::{Buf, TryGetError};
+    ///
+    /// let mut buf = &b"\x08"[..];
+    /// assert_eq!(Err(TryGetError{requested: 2, available: 1}), buf.try_get_u16_ne());
+    /// assert_eq!(1, buf.remaining());
+    /// ```
+    fn try_get_u16_ne(&mut self) -> Result<u16, TryGetError> {
+        buf_try_get_impl!(self, u16::from_ne_bytes)
+    }
+
+    /// Gets a signed 16 bit integer from `self` in big-endian byte order.
+    ///
+    /// The current position is advanced by 2.
+    ///
+    /// Returns `Err(TryGetError)` when there are not enough
+    /// remaining bytes to read the value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf = &b"\x08\x09 hello"[..];
+    /// assert_eq!(Ok(0x0809_i16), buf.try_get_i16());
+    /// assert_eq!(6, buf.remaining());
+    /// ```
+    ///
+    /// ```
+    /// use bytes::{Buf, TryGetError};
+    ///
+    /// let mut buf = &b"\x08"[..];
+    /// assert_eq!(Err(TryGetError{requested: 2, available: 1}), buf.try_get_i16());
+    /// assert_eq!(1, buf.remaining());
+    /// ```
+    fn try_get_i16(&mut self) -> Result<i16, TryGetError> {
+        buf_try_get_impl!(self, i16::from_be_bytes)
+    }
+
+    /// Gets a signed 16 bit integer from `self` in little-endian byte order.
+    ///
+    /// The current position is advanced by 2.
+    ///
+    /// Returns `Err(TryGetError)` when there are not enough
+    /// remaining bytes to read the value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf = &b"\x09\x08 hello"[..];
+    /// assert_eq!(Ok(0x0809_i16), buf.try_get_i16_le());
+    /// assert_eq!(6, buf.remaining());
+    /// ```
+    ///
+    /// ```
+    /// use bytes::{Buf, TryGetError};
+    ///
+    /// let mut buf = &b"\x08"[..];
+    /// assert_eq!(Err(TryGetError{requested: 2, available: 1}), buf.try_get_i16_le());
+    /// assert_eq!(1, buf.remaining());
+    /// ```
+    fn try_get_i16_le(&mut self) -> Result<i16, TryGetError> {
+        buf_try_get_impl!(self, i16::from_le_bytes)
+    }
+
+    /// Gets a signed 16 bit integer from `self` in native-endian byte order.
+    ///
+    /// The current position is advanced by 2.
+    ///
+    /// Returns `Err(TryGetError)` when there are not enough
+    /// remaining bytes to read the value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf: &[u8] = match cfg!(target_endian = "big") {
+    ///     true => b"\x08\x09 hello",
+    ///     false => b"\x09\x08 hello",
+    /// };
+    /// assert_eq!(Ok(0x0809_i16), buf.try_get_i16_ne());
+    /// assert_eq!(6, buf.remaining());
+    /// ```
+    ///
+    /// ```
+    /// use bytes::{Buf, TryGetError};
+    ///
+    /// let mut buf = &b"\x08"[..];
+    /// assert_eq!(Err(TryGetError{requested: 2, available: 1}), buf.try_get_i16_ne());
+    /// assert_eq!(1, buf.remaining());
+    /// ```
+    fn try_get_i16_ne(&mut self) -> Result<i16, TryGetError> {
+        buf_try_get_impl!(self, i16::from_ne_bytes)
+    }
+
+    /// Gets an unsigned 32 bit integer from `self` in big-endian byte order.
+    ///
+    /// The current position is advanced by 4.
+    ///
+    /// Returns `Err(TryGetError)` when there are not enough
+    /// remaining bytes to read the value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf = &b"\x08\x09\xA0\xA1 hello"[..];
+    /// assert_eq!(Ok(0x0809A0A1_u32), buf.try_get_u32());
+    /// assert_eq!(6, buf.remaining());
+    /// ```
+    ///
+    /// ```
+    /// use bytes::{Buf, TryGetError};
+    ///
+    /// let mut buf = &b"\x01\x02\x03"[..];
+    /// assert_eq!(Err(TryGetError{requested: 4, available: 3}), buf.try_get_u32());
+    /// assert_eq!(3, buf.remaining());
+    /// ```
+    fn try_get_u32(&mut self) -> Result<u32, TryGetError> {
+        buf_try_get_impl!(self, u32::from_be_bytes)
+    }
+
+    /// Gets an unsigned 32 bit integer from `self` in little-endian byte order.
+    ///
+    /// The current position is advanced by 4.
+    ///
+    /// Returns `Err(TryGetError)` when there are not enough
+    /// remaining bytes to read the value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf = &b"\xA1\xA0\x09\x08 hello"[..];
+    /// assert_eq!(Ok(0x0809A0A1_u32), buf.try_get_u32_le());
+    /// assert_eq!(6, buf.remaining());
+    /// ```
+    ///
+    /// ```
+    /// use bytes::{Buf, TryGetError};
+    ///
+    /// let mut buf = &b"\x08\x09\xA0"[..];
+    /// assert_eq!(Err(TryGetError{requested: 4, available: 3}), buf.try_get_u32_le());
+    /// assert_eq!(3, buf.remaining());
+    /// ```
+    fn try_get_u32_le(&mut self) -> Result<u32, TryGetError> {
+        buf_try_get_impl!(self, u32::from_le_bytes)
+    }
+
+    /// Gets an unsigned 32 bit integer from `self` in native-endian byte order.
+    ///
+    /// The current position is advanced by 4.
+    ///
+    /// Returns `Err(TryGetError)` when there are not enough
+    /// remaining bytes to read the value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf: &[u8] = match cfg!(target_endian = "big") {
+    ///     true => b"\x08\x09\xA0\xA1 hello",
+    ///     false => b"\xA1\xA0\x09\x08 hello",
+    /// };
+    /// assert_eq!(Ok(0x0809A0A1_u32), buf.try_get_u32_ne());
+    /// assert_eq!(6, buf.remaining());
+    /// ```
+    ///
+    /// ```
+    /// use bytes::{Buf, TryGetError};
+    ///
+    /// let mut buf = &b"\x08\x09\xA0"[..];
+    /// assert_eq!(Err(TryGetError{requested: 4, available: 3}), buf.try_get_u32_ne());
+    /// assert_eq!(3, buf.remaining());
+    /// ```
+    fn try_get_u32_ne(&mut self) -> Result<u32, TryGetError> {
+        buf_try_get_impl!(self, u32::from_ne_bytes)
+    }
+
+    /// Gets a signed 32 bit integer from `self` in big-endian byte order.
+    ///
+    /// The current position is advanced by 4.
+    ///
+    /// Returns `Err(TryGetError)` when there are not enough
+    /// remaining bytes to read the value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf = &b"\x08\x09\xA0\xA1 hello"[..];
+    /// assert_eq!(Ok(0x0809A0A1_i32), buf.try_get_i32());
+    /// assert_eq!(6, buf.remaining());
+    /// ```
+    ///
+    /// ```
+    /// use bytes::{Buf, TryGetError};
+    ///
+    /// let mut buf = &b"\x01\x02\x03"[..];
+    /// assert_eq!(Err(TryGetError{requested: 4, available: 3}), buf.try_get_i32());
+    /// assert_eq!(3, buf.remaining());
+    /// ```
+    fn try_get_i32(&mut self) -> Result<i32, TryGetError> {
+        buf_try_get_impl!(self, i32::from_be_bytes)
+    }
+
+    /// Gets a signed 32 bit integer from `self` in little-endian byte order.
+    ///
+    /// The current position is advanced by 4.
+    ///
+    /// Returns `Err(TryGetError)` when there are not enough
+    /// remaining bytes to read the value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf = &b"\xA1\xA0\x09\x08 hello"[..];
+    /// assert_eq!(Ok(0x0809A0A1_i32), buf.try_get_i32_le());
+    /// assert_eq!(6, buf.remaining());
+    /// ```
+    ///
+    /// ```
+    /// use bytes::{Buf, TryGetError};
+    ///
+    /// let mut buf = &b"\x08\x09\xA0"[..];
+    /// assert_eq!(Err(TryGetError{requested: 4, available: 3}), buf.try_get_i32_le());
+    /// assert_eq!(3, buf.remaining());
+    /// ```
+    fn try_get_i32_le(&mut self) -> Result<i32, TryGetError> {
+        buf_try_get_impl!(self, i32::from_le_bytes)
+    }
+
+    /// Gets a signed 32 bit integer from `self` in native-endian byte order.
+    ///
+    /// The current position is advanced by 4.
+    ///
+    /// Returns `Err(TryGetError)` when there are not enough
+    /// remaining bytes to read the value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf: &[u8] = match cfg!(target_endian = "big") {
+    ///     true => b"\x08\x09\xA0\xA1 hello",
+    ///     false => b"\xA1\xA0\x09\x08 hello",
+    /// };
+    /// assert_eq!(Ok(0x0809A0A1_i32), buf.try_get_i32_ne());
+    /// assert_eq!(6, buf.remaining());
+    /// ```
+    ///
+    /// ```
+    /// use bytes::{Buf, TryGetError};
+    ///
+    /// let mut buf = &b"\x08\x09\xA0"[..];
+    /// assert_eq!(Err(TryGetError{requested: 4, available: 3}), buf.try_get_i32_ne());
+    /// assert_eq!(3, buf.remaining());
+    /// ```
+    fn try_get_i32_ne(&mut self) -> Result<i32, TryGetError> {
+        buf_try_get_impl!(self, i32::from_ne_bytes)
+    }
+
+    /// Gets an unsigned 64 bit integer from `self` in big-endian byte order.
+    ///
+    /// The current position is advanced by 8.
+    ///
+    /// Returns `Err(TryGetError)` when there are not enough
+    /// remaining bytes to read the value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf = &b"\x01\x02\x03\x04\x05\x06\x07\x08 hello"[..];
+    /// assert_eq!(Ok(0x0102030405060708_u64), buf.try_get_u64());
+    /// assert_eq!(6, buf.remaining());
+    /// ```
+    ///
+    /// ```
+    /// use bytes::{Buf, TryGetError};
+    ///
+    /// let mut buf = &b"\x01\x02\x03\x04\x05\x06\x07"[..];
+    /// assert_eq!(Err(TryGetError{requested: 8, available: 7}), buf.try_get_u64());
+    /// assert_eq!(7, buf.remaining());
+    /// ```
+    fn try_get_u64(&mut self) -> Result<u64, TryGetError> {
+        buf_try_get_impl!(self, u64::from_be_bytes)
+    }
+
+    /// Gets an unsigned 64 bit integer from `self` in little-endian byte order.
+    ///
+    /// The current position is advanced by 8.
+    ///
+    /// Returns `Err(TryGetError)` when there are not enough
+    /// remaining bytes to read the value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf = &b"\x08\x07\x06\x05\x04\x03\x02\x01 hello"[..];
+    /// assert_eq!(Ok(0x0102030405060708_u64), buf.try_get_u64_le());
+    /// assert_eq!(6, buf.remaining());
+    /// ```
+    ///
+    /// ```
+    /// use bytes::{Buf, TryGetError};
+    ///
+    /// let mut buf = &b"\x08\x07\x06\x05\x04\x03\x02"[..];
+    /// assert_eq!(Err(TryGetError{requested: 8, available: 7}), buf.try_get_u64_le());
+    /// assert_eq!(7, buf.remaining());
+    /// ```
+    fn try_get_u64_le(&mut self) -> Result<u64, TryGetError> {
+        buf_try_get_impl!(self, u64::from_le_bytes)
+    }
+
+    /// Gets an unsigned 64 bit integer from `self` in native-endian byte order.
+    ///
+    /// The current position is advanced by 8.
+    ///
+    /// Returns `Err(TryGetError)` when there are not enough
+    /// remaining bytes to read the value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf: &[u8] = match cfg!(target_endian = "big") {
+    ///     true => b"\x01\x02\x03\x04\x05\x06\x07\x08 hello",
+    ///     false => b"\x08\x07\x06\x05\x04\x03\x02\x01 hello",
+    /// };
+    /// assert_eq!(Ok(0x0102030405060708_u64), buf.try_get_u64_ne());
+    /// assert_eq!(6, buf.remaining());
+    /// ```
+    ///
+    /// ```
+    /// use bytes::{Buf, TryGetError};
+    ///
+    /// let mut buf = &b"\x01\x02\x03\x04\x05\x06\x07"[..];
+    /// assert_eq!(Err(TryGetError{requested: 8, available: 7}), buf.try_get_u64_ne());
+    /// assert_eq!(7, buf.remaining());
+    /// ```
+    fn try_get_u64_ne(&mut self) -> Result<u64, TryGetError> {
+        buf_try_get_impl!(self, u64::from_ne_bytes)
+    }
+
+    /// Gets a signed 64 bit integer from `self` in big-endian byte order.
+    ///
+    /// The current position is advanced by 8.
+    ///
+    /// Returns `Err(TryGetError)` when there are not enough
+    /// remaining bytes to read the value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf = &b"\x01\x02\x03\x04\x05\x06\x07\x08 hello"[..];
+    /// assert_eq!(Ok(0x0102030405060708_i64), buf.try_get_i64());
+    /// assert_eq!(6, buf.remaining());
+    /// ```
+    ///
+    /// ```
+    /// use bytes::{Buf, TryGetError};
+    ///
+    /// let mut buf = &b"\x01\x02\x03\x04\x05\x06\x07"[..];
+    /// assert_eq!(Err(TryGetError{requested: 8, available: 7}), buf.try_get_i64());
+    /// assert_eq!(7, buf.remaining());
+    /// ```
+    fn try_get_i64(&mut self) -> Result<i64, TryGetError> {
+        buf_try_get_impl!(self, i64::from_be_bytes)
+    }
+
+    /// Gets a signed 64 bit integer from `self` in little-endian byte order.
+    ///
+    /// The current position is advanced by 8.
+    ///
+    /// Returns `Err(TryGetError)` when there are not enough
+    /// remaining bytes to read the value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf = &b"\x08\x07\x06\x05\x04\x03\x02\x01 hello"[..];
+    /// assert_eq!(Ok(0x0102030405060708_i64), buf.try_get_i64_le());
+    /// assert_eq!(6, buf.remaining());
+    /// ```
+    ///
+    /// ```
+    /// use bytes::{Buf, TryGetError};
+    ///
+    /// let mut buf = &b"\x08\x07\x06\x05\x04\x03\x02"[..];
+    /// assert_eq!(Err(TryGetError{requested: 8, available: 7}), buf.try_get_i64_le());
+    /// assert_eq!(7, buf.remaining());
+    /// ```
+    fn try_get_i64_le(&mut self) -> Result<i64, TryGetError> {
+        buf_try_get_impl!(self, i64::from_le_bytes)
+    }
+
+    /// Gets a signed 64 bit integer from `self` in native-endian byte order.
+    ///
+    /// The current position is advanced by 8.
+    ///
+    /// Returns `Err(TryGetError)` when there are not enough
+    /// remaining bytes to read the value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf: &[u8] = match cfg!(target_endian = "big") {
+    ///     true => b"\x01\x02\x03\x04\x05\x06\x07\x08 hello",
+    ///     false => b"\x08\x07\x06\x05\x04\x03\x02\x01 hello",
+    /// };
+    /// assert_eq!(Ok(0x0102030405060708_i64), buf.try_get_i64_ne());
+    /// assert_eq!(6, buf.remaining());
+    /// ```
+    ///
+    /// ```
+    /// use bytes::{Buf, TryGetError};
+    ///
+    /// let mut buf = &b"\x01\x02\x03\x04\x05\x06\x07"[..];
+    /// assert_eq!(Err(TryGetError{requested: 8, available: 7}), buf.try_get_i64_ne());
+    /// assert_eq!(7, buf.remaining());
+    /// ```
+    fn try_get_i64_ne(&mut self) -> Result<i64, TryGetError> {
+        buf_try_get_impl!(self, i64::from_ne_bytes)
+    }
+
+    /// Gets an unsigned 128 bit integer from `self` in big-endian byte order.
+    ///
+    /// The current position is advanced by 16.
+    ///
+    /// Returns `Err(TryGetError)` when there are not enough
+    /// remaining bytes to read the value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf = &b"\x01\x02\x03\x04\x05\x06\x07\x08\x09\x10\x11\x12\x13\x14\x15\x16 hello"[..];
+    /// assert_eq!(Ok(0x01020304050607080910111213141516_u128), buf.try_get_u128());
+    /// assert_eq!(6, buf.remaining());
+    /// ```
+    ///
+    /// ```
+    /// use bytes::{Buf, TryGetError};
+    ///
+    /// let mut buf = &b"\x01\x02\x03\x04\x05\x06\x07\x08\x09\x10\x11\x12\x13\x14\x15"[..];
+    /// assert_eq!(Err(TryGetError{requested: 16, available: 15}), buf.try_get_u128());
+    /// assert_eq!(15, buf.remaining());
+    /// ```
+    fn try_get_u128(&mut self) -> Result<u128, TryGetError> {
+        buf_try_get_impl!(self, u128::from_be_bytes)
+    }
+
+    /// Gets an unsigned 128 bit integer from `self` in little-endian byte order.
+    ///
+    /// The current position is advanced by 16.
+    ///
+    /// Returns `Err(TryGetError)` when there are not enough
+    /// remaining bytes to read the value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf = &b"\x16\x15\x14\x13\x12\x11\x10\x09\x08\x07\x06\x05\x04\x03\x02\x01 hello"[..];
+    /// assert_eq!(Ok(0x01020304050607080910111213141516_u128), buf.try_get_u128_le());
+    /// assert_eq!(6, buf.remaining());
+    /// ```
+    ///
+    /// ```
+    /// use bytes::{Buf, TryGetError};
+    ///
+    /// let mut buf = &b"\x16\x15\x14\x13\x12\x11\x10\x09\x08\x07\x06\x05\x04\x03\x02"[..];
+    /// assert_eq!(Err(TryGetError{requested: 16, available: 15}), buf.try_get_u128_le());
+    /// assert_eq!(15, buf.remaining());
+    /// ```
+    fn try_get_u128_le(&mut self) -> Result<u128, TryGetError> {
+        buf_try_get_impl!(self, u128::from_le_bytes)
+    }
+
+    /// Gets an unsigned 128 bit integer from `self` in native-endian byte order.
+    ///
+    /// The current position is advanced by 16.
+    ///
+    /// Returns `Err(TryGetError)` when there are not enough
+    /// remaining bytes to read the value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf: &[u8] = match cfg!(target_endian = "big") {
+    ///     true => b"\x01\x02\x03\x04\x05\x06\x07\x08\x09\x10\x11\x12\x13\x14\x15\x16 hello",
+    ///     false => b"\x16\x15\x14\x13\x12\x11\x10\x09\x08\x07\x06\x05\x04\x03\x02\x01 hello",
+    /// };
+    /// assert_eq!(Ok(0x01020304050607080910111213141516_u128), buf.try_get_u128_ne());
+    /// assert_eq!(6, buf.remaining());
+    /// ```
+    ///
+    /// ```
+    /// use bytes::{Buf, TryGetError};
+    ///
+    /// let mut buf = &b"\x01\x02\x03\x04\x05\x06\x07\x08\x09\x10\x11\x12\x13\x14\x15"[..];
+    /// assert_eq!(Err(TryGetError{requested: 16, available: 15}), buf.try_get_u128_ne());
+    /// assert_eq!(15, buf.remaining());
+    /// ```
+    fn try_get_u128_ne(&mut self) -> Result<u128, TryGetError> {
+        buf_try_get_impl!(self, u128::from_ne_bytes)
+    }
+
+    /// Gets a signed 128 bit integer from `self` in big-endian byte order.
+    ///
+    /// The current position is advanced by 16.
+    ///
+    /// Returns `Err(TryGetError)` when there are not enough
+    /// remaining bytes to read the value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf = &b"\x01\x02\x03\x04\x05\x06\x07\x08\x09\x10\x11\x12\x13\x14\x15\x16 hello"[..];
+    /// assert_eq!(Ok(0x01020304050607080910111213141516_i128), buf.try_get_i128());
+    /// assert_eq!(6, buf.remaining());
+    /// ```
+    ///
+    /// ```
+    /// use bytes::{Buf, TryGetError};
+    ///
+    /// let mut buf = &b"\x01\x02\x03\x04\x05\x06\x07\x08\x09\x10\x11\x12\x13\x14\x15"[..];
+    /// assert_eq!(Err(TryGetError{requested: 16, available: 15}), buf.try_get_i128());
+    /// assert_eq!(15, buf.remaining());
+    /// ```
+    fn try_get_i128(&mut self) -> Result<i128, TryGetError> {
+        buf_try_get_impl!(self, i128::from_be_bytes)
+    }
+
+    /// Gets a signed 128 bit integer from `self` in little-endian byte order.
+    ///
+    /// The current position is advanced by 16.
+    ///
+    /// Returns `Err(TryGetError)` when there are not enough
+    /// remaining bytes to read the value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf = &b"\x16\x15\x14\x13\x12\x11\x10\x09\x08\x07\x06\x05\x04\x03\x02\x01 hello"[..];
+    /// assert_eq!(Ok(0x01020304050607080910111213141516_i128), buf.try_get_i128_le());
+    /// assert_eq!(6, buf.remaining());
+    /// ```
+    ///
+    /// ```
+    /// use bytes::{Buf, TryGetError};
+    ///
+    /// let mut buf = &b"\x16\x15\x14\x13\x12\x11\x10\x09\x08\x07\x06\x05\x04\x03\x02"[..];
+    /// assert_eq!(Err(TryGetError{requested: 16, available: 15}), buf.try_get_i128_le());
+    /// assert_eq!(15, buf.remaining());
+    /// ```
+    fn try_get_i128_le(&mut self) -> Result<i128, TryGetError> {
+        buf_try_get_impl!(self, i128::from_le_bytes)
+    }
+
+    /// Gets a signed 128 bit integer from `self` in native-endian byte order.
+    ///
+    /// The current position is advanced by 16.
+    ///
+    /// Returns `Err(TryGetError)` when there are not enough
+    /// remaining bytes to read the value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf: &[u8] = match cfg!(target_endian = "big") {
+    ///     true => b"\x01\x02\x03\x04\x05\x06\x07\x08\x09\x10\x11\x12\x13\x14\x15\x16 hello",
+    ///     false => b"\x16\x15\x14\x13\x12\x11\x10\x09\x08\x07\x06\x05\x04\x03\x02\x01 hello",
+    /// };
+    /// assert_eq!(Ok(0x01020304050607080910111213141516_i128), buf.try_get_i128_ne());
+    /// assert_eq!(6, buf.remaining());
+    /// ```
+    ///
+    /// ```
+    /// use bytes::{Buf, TryGetError};
+    ///
+    /// let mut buf = &b"\x01\x02\x03\x04\x05\x06\x07\x08\x09\x10\x11\x12\x13\x14\x15"[..];
+    /// assert_eq!(Err(TryGetError{requested: 16, available: 15}), buf.try_get_i128_ne());
+    /// assert_eq!(15, buf.remaining());
+    /// ```
+    fn try_get_i128_ne(&mut self) -> Result<i128, TryGetError> {
+        buf_try_get_impl!(self, i128::from_ne_bytes)
+    }
+
+    /// Gets an unsigned n-byte integer from `self` in big-endian byte order.
+    ///
+    /// The current position is advanced by `nbytes`.
+    ///
+    /// Returns `Err(TryGetError)` when there are not enough
+    /// remaining bytes to read the value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf = &b"\x01\x02\x03 hello"[..];
+    /// assert_eq!(Ok(0x010203_u64), buf.try_get_uint(3));
+    /// assert_eq!(6, buf.remaining());
+    /// ```
+    ///
+    /// ```
+    /// use bytes::{Buf, TryGetError};
+    ///
+    /// let mut buf = &b"\x01\x02\x03"[..];
+    /// assert_eq!(Err(TryGetError{requested: 4, available: 3}), buf.try_get_uint(4));
+    /// assert_eq!(3, buf.remaining());
+    /// ```
+    ///
+    /// # Panics
+    ///
+    /// This function panics if `nbytes` is greater than 8.
+    fn try_get_uint(&mut self, nbytes: usize) -> Result<u64, TryGetError> {
+        buf_try_get_impl!(be => self, u64, nbytes);
+    }
+
+    /// Gets an unsigned n-byte integer from `self` in little-endian byte order.
+    ///
+    /// The current position is advanced by `nbytes`.
+    ///
+    /// Returns `Err(TryGetError)` when there are not enough
+    /// remaining bytes to read the value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf = &b"\x03\x02\x01 hello"[..];
+    /// assert_eq!(Ok(0x010203_u64), buf.try_get_uint_le(3));
+    /// assert_eq!(6, buf.remaining());
+    /// ```
+    ///
+    /// ```
+    /// use bytes::{Buf, TryGetError};
+    ///
+    /// let mut buf = &b"\x01\x02\x03"[..];
+    /// assert_eq!(Err(TryGetError{requested: 4, available: 3}), buf.try_get_uint_le(4));
+    /// assert_eq!(3, buf.remaining());
+    /// ```
+    ///
+    /// # Panics
+    ///
+    /// This function panics if `nbytes` is greater than 8.
+    fn try_get_uint_le(&mut self, nbytes: usize) -> Result<u64, TryGetError> {
+        buf_try_get_impl!(le => self, u64, nbytes);
+    }
+
+    /// Gets an unsigned n-byte integer from `self` in native-endian byte order.
+    ///
+    /// The current position is advanced by `nbytes`.
+    ///
+    /// Returns `Err(TryGetError)` when there are not enough
+    /// remaining bytes to read the value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf: &[u8] = match cfg!(target_endian = "big") {
+    ///     true => b"\x01\x02\x03 hello",
+    ///     false => b"\x03\x02\x01 hello",
+    /// };
+    /// assert_eq!(Ok(0x010203_u64), buf.try_get_uint_ne(3));
+    /// assert_eq!(6, buf.remaining());
+    /// ```
+    ///
+    /// ```
+    /// use bytes::{Buf, TryGetError};
+    ///
+    /// let mut buf: &[u8] = match cfg!(target_endian = "big") {
+    ///     true => b"\x01\x02\x03",
+    ///     false => b"\x03\x02\x01",
+    /// };
+    /// assert_eq!(Err(TryGetError{requested: 4, available: 3}), buf.try_get_uint_ne(4));
+    /// assert_eq!(3, buf.remaining());
+    /// ```
+    ///
+    /// # Panics
+    ///
+    /// This function panics if `nbytes` is greater than 8.
+    fn try_get_uint_ne(&mut self, nbytes: usize) -> Result<u64, TryGetError> {
+        if cfg!(target_endian = "big") {
+            self.try_get_uint(nbytes)
+        } else {
+            self.try_get_uint_le(nbytes)
+        }
+    }
+
+    /// Gets a signed n-byte integer from `self` in big-endian byte order.
+    ///
+    /// The current position is advanced by `nbytes`.
+    ///
+    /// Returns `Err(TryGetError)` when there are not enough
+    /// remaining bytes to read the value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf = &b"\x01\x02\x03 hello"[..];
+    /// assert_eq!(Ok(0x010203_i64), buf.try_get_int(3));
+    /// assert_eq!(6, buf.remaining());
+    /// ```
+    ///
+    /// ```
+    /// use bytes::{Buf, TryGetError};
+    ///
+    /// let mut buf = &b"\x01\x02\x03"[..];
+    /// assert_eq!(Err(TryGetError{requested: 4, available: 3}), buf.try_get_int(4));
+    /// assert_eq!(3, buf.remaining());
+    /// ```
+    ///
+    /// # Panics
+    ///
+    /// This function panics if `nbytes` is greater than 8.
+    fn try_get_int(&mut self, nbytes: usize) -> Result<i64, TryGetError> {
+        buf_try_get_impl!(be => self, i64, nbytes);
+    }
+
+    /// Gets a signed n-byte integer from `self` in little-endian byte order.
+    ///
+    /// The current position is advanced by `nbytes`.
+    ///
+    /// Returns `Err(TryGetError)` when there are not enough
+    /// remaining bytes to read the value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf = &b"\x03\x02\x01 hello"[..];
+    /// assert_eq!(Ok(0x010203_i64), buf.try_get_int_le(3));
+    /// assert_eq!(6, buf.remaining());
+    /// ```
+    ///
+    /// ```
+    /// use bytes::{Buf, TryGetError};
+    ///
+    /// let mut buf = &b"\x01\x02\x03"[..];
+    /// assert_eq!(Err(TryGetError{requested: 4, available: 3}), buf.try_get_int_le(4));
+    /// assert_eq!(3, buf.remaining());
+    /// ```
+    ///
+    /// # Panics
+    ///
+    /// This function panics if `nbytes` is greater than 8.
+    fn try_get_int_le(&mut self, nbytes: usize) -> Result<i64, TryGetError> {
+        buf_try_get_impl!(le => self, i64, nbytes);
+    }
+
+    /// Gets a signed n-byte integer from `self` in native-endian byte order.
+    ///
+    /// The current position is advanced by `nbytes`.
+    ///
+    /// Returns `Err(TryGetError)` when there are not enough
+    /// remaining bytes to read the value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf: &[u8] = match cfg!(target_endian = "big") {
+    ///     true => b"\x01\x02\x03 hello",
+    ///     false => b"\x03\x02\x01 hello",
+    /// };
+    /// assert_eq!(Ok(0x010203_i64), buf.try_get_int_ne(3));
+    /// assert_eq!(6, buf.remaining());
+    /// ```
+    ///
+    /// ```
+    /// use bytes::{Buf, TryGetError};
+    ///
+    /// let mut buf: &[u8] = match cfg!(target_endian = "big") {
+    ///     true => b"\x01\x02\x03",
+    ///     false => b"\x03\x02\x01",
+    /// };
+    /// assert_eq!(Err(TryGetError{requested: 4, available: 3}), buf.try_get_int_ne(4));
+    /// assert_eq!(3, buf.remaining());
+    /// ```
+    ///
+    /// # Panics
+    ///
+    /// This function panics if `nbytes` is greater than 8.
+    fn try_get_int_ne(&mut self, nbytes: usize) -> Result<i64, TryGetError> {
+        if cfg!(target_endian = "big") {
+            self.try_get_int(nbytes)
+        } else {
+            self.try_get_int_le(nbytes)
+        }
+    }
+
+    /// Gets an IEEE754 single-precision (4 bytes) floating point number from
+    /// `self` in big-endian byte order.
+    ///
+    /// The current position is advanced by 4.
+    ///
+    /// Returns `Err(TryGetError)` when there are not enough
+    /// remaining bytes to read the value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf = &b"\x3F\x99\x99\x9A hello"[..];
+    /// assert_eq!(Ok(1.2f32), buf.try_get_f32());
+    /// assert_eq!(6, buf.remaining());
+    /// ```
+    ///
+    /// ```
+    /// use bytes::{Buf, TryGetError};
+    ///
+    /// let mut buf = &b"\x3F\x99\x99"[..];
+    /// assert_eq!(Err(TryGetError{requested: 4, available: 3}), buf.try_get_f32());
+    /// assert_eq!(3, buf.remaining());
+    /// ```
+    fn try_get_f32(&mut self) -> Result<f32, TryGetError> {
+        Ok(f32::from_bits(self.try_get_u32()?))
+    }
+
+    /// Gets an IEEE754 single-precision (4 bytes) floating point number from
+    /// `self` in little-endian byte order.
+    ///
+    /// The current position is advanced by 4.
+    ///
+    /// Returns `Err(TryGetError)` when there are not enough
+    /// remaining bytes to read the value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf = &b"\x9A\x99\x99\x3F hello"[..];
+    /// assert_eq!(Ok(1.2f32), buf.try_get_f32_le());
+    /// assert_eq!(6, buf.remaining());
+    /// ```
+    ///
+    /// ```
+    /// use bytes::{Buf, TryGetError};
+    ///
+    /// let mut buf = &b"\x3F\x99\x99"[..];
+    /// assert_eq!(Err(TryGetError{requested: 4, available: 3}), buf.try_get_f32_le());
+    /// assert_eq!(3, buf.remaining());
+    /// ```
+    fn try_get_f32_le(&mut self) -> Result<f32, TryGetError> {
+        Ok(f32::from_bits(self.try_get_u32_le()?))
+    }
+
+    /// Gets an IEEE754 single-precision (4 bytes) floating point number from
+    /// `self` in native-endian byte order.
+    ///
+    /// The current position is advanced by 4.
+    ///
+    /// Returns `Err(TryGetError)` when there are not enough
+    /// remaining bytes to read the value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf: &[u8] = match cfg!(target_endian = "big") {
+    ///     true => b"\x3F\x99\x99\x9A hello",
+    ///     false => b"\x9A\x99\x99\x3F hello",
+    /// };
+    /// assert_eq!(Ok(1.2f32), buf.try_get_f32_ne());
+    /// assert_eq!(6, buf.remaining());
+    /// ```
+    ///
+    /// ```
+    /// use bytes::{Buf, TryGetError};
+    ///
+    /// let mut buf = &b"\x3F\x99\x99"[..];
+    /// assert_eq!(Err(TryGetError{requested: 4, available: 3}), buf.try_get_f32_ne());
+    /// assert_eq!(3, buf.remaining());
+    /// ```
+    fn try_get_f32_ne(&mut self) -> Result<f32, TryGetError> {
+        Ok(f32::from_bits(self.try_get_u32_ne()?))
+    }
+
+    /// Gets an IEEE754 double-precision (8 bytes) floating point number from
+    /// `self` in big-endian byte order.
+    ///
+    /// The current position is advanced by 8.
+    ///
+    /// Returns `Err(TryGetError)` when there are not enough
+    /// remaining bytes to read the value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf = &b"\x3F\xF3\x33\x33\x33\x33\x33\x33 hello"[..];
+    /// assert_eq!(Ok(1.2f64), buf.try_get_f64());
+    /// assert_eq!(6, buf.remaining());
+    /// ```
+    ///
+    /// ```
+    /// use bytes::{Buf, TryGetError};
+    ///
+    /// let mut buf = &b"\x3F\xF3\x33\x33\x33\x33\x33"[..];
+    /// assert_eq!(Err(TryGetError{requested: 8, available: 7}), buf.try_get_f64());
+    /// assert_eq!(7, buf.remaining());
+    /// ```
+    fn try_get_f64(&mut self) -> Result<f64, TryGetError> {
+        Ok(f64::from_bits(self.try_get_u64()?))
+    }
+
+    /// Gets an IEEE754 double-precision (8 bytes) floating point number from
+    /// `self` in little-endian byte order.
+    ///
+    /// The current position is advanced by 8.
+    ///
+    /// Returns `Err(TryGetError)` when there are not enough
+    /// remaining bytes to read the value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf = &b"\x33\x33\x33\x33\x33\x33\xF3\x3F hello"[..];
+    /// assert_eq!(Ok(1.2f64), buf.try_get_f64_le());
+    /// assert_eq!(6, buf.remaining());
+    /// ```
+    ///
+    /// ```
+    /// use bytes::{Buf, TryGetError};
+    ///
+    /// let mut buf = &b"\x3F\xF3\x33\x33\x33\x33\x33"[..];
+    /// assert_eq!(Err(TryGetError{requested: 8, available: 7}), buf.try_get_f64_le());
+    /// assert_eq!(7, buf.remaining());
+    /// ```
+    fn try_get_f64_le(&mut self) -> Result<f64, TryGetError> {
+        Ok(f64::from_bits(self.try_get_u64_le()?))
+    }
+
+    /// Gets an IEEE754 double-precision (8 bytes) floating point number from
+    /// `self` in native-endian byte order.
+    ///
+    /// The current position is advanced by 8.
+    ///
+    /// Returns `Err(TryGetError)` when there are not enough
+    /// remaining bytes to read the value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut buf: &[u8] = match cfg!(target_endian = "big") {
+    ///     true => b"\x3F\xF3\x33\x33\x33\x33\x33\x33 hello",
+    ///     false => b"\x33\x33\x33\x33\x33\x33\xF3\x3F hello",
+    /// };
+    /// assert_eq!(Ok(1.2f64), buf.try_get_f64_ne());
+    /// assert_eq!(6, buf.remaining());
+    /// ```
+    ///
+    /// ```
+    /// use bytes::{Buf, TryGetError};
+    ///
+    /// let mut buf = &b"\x3F\xF3\x33\x33\x33\x33\x33"[..];
+    /// assert_eq!(Err(TryGetError{requested: 8, available: 7}), buf.try_get_f64_ne());
+    /// assert_eq!(7, buf.remaining());
+    /// ```
+    fn try_get_f64_ne(&mut self) -> Result<f64, TryGetError> {
+        Ok(f64::from_bits(self.try_get_u64_ne()?))
     }
 
-    fn get_u16(&mut self) -> u16 {
-        (**self).get_u16()
-    }
+    /// Consumes `len` bytes inside `self` and returns a new instance of `Bytes`
+    /// with this data.
+    ///
+    /// This function may be optimized by the underlying type to avoid actual
+    /// copies. For example, the `Bytes` implementation will do a shallow copy
+    /// (ref-count increment).
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let bytes = (&b"hello world"[..]).copy_to_bytes(5);
+    /// assert_eq!(&bytes[..], &b"hello"[..]);
+    /// ```
+    ///
+    /// # Panics
+    ///
+    /// This function panics if `len > self.remaining()`.
+    fn copy_to_bytes(&mut self, len: usize) -> crate::Bytes {
+        use super::BufMut;
+
+        if self.remaining() < len {
+            panic_advance(&TryGetError {
+                requested: len,
+                available: self.remaining(),
+            });
+        }
 
-    fn get_u16_le(&mut self) -> u16 {
-        (**self).get_u16_le()
+        let mut ret = crate::BytesMut::with_capacity(len);
+        ret.put(self.take(len));
+        ret.freeze()
     }
 
-    fn get_i16(&mut self) -> i16 {
-        (**self).get_i16()
+    /// Creates an adaptor which will read at most `limit` bytes from `self`.
+    ///
+    /// This function returns a new instance of `Buf` which will read at most
+    /// `limit` bytes.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::{Buf, BufMut};
+    ///
+    /// let mut buf = b"hello world"[..].take(5);
+    /// let mut dst = vec![];
+    ///
+    /// dst.put(&mut buf);
+    /// assert_eq!(dst, b"hello");
+    ///
+    /// let mut buf = buf.into_inner();
+    /// dst.clear();
+    /// dst.put(&mut buf);
+    /// assert_eq!(dst, b" world");
+    /// ```
+    fn take(self, limit: usize) -> Take<Self>
+    where
+        Self: Sized,
+    {
+        take::new(self, limit)
     }
 
-    fn get_i16_le(&mut self) -> i16 {
-        (**self).get_i16_le()
+    /// Creates an adaptor which will chain this buffer with another.
+    ///
+    /// The returned `Buf` instance will first consume all bytes from `self`.
+    /// Afterwards the output is equivalent to the output of `next`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Buf;
+    ///
+    /// let mut chain = b"hello "[..].chain(&b"world"[..]);
+    ///
+    /// let full = chain.copy_to_bytes(11);
+    /// assert_eq!(full.chunk(), b"hello world");
+    /// ```
+    fn chain<U: Buf>(self, next: U) -> Chain<Self, U>
+    where
+        Self: Sized,
+    {
+        Chain::new(self, next)
     }
 
-    fn get_u32(&mut self) -> u32 {
-        (**self).get_u32()
+    /// Creates an adaptor which implements the `Read` trait for `self`.
+    ///
+    /// This function returns a new value which implements `Read` by adapting
+    /// the `Read` trait functions to the `Buf` trait functions. Given that
+    /// `Buf` operations are infallible, none of the `Read` functions will
+    /// return with `Err`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::{Bytes, Buf};
+    /// use std::io::Read;
+    ///
+    /// let buf = Bytes::from("hello world");
+    ///
+    /// let mut reader = buf.reader();
+    /// let mut dst = [0; 1024];
+    ///
+    /// let num = reader.read(&mut dst).unwrap();
+    ///
+    /// assert_eq!(11, num);
+    /// assert_eq!(&dst[..11], &b"hello world"[..]);
+    /// ```
+    #[cfg(feature = "std")]
+    #[cfg_attr(docsrs, doc(cfg(feature = "std")))]
+    fn reader(self) -> Reader<Self>
+    where
+        Self: Sized,
+    {
+        reader::new(self)
     }
+}
 
-    fn get_u32_le(&mut self) -> u32 {
-        (**self).get_u32_le()
-    }
+macro_rules! deref_forward_buf {
+    () => {
+        #[inline]
+        fn remaining(&self) -> usize {
+            (**self).remaining()
+        }
 
-    fn get_i32(&mut self) -> i32 {
-        (**self).get_i32()
-    }
+        #[inline]
+        fn chunk(&self) -> &[u8] {
+            (**self).chunk()
+        }
 
-    fn get_i32_le(&mut self) -> i32 {
-        (**self).get_i32_le()
-    }
+        #[cfg(feature = "std")]
+        #[inline]
+        fn chunks_vectored<'b>(&'b self, dst: &mut [IoSlice<'b>]) -> usize {
+            (**self).chunks_vectored(dst)
+        }
 
-    fn get_u64(&mut self) -> u64 {
-        (**self).get_u64()
-    }
+        #[inline]
+        fn advance(&mut self, cnt: usize) {
+            (**self).advance(cnt)
+        }
 
-    fn get_u64_le(&mut self) -> u64 {
-        (**self).get_u64_le()
-    }
+        #[inline]
+        fn has_remaining(&self) -> bool {
+            (**self).has_remaining()
+        }
 
-    fn get_i64(&mut self) -> i64 {
-        (**self).get_i64()
-    }
+        #[inline]
+        fn copy_to_slice(&mut self, dst: &mut [u8]) {
+            (**self).copy_to_slice(dst)
+        }
 
-    fn get_i64_le(&mut self) -> i64 {
-        (**self).get_i64_le()
-    }
+        #[inline]
+        fn get_u8(&mut self) -> u8 {
+            (**self).get_u8()
+        }
 
-    fn get_uint(&mut self, nbytes: usize) -> u64 {
-        (**self).get_uint(nbytes)
-    }
+        #[inline]
+        fn get_i8(&mut self) -> i8 {
+            (**self).get_i8()
+        }
 
-    fn get_uint_le(&mut self, nbytes: usize) -> u64 {
-        (**self).get_uint_le(nbytes)
-    }
+        #[inline]
+        fn get_u16(&mut self) -> u16 {
+            (**self).get_u16()
+        }
 
-    fn get_int(&mut self, nbytes: usize) -> i64 {
-        (**self).get_int(nbytes)
-    }
+        #[inline]
+        fn get_u16_le(&mut self) -> u16 {
+            (**self).get_u16_le()
+        }
 
-    fn get_int_le(&mut self, nbytes: usize) -> i64 {
-        (**self).get_int_le(nbytes)
-    }
+        #[inline]
+        fn get_u16_ne(&mut self) -> u16 {
+            (**self).get_u16_ne()
+        }
 
-    fn to_bytes(&mut self) -> crate::Bytes {
-        (**self).to_bytes()
-    }
+        #[inline]
+        fn get_i16(&mut self) -> i16 {
+            (**self).get_i16()
+        }
+
+        #[inline]
+        fn get_i16_le(&mut self) -> i16 {
+            (**self).get_i16_le()
+        }
+
+        #[inline]
+        fn get_i16_ne(&mut self) -> i16 {
+            (**self).get_i16_ne()
+        }
+
+        #[inline]
+        fn get_u32(&mut self) -> u32 {
+            (**self).get_u32()
+        }
+
+        #[inline]
+        fn get_u32_le(&mut self) -> u32 {
+            (**self).get_u32_le()
+        }
+
+        #[inline]
+        fn get_u32_ne(&mut self) -> u32 {
+            (**self).get_u32_ne()
+        }
+
+        #[inline]
+        fn get_i32(&mut self) -> i32 {
+            (**self).get_i32()
+        }
+
+        #[inline]
+        fn get_i32_le(&mut self) -> i32 {
+            (**self).get_i32_le()
+        }
+
+        #[inline]
+        fn get_i32_ne(&mut self) -> i32 {
+            (**self).get_i32_ne()
+        }
+
+        #[inline]
+        fn get_u64(&mut self) -> u64 {
+            (**self).get_u64()
+        }
+
+        #[inline]
+        fn get_u64_le(&mut self) -> u64 {
+            (**self).get_u64_le()
+        }
+
+        #[inline]
+        fn get_u64_ne(&mut self) -> u64 {
+            (**self).get_u64_ne()
+        }
+
+        #[inline]
+        fn get_i64(&mut self) -> i64 {
+            (**self).get_i64()
+        }
+
+        #[inline]
+        fn get_i64_le(&mut self) -> i64 {
+            (**self).get_i64_le()
+        }
+
+        #[inline]
+        fn get_i64_ne(&mut self) -> i64 {
+            (**self).get_i64_ne()
+        }
+
+        #[inline]
+        fn get_u128(&mut self) -> u128 {
+            (**self).get_u128()
+        }
+
+        #[inline]
+        fn get_u128_le(&mut self) -> u128 {
+            (**self).get_u128_le()
+        }
+
+        #[inline]
+        fn get_u128_ne(&mut self) -> u128 {
+            (**self).get_u128_ne()
+        }
+
+        #[inline]
+        fn get_i128(&mut self) -> i128 {
+            (**self).get_i128()
+        }
+
+        #[inline]
+        fn get_i128_le(&mut self) -> i128 {
+            (**self).get_i128_le()
+        }
+
+        #[inline]
+        fn get_i128_ne(&mut self) -> i128 {
+            (**self).get_i128_ne()
+        }
+
+        #[inline]
+        fn get_uint(&mut self, nbytes: usize) -> u64 {
+            (**self).get_uint(nbytes)
+        }
+
+        #[inline]
+        fn get_uint_le(&mut self, nbytes: usize) -> u64 {
+            (**self).get_uint_le(nbytes)
+        }
 
-    )
+        #[inline]
+        fn get_uint_ne(&mut self, nbytes: usize) -> u64 {
+            (**self).get_uint_ne(nbytes)
+        }
+
+        #[inline]
+        fn get_int(&mut self, nbytes: usize) -> i64 {
+            (**self).get_int(nbytes)
+        }
+
+        #[inline]
+        fn get_int_le(&mut self, nbytes: usize) -> i64 {
+            (**self).get_int_le(nbytes)
+        }
+
+        #[inline]
+        fn get_int_ne(&mut self, nbytes: usize) -> i64 {
+            (**self).get_int_ne(nbytes)
+        }
+
+        #[inline]
+        fn get_f32(&mut self) -> f32 {
+            (**self).get_f32()
+        }
+
+        #[inline]
+        fn get_f32_le(&mut self) -> f32 {
+            (**self).get_f32_le()
+        }
+
+        #[inline]
+        fn get_f32_ne(&mut self) -> f32 {
+            (**self).get_f32_ne()
+        }
+
+        #[inline]
+        fn get_f64(&mut self) -> f64 {
+            (**self).get_f64()
+        }
+
+        #[inline]
+        fn get_f64_le(&mut self) -> f64 {
+            (**self).get_f64_le()
+        }
+
+        #[inline]
+        fn get_f64_ne(&mut self) -> f64 {
+            (**self).get_f64_ne()
+        }
+
+        #[inline]
+        fn try_copy_to_slice(&mut self, dst: &mut [u8]) -> Result<(), TryGetError> {
+            (**self).try_copy_to_slice(dst)
+        }
+
+        #[inline]
+        fn try_get_u8(&mut self) -> Result<u8, TryGetError> {
+            (**self).try_get_u8()
+        }
+
+        #[inline]
+        fn try_get_i8(&mut self) -> Result<i8, TryGetError> {
+            (**self).try_get_i8()
+        }
+
+        #[inline]
+        fn try_get_u16(&mut self) -> Result<u16, TryGetError> {
+            (**self).try_get_u16()
+        }
+
+        #[inline]
+        fn try_get_u16_le(&mut self) -> Result<u16, TryGetError> {
+            (**self).try_get_u16_le()
+        }
+
+        #[inline]
+        fn try_get_u16_ne(&mut self) -> Result<u16, TryGetError> {
+            (**self).try_get_u16_ne()
+        }
+
+        #[inline]
+        fn try_get_i16(&mut self) -> Result<i16, TryGetError> {
+            (**self).try_get_i16()
+        }
+
+        #[inline]
+        fn try_get_i16_le(&mut self) -> Result<i16, TryGetError> {
+            (**self).try_get_i16_le()
+        }
+
+        #[inline]
+        fn try_get_i16_ne(&mut self) -> Result<i16, TryGetError> {
+            (**self).try_get_i16_ne()
+        }
+
+        #[inline]
+        fn try_get_u32(&mut self) -> Result<u32, TryGetError> {
+            (**self).try_get_u32()
+        }
+
+        #[inline]
+        fn try_get_u32_le(&mut self) -> Result<u32, TryGetError> {
+            (**self).try_get_u32_le()
+        }
+
+        #[inline]
+        fn try_get_u32_ne(&mut self) -> Result<u32, TryGetError> {
+            (**self).try_get_u32_ne()
+        }
+
+        #[inline]
+        fn try_get_i32(&mut self) -> Result<i32, TryGetError> {
+            (**self).try_get_i32()
+        }
+
+        #[inline]
+        fn try_get_i32_le(&mut self) -> Result<i32, TryGetError> {
+            (**self).try_get_i32_le()
+        }
+
+        #[inline]
+        fn try_get_i32_ne(&mut self) -> Result<i32, TryGetError> {
+            (**self).try_get_i32_ne()
+        }
+
+        #[inline]
+        fn try_get_u64(&mut self) -> Result<u64, TryGetError> {
+            (**self).try_get_u64()
+        }
+
+        #[inline]
+        fn try_get_u64_le(&mut self) -> Result<u64, TryGetError> {
+            (**self).try_get_u64_le()
+        }
+
+        #[inline]
+        fn try_get_u64_ne(&mut self) -> Result<u64, TryGetError> {
+            (**self).try_get_u64_ne()
+        }
+
+        #[inline]
+        fn try_get_i64(&mut self) -> Result<i64, TryGetError> {
+            (**self).try_get_i64()
+        }
+
+        #[inline]
+        fn try_get_i64_le(&mut self) -> Result<i64, TryGetError> {
+            (**self).try_get_i64_le()
+        }
+
+        #[inline]
+        fn try_get_i64_ne(&mut self) -> Result<i64, TryGetError> {
+            (**self).try_get_i64_ne()
+        }
+
+        #[inline]
+        fn try_get_u128(&mut self) -> Result<u128, TryGetError> {
+            (**self).try_get_u128()
+        }
+
+        #[inline]
+        fn try_get_u128_le(&mut self) -> Result<u128, TryGetError> {
+            (**self).try_get_u128_le()
+        }
+
+        #[inline]
+        fn try_get_u128_ne(&mut self) -> Result<u128, TryGetError> {
+            (**self).try_get_u128_ne()
+        }
+
+        #[inline]
+        fn try_get_i128(&mut self) -> Result<i128, TryGetError> {
+            (**self).try_get_i128()
+        }
+
+        #[inline]
+        fn try_get_i128_le(&mut self) -> Result<i128, TryGetError> {
+            (**self).try_get_i128_le()
+        }
+
+        #[inline]
+        fn try_get_i128_ne(&mut self) -> Result<i128, TryGetError> {
+            (**self).try_get_i128_ne()
+        }
+
+        #[inline]
+        fn try_get_uint(&mut self, nbytes: usize) -> Result<u64, TryGetError> {
+            (**self).try_get_uint(nbytes)
+        }
+
+        #[inline]
+        fn try_get_uint_le(&mut self, nbytes: usize) -> Result<u64, TryGetError> {
+            (**self).try_get_uint_le(nbytes)
+        }
+
+        #[inline]
+        fn try_get_uint_ne(&mut self, nbytes: usize) -> Result<u64, TryGetError> {
+            (**self).try_get_uint_ne(nbytes)
+        }
+
+        #[inline]
+        fn try_get_int(&mut self, nbytes: usize) -> Result<i64, TryGetError> {
+            (**self).try_get_int(nbytes)
+        }
+
+        #[inline]
+        fn try_get_int_le(&mut self, nbytes: usize) -> Result<i64, TryGetError> {
+            (**self).try_get_int_le(nbytes)
+        }
+
+        #[inline]
+        fn try_get_int_ne(&mut self, nbytes: usize) -> Result<i64, TryGetError> {
+            (**self).try_get_int_ne(nbytes)
+        }
+
+        #[inline]
+        fn try_get_f32(&mut self) -> Result<f32, TryGetError> {
+            (**self).try_get_f32()
+        }
+
+        #[inline]
+        fn try_get_f32_le(&mut self) -> Result<f32, TryGetError> {
+            (**self).try_get_f32_le()
+        }
+
+        #[inline]
+        fn try_get_f32_ne(&mut self) -> Result<f32, TryGetError> {
+            (**self).try_get_f32_ne()
+        }
+
+        #[inline]
+        fn try_get_f64(&mut self) -> Result<f64, TryGetError> {
+            (**self).try_get_f64()
+        }
+
+        #[inline]
+        fn try_get_f64_le(&mut self) -> Result<f64, TryGetError> {
+            (**self).try_get_f64_le()
+        }
+
+        #[inline]
+        fn try_get_f64_ne(&mut self) -> Result<f64, TryGetError> {
+            (**self).try_get_f64_ne()
+        }
+
+        #[inline]
+        fn copy_to_bytes(&mut self, len: usize) -> crate::Bytes {
+            (**self).copy_to_bytes(len)
+        }
+    };
 }
 
 impl<T: Buf + ?Sized> Buf for &mut T {
@@ -930,74 +2893,67 @@ impl Buf for &[u8] {
     }
 
     #[inline]
-    fn bytes(&self) -> &[u8] {
+    fn chunk(&self) -> &[u8] {
         self
     }
 
     #[inline]
     fn advance(&mut self, cnt: usize) {
-        *self = &self[cnt..];
-    }
-}
-
-impl Buf for Option<[u8; 1]> {
-    fn remaining(&self) -> usize {
-        if self.is_some() {
-            1
-        } else {
-            0
+        if self.len() < cnt {
+            panic_advance(&TryGetError {
+                requested: cnt,
+                available: self.len(),
+            });
         }
-    }
 
-    fn bytes(&self) -> &[u8] {
-        self.as_ref().map(AsRef::as_ref)
-            .unwrap_or(Default::default())
+        *self = &self[cnt..];
     }
 
-    fn advance(&mut self, cnt: usize) {
-        if cnt == 0 {
-            return;
+    #[inline]
+    fn copy_to_slice(&mut self, dst: &mut [u8]) {
+        if self.len() < dst.len() {
+            panic_advance(&TryGetError {
+                requested: dst.len(),
+                available: self.len(),
+            });
         }
 
-        if self.is_none() {
-            panic!("overflow");
-        } else {
-            assert_eq!(1, cnt);
-            *self = None;
-        }
+        dst.copy_from_slice(&self[..dst.len()]);
+        self.advance(dst.len());
     }
 }
 
 #[cfg(feature = "std")]
 impl<T: AsRef<[u8]>> Buf for std::io::Cursor<T> {
+    #[inline]
     fn remaining(&self) -> usize {
-        let len = self.get_ref().as_ref().len();
-        let pos = self.position();
-
-        if pos >= len as u64 {
-            return 0;
-        }
+        saturating_sub_usize_u64(self.get_ref().as_ref().len(), self.position())
+    }
 
-        len - pos as usize
+    #[inline]
+    fn chunk(&self) -> &[u8] {
+        let slice = self.get_ref().as_ref();
+        let pos = min_u64_usize(self.position(), slice.len());
+        &slice[pos..]
     }
 
-    fn bytes(&self) -> &[u8] {
+    #[inline]
+    fn advance(&mut self, cnt: usize) {
         let len = self.get_ref().as_ref().len();
         let pos = self.position();
 
-        if pos >= len as u64 {
-            return &[];
+        // We intentionally allow `cnt == 0` here even if `pos > len`.
+        let max_cnt = saturating_sub_usize_u64(len, pos);
+        if cnt > max_cnt {
+            panic_advance(&TryGetError {
+                requested: cnt,
+                available: max_cnt,
+            });
         }
 
-        &self.get_ref().as_ref()[pos as usize..]
-    }
-
-    fn advance(&mut self, cnt: usize) {
-        let pos = (self.position() as usize)
-            .checked_add(cnt).expect("overflow");
-
-        assert!(pos <= self.get_ref().as_ref().len());
-        self.set_position(pos as u64);
+        // This will not overflow because either `cnt == 0` or the sum is not
+        // greater than `len`.
+        self.set_position(pos + cnt as u64);
     }
 }
 
diff --git a/src/buf/buf_mut.rs b/src/buf/buf_mut.rs
index f5ed2a771..26645c6ae 100644
--- a/src/buf/buf_mut.rs
+++ b/src/buf/buf_mut.rs
@@ -1,9 +1,11 @@
-use core::{cmp, mem::{self, MaybeUninit}, ptr, usize};
-
+use crate::buf::{limit, Chain, Limit, UninitSlice};
 #[cfg(feature = "std")]
-use std::fmt;
+use crate::buf::{writer, Writer};
+use crate::{panic_advance, panic_does_not_fit, TryGetError};
+
+use core::{mem, ptr, usize};
 
-use alloc::{vec::Vec, boxed::Box};
+use alloc::{boxed::Box, vec::Vec};
 
 /// A trait for values that provide sequential write access to bytes.
 ///
@@ -25,12 +27,16 @@ use alloc::{vec::Vec, boxed::Box};
 ///
 /// assert_eq!(buf, b"hello world");
 /// ```
-pub trait BufMut {
+pub unsafe trait BufMut {
     /// Returns the number of bytes that can be written from the current
     /// position until the end of the buffer is reached.
     ///
     /// This value is greater than or equal to the length of the slice returned
-    /// by `bytes_mut`.
+    /// by `chunk_mut()`.
+    ///
+    /// Writing to a `BufMut` may involve allocating more memory on the fly.
+    /// Implementations may fail before reaching the number of bytes indicated
+    /// by this method if they encounter an allocation failure.
     ///
     /// # Examples
     ///
@@ -51,15 +57,21 @@ pub trait BufMut {
     /// Implementations of `remaining_mut` should ensure that the return value
     /// does not change unless a call is made to `advance_mut` or any other
     /// function that is documented to change the `BufMut`'s current position.
+    ///
+    /// # Note
+    ///
+    /// `remaining_mut` may return a value smaller than the actual available space.
     fn remaining_mut(&self) -> usize;
 
     /// Advance the internal cursor of the BufMut
     ///
-    /// The next call to `bytes_mut` will return a slice starting `cnt` bytes
+    /// The next call to `chunk_mut` will return a slice starting `cnt` bytes
     /// further into the underlying buffer.
     ///
-    /// This function is unsafe because there is no guarantee that the bytes
-    /// being advanced past have been initialized.
+    /// # Safety
+    ///
+    /// The caller must ensure that the next `cnt` bytes of `chunk` are
+    /// initialized.
     ///
     /// # Examples
     ///
@@ -68,19 +80,14 @@ pub trait BufMut {
     ///
     /// let mut buf = Vec::with_capacity(16);
     ///
-    /// unsafe {
-    ///     // MaybeUninit::as_mut_ptr
-    ///     buf.bytes_mut()[0].as_mut_ptr().write(b'h');
-    ///     buf.bytes_mut()[1].as_mut_ptr().write(b'e');
-    ///
-    ///     buf.advance_mut(2);
+    /// // Write some data
+    /// buf.chunk_mut()[0..2].copy_from_slice(b"he");
+    /// unsafe { buf.advance_mut(2) };
     ///
-    ///     buf.bytes_mut()[0].as_mut_ptr().write(b'l');
-    ///     buf.bytes_mut()[1].as_mut_ptr().write(b'l');
-    ///     buf.bytes_mut()[2].as_mut_ptr().write(b'o');
+    /// // write more bytes
+    /// buf.chunk_mut()[0..3].copy_from_slice(b"llo");
     ///
-    ///     buf.advance_mut(3);
-    /// }
+    /// unsafe { buf.advance_mut(3); }
     ///
     /// assert_eq!(5, buf.len());
     /// assert_eq!(buf, b"hello");
@@ -117,6 +124,7 @@ pub trait BufMut {
     ///
     /// assert!(!buf.has_remaining_mut());
     /// ```
+    #[inline]
     fn has_remaining_mut(&self) -> bool {
         self.remaining_mut() > 0
     }
@@ -139,14 +147,14 @@ pub trait BufMut {
     ///
     /// unsafe {
     ///     // MaybeUninit::as_mut_ptr
-    ///     buf.bytes_mut()[0].as_mut_ptr().write(b'h');
-    ///     buf.bytes_mut()[1].as_mut_ptr().write(b'e');
+    ///     buf.chunk_mut()[0..].as_mut_ptr().write(b'h');
+    ///     buf.chunk_mut()[1..].as_mut_ptr().write(b'e');
     ///
     ///     buf.advance_mut(2);
     ///
-    ///     buf.bytes_mut()[0].as_mut_ptr().write(b'l');
-    ///     buf.bytes_mut()[1].as_mut_ptr().write(b'l');
-    ///     buf.bytes_mut()[2].as_mut_ptr().write(b'o');
+    ///     buf.chunk_mut()[0..].as_mut_ptr().write(b'l');
+    ///     buf.chunk_mut()[1..].as_mut_ptr().write(b'l');
+    ///     buf.chunk_mut()[2..].as_mut_ptr().write(b'o');
     ///
     ///     buf.advance_mut(3);
     /// }
@@ -157,54 +165,18 @@ pub trait BufMut {
     ///
     /// # Implementer notes
     ///
-    /// This function should never panic. `bytes_mut` should return an empty
-    /// slice **if and only if** `remaining_mut` returns 0. In other words,
-    /// `bytes_mut` returning an empty slice implies that `remaining_mut` will
-    /// return 0 and `remaining_mut` returning 0 implies that `bytes_mut` will
+    /// This function should never panic. `chunk_mut()` should return an empty
+    /// slice **if and only if** `remaining_mut()` returns 0. In other words,
+    /// `chunk_mut()` returning an empty slice implies that `remaining_mut()` will
+    /// return 0 and `remaining_mut()` returning 0 implies that `chunk_mut()` will
     /// return an empty slice.
-    fn bytes_mut(&mut self) -> &mut [MaybeUninit<u8>];
-
-    /// Fills `dst` with potentially multiple mutable slices starting at `self`'s
-    /// current position.
-    ///
-    /// If the `BufMut` is backed by disjoint slices of bytes, `bytes_vectored_mut`
-    /// enables fetching more than one slice at once. `dst` is a slice of
-    /// mutable `IoSliceMut` references, enabling the slice to be directly used with
-    /// [`readv`] without any further conversion. The sum of the lengths of all
-    /// the buffers in `dst` will be less than or equal to
-    /// `Buf::remaining_mut()`.
-    ///
-    /// The entries in `dst` will be overwritten, but the data **contained** by
-    /// the slices **will not** be modified. If `bytes_vectored_mut` does not fill every
-    /// entry in `dst`, then `dst` is guaranteed to contain all remaining slices
-    /// in `self.
     ///
-    /// This is a lower level function. Most operations are done with other
-    /// functions.
-    ///
-    /// # Implementer notes
-    ///
-    /// This function should never panic. Once the end of the buffer is reached,
-    /// i.e., `BufMut::remaining_mut` returns 0, calls to `bytes_vectored_mut` must
-    /// return 0 without mutating `dst`.
-    ///
-    /// Implementations should also take care to properly handle being called
-    /// with `dst` being a zero length slice.
-    ///
-    /// [`readv`]: http://man7.org/linux/man-pages/man2/readv.2.html
-    #[cfg(feature = "std")]
-    fn bytes_vectored_mut<'a>(&'a mut self, dst: &mut [IoSliceMut<'a>]) -> usize {
-        if dst.is_empty() {
-            return 0;
-        }
-
-        if self.has_remaining_mut() {
-            dst[0] = IoSliceMut::from(self.bytes_mut());
-            1
-        } else {
-            0
-        }
-    }
+    /// This function may trigger an out-of-memory abort if it tries to allocate
+    /// memory and fails to do so.
+    // The `chunk_mut` method was previously called `bytes_mut`. This alias makes the
+    // rename more easily discoverable.
+    #[cfg_attr(docsrs, doc(alias = "bytes_mut"))]
+    fn chunk_mut(&mut self) -> &mut UninitSlice;
 
     /// Transfer bytes into `self` from `src` and advance the cursor by the
     /// number of bytes written.
@@ -226,25 +198,28 @@ pub trait BufMut {
     /// # Panics
     ///
     /// Panics if `self` does not have enough capacity to contain `src`.
-    fn put<T: super::Buf>(&mut self, mut src: T) where Self: Sized {
-        assert!(self.remaining_mut() >= src.remaining());
+    #[inline]
+    fn put<T: super::Buf>(&mut self, mut src: T)
+    where
+        Self: Sized,
+    {
+        if self.remaining_mut() < src.remaining() {
+            panic_advance(&TryGetError {
+                requested: src.remaining(),
+                available: self.remaining_mut(),
+            });
+        }
 
         while src.has_remaining() {
-            let l;
-
-            unsafe {
-                let s = src.bytes();
-                let d = self.bytes_mut();
-                l = cmp::min(s.len(), d.len());
+            let s = src.chunk();
+            let d = self.chunk_mut();
+            let cnt = usize::min(s.len(), d.len());
 
-                ptr::copy_nonoverlapping(
-                    s.as_ptr(),
-                    d.as_mut_ptr() as *mut u8,
-                    l);
-            }
+            d[..cnt].copy_from_slice(&s[..cnt]);
 
-            src.advance(l);
-            unsafe { self.advance_mut(l); }
+            // SAFETY: We just initialized `cnt` bytes in `self`.
+            unsafe { self.advance_mut(cnt) };
+            src.advance(cnt);
         }
     }
 
@@ -267,28 +242,69 @@ pub trait BufMut {
     ///
     /// assert_eq!(b"hello\0", &dst);
     /// ```
-    fn put_slice(&mut self, src: &[u8]) {
-        let mut off = 0;
-
-        assert!(self.remaining_mut() >= src.len(), "buffer overflow; remaining = {}; src = {}", self.remaining_mut(), src.len());
-
-        while off < src.len() {
-            let cnt;
+    #[inline]
+    fn put_slice(&mut self, mut src: &[u8]) {
+        if self.remaining_mut() < src.len() {
+            panic_advance(&TryGetError {
+                requested: src.len(),
+                available: self.remaining_mut(),
+            });
+        }
 
-            unsafe {
-                let dst = self.bytes_mut();
-                cnt = cmp::min(dst.len(), src.len() - off);
+        while !src.is_empty() {
+            let dst = self.chunk_mut();
+            let cnt = usize::min(src.len(), dst.len());
 
-                ptr::copy_nonoverlapping(
-                    src[off..].as_ptr(),
-                    dst.as_mut_ptr() as *mut u8,
-                    cnt);
+            dst[..cnt].copy_from_slice(&src[..cnt]);
+            src = &src[cnt..];
 
-                off += cnt;
+            // SAFETY: We just initialized `cnt` bytes in `self`.
+            unsafe { self.advance_mut(cnt) };
+        }
+    }
 
-            }
+    /// Puts `cnt` copies of the byte `val` into `self`.
+    ///
+    /// Logically equivalent to calling `self.put_u8(val)` `cnt` times, but may be faster.
+    ///
+    /// `self` must have at least `cnt` remaining capacity.
+    ///
+    /// ```
+    /// use bytes::BufMut;
+    ///
+    /// let mut dst = [0; 6];
+    ///
+    /// {
+    ///     let mut buf = &mut dst[..];
+    ///     buf.put_bytes(b'a', 4);
+    ///
+    ///     assert_eq!(2, buf.remaining_mut());
+    /// }
+    ///
+    /// assert_eq!(b"aaaa\0\0", &dst);
+    /// ```
+    ///
+    /// # Panics
+    ///
+    /// This function panics if there is not enough remaining capacity in
+    /// `self`.
+    #[inline]
+    fn put_bytes(&mut self, val: u8, mut cnt: usize) {
+        if self.remaining_mut() < cnt {
+            panic_advance(&TryGetError {
+                requested: cnt,
+                available: self.remaining_mut(),
+            })
+        }
 
-            unsafe { self.advance_mut(cnt); }
+        while cnt > 0 {
+            let dst = self.chunk_mut();
+            let dst_len = usize::min(dst.len(), cnt);
+            // SAFETY: The pointer is valid for `dst_len <= dst.len()` bytes.
+            unsafe { core::ptr::write_bytes(dst.as_mut_ptr(), val, dst_len) };
+            // SAFETY: We just initialized `dst_len` bytes in `self`.
+            unsafe { self.advance_mut(dst_len) };
+            cnt -= dst_len;
         }
     }
 
@@ -310,6 +326,7 @@ pub trait BufMut {
     ///
     /// This function panics if there is not enough remaining capacity in
     /// `self`.
+    #[inline]
     fn put_u8(&mut self, n: u8) {
         let src = [n];
         self.put_slice(&src);
@@ -333,6 +350,7 @@ pub trait BufMut {
     ///
     /// This function panics if there is not enough remaining capacity in
     /// `self`.
+    #[inline]
     fn put_i8(&mut self, n: i8) {
         let src = [n as u8];
         self.put_slice(&src)
@@ -356,6 +374,7 @@ pub trait BufMut {
     ///
     /// This function panics if there is not enough remaining capacity in
     /// `self`.
+    #[inline]
     fn put_u16(&mut self, n: u16) {
         self.put_slice(&n.to_be_bytes())
     }
@@ -378,10 +397,38 @@ pub trait BufMut {
     ///
     /// This function panics if there is not enough remaining capacity in
     /// `self`.
+    #[inline]
     fn put_u16_le(&mut self, n: u16) {
         self.put_slice(&n.to_le_bytes())
     }
 
+    /// Writes an unsigned 16 bit integer to `self` in native-endian byte order.
+    ///
+    /// The current position is advanced by 2.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::BufMut;
+    ///
+    /// let mut buf = vec![];
+    /// buf.put_u16_ne(0x0809);
+    /// if cfg!(target_endian = "big") {
+    ///     assert_eq!(buf, b"\x08\x09");
+    /// } else {
+    ///     assert_eq!(buf, b"\x09\x08");
+    /// }
+    /// ```
+    ///
+    /// # Panics
+    ///
+    /// This function panics if there is not enough remaining capacity in
+    /// `self`.
+    #[inline]
+    fn put_u16_ne(&mut self, n: u16) {
+        self.put_slice(&n.to_ne_bytes())
+    }
+
     /// Writes a signed 16 bit integer to `self` in big-endian byte order.
     ///
     /// The current position is advanced by 2.
@@ -400,6 +447,7 @@ pub trait BufMut {
     ///
     /// This function panics if there is not enough remaining capacity in
     /// `self`.
+    #[inline]
     fn put_i16(&mut self, n: i16) {
         self.put_slice(&n.to_be_bytes())
     }
@@ -422,10 +470,38 @@ pub trait BufMut {
     ///
     /// This function panics if there is not enough remaining capacity in
     /// `self`.
+    #[inline]
     fn put_i16_le(&mut self, n: i16) {
         self.put_slice(&n.to_le_bytes())
     }
 
+    /// Writes a signed 16 bit integer to `self` in native-endian byte order.
+    ///
+    /// The current position is advanced by 2.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::BufMut;
+    ///
+    /// let mut buf = vec![];
+    /// buf.put_i16_ne(0x0809);
+    /// if cfg!(target_endian = "big") {
+    ///     assert_eq!(buf, b"\x08\x09");
+    /// } else {
+    ///     assert_eq!(buf, b"\x09\x08");
+    /// }
+    /// ```
+    ///
+    /// # Panics
+    ///
+    /// This function panics if there is not enough remaining capacity in
+    /// `self`.
+    #[inline]
+    fn put_i16_ne(&mut self, n: i16) {
+        self.put_slice(&n.to_ne_bytes())
+    }
+
     /// Writes an unsigned 32 bit integer to `self` in big-endian byte order.
     ///
     /// The current position is advanced by 4.
@@ -444,6 +520,7 @@ pub trait BufMut {
     ///
     /// This function panics if there is not enough remaining capacity in
     /// `self`.
+    #[inline]
     fn put_u32(&mut self, n: u32) {
         self.put_slice(&n.to_be_bytes())
     }
@@ -466,10 +543,38 @@ pub trait BufMut {
     ///
     /// This function panics if there is not enough remaining capacity in
     /// `self`.
+    #[inline]
     fn put_u32_le(&mut self, n: u32) {
         self.put_slice(&n.to_le_bytes())
     }
 
+    /// Writes an unsigned 32 bit integer to `self` in native-endian byte order.
+    ///
+    /// The current position is advanced by 4.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::BufMut;
+    ///
+    /// let mut buf = vec![];
+    /// buf.put_u32_ne(0x0809A0A1);
+    /// if cfg!(target_endian = "big") {
+    ///     assert_eq!(buf, b"\x08\x09\xA0\xA1");
+    /// } else {
+    ///     assert_eq!(buf, b"\xA1\xA0\x09\x08");
+    /// }
+    /// ```
+    ///
+    /// # Panics
+    ///
+    /// This function panics if there is not enough remaining capacity in
+    /// `self`.
+    #[inline]
+    fn put_u32_ne(&mut self, n: u32) {
+        self.put_slice(&n.to_ne_bytes())
+    }
+
     /// Writes a signed 32 bit integer to `self` in big-endian byte order.
     ///
     /// The current position is advanced by 4.
@@ -488,6 +593,7 @@ pub trait BufMut {
     ///
     /// This function panics if there is not enough remaining capacity in
     /// `self`.
+    #[inline]
     fn put_i32(&mut self, n: i32) {
         self.put_slice(&n.to_be_bytes())
     }
@@ -510,10 +616,38 @@ pub trait BufMut {
     ///
     /// This function panics if there is not enough remaining capacity in
     /// `self`.
+    #[inline]
     fn put_i32_le(&mut self, n: i32) {
         self.put_slice(&n.to_le_bytes())
     }
 
+    /// Writes a signed 32 bit integer to `self` in native-endian byte order.
+    ///
+    /// The current position is advanced by 4.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::BufMut;
+    ///
+    /// let mut buf = vec![];
+    /// buf.put_i32_ne(0x0809A0A1);
+    /// if cfg!(target_endian = "big") {
+    ///     assert_eq!(buf, b"\x08\x09\xA0\xA1");
+    /// } else {
+    ///     assert_eq!(buf, b"\xA1\xA0\x09\x08");
+    /// }
+    /// ```
+    ///
+    /// # Panics
+    ///
+    /// This function panics if there is not enough remaining capacity in
+    /// `self`.
+    #[inline]
+    fn put_i32_ne(&mut self, n: i32) {
+        self.put_slice(&n.to_ne_bytes())
+    }
+
     /// Writes an unsigned 64 bit integer to `self` in the big-endian byte order.
     ///
     /// The current position is advanced by 8.
@@ -532,6 +666,7 @@ pub trait BufMut {
     ///
     /// This function panics if there is not enough remaining capacity in
     /// `self`.
+    #[inline]
     fn put_u64(&mut self, n: u64) {
         self.put_slice(&n.to_be_bytes())
     }
@@ -554,10 +689,38 @@ pub trait BufMut {
     ///
     /// This function panics if there is not enough remaining capacity in
     /// `self`.
+    #[inline]
     fn put_u64_le(&mut self, n: u64) {
         self.put_slice(&n.to_le_bytes())
     }
 
+    /// Writes an unsigned 64 bit integer to `self` in native-endian byte order.
+    ///
+    /// The current position is advanced by 8.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::BufMut;
+    ///
+    /// let mut buf = vec![];
+    /// buf.put_u64_ne(0x0102030405060708);
+    /// if cfg!(target_endian = "big") {
+    ///     assert_eq!(buf, b"\x01\x02\x03\x04\x05\x06\x07\x08");
+    /// } else {
+    ///     assert_eq!(buf, b"\x08\x07\x06\x05\x04\x03\x02\x01");
+    /// }
+    /// ```
+    ///
+    /// # Panics
+    ///
+    /// This function panics if there is not enough remaining capacity in
+    /// `self`.
+    #[inline]
+    fn put_u64_ne(&mut self, n: u64) {
+        self.put_slice(&n.to_ne_bytes())
+    }
+
     /// Writes a signed 64 bit integer to `self` in the big-endian byte order.
     ///
     /// The current position is advanced by 8.
@@ -576,6 +739,7 @@ pub trait BufMut {
     ///
     /// This function panics if there is not enough remaining capacity in
     /// `self`.
+    #[inline]
     fn put_i64(&mut self, n: i64) {
         self.put_slice(&n.to_be_bytes())
     }
@@ -598,10 +762,38 @@ pub trait BufMut {
     ///
     /// This function panics if there is not enough remaining capacity in
     /// `self`.
+    #[inline]
     fn put_i64_le(&mut self, n: i64) {
         self.put_slice(&n.to_le_bytes())
     }
 
+    /// Writes a signed 64 bit integer to `self` in native-endian byte order.
+    ///
+    /// The current position is advanced by 8.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::BufMut;
+    ///
+    /// let mut buf = vec![];
+    /// buf.put_i64_ne(0x0102030405060708);
+    /// if cfg!(target_endian = "big") {
+    ///     assert_eq!(buf, b"\x01\x02\x03\x04\x05\x06\x07\x08");
+    /// } else {
+    ///     assert_eq!(buf, b"\x08\x07\x06\x05\x04\x03\x02\x01");
+    /// }
+    /// ```
+    ///
+    /// # Panics
+    ///
+    /// This function panics if there is not enough remaining capacity in
+    /// `self`.
+    #[inline]
+    fn put_i64_ne(&mut self, n: i64) {
+        self.put_slice(&n.to_ne_bytes())
+    }
+
     /// Writes an unsigned 128 bit integer to `self` in the big-endian byte order.
     ///
     /// The current position is advanced by 16.
@@ -620,6 +812,7 @@ pub trait BufMut {
     ///
     /// This function panics if there is not enough remaining capacity in
     /// `self`.
+    #[inline]
     fn put_u128(&mut self, n: u128) {
         self.put_slice(&n.to_be_bytes())
     }
@@ -642,10 +835,38 @@ pub trait BufMut {
     ///
     /// This function panics if there is not enough remaining capacity in
     /// `self`.
+    #[inline]
     fn put_u128_le(&mut self, n: u128) {
         self.put_slice(&n.to_le_bytes())
     }
 
+    /// Writes an unsigned 128 bit integer to `self` in native-endian byte order.
+    ///
+    /// The current position is advanced by 16.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::BufMut;
+    ///
+    /// let mut buf = vec![];
+    /// buf.put_u128_ne(0x01020304050607080910111213141516);
+    /// if cfg!(target_endian = "big") {
+    ///     assert_eq!(buf, b"\x01\x02\x03\x04\x05\x06\x07\x08\x09\x10\x11\x12\x13\x14\x15\x16");
+    /// } else {
+    ///     assert_eq!(buf, b"\x16\x15\x14\x13\x12\x11\x10\x09\x08\x07\x06\x05\x04\x03\x02\x01");
+    /// }
+    /// ```
+    ///
+    /// # Panics
+    ///
+    /// This function panics if there is not enough remaining capacity in
+    /// `self`.
+    #[inline]
+    fn put_u128_ne(&mut self, n: u128) {
+        self.put_slice(&n.to_ne_bytes())
+    }
+
     /// Writes a signed 128 bit integer to `self` in the big-endian byte order.
     ///
     /// The current position is advanced by 16.
@@ -664,6 +885,7 @@ pub trait BufMut {
     ///
     /// This function panics if there is not enough remaining capacity in
     /// `self`.
+    #[inline]
     fn put_i128(&mut self, n: i128) {
         self.put_slice(&n.to_be_bytes())
     }
@@ -686,10 +908,38 @@ pub trait BufMut {
     ///
     /// This function panics if there is not enough remaining capacity in
     /// `self`.
+    #[inline]
     fn put_i128_le(&mut self, n: i128) {
         self.put_slice(&n.to_le_bytes())
     }
 
+    /// Writes a signed 128 bit integer to `self` in native-endian byte order.
+    ///
+    /// The current position is advanced by 16.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::BufMut;
+    ///
+    /// let mut buf = vec![];
+    /// buf.put_i128_ne(0x01020304050607080910111213141516);
+    /// if cfg!(target_endian = "big") {
+    ///     assert_eq!(buf, b"\x01\x02\x03\x04\x05\x06\x07\x08\x09\x10\x11\x12\x13\x14\x15\x16");
+    /// } else {
+    ///     assert_eq!(buf, b"\x16\x15\x14\x13\x12\x11\x10\x09\x08\x07\x06\x05\x04\x03\x02\x01");
+    /// }
+    /// ```
+    ///
+    /// # Panics
+    ///
+    /// This function panics if there is not enough remaining capacity in
+    /// `self`.
+    #[inline]
+    fn put_i128_ne(&mut self, n: i128) {
+        self.put_slice(&n.to_ne_bytes())
+    }
+
     /// Writes an unsigned n-byte integer to `self` in big-endian byte order.
     ///
     /// The current position is advanced by `nbytes`.
@@ -707,9 +957,15 @@ pub trait BufMut {
     /// # Panics
     ///
     /// This function panics if there is not enough remaining capacity in
-    /// `self`.
+    /// `self` or if `nbytes` is greater than 8.
+    #[inline]
     fn put_uint(&mut self, n: u64, nbytes: usize) {
-        self.put_slice(&n.to_be_bytes()[mem::size_of_val(&n) - nbytes..]);
+        let start = match mem::size_of_val(&n).checked_sub(nbytes) {
+            Some(start) => start,
+            None => panic_does_not_fit(nbytes, mem::size_of_val(&n)),
+        };
+
+        self.put_slice(&n.to_be_bytes()[start..]);
     }
 
     /// Writes an unsigned n-byte integer to `self` in the little-endian byte order.
@@ -729,12 +985,50 @@ pub trait BufMut {
     /// # Panics
     ///
     /// This function panics if there is not enough remaining capacity in
-    /// `self`.
+    /// `self` or if `nbytes` is greater than 8.
+    #[inline]
     fn put_uint_le(&mut self, n: u64, nbytes: usize) {
-        self.put_slice(&n.to_le_bytes()[0..nbytes]);
+        let slice = n.to_le_bytes();
+        let slice = match slice.get(..nbytes) {
+            Some(slice) => slice,
+            None => panic_does_not_fit(nbytes, slice.len()),
+        };
+
+        self.put_slice(slice);
+    }
+
+    /// Writes an unsigned n-byte integer to `self` in the native-endian byte order.
+    ///
+    /// The current position is advanced by `nbytes`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::BufMut;
+    ///
+    /// let mut buf = vec![];
+    /// buf.put_uint_ne(0x010203, 3);
+    /// if cfg!(target_endian = "big") {
+    ///     assert_eq!(buf, b"\x01\x02\x03");
+    /// } else {
+    ///     assert_eq!(buf, b"\x03\x02\x01");
+    /// }
+    /// ```
+    ///
+    /// # Panics
+    ///
+    /// This function panics if there is not enough remaining capacity in
+    /// `self` or if `nbytes` is greater than 8.
+    #[inline]
+    fn put_uint_ne(&mut self, n: u64, nbytes: usize) {
+        if cfg!(target_endian = "big") {
+            self.put_uint(n, nbytes)
+        } else {
+            self.put_uint_le(n, nbytes)
+        }
     }
 
-    /// Writes a signed n-byte integer to `self` in big-endian byte order.
+    /// Writes the low `nbytes` bytes of a signed integer to `self` in big-endian byte order.
     ///
     /// The current position is advanced by `nbytes`.
     ///
@@ -744,19 +1038,25 @@ pub trait BufMut {
     /// use bytes::BufMut;
     ///
     /// let mut buf = vec![];
-    /// buf.put_int(0x010203, 3);
+    /// buf.put_int(0x0504010203, 3);
     /// assert_eq!(buf, b"\x01\x02\x03");
     /// ```
     ///
     /// # Panics
     ///
     /// This function panics if there is not enough remaining capacity in
-    /// `self`.
+    /// `self` or if `nbytes` is greater than 8.
+    #[inline]
     fn put_int(&mut self, n: i64, nbytes: usize) {
-        self.put_slice(&n.to_be_bytes()[mem::size_of_val(&n) - nbytes..]);
+        let start = match mem::size_of_val(&n).checked_sub(nbytes) {
+            Some(start) => start,
+            None => panic_does_not_fit(nbytes, mem::size_of_val(&n)),
+        };
+
+        self.put_slice(&n.to_be_bytes()[start..]);
     }
 
-    /// Writes a signed n-byte integer to `self` in little-endian byte order.
+    /// Writes the low `nbytes` bytes of a signed integer to `self` in little-endian byte order.
     ///
     /// The current position is advanced by `nbytes`.
     ///
@@ -766,19 +1066,57 @@ pub trait BufMut {
     /// use bytes::BufMut;
     ///
     /// let mut buf = vec![];
-    /// buf.put_int_le(0x010203, 3);
+    /// buf.put_int_le(0x0504010203, 3);
     /// assert_eq!(buf, b"\x03\x02\x01");
     /// ```
     ///
     /// # Panics
     ///
     /// This function panics if there is not enough remaining capacity in
-    /// `self`.
+    /// `self` or if `nbytes` is greater than 8.
+    #[inline]
     fn put_int_le(&mut self, n: i64, nbytes: usize) {
-        self.put_slice(&n.to_le_bytes()[0..nbytes]);
+        let slice = n.to_le_bytes();
+        let slice = match slice.get(..nbytes) {
+            Some(slice) => slice,
+            None => panic_does_not_fit(nbytes, slice.len()),
+        };
+
+        self.put_slice(slice);
+    }
+
+    /// Writes the low `nbytes` bytes of a signed integer to `self` in native-endian byte order.
+    ///
+    /// The current position is advanced by `nbytes`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::BufMut;
+    ///
+    /// let mut buf = vec![];
+    /// buf.put_int_ne(0x010203, 3);
+    /// if cfg!(target_endian = "big") {
+    ///     assert_eq!(buf, b"\x01\x02\x03");
+    /// } else {
+    ///     assert_eq!(buf, b"\x03\x02\x01");
+    /// }
+    /// ```
+    ///
+    /// # Panics
+    ///
+    /// This function panics if there is not enough remaining capacity in
+    /// `self` or if `nbytes` is greater than 8.
+    #[inline]
+    fn put_int_ne(&mut self, n: i64, nbytes: usize) {
+        if cfg!(target_endian = "big") {
+            self.put_int(n, nbytes)
+        } else {
+            self.put_int_le(n, nbytes)
+        }
     }
 
-    /// Writes  an IEEE754 single-precision (4 bytes) floating point number to
+    /// Writes an IEEE754 single-precision (4 bytes) floating point number to
     /// `self` in big-endian byte order.
     ///
     /// The current position is advanced by 4.
@@ -797,11 +1135,12 @@ pub trait BufMut {
     ///
     /// This function panics if there is not enough remaining capacity in
     /// `self`.
+    #[inline]
     fn put_f32(&mut self, n: f32) {
         self.put_u32(n.to_bits());
     }
 
-    /// Writes  an IEEE754 single-precision (4 bytes) floating point number to
+    /// Writes an IEEE754 single-precision (4 bytes) floating point number to
     /// `self` in little-endian byte order.
     ///
     /// The current position is advanced by 4.
@@ -820,11 +1159,40 @@ pub trait BufMut {
     ///
     /// This function panics if there is not enough remaining capacity in
     /// `self`.
+    #[inline]
     fn put_f32_le(&mut self, n: f32) {
         self.put_u32_le(n.to_bits());
     }
 
-    /// Writes  an IEEE754 double-precision (8 bytes) floating point number to
+    /// Writes an IEEE754 single-precision (4 bytes) floating point number to
+    /// `self` in native-endian byte order.
+    ///
+    /// The current position is advanced by 4.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::BufMut;
+    ///
+    /// let mut buf = vec![];
+    /// buf.put_f32_ne(1.2f32);
+    /// if cfg!(target_endian = "big") {
+    ///     assert_eq!(buf, b"\x3F\x99\x99\x9A");
+    /// } else {
+    ///     assert_eq!(buf, b"\x9A\x99\x99\x3F");
+    /// }
+    /// ```
+    ///
+    /// # Panics
+    ///
+    /// This function panics if there is not enough remaining capacity in
+    /// `self`.
+    #[inline]
+    fn put_f32_ne(&mut self, n: f32) {
+        self.put_u32_ne(n.to_bits());
+    }
+
+    /// Writes an IEEE754 double-precision (8 bytes) floating point number to
     /// `self` in big-endian byte order.
     ///
     /// The current position is advanced by 8.
@@ -843,11 +1211,12 @@ pub trait BufMut {
     ///
     /// This function panics if there is not enough remaining capacity in
     /// `self`.
+    #[inline]
     fn put_f64(&mut self, n: f64) {
         self.put_u64(n.to_bits());
     }
 
-    /// Writes  an IEEE754 double-precision (8 bytes) floating point number to
+    /// Writes an IEEE754 double-precision (8 bytes) floating point number to
     /// `self` in little-endian byte order.
     ///
     /// The current position is advanced by 8.
@@ -866,143 +1235,392 @@ pub trait BufMut {
     ///
     /// This function panics if there is not enough remaining capacity in
     /// `self`.
+    #[inline]
     fn put_f64_le(&mut self, n: f64) {
         self.put_u64_le(n.to_bits());
     }
-}
 
-macro_rules! deref_forward_bufmut {
-    () => (
-    fn remaining_mut(&self) -> usize {
-        (**self).remaining_mut()
+    /// Writes an IEEE754 double-precision (8 bytes) floating point number to
+    /// `self` in native-endian byte order.
+    ///
+    /// The current position is advanced by 8.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::BufMut;
+    ///
+    /// let mut buf = vec![];
+    /// buf.put_f64_ne(1.2f64);
+    /// if cfg!(target_endian = "big") {
+    ///     assert_eq!(buf, b"\x3F\xF3\x33\x33\x33\x33\x33\x33");
+    /// } else {
+    ///     assert_eq!(buf, b"\x33\x33\x33\x33\x33\x33\xF3\x3F");
+    /// }
+    /// ```
+    ///
+    /// # Panics
+    ///
+    /// This function panics if there is not enough remaining capacity in
+    /// `self`.
+    #[inline]
+    fn put_f64_ne(&mut self, n: f64) {
+        self.put_u64_ne(n.to_bits());
     }
 
-    fn bytes_mut(&mut self) -> &mut [MaybeUninit<u8>] {
-        (**self).bytes_mut()
+    /// Creates an adaptor which can write at most `limit` bytes to `self`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::BufMut;
+    ///
+    /// let arr = &mut [0u8; 128][..];
+    /// assert_eq!(arr.remaining_mut(), 128);
+    ///
+    /// let dst = arr.limit(10);
+    /// assert_eq!(dst.remaining_mut(), 10);
+    /// ```
+    #[inline]
+    fn limit(self, limit: usize) -> Limit<Self>
+    where
+        Self: Sized,
+    {
+        limit::new(self, limit)
     }
 
+    /// Creates an adaptor which implements the `Write` trait for `self`.
+    ///
+    /// This function returns a new value which implements `Write` by adapting
+    /// the `Write` trait functions to the `BufMut` trait functions. Given that
+    /// `BufMut` operations are infallible, none of the `Write` functions will
+    /// return with `Err`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::BufMut;
+    /// use std::io::Write;
+    ///
+    /// let mut buf = vec![].writer();
+    ///
+    /// let num = buf.write(&b"hello world"[..]).unwrap();
+    /// assert_eq!(11, num);
+    ///
+    /// let buf = buf.into_inner();
+    ///
+    /// assert_eq!(*buf, b"hello world"[..]);
+    /// ```
     #[cfg(feature = "std")]
-    fn bytes_vectored_mut<'b>(&'b mut self, dst: &mut [IoSliceMut<'b>]) -> usize {
-        (**self).bytes_vectored_mut(dst)
+    #[cfg_attr(docsrs, doc(cfg(feature = "std")))]
+    #[inline]
+    fn writer(self) -> Writer<Self>
+    where
+        Self: Sized,
+    {
+        writer::new(self)
     }
 
-    unsafe fn advance_mut(&mut self, cnt: usize) {
-        (**self).advance_mut(cnt)
+    /// Creates an adapter which will chain this buffer with another.
+    ///
+    /// The returned `BufMut` instance will first write to all bytes from
+    /// `self`. Afterwards, it will write to `next`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::BufMut;
+    ///
+    /// let mut a = [0u8; 5];
+    /// let mut b = [0u8; 6];
+    ///
+    /// let mut chain = (&mut a[..]).chain_mut(&mut b[..]);
+    ///
+    /// chain.put_slice(b"hello world");
+    ///
+    /// assert_eq!(&a[..], b"hello");
+    /// assert_eq!(&b[..], b" world");
+    /// ```
+    #[inline]
+    fn chain_mut<U: BufMut>(self, next: U) -> Chain<Self, U>
+    where
+        Self: Sized,
+    {
+        Chain::new(self, next)
     }
+}
 
-    fn put_slice(&mut self, src: &[u8]) {
-        (**self).put_slice(src)
-    }
+macro_rules! deref_forward_bufmut {
+    () => {
+        #[inline]
+        fn remaining_mut(&self) -> usize {
+            (**self).remaining_mut()
+        }
 
-    fn put_u8(&mut self, n: u8) {
-        (**self).put_u8(n)
-    }
+        #[inline]
+        fn chunk_mut(&mut self) -> &mut UninitSlice {
+            (**self).chunk_mut()
+        }
 
-    fn put_i8(&mut self, n: i8) {
-        (**self).put_i8(n)
-    }
+        #[inline]
+        unsafe fn advance_mut(&mut self, cnt: usize) {
+            (**self).advance_mut(cnt)
+        }
 
-    fn put_u16(&mut self, n: u16) {
-        (**self).put_u16(n)
-    }
+        #[inline]
+        fn put_slice(&mut self, src: &[u8]) {
+            (**self).put_slice(src)
+        }
 
-    fn put_u16_le(&mut self, n: u16) {
-        (**self).put_u16_le(n)
-    }
+        #[inline]
+        fn put_u8(&mut self, n: u8) {
+            (**self).put_u8(n)
+        }
 
-    fn put_i16(&mut self, n: i16) {
-        (**self).put_i16(n)
-    }
+        #[inline]
+        fn put_i8(&mut self, n: i8) {
+            (**self).put_i8(n)
+        }
 
-    fn put_i16_le(&mut self, n: i16) {
-        (**self).put_i16_le(n)
-    }
+        #[inline]
+        fn put_u16(&mut self, n: u16) {
+            (**self).put_u16(n)
+        }
 
-    fn put_u32(&mut self, n: u32) {
-        (**self).put_u32(n)
-    }
+        #[inline]
+        fn put_u16_le(&mut self, n: u16) {
+            (**self).put_u16_le(n)
+        }
 
-    fn put_u32_le(&mut self, n: u32) {
-        (**self).put_u32_le(n)
-    }
+        #[inline]
+        fn put_u16_ne(&mut self, n: u16) {
+            (**self).put_u16_ne(n)
+        }
 
-    fn put_i32(&mut self, n: i32) {
-        (**self).put_i32(n)
-    }
+        #[inline]
+        fn put_i16(&mut self, n: i16) {
+            (**self).put_i16(n)
+        }
 
-    fn put_i32_le(&mut self, n: i32) {
-        (**self).put_i32_le(n)
-    }
+        #[inline]
+        fn put_i16_le(&mut self, n: i16) {
+            (**self).put_i16_le(n)
+        }
 
-    fn put_u64(&mut self, n: u64) {
-        (**self).put_u64(n)
-    }
+        #[inline]
+        fn put_i16_ne(&mut self, n: i16) {
+            (**self).put_i16_ne(n)
+        }
 
-    fn put_u64_le(&mut self, n: u64) {
-        (**self).put_u64_le(n)
-    }
+        #[inline]
+        fn put_u32(&mut self, n: u32) {
+            (**self).put_u32(n)
+        }
 
-    fn put_i64(&mut self, n: i64) {
-        (**self).put_i64(n)
-    }
+        #[inline]
+        fn put_u32_le(&mut self, n: u32) {
+            (**self).put_u32_le(n)
+        }
 
-    fn put_i64_le(&mut self, n: i64) {
-        (**self).put_i64_le(n)
-    }
-    )
+        #[inline]
+        fn put_u32_ne(&mut self, n: u32) {
+            (**self).put_u32_ne(n)
+        }
+
+        #[inline]
+        fn put_i32(&mut self, n: i32) {
+            (**self).put_i32(n)
+        }
+
+        #[inline]
+        fn put_i32_le(&mut self, n: i32) {
+            (**self).put_i32_le(n)
+        }
+
+        #[inline]
+        fn put_i32_ne(&mut self, n: i32) {
+            (**self).put_i32_ne(n)
+        }
+
+        #[inline]
+        fn put_u64(&mut self, n: u64) {
+            (**self).put_u64(n)
+        }
+
+        #[inline]
+        fn put_u64_le(&mut self, n: u64) {
+            (**self).put_u64_le(n)
+        }
+
+        #[inline]
+        fn put_u64_ne(&mut self, n: u64) {
+            (**self).put_u64_ne(n)
+        }
+
+        #[inline]
+        fn put_i64(&mut self, n: i64) {
+            (**self).put_i64(n)
+        }
+
+        #[inline]
+        fn put_i64_le(&mut self, n: i64) {
+            (**self).put_i64_le(n)
+        }
+
+        #[inline]
+        fn put_i64_ne(&mut self, n: i64) {
+            (**self).put_i64_ne(n)
+        }
+    };
 }
 
-impl<T: BufMut + ?Sized> BufMut for &mut T {
+unsafe impl<T: BufMut + ?Sized> BufMut for &mut T {
     deref_forward_bufmut!();
 }
 
-impl<T: BufMut + ?Sized> BufMut for Box<T> {
+unsafe impl<T: BufMut + ?Sized> BufMut for Box<T> {
     deref_forward_bufmut!();
 }
 
-impl BufMut for &mut [u8] {
+unsafe impl BufMut for &mut [u8] {
     #[inline]
     fn remaining_mut(&self) -> usize {
         self.len()
     }
 
     #[inline]
-    fn bytes_mut(&mut self) -> &mut [MaybeUninit<u8>] {
-        // MaybeUninit is repr(transparent), so safe to transmute
-        unsafe { mem::transmute(&mut **self) }
+    fn chunk_mut(&mut self) -> &mut UninitSlice {
+        UninitSlice::new(self)
     }
 
     #[inline]
     unsafe fn advance_mut(&mut self, cnt: usize) {
+        if self.len() < cnt {
+            panic_advance(&TryGetError {
+                requested: cnt,
+                available: self.len(),
+            });
+        }
+
         // Lifetime dance taken from `impl Write for &mut [u8]`.
         let (_, b) = core::mem::replace(self, &mut []).split_at_mut(cnt);
         *self = b;
     }
+
+    #[inline]
+    fn put_slice(&mut self, src: &[u8]) {
+        if self.len() < src.len() {
+            panic_advance(&TryGetError {
+                requested: src.len(),
+                available: self.len(),
+            });
+        }
+
+        self[..src.len()].copy_from_slice(src);
+        // SAFETY: We just initialized `src.len()` bytes.
+        unsafe { self.advance_mut(src.len()) };
+    }
+
+    #[inline]
+    fn put_bytes(&mut self, val: u8, cnt: usize) {
+        if self.len() < cnt {
+            panic_advance(&TryGetError {
+                requested: cnt,
+                available: self.len(),
+            });
+        }
+
+        // SAFETY: We just checked that the pointer is valid for `cnt` bytes.
+        unsafe {
+            ptr::write_bytes(self.as_mut_ptr(), val, cnt);
+            self.advance_mut(cnt);
+        }
+    }
 }
 
-impl BufMut for Vec<u8> {
+unsafe impl BufMut for &mut [core::mem::MaybeUninit<u8>] {
     #[inline]
     fn remaining_mut(&self) -> usize {
-        usize::MAX - self.len()
+        self.len()
+    }
+
+    #[inline]
+    fn chunk_mut(&mut self) -> &mut UninitSlice {
+        UninitSlice::uninit(self)
+    }
+
+    #[inline]
+    unsafe fn advance_mut(&mut self, cnt: usize) {
+        if self.len() < cnt {
+            panic_advance(&TryGetError {
+                requested: cnt,
+                available: self.len(),
+            });
+        }
+
+        // Lifetime dance taken from `impl Write for &mut [u8]`.
+        let (_, b) = core::mem::replace(self, &mut []).split_at_mut(cnt);
+        *self = b;
+    }
+
+    #[inline]
+    fn put_slice(&mut self, src: &[u8]) {
+        if self.len() < src.len() {
+            panic_advance(&TryGetError {
+                requested: src.len(),
+                available: self.len(),
+            });
+        }
+
+        // SAFETY: We just checked that the pointer is valid for `src.len()` bytes.
+        unsafe {
+            ptr::copy_nonoverlapping(src.as_ptr(), self.as_mut_ptr().cast(), src.len());
+            self.advance_mut(src.len());
+        }
+    }
+
+    #[inline]
+    fn put_bytes(&mut self, val: u8, cnt: usize) {
+        if self.len() < cnt {
+            panic_advance(&TryGetError {
+                requested: cnt,
+                available: self.len(),
+            });
+        }
+
+        // SAFETY: We just checked that the pointer is valid for `cnt` bytes.
+        unsafe {
+            ptr::write_bytes(self.as_mut_ptr() as *mut u8, val, cnt);
+            self.advance_mut(cnt);
+        }
+    }
+}
+
+unsafe impl BufMut for Vec<u8> {
+    #[inline]
+    fn remaining_mut(&self) -> usize {
+        // A vector can never have more than isize::MAX bytes
+        core::isize::MAX as usize - self.len()
     }
 
     #[inline]
     unsafe fn advance_mut(&mut self, cnt: usize) {
         let len = self.len();
         let remaining = self.capacity() - len;
-        if cnt > remaining {
-            // Reserve additional capacity, and ensure that the total length
-            // will not overflow usize.
-            self.reserve(cnt);
+
+        if remaining < cnt {
+            panic_advance(&TryGetError {
+                requested: cnt,
+                available: remaining,
+            });
         }
 
+        // Addition will not overflow since the sum is at most the capacity.
         self.set_len(len + cnt);
     }
 
     #[inline]
-    fn bytes_mut(&mut self) -> &mut [MaybeUninit<u8>] {
-        use core::slice;
-
+    fn chunk_mut(&mut self) -> &mut UninitSlice {
         if self.capacity() == self.len() {
             self.reserve(64); // Grow the vec
         }
@@ -1010,79 +1628,44 @@ impl BufMut for Vec<u8> {
         let cap = self.capacity();
         let len = self.len();
 
-        let ptr = self.as_mut_ptr() as *mut MaybeUninit<u8>;
-        unsafe {
-            &mut slice::from_raw_parts_mut(ptr, cap)[len..]
-        }
+        let ptr = self.as_mut_ptr();
+        // SAFETY: Since `ptr` is valid for `cap` bytes, `ptr.add(len)` must be
+        // valid for `cap - len` bytes. The subtraction will not underflow since
+        // `len <= cap`.
+        unsafe { UninitSlice::from_raw_parts_mut(ptr.add(len), cap - len) }
     }
 
     // Specialize these methods so they can skip checking `remaining_mut`
     // and `advance_mut`.
-
-    fn put<T: super::Buf>(&mut self, mut src: T) where Self: Sized {
-        // In case the src isn't contiguous, reserve upfront
+    #[inline]
+    fn put<T: super::Buf>(&mut self, mut src: T)
+    where
+        Self: Sized,
+    {
+        // In case the src isn't contiguous, reserve upfront.
         self.reserve(src.remaining());
 
         while src.has_remaining() {
-            let l;
-
-            // a block to contain the src.bytes() borrow
-            {
-                let s = src.bytes();
-                l = s.len();
-                self.extend_from_slice(s);
-            }
-
+            let s = src.chunk();
+            let l = s.len();
+            self.extend_from_slice(s);
             src.advance(l);
         }
     }
 
+    #[inline]
     fn put_slice(&mut self, src: &[u8]) {
         self.extend_from_slice(src);
     }
+
+    #[inline]
+    fn put_bytes(&mut self, val: u8, cnt: usize) {
+        // If the addition overflows, then the `resize` will fail.
+        let new_len = self.len().saturating_add(cnt);
+        self.resize(new_len, val);
+    }
 }
 
 // The existence of this function makes the compiler catch if the BufMut
 // trait is "object-safe" or not.
 fn _assert_trait_object(_b: &dyn BufMut) {}
-
-// ===== impl IoSliceMut =====
-
-/// A buffer type used for `readv`.
-///
-/// This is a wrapper around an `std::io::IoSliceMut`, but does not expose
-/// the inner bytes in a safe API, as they may point at uninitialized memory.
-///
-/// This is `repr(transparent)` of the `std::io::IoSliceMut`, so it is valid to
-/// transmute them. However, as the memory might be uninitialized, care must be
-/// taken to not *read* the internal bytes, only *write* to them.
-#[repr(transparent)]
-#[cfg(feature = "std")]
-pub struct IoSliceMut<'a>(std::io::IoSliceMut<'a>);
-
-#[cfg(feature = "std")]
-impl fmt::Debug for IoSliceMut<'_> {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        f.debug_struct("IoSliceMut")
-            .field("len", &self.0.len())
-            .finish()
-    }
-}
-
-#[cfg(feature = "std")]
-impl<'a> From<&'a mut [u8]> for IoSliceMut<'a> {
-    fn from(buf: &'a mut [u8]) -> IoSliceMut<'a> {
-        IoSliceMut(std::io::IoSliceMut::new(buf))
-    }
-}
-
-#[cfg(feature = "std")]
-impl<'a> From<&'a mut [MaybeUninit<u8>]> for IoSliceMut<'a> {
-    fn from(buf: &'a mut [MaybeUninit<u8>]) -> IoSliceMut<'a> {
-        IoSliceMut(std::io::IoSliceMut::new(unsafe {
-            // We don't look at the contents, and `std::io::IoSliceMut`
-            // doesn't either.
-            mem::transmute::<&'a mut [MaybeUninit<u8>], &'a mut [u8]>(buf)
-        }))
-    }
-}
diff --git a/src/buf/ext/chain.rs b/src/buf/chain.rs
similarity index 70%
rename from src/buf/ext/chain.rs
rename to src/buf/chain.rs
index a1ec597df..c8bc36de9 100644
--- a/src/buf/ext/chain.rs
+++ b/src/buf/chain.rs
@@ -1,12 +1,8 @@
+use crate::buf::{IntoIter, UninitSlice};
 use crate::{Buf, BufMut};
-use crate::buf::IntoIter;
 
-use core::mem::MaybeUninit;
-
-#[cfg(feature = "std")]
-use std::io::{IoSlice};
 #[cfg(feature = "std")]
-use crate::buf::IoSliceMut;
+use std::io::IoSlice;
 
 /// A `Chain` sequences two buffers.
 ///
@@ -20,18 +16,16 @@ use crate::buf::IoSliceMut;
 /// # Examples
 ///
 /// ```
-/// use bytes::{Bytes, Buf, buf::BufExt};
+/// use bytes::{Bytes, Buf};
 ///
 /// let mut buf = (&b"hello "[..])
 ///     .chain(&b"world"[..]);
 ///
-/// let full: Bytes = buf.to_bytes();
+/// let full: Bytes = buf.copy_to_bytes(11);
 /// assert_eq!(full[..], b"hello world"[..]);
 /// ```
 ///
-/// [`Buf::chain`]: trait.Buf.html#method.chain
-/// [`Buf`]: trait.Buf.html
-/// [`BufMut`]: trait.BufMut.html
+/// [`Buf::chain`]: Buf::chain
 #[derive(Debug)]
 pub struct Chain<T, U> {
     a: T,
@@ -40,11 +34,8 @@ pub struct Chain<T, U> {
 
 impl<T, U> Chain<T, U> {
     /// Creates a new `Chain` sequencing the provided values.
-    pub fn new(a: T, b: U) -> Chain<T, U> {
-        Chain {
-            a,
-            b,
-        }
+    pub(crate) fn new(a: T, b: U) -> Chain<T, U> {
+        Chain { a, b }
     }
 
     /// Gets a reference to the first underlying `Buf`.
@@ -52,7 +43,7 @@ impl<T, U> Chain<T, U> {
     /// # Examples
     ///
     /// ```
-    /// use bytes::buf::BufExt;
+    /// use bytes::Buf;
     ///
     /// let buf = (&b"hello"[..])
     ///     .chain(&b"world"[..]);
@@ -68,14 +59,14 @@ impl<T, U> Chain<T, U> {
     /// # Examples
     ///
     /// ```
-    /// use bytes::{Buf, buf::BufExt};
+    /// use bytes::Buf;
     ///
     /// let mut buf = (&b"hello"[..])
     ///     .chain(&b"world"[..]);
     ///
     /// buf.first_mut().advance(1);
     ///
-    /// let full = buf.to_bytes();
+    /// let full = buf.copy_to_bytes(9);
     /// assert_eq!(full, b"elloworld"[..]);
     /// ```
     pub fn first_mut(&mut self) -> &mut T {
@@ -87,7 +78,7 @@ impl<T, U> Chain<T, U> {
     /// # Examples
     ///
     /// ```
-    /// use bytes::buf::BufExt;
+    /// use bytes::Buf;
     ///
     /// let buf = (&b"hello"[..])
     ///     .chain(&b"world"[..]);
@@ -103,14 +94,14 @@ impl<T, U> Chain<T, U> {
     /// # Examples
     ///
     /// ```
-    /// use bytes::{Buf, buf::BufExt};
+    /// use bytes::Buf;
     ///
     /// let mut buf = (&b"hello "[..])
     ///     .chain(&b"world"[..]);
     ///
     /// buf.last_mut().advance(1);
     ///
-    /// let full = buf.to_bytes();
+    /// let full = buf.copy_to_bytes(10);
     /// assert_eq!(full, b"hello orld"[..]);
     /// ```
     pub fn last_mut(&mut self) -> &mut U {
@@ -122,7 +113,7 @@ impl<T, U> Chain<T, U> {
     /// # Examples
     ///
     /// ```
-    /// use bytes::buf::BufExt;
+    /// use bytes::Buf;
     ///
     /// let chain = (&b"hello"[..])
     ///     .chain(&b"world"[..]);
@@ -137,18 +128,19 @@ impl<T, U> Chain<T, U> {
 }
 
 impl<T, U> Buf for Chain<T, U>
-    where T: Buf,
-          U: Buf,
+where
+    T: Buf,
+    U: Buf,
 {
     fn remaining(&self) -> usize {
-        self.a.remaining() + self.b.remaining()
+        self.a.remaining().saturating_add(self.b.remaining())
     }
 
-    fn bytes(&self) -> &[u8] {
+    fn chunk(&self) -> &[u8] {
         if self.a.has_remaining() {
-            self.a.bytes()
+            self.a.chunk()
         } else {
-            self.b.bytes()
+            self.b.chunk()
         }
     }
 
@@ -171,26 +163,47 @@ impl<T, U> Buf for Chain<T, U>
     }
 
     #[cfg(feature = "std")]
-    fn bytes_vectored<'a>(&'a self, dst: &mut [IoSlice<'a>]) -> usize {
-        let mut n = self.a.bytes_vectored(dst);
-        n += self.b.bytes_vectored(&mut dst[n..]);
+    fn chunks_vectored<'a>(&'a self, dst: &mut [IoSlice<'a>]) -> usize {
+        let mut n = self.a.chunks_vectored(dst);
+        n += self.b.chunks_vectored(&mut dst[n..]);
         n
     }
+
+    /// Copies `len` bytes out of the chain, delegating to a single half when it can.
+    fn copy_to_bytes(&mut self, len: usize) -> crate::Bytes {
+        let in_first = self.a.remaining();
+        if in_first >= len {
+            return self.a.copy_to_bytes(len);
+        }
+        if in_first == 0 {
+            return self.b.copy_to_bytes(len);
+        }
+        // The request straddles both halves: drain `a`, take the rest from `b`.
+        let from_second = len - in_first;
+        assert!(from_second <= self.b.remaining(), "`len` greater than remaining");
+        let mut joined = crate::BytesMut::with_capacity(len);
+        joined.put(&mut self.a);
+        joined.put((&mut self.b).take(from_second));
+        joined.freeze()
+    }
 }
 
-impl<T, U> BufMut for Chain<T, U>
-    where T: BufMut,
-          U: BufMut,
+unsafe impl<T, U> BufMut for Chain<T, U>
+where
+    T: BufMut,
+    U: BufMut,
 {
     fn remaining_mut(&self) -> usize {
-        self.a.remaining_mut() + self.b.remaining_mut()
+        self.a
+            .remaining_mut()
+            .saturating_add(self.b.remaining_mut())
     }
 
-    fn bytes_mut(&mut self) -> &mut [MaybeUninit<u8>] {
+    fn chunk_mut(&mut self) -> &mut UninitSlice {
         if self.a.has_remaining_mut() {
-            self.a.bytes_mut()
+            self.a.chunk_mut()
         } else {
-            self.b.bytes_mut()
+            self.b.chunk_mut()
         }
     }
 
@@ -211,13 +224,6 @@ impl<T, U> BufMut for Chain<T, U>
 
         self.b.advance_mut(cnt);
     }
-
-    #[cfg(feature = "std")]
-    fn bytes_vectored_mut<'a>(&'a mut self, dst: &mut [IoSliceMut<'a>]) -> usize {
-        let mut n = self.a.bytes_vectored_mut(dst);
-        n += self.b.bytes_vectored_mut(&mut dst[n..]);
-        n
-    }
 }
 
 impl<T, U> IntoIterator for Chain<T, U>
diff --git a/src/buf/ext/mod.rs b/src/buf/ext/mod.rs
deleted file mode 100644
index 7b0bdab20..000000000
--- a/src/buf/ext/mod.rs
+++ /dev/null
@@ -1,176 +0,0 @@
-//! Extra utilities for `Buf` and `BufMut` types.
-
-use super::{Buf, BufMut};
-
-mod chain;
-mod limit;
-#[cfg(feature = "std")]
-mod reader;
-mod take;
-#[cfg(feature = "std")]
-mod writer;
-
-pub use self::limit::Limit;
-pub use self::take::Take;
-pub use self::chain::Chain;
-
-#[cfg(feature = "std")]
-pub use self::{reader::Reader, writer::Writer};
-
-/// Extra methods for implementations of `Buf`.
-pub trait BufExt: Buf {
-    /// Creates an adaptor which will read at most `limit` bytes from `self`.
-    ///
-    /// This function returns a new instance of `Buf` which will read at most
-    /// `limit` bytes.
-    ///
-    /// # Examples
-    ///
-    /// ```
-    /// use bytes::{Buf, BufMut, buf::BufExt};
-    ///
-    /// let mut buf = b"hello world"[..].take(5);
-    /// let mut dst = vec![];
-    ///
-    /// dst.put(&mut buf);
-    /// assert_eq!(dst, b"hello");
-    ///
-    /// let mut buf = buf.into_inner();
-    /// dst.clear();
-    /// dst.put(&mut buf);
-    /// assert_eq!(dst, b" world");
-    /// ```
-    fn take(self, limit: usize) -> Take<Self>
-        where Self: Sized
-    {
-        take::new(self, limit)
-    }
-
-    /// Creates an adaptor which will chain this buffer with another.
-    ///
-    /// The returned `Buf` instance will first consume all bytes from `self`.
-    /// Afterwards the output is equivalent to the output of next.
-    ///
-    /// # Examples
-    ///
-    /// ```
-    /// use bytes::{Buf, buf::BufExt};
-    ///
-    /// let mut chain = b"hello "[..].chain(&b"world"[..]);
-    ///
-    /// let full = chain.to_bytes();
-    /// assert_eq!(full.bytes(), b"hello world");
-    /// ```
-    fn chain<U: Buf>(self, next: U) -> Chain<Self, U>
-        where Self: Sized
-    {
-        Chain::new(self, next)
-    }
-
-    /// Creates an adaptor which implements the `Read` trait for `self`.
-    ///
-    /// This function returns a new value which implements `Read` by adapting
-    /// the `Read` trait functions to the `Buf` trait functions. Given that
-    /// `Buf` operations are infallible, none of the `Read` functions will
-    /// return with `Err`.
-    ///
-    /// # Examples
-    ///
-    /// ```
-    /// use bytes::{Buf, Bytes, buf::BufExt};
-    /// use std::io::Read;
-    ///
-    /// let buf = Bytes::from("hello world");
-    ///
-    /// let mut reader = buf.reader();
-    /// let mut dst = [0; 1024];
-    ///
-    /// let num = reader.read(&mut dst).unwrap();
-    ///
-    /// assert_eq!(11, num);
-    /// assert_eq!(&dst[..11], &b"hello world"[..]);
-    /// ```
-    #[cfg(feature = "std")]
-    fn reader(self) -> Reader<Self> where Self: Sized {
-        reader::new(self)
-    }
-}
-
-impl<B: Buf + ?Sized> BufExt for B {}
-
-/// Extra methods for implementations of `BufMut`.
-pub trait BufMutExt: BufMut {
-    /// Creates an adaptor which can write at most `limit` bytes to `self`.
-    ///
-    /// # Examples
-    ///
-    /// ```
-    /// use bytes::{BufMut, buf::BufMutExt};
-    ///
-    /// let arr = &mut [0u8; 128][..];
-    /// assert_eq!(arr.remaining_mut(), 128);
-    ///
-    /// let dst = arr.limit(10);
-    /// assert_eq!(dst.remaining_mut(), 10);
-    /// ```
-    fn limit(self, limit: usize) -> Limit<Self>
-        where Self: Sized
-    {
-        limit::new(self, limit)
-    }
-
-    /// Creates an adaptor which implements the `Write` trait for `self`.
-    ///
-    /// This function returns a new value which implements `Write` by adapting
-    /// the `Write` trait functions to the `BufMut` trait functions. Given that
-    /// `BufMut` operations are infallible, none of the `Write` functions will
-    /// return with `Err`.
-    ///
-    /// # Examples
-    ///
-    /// ```
-    /// use bytes::{BufMut, buf::BufMutExt};
-    /// use std::io::Write;
-    ///
-    /// let mut buf = vec![].writer();
-    ///
-    /// let num = buf.write(&b"hello world"[..]).unwrap();
-    /// assert_eq!(11, num);
-    ///
-    /// let buf = buf.into_inner();
-    ///
-    /// assert_eq!(*buf, b"hello world"[..]);
-    /// ```
-    #[cfg(feature = "std")]
-    fn writer(self) -> Writer<Self> where Self: Sized {
-        writer::new(self)
-    }
-
-    /// Creates an adapter which will chain this buffer with another.
-    ///
-    /// The returned `BufMut` instance will first write to all bytes from
-    /// `self`. Afterwards, it will write to `next`.
-    ///
-    /// # Examples
-    ///
-    /// ```
-    /// use bytes::{BufMut, buf::BufMutExt};
-    ///
-    /// let mut a = [0u8; 5];
-    /// let mut b = [0u8; 6];
-    ///
-    /// let mut chain = (&mut a[..]).chain_mut(&mut b[..]);
-    ///
-    /// chain.put_slice(b"hello world");
-    ///
-    /// assert_eq!(&a[..], b"hello");
-    /// assert_eq!(&b[..], b" world");
-    /// ```
-    fn chain_mut<U: BufMut>(self, next: U) -> Chain<Self, U>
-        where Self: Sized
-    {
-        Chain::new(self, next)
-    }
-}
-
-impl<B: BufMut + ?Sized> BufMutExt for B {}
diff --git a/src/buf/iter.rs b/src/buf/iter.rs
index 1af421a8d..74f9b991e 100644
--- a/src/buf/iter.rs
+++ b/src/buf/iter.rs
@@ -2,14 +2,12 @@ use crate::Buf;
 
 /// Iterator over the bytes contained by the buffer.
 ///
-/// This struct is created by the [`iter`] method on [`Buf`].
-///
 /// # Examples
 ///
 /// Basic usage:
 ///
 /// ```
-/// use bytes::{Buf, Bytes};
+/// use bytes::Bytes;
 ///
 /// let buf = Bytes::from(&b"abc"[..]);
 /// let mut iter = buf.into_iter();
@@ -19,9 +17,6 @@ use crate::Buf;
 /// assert_eq!(iter.next(), Some(b'c'));
 /// assert_eq!(iter.next(), None);
 /// ```
-///
-/// [`iter`]: trait.Buf.html#method.iter
-/// [`Buf`]: trait.Buf.html
 #[derive(Debug)]
 pub struct IntoIter<T> {
     inner: T,
@@ -33,11 +28,10 @@ impl<T> IntoIter<T> {
     /// # Examples
     ///
     /// ```
-    /// use bytes::{Buf, Bytes};
-    /// use bytes::buf::IntoIter;
+    /// use bytes::Bytes;
     ///
     /// let buf = Bytes::from_static(b"abc");
-    /// let mut iter = IntoIter::new(buf);
+    /// let mut iter = buf.into_iter();
     ///
     /// assert_eq!(iter.next(), Some(b'a'));
     /// assert_eq!(iter.next(), Some(b'b'));
@@ -47,6 +41,7 @@ impl<T> IntoIter<T> {
     pub fn new(inner: T) -> IntoIter<T> {
         IntoIter { inner }
     }
+
     /// Consumes this `IntoIter`, returning the underlying value.
     ///
     /// # Examples
@@ -109,7 +104,6 @@ impl<T> IntoIter<T> {
     }
 }
 
-
 impl<T: Buf> Iterator for IntoIter<T> {
     type Item = u8;
 
@@ -118,7 +112,7 @@ impl<T: Buf> Iterator for IntoIter<T> {
             return None;
         }
 
-        let b = self.inner.bytes()[0];
+        let b = self.inner.chunk()[0];
         self.inner.advance(1);
 
         Some(b)
@@ -130,4 +124,4 @@ impl<T: Buf> Iterator for IntoIter<T> {
     }
 }
 
-impl<T: Buf> ExactSizeIterator for IntoIter<T> { }
+impl<T: Buf> ExactSizeIterator for IntoIter<T> {}
diff --git a/src/buf/ext/limit.rs b/src/buf/limit.rs
similarity index 88%
rename from src/buf/ext/limit.rs
rename to src/buf/limit.rs
index f86e01151..b422be538 100644
--- a/src/buf/ext/limit.rs
+++ b/src/buf/limit.rs
@@ -1,6 +1,7 @@
+use crate::buf::UninitSlice;
 use crate::BufMut;
 
-use core::{cmp, mem::MaybeUninit};
+use core::cmp;
 
 /// A `BufMut` adapter which limits the amount of bytes that can be written
 /// to an underlying buffer.
@@ -11,10 +12,7 @@ pub struct Limit<T> {
 }
 
 pub(super) fn new<T>(inner: T, limit: usize) -> Limit<T> {
-    Limit {
-        inner,
-        limit,
-    }
+    Limit { inner, limit }
 }
 
 impl<T> Limit<T> {
@@ -58,13 +56,13 @@ impl<T> Limit<T> {
     }
 }
 
-impl<T: BufMut> BufMut for Limit<T> {
+unsafe impl<T: BufMut> BufMut for Limit<T> {
     fn remaining_mut(&self) -> usize {
         cmp::min(self.inner.remaining_mut(), self.limit)
     }
 
-    fn bytes_mut(&mut self) -> &mut [MaybeUninit<u8>] {
-        let bytes = self.inner.bytes_mut();
+    fn chunk_mut(&mut self) -> &mut UninitSlice {
+        let bytes = self.inner.chunk_mut();
         let end = cmp::min(bytes.len(), self.limit);
         &mut bytes[..end]
     }
diff --git a/src/buf/mod.rs b/src/buf/mod.rs
index d4538f21e..1bf0a47e8 100644
--- a/src/buf/mod.rs
+++ b/src/buf/mod.rs
@@ -13,19 +13,27 @@
 //! See [`Buf`] and [`BufMut`] for more details.
 //!
 //! [rope]: https://en.wikipedia.org/wiki/Rope_(data_structure)
-//! [`Buf`]: trait.Buf.html
-//! [`BufMut`]: trait.BufMut.html
 
 mod buf_impl;
 mod buf_mut;
-pub mod ext;
+mod chain;
 mod iter;
+mod limit;
+#[cfg(feature = "std")]
+mod reader;
+mod take;
+mod uninit_slice;
 mod vec_deque;
+#[cfg(feature = "std")]
+mod writer;
 
 pub use self::buf_impl::Buf;
 pub use self::buf_mut::BufMut;
-pub use self::ext::{BufExt, BufMutExt};
-#[cfg(feature = "std")]
-pub use self::buf_mut::IoSliceMut;
+pub use self::chain::Chain;
 pub use self::iter::IntoIter;
+pub use self::limit::Limit;
+pub use self::take::Take;
+pub use self::uninit_slice::UninitSlice;
 
+#[cfg(feature = "std")]
+pub use self::{reader::Reader, writer::Writer};
diff --git a/src/buf/ext/reader.rs b/src/buf/reader.rs
similarity index 87%
rename from src/buf/ext/reader.rs
rename to src/buf/reader.rs
index e38103b1d..521494958 100644
--- a/src/buf/ext/reader.rs
+++ b/src/buf/reader.rs
@@ -1,11 +1,11 @@
-use crate::{Buf};
+use crate::Buf;
 
 use std::{cmp, io};
 
 /// A `Buf` adapter which implements `io::Read` for the inner value.
 ///
 /// This struct is generally created by calling `reader()` on `Buf`. See
-/// documentation of [`reader()`](trait.Buf.html#method.reader) for more
+/// documentation of [`reader()`](Buf::reader) for more
 /// details.
 #[derive(Debug)]
 pub struct Reader<B> {
@@ -24,9 +24,9 @@ impl<B: Buf> Reader<B> {
     /// # Examples
     ///
     /// ```rust
-    /// use bytes::buf::BufExt;
+    /// use bytes::Buf;
     ///
-    /// let mut buf = b"hello world".reader();
+    /// let buf = b"hello world".reader();
     ///
     /// assert_eq!(b"hello world", buf.get_ref());
     /// ```
@@ -46,7 +46,7 @@ impl<B: Buf> Reader<B> {
     /// # Examples
     ///
     /// ```rust
-    /// use bytes::{Buf, buf::BufExt};
+    /// use bytes::Buf;
     /// use std::io;
     ///
     /// let mut buf = b"hello world".reader();
@@ -73,7 +73,7 @@ impl<B: Buf + Sized> io::Read for Reader<B> {
 
 impl<B: Buf + Sized> io::BufRead for Reader<B> {
     fn fill_buf(&mut self) -> io::Result<&[u8]> {
-        Ok(self.buf.bytes())
+        Ok(self.buf.chunk())
     }
     fn consume(&mut self, amt: usize) {
         self.buf.advance(amt)
diff --git a/src/buf/ext/take.rs b/src/buf/take.rs
similarity index 58%
rename from src/buf/ext/take.rs
rename to src/buf/take.rs
index 6fc4ffc72..acfeef6e1 100644
--- a/src/buf/ext/take.rs
+++ b/src/buf/take.rs
@@ -2,10 +2,13 @@ use crate::Buf;
 
 use core::cmp;
 
+#[cfg(feature = "std")]
+use std::io::IoSlice;
+
 /// A `Buf` adapter which limits the bytes read from an underlying buffer.
 ///
 /// This struct is generally created by calling `take()` on `Buf`. See
-/// documentation of [`take()`](trait.Buf.html#method.take) for more details.
+/// documentation of [`take()`](Buf::take) for more details.
 #[derive(Debug)]
 pub struct Take<T> {
     inner: T,
@@ -13,10 +16,7 @@ pub struct Take<T> {
 }
 
 pub fn new<T>(inner: T, limit: usize) -> Take<T> {
-    Take {
-        inner,
-        limit,
-    }
+    Take { inner, limit }
 }
 
 impl<T> Take<T> {
@@ -25,7 +25,7 @@ impl<T> Take<T> {
     /// # Examples
     ///
     /// ```rust
-    /// use bytes::buf::{Buf, BufMut, BufExt};
+    /// use bytes::{Buf, BufMut};
     ///
     /// let mut buf = b"hello world".take(2);
     /// let mut dst = vec![];
@@ -50,9 +50,9 @@ impl<T> Take<T> {
     /// # Examples
     ///
     /// ```rust
-    /// use bytes::{Buf, buf::BufExt};
+    /// use bytes::Buf;
     ///
-    /// let mut buf = b"hello world".take(2);
+    /// let buf = b"hello world".take(2);
     ///
     /// assert_eq!(11, buf.get_ref().remaining());
     /// ```
@@ -67,7 +67,7 @@ impl<T> Take<T> {
     /// # Examples
     ///
     /// ```rust
-    /// use bytes::{Buf, BufMut, buf::BufExt};
+    /// use bytes::{Buf, BufMut};
     ///
     /// let mut buf = b"hello world".take(2);
     /// let mut dst = vec![];
@@ -91,7 +91,7 @@ impl<T> Take<T> {
     /// # Examples
     ///
     /// ```rust
-    /// use bytes::{Buf, buf::BufExt};
+    /// use bytes::Buf;
     ///
     /// let mut buf = b"hello world".take(2);
     ///
@@ -113,7 +113,7 @@ impl<T> Take<T> {
     /// # Examples
     ///
     /// ```rust
-    /// use bytes::{Buf, BufMut, buf::BufExt};
+    /// use bytes::{Buf, BufMut};
     ///
     /// let mut buf = b"hello world".take(2);
     /// let mut dst = vec![];
@@ -137,8 +137,8 @@ impl<T: Buf> Buf for Take<T> {
         cmp::min(self.inner.remaining(), self.limit)
     }
 
-    fn bytes(&self) -> &[u8] {
-        let bytes = self.inner.bytes();
+    fn chunk(&self) -> &[u8] {
+        let bytes = self.inner.chunk();
         &bytes[..cmp::min(bytes.len(), self.limit)]
     }
 
@@ -147,4 +147,56 @@ impl<T: Buf> Buf for Take<T> {
         self.inner.advance(cnt);
         self.limit -= cnt;
     }
+
+    /// Copies the next `len` bytes out of the inner buffer and lowers the
+    /// limit by the same amount.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `len` is greater than `self.remaining()`.
+    fn copy_to_bytes(&mut self, len: usize) -> crate::Bytes {
+        assert!(len <= self.remaining(), "`len` greater than remaining");
+
+        let r = self.inner.copy_to_bytes(len);
+        self.limit -= len;
+        r
+    }
+
+    /// Fills `dst` with the inner buffer's chunks, truncated so that their
+    /// combined length does not exceed the limit, and returns how many
+    /// entries of `dst` were written.
+    #[cfg(feature = "std")]
+    fn chunks_vectored<'a>(&'a self, dst: &mut [IoSlice<'a>]) -> usize {
+        if self.limit == 0 {
+            return 0;
+        }
+
+        // Scratch space for the inner buffer's chunks; at most `LEN` of them
+        // are looked at per call, however large `dst` is.
+        const LEN: usize = 16;
+        // `IoSlice` is `Copy`, so a repeat expression fills the array.
+        let mut slices: [IoSlice<'a>; LEN] = [IoSlice::new(&[]); LEN];
+
+        let cnt = self
+            .inner
+            .chunks_vectored(&mut slices[..dst.len().min(LEN)]);
+        // Bytes still allowed through before the limit is reached.
+        let mut limit = self.limit;
+        for (i, (dst, slice)) in dst[..cnt].iter_mut().zip(slices.iter()).enumerate() {
+            if let Some(buf) = slice.get(..limit) {
+                // This chunk reaches the limit: emit its first `limit` bytes and stop.
+                // SAFETY: We could do this safely with `IoSlice::advance` if we had a larger MSRV.
+                let buf = unsafe { std::mem::transmute::<&[u8], &'a [u8]>(buf) };
+                *dst = IoSlice::new(buf);
+                return i + 1;
+            } else {
+                // The chunk is shorter than the remaining limit: pass it on whole.
+                // SAFETY: We could do this safely with `IoSlice::advance` if we had a larger MSRV.
+                let buf = unsafe { std::mem::transmute::<&[u8], &'a [u8]>(slice) };
+                *dst = IoSlice::new(buf);
+                limit -= slice.len();
+            }
+        }
+        cnt
+    }
 }
diff --git a/src/buf/uninit_slice.rs b/src/buf/uninit_slice.rs
new file mode 100644
index 000000000..aea096ae6
--- /dev/null
+++ b/src/buf/uninit_slice.rs
@@ -0,0 +1,257 @@
+use core::fmt;
+use core::mem::MaybeUninit;
+use core::ops::{
+    Index, IndexMut, Range, RangeFrom, RangeFull, RangeInclusive, RangeTo, RangeToInclusive,
+};
+
+/// Uninitialized byte slice.
+///
+/// Returned by `BufMut::chunk_mut()`, the referenced byte slice may be
+/// uninitialized. The wrapper provides safe access without introducing
+/// undefined behavior.
+///
+/// The safety invariants of this wrapper are:
+///
+///  1. Reading from an `UninitSlice` is undefined behavior.
+///  2. Writing uninitialized bytes to an `UninitSlice` is undefined behavior.
+///
+/// The difference between `&mut UninitSlice` and `&mut [MaybeUninit<u8>]` is
+/// that it is possible in safe code to write uninitialized bytes to an
+/// `&mut [MaybeUninit<u8>]`, which this type prohibits.
+#[repr(transparent)]
+pub struct UninitSlice([MaybeUninit<u8>]);
+
+impl UninitSlice {
+    /// Creates a `&mut UninitSlice` wrapping a slice of initialized memory.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::buf::UninitSlice;
+    ///
+    /// let mut buffer = [0u8; 64];
+    /// let slice = UninitSlice::new(&mut buffer[..]);
+    /// ```
+    #[inline]
+    pub fn new(slice: &mut [u8]) -> &mut UninitSlice {
+        unsafe { &mut *(slice as *mut [u8] as *mut [MaybeUninit<u8>] as *mut UninitSlice) }
+    }
+
+    /// Creates a `&mut UninitSlice` wrapping a slice of uninitialized memory.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::buf::UninitSlice;
+    /// use core::mem::MaybeUninit;
+    ///
+    /// let mut buffer = [MaybeUninit::uninit(); 64];
+    /// let slice = UninitSlice::uninit(&mut buffer[..]);
+    ///
+    /// let mut vec = Vec::with_capacity(1024);
+    /// let spare: &mut UninitSlice = vec.spare_capacity_mut().into();
+    /// ```
+    #[inline]
+    pub fn uninit(slice: &mut [MaybeUninit<u8>]) -> &mut UninitSlice {
+        unsafe { &mut *(slice as *mut [MaybeUninit<u8>] as *mut UninitSlice) }
+    }
+
+    fn uninit_ref(slice: &[MaybeUninit<u8>]) -> &UninitSlice {
+        unsafe { &*(slice as *const [MaybeUninit<u8>] as *const UninitSlice) }
+    }
+
+    /// Create a `&mut UninitSlice` from a pointer and a length.
+    ///
+    /// # Safety
+    ///
+    /// The caller must ensure that `ptr` points to a memory region of at least `len`
+    /// bytes that is valid and exclusively owned by the caller for the duration of `'a`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::buf::UninitSlice;
+    ///
+    /// let mut bytes = b"hello world".to_vec();
+    /// let ptr = bytes.as_mut_ptr();
+    /// let len = bytes.len();
+    ///
+    /// let slice = unsafe { UninitSlice::from_raw_parts_mut(ptr, len) };
+    /// ```
+    #[inline]
+    pub unsafe fn from_raw_parts_mut<'a>(ptr: *mut u8, len: usize) -> &'a mut UninitSlice {
+        let maybe_init: &mut [MaybeUninit<u8>] =
+            core::slice::from_raw_parts_mut(ptr as *mut _, len);
+        Self::uninit(maybe_init)
+    }
+
+    /// Write a single byte at the specified offset.
+    ///
+    /// # Panics
+    ///
+    /// The function panics if `index` is out of bounds.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::buf::UninitSlice;
+    ///
+    /// let mut data = [b'f', b'o', b'o'];
+    /// let slice = unsafe { UninitSlice::from_raw_parts_mut(data.as_mut_ptr(), 3) };
+    ///
+    /// slice.write_byte(0, b'b');
+    ///
+    /// assert_eq!(b"boo", &data[..]);
+    /// ```
+    #[inline]
+    pub fn write_byte(&mut self, index: usize, byte: u8) {
+        assert!(index < self.len());
+        // SAFETY: `index < self.len()` was just asserted, so the write lands inside the slice.
+        unsafe { self[index..].as_mut_ptr().write(byte) }
+    }
+
+    /// Copies bytes from `src` into `self`.
+    ///
+    /// The length of `src` must be the same as `self`.
+    ///
+    /// # Panics
+    ///
+    /// The function panics if `src` has a different length than `self`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::buf::UninitSlice;
+    ///
+    /// let mut data = [b'f', b'o', b'o'];
+    /// let slice = unsafe { UninitSlice::from_raw_parts_mut(data.as_mut_ptr(), 3) };
+    ///
+    /// slice.copy_from_slice(b"bar");
+    ///
+    /// assert_eq!(b"bar", &data[..]);
+    /// ```
+    #[inline]
+    pub fn copy_from_slice(&mut self, src: &[u8]) {
+        use core::ptr;
+
+        assert_eq!(self.len(), src.len());
+        // SAFETY: lengths are equal (asserted above); `&mut self` cannot overlap `src`.
+        unsafe {
+            ptr::copy_nonoverlapping(src.as_ptr(), self.as_mut_ptr(), self.len());
+        }
+    }
+
+    /// Return a raw pointer to the slice's buffer.
+    ///
+    /// # Safety
+    ///
+    /// The caller **must not** read from the referenced memory and **must not**
+    /// write **uninitialized** bytes to the slice either.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::BufMut;
+    ///
+    /// let mut data = [0, 1, 2];
+    /// let mut slice = &mut data[..];
+    /// let ptr = BufMut::chunk_mut(&mut slice).as_mut_ptr();
+    /// ```
+    #[inline]
+    pub fn as_mut_ptr(&mut self) -> *mut u8 {
+        self.0.as_mut_ptr() as *mut _
+    }
+
+    /// Return a `&mut [MaybeUninit<u8>]` to this slice's buffer.
+    ///
+    /// # Safety
+    ///
+    /// The caller **must not** read from the referenced memory and **must not** write
+    /// **uninitialized** bytes to the slice either. This is because the `BufMut` implementation
+    /// that created the `UninitSlice` knows which parts are initialized. Writing uninitialized
+    /// bytes to the slice may cause the `BufMut` to read those bytes and trigger undefined
+    /// behavior.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::BufMut;
+    ///
+    /// let mut data = [0, 1, 2];
+    /// let mut slice = &mut data[..];
+    /// unsafe {
+    ///     let uninit_slice = BufMut::chunk_mut(&mut slice).as_uninit_slice_mut();
+    /// };
+    /// ```
+    #[inline]
+    pub unsafe fn as_uninit_slice_mut(&mut self) -> &mut [MaybeUninit<u8>] {
+        &mut self.0
+    }
+
+    /// Returns the number of bytes in the slice.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::BufMut;
+    ///
+    /// let mut data = [0, 1, 2];
+    /// let mut slice = &mut data[..];
+    /// let len = BufMut::chunk_mut(&mut slice).len();
+    ///
+    /// assert_eq!(len, 3);
+    /// ```
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.0.len()
+    }
+}
+
+impl fmt::Debug for UninitSlice {
+    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
+        fmt.debug_struct("UninitSlice[...]").finish()
+    }
+}
+
+impl<'a> From<&'a mut [u8]> for &'a mut UninitSlice {
+    fn from(slice: &'a mut [u8]) -> Self {
+        UninitSlice::new(slice)
+    }
+}
+
+impl<'a> From<&'a mut [MaybeUninit<u8>]> for &'a mut UninitSlice {
+    fn from(slice: &'a mut [MaybeUninit<u8>]) -> Self {
+        UninitSlice::uninit(slice)
+    }
+}
+
+macro_rules! impl_index {
+    ($($t:ty),*) => {
+        $(
+            impl Index<$t> for UninitSlice {
+                type Output = UninitSlice;
+
+                #[inline]
+                fn index(&self, index: $t) -> &UninitSlice {
+                    UninitSlice::uninit_ref(&self.0[index])
+                }
+            }
+
+            impl IndexMut<$t> for UninitSlice {
+                #[inline]
+                fn index_mut(&mut self, index: $t) -> &mut UninitSlice {
+                    UninitSlice::uninit(&mut self.0[index])
+                }
+            }
+        )*
+    };
+}
+
+impl_index!(
+    Range<usize>,
+    RangeFrom<usize>,
+    RangeFull,
+    RangeInclusive<usize>,
+    RangeTo<usize>,
+    RangeToInclusive<usize>
+);
diff --git a/src/buf/vec_deque.rs b/src/buf/vec_deque.rs
index 195e6897f..55d5636b1 100644
--- a/src/buf/vec_deque.rs
+++ b/src/buf/vec_deque.rs
@@ -1,4 +1,6 @@
 use alloc::collections::VecDeque;
+#[cfg(feature = "std")]
+use std::io;
 
 use super::Buf;
 
@@ -7,7 +9,7 @@ impl Buf for VecDeque<u8> {
         self.len()
     }
 
-    fn bytes(&self) -> &[u8] {
+    fn chunk(&self) -> &[u8] {
         let (s1, s2) = self.as_slices();
         if s1.is_empty() {
             s2
@@ -16,6 +18,22 @@ impl Buf for VecDeque<u8> {
         }
     }
 
+    #[cfg(feature = "std")]
+    fn chunks_vectored<'a>(&'a self, dst: &mut [io::IoSlice<'a>]) -> usize {
+        if self.is_empty() || dst.is_empty() {
+            return 0;
+        }
+        // Both the deque and `dst` are non-empty here: the first slice always fills entry 0.
+        let (front, back) = self.as_slices();
+        dst[0] = io::IoSlice::new(front);
+        if dst.len() < 2 || back.is_empty() {
+            return 1;
+        }
+        // A second slice exists and `dst` has room for it.
+        dst[1] = io::IoSlice::new(back);
+        2
+    }
+
     fn advance(&mut self, cnt: usize) {
         self.drain(..cnt);
     }
diff --git a/src/buf/ext/writer.rs b/src/buf/writer.rs
similarity index 86%
rename from src/buf/ext/writer.rs
rename to src/buf/writer.rs
index 1418418e8..e72348f40 100644
--- a/src/buf/ext/writer.rs
+++ b/src/buf/writer.rs
@@ -5,7 +5,7 @@ use std::{cmp, io};
 /// A `BufMut` adapter which implements `io::Write` for the inner value.
 ///
 /// This struct is generally created by calling `writer()` on `BufMut`. See
-/// documentation of [`writer()`](trait.BufMut.html#method.writer) for more
+/// documentation of [`writer()`](BufMut::writer) for more
 /// details.
 #[derive(Debug)]
 pub struct Writer<B> {
@@ -24,9 +24,9 @@ impl<B: BufMut> Writer<B> {
     /// # Examples
     ///
     /// ```rust
-    /// use bytes::buf::BufMutExt;
+    /// use bytes::BufMut;
     ///
-    /// let mut buf = Vec::with_capacity(1024).writer();
+    /// let buf = Vec::with_capacity(1024).writer();
     ///
     /// assert_eq!(1024, buf.get_ref().capacity());
     /// ```
@@ -41,7 +41,7 @@ impl<B: BufMut> Writer<B> {
     /// # Examples
     ///
     /// ```rust
-    /// use bytes::buf::BufMutExt;
+    /// use bytes::BufMut;
     ///
     /// let mut buf = vec![].writer();
     ///
@@ -58,7 +58,7 @@ impl<B: BufMut> Writer<B> {
     /// # Examples
     ///
     /// ```rust
-    /// use bytes::buf::BufMutExt;
+    /// use bytes::BufMut;
     /// use std::io;
     ///
     /// let mut buf = vec![].writer();
@@ -78,7 +78,7 @@ impl<B: BufMut + Sized> io::Write for Writer<B> {
     fn write(&mut self, src: &[u8]) -> io::Result<usize> {
         let n = cmp::min(self.buf.remaining_mut(), src.len());
 
-        self.buf.put(&src[0..n]);
+        self.buf.put_slice(&src[..n]);
         Ok(n)
     }
 
diff --git a/src/bytes.rs b/src/bytes.rs
index 380b1c681..cdb6ea559 100644
--- a/src/bytes.rs
+++ b/src/bytes.rs
@@ -1,23 +1,40 @@
-use core::{cmp, fmt, hash, mem, ptr, slice, usize};
-use core::iter::{FromIterator};
+use core::iter::FromIterator;
+use core::mem::{self, ManuallyDrop};
 use core::ops::{Deref, RangeBounds};
+use core::ptr::NonNull;
+use core::{cmp, fmt, hash, ptr, slice, usize};
+
+use alloc::{
+    alloc::{dealloc, Layout},
+    borrow::Borrow,
+    boxed::Box,
+    string::String,
+    vec::Vec,
+};
 
-use alloc::{vec::Vec, string::String, boxed::Box, borrow::Borrow};
-
-use crate::Buf;
 use crate::buf::IntoIter;
-use crate::loom::sync::atomic::{self, AtomicPtr, AtomicUsize, Ordering};
+#[allow(unused)]
+use crate::loom::sync::atomic::AtomicMut;
+use crate::loom::sync::atomic::{AtomicPtr, AtomicUsize, Ordering};
+use crate::{offset_from, Buf, BytesMut};
 
-/// A reference counted contiguous slice of memory.
+/// A cheaply cloneable and sliceable chunk of contiguous memory.
 ///
 /// `Bytes` is an efficient container for storing and operating on contiguous
 /// slices of memory. It is intended for use primarily in networking code, but
 /// could have applications elsewhere as well.
 ///
 /// `Bytes` values facilitate zero-copy network programming by allowing multiple
-/// `Bytes` objects to point to the same underlying memory. This is managed by
-/// using a reference count to track when the memory is no longer needed and can
-/// be freed.
+/// `Bytes` objects to point to the same underlying memory.
+///
+/// `Bytes` does not have a single implementation. It is an interface, whose
+/// exact behavior is implemented through dynamic dispatch in several underlying
+/// implementations of `Bytes`.
+///
+/// All `Bytes` implementations must fulfill the following requirements:
+/// - They are cheaply cloneable and thereby shareable between an unlimited amount
+///   of components, for example by modifying a reference count.
+/// - Instances can be sliced to refer to a subset of the original buffer.
 ///
 /// ```
 /// use bytes::Bytes;
@@ -39,32 +56,48 @@ use crate::loom::sync::atomic::{self, AtomicPtr, AtomicUsize, Ordering};
 /// to track information about which segment of the underlying memory the
 /// `Bytes` handle has access to.
 ///
-/// `Bytes` keeps both a pointer to the shared `Arc` containing the full memory
+/// `Bytes` keeps both a pointer to the shared state containing the full memory
 /// slice and a pointer to the start of the region visible by the handle.
 /// `Bytes` also tracks the length of its view into the memory.
 ///
 /// # Sharing
 ///
-/// The memory itself is reference counted, and multiple `Bytes` objects may
-/// point to the same region. Each `Bytes` handle point to different sections within
-/// the memory region, and `Bytes` handle may or may not have overlapping views
+/// `Bytes` contains a vtable, which allows implementations of `Bytes` to define
+/// how sharing/cloning is implemented in detail.
+/// When `Bytes::clone()` is called, `Bytes` will call the vtable function for
+/// cloning the backing storage in order to share it behind multiple `Bytes`
+/// instances.
+///
+/// For `Bytes` implementations which refer to constant memory (e.g. created
+/// via `Bytes::from_static()`) the cloning implementation will be a no-op.
+///
+/// For `Bytes` implementations which point to a reference counted shared storage
+/// (e.g. an `Arc<[u8]>`), sharing will be implemented by increasing the
+/// reference count.
+///
+/// Due to this mechanism, multiple `Bytes` instances may point to the same
+/// shared memory region.
+/// Each `Bytes` instance can point to different sections within that
+/// memory region, and `Bytes` instances may or may not have overlapping views
 /// into the memory.
 ///
+/// The following diagram visualizes a scenario where 2 `Bytes` instances make
+/// use of an `Arc`-based backing storage, and provide access to different views:
 ///
 /// ```text
 ///
-///    Arc ptrs                   +---------+
-///    ________________________ / | Bytes 2 |
-///   /                           +---------+
-///  /          +-----------+     |         |
-/// |_________/ |  Bytes 1  |     |         |
-/// |           +-----------+     |         |
+///    Arc ptrs                   ┌─────────┐
+///    ________________________ / │ Bytes 2 │
+///   /                           └─────────┘
+///  /          ┌───────────┐     |         |
+/// |_________/ │  Bytes 1  │     |         |
+/// |           └───────────┘     |         |
 /// |           |           | ___/ data     | tail
 /// |      data |      tail |/              |
 /// v           v           v               v
-/// +-----+---------------------------------+-----+
-/// | Arc |     |           |               |     |
-/// +-----+---------------------------------+-----+
+/// ┌─────┬─────┬───────────┬───────────────┬─────┐
+/// │ Arc │     │           │               │     │
+/// └─────┴─────┴───────────┴───────────────┴─────┘
 /// ```
 pub struct Bytes {
     ptr: *const u8,
@@ -78,6 +111,13 @@ pub(crate) struct Vtable {
     /// fn(data, ptr, len)
     pub clone: unsafe fn(&AtomicPtr<()>, *const u8, usize) -> Bytes,
     /// fn(data, ptr, len)
+    ///
+    /// takes `Bytes` to value
+    pub to_vec: unsafe fn(&AtomicPtr<()>, *const u8, usize) -> Vec<u8>,
+    pub to_mut: unsafe fn(&AtomicPtr<()>, *const u8, usize) -> BytesMut,
+    /// fn(data)
+    pub is_unique: unsafe fn(&AtomicPtr<()>) -> bool,
+    /// fn(data, ptr, len)
     pub drop: unsafe fn(&mut AtomicPtr<()>, *const u8, usize),
 }
 
@@ -96,15 +136,16 @@ impl Bytes {
     /// ```
     #[inline]
     #[cfg(not(all(loom, test)))]
-    pub const fn new() -> Bytes {
+    pub const fn new() -> Self {
         // Make it a named const to work around
         // "unsizing casts are not allowed in const fn"
         const EMPTY: &[u8] = &[];
         Bytes::from_static(EMPTY)
     }
 
+    /// Creates a new empty `Bytes`.
     #[cfg(all(loom, test))]
-    pub fn new() -> Bytes {
+    pub fn new() -> Self {
         const EMPTY: &[u8] = &[];
         Bytes::from_static(EMPTY)
     }
@@ -124,7 +165,7 @@ impl Bytes {
     /// ```
     #[inline]
     #[cfg(not(all(loom, test)))]
-    pub const fn from_static(bytes: &'static [u8]) -> Bytes {
+    pub const fn from_static(bytes: &'static [u8]) -> Self {
         Bytes {
             ptr: bytes.as_ptr(),
             len: bytes.len(),
@@ -133,8 +174,9 @@ impl Bytes {
         }
     }
 
+    /// Creates a new `Bytes` from a static slice.
     #[cfg(all(loom, test))]
-    pub fn from_static(bytes: &'static [u8]) -> Bytes {
+    pub fn from_static(bytes: &'static [u8]) -> Self {
         Bytes {
             ptr: bytes.as_ptr(),
             len: bytes.len(),
@@ -143,6 +185,110 @@ impl Bytes {
         }
     }
 
+    /// Creates a new `Bytes` with length zero and the given pointer as the address.
+    fn new_empty_with_ptr(ptr: *const u8) -> Self {
+        debug_assert!(!ptr.is_null());
+
+        // Detach this pointer's provenance from whichever allocation it came from, and reattach it
+        // to the provenance of the fake ZST [u8;0] at the same address.
+        let ptr = without_provenance(ptr as usize);
+
+        Bytes {
+            ptr,
+            len: 0,
+            data: AtomicPtr::new(ptr::null_mut()),
+            vtable: &STATIC_VTABLE,
+        }
+    }
+
+    /// Create [Bytes] with a buffer whose lifetime is controlled
+    /// via an explicit owner.
+    ///
+    /// A common use case is to zero-copy construct from mapped memory.
+    ///
+    /// ```
+    /// # struct File;
+    /// #
+    /// # impl File {
+    /// #     pub fn open(_: &str) -> Result<Self, ()> {
+    /// #         Ok(Self)
+    /// #     }
+    /// # }
+    /// #
+    /// # mod memmap2 {
+    /// #     pub struct Mmap;
+    /// #
+    /// #     impl Mmap {
+    /// #         pub unsafe fn map(_file: &super::File) -> Result<Self, ()> {
+    /// #             Ok(Self)
+    /// #         }
+    /// #     }
+    /// #
+    /// #     impl AsRef<[u8]> for Mmap {
+    /// #         fn as_ref(&self) -> &[u8] {
+    /// #             b"buf"
+    /// #         }
+    /// #     }
+    /// # }
+    /// use bytes::Bytes;
+    /// use memmap2::Mmap;
+    ///
+    /// # fn main() -> Result<(), ()> {
+    /// let file = File::open("upload_bundle.tar.gz")?;
+    /// let mmap = unsafe { Mmap::map(&file) }?;
+    /// let b = Bytes::from_owner(mmap);
+    /// # Ok(())
+    /// # }
+    /// ```
+    ///
+    /// The `owner` will be transferred to the constructed [Bytes] object, which
+    /// will ensure it is dropped once all remaining clones of the constructed
+    /// object are dropped. The owner will then be responsible for dropping the
+    /// specified region of memory as part of its [Drop] implementation.
+    ///
+    /// Note that converting [Bytes] constructed from an owner into a [BytesMut]
+    /// will always create a deep copy of the buffer into newly allocated memory.
+    pub fn from_owner<T>(owner: T) -> Self
+    where
+        T: AsRef<[u8]> + Send + 'static,
+    {
+        // Safety & Miri:
+        // The ownership of `owner` is first transferred to the `Owned` wrapper and `Bytes` object.
+        // This ensures that the owner is pinned in memory, allowing us to call `.as_ref()` safely
+        // since the lifetime of the owner is controlled by the lifetime of the new `Bytes` object,
+        // and the lifetime of the resulting borrowed `&[u8]` matches that of the owner.
+        // Note that this remains safe so long as we only call `.as_ref()` once.
+        //
+        // There are some additional special considerations here:
+        //   * We rely on Bytes's Drop impl to clean up memory should `.as_ref()` panic.
+        //   * Setting the `ptr` and `len` on the bytes object last (after moving the owner to
+        //     Bytes) allows Miri checks to pass since it avoids obtaining the `&[u8]` slice
+        //     from a stack-owned Box.
+        // More details on this: https://github.com/tokio-rs/bytes/pull/742/#discussion_r1813375863
+        //                  and: https://github.com/tokio-rs/bytes/pull/742/#discussion_r1813316032
+
+        let owned = Box::into_raw(Box::new(Owned {
+            lifetime: OwnedLifetime {
+                ref_cnt: AtomicUsize::new(1),
+                drop: owned_box_and_drop::<T>,
+            },
+            owner,
+        }));
+
+        let mut ret = Bytes {
+            ptr: NonNull::dangling().as_ptr(),
+            len: 0,
+            data: AtomicPtr::new(owned.cast()),
+            vtable: &OWNED_VTABLE,
+        };
+
+        let buf = unsafe { &*owned }.owner.as_ref();
+        ret.ptr = buf.as_ptr();
+        ret.len = buf.len();
+
+        ret
+    }
+
     /// Returns the number of bytes contained in this `Bytes`.
     ///
     /// # Examples
@@ -154,7 +300,7 @@ impl Bytes {
     /// assert_eq!(b.len(), 5);
     /// ```
     #[inline]
-    pub fn len(&self) -> usize {
+    pub const fn len(&self) -> usize {
         self.len
     }
 
@@ -169,12 +315,35 @@ impl Bytes {
     /// assert!(b.is_empty());
     /// ```
     #[inline]
-    pub fn is_empty(&self) -> bool {
+    pub const fn is_empty(&self) -> bool {
         self.len == 0
     }
 
+    /// Returns true if this is the only reference to the data and
+    /// `Into<BytesMut>` would avoid cloning the underlying buffer.
+    ///
+    /// Always returns false if the data is backed by a [static slice](Bytes::from_static),
+    /// or an [owner](Bytes::from_owner).
+    ///
+    /// The result may be invalidated immediately if another thread clones this
+    /// value during the call. If you need to rely on the result (i.e. for safety
+    /// reasons), first ensure you have unique access to this value (`&mut Bytes`).
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::Bytes;
+    ///
+    /// let a = Bytes::from(vec![1, 2, 3]);
+    /// assert!(a.is_unique());
+    /// let b = a.clone();
+    /// assert!(!a.is_unique());
+    /// ```
+    pub fn is_unique(&self) -> bool {
+        unsafe { (self.vtable.is_unique)(&self.data) }
+    }
 
-    ///Creates `Bytes` instance from slice, by copying it.
+    /// Creates `Bytes` instance from slice, by copying it.
     pub fn copy_from_slice(data: &[u8]) -> Self {
         data.to_vec().into()
     }
@@ -201,19 +370,19 @@ impl Bytes {
     ///
     /// Requires that `begin <= end` and `end <= self.len()`, otherwise slicing
     /// will panic.
-    pub fn slice(&self, range: impl RangeBounds<usize>) -> Bytes {
+    pub fn slice(&self, range: impl RangeBounds<usize>) -> Self {
         use core::ops::Bound;
 
         let len = self.len();
 
         let begin = match range.start_bound() {
             Bound::Included(&n) => n,
-            Bound::Excluded(&n) => n + 1,
+            Bound::Excluded(&n) => n.checked_add(1).expect("out of range"),
             Bound::Unbounded => 0,
         };
 
         let end = match range.end_bound() {
-            Bound::Included(&n) => n + 1,
+            Bound::Included(&n) => n.checked_add(1).expect("out of range"),
             Bound::Excluded(&n) => n,
             Bound::Unbounded => len,
         };
@@ -235,11 +404,10 @@ impl Bytes {
             return Bytes::new();
         }
 
-
         let mut ret = self.clone();
 
         ret.len = end - begin;
-        ret.ptr = unsafe { ret.ptr.offset(begin as isize) };
+        ret.ptr = unsafe { ret.ptr.add(begin) };
 
         ret
     }
@@ -269,7 +437,7 @@ impl Bytes {
     ///
     /// Requires that the given `sub` slice is in fact contained within the
     /// `Bytes` buffer; otherwise this function will panic.
-    pub fn slice_ref(&self, subset: &[u8]) -> Bytes {
+    pub fn slice_ref(&self, subset: &[u8]) -> Self {
         // Empty slice and empty Bytes may have their pointers reset
         // so explicitly allow empty slice to be a subslice of any slice.
         if subset.is_empty() {
@@ -285,15 +453,15 @@ impl Bytes {
         assert!(
             sub_p >= bytes_p,
             "subset pointer ({:p}) is smaller than self pointer ({:p})",
-            sub_p as *const u8,
-            bytes_p as *const u8,
+            subset.as_ptr(),
+            self.as_ptr(),
         );
         assert!(
             sub_p + sub_len <= bytes_p + bytes_len,
             "subset is out of bounds: self = ({:p}, {}), subset = ({:p}, {})",
-            bytes_p as *const u8,
+            self.as_ptr(),
             bytes_len,
-            sub_p as *const u8,
+            subset.as_ptr(),
             sub_len,
         );
 
@@ -305,7 +473,9 @@ impl Bytes {
     /// Splits the bytes into two at the given index.
     ///
     /// Afterwards `self` contains elements `[0, at)`, and the returned `Bytes`
-    /// contains elements `[at, len)`.
+    /// contains elements `[at, len)`. It's guaranteed that the memory does not
+    /// move, that is, the address of `self` does not change, and the address of
+    /// the returned slice is `at` bytes after that.
     ///
     /// This is an `O(1)` operation that just increases the reference count and
     /// sets a few indices.
@@ -326,7 +496,15 @@ impl Bytes {
     ///
     /// Panics if `at > len`.
     #[must_use = "consider Bytes::truncate if you don't need the other half"]
-    pub fn split_off(&mut self, at: usize) -> Bytes {
+    pub fn split_off(&mut self, at: usize) -> Self {
+        if at == self.len() {
+            return Bytes::new_empty_with_ptr(self.ptr.wrapping_add(at));
+        }
+
+        if at == 0 {
+            return mem::replace(self, Bytes::new_empty_with_ptr(self.ptr));
+        }
+
         assert!(
             at <= self.len(),
             "split_off out of bounds: {:?} <= {:?}",
@@ -334,14 +512,6 @@ impl Bytes {
             self.len(),
         );
 
-        if at == self.len() {
-            return Bytes::new();
-        }
-
-        if at == 0 {
-            return mem::replace(self, Bytes::new());
-        }
-
         let mut ret = self.clone();
 
         self.len = at;
@@ -375,22 +545,22 @@ impl Bytes {
     ///
     /// Panics if `at > len`.
     #[must_use = "consider Bytes::advance if you don't need the other half"]
-    pub fn split_to(&mut self, at: usize) -> Bytes {
-        assert!(
-            at <= self.len(),
-            "split_to out of bounds: {:?} <= {:?}",
-            at,
-            self.len(),
-        );
-
+    pub fn split_to(&mut self, at: usize) -> Self {
         if at == self.len() {
-            return mem::replace(self, Bytes::new());
+            let end_ptr = self.ptr.wrapping_add(at);
+            return mem::replace(self, Bytes::new_empty_with_ptr(end_ptr));
         }
 
         if at == 0 {
-            return Bytes::new();
+            return Bytes::new_empty_with_ptr(self.ptr);
         }
 
+        assert!(
+            at <= self.len(),
+            "split_to out of bounds: {:?} <= {:?}",
+            at,
+            self.len(),
+        );
 
         let mut ret = self.clone();
 
@@ -406,7 +576,7 @@ impl Bytes {
     /// If `len` is greater than the buffer's current length, this has no
     /// effect.
     ///
-    /// The [`split_off`] method can emulate `truncate`, but this causes the
+    /// The [split_off](`Self::split_off()`) method can emulate `truncate`, but this causes the
     /// excess bytes to be returned instead of dropped.
     ///
     /// # Examples
@@ -418,16 +588,15 @@ impl Bytes {
     /// buf.truncate(5);
     /// assert_eq!(buf, b"hello"[..]);
     /// ```
-    ///
-    /// [`split_off`]: #method.split_off
     #[inline]
     pub fn truncate(&mut self, len: usize) {
         if len < self.len {
             // The Vec "promotable" vtables do not store the capacity,
             // so we cannot truncate while using this repr. We *have* to
             // promote using `split_off` so the capacity can be stored.
-            if self.vtable as *const Vtable == &PROMOTABLE_EVEN_VTABLE ||
-                self.vtable as *const Vtable == &PROMOTABLE_ODD_VTABLE {
+            if self.vtable as *const Vtable == &PROMOTABLE_EVEN_VTABLE
+                || self.vtable as *const Vtable == &PROMOTABLE_ODD_VTABLE
+            {
                 drop(self.split_off(len));
             } else {
                 self.len = len;
@@ -451,8 +620,39 @@ impl Bytes {
         self.truncate(0);
     }
 
+    /// Try to convert self into `BytesMut`.
+    ///
+    /// If `self` is unique for the entire original buffer, this will succeed
+    /// and return a `BytesMut` with the contents of `self` without copying.
+    /// If `self` is not unique for the entire original buffer, this will fail
+    /// and return self.
+    ///
+    /// This will also always fail if the buffer was constructed via either
+    /// [from_owner](Bytes::from_owner) or [from_static](Bytes::from_static).
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::{Bytes, BytesMut};
+    ///
+    /// let bytes = Bytes::from(b"hello".to_vec());
+    /// assert_eq!(bytes.try_into_mut(), Ok(BytesMut::from(&b"hello"[..])));
+    /// ```
+    pub fn try_into_mut(self) -> Result<BytesMut, Bytes> {
+        if self.is_unique() {
+            Ok(self.into())
+        } else {
+            Err(self)
+        }
+    }
+
     #[inline]
-    pub(crate) unsafe fn with_vtable(ptr: *const u8, len: usize, data: AtomicPtr<()>, vtable: &'static Vtable) -> Bytes {
+    pub(crate) unsafe fn with_vtable(
+        ptr: *const u8,
+        len: usize,
+        data: AtomicPtr<()>,
+        vtable: &'static Vtable,
+    ) -> Bytes {
         Bytes {
             ptr,
             len,
@@ -465,9 +665,7 @@ impl Bytes {
 
     #[inline]
     fn as_slice(&self) -> &[u8] {
-        unsafe {
-            slice::from_raw_parts(self.ptr, self.len)
-        }
+        unsafe { slice::from_raw_parts(self.ptr, self.len) }
     }
 
     #[inline]
@@ -475,7 +673,7 @@ impl Bytes {
         // should already be asserted, but debug assert for tests
         debug_assert!(self.len >= by, "internal: inc_start out of bounds");
         self.len -= by;
-        self.ptr = self.ptr.offset(by as isize);
+        self.ptr = self.ptr.add(by);
     }
 }
 
@@ -486,18 +684,14 @@ unsafe impl Sync for Bytes {}
 impl Drop for Bytes {
     #[inline]
     fn drop(&mut self) {
-        unsafe {
-            (self.vtable.drop)(&mut self.data, self.ptr, self.len)
-        }
+        unsafe { (self.vtable.drop)(&mut self.data, self.ptr, self.len) }
     }
 }
 
 impl Clone for Bytes {
     #[inline]
     fn clone(&self) -> Bytes {
-        unsafe {
-            (self.vtable.clone)(&self.data, self.ptr, self.len)
-        }
+        unsafe { (self.vtable.clone)(&self.data, self.ptr, self.len) }
     }
 }
 
@@ -508,7 +702,7 @@ impl Buf for Bytes {
     }
 
     #[inline]
-    fn bytes(&self) -> &[u8] {
+    fn chunk(&self) -> &[u8] {
         self.as_slice()
     }
 
@@ -526,8 +720,8 @@ impl Buf for Bytes {
         }
     }
 
-    fn to_bytes(&mut self) -> crate::Bytes {
-        core::mem::replace(self, Bytes::new())
+    fn copy_to_bytes(&mut self, len: usize) -> Self {
+        self.split_to(len)
     }
 }
 
@@ -548,7 +742,10 @@ impl AsRef<[u8]> for Bytes {
 }
 
 impl hash::Hash for Bytes {
-    fn hash<H>(&self, state: &mut H) where H: hash::Hasher {
+    fn hash<H>(&self, state: &mut H)
+    where
+        H: hash::Hasher,
+    {
         self.as_slice().hash(state);
     }
 }
@@ -573,7 +770,7 @@ impl<'a> IntoIterator for &'a Bytes {
     type IntoIter = core::slice::Iter<'a, u8>;
 
     fn into_iter(self) -> Self::IntoIter {
-        self.as_slice().into_iter()
+        self.as_slice().iter()
     }
 }
 
@@ -655,7 +852,7 @@ impl PartialOrd<Bytes> for str {
 
 impl PartialEq<Vec<u8>> for Bytes {
     fn eq(&self, other: &Vec<u8>) -> bool {
-        *self == &other[..]
+        *self == other[..]
     }
 }
 
@@ -679,7 +876,7 @@ impl PartialOrd<Bytes> for Vec<u8> {
 
 impl PartialEq<String> for Bytes {
     fn eq(&self, other: &String) -> bool {
-        *self == &other[..]
+        *self == other[..]
     }
 }
 
@@ -726,7 +923,8 @@ impl PartialOrd<Bytes> for &str {
 }
 
 impl<'a, T: ?Sized> PartialEq<&'a T> for Bytes
-    where Bytes: PartialEq<T>
+where
+    Bytes: PartialEq<T>,
 {
     fn eq(&self, other: &&'a T) -> bool {
         *self == **other
@@ -734,7 +932,8 @@ impl<'a, T: ?Sized> PartialEq<&'a T> for Bytes
 }
 
 impl<'a, T: ?Sized> PartialOrd<&'a T> for Bytes
-    where Bytes: PartialOrd<T>
+where
+    Bytes: PartialOrd<T>,
 {
     fn partial_cmp(&self, other: &&'a T) -> Option<cmp::Ordering> {
         self.partial_cmp(&**other)
@@ -764,43 +963,105 @@ impl From<&'static str> for Bytes {
 
 impl From<Vec<u8>> for Bytes {
     fn from(vec: Vec<u8>) -> Bytes {
-        // into_boxed_slice doesn't return a heap allocation for empty vectors,
+        let mut vec = ManuallyDrop::new(vec);
+        let ptr = vec.as_mut_ptr();
+        let len = vec.len();
+        let cap = vec.capacity();
+
+        // Avoid an extra allocation if possible.
+        if len == cap {
+            let vec = ManuallyDrop::into_inner(vec);
+            return Bytes::from(vec.into_boxed_slice());
+        }
+
+        let shared = Box::new(Shared {
+            buf: ptr,
+            cap,
+            ref_cnt: AtomicUsize::new(1),
+        });
+
+        let shared = Box::into_raw(shared);
+        // The pointer should be aligned, so this assert should
+        // always succeed.
+        debug_assert!(
+            0 == (shared as usize & KIND_MASK),
+            "internal: Box<Shared> should have an aligned pointer",
+        );
+        Bytes {
+            ptr,
+            len,
+            data: AtomicPtr::new(shared as _),
+            vtable: &SHARED_VTABLE,
+        }
+    }
+}
+
+impl From<Box<[u8]>> for Bytes {
+    fn from(slice: Box<[u8]>) -> Bytes {
+        // Box<[u8]> doesn't contain a heap allocation for empty slices,
         // so the pointer isn't aligned enough for the KIND_VEC stashing to
         // work.
-        if vec.is_empty() {
+        if slice.is_empty() {
             return Bytes::new();
         }
 
-        let slice = vec.into_boxed_slice();
         let len = slice.len();
-        let ptr = slice.as_ptr();
-        drop(Box::into_raw(slice));
+        let ptr = Box::into_raw(slice) as *mut u8;
 
         if ptr as usize & 0x1 == 0 {
-            let data = ptr as usize | KIND_VEC;
+            let data = ptr_map(ptr, |addr| addr | KIND_VEC);
             Bytes {
                 ptr,
                 len,
-                data: AtomicPtr::new(data as *mut _),
+                data: AtomicPtr::new(data.cast()),
                 vtable: &PROMOTABLE_EVEN_VTABLE,
             }
         } else {
             Bytes {
                 ptr,
                 len,
-                data: AtomicPtr::new(ptr as *mut _),
+                data: AtomicPtr::new(ptr.cast()),
                 vtable: &PROMOTABLE_ODD_VTABLE,
             }
         }
     }
 }
 
+impl From<Bytes> for BytesMut {
+    /// Converts `bytes` into a `BytesMut`.
+    ///
+    /// If `bytes` is unique for the entire original buffer, this will return a
+    /// `BytesMut` with the contents of `bytes` without copying.
+    /// If `bytes` is not unique for the entire original buffer, this will copy
+    /// the subset of the original buffer viewed by `bytes` into a new `BytesMut`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::{Bytes, BytesMut};
+    ///
+    /// let bytes = Bytes::from(b"hello".to_vec());
+    /// assert_eq!(BytesMut::from(bytes), BytesMut::from(&b"hello"[..]));
+    /// ```
+    fn from(bytes: Bytes) -> Self {
+        let bytes = ManuallyDrop::new(bytes);
+        unsafe { (bytes.vtable.to_mut)(&bytes.data, bytes.ptr, bytes.len) }
+    }
+}
+
 impl From<String> for Bytes {
     fn from(s: String) -> Bytes {
         Bytes::from(s.into_bytes())
     }
 }
 
+impl From<Bytes> for Vec<u8> {
+    fn from(bytes: Bytes) -> Vec<u8> {
+        let bytes = ManuallyDrop::new(bytes);
+        unsafe { (bytes.vtable.to_vec)(&bytes.data, bytes.ptr, bytes.len) }
+    }
+}
+
 // ===== impl Vtable =====
 
 impl fmt::Debug for Vtable {
@@ -816,6 +1077,9 @@ impl fmt::Debug for Vtable {
 
 const STATIC_VTABLE: Vtable = Vtable {
     clone: static_clone,
+    to_vec: static_to_vec,
+    to_mut: static_to_mut,
+    is_unique: static_is_unique,
     drop: static_drop,
 };
 
@@ -824,19 +1088,120 @@ unsafe fn static_clone(_: &AtomicPtr<()>, ptr: *const u8, len: usize) -> Bytes {
     Bytes::from_static(slice)
 }
 
+unsafe fn static_to_vec(_: &AtomicPtr<()>, ptr: *const u8, len: usize) -> Vec<u8> {
+    let slice = slice::from_raw_parts(ptr, len);
+    slice.to_vec()
+}
+
+unsafe fn static_to_mut(_: &AtomicPtr<()>, ptr: *const u8, len: usize) -> BytesMut {
+    let slice = slice::from_raw_parts(ptr, len);
+    BytesMut::from(slice)
+}
+
+fn static_is_unique(_: &AtomicPtr<()>) -> bool {
+    false
+}
+
 unsafe fn static_drop(_: &mut AtomicPtr<()>, _: *const u8, _: usize) {
     // nothing to drop for &'static [u8]
 }
 
+// ===== impl OwnedVtable =====
+
+#[repr(C)]
+struct OwnedLifetime {
+    ref_cnt: AtomicUsize,
+    drop: unsafe fn(*mut ()),
+}
+
+#[repr(C)]
+struct Owned<T> {
+    lifetime: OwnedLifetime,
+    owner: T,
+}
+
+unsafe fn owned_box_and_drop<T>(ptr: *mut ()) {
+    let b: Box<Owned<T>> = Box::from_raw(ptr as _);
+    drop(b);
+}
+
+unsafe fn owned_clone(data: &AtomicPtr<()>, ptr: *const u8, len: usize) -> Bytes {
+    let owned = data.load(Ordering::Relaxed);
+    let ref_cnt = &(*owned.cast::<OwnedLifetime>()).ref_cnt;
+    let old_cnt = ref_cnt.fetch_add(1, Ordering::Relaxed);
+    if old_cnt > usize::MAX >> 1 {
+        crate::abort()
+    }
+
+    Bytes {
+        ptr,
+        len,
+        data: AtomicPtr::new(owned as _),
+        vtable: &OWNED_VTABLE,
+    }
+}
+
+unsafe fn owned_to_vec(data: &AtomicPtr<()>, ptr: *const u8, len: usize) -> Vec<u8> {
+    let slice = slice::from_raw_parts(ptr, len);
+    let vec = slice.to_vec();
+    owned_drop_impl(data.load(Ordering::Relaxed));
+    vec
+}
+
+unsafe fn owned_to_mut(data: &AtomicPtr<()>, ptr: *const u8, len: usize) -> BytesMut {
+    BytesMut::from_vec(owned_to_vec(data, ptr, len))
+}
+
+unsafe fn owned_is_unique(_data: &AtomicPtr<()>) -> bool {
+    false
+}
+
+unsafe fn owned_drop_impl(owned: *mut ()) {
+    let lifetime = owned.cast::<OwnedLifetime>();
+    let ref_cnt = &(*lifetime).ref_cnt;
+
+    let old_cnt = ref_cnt.fetch_sub(1, Ordering::Release);
+    debug_assert!(
+        old_cnt > 0 && old_cnt <= usize::MAX >> 1,
+        "expected non-zero refcount and no underflow"
+    );
+    if old_cnt != 1 {
+        return;
+    }
+    ref_cnt.load(Ordering::Acquire);
+
+    let drop_fn = &(*lifetime).drop;
+    drop_fn(owned)
+}
+
+unsafe fn owned_drop(data: &mut AtomicPtr<()>, _ptr: *const u8, _len: usize) {
+    let owned = data.load(Ordering::Relaxed);
+    owned_drop_impl(owned);
+}
+
+static OWNED_VTABLE: Vtable = Vtable {
+    clone: owned_clone,
+    to_vec: owned_to_vec,
+    to_mut: owned_to_mut,
+    is_unique: owned_is_unique,
+    drop: owned_drop,
+};
+
 // ===== impl PromotableVtable =====
 
 static PROMOTABLE_EVEN_VTABLE: Vtable = Vtable {
     clone: promotable_even_clone,
+    to_vec: promotable_even_to_vec,
+    to_mut: promotable_even_to_mut,
+    is_unique: promotable_is_unique,
     drop: promotable_even_drop,
 };
 
 static PROMOTABLE_ODD_VTABLE: Vtable = Vtable {
     clone: promotable_odd_clone,
+    to_vec: promotable_odd_to_vec,
+    to_mut: promotable_odd_to_mut,
+    is_unique: promotable_is_unique,
     drop: promotable_odd_drop,
 };
 
@@ -845,27 +1210,96 @@ unsafe fn promotable_even_clone(data: &AtomicPtr<()>, ptr: *const u8, len: usize
     let kind = shared as usize & KIND_MASK;
 
     if kind == KIND_ARC {
-        shallow_clone_arc(shared as _, ptr, len)
+        shallow_clone_arc(shared.cast(), ptr, len)
     } else {
         debug_assert_eq!(kind, KIND_VEC);
-        let buf = (shared as usize & !KIND_MASK) as *mut u8;
+        let buf = ptr_map(shared.cast(), |addr| addr & !KIND_MASK);
         shallow_clone_vec(data, shared, buf, ptr, len)
     }
 }
 
-unsafe fn promotable_even_drop(data: &mut AtomicPtr<()>, ptr: *const u8, len: usize) {
-    let shared = *data.get_mut();
+unsafe fn promotable_to_vec(
+    data: &AtomicPtr<()>,
+    ptr: *const u8,
+    len: usize,
+    f: fn(*mut ()) -> *mut u8,
+) -> Vec<u8> {
+    let shared = data.load(Ordering::Acquire);
+    let kind = shared as usize & KIND_MASK;
+
+    if kind == KIND_ARC {
+        shared_to_vec_impl(shared.cast(), ptr, len)
+    } else {
+        // Not promoted yet (KIND_VEC); `ptr` may sit at an offset from `buf`.
+        debug_assert_eq!(kind, KIND_VEC);
+
+        let buf = f(shared);
+
+        let cap = offset_from(ptr, buf) + len;
+
+        // Copy back buffer
+        ptr::copy(ptr, buf, len);
+
+        Vec::from_raw_parts(buf, len, cap)
+    }
+}
+
+unsafe fn promotable_to_mut(
+    data: &AtomicPtr<()>,
+    ptr: *const u8,
+    len: usize,
+    f: fn(*mut ()) -> *mut u8,
+) -> BytesMut {
+    let shared = data.load(Ordering::Acquire);
     let kind = shared as usize & KIND_MASK;
 
     if kind == KIND_ARC {
-        release_shared(shared as *mut Shared);
+        shared_to_mut_impl(shared.cast(), ptr, len)
     } else {
+        // KIND_VEC is a view of an underlying buffer at a certain offset.
+        // The ptr + len always represents the end of that buffer.
+        // Before truncating it, it is first promoted to KIND_ARC.
+        // Thus, we can safely reconstruct a Vec from it without leaking memory.
         debug_assert_eq!(kind, KIND_VEC);
-        let buf = (shared as usize & !KIND_MASK) as *mut u8;
-        drop(rebuild_boxed_slice(buf, ptr, len));
+
+        let buf = f(shared);
+        let off = offset_from(ptr, buf);
+        let cap = off + len;
+        let v = Vec::from_raw_parts(buf, cap, cap);
+
+        let mut b = BytesMut::from_vec(v);
+        b.advance_unchecked(off);
+        b
     }
 }
 
+unsafe fn promotable_even_to_vec(data: &AtomicPtr<()>, ptr: *const u8, len: usize) -> Vec<u8> {
+    promotable_to_vec(data, ptr, len, |shared| {
+        ptr_map(shared.cast(), |addr| addr & !KIND_MASK)
+    })
+}
+
+unsafe fn promotable_even_to_mut(data: &AtomicPtr<()>, ptr: *const u8, len: usize) -> BytesMut {
+    promotable_to_mut(data, ptr, len, |shared| {
+        ptr_map(shared.cast(), |addr| addr & !KIND_MASK)
+    })
+}
+
+unsafe fn promotable_even_drop(data: &mut AtomicPtr<()>, ptr: *const u8, len: usize) {
+    data.with_mut(|shared| {
+        let shared = *shared;
+        let kind = shared as usize & KIND_MASK;
+
+        if kind == KIND_ARC {
+            release_shared(shared.cast());
+        } else {
+            debug_assert_eq!(kind, KIND_VEC);
+            let buf = ptr_map(shared.cast(), |addr| addr & !KIND_MASK);
+            free_boxed_slice(buf, ptr, len);
+        }
+    });
+}
+
 unsafe fn promotable_odd_clone(data: &AtomicPtr<()>, ptr: *const u8, len: usize) -> Bytes {
     let shared = data.load(Ordering::Acquire);
     let kind = shared as usize & KIND_MASK;
@@ -874,36 +1308,65 @@ unsafe fn promotable_odd_clone(data: &AtomicPtr<()>, ptr: *const u8, len: usize)
         shallow_clone_arc(shared as _, ptr, len)
     } else {
         debug_assert_eq!(kind, KIND_VEC);
-        shallow_clone_vec(data, shared, shared as *mut u8, ptr, len)
+        shallow_clone_vec(data, shared, shared.cast(), ptr, len)
     }
 }
 
+unsafe fn promotable_odd_to_vec(data: &AtomicPtr<()>, ptr: *const u8, len: usize) -> Vec<u8> {
+    promotable_to_vec(data, ptr, len, |shared| shared.cast())
+}
+
+unsafe fn promotable_odd_to_mut(data: &AtomicPtr<()>, ptr: *const u8, len: usize) -> BytesMut {
+    promotable_to_mut(data, ptr, len, |shared| shared.cast())
+}
+
 unsafe fn promotable_odd_drop(data: &mut AtomicPtr<()>, ptr: *const u8, len: usize) {
-    let shared = *data.get_mut();
+    data.with_mut(|shared| {
+        let shared = *shared;
+        let kind = shared as usize & KIND_MASK;
+
+        if kind == KIND_ARC {
+            release_shared(shared.cast());
+        } else {
+            debug_assert_eq!(kind, KIND_VEC);
+
+            free_boxed_slice(shared.cast(), ptr, len);
+        }
+    });
+}
+
+unsafe fn promotable_is_unique(data: &AtomicPtr<()>) -> bool {
+    let shared = data.load(Ordering::Acquire);
     let kind = shared as usize & KIND_MASK;
 
     if kind == KIND_ARC {
-        release_shared(shared as *mut Shared);
+        let ref_cnt = (*shared.cast::<Shared>()).ref_cnt.load(Ordering::Relaxed);
+        ref_cnt == 1
     } else {
-        debug_assert_eq!(kind, KIND_VEC);
-
-        drop(rebuild_boxed_slice(shared as *mut u8, ptr, len));
+        true
     }
 }
 
-unsafe fn rebuild_boxed_slice(buf: *mut u8, offset: *const u8, len: usize) -> Box<[u8]> {
-    let cap = (offset as usize - buf as usize) + len;
-    Box::from_raw(slice::from_raw_parts_mut(buf, cap))
+unsafe fn free_boxed_slice(buf: *mut u8, offset: *const u8, len: usize) {
+    let cap = offset_from(offset, buf) + len;
+    dealloc(buf, Layout::from_size_align(cap, 1).unwrap())
 }
 
 // ===== impl SharedVtable =====
 
 struct Shared {
-    // holds vec for drop, but otherwise doesnt access it
-    _vec: Vec<u8>,
+    // Holds arguments to dealloc upon Drop, but otherwise doesn't use them
+    buf: *mut u8,
+    cap: usize,
     ref_cnt: AtomicUsize,
 }
 
+impl Drop for Shared {
+    fn drop(&mut self) {
+        unsafe { dealloc(self.buf, Layout::from_size_align(self.cap, 1).unwrap()) }
+    }
+}
+
 // Assert that the alignment of `Shared` is divisible by 2.
 // This is a necessary invariant since we depend on allocating `Shared` a
 // shared object to implicitly carry the `KIND_ARC` flag in its pointer.
@@ -912,6 +1375,9 @@ const _: [(); 0 - mem::align_of::<Shared>() % 2] = []; // Assert that the alignm
 
 static SHARED_VTABLE: Vtable = Vtable {
     clone: shared_clone,
+    to_vec: shared_to_vec,
+    to_mut: shared_to_mut,
+    is_unique: shared_is_unique,
     drop: shared_drop,
 };
 
@@ -920,13 +1386,92 @@ const KIND_VEC: usize = 0b1;
 const KIND_MASK: usize = 0b1;
 
 unsafe fn shared_clone(data: &AtomicPtr<()>, ptr: *const u8, len: usize) -> Bytes {
-    let shared = data.load(Ordering::Acquire);
+    let shared = data.load(Ordering::Relaxed);
     shallow_clone_arc(shared as _, ptr, len)
 }
 
+unsafe fn shared_to_vec_impl(shared: *mut Shared, ptr: *const u8, len: usize) -> Vec<u8> {
+    // Check that the ref_cnt is 1 (unique).
+    //
+    // If it is unique, then it is set to 0 with `AcqRel` ordering, for the
+    // same reason as in `release_shared`.
+    //
+    // Otherwise, we take the other branch and call release_shared.
+    if (*shared)
+        .ref_cnt
+        .compare_exchange(1, 0, Ordering::AcqRel, Ordering::Relaxed)
+        .is_ok()
+    {
+        // Deallocate the `Shared` instance without running its destructor.
+        let shared = *Box::from_raw(shared);
+        let shared = ManuallyDrop::new(shared);
+        let buf = shared.buf;
+        let cap = shared.cap;
+
+        // Copy back buffer
+        ptr::copy(ptr, buf, len);
+
+        Vec::from_raw_parts(buf, len, cap)
+    } else {
+        let v = slice::from_raw_parts(ptr, len).to_vec();
+        release_shared(shared);
+        v
+    }
+}
+
+unsafe fn shared_to_vec(data: &AtomicPtr<()>, ptr: *const u8, len: usize) -> Vec<u8> {
+    shared_to_vec_impl(data.load(Ordering::Relaxed).cast(), ptr, len)
+}
+
+unsafe fn shared_to_mut_impl(shared: *mut Shared, ptr: *const u8, len: usize) -> BytesMut {
+    // The goal is to check if the current handle is the only handle
+    // that currently has access to the buffer. This is done by
+    // checking if the `ref_cnt` is currently 1.
+    //
+    // The `Acquire` ordering synchronizes with the `Release` as
+    // part of the `fetch_sub` in `release_shared`. The `fetch_sub`
+    // operation guarantees that any mutations done in other threads
+    // are ordered before the `ref_cnt` is decremented. As such,
+    // this `Acquire` will guarantee that those mutations are
+    // visible to the current thread.
+    //
+    // Otherwise, we take the other branch, copy the data and call `release_shared`.
+    if (*shared).ref_cnt.load(Ordering::Acquire) == 1 {
+        // Deallocate the `Shared` instance without running its destructor.
+        let shared = *Box::from_raw(shared);
+        let shared = ManuallyDrop::new(shared);
+        let buf = shared.buf;
+        let cap = shared.cap;
+
+        // Rebuild Vec
+        let off = offset_from(ptr, buf);
+        let v = Vec::from_raw_parts(buf, len + off, cap);
+
+        let mut b = BytesMut::from_vec(v);
+        b.advance_unchecked(off);
+        b
+    } else {
+        // Copy the data from Shared in a new Vec, then release it
+        let v = slice::from_raw_parts(ptr, len).to_vec();
+        release_shared(shared);
+        BytesMut::from_vec(v)
+    }
+}
+
+unsafe fn shared_to_mut(data: &AtomicPtr<()>, ptr: *const u8, len: usize) -> BytesMut {
+    shared_to_mut_impl(data.load(Ordering::Relaxed).cast(), ptr, len)
+}
+
+pub(crate) unsafe fn shared_is_unique(data: &AtomicPtr<()>) -> bool {
+    let shared = data.load(Ordering::Acquire);
+    let ref_cnt = (*shared.cast::<Shared>()).ref_cnt.load(Ordering::Relaxed);
+    ref_cnt == 1
+}
+
 unsafe fn shared_drop(data: &mut AtomicPtr<()>, _ptr: *const u8, _len: usize) {
-    let shared = *data.get_mut();
-    release_shared(shared as *mut Shared);
+    data.with_mut(|shared| {
+        release_shared(shared.cast());
+    });
 }
 
 unsafe fn shallow_clone_arc(shared: *mut Shared, ptr: *const u8, len: usize) -> Bytes {
@@ -945,8 +1490,14 @@ unsafe fn shallow_clone_arc(shared: *mut Shared, ptr: *const u8, len: usize) ->
 }
 
 #[cold]
-unsafe fn shallow_clone_vec(atom: &AtomicPtr<()>, ptr: *const (), buf: *mut u8, offset: *const u8, len: usize) -> Bytes {
-    // If  the buffer is still tracked in a `Vec<u8>`. It is time to
+unsafe fn shallow_clone_vec(
+    atom: &AtomicPtr<()>,
+    ptr: *const (),
+    buf: *mut u8,
+    offset: *const u8,
+    len: usize,
+) -> Bytes {
+    // If the buffer is still tracked in a `Vec<u8>`, it is time to
     // promote the vec to an `Arc`. This could potentially be called
     // concurrently, so some care must be taken.
 
@@ -957,9 +1508,9 @@ unsafe fn shallow_clone_vec(atom: &AtomicPtr<()>, ptr: *const (), buf: *mut u8,
     // updated and since the buffer hasn't been promoted to an
     // `Arc`, those three fields still are the components of the
     // vector.
-    let vec = rebuild_boxed_slice(buf, offset, len).into_vec();
     let shared = Box::new(Shared {
-        _vec: vec,
+        buf,
+        cap: offset_from(offset, buf) + len,
         // Initialize refcount to 2. One for this reference, and one
         // for the new clone that will be returned from
         // `shallow_clone`.
@@ -979,33 +1530,35 @@ unsafe fn shallow_clone_vec(atom: &AtomicPtr<()>, ptr: *const (), buf: *mut u8,
     // `Release` is used synchronize with other threads that
     // will load the `arc` field.
     //
-    // If the `compare_and_swap` fails, then the thread lost the
+    // If the `compare_exchange` fails, then the thread lost the
     // race to promote the buffer to shared. The `Acquire`
-    // ordering will synchronize with the `compare_and_swap`
+    // ordering will synchronize with the `compare_exchange`
     // that happened in the other thread and the `Shared`
     // pointed to by `actual` will be visible.
-    let actual = atom.compare_and_swap(ptr as _, shared as _, Ordering::AcqRel);
-
-    if actual as usize == ptr as usize {
-        // The upgrade was successful, the new handle can be
-        // returned.
-        return Bytes {
-            ptr: offset,
-            len,
-            data: AtomicPtr::new(shared as _),
-            vtable: &SHARED_VTABLE,
-        };
+    match atom.compare_exchange(ptr as _, shared as _, Ordering::AcqRel, Ordering::Acquire) {
+        Ok(actual) => {
+            debug_assert!(actual as usize == ptr as usize);
+            // The upgrade was successful, the new handle can be
+            // returned.
+            Bytes {
+                ptr: offset,
+                len,
+                data: AtomicPtr::new(shared as _),
+                vtable: &SHARED_VTABLE,
+            }
+        }
+        Err(actual) => {
+            // The upgrade failed, a concurrent clone happened. Release
+            // the allocation that was made in this thread, it will not
+            // be needed.
+            let shared = Box::from_raw(shared);
+            mem::forget(*shared);
+
+            // Buffer already promoted to shared storage, so increment ref
+            // count.
+            shallow_clone_arc(actual as _, offset, len)
+        }
     }
-
-    // The upgrade failed, a concurrent clone happened. Release
-    // the allocation that was made in this thread, it will not
-    // be needed.
-    let shared = Box::from_raw(shared);
-    mem::forget(*shared);
-
-    // Buffer already promoted to shared storage, so increment ref
-    // count.
-    shallow_clone_arc(actual as _, offset, len)
 }
 
 unsafe fn release_shared(ptr: *mut Shared) {
@@ -1031,10 +1584,44 @@ unsafe fn release_shared(ptr: *mut Shared) {
     // > "acquire" operation before deleting the object.
     //
     // [1]: (www.boost.org/doc/libs/1_55_0/doc/html/atomic/usage_examples.html)
-    atomic::fence(Ordering::Acquire);
+    //
+    // Thread sanitizer does not support atomic fences. Use an atomic load
+    // instead.
+    (*ptr).ref_cnt.load(Ordering::Acquire);
 
     // Drop the data
-    Box::from_raw(ptr);
+    drop(Box::from_raw(ptr));
+}
+
+// Ideally we would always use this version of `ptr_map` since it is strict
+// provenance compatible, but it results in worse codegen. We will however still
+// use it on miri because it gives better diagnostics for people who test bytes
+// code with miri.
+//
+// See https://github.com/tokio-rs/bytes/pull/545 for more info.
+#[cfg(miri)]
+fn ptr_map<F>(ptr: *mut u8, f: F) -> *mut u8
+where
+    F: FnOnce(usize) -> usize,
+{
+    let old_addr = ptr as usize;
+    let new_addr = f(old_addr);
+    let diff = new_addr.wrapping_sub(old_addr);
+    ptr.wrapping_add(diff)
+}
+
+#[cfg(not(miri))]
+fn ptr_map<F>(ptr: *mut u8, f: F) -> *mut u8
+where
+    F: FnOnce(usize) -> usize,
+{
+    let old_addr = ptr as usize;
+    let new_addr = f(old_addr);
+    new_addr as *mut u8
+}
+
+fn without_provenance(ptr: usize) -> *const u8 {
+    core::ptr::null::<u8>().wrapping_add(ptr)
 }
 
 // compile-fails
@@ -1062,7 +1649,7 @@ fn _split_off_must_use() {}
 // fuzz tests
 #[cfg(all(test, loom))]
 mod fuzz {
-    use std::sync::Arc;
+    use loom::sync::Arc;
     use loom::thread;
 
     use super::Bytes;
diff --git a/src/bytes_mut.rs b/src/bytes_mut.rs
index dc4e4b179..d5db5124b 100644
--- a/src/bytes_mut.rs
+++ b/src/bytes_mut.rs
@@ -1,22 +1,34 @@
-use core::{cmp, fmt, hash, isize, slice, usize};
-use core::mem::{self, ManuallyDrop};
+use core::iter::FromIterator;
+use core::mem::{self, ManuallyDrop, MaybeUninit};
 use core::ops::{Deref, DerefMut};
 use core::ptr::{self, NonNull};
-use core::iter::{FromIterator, Iterator};
+use core::{cmp, fmt, hash, isize, slice, usize};
 
-use alloc::{vec::Vec, string::String, boxed::Box, borrow::{Borrow, BorrowMut}};
+use alloc::{
+    borrow::{Borrow, BorrowMut},
+    boxed::Box,
+    string::String,
+    vec,
+    vec::Vec,
+};
 
-use crate::{Bytes, Buf, BufMut};
+use crate::buf::{IntoIter, UninitSlice};
 use crate::bytes::Vtable;
-use crate::buf::IntoIter;
-use crate::loom::sync::atomic::{self, AtomicPtr, AtomicUsize, Ordering};
+#[allow(unused)]
+use crate::loom::sync::atomic::AtomicMut;
+use crate::loom::sync::atomic::{AtomicPtr, AtomicUsize, Ordering};
+use crate::{offset_from, Buf, BufMut, Bytes, TryGetError};
 
 /// A unique reference to a contiguous slice of memory.
 ///
 /// `BytesMut` represents a unique view into a potentially shared memory region.
 /// Given the uniqueness guarantee, owners of `BytesMut` handles are able to
-/// mutate the memory. It is similar to a `Vec<u8>` but with less copies and
-/// allocations.
+/// mutate the memory.
+///
+/// `BytesMut` can be thought of as containing a `buf: Arc<Vec<u8>>`, an offset
+/// into `buf`, a slice length, and a guarantee that no other `BytesMut` for the
+/// same `buf` overlaps with its slice. That guarantee means that a write lock
+/// is not required.
 ///
 /// # Growth
 ///
@@ -68,6 +80,12 @@ struct Shared {
     ref_count: AtomicUsize,
 }
 
+// Assert that the alignment of `Shared` is divisible by 2.
+// This is a necessary invariant since we depend on the allocation of a
+// `Shared` object to implicitly carry the `KIND_ARC` flag in its pointer.
+// This flag is set when the LSB is 0.
+const _: [(); 0 - mem::align_of::<Shared>() % 2] = []; // Assert that the alignment of `Shared` is divisible by 2.
+
 // Buffer storage strategy flags.
 const KIND_ARC: usize = 0b0;
 const KIND_VEC: usize = 0b1;
@@ -84,11 +102,11 @@ const MIN_ORIGINAL_CAPACITY_WIDTH: usize = 10;
 const ORIGINAL_CAPACITY_MASK: usize = 0b11100;
 const ORIGINAL_CAPACITY_OFFSET: usize = 2;
 
+const VEC_POS_OFFSET: usize = 5;
 // When the storage is in the `Vec` representation, the pointer can be advanced
 // at most this value. This is due to the amount of storage available to track
 // the offset is usize - number of KIND bits and number of ORIGINAL_CAPACITY
 // bits.
-const VEC_POS_OFFSET: usize = 5;
 const MAX_VEC_POS: usize = usize::MAX >> VEC_POS_OFFSET;
 const NOT_VEC_POS_MASK: usize = 0b11111;
 
@@ -107,8 +125,7 @@ impl BytesMut {
     /// Creates a new `BytesMut` with the specified capacity.
     ///
     /// The returned `BytesMut` will be able to hold at least `capacity` bytes
-    /// without reallocating. If `capacity` is under `4 * size_of::<usize>() - 1`,
-    /// then `BytesMut` will not allocate.
+    /// without reallocating.
     ///
     /// It is important to note that this function does not specify the length
     /// of the returned `BytesMut`, but only the capacity.
@@ -226,32 +243,57 @@ impl BytesMut {
     /// th.join().unwrap();
     /// ```
     #[inline]
-    pub fn freeze(mut self) -> Bytes {
-        if self.kind() == KIND_VEC {
+    pub fn freeze(self) -> Bytes {
+        let bytes = ManuallyDrop::new(self);
+        if bytes.kind() == KIND_VEC {
             // Just re-use `Bytes` internal Vec vtable
             unsafe {
-                let (off, _) = self.get_vec_pos();
-                let vec = rebuild_vec(self.ptr.as_ptr(), self.len, self.cap, off);
-                mem::forget(self);
-                vec.into()
+                let off = bytes.get_vec_pos();
+                let vec = rebuild_vec(bytes.ptr.as_ptr(), bytes.len, bytes.cap, off);
+                let mut b: Bytes = vec.into();
+                b.advance(off);
+                b
             }
         } else {
-            debug_assert_eq!(self.kind(), KIND_ARC);
+            debug_assert_eq!(bytes.kind(), KIND_ARC);
 
-            let ptr = self.ptr.as_ptr();
-            let len = self.len;
-            let data = AtomicPtr::new(self.data as _);
-            mem::forget(self);
-            unsafe {
-                Bytes::with_vtable(ptr, len, data, &SHARED_VTABLE)
-            }
+            let ptr = bytes.ptr.as_ptr();
+            let len = bytes.len;
+            let data = AtomicPtr::new(bytes.data.cast());
+            unsafe { Bytes::with_vtable(ptr, len, data, &SHARED_VTABLE) }
         }
     }
 
+    /// Creates a new `BytesMut` containing `len` zeros.
+    ///
+    /// The resulting object has a length of `len` and a capacity greater
+    /// than or equal to `len`. The entire length of the object will be filled
+    /// with zeros.
+    ///
+    /// On some platforms or allocators this function may be faster than
+    /// a manual implementation.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::BytesMut;
+    ///
+    /// let zeros = BytesMut::zeroed(42);
+    ///
+    /// assert!(zeros.capacity() >= 42);
+    /// assert_eq!(zeros.len(), 42);
+    /// zeros.into_iter().for_each(|x| assert_eq!(x, 0));
+    /// ```
+    pub fn zeroed(len: usize) -> BytesMut {
+        BytesMut::from_vec(vec![0; len])
+    }
+
     /// Splits the bytes into two at the given index.
     ///
     /// Afterwards `self` contains elements `[0, at)`, and the returned
-    /// `BytesMut` contains elements `[at, capacity)`.
+    /// `BytesMut` contains elements `[at, capacity)`. It's guaranteed that the
+    /// memory does not move, that is, the address of `self` does not change,
+    /// and the address of the returned slice is `at` bytes after that.
     ///
     /// This is an `O(1)` operation that just increases the reference count
     /// and sets a few indices.
@@ -284,8 +326,10 @@ impl BytesMut {
         );
         unsafe {
             let mut other = self.shallow_clone();
-            other.set_start(at);
-            self.set_end(at);
+            // SAFETY: We've checked that `at` <= `self.capacity()` above.
+            other.advance_unchecked(at);
+            self.cap = at;
+            self.len = cmp::min(self.len, at);
             other
         }
     }
@@ -315,7 +359,7 @@ impl BytesMut {
     ///
     /// assert_eq!(other, b"hello world"[..]);
     /// ```
-    #[must_use = "consider BytesMut::advance(len()) if you don't need the other half"]
+    #[must_use = "consider BytesMut::clear if you don't need the other half"]
     pub fn split(&mut self) -> BytesMut {
         let len = self.len();
         self.split_to(len)
@@ -358,8 +402,11 @@ impl BytesMut {
 
         unsafe {
             let mut other = self.shallow_clone();
-            other.set_end(at);
-            self.set_start(at);
+            // SAFETY: We've checked that `at` <= `self.len()` and we know that `self.len()` <=
+            // `self.capacity()`.
+            self.advance_unchecked(at);
+            other.cap = at;
+            other.len = at;
             other
         }
     }
@@ -370,7 +417,9 @@ impl BytesMut {
     /// If `len` is greater than the buffer's current length, this has no
     /// effect.
     ///
-    /// The [`split_off`] method can emulate `truncate`, but this causes the
+    /// Existing underlying capacity is preserved.
+    ///
+    /// The [split_off](`Self::split_off()`) method can emulate `truncate`, but this causes the
     /// excess bytes to be returned instead of dropped.
     ///
     /// # Examples
@@ -382,15 +431,14 @@ impl BytesMut {
     /// buf.truncate(5);
     /// assert_eq!(buf, b"hello"[..]);
     /// ```
-    ///
-    /// [`split_off`]: #method.split_off
     pub fn truncate(&mut self, len: usize) {
         if len <= self.len() {
-            unsafe { self.set_len(len); }
+            // SAFETY: Shrinking the buffer cannot expose uninitialized bytes.
+            unsafe { self.set_len(len) };
         }
     }
 
-    /// Clears the buffer, removing all data.
+    /// Clears the buffer, removing all data. Existing capacity is preserved.
     ///
     /// # Examples
     ///
@@ -402,7 +450,8 @@ impl BytesMut {
     /// assert!(buf.is_empty());
     /// ```
     pub fn clear(&mut self) {
-        self.truncate(0);
+        // SAFETY: Setting the length to zero cannot expose uninitialized bytes.
+        unsafe { self.set_len(0) };
     }
 
     /// Resizes the buffer so that `len` is equal to `new_len`.
@@ -428,18 +477,26 @@ impl BytesMut {
     /// assert_eq!(&buf[..], &[0x1, 0x1, 0x3, 0x3]);
     /// ```
     pub fn resize(&mut self, new_len: usize, value: u8) {
-        let len = self.len();
-        if new_len > len {
-            let additional = new_len - len;
-            self.reserve(additional);
-            unsafe {
-                let dst = self.bytes_mut().as_mut_ptr();
-                ptr::write_bytes(dst, value, additional);
-                self.set_len(new_len);
-            }
+        let additional = if let Some(additional) = new_len.checked_sub(self.len()) {
+            additional
         } else {
             self.truncate(new_len);
+            return;
+        };
+
+        if additional == 0 {
+            return;
         }
+
+        self.reserve(additional);
+        let dst = self.spare_capacity_mut().as_mut_ptr();
+        // SAFETY: `spare_capacity_mut` returns a valid, properly aligned pointer and we've
+        // reserved enough space to write `additional` bytes.
+        unsafe { ptr::write_bytes(dst, value, additional) };
+
+        // SAFETY: There are at least `new_len` initialized bytes in the buffer so no
+        // uninitialized bytes are being exposed.
+        unsafe { self.set_len(new_len) };
     }
 
     /// Sets the length of the buffer.
@@ -467,6 +524,7 @@ impl BytesMut {
     ///
     /// assert_eq!(&b[..], b"hello world");
     /// ```
+    #[inline]
     pub unsafe fn set_len(&mut self, len: usize) {
         debug_assert!(len <= self.cap, "set_len out of bounds");
         self.len = len;
@@ -479,11 +537,20 @@ impl BytesMut {
     /// reallocations. A call to `reserve` may result in an allocation.
     ///
     /// Before allocating new buffer space, the function will attempt to reclaim
-    /// space in the existing buffer. If the current handle references a small
-    /// view in the original buffer and all other handles have been dropped,
-    /// and the requested capacity is less than or equal to the existing
-    /// buffer's capacity, then the current view will be copied to the front of
-    /// the buffer and the handle will take ownership of the full buffer.
+    /// space in the existing buffer. If the current handle references a view
+    /// into a larger original buffer, and all other handles referencing part
+    /// of the same original buffer have been dropped, then the current view
+    /// can be copied/shifted to the front of the buffer and the handle can take
+    /// ownership of the full buffer, provided that the full buffer is large
+    /// enough to fit the requested additional capacity.
+    ///
+    /// This optimization will only happen if shifting the data from the current
+    /// view to the front of the buffer is not too expensive in terms of the
+    /// (amortized) time required. The precise condition is subject to change;
+    /// as of now, the length of the data being shifted needs to be at least as
+    /// large as the distance that it's shifted by. If the current view is empty
+    /// and the original buffer is large enough to fit the requested additional
+    /// capacity, then reallocations will never happen.
     ///
     /// # Examples
     ///
@@ -532,12 +599,13 @@ impl BytesMut {
             return;
         }
 
-        self.reserve_inner(additional);
+        // Cannot fail: with `allocate = true` it either makes room or panics on overflow.
+        let _ = self.reserve_inner(additional, true);
     }
 
-    // In separate function to allow the short-circuits in `reserve` to
-    // be inline-able. Significant helps performance.
-    fn reserve_inner(&mut self, additional: usize) {
+    // Kept in a separate function so the short-circuits in `reserve` and `try_reclaim` stay
+    // inline-able; this significantly helps performance. Returns `false` if it did not succeed.
+    fn reserve_inner(&mut self, additional: usize, allocate: bool) -> bool {
         let len = self.len();
         let kind = self.kind();
 
@@ -547,56 +615,74 @@ impl BytesMut {
             // space.
             //
             // Otherwise, since backed by a vector, use `Vec::reserve`
+            //
+            // We need to make sure that this optimization does not kill the
+            // amortized runtimes of BytesMut's operations.
             unsafe {
-                let (off, prev) = self.get_vec_pos();
+                let off = self.get_vec_pos();
 
-                // Only reuse space if we stand to gain at least capacity/2
-                // bytes of space back
-                if off >= additional && off >= (self.cap / 2) {
-                    // There's space - reuse it
+                // Only reuse space if we can satisfy the requested additional space.
+                //
+                // Also check if the value of `off` suggests that enough bytes
+                // have been read to account for the overhead of shifting all
+                // the data (in an amortized analysis).
+                // Hence the condition `off >= self.len()`.
+                //
+                // This condition also already implies that the buffer is going
+                // to be (at least) half-empty in the end; so we do not break
+                // the (amortized) runtime with future resizes of the underlying
+                // `Vec`.
+                //
+                // [For more details check issue #524, and PR #525.]
+                if self.capacity() - self.len() + off >= additional && off >= self.len() {
+                    // There's enough space, and it's not too much overhead:
+                    // reuse the space!
                     //
                     // Just move the pointer back to the start after copying
                     // data back.
-                    let base_ptr = self.ptr.as_ptr().offset(-(off as isize));
-                    ptr::copy(self.ptr.as_ptr(), base_ptr, self.len);
+                    let base_ptr = self.ptr.as_ptr().sub(off);
+                    // Since `off >= self.len()`, the two regions don't overlap.
+                    ptr::copy_nonoverlapping(self.ptr.as_ptr(), base_ptr, self.len);
                     self.ptr = vptr(base_ptr);
-                    self.set_vec_pos(0, prev);
+                    self.set_vec_pos(0);
 
                     // Length stays constant, but since we moved backwards we
                     // can gain capacity back.
                     self.cap += off;
                 } else {
-                    // No space - allocate more
-                    let mut v = ManuallyDrop::new(rebuild_vec(self.ptr.as_ptr(), self.len, self.cap, off));
+                    if !allocate {
+                        return false;
+                    }
+                    // Not enough space, or reusing might be too much overhead:
+                    // allocate more space!
+                    let mut v =
+                        ManuallyDrop::new(rebuild_vec(self.ptr.as_ptr(), self.len, self.cap, off));
                     v.reserve(additional);
 
                     // Update the info
-                    self.ptr = vptr(v.as_mut_ptr().offset(off as isize));
-                    self.len = v.len() - off;
+                    self.ptr = vptr(v.as_mut_ptr().add(off));
                     self.cap = v.capacity() - off;
+                    debug_assert_eq!(self.len, v.len() - off);
                 }
 
-                return;
+                return true;
             }
         }
 
         debug_assert_eq!(kind, KIND_ARC);
-        let shared: *mut Shared = self.data as _;
-
+        let shared: *mut Shared = self.data;
 
         // Reserving involves abandoning the currently shared buffer and
         // allocating a new vector with the requested capacity.
         //
         // Compute the new capacity
-        let mut new_cap = len.checked_add(additional).expect("overflow");
-
-        let original_capacity;
-        let original_capacity_repr;
+        let mut new_cap = match len.checked_add(additional) {
+            Some(new_cap) => new_cap,
+            None if !allocate => return false,
+            None => panic!("overflow"),
+        };
 
         unsafe {
-            original_capacity_repr = (*shared).original_capacity_repr;
-            original_capacity = original_capacity_from_repr(original_capacity_repr);
-
             // First, try to reclaim the buffer. This is possible if the current
             // handle is the only outstanding handle pointing to the buffer.
             if (*shared).is_unique() {
@@ -605,35 +691,78 @@ impl BytesMut {
                 // sure that the vector has enough capacity.
                 let v = &mut (*shared).vec;
 
-                if v.capacity() >= new_cap {
-                    // The capacity is sufficient, reclaim the buffer
-                    let ptr = v.as_mut_ptr();
+                let v_capacity = v.capacity();
+                let ptr = v.as_mut_ptr();
 
-                    ptr::copy(self.ptr.as_ptr(), ptr, len);
+                let offset = offset_from(self.ptr.as_ptr(), ptr);
+
+                // Compare the condition in the `kind == KIND_VEC` case above
+                // for more details.
+                if v_capacity >= new_cap + offset {
+                    self.cap = new_cap;
+                    // no copy is necessary
+                } else if v_capacity >= new_cap && offset >= len {
+                    // The capacity is sufficient, and copying is not too much
+                    // overhead: reclaim the buffer!
+
+                    // `offset >= len` means: no overlap
+                    ptr::copy_nonoverlapping(self.ptr.as_ptr(), ptr, len);
 
                     self.ptr = vptr(ptr);
                     self.cap = v.capacity();
+                } else {
+                    if !allocate {
+                        return false;
+                    }
+                    // calculate offset
+                    let off = (self.ptr.as_ptr() as usize) - (v.as_ptr() as usize);
+
+                    // new_cap is calculated in terms of `BytesMut`, not the underlying
+                    // `Vec`, so it does not take the offset into account.
+                    //
+                    // Thus we have to manually add it here.
+                    new_cap = new_cap.checked_add(off).expect("overflow");
+
+                    // The vector capacity is not sufficient. The reserve request is
+                    // asking for more than the initial buffer capacity. Allocate more
+                    // than requested if `new_cap` is not much bigger than the current
+                    // capacity.
+                    //
+                    // There are some situations, using `reserve_exact` that the
+                    // buffer capacity could be below `original_capacity`, so do a
+                    // check.
+                    let double = v.capacity().checked_shl(1).unwrap_or(new_cap);
 
-                    return;
+                    new_cap = cmp::max(double, new_cap);
+
+                    // No space - allocate more
+                    //
+                    // The length field of `Shared::vec` is not used by the `BytesMut`;
+                    // instead we use the `len` field in the `BytesMut` itself. However,
+                    // when calling `reserve`, it doesn't guarantee that data stored in
+                    // the unused capacity of the vector is copied over to the new
+                    // allocation, so we need to ensure that we don't have any data we
+                    // care about in the unused capacity before calling `reserve`.
+                    debug_assert!(off + len <= v.capacity());
+                    v.set_len(off + len);
+                    v.reserve(new_cap - v.len());
+
+                    // Update the info
+                    self.ptr = vptr(v.as_mut_ptr().add(off));
+                    self.cap = v.capacity() - off;
                 }
 
-                // The vector capacity is not sufficient. The reserve request is
-                // asking for more than the initial buffer capacity. Allocate more
-                // than requested if `new_cap` is not much bigger than the current
-                // capacity.
-                //
-                // There are some situations, using `reserve_exact` that the
-                // buffer capacity could be below `original_capacity`, so do a
-                // check.
-                let double = v.capacity().checked_shl(1).unwrap_or(new_cap);
-
-                new_cap = cmp::max(
-                    cmp::max(double, new_cap),
-                    original_capacity);
-            } else {
-                new_cap = cmp::max(new_cap, original_capacity);
+                return true;
             }
         }
+        if !allocate {
+            return false;
+        }
+
+        let original_capacity_repr = unsafe { (*shared).original_capacity_repr };
+        let original_capacity = original_capacity_from_repr(original_capacity_repr);
+
+        new_cap = cmp::max(new_cap, original_capacity);
 
         // Create a new vector to store the data
         let mut v = ManuallyDrop::new(Vec::with_capacity(new_cap));
@@ -647,10 +776,71 @@ impl BytesMut {
 
         // Update self
         let data = (original_capacity_repr << ORIGINAL_CAPACITY_OFFSET) | KIND_VEC;
-        self.data = data as _;
+        self.data = invalid_ptr(data);
         self.ptr = vptr(v.as_mut_ptr());
-        self.len = v.len();
         self.cap = v.capacity();
+        debug_assert_eq!(self.len, v.len());
+        return true;
+    }
+
+    /// Tries to make room for at least `additional` more bytes without allocating,
+    /// returning `true` when that much spare capacity is available afterwards.
+    ///
+    /// This follows the same reclaiming logic as `reserve`, but where `reserve` would fall
+    /// back to allocating new storage, `try_reclaim` gives up and returns `false` instead.
+    ///
+    /// `try_reclaim` returns `false` only when both of the following hold:
+    ///  - the current spare capacity is smaller than `additional` bytes, and
+    ///  - the existing allocation cannot be reclaimed cheaply, or it is too small to
+    ///    provide `additional` spare bytes.
+    ///
+    /// Cheap reclamation requires that no other `BytesMut` or `Bytes` handle still
+    /// points to the same underlying storage; see `reserve` for the full conditions that
+    /// keep the copy inexpensive.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::BytesMut;
+    ///
+    /// let mut buf = BytesMut::with_capacity(64);
+    /// assert_eq!(true, buf.try_reclaim(64));
+    /// assert_eq!(64, buf.capacity());
+    ///
+    /// buf.extend_from_slice(b"abcd");
+    /// let mut split = buf.split();
+    /// assert_eq!(60, buf.capacity());
+    /// assert_eq!(4, split.capacity());
+    /// assert_eq!(false, split.try_reclaim(64));
+    /// assert_eq!(false, buf.try_reclaim(64));
+    /// // `split` is completely full: it holds "abcd" in 4 bytes of capacity
+    /// assert_eq!(false, split.try_reclaim(4));
+    /// // `buf` is empty and already has 60 bytes of spare capacity
+    /// assert_eq!(true, buf.try_reclaim(60));
+    ///
+    /// drop(buf);
+    /// assert_eq!(false, split.try_reclaim(64));
+    ///
+    /// split.clear();
+    /// assert_eq!(4, split.capacity());
+    /// assert_eq!(true, split.try_reclaim(64));
+    /// assert_eq!(64, split.capacity());
+    /// ```
+    // Splitting the slow path into a `try_reclaim_inner` was tried, but Rust 1.78.0 inlined it
+    // anyway, so the extra function is probably not worth having.
+    #[inline]
+    #[must_use = "consider BytesMut::reserve if you need an infallible reservation"]
+    pub fn try_reclaim(&mut self, additional: usize) -> bool {
+        // Bytes this handle can still accept without touching the allocation.
+        let spare = self.capacity() - self.len();
+
+        if spare < additional {
+            // Not enough room: attempt an in-place reclaim; `allocate = false`
+            // makes `reserve_inner` report failure rather than allocate.
+            return self.reserve_inner(additional, false);
+        }
+
+        true
+    }
 
     /// Appends given bytes to this `BytesMut`.
@@ -669,31 +859,31 @@ impl BytesMut {
     ///
     /// assert_eq!(b"aaabbbcccddd", &buf[..]);
     /// ```
+    #[inline]
     pub fn extend_from_slice(&mut self, extend: &[u8]) {
         let cnt = extend.len();
         self.reserve(cnt);
 
         unsafe {
-            let dst = self.maybe_uninit_bytes();
+            let dst = self.spare_capacity_mut();
             // Reserved above
             debug_assert!(dst.len() >= cnt);
 
-            ptr::copy_nonoverlapping(
-                extend.as_ptr(),
-                dst.as_mut_ptr() as *mut u8,
-                cnt);
-
+            ptr::copy_nonoverlapping(extend.as_ptr(), dst.as_mut_ptr().cast(), cnt);
         }
 
-        unsafe { self.advance_mut(cnt); }
+        unsafe {
+            self.advance_mut(cnt);
+        }
     }
 
     /// Absorbs a `BytesMut` that was previously split off.
     ///
-    /// If the two `BytesMut` objects were previously contiguous, i.e., if
-    /// `other` was created by calling `split_off` on this `BytesMut`, then
-    /// this is an `O(1)` operation that just decreases a reference
-    /// count and sets a few indices. Otherwise this method degenerates to
+    /// If the two `BytesMut` objects were previously contiguous and have not
+    /// been mutated in a way that causes re-allocation, i.e., if `other` was
+    /// created by calling `split_off` on this `BytesMut`, then this is an
+    /// `O(1)` operation that just decreases a reference count and sets a few
+    /// indices. Otherwise this method degenerates to
     /// `self.extend_from_slice(other.as_ref())`.
     ///
     /// # Examples
@@ -731,11 +921,11 @@ impl BytesMut {
     // internal change could make a simple pattern (`BytesMut::from(vec)`)
     // suddenly a lot more expensive.
     #[inline]
-    pub(crate) fn from_vec(mut vec: Vec<u8>) -> BytesMut {
+    pub(crate) fn from_vec(vec: Vec<u8>) -> BytesMut {
+        let mut vec = ManuallyDrop::new(vec);
         let ptr = vptr(vec.as_mut_ptr());
         let len = vec.len();
         let cap = vec.capacity();
-        mem::forget(vec);
 
         let original_capacity_repr = original_capacity_to_repr(cap);
         let data = (original_capacity_repr << ORIGINAL_CAPACITY_OFFSET) | KIND_VEC;
@@ -744,32 +934,33 @@ impl BytesMut {
             ptr,
             len,
             cap,
-            data: data as *mut _,
+            data: invalid_ptr(data),
         }
     }
 
     #[inline]
     fn as_slice(&self) -> &[u8] {
-        unsafe {
-            slice::from_raw_parts(self.ptr.as_ptr(), self.len)
-        }
+        unsafe { slice::from_raw_parts(self.ptr.as_ptr(), self.len) }
     }
 
     #[inline]
     fn as_slice_mut(&mut self) -> &mut [u8] {
-        unsafe {
-            slice::from_raw_parts_mut(self.ptr.as_ptr(), self.len)
-        }
+        unsafe { slice::from_raw_parts_mut(self.ptr.as_ptr(), self.len) }
     }
 
-    unsafe fn set_start(&mut self, start: usize) {
+    /// Advance the buffer without bounds checking.
+    ///
+    /// # Safety
+    ///
+    /// The caller must ensure that `count` <= `self.cap`.
+    pub(crate) unsafe fn advance_unchecked(&mut self, count: usize) {
         // Setting the start to 0 is a no-op, so return early if this is the
         // case.
-        if start == 0 {
+        if count == 0 {
             return;
         }
 
-        debug_assert!(start <= self.cap, "internal: set_start out of bounds");
+        debug_assert!(count <= self.cap, "internal: set_start out of bounds");
 
         let kind = self.kind();
 
@@ -778,52 +969,37 @@ impl BytesMut {
             // complicated. First, we have to track how far ahead the
             // "start" of the byte buffer from the beginning of the vec. We
             // also have to ensure that we don't exceed the maximum shift.
-            let (mut pos, prev) = self.get_vec_pos();
-            pos += start;
+            let pos = self.get_vec_pos() + count;
 
             if pos <= MAX_VEC_POS {
-                self.set_vec_pos(pos, prev);
+                self.set_vec_pos(pos);
             } else {
                 // The repr must be upgraded to ARC. This will never happen
                 // on 64 bit systems and will only happen on 32 bit systems
                 // when shifting past 134,217,727 bytes. As such, we don't
                 // worry too much about performance here.
-                self.promote_to_shared(/*ref_count = */1);
+                self.promote_to_shared(/*ref_count = */ 1);
             }
         }
 
         // Updating the start of the view is setting `ptr` to point to the
         // new start and updating the `len` field to reflect the new length
         // of the view.
-        self.ptr = vptr(self.ptr.as_ptr().offset(start as isize));
-
-        if self.len >= start {
-            self.len -= start;
-        } else {
-            self.len = 0;
-        }
-
-        self.cap -= start;
-    }
-
-    unsafe fn set_end(&mut self, end: usize) {
-        debug_assert_eq!(self.kind(), KIND_ARC);
-        assert!(end <= self.cap, "set_end out of bounds");
-
-        self.cap = end;
-        self.len = cmp::min(self.len, end);
+        self.ptr = vptr(self.ptr.as_ptr().add(count));
+        self.len = self.len.checked_sub(count).unwrap_or(0);
+        self.cap -= count;
     }
 
     fn try_unsplit(&mut self, other: BytesMut) -> Result<(), BytesMut> {
-        if other.is_empty() {
+        if other.capacity() == 0 {
             return Ok(());
         }
 
-        let ptr = unsafe { self.ptr.as_ptr().offset(self.len as isize) };
-        if ptr == other.ptr.as_ptr() &&
-           self.kind() == KIND_ARC &&
-           other.kind() == KIND_ARC &&
-           self.data == other.data
+        let ptr = unsafe { self.ptr.as_ptr().add(self.len) };
+        if ptr == other.ptr.as_ptr()
+            && self.kind() == KIND_ARC
+            && other.kind() == KIND_ARC
+            && self.data == other.data
         {
             // Contiguous blocks, just combine directly
             self.len += other.len;
@@ -869,7 +1045,7 @@ impl BytesMut {
         // always succeed.
         debug_assert_eq!(shared as usize & KIND_MASK, KIND_ARC);
 
-        self.data = shared as _;
+        self.data = shared;
     }
 
     /// Makes an exact shallow clone of `self`.
@@ -884,34 +1060,62 @@ impl BytesMut {
             increment_shared(self.data);
             ptr::read(self)
         } else {
-            self.promote_to_shared(/*ref_count = */2);
+            self.promote_to_shared(/*ref_count = */ 2);
             ptr::read(self)
         }
     }
 
     #[inline]
-    unsafe fn get_vec_pos(&mut self) -> (usize, usize) {
+    unsafe fn get_vec_pos(&self) -> usize {
         debug_assert_eq!(self.kind(), KIND_VEC);
 
-        let prev = self.data as usize;
-        (prev >> VEC_POS_OFFSET, prev)
+        self.data as usize >> VEC_POS_OFFSET
     }
 
     #[inline]
-    unsafe fn set_vec_pos(&mut self, pos: usize, prev: usize) {
+    unsafe fn set_vec_pos(&mut self, pos: usize) {
         debug_assert_eq!(self.kind(), KIND_VEC);
         debug_assert!(pos <= MAX_VEC_POS);
 
-        self.data = ((pos << VEC_POS_OFFSET) | (prev & NOT_VEC_POS_MASK)) as *mut _;
+        self.data = invalid_ptr((pos << VEC_POS_OFFSET) | (self.data as usize & NOT_VEC_POS_MASK));
     }
 
+    /// Returns the remaining spare capacity of the buffer as a slice of `MaybeUninit<u8>`.
+    ///
+    /// The returned slice can be used to fill the buffer with data (e.g. by
+    /// reading from a file) before marking the data as initialized using the
+    /// [`set_len`] method.
+    ///
+    /// [`set_len`]: BytesMut::set_len
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bytes::BytesMut;
+    ///
+    /// // Allocate buffer big enough for 10 bytes.
+    /// let mut buf = BytesMut::with_capacity(10);
+    ///
+    /// // Fill in the first 3 elements.
+    /// let uninit = buf.spare_capacity_mut();
+    /// uninit[0].write(0);
+    /// uninit[1].write(1);
+    /// uninit[2].write(2);
+    ///
+    /// // Mark the first 3 bytes of the buffer as being initialized.
+    /// unsafe {
+    ///     buf.set_len(3);
+    /// }
+    ///
+    /// assert_eq!(&buf[..], &[0, 1, 2]);
+    /// ```
     #[inline]
-    fn maybe_uninit_bytes(&mut self) -> &mut [mem::MaybeUninit<u8>] {
+    pub fn spare_capacity_mut(&mut self) -> &mut [MaybeUninit<u8>] {
         unsafe {
-            let ptr = self.ptr.as_ptr().offset(self.len as isize);
+            let ptr = self.ptr.as_ptr().add(self.len);
             let len = self.cap - self.len;
 
-            slice::from_raw_parts_mut(ptr as *mut mem::MaybeUninit<u8>, len)
+            slice::from_raw_parts_mut(ptr.cast(), len)
         }
     }
 }
@@ -922,13 +1126,13 @@ impl Drop for BytesMut {
 
         if kind == KIND_VEC {
             unsafe {
-                let (off, _) = self.get_vec_pos();
+                let off = self.get_vec_pos();
 
                 // Vector storage, free the vector
                 let _ = rebuild_vec(self.ptr.as_ptr(), self.len, self.cap, off);
             }
         } else if kind == KIND_ARC {
-            unsafe { release_shared(self.data as _) };
+            unsafe { release_shared(self.data) };
         }
     }
 }
@@ -940,7 +1144,7 @@ impl Buf for BytesMut {
     }
 
     #[inline]
-    fn bytes(&self) -> &[u8] {
+    fn chunk(&self) -> &[u8] {
         self.as_slice()
     }
 
@@ -952,15 +1156,19 @@ impl Buf for BytesMut {
             cnt,
             self.remaining(),
         );
-        unsafe { self.set_start(cnt); }
+        unsafe {
+            // SAFETY: We've checked that `cnt` <= `self.remaining()` and we know that
+            // `self.remaining()` <= `self.cap`.
+            self.advance_unchecked(cnt);
+        }
     }
 
-    fn to_bytes(&mut self) -> crate::Bytes {
-        self.split().freeze()
+    fn copy_to_bytes(&mut self, len: usize) -> Bytes {
+        self.split_to(len).freeze()
     }
 }
 
-impl BufMut for BytesMut {
+unsafe impl BufMut for BytesMut {
     #[inline]
     fn remaining_mut(&self) -> usize {
         usize::MAX - self.len()
@@ -968,25 +1176,34 @@ impl BufMut for BytesMut {
 
     #[inline]
     unsafe fn advance_mut(&mut self, cnt: usize) {
-        let new_len = self.len() + cnt;
-        assert!(new_len <= self.cap, "new_len = {}; capacity = {}", new_len, self.cap);
-        self.len = new_len;
+        let remaining = self.cap - self.len();
+        if cnt > remaining {
+            super::panic_advance(&TryGetError {
+                requested: cnt,
+                available: remaining,
+            });
+        }
+        // Addition won't overflow since it is at most `self.cap`.
+        self.len = self.len() + cnt;
     }
 
     #[inline]
-    fn bytes_mut(&mut self) -> &mut [mem::MaybeUninit<u8>] {
+    fn chunk_mut(&mut self) -> &mut UninitSlice {
         if self.capacity() == self.len() {
             self.reserve(64);
         }
-        self.maybe_uninit_bytes()
+        self.spare_capacity_mut().into()
     }
 
     // Specialize these methods so they can skip checking `remaining_mut`
     // and `advance_mut`.
 
-    fn put<T: crate::Buf>(&mut self, mut src: T) where Self: Sized {
+    fn put<T: Buf>(&mut self, mut src: T)
+    where
+        Self: Sized,
+    {
         while src.has_remaining() {
-            let s = src.bytes();
+            let s = src.chunk();
             let l = s.len();
             self.extend_from_slice(s);
             src.advance(l);
@@ -996,6 +1213,19 @@ impl BufMut for BytesMut {
     fn put_slice(&mut self, src: &[u8]) {
         self.extend_from_slice(src);
     }
+
+    // Fills the next `cnt` bytes with `val`, growing the buffer first if needed.
+    fn put_bytes(&mut self, val: u8, cnt: usize) {
+        self.reserve(cnt);
+        let spare = self.spare_capacity_mut();
+        // `reserve(cnt)` above guarantees at least `cnt` bytes of spare capacity.
+        debug_assert!(spare.len() >= cnt);
+        let dst = spare.as_mut_ptr();
+        unsafe {
+            ptr::write_bytes(dst, val, cnt);
+            self.advance_mut(cnt);
+        }
+    }
 }
 
 impl AsRef<[u8]> for BytesMut {
@@ -1015,6 +1245,7 @@ impl Deref for BytesMut {
 }
 
 impl AsMut<[u8]> for BytesMut {
+    #[inline]
     fn as_mut(&mut self) -> &mut [u8] {
         self.as_slice_mut()
     }
@@ -1063,8 +1294,7 @@ impl Ord for BytesMut {
     }
 }
 
-impl Eq for BytesMut {
-}
+impl Eq for BytesMut {}
 
 impl Default for BytesMut {
     #[inline]
@@ -1074,7 +1304,10 @@ impl Default for BytesMut {
 }
 
 impl hash::Hash for BytesMut {
-    fn hash<H>(&self, state: &mut H) where H: hash::Hasher {
+    fn hash<H>(&self, state: &mut H)
+    where
+        H: hash::Hasher,
+    {
         let s: &[u8] = self.as_ref();
         s.hash(state);
     }
@@ -1129,12 +1362,15 @@ impl<'a> IntoIterator for &'a BytesMut {
     type IntoIter = core::slice::Iter<'a, u8>;
 
     fn into_iter(self) -> Self::IntoIter {
-        self.as_ref().into_iter()
+        self.as_ref().iter()
     }
 }
 
 impl Extend<u8> for BytesMut {
-    fn extend<T>(&mut self, iter: T) where T: IntoIterator<Item = u8> {
+    fn extend<T>(&mut self, iter: T)
+    where
+        T: IntoIterator<Item = u8>,
+    {
         let iter = iter.into_iter();
 
         let (lower, _) = iter.size_hint();
@@ -1142,17 +1378,29 @@ impl Extend<u8> for BytesMut {
 
         // TODO: optimize
         // 1. If self.kind() == KIND_VEC, use Vec::extend
-        // 2. Make `reserve` inline-able
         for b in iter {
-            self.reserve(1);
             self.put_u8(b);
         }
     }
 }
 
 impl<'a> Extend<&'a u8> for BytesMut {
-    fn extend<T>(&mut self, iter: T) where T: IntoIterator<Item = &'a u8> {
-        self.extend(iter.into_iter().map(|b| *b))
+    fn extend<T>(&mut self, iter: T)
+    where
+        T: IntoIterator<Item = &'a u8>,
+    {
+        self.extend(iter.into_iter().copied())
+    }
+}
+
+// Appends the contents of every `Bytes` yielded by the iterator, in order.
+impl Extend<Bytes> for BytesMut {
+    fn extend<T>(&mut self, iter: T)
+    where
+        T: IntoIterator<Item = Bytes>,
+    {
+        iter.into_iter()
+            .for_each(|chunk| self.extend_from_slice(&chunk));
+    }
+}
 
@@ -1164,7 +1412,7 @@ impl FromIterator<u8> for BytesMut {
 
 impl<'a> FromIterator<&'a u8> for BytesMut {
     fn from_iter<T: IntoIterator<Item = &'a u8>>(into_iter: T) -> Self {
-        BytesMut::from_iter(into_iter.into_iter().map(|b| *b))
+        BytesMut::from_iter(into_iter.into_iter().copied())
     }
 }
 
@@ -1205,10 +1453,13 @@ unsafe fn release_shared(ptr: *mut Shared) {
     // > "acquire" operation before deleting the object.
     //
     // [1]: (www.boost.org/doc/libs/1_55_0/doc/html/atomic/usage_examples.html)
-    atomic::fence(Ordering::Acquire);
+    //
+    // Thread sanitizer does not support atomic fences. Use an atomic load
+    // instead.
+    (*ptr).ref_count.load(Ordering::Acquire);
 
     // Drop the data
-    Box::from_raw(ptr);
+    drop(Box::from_raw(ptr));
 }
 
 impl Shared {
@@ -1227,9 +1478,13 @@ impl Shared {
     }
 }
 
+#[inline]
 fn original_capacity_to_repr(cap: usize) -> usize {
     let width = PTR_WIDTH - ((cap >> MIN_ORIGINAL_CAPACITY_WIDTH).leading_zeros() as usize);
-    cmp::min(width, MAX_ORIGINAL_CAPACITY_WIDTH - MIN_ORIGINAL_CAPACITY_WIDTH)
+    cmp::min(
+        width,
+        MAX_ORIGINAL_CAPACITY_WIDTH - MIN_ORIGINAL_CAPACITY_WIDTH,
+    )
 }
 
 fn original_capacity_from_repr(repr: usize) -> usize {
@@ -1240,56 +1495,59 @@ fn original_capacity_from_repr(repr: usize) -> usize {
     1 << (repr + (MIN_ORIGINAL_CAPACITY_WIDTH - 1))
 }
 
-/*
-#[test]
-fn test_original_capacity_to_repr() {
-    assert_eq!(original_capacity_to_repr(0), 0);
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_original_capacity_to_repr() {
+        assert_eq!(original_capacity_to_repr(0), 0);
 
-    let max_width = 32;
+        let max_width = 32;
 
-    for width in 1..(max_width + 1) {
-        let cap = 1 << width - 1;
+        for width in 1..(max_width + 1) {
+            let cap = 1 << (width - 1);
 
-        let expected = if width < MIN_ORIGINAL_CAPACITY_WIDTH {
-            0
-        } else if width < MAX_ORIGINAL_CAPACITY_WIDTH {
-            width - MIN_ORIGINAL_CAPACITY_WIDTH
-        } else {
-            MAX_ORIGINAL_CAPACITY_WIDTH - MIN_ORIGINAL_CAPACITY_WIDTH
-        };
+            let expected = if width < MIN_ORIGINAL_CAPACITY_WIDTH {
+                0
+            } else if width < MAX_ORIGINAL_CAPACITY_WIDTH {
+                width - MIN_ORIGINAL_CAPACITY_WIDTH
+            } else {
+                MAX_ORIGINAL_CAPACITY_WIDTH - MIN_ORIGINAL_CAPACITY_WIDTH
+            };
 
-        assert_eq!(original_capacity_to_repr(cap), expected);
+            assert_eq!(original_capacity_to_repr(cap), expected);
 
-        if width > 1 {
-            assert_eq!(original_capacity_to_repr(cap + 1), expected);
-        }
+            if width > 1 {
+                assert_eq!(original_capacity_to_repr(cap + 1), expected);
+            }
 
-        //  MIN_ORIGINAL_CAPACITY_WIDTH must be bigger than 7 to pass tests below
-        if width == MIN_ORIGINAL_CAPACITY_WIDTH + 1 {
-            assert_eq!(original_capacity_to_repr(cap - 24), expected - 1);
-            assert_eq!(original_capacity_to_repr(cap + 76), expected);
-        } else if width == MIN_ORIGINAL_CAPACITY_WIDTH + 2 {
-            assert_eq!(original_capacity_to_repr(cap - 1), expected - 1);
-            assert_eq!(original_capacity_to_repr(cap - 48), expected - 1);
+            // MIN_ORIGINAL_CAPACITY_WIDTH must be bigger than 7 to pass tests below
+            if width == MIN_ORIGINAL_CAPACITY_WIDTH + 1 {
+                assert_eq!(original_capacity_to_repr(cap - 24), expected - 1);
+                assert_eq!(original_capacity_to_repr(cap + 76), expected);
+            } else if width == MIN_ORIGINAL_CAPACITY_WIDTH + 2 {
+                assert_eq!(original_capacity_to_repr(cap - 1), expected - 1);
+                assert_eq!(original_capacity_to_repr(cap - 48), expected - 1);
+            }
         }
     }
-}
 
-#[test]
-fn test_original_capacity_from_repr() {
-    assert_eq!(0, original_capacity_from_repr(0));
+    #[test]
+    fn test_original_capacity_from_repr() {
+        assert_eq!(0, original_capacity_from_repr(0));
 
-    let min_cap = 1 << MIN_ORIGINAL_CAPACITY_WIDTH;
+        let min_cap = 1 << MIN_ORIGINAL_CAPACITY_WIDTH;
 
-    assert_eq!(min_cap, original_capacity_from_repr(1));
-    assert_eq!(min_cap * 2, original_capacity_from_repr(2));
-    assert_eq!(min_cap * 4, original_capacity_from_repr(3));
-    assert_eq!(min_cap * 8, original_capacity_from_repr(4));
-    assert_eq!(min_cap * 16, original_capacity_from_repr(5));
-    assert_eq!(min_cap * 32, original_capacity_from_repr(6));
-    assert_eq!(min_cap * 64, original_capacity_from_repr(7));
+        assert_eq!(min_cap, original_capacity_from_repr(1));
+        assert_eq!(min_cap * 2, original_capacity_from_repr(2));
+        assert_eq!(min_cap * 4, original_capacity_from_repr(3));
+        assert_eq!(min_cap * 8, original_capacity_from_repr(4));
+        assert_eq!(min_cap * 16, original_capacity_from_repr(5));
+        assert_eq!(min_cap * 32, original_capacity_from_repr(6));
+        assert_eq!(min_cap * 64, original_capacity_from_repr(7));
+    }
 }
-*/
 
 unsafe impl Send for BytesMut {}
 unsafe impl Sync for BytesMut {}
@@ -1350,7 +1608,7 @@ impl PartialOrd<BytesMut> for str {
 
 impl PartialEq<Vec<u8>> for BytesMut {
     fn eq(&self, other: &Vec<u8>) -> bool {
-        *self == &other[..]
+        *self == other[..]
     }
 }
 
@@ -1374,7 +1632,7 @@ impl PartialOrd<BytesMut> for Vec<u8> {
 
 impl PartialEq<String> for BytesMut {
     fn eq(&self, other: &String) -> bool {
-        *self == &other[..]
+        *self == other[..]
     }
 }
 
@@ -1397,7 +1655,8 @@ impl PartialOrd<BytesMut> for String {
 }
 
 impl<'a, T: ?Sized> PartialEq<&'a T> for BytesMut
-    where BytesMut: PartialEq<T>
+where
+    BytesMut: PartialEq<T>,
 {
     fn eq(&self, other: &&'a T) -> bool {
         *self == **other
@@ -1405,7 +1664,8 @@ impl<'a, T: ?Sized> PartialEq<&'a T> for BytesMut
 }
 
 impl<'a, T: ?Sized> PartialOrd<&'a T> for BytesMut
-    where BytesMut: PartialOrd<T>
+where
+    BytesMut: PartialOrd<T>,
 {
     fn partial_cmp(&self, other: &&'a T) -> Option<cmp::Ordering> {
         self.partial_cmp(*other)
@@ -1438,16 +1698,52 @@ impl PartialOrd<BytesMut> for &str {
 
 impl PartialEq<BytesMut> for Bytes {
     fn eq(&self, other: &BytesMut) -> bool {
-        &other[..] == &self[..]
+        other[..] == self[..]
     }
 }
 
 impl PartialEq<Bytes> for BytesMut {
     fn eq(&self, other: &Bytes) -> bool {
-        &other[..] == &self[..]
+        other[..] == self[..]
+    }
+}
+
+impl From<BytesMut> for Vec<u8> {
+    fn from(bytes: BytesMut) -> Self {
+        let kind = bytes.kind();
+        let bytes = ManuallyDrop::new(bytes);
+        // `ManuallyDrop` disables the implicit drop of `bytes` from here on.
+        let mut vec = if kind == KIND_VEC {
+            unsafe {
+                let off = bytes.get_vec_pos();
+                rebuild_vec(bytes.ptr.as_ptr(), bytes.len, bytes.cap, off)
+            }
+        } else {
+            let shared = bytes.data as *mut Shared;
+            // Shared storage: take the backing `Vec` only when `is_unique()` holds.
+            if unsafe { (*shared).is_unique() } {
+                let vec = mem::replace(unsafe { &mut (*shared).vec }, Vec::new());
+                // An empty `Vec` was left in its place; now release `Shared`.
+                unsafe { release_shared(shared) };
+
+                vec
+            } else {
+                return ManuallyDrop::into_inner(bytes).deref().to_vec();
+            }
+        };
+        // `vec` owns the allocation; `bytes.ptr` may point past its start.
+        let len = bytes.len;
+        // Move the `len` live bytes to the front (`ptr::copy` permits overlap).
+        unsafe {
+            ptr::copy(bytes.ptr.as_ptr(), vec.as_mut_ptr(), len);
+            vec.set_len(len);
+        }
+
+        vec
     }
 }
 
+#[inline]
 fn vptr(ptr: *mut u8) -> NonNull<u8> {
     if cfg!(debug_assertions) {
         NonNull::new(ptr).expect("Vec pointer should be non-null")
@@ -1456,8 +1752,20 @@ fn vptr(ptr: *mut u8) -> NonNull<u8> {
     }
 }
 
+/// Builds a dangling `*mut T` whose address is exactly `addr`, so that plain
+/// integer data can be kept in a pointer-typed field.
+///
+/// The result equals `addr as *mut T`; the integer-to-pointer cast itself is
+/// avoided because it fails on miri under strict provenance checking.
+#[inline]
+fn invalid_ptr<T>(addr: usize) -> *mut T {
+    let dangling = core::ptr::null_mut::<u8>().wrapping_add(addr);
+    debug_assert_eq!(dangling as usize, addr);
+    dangling.cast()
+}
+
 unsafe fn rebuild_vec(ptr: *mut u8, mut len: usize, mut cap: usize, off: usize) -> Vec<u8> {
-    let ptr = ptr.offset(-(off as isize));
+    let ptr = ptr.sub(off);
     len += off;
     cap += off;
 
@@ -1468,20 +1776,81 @@ unsafe fn rebuild_vec(ptr: *mut u8, mut len: usize, mut cap: usize, off: usize)
 
 static SHARED_VTABLE: Vtable = Vtable {
     clone: shared_v_clone,
+    to_vec: shared_v_to_vec,
+    to_mut: shared_v_to_mut,
+    is_unique: shared_v_is_unique,
     drop: shared_v_drop,
 };
 
 unsafe fn shared_v_clone(data: &AtomicPtr<()>, ptr: *const u8, len: usize) -> Bytes {
-    let shared = data.load(Ordering::Acquire) as *mut Shared;
+    let shared = data.load(Ordering::Relaxed) as *mut Shared;
     increment_shared(shared);
 
-    let data = AtomicPtr::new(shared as _);
+    let data = AtomicPtr::new(shared as *mut ());
     Bytes::with_vtable(ptr, len, data, &SHARED_VTABLE)
 }
 
+unsafe fn shared_v_to_vec(data: &AtomicPtr<()>, ptr: *const u8, len: usize) -> Vec<u8> {
+    let shared: *mut Shared = data.load(Ordering::Relaxed).cast();
+    // Reuse the backing `Vec` when `is_unique()` holds; otherwise copy the bytes.
+    if (*shared).is_unique() {
+        let shared = &mut *shared;
+
+        // Take the `Vec` out (leaving an empty one behind), then release `Shared`
+        let mut vec = mem::replace(&mut shared.vec, Vec::new());
+        release_shared(shared);
+
+        // Move the `len` bytes at `ptr` to the start of the buffer
+        ptr::copy(ptr, vec.as_mut_ptr(), len);
+        vec.set_len(len);
+
+        vec
+    } else {
+        let v = slice::from_raw_parts(ptr, len).to_vec();
+        release_shared(shared);
+        v
+    }
+}
+
+unsafe fn shared_v_to_mut(data: &AtomicPtr<()>, ptr: *const u8, len: usize) -> BytesMut {
+    let shared: *mut Shared = data.load(Ordering::Relaxed).cast();
+    // Unique: hand the existing allocation to the `BytesMut`. Otherwise: copy.
+    if (*shared).is_unique() {
+        let shared = &mut *shared;
+
+        // The capacity is always the original capacity of the buffer
+        // minus the offset from the start of the buffer
+        let v = &mut shared.vec;
+        let v_capacity = v.capacity();
+        let v_ptr = v.as_mut_ptr();
+        let offset = offset_from(ptr as *mut u8, v_ptr);
+        let cap = v_capacity - offset;
+        // `vptr` wraps the raw pointer in `NonNull` for the `BytesMut` field.
+        let ptr = vptr(ptr as *mut u8);
+        // `shared` is stored as `data`; `release_shared` is not called here.
+        BytesMut {
+            ptr,
+            len,
+            cap,
+            data: shared,
+        }
+    } else {
+        let v = slice::from_raw_parts(ptr, len).to_vec();
+        release_shared(shared);
+        BytesMut::from_vec(v)
+    }
+}
+
+unsafe fn shared_v_is_unique(data: &AtomicPtr<()>) -> bool {
+    let shared: *mut Shared = data.load(Ordering::Acquire).cast();
+    // Unique means the reference count reads exactly one.
+    (*shared).ref_count.load(Ordering::Relaxed) == 1
+}
+
 unsafe fn shared_v_drop(data: &mut AtomicPtr<()>, _ptr: *const u8, _len: usize) {
-    let shared = (*data.get_mut()) as *mut Shared;
-    release_shared(shared as *mut Shared);
+    data.with_mut(|shared| {
+        release_shared(*shared as *mut Shared);
+    });
 }
 
 // compile-fails
@@ -1519,11 +1888,11 @@ fn _split_must_use() {}
 // fuzz tests
 #[cfg(all(test, loom))]
 mod fuzz {
-    use std::sync::Arc;
+    use loom::sync::Arc;
     use loom::thread;
 
-    use crate::Bytes;
     use super::BytesMut;
+    use crate::Bytes;
 
     #[test]
     fn bytes_mut_cloning_frozen() {
diff --git a/src/fmt/debug.rs b/src/fmt/debug.rs
index f6a08b863..82d0aa5e3 100644
--- a/src/fmt/debug.rs
+++ b/src/fmt/debug.rs
@@ -1,7 +1,7 @@
 use core::fmt::{Debug, Formatter, Result};
 
-use crate::{Bytes, BytesMut};
 use super::BytesRef;
+use crate::{Bytes, BytesMut};
 
 /// Alternative implementation of `std::fmt::Debug` for byte slice.
 ///
@@ -25,7 +25,7 @@ impl Debug for BytesRef<'_> {
             } else if b == b'\0' {
                 write!(f, "\\0")?;
             // ASCII printable
-            } else if b >= 0x20 && b < 0x7f {
+            } else if (0x20..0x7f).contains(&b) {
                 write!(f, "{}", b as char)?;
             } else {
                 write!(f, "\\x{:02x}", b)?;
@@ -36,14 +36,5 @@ impl Debug for BytesRef<'_> {
     }
 }
 
-impl Debug for Bytes {
-    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
-        Debug::fmt(&BytesRef(&self.as_ref()), f)
-    }
-}
-
-impl Debug for BytesMut {
-    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
-        Debug::fmt(&BytesRef(&self.as_ref()), f)
-    }
-}
+fmt_impl!(Debug, Bytes);
+fmt_impl!(Debug, BytesMut);
diff --git a/src/fmt/hex.rs b/src/fmt/hex.rs
index 09170ae1a..1203b4198 100644
--- a/src/fmt/hex.rs
+++ b/src/fmt/hex.rs
@@ -1,7 +1,7 @@
 use core::fmt::{Formatter, LowerHex, Result, UpperHex};
 
-use crate::{Bytes, BytesMut};
 use super::BytesRef;
+use crate::{Bytes, BytesMut};
 
 impl LowerHex for BytesRef<'_> {
     fn fmt(&self, f: &mut Formatter<'_>) -> Result {
@@ -21,17 +21,7 @@ impl UpperHex for BytesRef<'_> {
     }
 }
 
-macro_rules! hex_impl {
-    ($tr:ident, $ty:ty) => {
-        impl $tr for $ty {
-            fn fmt(&self, f: &mut Formatter<'_>) -> Result {
-                $tr::fmt(&BytesRef(self.as_ref()), f)
-            }
-        }
-    };
-}
-
-hex_impl!(LowerHex, Bytes);
-hex_impl!(LowerHex, BytesMut);
-hex_impl!(UpperHex, Bytes);
-hex_impl!(UpperHex, BytesMut);
+fmt_impl!(LowerHex, Bytes);
+fmt_impl!(LowerHex, BytesMut);
+fmt_impl!(UpperHex, Bytes);
+fmt_impl!(UpperHex, BytesMut);
diff --git a/src/fmt/mod.rs b/src/fmt/mod.rs
index 676d15fc2..b8a0eafaf 100644
--- a/src/fmt/mod.rs
+++ b/src/fmt/mod.rs
@@ -1,3 +1,17 @@
+// Implements the formatting trait `$tr` for `$ty` by wrapping `self.as_ref()`
+// in `BytesRef` and delegating to `BytesRef`'s implementation of `$tr`.
+// `Formatter`, `Result` and `BytesRef` are resolved where the macro is invoked.
+// Defined ahead of the `mod` declarations so the submodules can use it.
+macro_rules! fmt_impl {
+    ($tr:ident, $ty:ty) => {
+        impl $tr for $ty {
+            fn fmt(&self, f: &mut Formatter<'_>) -> Result {
+                $tr::fmt(&BytesRef(self.as_ref()), f)
+            }
+        }
+    };
+}
+
 mod debug;
 mod hex;
 
diff --git a/src/lib.rs b/src/lib.rs
index a61e3476a..08c424942 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,16 +1,17 @@
-#![deny(warnings, missing_docs, missing_debug_implementations, rust_2018_idioms)]
-#![doc(html_root_url = "https://docs.rs/bytes/0.5.4")]
+#![warn(missing_docs, missing_debug_implementations, rust_2018_idioms)]
+#![doc(test(
+    no_crate_inject,
+    attr(deny(warnings, rust_2018_idioms), allow(dead_code, unused_variables))
+))]
 #![no_std]
+#![cfg_attr(docsrs, feature(doc_cfg))]
 
 //! Provides abstractions for working with bytes.
 //!
 //! The `bytes` crate provides an efficient byte buffer structure
-//! ([`Bytes`](struct.Bytes.html)) and traits for working with buffer
+//! ([`Bytes`]) and traits for working with buffer
 //! implementations ([`Buf`], [`BufMut`]).
 //!
-//! [`Buf`]: trait.Buf.html
-//! [`BufMut`]: trait.BufMut.html
-//!
 //! # `Bytes`
 //!
 //! `Bytes` is an efficient container for storing and operating on contiguous
@@ -48,9 +49,7 @@
 //! `a` and `b` will share the underlying buffer and maintain indices tracking
 //! the view into the buffer represented by the handle.
 //!
-//! See the [struct docs] for more details.
-//!
-//! [struct docs]: struct.Bytes.html
+//! See the [struct docs](`Bytes`) for more details.
 //!
 //! # `Buf`, `BufMut`
 //!
@@ -66,30 +65,26 @@
 //! ## Relation with `Read` and `Write`
 //!
 //! At first glance, it may seem that `Buf` and `BufMut` overlap in
-//! functionality with `std::io::Read` and `std::io::Write`. However, they
+//! functionality with [`std::io::Read`] and [`std::io::Write`]. However, they
 //! serve different purposes. A buffer is the value that is provided as an
 //! argument to `Read::read` and `Write::write`. `Read` and `Write` may then
 //! perform a syscall, which has the potential of failing. Operations on `Buf`
 //! and `BufMut` are infallible.
 
-
 extern crate alloc;
 
 #[cfg(feature = "std")]
 extern crate std;
 
 pub mod buf;
-pub use crate::buf::{
-    Buf,
-    BufMut,
-};
+pub use crate::buf::{Buf, BufMut};
 
-mod bytes_mut;
 mod bytes;
+mod bytes_mut;
 mod fmt;
 mod loom;
-pub use crate::bytes_mut::BytesMut;
 pub use crate::bytes::Bytes;
+pub use crate::bytes_mut::BytesMut;
 
 // Optional Serde support
 #[cfg(feature = "serde")]
@@ -115,3 +110,90 @@ fn abort() -> ! {
         panic!("abort");
     }
 }
+
+/// Computes `a - b`, clamping at zero; a `b` too large for `usize` gives 0.
+#[inline(always)]
+#[cfg(feature = "std")]
+fn saturating_sub_usize_u64(a: usize, b: u64) -> usize {
+    use core::convert::TryFrom;
+    // A subtrahend that does not fit in `usize` is treated as `usize::MAX`.
+    let subtrahend = usize::try_from(b).unwrap_or(usize::MAX);
+    a.saturating_sub(subtrahend)
+}
+
+/// Returns the smaller of `a` and `b` as a `usize`.
+#[inline(always)]
+#[cfg(feature = "std")]
+fn min_u64_usize(a: u64, b: usize) -> usize {
+    use core::convert::TryFrom;
+    // An `a` that does not fit in `usize` cannot be below `b`, so `b` wins.
+    let a_clamped = usize::try_from(a).unwrap_or(usize::MAX);
+    usize::min(a_clamped, b)
+}
+
+/// Error type for the `try_get_` methods of [`Buf`].
+///
+/// Indicates that a `try_get_` method was called on a [`Buf`] that did not
+/// have enough remaining bytes to produce the requested value; the fields
+/// record how many bytes were needed and how many were available.
+#[derive(Debug, PartialEq, Eq)]
+pub struct TryGetError {
+    /// The number of bytes necessary to get the value
+    pub requested: usize,
+
+    /// The number of bytes available in the buffer
+    pub available: usize,
+}
+
+impl core::fmt::Display for TryGetError {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> Result<(), core::fmt::Error> {
+        write!(
+            f,
+            "Not enough bytes remaining in buffer to read value (requested {} but only {} available)",
+            self.requested,
+            self.available
+        )
+    }
+}
+
+#[cfg(feature = "std")]
+impl std::error::Error for TryGetError {}
+
+#[cfg(feature = "std")]
+impl From<TryGetError> for std::io::Error {
+    fn from(error: TryGetError) -> Self {
+        std::io::Error::new(std::io::ErrorKind::Other, error)
+    }
+}
+
+/// Panic with a nice error message.
+#[cold]
+fn panic_advance(error_info: &TryGetError) -> ! {
+    panic!(
+        "advance out of bounds: the len is {} but advancing by {}",
+        error_info.available, error_info.requested
+    );
+}
+
+#[cold]
+fn panic_does_not_fit(size: usize, nbytes: usize) -> ! {
+    panic!(
+        "size too large: the integer type can fit {} bytes, but nbytes is {}",
+        size, nbytes
+    );
+}
+
+/// Returns the distance in bytes from `original` up to `dst`.
+///
+/// Precondition: `dst >= original`. This is asserted in debug builds; when
+/// overflow checks are disabled a violation makes the subtraction wrap.
+///
+/// The computation is equivalent to `dst.offset_from(original) as usize`.
+/// That method was only stabilized in Rust 1.47, while this helper was
+/// written against a minimum supported Rust version of 1.39, so the
+/// addresses are subtracted directly instead.
+#[inline]
+fn offset_from(dst: *const u8, original: *const u8) -> usize {
+    debug_assert!(dst >= original, "offset_from: dst is below original");
+    dst as usize - original as usize
+}
diff --git a/src/loom.rs b/src/loom.rs
index 80947acec..c80929092 100644
--- a/src/loom.rs
+++ b/src/loom.rs
@@ -1,9 +1,33 @@
 #[cfg(not(all(test, loom)))]
 pub(crate) mod sync {
     pub(crate) mod atomic {
-        pub(crate) use core::sync::atomic::{fence, AtomicPtr, AtomicUsize, Ordering};
+        #[cfg(not(feature = "extra-platforms"))]
+        pub(crate) use core::sync::atomic::{AtomicPtr, AtomicUsize, Ordering};
+        #[cfg(feature = "extra-platforms")]
+        pub(crate) use extra_platforms::{AtomicPtr, AtomicUsize, Ordering};
+
+        /// Runs a closure with mutable access to the raw pointer held by an
+        /// atomic pointer.
+        pub(crate) trait AtomicMut<T> {
+            fn with_mut<F: FnOnce(&mut *mut T) -> R, R>(&mut self, f: F) -> R;
+        }
+
+        // `&mut self` is already exclusive, so the pointer is borrowed
+        // directly through `get_mut` and passed to the closure.
+        impl<T> AtomicMut<T> for AtomicPtr<T> {
+            fn with_mut<F: FnOnce(&mut *mut T) -> R, R>(&mut self, f: F) -> R {
+                let raw = self.get_mut();
+                f(raw)
+            }
+        }
     }
 }
 
 #[cfg(all(test, loom))]
-pub(crate) use ::loom::sync;
+pub(crate) mod sync {
+    pub(crate) mod atomic {
+        pub(crate) use loom::sync::atomic::{AtomicPtr, AtomicUsize, Ordering};
+
+        pub(crate) trait AtomicMut<T> {}
+    }
+}
diff --git a/src/serde.rs b/src/serde.rs
index 11020ae7f..0a5bd144a 100644
--- a/src/serde.rs
+++ b/src/serde.rs
@@ -1,15 +1,16 @@
+use super::{Bytes, BytesMut};
 use alloc::string::String;
 use alloc::vec::Vec;
 use core::{cmp, fmt};
-use serde::{Serialize, Serializer, Deserialize, Deserializer, de};
-use super::{Bytes, BytesMut};
+use serde::{de, Deserialize, Deserializer, Serialize, Serializer};
 
 macro_rules! serde_impl {
-    ($ty:ident, $visitor_ty:ident, $from_slice:ident, $from_vec:ident) => (
+    ($ty:ident, $visitor_ty:ident, $from_slice:ident, $from_vec:ident) => {
         impl Serialize for $ty {
             #[inline]
             fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-                where S: Serializer
+            where
+                S: Serializer,
             {
                 serializer.serialize_bytes(&self)
             }
@@ -26,7 +27,8 @@ macro_rules! serde_impl {
 
             #[inline]
             fn visit_seq<V>(self, mut seq: V) -> Result<Self::Value, V::Error>
-                where V: de::SeqAccess<'de>
+            where
+                V: de::SeqAccess<'de>,
             {
                 let len = cmp::min(seq.size_hint().unwrap_or(0), 4096);
                 let mut values: Vec<u8> = Vec::with_capacity(len);
@@ -40,28 +42,32 @@ macro_rules! serde_impl {
 
             #[inline]
             fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
-                where E: de::Error
+            where
+                E: de::Error,
             {
                 Ok($ty::$from_slice(v))
             }
 
             #[inline]
             fn visit_byte_buf<E>(self, v: Vec<u8>) -> Result<Self::Value, E>
-                where E: de::Error
+            where
+                E: de::Error,
             {
                 Ok($ty::$from_vec(v))
             }
 
             #[inline]
             fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
-                where E: de::Error
+            where
+                E: de::Error,
             {
                 Ok($ty::$from_slice(v.as_bytes()))
             }
 
             #[inline]
             fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
-                where E: de::Error
+            where
+                E: de::Error,
             {
                 Ok($ty::$from_vec(v.into_bytes()))
             }
@@ -70,12 +76,13 @@ macro_rules! serde_impl {
         impl<'de> Deserialize<'de> for $ty {
             #[inline]
             fn deserialize<D>(deserializer: D) -> Result<$ty, D::Error>
-                where D: Deserializer<'de>
+            where
+                D: Deserializer<'de>,
             {
                 deserializer.deserialize_byte_buf($visitor_ty)
             }
         }
-    );
+    };
 }
 
 serde_impl!(Bytes, BytesVisitor, copy_from_slice, from);
diff --git a/tests/test_buf.rs b/tests/test_buf.rs
index 12b75a4ad..099016e24 100644
--- a/tests/test_buf.rs
+++ b/tests/test_buf.rs
@@ -1,75 +1,413 @@
-#![deny(warnings, rust_2018_idioms)]
+#![warn(rust_2018_idioms)]
 
-use bytes::Buf;
+use ::bytes::{Buf, Bytes, BytesMut};
+use core::{cmp, mem};
+use std::collections::VecDeque;
+#[cfg(feature = "std")]
 use std::io::IoSlice;
 
-#[test]
-fn test_fresh_cursor_vec() {
-    let mut buf = &b"hello"[..];
+// A random 64-byte ascii string, with the first 8 bytes altered to
+// give valid representations of f32 and f64 (making them easier to compare)
+// and negative signed numbers when interpreting as big endian
+// (testing sign extension for `Buf::get_int` and `Buf::get_int_ne`).
+const INPUT: &[u8] = b"\xffFqrjrDqPhvTc45vvq33f6bJrUtyHESuTeklWKgYd64xgzxJwvAkpYYnpNJyZSRn";
+
+macro_rules! e {
+    ($be:expr, $le:expr) => {
+        if cfg!(target_endian = "little") {
+            $le
+        } else {
+            $be
+        }
+    };
+}
+
+macro_rules! buf_tests {
+    ($make_input:ident) => {
+        buf_tests!($make_input, true);
+    };
+    ($make_input:ident, $checks_vectored_is_complete:expr) => {
+        use super::*;
+
+        #[test]
+        fn empty_state() {
+            let buf = $make_input(&[]);
+            assert_eq!(buf.remaining(), 0);
+            assert!(!buf.has_remaining());
+            assert!(buf.chunk().is_empty());
+        }
+
+        #[test]
+        fn fresh_state() {
+            let buf = $make_input(INPUT);
+            assert_eq!(buf.remaining(), 64);
+            assert!(buf.has_remaining());
+
+            let chunk = buf.chunk();
+            assert!(chunk.len() <= 64);
+            assert!(INPUT.starts_with(chunk));
+        }
+
+        #[test]
+        fn advance() {
+            let mut buf = $make_input(INPUT);
+            buf.advance(8);
+            assert_eq!(buf.remaining(), 64 - 8);
+            assert!(buf.has_remaining());
+
+            let chunk = buf.chunk();
+            assert!(chunk.len() <= 64 - 8);
+            assert!(INPUT[8..].starts_with(chunk));
+        }
+
+        #[test]
+        fn advance_to_end() {
+            let mut buf = $make_input(INPUT);
+            buf.advance(64);
+            assert_eq!(buf.remaining(), 0);
+            assert!(!buf.has_remaining());
+
+            let chunk = buf.chunk();
+            assert!(chunk.is_empty());
+        }
+
+        #[test]
+        #[should_panic]
+        fn advance_past_end() {
+            let  mut buf = $make_input(INPUT);
+            buf.advance(65);
+        }
+
+        #[test]
+        #[cfg(feature = "std")]
+        fn chunks_vectored_empty() {
+            let  buf = $make_input(&[]);
+            let mut bufs = [IoSlice::new(&[]); 16];
+
+            let n = buf.chunks_vectored(&mut bufs);
+            assert_eq!(n, 0);
+            assert!(bufs.iter().all(|buf| buf.is_empty()));
+        }
+
+        #[test]
+        #[cfg(feature = "std")]
+        fn chunks_vectored_is_complete() {
+            let buf = $make_input(INPUT);
+            let mut bufs = [IoSlice::new(&[]); 16];
+
+            let n = buf.chunks_vectored(&mut bufs);
+            assert!(n > 0);
+            assert!(n <= 16);
+
+            let bufs_concat = bufs[..n]
+                .iter()
+                .flat_map(|b| b.iter().copied())
+                .collect::<Vec<u8>>();
+            if $checks_vectored_is_complete {
+                assert_eq!(bufs_concat, INPUT);
+            } else {
+                // If this panics then `buf` implements `chunks_vectored`.
+                // Remove the `false` argument from `buf_tests!` for that type.
+                assert!(bufs_concat.len() < INPUT.len());
+                assert!(INPUT.starts_with(&bufs_concat));
+            }
+
+            for i in n..16 {
+                assert!(bufs[i].is_empty());
+            }
+        }
+
+        #[test]
+        fn copy_to_slice() {
+            let mut buf = $make_input(INPUT);
 
-    assert_eq!(buf.remaining(), 5);
-    assert_eq!(buf.bytes(), b"hello");
+            let mut chunk = [0u8; 8];
+            buf.copy_to_slice(&mut chunk);
+            assert_eq!(buf.remaining(), 64 - 8);
+            assert!(buf.has_remaining());
+            assert_eq!(chunk, INPUT[..8]);
+
+            let chunk = buf.chunk();
+            assert!(chunk.len() <= 64 - 8);
+            assert!(INPUT[8..].starts_with(chunk));
+        }
 
-    buf.advance(2);
+        #[test]
+        fn copy_to_slice_big() {
+            let mut buf = $make_input(INPUT);
+
+            let mut chunk = [0u8; 56];
+            buf.copy_to_slice(&mut chunk);
+            assert_eq!(buf.remaining(), 64 - 56);
+            assert!(buf.has_remaining());
+            assert_eq!(chunk, INPUT[..56]);
+
+            let chunk = buf.chunk();
+            assert!(chunk.len() <= 64 - 56);
+            assert!(INPUT[56..].starts_with(chunk));
+        }
 
-    assert_eq!(buf.remaining(), 3);
-    assert_eq!(buf.bytes(), b"llo");
+        #[test]
+        fn copy_to_slice_to_end() {
+            let mut buf = $make_input(INPUT);
+
+            let mut chunk = [0u8; 64];
+            buf.copy_to_slice(&mut chunk);
+            assert_eq!(buf.remaining(), 0);
+            assert!(!buf.has_remaining());
+            assert_eq!(chunk, INPUT);
+
+            assert!(buf.chunk().is_empty());
+        }
+
+        #[test]
+        #[should_panic]
+        fn copy_to_slice_overflow() {
+            let mut buf = $make_input(INPUT);
+
+            let mut chunk = [0u8; 65];
+            buf.copy_to_slice(&mut chunk);
+        }
+
+        #[test]
+        fn copy_to_bytes() {
+            let mut buf = $make_input(INPUT);
+
+            let chunk = buf.copy_to_bytes(8);
+            assert_eq!(buf.remaining(), 64 - 8);
+            assert!(buf.has_remaining());
+            assert_eq!(chunk, INPUT[..8]);
+
+            let chunk = buf.chunk();
+            assert!(chunk.len() <= 64 - 8);
+            assert!(INPUT[8..].starts_with(chunk));
+        }
+
+        #[test]
+        fn copy_to_bytes_big() {
+            let mut buf = $make_input(INPUT);
+
+            let chunk = buf.copy_to_bytes(56);
+            assert_eq!(buf.remaining(), 64 - 56);
+            assert!(buf.has_remaining());
+            assert_eq!(chunk, INPUT[..56]);
+
+            let chunk = buf.chunk();
+            assert!(chunk.len() <= 64 - 56);
+            assert!(INPUT[56..].starts_with(chunk));
+        }
+
+        #[test]
+        fn copy_to_bytes_to_end() {
+            let mut buf = $make_input(INPUT);
+
+            let chunk = buf.copy_to_bytes(64);
+            assert_eq!(buf.remaining(), 0);
+            assert!(!buf.has_remaining());
+            assert_eq!(chunk, INPUT);
+
+            assert!(buf.chunk().is_empty());
+        }
 
-    buf.advance(3);
+        #[test]
+        #[should_panic]
+        fn copy_to_bytes_overflow() {
+            let mut buf = $make_input(INPUT);
 
-    assert_eq!(buf.remaining(), 0);
-    assert_eq!(buf.bytes(), b"");
+            let _ = buf.copy_to_bytes(65);
+        }
+
+        buf_tests!(number $make_input, get_u8, get_u8_overflow, u8, get_u8, 0xff);
+        buf_tests!(number $make_input, get_i8, get_i8_overflow, i8, get_i8, 0xffu8 as i8);
+        buf_tests!(number $make_input, get_u16_be, get_u16_be_overflow, u16, get_u16, 0xff46);
+        buf_tests!(number $make_input, get_u16_le, get_u16_le_overflow, u16, get_u16_le, 0x46ff);
+        buf_tests!(number $make_input, get_u16_ne, get_u16_ne_overflow, u16, get_u16_ne, e!(0xff46, 0x46ff));
+        buf_tests!(number $make_input, get_i16_be, get_i16_be_overflow, i16, get_i16, 0xff46u16 as i16);
+        buf_tests!(number $make_input, get_i16_le, get_i16_le_overflow, i16, get_i16_le, 0x46ff);
+        buf_tests!(number $make_input, get_i16_ne, get_i16_ne_overflow, i16, get_i16_ne, e!(0xff46u16 as i16, 0x46ff));
+        buf_tests!(number $make_input, get_u32_be, get_u32_be_overflow, u32, get_u32, 0xff467172);
+        buf_tests!(number $make_input, get_u32_le, get_u32_le_overflow, u32, get_u32_le, 0x727146ff);
+        buf_tests!(number $make_input, get_u32_ne, get_u32_ne_overflow, u32, get_u32_ne, e!(0xff467172, 0x727146ff));
+        buf_tests!(number $make_input, get_i32_be, get_i32_be_overflow, i32, get_i32, 0xff467172u32 as i32);
+        buf_tests!(number $make_input, get_i32_le, get_i32_le_overflow, i32, get_i32_le, 0x727146ff);
+        buf_tests!(number $make_input, get_i32_ne, get_i32_ne_overflow, i32, get_i32_ne, e!(0xff467172u32 as i32, 0x727146ff));
+        buf_tests!(number $make_input, get_u64_be, get_u64_be_overflow, u64, get_u64, 0xff4671726a724471);
+        buf_tests!(number $make_input, get_u64_le, get_u64_le_overflow, u64, get_u64_le, 0x7144726a727146ff);
+        buf_tests!(number $make_input, get_u64_ne, get_u64_ne_overflow, u64, get_u64_ne, e!(0xff4671726a724471, 0x7144726a727146ff));
+        buf_tests!(number $make_input, get_i64_be, get_i64_be_overflow, i64, get_i64, 0xff4671726a724471u64 as i64);
+        buf_tests!(number $make_input, get_i64_le, get_i64_le_overflow, i64, get_i64_le, 0x7144726a727146ff);
+        buf_tests!(number $make_input, get_i64_ne, get_i64_ne_overflow, i64, get_i64_ne, e!(0xff4671726a724471u64 as i64, 0x7144726a727146ff));
+        buf_tests!(number $make_input, get_u128_be, get_u128_be_overflow, u128, get_u128, 0xff4671726a7244715068765463343576);
+        buf_tests!(number $make_input, get_u128_le, get_u128_le_overflow, u128, get_u128_le, 0x76353463547668507144726a727146ff);
+        buf_tests!(number $make_input, get_u128_ne, get_u128_ne_overflow, u128, get_u128_ne, e!(0xff4671726a7244715068765463343576, 0x76353463547668507144726a727146ff));
+        buf_tests!(number $make_input, get_i128_be, get_i128_be_overflow, i128, get_i128, 0xff4671726a7244715068765463343576u128 as i128);
+        buf_tests!(number $make_input, get_i128_le, get_i128_le_overflow, i128, get_i128_le, 0x76353463547668507144726a727146ff);
+        buf_tests!(number $make_input, get_i128_ne, get_i128_ne_overflow, i128, get_i128_ne, e!(0xff4671726a7244715068765463343576u128 as i128, 0x76353463547668507144726a727146ff));
+        buf_tests!(number $make_input, get_f32_be, get_f32_be_overflow, f32, get_f32, f32::from_bits(0xff467172));
+        buf_tests!(number $make_input, get_f32_le, get_f32_le_overflow, f32, get_f32_le, f32::from_bits(0x727146ff));
+        buf_tests!(number $make_input, get_f32_ne, get_f32_ne_overflow, f32, get_f32_ne, f32::from_bits(e!(0xff467172, 0x727146ff)));
+        buf_tests!(number $make_input, get_f64_be, get_f64_be_overflow, f64, get_f64, f64::from_bits(0xff4671726a724471));
+        buf_tests!(number $make_input, get_f64_le, get_f64_le_overflow, f64, get_f64_le, f64::from_bits(0x7144726a727146ff));
+        buf_tests!(number $make_input, get_f64_ne, get_f64_ne_overflow, f64, get_f64_ne, f64::from_bits(e!(0xff4671726a724471, 0x7144726a727146ff)));
+
+        buf_tests!(var_number $make_input, get_uint_be, get_uint_be_overflow, u64, get_uint, 3, 0xff4671);
+        buf_tests!(var_number $make_input, get_uint_le, get_uint_le_overflow, u64, get_uint_le, 3, 0x7146ff);
+        buf_tests!(var_number $make_input, get_uint_ne, get_uint_ne_overflow, u64, get_uint_ne, 3, e!(0xff4671, 0x7146ff));
+        buf_tests!(var_number $make_input, get_int_be, get_int_be_overflow, i64, get_int, 3, 0xffffffffffff4671u64 as i64);
+        buf_tests!(var_number $make_input, get_int_le, get_int_le_overflow, i64, get_int_le, 3, 0x7146ff);
+        buf_tests!(var_number $make_input, get_int_ne, get_int_ne_overflow, i64, get_int_ne, 3, e!(0xffffffffffff4671u64 as i64, 0x7146ff));
+    };
+    (number $make_input:ident, $ok_name:ident, $panic_name:ident, $number:ty, $method:ident, $value:expr) => {
+        #[test]
+        fn $ok_name() {
+            let mut buf = $make_input(INPUT);
+
+            let value = buf.$method();
+            assert_eq!(buf.remaining(), 64 - mem::size_of::<$number>());
+            assert!(buf.has_remaining());
+            assert_eq!(value, $value);
+        }
+
+        #[test]
+        #[should_panic]
+        fn $panic_name() {
+            let mut buf = $make_input(&[]);
+
+            let _ = buf.$method();
+        }
+    };
+    (var_number $make_input:ident, $ok_name:ident, $panic_name:ident, $number:ty, $method:ident, $len:expr, $value:expr) => {
+        #[test]
+        fn $ok_name() {
+            let mut buf = $make_input(INPUT);
+
+            let value = buf.$method($len);
+            assert_eq!(buf.remaining(), 64 - $len);
+            assert!(buf.has_remaining());
+            assert_eq!(value, $value);
+        }
+
+        #[test]
+        #[should_panic]
+        fn $panic_name() {
+            let mut buf = $make_input(&[]);
+
+            let _ = buf.$method($len);
+        }
+    };
 }
 
-#[test]
-fn test_get_u8() {
-    let mut buf = &b"\x21zomg"[..];
-    assert_eq!(0x21, buf.get_u8());
+mod u8_slice {
+    fn make_input(buf: &'static [u8]) -> &'static [u8] {
+        buf
+    }
+
+    buf_tests!(make_input);
 }
 
-#[test]
-fn test_get_u16() {
-    let mut buf = &b"\x21\x54zomg"[..];
-    assert_eq!(0x2154, buf.get_u16());
-    let mut buf = &b"\x21\x54zomg"[..];
-    assert_eq!(0x5421, buf.get_u16_le());
+mod bytes {
+    fn make_input(buf: &'static [u8]) -> impl Buf {
+        Bytes::from_static(buf)
+    }
+
+    buf_tests!(make_input);
 }
 
-#[test]
-#[should_panic]
-fn test_get_u16_buffer_underflow() {
-    let mut buf = &b"\x21"[..];
-    buf.get_u16();
+mod bytes_mut {
+    fn make_input(buf: &'static [u8]) -> impl Buf {
+        BytesMut::from(buf)
+    }
+
+    buf_tests!(make_input);
 }
 
-#[test]
-fn test_bufs_vec() {
-    let buf = &b"hello world"[..];
+mod vec_deque {
+    fn make_input(buf: &'static [u8]) -> impl Buf {
+        let mut deque = VecDeque::new();
+
+        if !buf.is_empty() {
+            // Construct |b|some bytes|a| `VecDeque`
+            let mid = buf.len() / 2;
+            let (a, b) = buf.split_at(mid);
+
+            deque.reserve_exact(buf.len() + 1);
 
-    let b1: &[u8] = &mut [];
-    let b2: &[u8] = &mut [];
+            let extra_space = deque.capacity() - b.len() - 1;
+            deque.resize(extra_space, 0);
 
-    let mut dst = [IoSlice::new(b1), IoSlice::new(b2)];
+            deque.extend(a);
+            deque.drain(..extra_space);
+            deque.extend(b);
 
-    assert_eq!(1, buf.bytes_vectored(&mut dst[..]));
+            let (a, b) = deque.as_slices();
+            assert!(
+                !a.is_empty(),
+                "could not setup test - attempt to create discontiguous VecDeque failed"
+            );
+            assert!(
+                !b.is_empty(),
+                "could not setup test - attempt to create discontiguous VecDeque failed"
+            );
+        }
+
+        deque
+    }
+
+    buf_tests!(make_input, true);
 }
 
-#[test]
-fn test_vec_deque() {
-    use std::collections::VecDeque;
-
-    let mut buffer: VecDeque<u8> = VecDeque::new();
-    buffer.extend(b"hello world");
-    assert_eq!(11, buffer.remaining());
-    assert_eq!(b"hello world", buffer.bytes());
-    buffer.advance(6);
-    assert_eq!(b"world", buffer.bytes());
-    buffer.extend(b" piece");
-    let mut out = [0; 11];
-    buffer.copy_to_slice(&mut out);
-    assert_eq!(b"world piece", &out[..]);
+#[cfg(feature = "std")]
+mod cursor {
+    use std::io::Cursor;
+
+    fn make_input(buf: &'static [u8]) -> impl Buf {
+        Cursor::new(buf) // a new cursor starts reading at position 0
+    }
+
+    buf_tests!(make_input); // generate the shared test suite for this input type
+}
+
+mod box_bytes {
+    fn make_input(buf: &'static [u8]) -> impl Buf {
+        let inner = Bytes::from_static(buf);
+        Box::new(inner) // the suite then runs through the boxed value
+    }
+    buf_tests!(make_input);
+}
+
+mod chain_u8_slice {
+    fn make_input(buf: &'static [u8]) -> impl Buf {
+        let midpoint = buf.len() / 2;
+        let (front, back) = buf.split_at(midpoint);
+        Buf::chain(front, back) // the input, split across two chained slices
+    }
+    buf_tests!(make_input);
+}
+
+mod chain_small_big_u8_slice {
+    fn make_input(buf: &'static [u8]) -> impl Buf {
+        // First link holds at most one byte; the second holds everything else.
+        let (small, big) = buf.split_at(buf.len().min(1));
+        Buf::chain(small, big)
+    }
+
+    buf_tests!(make_input);
+}
+
+mod chain_limited_slices {
+    fn make_input(buf: &'static [u8]) -> impl Buf {
+        let buf3 = &buf[cmp::min(buf.len(), 3)..]; // everything after the first 3 bytes
+        let a = Buf::take(buf3, 0); // zero-length link
+        let b = Buf::take(buf, 3); // at most the first 3 bytes
+        let c = Buf::take(buf3, usize::MAX); // the rest; this limit never bites
+        let d = buf; // whole input again, beyond the outer limit
+        Buf::take(Buf::chain(Buf::chain(a, b), Buf::chain(c, d)), buf.len())
+    }
+
+    buf_tests!(make_input, true);
 }
 
+#[allow(unused_allocation)] // This is intentional.
 #[test]
 fn test_deref_buf_forwards() {
     struct Special;
@@ -79,8 +417,8 @@ fn test_deref_buf_forwards() {
             unreachable!("remaining");
         }
 
-        fn bytes(&self) -> &[u8] {
-            unreachable!("bytes");
+        fn chunk(&self) -> &[u8] {
+            unreachable!("chunk");
         }
 
         fn advance(&mut self, _: usize) {
diff --git a/tests/test_buf_mut.rs b/tests/test_buf_mut.rs
index f002f7d5f..0abeb9f7a 100644
--- a/tests/test_buf_mut.rs
+++ b/tests/test_buf_mut.rs
@@ -1,22 +1,24 @@
-#![deny(warnings, rust_2018_idioms)]
+#![warn(rust_2018_idioms)]
 
-use bytes::{buf::IoSliceMut, BufMut, BytesMut};
-use std::usize;
-use std::fmt::Write;
+use bytes::buf::UninitSlice;
+use bytes::{BufMut, BytesMut};
+use core::fmt::Write;
+use core::mem::MaybeUninit;
+use core::usize;
 
 #[test]
 fn test_vec_as_mut_buf() {
     let mut buf = Vec::with_capacity(64);
 
-    assert_eq!(buf.remaining_mut(), usize::MAX);
+    assert_eq!(buf.remaining_mut(), isize::MAX as usize);
 
-    assert!(buf.bytes_mut().len() >= 64);
+    assert!(buf.chunk_mut().len() >= 64);
 
     buf.put(&b"zomg"[..]);
 
     assert_eq!(&buf, b"zomg");
 
-    assert_eq!(buf.remaining_mut(), usize::MAX - 4);
+    assert_eq!(buf.remaining_mut(), isize::MAX as usize - 4);
     assert_eq!(buf.capacity(), 64);
 
     for _ in 0..16 {
@@ -26,6 +28,14 @@ fn test_vec_as_mut_buf() {
     assert_eq!(buf.len(), 68);
 }
 
+#[test]
+fn test_vec_put_bytes() {
+    // Start from a vec that already holds one byte, then append two copies of 19.
+    let mut out = vec![17u8];
+    out.put_bytes(19, 2);
+    assert_eq!([17, 19, 19], out.as_slice());
+}
+
 #[test]
 fn test_put_u8() {
     let mut buf = Vec::with_capacity(8);
@@ -45,13 +55,40 @@ fn test_put_u16() {
 }
 
 #[test]
+fn test_put_int() {
+    let mut out = Vec::with_capacity(8);
+    out.put_int(0x1020304050607080, 3); // big-endian: the 3 low-order bytes, high first
+    assert_eq!(b"\x60\x70\x80", out.as_slice());
+}
+
+#[test]
+#[should_panic]
+fn test_put_int_nbytes_overflow() {
+    let mut out = Vec::with_capacity(8);
+    out.put_int(0x1020304050607080, 9); // asks for 9 bytes of an 8-byte value
+}
+
+#[test]
+fn test_put_int_le() {
+    let mut out = Vec::with_capacity(8);
+    out.put_int_le(0x1020304050607080, 3); // little-endian: low-order byte first
+    assert_eq!(b"\x80\x70\x60", out.as_slice());
+}
+
+#[test]
+#[should_panic]
+fn test_put_int_le_nbytes_overflow() {
+    let mut out = Vec::with_capacity(8);
+    out.put_int_le(0x1020304050607080, 9); // asks for 9 bytes of an 8-byte value
+}
+
+#[test]
+#[should_panic(expected = "advance out of bounds: the len is 8 but advancing by 12")]
 fn test_vec_advance_mut() {
-    // Regression test for carllerche/bytes#108.
+    // Verify fix for #354
     let mut buf = Vec::with_capacity(8);
     unsafe {
         buf.advance_mut(12);
-        assert_eq!(buf.len(), 12);
-        assert!(buf.capacity() >= 12, "capacity: {}", buf.capacity());
     }
 }
 
@@ -65,40 +102,134 @@ fn test_clone() {
     assert!(buf != buf2);
 }
 
+fn do_test_slice_small<T: ?Sized>(make: impl Fn(&mut [u8]) -> &mut T)
+where
+    for<'r> &'r mut T: BufMut,
+{
+    let mut backing = [b'X'; 8];
+    // Mixed puts: six bytes written, so two of the eight stay untouched.
+    let mut dst = make(&mut backing[..]);
+    dst.put_bytes(b'A', 2);
+    dst.put_u8(b'B');
+    dst.put_slice(b"BCC");
+    assert_eq!(2, dst.remaining_mut());
+    assert_eq!(b"AABBCCXX", &backing[..]);
+    // A fresh view starts at the front again; the big-endian u32 spells "abcd".
+    let mut dst = make(&mut backing[..]);
+    dst.put_u32(0x61626364);
+    assert_eq!(4, dst.remaining_mut());
+    assert_eq!(b"abcdCCXX", &backing[..]);
+    // The little-endian u32 lands byte-reversed: "3210".
+    let mut dst = make(&mut backing[..]);
+    dst.put_u32_le(0x30313233);
+    assert_eq!(4, dst.remaining_mut());
+    assert_eq!(b"3210CCXX", &backing[..]);
+}
+
+fn do_test_slice_large<T: ?Sized>(make: impl Fn(&mut [u8]) -> &mut T)
+where
+    for<'r> &'r mut T: BufMut,
+{
+    const LEN: usize = 100;
+    const FILL: [u8; LEN] = [b'Y'; LEN];
+    // Runs `fill` for every (buf_len, fill_len) pair with fill_len <= buf_len < LEN.
+    let test = |fill: &dyn Fn(&mut &mut T, usize)| {
+        for buf_len in 0..LEN {
+            let mut buf = [b'X'; LEN];
+            for fill_len in 0..=buf_len {
+                let mut slice = make(&mut buf[..buf_len]);
+                fill(&mut slice, fill_len);
+                assert_eq!(buf_len - fill_len, slice.remaining_mut());
+                let (head, tail) = buf.split_at(fill_len); // written prefix vs. the rest
+                assert_eq!(&FILL[..fill_len], head);
+                assert!(tail.iter().all(|b| *b == b'X'));
+            }
+        }
+    };
+    // Run the same sweep once with `put_slice` and once with `put_bytes`.
+    test(&|slice, fill_len| slice.put_slice(&FILL[..fill_len]));
+    test(&|slice, fill_len| slice.put_bytes(FILL[0], fill_len));
+}
+
+fn do_test_slice_put_slice_panics<T: ?Sized>(make: impl Fn(&mut [u8]) -> &mut T)
+where
+    for<'r> &'r mut T: BufMut,
+{
+    let mut backing = [b'X'; 4];
+    let mut dst = make(&mut backing[..]);
+    dst.put_slice(b"12345"); // five bytes into a four-byte destination
+}
+
+fn do_test_slice_put_bytes_panics<T: ?Sized>(make: impl Fn(&mut [u8]) -> &mut T)
+where
+    for<'r> &'r mut T: BufMut,
+{
+    let mut backing = [b'X'; 4];
+    let mut dst = make(&mut backing[..]);
+    dst.put_bytes(b'1', 5); // five bytes into a four-byte destination
+}
+
+#[test]
+fn test_slice_buf_mut_small() {
+    do_test_slice_small(|raw| raw); // identity: exercise `&mut [u8]` itself
+}
+
+#[test]
+fn test_slice_buf_mut_large() {
+    do_test_slice_large(|raw| raw); // identity: exercise `&mut [u8]` itself
+}
+
+#[test]
+#[should_panic]
+fn test_slice_buf_mut_put_slice_overflow() {
+    do_test_slice_put_slice_panics(|raw| raw); // identity: exercise `&mut [u8]` itself
+}
+
 #[test]
-fn test_bufs_vec_mut() {
-    let b1: &mut [u8] = &mut [];
-    let b2: &mut [u8] = &mut [];
-    let mut dst = [IoSliceMut::from(b1), IoSliceMut::from(b2)];
+#[should_panic]
+fn test_slice_buf_mut_put_bytes_overflow() {
+    do_test_slice_put_bytes_panics(|x| x);
+}
+
+fn make_maybe_uninit_slice(slice: &mut [u8]) -> &mut [MaybeUninit<u8>] {
+    // SAFETY: `MaybeUninit<u8>` has the size and alignment of `u8`; the cast keeps the length.
+    unsafe { &mut *(slice as *mut [u8] as *mut [MaybeUninit<u8>]) }
+}
 
-    // with no capacity
-    let mut buf = BytesMut::new();
-    assert_eq!(buf.capacity(), 0);
-    assert_eq!(1, buf.bytes_vectored_mut(&mut dst[..]));
+#[test]
+fn test_maybe_uninit_buf_mut_small() {
+    do_test_slice_small(make_maybe_uninit_slice); // same checks, via `&mut [MaybeUninit<u8>]`
+}
 
-    // with capacity
-    let mut buf = BytesMut::with_capacity(64);
-    assert_eq!(1, buf.bytes_vectored_mut(&mut dst[..]));
+#[test]
+fn test_maybe_uninit_buf_mut_large() {
+    do_test_slice_large(make_maybe_uninit_slice);
 }
 
 #[test]
-fn test_mut_slice() {
-    let mut v = vec![0, 0, 0, 0];
-    let mut s = &mut v[..];
-    s.put_u32(42);
+#[should_panic]
+fn test_maybe_uninit_buf_mut_put_slice_overflow() {
+    do_test_slice_put_slice_panics(make_maybe_uninit_slice);
 }
 
+#[test]
+#[should_panic]
+fn test_maybe_uninit_buf_mut_put_bytes_overflow() {
+    do_test_slice_put_bytes_panics(make_maybe_uninit_slice); // via `&mut [MaybeUninit<u8>]`
+}
+
+#[allow(unused_allocation)] // This is intentional.
 #[test]
 fn test_deref_bufmut_forwards() {
     struct Special;
 
-    impl BufMut for Special {
+    unsafe impl BufMut for Special {
         fn remaining_mut(&self) -> usize {
             unreachable!("remaining_mut");
         }
 
-        fn bytes_mut(&mut self) -> &mut [std::mem::MaybeUninit<u8>] {
-            unreachable!("bytes_mut");
+        fn chunk_mut(&mut self) -> &mut UninitSlice {
+            unreachable!("chunk_mut");
         }
 
         unsafe fn advance_mut(&mut self, _: usize) {
@@ -116,3 +247,30 @@ fn test_deref_bufmut_forwards() {
     (Box::new(Special) as Box<dyn BufMut>).put_u8(b'x');
     Box::new(Special).put_u8(b'x');
 }
+
+#[test]
+#[should_panic]
+fn write_byte_panics_if_out_of_bounds() {
+    let mut data = *b"bar";
+    // SAFETY: pointer and length both describe `data`, which outlives `uninit`.
+    let uninit = unsafe { UninitSlice::from_raw_parts_mut(data.as_mut_ptr(), 3) };
+    uninit.write_byte(4, b'f'); // index 4 is past the three-byte slice
+}
+
+#[test]
+#[should_panic]
+fn copy_from_slice_panics_if_different_length_1() {
+    let mut data = *b"bar";
+    // SAFETY: pointer and length both describe `data`, which outlives `uninit`.
+    let uninit = unsafe { UninitSlice::from_raw_parts_mut(data.as_mut_ptr(), 3) };
+    uninit.copy_from_slice(b"a"); // one byte offered for a three-byte slice
+}
+
+#[test]
+#[should_panic]
+fn copy_from_slice_panics_if_different_length_2() {
+    let mut data = *b"bar";
+    // SAFETY: pointer and length both describe `data`, which outlives `uninit`.
+    let uninit = unsafe { UninitSlice::from_raw_parts_mut(data.as_mut_ptr(), 3) };
+    uninit.copy_from_slice(b"abcd"); // four bytes offered for a three-byte slice
+}
diff --git a/tests/test_bytes.rs b/tests/test_bytes.rs
index 40fcae431..613efc886 100644
--- a/tests/test_bytes.rs
+++ b/tests/test_bytes.rs
@@ -1,11 +1,14 @@
-#![deny(warnings, rust_2018_idioms)]
+#![warn(rust_2018_idioms)]
 
-use bytes::{Bytes, BytesMut, Buf, BufMut};
+use bytes::{Buf, BufMut, Bytes, BytesMut};
+use std::sync::atomic::{AtomicUsize, Ordering};
+use std::sync::Arc;
 
+use std::panic::{self, AssertUnwindSafe};
 use std::usize;
 
-const LONG: &'static [u8] = b"mary had a little lamb, little lamb, little lamb";
-const SHORT: &'static [u8] = b"hello world";
+const LONG: &[u8] = b"mary had a little lamb, little lamb, little lamb";
+const SHORT: &[u8] = b"hello world";
 
 fn is_sync<T: Sync>() {}
 fn is_send<T: Send>() {}
@@ -44,7 +47,6 @@ fn test_layout() {
         mem::size_of::<Option<BytesMut>>(),
         "BytesMut should be same size as Option<BytesMut>",
     );
-
 }
 
 #[test]
@@ -87,13 +89,11 @@ fn fmt_write() {
     write!(a, "{}", &s[..64]).unwrap();
     assert_eq!(a, s[..64].as_bytes());
 
-
     let mut b = BytesMut::with_capacity(64);
     write!(b, "{}", &s[..32]).unwrap();
     write!(b, "{}", &s[32..64]).unwrap();
     assert_eq!(b, s[..64].as_bytes());
 
-
     let mut c = BytesMut::with_capacity(64);
     write!(c, "{}", s).unwrap();
     assert_eq!(c, s[..].as_bytes());
@@ -291,6 +291,7 @@ fn split_to_uninitialized() {
 }
 
 #[test]
+#[cfg_attr(not(panic = "unwind"), ignore)]
 fn split_off_to_at_gt_len() {
     fn make_bytes() -> Bytes {
         let mut bytes = BytesMut::with_capacity(100);
@@ -305,11 +306,13 @@ fn split_off_to_at_gt_len() {
 
     assert!(panic::catch_unwind(move || {
         let _ = make_bytes().split_to(5);
-    }).is_err());
+    })
+    .is_err());
 
     assert!(panic::catch_unwind(move || {
         let _ = make_bytes().split_off(5);
-    }).is_err());
+    })
+    .is_err());
 }
 
 #[test]
@@ -342,12 +345,78 @@ fn freeze_clone_unique() {
     assert_eq!(c, s);
 }
 
+#[test]
+fn freeze_after_advance() {
+    let src = &b"abcdefgh"[..];
+    let mut buf = BytesMut::from(src);
+    buf.advance(1);
+    assert_eq!(buf, src[1..]);
+    let frozen = buf.freeze();
+    // Regression check for #352: freeze used to ignore the start offset of a
+    // BytesMut that was still in Vec mode.
+    assert_eq!(frozen, src[1..]);
+}
+
+#[test]
+fn freeze_after_advance_arc() {
+    let src = &b"abcdefgh"[..];
+    let mut buf = BytesMut::from(src);
+    // An empty split switches buf over to the Arc representation.
+    let _ = buf.split_to(0);
+    buf.advance(1);
+    assert_eq!(buf, src[1..]);
+    let frozen = buf.freeze();
+    assert_eq!(frozen, src[1..]);
+}
+
+#[test]
+fn freeze_after_split_to() {
+    let src = &b"abcdefgh"[..];
+    let mut buf = BytesMut::from(src);
+    let _ = buf.split_to(1); // discard the first byte
+    assert_eq!(buf, src[1..]);
+    let frozen = buf.freeze();
+    assert_eq!(frozen, src[1..]);
+}
+
+#[test]
+fn freeze_after_truncate() {
+    let src = &b"abcdefgh"[..];
+    let mut buf = BytesMut::from(src);
+    buf.truncate(7); // drop the last byte
+    assert_eq!(buf, src[..7]);
+    let frozen = buf.freeze();
+    assert_eq!(frozen, src[..7]);
+}
+
+#[test]
+fn freeze_after_truncate_arc() {
+    let src = &b"abcdefgh"[..];
+    let mut buf = BytesMut::from(src);
+    // An empty split switches buf over to the Arc representation.
+    let _ = buf.split_to(0);
+    buf.truncate(7);
+    assert_eq!(buf, src[..7]);
+    let frozen = buf.freeze();
+    assert_eq!(frozen, src[..7]);
+}
+
+#[test]
+fn freeze_after_split_off() {
+    let src = &b"abcdefgh"[..];
+    let mut buf = BytesMut::from(src);
+    let _ = buf.split_off(7); // discard the last byte
+    assert_eq!(buf, src[..7]);
+    let frozen = buf.freeze();
+    assert_eq!(frozen, src[..7]);
+}
+
 #[test]
 fn fns_defined_for_bytes_mut() {
     let mut bytes = BytesMut::from(&b"hello world"[..]);
 
-    bytes.as_ptr();
-    bytes.as_mut_ptr();
+    let _ = bytes.as_ptr();
+    let _ = bytes.as_mut_ptr();
 
     // Iterator
     let v: Vec<u8> = bytes.as_ref().iter().cloned().collect();
@@ -378,7 +447,7 @@ fn reserve_growth() {
     let _ = bytes.split();
 
     bytes.reserve(65);
-    assert_eq!(bytes.capacity(), 128);
+    assert_eq!(bytes.capacity(), 117);
 }
 
 #[test]
@@ -396,6 +465,7 @@ fn reserve_allocates_at_least_original_capacity() {
 }
 
 #[test]
+#[cfg_attr(miri, ignore)] // Miri is too slow
 fn reserve_max_original_capacity_value() {
     const SIZE: usize = 128 * 1024;
 
@@ -449,6 +519,34 @@ fn reserve_in_arc_unique_doubles() {
     assert_eq!(2000, bytes.capacity());
 }
 
+#[test]
+fn reserve_in_arc_unique_does_not_overallocate_after_split() {
+    let mut buf = BytesMut::from(LONG);
+    let full_capacity = buf.capacity();
+    let tail = buf.split_off(LONG.len() / 2);
+    drop(tail);
+    // buf is now in Arc mode and holds the only reference (refcount == 1)
+
+    let shortfall = full_capacity - buf.capacity();
+    buf.reserve(shortfall);
+    assert_eq!(buf.capacity(), full_capacity);
+}
+
+#[test]
+fn reserve_in_arc_unique_does_not_overallocate_after_multiple_splits() {
+    let mut buf = BytesMut::from(LONG);
+    let full_capacity = buf.capacity();
+    let split_at = LONG.len() / 2;
+    for _round in 0..10 {
+        let tail = buf.split_off(split_at);
+        drop(tail);
+        // buf is now in Arc mode and holds the only reference (refcount == 1)
+        let shortfall = full_capacity - buf.capacity();
+        buf.reserve(shortfall);
+    }
+    assert_eq!(buf.capacity(), full_capacity);
+}
+
 #[test]
 fn reserve_in_arc_nonunique_does_not_overallocate() {
     let mut bytes = BytesMut::with_capacity(1000);
@@ -461,6 +559,25 @@ fn reserve_in_arc_nonunique_does_not_overallocate() {
     assert_eq!(2001, bytes.capacity());
 }
 
+/// Exercises `BytesMut::reserve_inner` for the case where the `BytesMut` holds
+/// the only reference to the shared vector and decides to reuse it by
+/// reallocating the `Vec`.
+#[test]
+fn reserve_shared_reuse() {
+    let mut buf = BytesMut::with_capacity(1000);
+    buf.put_slice(b"Hello, World!");
+    let _ = buf.split();
+
+    buf.put_slice(b"!123ex123,sadchELLO,_wORLD!");
+    // Use split_off so that v.capacity() - self.cap != off
+    let _ = buf.split_off(9);
+    assert_eq!(&*buf, b"!123ex123");
+
+    buf.reserve(2000);
+    assert_eq!(&*buf, b"!123ex123");
+    assert_eq!(buf.capacity(), 2009);
+}
+
 #[test]
 fn extend_mut() {
     let mut bytes = BytesMut::with_capacity(0);
@@ -478,6 +595,35 @@ fn extend_from_slice_mut() {
     }
 }
 
+#[test]
+fn extend_mut_from_bytes() {
+    let mut buf = BytesMut::with_capacity(0);
+    buf.extend(std::iter::once(Bytes::from(LONG))); // a single `Bytes` item
+    assert_eq!(*buf, LONG[..]);
+}
+
+#[test]
+fn extend_past_lower_limit_of_size_hint() {
+    // See https://github.com/tokio-rs/bytes/pull/674#pullrequestreview-1913035700
+    struct LowHint<I>(I);
+    // Forwards items unchanged but always reports a lower bound of 5.
+    impl<I> Iterator for LowHint<I>
+    where
+        I: Iterator<Item = u8>,
+    {
+        type Item = u8;
+        fn next(&mut self) -> Option<u8> {
+            self.0.next()
+        }
+        fn size_hint(&self) -> (usize, Option<usize>) {
+            (5, None)
+        }
+    }
+    let mut buf = BytesMut::with_capacity(5);
+    buf.extend(LowHint(std::iter::repeat(0).take(10)));
+    assert_eq!(buf.len(), 10);
+}
+
 #[test]
 fn extend_mut_without_size_hint() {
     let mut bytes = BytesMut::with_capacity(0);
@@ -534,6 +680,43 @@ fn advance_bytes_mut() {
     assert_eq!(a, b"d zomg wat wat"[..]);
 }
 
+// Ensures BytesMut::advance always reduces capacity
+//
+// See https://github.com/tokio-rs/bytes/issues/725
+#[test]
+fn advance_bytes_mut_remaining_capacity() {
+    // reduce the search space under miri
+    let max_capacity = if cfg!(miri) { 16 } else { 256 };
+    for capacity in 0..=max_capacity {
+        for len in 0..=capacity {
+            for advance in 0..=len {
+                // No per-iteration logging: the failing case is named in the assert messages.
+                let mut buf = BytesMut::with_capacity(capacity);
+
+                buf.resize(len, 42);
+                assert_eq!(buf.len(), len, "resize should write `len` bytes");
+                assert_eq!(
+                    buf.remaining(),
+                    len,
+                    "Buf::remaining() should equal BytesMut::len (capacity={capacity}, len={len})"
+                );
+
+                buf.advance(advance);
+                assert_eq!(
+                    buf.remaining(),
+                    len - advance,
+                    "Buf::advance should reduce the remaining len (capacity={capacity}, len={len}, advance={advance})"
+                );
+                assert_eq!(
+                    buf.capacity(),
+                    capacity - advance,
+                    "Buf::advance should reduce the remaining capacity (capacity={capacity}, len={len}, advance={advance})"
+                );
+            }
+        }
+    }
+}
+
 #[test]
 #[should_panic]
 fn advance_past_len() {
@@ -543,15 +726,16 @@ fn advance_past_len() {
 
 #[test]
 // Only run these tests on little endian systems. CI uses qemu for testing
-// little endian... and qemu doesn't really support threading all that well.
-#[cfg(target_endian = "little")]
+// big endian... and qemu doesn't really support threading all that well.
+#[cfg(any(miri, target_endian = "little"))]
+#[cfg(not(target_family = "wasm"))] // wasm without experimental threads proposal doesn't support threads
 fn stress() {
     // Tests promoting a buffer from a vec -> shared in a concurrent situation
     use std::sync::{Arc, Barrier};
     use std::thread;
 
     const THREADS: usize = 8;
-    const ITERS: usize = 1_000;
+    const ITERS: usize = if cfg!(miri) { 100 } else { 1_000 };
 
     for i in 0..ITERS {
         let data = [i as u8; 256];
@@ -590,10 +774,10 @@ fn partial_eq_bytesmut() {
     assert!(bytesmut != bytes2);
 }
 
-/*
 #[test]
-fn bytes_unsplit_basic() {
-    let buf = Bytes::from(&b"aaabbbcccddd"[..]);
+fn bytes_mut_unsplit_basic() {
+    let mut buf = BytesMut::with_capacity(64);
+    buf.extend_from_slice(b"aaabbbcccddd");
 
     let splitted = buf.split_off(6);
     assert_eq!(b"aaabbb", &buf[..]);
@@ -604,22 +788,23 @@ fn bytes_unsplit_basic() {
 }
 
 #[test]
-fn bytes_unsplit_empty_other() {
-    let buf = Bytes::from(&b"aaabbbcccddd"[..]);
+fn bytes_mut_unsplit_empty_other() {
+    let mut buf = BytesMut::with_capacity(64);
+    buf.extend_from_slice(b"aaabbbcccddd");
 
     // empty other
-    let other = Bytes::new();
+    let other = BytesMut::new();
 
     buf.unsplit(other);
     assert_eq!(b"aaabbbcccddd", &buf[..]);
 }
 
 #[test]
-fn bytes_unsplit_empty_self() {
+fn bytes_mut_unsplit_empty_self() {
     // empty self
-    let mut buf = Bytes::new();
+    let mut buf = BytesMut::new();
 
-    let mut other = Bytes::with_capacity(64);
+    let mut other = BytesMut::with_capacity(64);
     other.extend_from_slice(b"aaabbbcccddd");
 
     buf.unsplit(other);
@@ -627,95 +812,28 @@ fn bytes_unsplit_empty_self() {
 }
 
 #[test]
-fn bytes_unsplit_arc_different() {
-    let mut buf = Bytes::with_capacity(64);
-    buf.extend_from_slice(b"aaaabbbbeeee");
-
-    buf.split_off(8); //arc
-
-    let mut buf2 = Bytes::with_capacity(64);
-    buf2.extend_from_slice(b"ccccddddeeee");
-
-    buf2.split_off(8); //arc
-
-    buf.unsplit(buf2);
-    assert_eq!(b"aaaabbbbccccdddd", &buf[..]);
-}
-
-#[test]
-fn bytes_unsplit_arc_non_contiguous() {
-    let mut buf = Bytes::with_capacity(64);
-    buf.extend_from_slice(b"aaaabbbbeeeeccccdddd");
-
-    let mut buf2 = buf.split_off(8); //arc
-
-    let buf3 = buf2.split_off(4); //arc
-
-    buf.unsplit(buf3);
-    assert_eq!(b"aaaabbbbccccdddd", &buf[..]);
-}
-
-#[test]
-fn bytes_unsplit_two_split_offs() {
-    let mut buf = Bytes::with_capacity(64);
-    buf.extend_from_slice(b"aaaabbbbccccdddd");
-
-    let mut buf2 = buf.split_off(8); //arc
-    let buf3 = buf2.split_off(4); //arc
-
-    buf2.unsplit(buf3);
-    buf.unsplit(buf2);
-    assert_eq!(b"aaaabbbbccccdddd", &buf[..]);
-}
-
-#[test]
-fn bytes_unsplit_overlapping_references() {
-    let mut buf = Bytes::with_capacity(64);
-    buf.extend_from_slice(b"abcdefghijklmnopqrstuvwxyz");
-    let mut buf0010 = buf.slice(0..10);
-    let buf1020 = buf.slice(10..20);
-    let buf0515 = buf.slice(5..15);
-    buf0010.unsplit(buf1020);
-    assert_eq!(b"abcdefghijklmnopqrst", &buf0010[..]);
-    assert_eq!(b"fghijklmno", &buf0515[..]);
-}
-*/
-
-#[test]
-fn bytes_mut_unsplit_basic() {
+fn bytes_mut_unsplit_other_keeps_capacity() {
     let mut buf = BytesMut::with_capacity(64);
-    buf.extend_from_slice(b"aaabbbcccddd");
+    buf.extend_from_slice(b"aabb");
 
-    let splitted = buf.split_off(6);
-    assert_eq!(b"aaabbb", &buf[..]);
-    assert_eq!(b"cccddd", &splitted[..]);
+    // non empty other created "from" buf
+    let mut other = buf.split_off(buf.len());
+    other.extend_from_slice(b"ccddee");
+    buf.unsplit(other);
 
-    buf.unsplit(splitted);
-    assert_eq!(b"aaabbbcccddd", &buf[..]);
+    assert_eq!(buf.capacity(), 64);
 }
 
 #[test]
-fn bytes_mut_unsplit_empty_other() {
+fn bytes_mut_unsplit_empty_other_keeps_capacity() {
     let mut buf = BytesMut::with_capacity(64);
-    buf.extend_from_slice(b"aaabbbcccddd");
-
-    // empty other
-    let other = BytesMut::new();
+    buf.extend_from_slice(b"aabbccddee");
 
+    // empty other created "from" buf
+    let other = buf.split_off(buf.len());
     buf.unsplit(other);
-    assert_eq!(b"aaabbbcccddd", &buf[..]);
-}
-
-#[test]
-fn bytes_mut_unsplit_empty_self() {
-    // empty self
-    let mut buf = BytesMut::new();
-
-    let mut other = BytesMut::with_capacity(64);
-    other.extend_from_slice(b"aaabbbcccddd");
 
-    buf.unsplit(other);
-    assert_eq!(b"aaabbbcccddd", &buf[..]);
+    assert_eq!(buf.capacity(), 64);
 }
 
 #[test]
@@ -783,7 +901,7 @@ fn from_iter_no_size_hint() {
 
 fn test_slice_ref(bytes: &Bytes, start: usize, end: usize, expected: &[u8]) {
     let slice = &(bytes.as_ref()[start..end]);
-    let sub = bytes.slice_ref(&slice);
+    let sub = bytes.slice_ref(slice);
     assert_eq!(&sub[..], expected);
 }
 
@@ -798,13 +916,12 @@ fn slice_ref_works() {
     test_slice_ref(&bytes, 9, 9, b"");
 }
 
-
 #[test]
 fn slice_ref_empty() {
     let bytes = Bytes::from(&b""[..]);
     let slice = &(bytes.as_ref()[0..0]);
 
-    let sub = bytes.slice_ref(&slice);
+    let sub = bytes.slice_ref(slice);
     assert_eq!(&sub[..], b"");
 }
 
@@ -848,23 +965,39 @@ fn bytes_buf_mut_advance() {
     let mut bytes = BytesMut::with_capacity(1024);
 
     unsafe {
-        let ptr = bytes.bytes_mut().as_ptr();
-        assert_eq!(1024, bytes.bytes_mut().len());
+        let ptr = bytes.chunk_mut().as_mut_ptr();
+        assert_eq!(1024, bytes.chunk_mut().len());
 
         bytes.advance_mut(10);
 
-        let next = bytes.bytes_mut().as_ptr();
-        assert_eq!(1024 - 10, bytes.bytes_mut().len());
+        let next = bytes.chunk_mut().as_mut_ptr();
+        assert_eq!(1024 - 10, bytes.chunk_mut().len());
         assert_eq!(ptr.offset(10), next);
 
         // advance to the end
         bytes.advance_mut(1024 - 10);
 
         // The buffer size is doubled
-        assert_eq!(1024, bytes.bytes_mut().len());
+        assert_eq!(1024, bytes.chunk_mut().len());
     }
 }
 
+#[test]
+fn bytes_buf_mut_reuse_when_fully_consumed() {
+    use bytes::{Buf, BytesMut};
+    let mut scratch = BytesMut::new();
+    scratch.reserve(8192);
+    scratch.extend_from_slice(&[0u8; 100]);
+    // Remember where the data starts, then consume all of it.
+    let start = &scratch[0] as *const u8;
+    scratch.advance(100);
+    // After reserving again, new data must land at the remembered address.
+    scratch.reserve(8192);
+    scratch.extend_from_slice(b" ");
+
+    assert_eq!(&scratch[0] as *const u8, start);
+}
+
 #[test]
 #[should_panic]
 fn bytes_reserve_overflow() {
@@ -880,3 +1013,637 @@ fn bytes_with_capacity_but_empty() {
     let vec = Vec::with_capacity(1);
     let _ = Bytes::from(vec);
 }
+
+#[test]
+fn bytes_put_bytes() {
+    let mut bytes = BytesMut::new();
+    bytes.put_u8(17);
+    bytes.put_bytes(19, 2);
+    assert_eq!([17, 19, 19], bytes.as_ref());
+}
+
+#[test]
+fn box_slice_empty() {
+    // See https://github.com/tokio-rs/bytes/issues/340
+    let empty: Box<[u8]> = Default::default();
+    let b = Bytes::from(empty);
+    assert!(b.is_empty());
+}
+
+#[test]
+fn bytes_into_vec() {
+    // Test kind == KIND_VEC
+    let content = b"helloworld";
+
+    let mut bytes = BytesMut::new();
+    bytes.put_slice(content);
+
+    let vec: Vec<u8> = bytes.into();
+    assert_eq!(&vec, content);
+
+    // Test kind == KIND_ARC, shared.is_unique() == True
+    let mut bytes = BytesMut::new();
+    bytes.put_slice(b"abcdewe23");
+    bytes.put_slice(content);
+
+    // Overwrite the bytes to make sure only one reference to the underlying
+    // Vec exists.
+    bytes = bytes.split_off(9);
+
+    let vec: Vec<u8> = bytes.into();
+    assert_eq!(&vec, content);
+
+    // Test kind == KIND_ARC, shared.is_unique() == False
+    let prefix = b"abcdewe23";
+
+    let mut bytes = BytesMut::new();
+    bytes.put_slice(prefix);
+    bytes.put_slice(content);
+
+    let vec: Vec<u8> = bytes.split_off(prefix.len()).into();
+    assert_eq!(&vec, content);
+
+    let vec: Vec<u8> = bytes.into();
+    assert_eq!(&vec, prefix);
+}
+
+#[test]
+fn test_bytes_into_vec() {
+    // Test STATIC_VTABLE.to_vec
+    let bs = b"1b23exfcz3r";
+    let vec: Vec<u8> = Bytes::from_static(bs).into();
+    assert_eq!(&*vec, bs);
+
+    // Test bytes_mut.SHARED_VTABLE.to_vec impl
+    eprintln!("1");
+    let mut bytes_mut: BytesMut = bs[..].into();
+
+    // Set kind to KIND_ARC so that after freeze, Bytes will use bytes_mut.SHARED_VTABLE
+    eprintln!("2");
+    drop(bytes_mut.split_off(bs.len()));
+
+    eprintln!("3");
+    let b1 = bytes_mut.freeze();
+    eprintln!("4");
+    let b2 = b1.clone();
+
+    eprintln!("{:#?}", (&*b1).as_ptr());
+
+    // shared.is_unique() = False
+    eprintln!("5");
+    assert_eq!(&*Vec::from(b2), bs);
+
+    // shared.is_unique() = True
+    eprintln!("6");
+    assert_eq!(&*Vec::from(b1), bs);
+
+    // Test bytes_mut.SHARED_VTABLE.to_vec impl where offset != 0
+    let mut bytes_mut1: BytesMut = bs[..].into();
+    let bytes_mut2 = bytes_mut1.split_off(9);
+
+    let b1 = bytes_mut1.freeze();
+    let b2 = bytes_mut2.freeze();
+
+    assert_eq!(Vec::from(b2), bs[9..]);
+    assert_eq!(Vec::from(b1), bs[..9]);
+}
+
+#[test]
+fn test_bytes_into_vec_promotable_even() {
+    let vec = vec![33u8; 1024];
+
+    // Test cases where kind == KIND_VEC
+    let b1 = Bytes::from(vec.clone());
+    assert_eq!(Vec::from(b1), vec);
+
+    // Test cases where kind == KIND_ARC, ref_cnt == 1
+    let b1 = Bytes::from(vec.clone());
+    drop(b1.clone());
+    assert_eq!(Vec::from(b1), vec);
+
+    // Test cases where kind == KIND_ARC, ref_cnt == 2
+    let b1 = Bytes::from(vec.clone());
+    let b2 = b1.clone();
+    assert_eq!(Vec::from(b1), vec);
+
+    // Test cases where vtable = SHARED_VTABLE, kind == KIND_ARC, ref_cnt == 1
+    assert_eq!(Vec::from(b2), vec);
+
+    // Test cases where offset != 0
+    let mut b1 = Bytes::from(vec.clone());
+    let b2 = b1.split_off(20);
+
+    assert_eq!(Vec::from(b2), vec[20..]);
+    assert_eq!(Vec::from(b1), vec[..20]);
+}
+
+#[test]
+fn test_bytes_vec_conversion() {
+    let mut vec = Vec::with_capacity(10);
+    vec.extend(b"abcdefg");
+    let b = Bytes::from(vec);
+    let v = Vec::from(b);
+    assert_eq!(v.len(), 7);
+    assert_eq!(v.capacity(), 10);
+
+    let mut b = Bytes::from(v);
+    b.advance(1);
+    let v = Vec::from(b);
+    assert_eq!(v.len(), 6);
+    assert_eq!(v.capacity(), 10);
+    assert_eq!(v.as_slice(), b"bcdefg");
+}
+
+#[test]
+fn test_bytes_mut_conversion() {
+    let mut b1 = BytesMut::with_capacity(10);
+    b1.extend(b"abcdefg");
+    let b2 = Bytes::from(b1);
+    let v = Vec::from(b2);
+    assert_eq!(v.len(), 7);
+    assert_eq!(v.capacity(), 10);
+
+    let mut b = Bytes::from(v);
+    b.advance(1);
+    let v = Vec::from(b);
+    assert_eq!(v.len(), 6);
+    assert_eq!(v.capacity(), 10);
+    assert_eq!(v.as_slice(), b"bcdefg");
+}
+
+#[test]
+fn test_bytes_capacity_len() {
+    for cap in 0..100 {
+        for len in 0..=cap {
+            let mut v = Vec::with_capacity(cap);
+            v.resize(len, 0);
+            let _ = Bytes::from(v);
+        }
+    }
+}
+
+#[test]
+fn static_is_unique() {
+    let b = Bytes::from_static(LONG);
+    assert!(!b.is_unique());
+}
+
+#[test]
+fn vec_is_unique() {
+    let v: Vec<u8> = LONG.to_vec();
+    let b = Bytes::from(v);
+    assert!(b.is_unique());
+}
+
+#[test]
+fn arc_is_unique() {
+    let v: Vec<u8> = LONG.to_vec();
+    let b = Bytes::from(v);
+    let c = b.clone();
+    assert!(!b.is_unique());
+    drop(c);
+    assert!(b.is_unique());
+}
+
+#[test]
+fn shared_is_unique() {
+    let v: Vec<u8> = LONG.to_vec();
+    let b = Bytes::from(v);
+    let c = b.clone();
+    assert!(!c.is_unique());
+    drop(b);
+    assert!(c.is_unique());
+}
+
+#[test]
+fn mut_shared_is_unique() {
+    let mut b = BytesMut::from(LONG);
+    let c = b.split().freeze();
+    assert!(!c.is_unique());
+    drop(b);
+    assert!(c.is_unique());
+}
+
+#[test]
+fn test_bytesmut_from_bytes_static() {
+    let bs = b"1b23exfcz3r";
+
+    // Test STATIC_VTABLE.to_mut
+    let bytes_mut = BytesMut::from(Bytes::from_static(bs));
+    assert_eq!(bytes_mut, bs[..]);
+}
+
+#[test]
+fn test_bytesmut_from_bytes_bytes_mut_vec() {
+    let bs = b"1b23exfcz3r";
+    let bs_long = b"1b23exfcz3r1b23exfcz3r";
+
+    // Test case where kind == KIND_VEC
+    let mut bytes_mut: BytesMut = bs[..].into();
+    bytes_mut = BytesMut::from(bytes_mut.freeze());
+    assert_eq!(bytes_mut, bs[..]);
+    bytes_mut.extend_from_slice(&bs[..]);
+    assert_eq!(bytes_mut, bs_long[..]);
+}
+
+#[test]
+fn test_bytesmut_from_bytes_bytes_mut_shared() {
+    let bs = b"1b23exfcz3r";
+
+    // Set kind to KIND_ARC so that after freeze, Bytes will use bytes_mut.SHARED_VTABLE
+    let mut bytes_mut: BytesMut = bs[..].into();
+    drop(bytes_mut.split_off(bs.len()));
+
+    let b1 = bytes_mut.freeze();
+    let b2 = b1.clone();
+
+    // shared.is_unique() = False
+    let mut b1m = BytesMut::from(b1);
+    assert_eq!(b1m, bs[..]);
+    b1m[0] = b'9';
+
+    // shared.is_unique() = True
+    let b2m = BytesMut::from(b2);
+    assert_eq!(b2m, bs[..]);
+}
+
+#[test]
+fn test_bytesmut_from_bytes_bytes_mut_offset() {
+    let bs = b"1b23exfcz3r";
+
+    // Test bytes_mut.SHARED_VTABLE.to_mut impl where offset != 0
+    let mut bytes_mut1: BytesMut = bs[..].into();
+    let bytes_mut2 = bytes_mut1.split_off(9);
+
+    let b1 = bytes_mut1.freeze();
+    let b2 = bytes_mut2.freeze();
+
+    let b1m = BytesMut::from(b1);
+    let b2m = BytesMut::from(b2);
+
+    assert_eq!(b2m, bs[9..]);
+    assert_eq!(b1m, bs[..9]);
+}
+
+#[test]
+fn test_bytesmut_from_bytes_promotable_even_vec() {
+    let vec = vec![33u8; 1024];
+
+    // Test case where kind == KIND_VEC
+    let b1 = Bytes::from(vec.clone());
+    let b1m = BytesMut::from(b1);
+    assert_eq!(b1m, vec);
+}
+
+#[test]
+fn test_bytesmut_from_bytes_promotable_even_arc_1() {
+    let vec = vec![33u8; 1024];
+
+    // Test case where kind == KIND_ARC, ref_cnt == 1
+    let b1 = Bytes::from(vec.clone());
+    drop(b1.clone());
+    let b1m = BytesMut::from(b1);
+    assert_eq!(b1m, vec);
+}
+
+#[test]
+fn test_bytesmut_from_bytes_promotable_even_arc_2() {
+    let vec = vec![33u8; 1024];
+
+    // Test case where kind == KIND_ARC, ref_cnt == 2
+    let b1 = Bytes::from(vec.clone());
+    let b2 = b1.clone();
+    let b1m = BytesMut::from(b1);
+    assert_eq!(b1m, vec);
+
+    // Test case where vtable = SHARED_VTABLE, kind == KIND_ARC, ref_cnt == 1
+    let b2m = BytesMut::from(b2);
+    assert_eq!(b2m, vec);
+}
+
+#[test]
+fn test_bytesmut_from_bytes_promotable_even_arc_offset() {
+    let vec = vec![33u8; 1024];
+
+    // Test case where offset != 0
+    let mut b1 = Bytes::from(vec.clone());
+    let b2 = b1.split_off(20);
+    let b1m = BytesMut::from(b1);
+    let b2m = BytesMut::from(b2);
+
+    assert_eq!(b2m, vec[20..]);
+    assert_eq!(b1m, vec[..20]);
+}
+
+#[test]
+fn try_reclaim_empty() {
+    let mut buf = BytesMut::new();
+    assert_eq!(false, buf.try_reclaim(6));
+    buf.reserve(6);
+    assert_eq!(true, buf.try_reclaim(6));
+    let cap = buf.capacity();
+    assert!(cap >= 6);
+    assert_eq!(false, buf.try_reclaim(cap + 1));
+
+    let mut buf = BytesMut::new();
+    buf.reserve(6);
+    let cap = buf.capacity();
+    assert!(cap >= 6);
+    let mut split = buf.split();
+    drop(buf);
+    assert_eq!(0, split.capacity());
+    assert_eq!(true, split.try_reclaim(6));
+    assert_eq!(false, split.try_reclaim(cap + 1));
+}
+
+#[test]
+fn try_reclaim_vec() {
+    let mut buf = BytesMut::with_capacity(6);
+    buf.put_slice(b"abc");
+    // Reclaiming a ludicrous amount of space should calmly return false
+    assert_eq!(false, buf.try_reclaim(usize::MAX));
+
+    assert_eq!(false, buf.try_reclaim(6));
+    buf.advance(2);
+    assert_eq!(4, buf.capacity());
+    // We can reclaim 5 bytes, because the byte in the buffer can be moved to the front. 6 bytes
+    // cannot be reclaimed because there is already one byte stored
+    assert_eq!(false, buf.try_reclaim(6));
+    assert_eq!(true, buf.try_reclaim(5));
+    buf.advance(1);
+    assert_eq!(true, buf.try_reclaim(6));
+    assert_eq!(6, buf.capacity());
+}
+
+#[test]
+fn try_reclaim_arc() {
+    let mut buf = BytesMut::with_capacity(6);
+    buf.put_slice(b"abc");
+    let x = buf.split().freeze();
+    buf.put_slice(b"def");
+    // Reclaiming a ludicrous amount of space should calmly return false
+    assert_eq!(false, buf.try_reclaim(usize::MAX));
+
+    let y = buf.split().freeze();
+    let z = y.clone();
+    assert_eq!(false, buf.try_reclaim(6));
+    drop(x);
+    drop(z);
+    assert_eq!(false, buf.try_reclaim(6));
+    drop(y);
+    assert_eq!(true, buf.try_reclaim(6));
+    assert_eq!(6, buf.capacity());
+    assert_eq!(0, buf.len());
+    buf.put_slice(b"abc");
+    buf.put_slice(b"def");
+    assert_eq!(6, buf.capacity());
+    assert_eq!(6, buf.len());
+    assert_eq!(false, buf.try_reclaim(6));
+    buf.advance(4);
+    assert_eq!(true, buf.try_reclaim(4));
+    buf.advance(2);
+    assert_eq!(true, buf.try_reclaim(6));
+}
+
+#[test]
+fn split_off_empty_addr() {
+    let mut buf = Bytes::from(vec![0; 1024]);
+
+    let ptr_start = buf.as_ptr();
+    let ptr_end = ptr_start.wrapping_add(1024);
+
+    let empty_end = buf.split_off(1024);
+    assert_eq!(empty_end.len(), 0);
+    assert_eq!(empty_end.as_ptr(), ptr_end);
+
+    let _ = buf.split_off(0);
+    assert_eq!(buf.len(), 0);
+    assert_eq!(buf.as_ptr(), ptr_start);
+
+    // Is miri happy about the provenance?
+    let _ = &empty_end[..];
+    let _ = &buf[..];
+}
+
+#[test]
+fn split_to_empty_addr() {
+    let mut buf = Bytes::from(vec![0; 1024]);
+
+    let ptr_start = buf.as_ptr();
+    let ptr_end = ptr_start.wrapping_add(1024);
+
+    let empty_start = buf.split_to(0);
+    assert_eq!(empty_start.len(), 0);
+    assert_eq!(empty_start.as_ptr(), ptr_start);
+
+    let _ = buf.split_to(1024);
+    assert_eq!(buf.len(), 0);
+    assert_eq!(buf.as_ptr(), ptr_end);
+
+    // Is miri happy about the provenance?
+    let _ = &empty_start[..];
+    let _ = &buf[..];
+}
+
+#[test]
+fn split_off_empty_addr_mut() {
+    let mut buf = BytesMut::from([0; 1024].as_slice());
+
+    let ptr_start = buf.as_ptr();
+    let ptr_end = ptr_start.wrapping_add(1024);
+
+    let empty_end = buf.split_off(1024);
+    assert_eq!(empty_end.len(), 0);
+    assert_eq!(empty_end.as_ptr(), ptr_end);
+
+    let _ = buf.split_off(0);
+    assert_eq!(buf.len(), 0);
+    assert_eq!(buf.as_ptr(), ptr_start);
+
+    // Is miri happy about the provenance?
+    let _ = &empty_end[..];
+    let _ = &buf[..];
+}
+
+#[test]
+fn split_to_empty_addr_mut() {
+    let mut buf = BytesMut::from([0; 1024].as_slice());
+
+    let ptr_start = buf.as_ptr();
+    let ptr_end = ptr_start.wrapping_add(1024);
+
+    let empty_start = buf.split_to(0);
+    assert_eq!(empty_start.len(), 0);
+    assert_eq!(empty_start.as_ptr(), ptr_start);
+
+    let _ = buf.split_to(1024);
+    assert_eq!(buf.len(), 0);
+    assert_eq!(buf.as_ptr(), ptr_end);
+
+    // Is miri happy about the provenance?
+    let _ = &empty_start[..];
+    let _ = &buf[..];
+}
+
+#[derive(Clone)]
+struct SharedAtomicCounter(Arc<AtomicUsize>);
+
+impl SharedAtomicCounter {
+    pub fn new() -> Self {
+        SharedAtomicCounter(Arc::new(AtomicUsize::new(0)))
+    }
+
+    pub fn increment(&self) {
+        self.0.fetch_add(1, Ordering::AcqRel);
+    }
+
+    pub fn get(&self) -> usize {
+        self.0.load(Ordering::Acquire)
+    }
+}
+
+#[derive(Clone)]
+struct OwnedTester<const L: usize> {
+    buf: [u8; L],
+    drop_count: SharedAtomicCounter,
+    pub panic_as_ref: bool,
+}
+
+impl<const L: usize> OwnedTester<L> {
+    fn new(buf: [u8; L], drop_count: SharedAtomicCounter) -> Self {
+        Self {
+            buf,
+            drop_count,
+            panic_as_ref: false,
+        }
+    }
+}
+
+impl<const L: usize> AsRef<[u8]> for OwnedTester<L> {
+    fn as_ref(&self) -> &[u8] {
+        if self.panic_as_ref {
+            panic!("test-triggered panic in `AsRef<[u8]> for OwnedTester`");
+        }
+        self.buf.as_slice()
+    }
+}
+
+impl<const L: usize> Drop for OwnedTester<L> {
+    fn drop(&mut self) {
+        self.drop_count.increment();
+    }
+}
+
+#[test]
+fn owned_is_unique_always_false() {
+    let b1 = Bytes::from_owner([1, 2, 3, 4, 5, 6, 7]);
+    assert!(!b1.is_unique()); // even if ref_cnt == 1
+    let b2 = b1.clone();
+    assert!(!b1.is_unique());
+    assert!(!b2.is_unique());
+    drop(b1);
+    assert!(!b2.is_unique()); // even if ref_cnt == 1
+}
+
+#[test]
+fn owned_buf_sharing() {
+    let buf = [1, 2, 3, 4, 5, 6, 7];
+    let b1 = Bytes::from_owner(buf);
+    let b2 = b1.clone();
+    assert_eq!(&buf[..], &b1[..]);
+    assert_eq!(&buf[..], &b2[..]);
+    assert_eq!(b1.as_ptr(), b2.as_ptr());
+    assert_eq!(b1.len(), b2.len());
+    assert_eq!(b1.len(), buf.len());
+}
+
+#[test]
+fn owned_buf_slicing() {
+    let b1 = Bytes::from_owner(SHORT);
+    assert_eq!(SHORT, &b1[..]);
+    let b2 = b1.slice(1..(b1.len() - 1));
+    assert_eq!(&SHORT[1..(SHORT.len() - 1)], b2);
+    assert_eq!(unsafe { SHORT.as_ptr().add(1) }, b2.as_ptr());
+    assert_eq!(SHORT.len() - 2, b2.len());
+}
+
+#[test]
+fn owned_dropped_exactly_once() {
+    let buf: [u8; 5] = [1, 2, 3, 4, 5];
+    let drop_counter = SharedAtomicCounter::new();
+    let owner = OwnedTester::new(buf, drop_counter.clone());
+    let b1 = Bytes::from_owner(owner);
+    let b2 = b1.clone();
+    assert_eq!(drop_counter.get(), 0);
+    drop(b1);
+    assert_eq!(drop_counter.get(), 0);
+    let b3 = b2.slice(1..b2.len() - 1);
+    drop(b2);
+    assert_eq!(drop_counter.get(), 0);
+    drop(b3);
+    assert_eq!(drop_counter.get(), 1);
+}
+
+#[test]
+fn owned_to_mut() {
+    let buf: [u8; 10] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
+    let drop_counter = SharedAtomicCounter::new();
+    let owner = OwnedTester::new(buf, drop_counter.clone());
+    let b1 = Bytes::from_owner(owner);
+
+    // Holding an owner will fail converting to a BytesMut,
+    // even when the bytes instance has a ref_cnt == 1.
+    let b1 = b1.try_into_mut().unwrap_err();
+
+    // That said, it's still possible, just not cheap.
+    let bm1: BytesMut = b1.into();
+    let new_buf = &bm1[..];
+    assert_eq!(new_buf, &buf[..]);
+
+    // `.into::<BytesMut>()` has correctly dropped the owner
+    assert_eq!(drop_counter.get(), 1);
+}
+
+#[test]
+fn owned_to_vec() {
+    let buf: [u8; 10] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
+    let drop_counter = SharedAtomicCounter::new();
+    let owner = OwnedTester::new(buf, drop_counter.clone());
+    let b1 = Bytes::from_owner(owner);
+
+    let v1 = b1.to_vec();
+    assert_eq!(&v1[..], &buf[..]);
+    assert_eq!(&v1[..], &b1[..]);
+
+    drop(b1);
+    assert_eq!(drop_counter.get(), 1);
+}
+
+#[test]
+fn owned_into_vec() {
+    let drop_counter = SharedAtomicCounter::new();
+    let buf: [u8; 10] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
+    let owner = OwnedTester::new(buf, drop_counter.clone());
+    let b1 = Bytes::from_owner(owner);
+
+    let v1: Vec<u8> = b1.into();
+    assert_eq!(&v1[..], &buf[..]);
+    // into() vec will copy out of the owner and drop it
+    assert_eq!(drop_counter.get(), 1);
+}
+
+#[test]
+#[cfg_attr(not(panic = "unwind"), ignore)]
+fn owned_safe_drop_on_as_ref_panic() {
+    let buf: [u8; 10] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
+    let drop_counter = SharedAtomicCounter::new();
+    let mut owner = OwnedTester::new(buf, drop_counter.clone());
+    owner.panic_as_ref = true;
+
+    let result = panic::catch_unwind(AssertUnwindSafe(|| {
+        let _ = Bytes::from_owner(owner);
+    }));
+
+    assert!(result.is_err());
+    assert_eq!(drop_counter.get(), 1);
+}
diff --git a/tests/test_bytes_odd_alloc.rs b/tests/test_bytes_odd_alloc.rs
index 4ce424b7c..4758dc2f9 100644
--- a/tests/test_bytes_odd_alloc.rs
+++ b/tests/test_bytes_odd_alloc.rs
@@ -1,10 +1,12 @@
 //! Test using `Bytes` with an allocator that hands out "odd" pointers for
 //! vectors (pointers where the LSB is set).
 
+#![cfg(not(miri))] // Miri does not support custom allocators (also, Miri is "odd" by default with 50% chance)
+
 use std::alloc::{GlobalAlloc, Layout, System};
 use std::ptr;
 
-use bytes::Bytes;
+use bytes::{Bytes, BytesMut};
 
 #[global_allocator]
 static ODD: Odd = Odd;
@@ -22,8 +24,7 @@ unsafe impl GlobalAlloc for Odd {
             };
             let ptr = System.alloc(new_layout);
             if !ptr.is_null() {
-                let ptr = ptr.offset(1);
-                ptr
+                ptr.offset(1)
             } else {
                 ptr
             }
@@ -65,3 +66,82 @@ fn test_bytes_clone_drop() {
     let b1 = Bytes::from(vec);
     let _b2 = b1.clone();
 }
+
+#[test]
+fn test_bytes_into_vec() {
+    let vec = vec![33u8; 1024];
+
+    // Test cases where kind == KIND_VEC
+    let b1 = Bytes::from(vec.clone());
+    assert_eq!(Vec::from(b1), vec);
+
+    // Test cases where kind == KIND_ARC, ref_cnt == 1
+    let b1 = Bytes::from(vec.clone());
+    drop(b1.clone());
+    assert_eq!(Vec::from(b1), vec);
+
+    // Test cases where kind == KIND_ARC, ref_cnt == 2
+    let b1 = Bytes::from(vec.clone());
+    let b2 = b1.clone();
+    assert_eq!(Vec::from(b1), vec);
+
+    // Test cases where vtable = SHARED_VTABLE, kind == KIND_ARC, ref_cnt == 1
+    assert_eq!(Vec::from(b2), vec);
+
+    // Test cases where offset != 0
+    let mut b1 = Bytes::from(vec.clone());
+    let b2 = b1.split_off(20);
+
+    assert_eq!(Vec::from(b2), vec[20..]);
+    assert_eq!(Vec::from(b1), vec[..20]);
+}
+
+#[test]
+fn test_bytesmut_from_bytes_vec() {
+    let vec = vec![33u8; 1024];
+
+    // Test case where kind == KIND_VEC
+    let b1 = Bytes::from(vec.clone());
+    let b1m = BytesMut::from(b1);
+    assert_eq!(b1m, vec);
+}
+
+#[test]
+fn test_bytesmut_from_bytes_arc_1() {
+    let vec = vec![33u8; 1024];
+
+    // Test case where kind == KIND_ARC, ref_cnt == 1
+    let b1 = Bytes::from(vec.clone());
+    drop(b1.clone());
+    let b1m = BytesMut::from(b1);
+    assert_eq!(b1m, vec);
+}
+
+#[test]
+fn test_bytesmut_from_bytes_arc_2() {
+    let vec = vec![33u8; 1024];
+
+    // Test case where kind == KIND_ARC, ref_cnt == 2
+    let b1 = Bytes::from(vec.clone());
+    let b2 = b1.clone();
+    let b1m = BytesMut::from(b1);
+    assert_eq!(b1m, vec);
+
+    // Test case where vtable = SHARED_VTABLE, kind == KIND_ARC, ref_cnt == 1
+    let b2m = BytesMut::from(b2);
+    assert_eq!(b2m, vec);
+}
+
+#[test]
+fn test_bytesmut_from_bytes_arc_offset() {
+    let vec = vec![33u8; 1024];
+
+    // Test case where offset != 0
+    let mut b1 = Bytes::from(vec.clone());
+    let b2 = b1.split_off(20);
+    let b1m = BytesMut::from(b1);
+    let b2m = BytesMut::from(b2);
+
+    assert_eq!(b2m, vec[20..]);
+    assert_eq!(b1m, vec[..20]);
+}
diff --git a/tests/test_bytes_vec_alloc.rs b/tests/test_bytes_vec_alloc.rs
index dc007cfd4..9c7601717 100644
--- a/tests/test_bytes_vec_alloc.rs
+++ b/tests/test_bytes_vec_alloc.rs
@@ -1,57 +1,90 @@
+#![cfg(not(miri))]
 use std::alloc::{GlobalAlloc, Layout, System};
-use std::{mem, ptr};
+use std::ptr::null_mut;
+use std::sync::atomic::{AtomicPtr, AtomicUsize, Ordering};
 
 use bytes::{Buf, Bytes};
 
 #[global_allocator]
-static LEDGER: Ledger = Ledger;
+static LEDGER: Ledger = Ledger::new();
 
-struct Ledger;
+const LEDGER_LENGTH: usize = 1024 * 1024;
 
-const USIZE_SIZE: usize = mem::size_of::<usize>();
+struct Ledger {
+    alloc_table: [(AtomicPtr<u8>, AtomicUsize); LEDGER_LENGTH],
+}
 
-unsafe impl GlobalAlloc for Ledger {
-    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
-        if layout.align() == 1 && layout.size() > 0 {
-            // Allocate extra space to stash a record of
-            // how much space there was.
-            let orig_size = layout.size();
-            let size = orig_size + USIZE_SIZE;
-            let new_layout = match Layout::from_size_align(size, 1) {
-                Ok(layout) => layout,
-                Err(_err) => return ptr::null_mut(),
-            };
-            let ptr = System.alloc(new_layout);
-            if !ptr.is_null() {
-                (ptr as *mut usize).write(orig_size);
-                let ptr = ptr.offset(USIZE_SIZE as isize);
-                ptr
-            } else {
-                ptr
+impl Ledger {
+    const fn new() -> Self {
+        const ELEM: (AtomicPtr<u8>, AtomicUsize) =
+            (AtomicPtr::new(null_mut()), AtomicUsize::new(0));
+        let alloc_table = [ELEM; LEDGER_LENGTH];
+
+        Self { alloc_table }
+    }
+
+    /// Iterate over our table until we find an open entry, then insert into said entry
+    fn insert(&self, ptr: *mut u8, size: usize) {
+        for (entry_ptr, entry_size) in self.alloc_table.iter() {
+            // SeqCst is good enough here: we don't care about perf, we just want to be correct.
+            if entry_ptr
+                .compare_exchange(null_mut(), ptr, Ordering::SeqCst, Ordering::SeqCst)
+                .is_ok()
+            {
+                entry_size.store(size, Ordering::SeqCst);
+                return;
             }
-        } else {
-            System.alloc(layout)
         }
+
+        panic!("Ledger ran out of space.");
     }
 
-    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
-        if layout.align() == 1 && layout.size() > 0 {
-            let off_ptr = (ptr as *mut usize).offset(-1);
-            let orig_size = off_ptr.read();
-            if orig_size != layout.size() {
-                panic!("bad dealloc: alloc size was {}, dealloc size is {}", orig_size, layout.size());
+    fn remove(&self, ptr: *mut u8) -> usize {
+        for (entry_ptr, entry_size) in self.alloc_table.iter() {
+            // Set the entry to a sentinel value that will never be deallocated, so that we
+            // don't have any chance of a race condition.
+            //
+            // Don't worry: LEDGER_LENGTH is large enough to compensate for never reclaiming space.
+            if entry_ptr
+                .compare_exchange(
+                    ptr,
+                    invalid_ptr(usize::MAX),
+                    Ordering::SeqCst,
+                    Ordering::SeqCst,
+                )
+                .is_ok()
+            {
+                return entry_size.load(Ordering::SeqCst);
             }
+        }
+
+        panic!("Couldn't find a matching entry for {:x?}", ptr);
+    }
+}
 
-            let new_layout = match Layout::from_size_align(layout.size() + USIZE_SIZE, 1) {
-                Ok(layout) => layout,
-                Err(_err) => std::process::abort(),
-            };
-            System.dealloc(off_ptr as *mut u8, new_layout);
+unsafe impl GlobalAlloc for Ledger {
+    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
+        let size = layout.size();
+        let ptr = System.alloc(layout);
+        self.insert(ptr, size);
+        ptr
+    }
+
+    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
+        let orig_size = self.remove(ptr);
+
+        if orig_size != layout.size() {
+            panic!(
+                "bad dealloc: alloc size was {}, dealloc size is {}",
+                orig_size,
+                layout.size()
+            );
         } else {
             System.dealloc(ptr, layout);
         }
     }
 }
+
 #[test]
 fn test_bytes_advance() {
     let mut bytes = Bytes::from(vec![10, 20, 30]);
@@ -73,3 +106,41 @@ fn test_bytes_truncate_and_advance() {
     bytes.advance(1);
     drop(bytes);
 }
+
+/// Returns a dangling pointer with the given address. This is used to store
+/// integer data in pointer fields.
+#[inline]
+fn invalid_ptr<T>(addr: usize) -> *mut T {
+    let ptr = std::ptr::null_mut::<u8>().wrapping_add(addr);
+    debug_assert_eq!(ptr as usize, addr);
+    ptr.cast::<T>()
+}
+
+#[test]
+fn test_bytes_into_vec() {
+    let vec = vec![33u8; 1024];
+
+    // Test cases where kind == KIND_VEC
+    let b1 = Bytes::from(vec.clone());
+    assert_eq!(Vec::from(b1), vec);
+
+    // Test cases where kind == KIND_ARC, ref_cnt == 1
+    let b1 = Bytes::from(vec.clone());
+    drop(b1.clone());
+    assert_eq!(Vec::from(b1), vec);
+
+    // Test cases where kind == KIND_ARC, ref_cnt == 2
+    let b1 = Bytes::from(vec.clone());
+    let b2 = b1.clone();
+    assert_eq!(Vec::from(b1), vec);
+
+    // Test cases where vtable = SHARED_VTABLE, kind == KIND_ARC, ref_cnt == 1
+    assert_eq!(Vec::from(b2), vec);
+
+    // Test cases where offset != 0
+    let mut b1 = Bytes::from(vec.clone());
+    let b2 = b1.split_off(20);
+
+    assert_eq!(Vec::from(b2), vec[20..]);
+    assert_eq!(Vec::from(b1), vec[..20]);
+}
diff --git a/tests/test_chain.rs b/tests/test_chain.rs
index 332571d8b..cfda6b8dc 100644
--- a/tests/test_chain.rs
+++ b/tests/test_chain.rs
@@ -1,7 +1,7 @@
-#![deny(warnings, rust_2018_idioms)]
+#![warn(rust_2018_idioms)]
 
 use bytes::{Buf, BufMut, Bytes};
-use bytes::buf::{BufExt, BufMutExt};
+#[cfg(feature = "std")]
 use std::io::IoSlice;
 
 #[test]
@@ -9,7 +9,7 @@ fn collect_two_bufs() {
     let a = Bytes::from(&b"hello"[..]);
     let b = Bytes::from(&b"world"[..]);
 
-    let res = a.chain(b).to_bytes();
+    let res = a.chain(b).copy_to_bytes(10);
     assert_eq!(res, &b"helloworld"[..]);
 }
 
@@ -42,6 +42,7 @@ fn iterating_two_bufs() {
     assert_eq!(res, &b"helloworld"[..]);
 }
 
+#[cfg(feature = "std")]
 #[test]
 fn vectored_read() {
     let a = Bytes::from(&b"hello"[..]);
@@ -61,7 +62,7 @@ fn vectored_read() {
             IoSlice::new(b4),
         ];
 
-        assert_eq!(2, buf.bytes_vectored(&mut iovecs));
+        assert_eq!(2, buf.chunks_vectored(&mut iovecs));
         assert_eq!(iovecs[0][..], b"hello"[..]);
         assert_eq!(iovecs[1][..], b"world"[..]);
         assert_eq!(iovecs[2][..], b""[..]);
@@ -82,7 +83,7 @@ fn vectored_read() {
             IoSlice::new(b4),
         ];
 
-        assert_eq!(2, buf.bytes_vectored(&mut iovecs));
+        assert_eq!(2, buf.chunks_vectored(&mut iovecs));
         assert_eq!(iovecs[0][..], b"llo"[..]);
         assert_eq!(iovecs[1][..], b"world"[..]);
         assert_eq!(iovecs[2][..], b""[..]);
@@ -103,7 +104,7 @@ fn vectored_read() {
             IoSlice::new(b4),
         ];
 
-        assert_eq!(1, buf.bytes_vectored(&mut iovecs));
+        assert_eq!(1, buf.chunks_vectored(&mut iovecs));
         assert_eq!(iovecs[0][..], b"world"[..]);
         assert_eq!(iovecs[1][..], b""[..]);
         assert_eq!(iovecs[2][..], b""[..]);
@@ -124,10 +125,53 @@ fn vectored_read() {
             IoSlice::new(b4),
         ];
 
-        assert_eq!(1, buf.bytes_vectored(&mut iovecs));
+        assert_eq!(1, buf.chunks_vectored(&mut iovecs));
         assert_eq!(iovecs[0][..], b"ld"[..]);
         assert_eq!(iovecs[1][..], b""[..]);
         assert_eq!(iovecs[2][..], b""[..]);
         assert_eq!(iovecs[3][..], b""[..]);
     }
 }
+
+#[test]
+fn chain_growing_buffer() {
+    let mut buff = [' ' as u8; 10];
+    let mut vec = b"wassup".to_vec();
+
+    let mut chained = (&mut buff[..]).chain_mut(&mut vec).chain_mut(Vec::new()); // Required for potential overflow because remaining_mut for Vec is isize::MAX - vec.len(), but for chain_mut is usize::MAX
+
+    chained.put_slice(b"hey there123123");
+
+    assert_eq!(&buff, b"hey there1");
+    assert_eq!(&vec, b"wassup23123");
+}
+
+#[test]
+fn chain_overflow_remaining_mut() {
+    let mut chained = Vec::<u8>::new().chain_mut(Vec::new()).chain_mut(Vec::new());
+
+    assert_eq!(chained.remaining_mut(), usize::MAX);
+    chained.put_slice(&[0; 256]);
+    assert_eq!(chained.remaining_mut(), usize::MAX);
+}
+
+#[test]
+fn chain_get_bytes() {
+    let mut ab = Bytes::copy_from_slice(b"ab");
+    let mut cd = Bytes::copy_from_slice(b"cd");
+    let ab_ptr = ab.as_ptr();
+    let cd_ptr = cd.as_ptr();
+    let mut chain = (&mut ab).chain(&mut cd);
+    let a = chain.copy_to_bytes(1);
+    let bc = chain.copy_to_bytes(2);
+    let d = chain.copy_to_bytes(1);
+
+    assert_eq!(Bytes::copy_from_slice(b"a"), a);
+    assert_eq!(Bytes::copy_from_slice(b"bc"), bc);
+    assert_eq!(Bytes::copy_from_slice(b"d"), d);
+
+    // assert `copy_to_bytes` did not allocate
+    assert_eq!(ab_ptr, a.as_ptr());
+    // assert `copy_to_bytes` did not allocate
+    assert_eq!(cd_ptr.wrapping_offset(1), d.as_ptr());
+}
diff --git a/tests/test_debug.rs b/tests/test_debug.rs
index 7528bac87..08d2f254e 100644
--- a/tests/test_debug.rs
+++ b/tests/test_debug.rs
@@ -1,4 +1,4 @@
-#![deny(warnings, rust_2018_idioms)]
+#![warn(rust_2018_idioms)]
 
 use bytes::Bytes;
 
diff --git a/tests/test_iter.rs b/tests/test_iter.rs
index 13b86cdad..bad901860 100644
--- a/tests/test_iter.rs
+++ b/tests/test_iter.rs
@@ -1,21 +1,20 @@
-#![deny(warnings, rust_2018_idioms)]
+#![warn(rust_2018_idioms)]
 
-use bytes::Bytes;
+use bytes::{buf::IntoIter, Bytes};
 
 #[test]
 fn iter_len() {
     let buf = Bytes::from_static(b"hello world");
-    let iter = buf.iter();
+    let iter = IntoIter::new(buf);
 
     assert_eq!(iter.size_hint(), (11, Some(11)));
     assert_eq!(iter.len(), 11);
 }
 
-
 #[test]
 fn empty_iter_len() {
-    let buf = Bytes::from_static(b"");
-    let iter = buf.iter();
+    let buf = Bytes::new();
+    let iter = IntoIter::new(buf);
 
     assert_eq!(iter.size_hint(), (0, Some(0)));
     assert_eq!(iter.len(), 0);
diff --git a/tests/test_reader.rs b/tests/test_reader.rs
index 9c5972a96..897aff645 100644
--- a/tests/test_reader.rs
+++ b/tests/test_reader.rs
@@ -1,14 +1,15 @@
-#![deny(warnings, rust_2018_idioms)]
+#![warn(rust_2018_idioms)]
+#![cfg(feature = "std")]
 
 use std::io::{BufRead, Read};
 
-use bytes::buf::{BufExt};
+use bytes::Buf;
 
 #[test]
 fn read() {
     let buf1 = &b"hello "[..];
     let buf2 = &b"world"[..];
-    let buf = BufExt::chain(buf1, buf2); // Disambiguate with Read::chain
+    let buf = Buf::chain(buf1, buf2); // Disambiguate with Read::chain
     let mut buffer = Vec::new();
     buf.reader().read_to_end(&mut buffer).unwrap();
     assert_eq!(b"hello world", &buffer[..]);
@@ -18,7 +19,7 @@ fn read() {
 fn buf_read() {
     let buf1 = &b"hell"[..];
     let buf2 = &b"o\nworld"[..];
-    let mut reader = BufExt::chain(buf1, buf2).reader();
+    let mut reader = Buf::chain(buf1, buf2).reader();
     let mut line = String::new();
     reader.read_line(&mut line).unwrap();
     assert_eq!("hello\n", &line);
diff --git a/tests/test_serde.rs b/tests/test_serde.rs
index 18b135692..cf4aeffa7 100644
--- a/tests/test_serde.rs
+++ b/tests/test_serde.rs
@@ -1,7 +1,7 @@
 #![cfg(feature = "serde")]
-#![deny(warnings, rust_2018_idioms)]
+#![warn(rust_2018_idioms)]
 
-use serde_test::{Token, assert_tokens};
+use serde_test::{assert_tokens, Token};
 
 #[test]
 fn test_ser_de_empty() {
diff --git a/tests/test_take.rs b/tests/test_take.rs
index b9b525b1f..0c0159be1 100644
--- a/tests/test_take.rs
+++ b/tests/test_take.rs
@@ -1,6 +1,7 @@
-#![deny(warnings, rust_2018_idioms)]
+#![warn(rust_2018_idioms)]
 
-use bytes::buf::{Buf, BufExt};
+use bytes::buf::Buf;
+use bytes::Bytes;
 
 #[test]
 fn long_take() {
@@ -8,5 +9,76 @@ fn long_take() {
     // overrun the buffer. Regression test for #138.
     let buf = b"hello world".take(100);
     assert_eq!(11, buf.remaining());
-    assert_eq!(b"hello world", buf.bytes());
+    assert_eq!(b"hello world", buf.chunk());
+}
+
+#[test]
+fn take_copy_to_bytes() {
+    let mut abcd = Bytes::copy_from_slice(b"abcd");
+    let abcd_ptr = abcd.as_ptr(); // start of the source data, compared against the result below
+    let mut take = (&mut abcd).take(2); // take a mutable borrow so `abcd` can be checked afterwards
+    let a = take.copy_to_bytes(1);
+    assert_eq!(Bytes::copy_from_slice(b"a"), a);
+    // assert `copy_to_bytes` did not allocate
+    assert_eq!(abcd_ptr, a.as_ptr());
+    assert_eq!(Bytes::copy_from_slice(b"bcd"), abcd); // the copied byte was consumed from the source
+}
+
+#[test]
+#[should_panic]
+fn take_copy_to_bytes_panics() {
+    let abcd = Bytes::copy_from_slice(b"abcd");
+    abcd.take(2).copy_to_bytes(3); // requests 3 bytes from a view limited to 2; expected to panic
+}
+
+#[cfg(feature = "std")]
+#[test]
+fn take_chunks_vectored() {
+    fn chain() -> impl Buf {
+        Bytes::from([1, 2, 3].to_vec()).chain(Bytes::from([4, 5, 6].to_vec()))
+    }
+
+    {
+        let mut dst = [std::io::IoSlice::new(&[]); 2];
+        let take = chain().take(0); // zero-length limit: no slice is expected to be filled
+        assert_eq!(take.chunks_vectored(&mut dst), 0);
+    }
+
+    {
+        let mut dst = [std::io::IoSlice::new(&[]); 2];
+        let take = chain().take(1); // limit falls inside the first chunk
+        assert_eq!(take.chunks_vectored(&mut dst), 1);
+        assert_eq!(&*dst[0], &[1]);
+    }
+
+    {
+        let mut dst = [std::io::IoSlice::new(&[]); 2];
+        let take = chain().take(3); // limit ends exactly at the end of the first chunk
+        assert_eq!(take.chunks_vectored(&mut dst), 1);
+        assert_eq!(&*dst[0], &[1, 2, 3]);
+    }
+
+    {
+        let mut dst = [std::io::IoSlice::new(&[]); 2];
+        let take = chain().take(4); // limit reaches one byte into the second chunk
+        assert_eq!(take.chunks_vectored(&mut dst), 2);
+        assert_eq!(&*dst[0], &[1, 2, 3]);
+        assert_eq!(&*dst[1], &[4]);
+    }
+
+    {
+        let mut dst = [std::io::IoSlice::new(&[]); 2];
+        let take = chain().take(6); // limit equals the combined length of both chunks
+        assert_eq!(take.chunks_vectored(&mut dst), 2);
+        assert_eq!(&*dst[0], &[1, 2, 3]);
+        assert_eq!(&*dst[1], &[4, 5, 6]);
+    }
+
+    {
+        let mut dst = [std::io::IoSlice::new(&[]); 2];
+        let take = chain().take(7); // limit exceeds the data; expected output matches the full chain
+        assert_eq!(take.chunks_vectored(&mut dst), 2);
+        assert_eq!(&*dst[0], &[1, 2, 3]);
+        assert_eq!(&*dst[1], &[4, 5, 6]);
+    }
 }