diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7598afd..20ccc64 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,7 +1,7 @@
 cmake_minimum_required(VERSION 3.18...3.30)
 project(
   OmniMalloc
-  VERSION 0.3.0 # Also update pyproject.toml
+  VERSION 0.4.0 # Also update pyproject.toml
   LANGUAGES CXX)
 
 set(CMAKE_CXX_STANDARD 20)
@@ -11,13 +11,11 @@ set(CMAKE_CXX_EXTENSIONS OFF)
 option(ENABLE_CLANG_TIDY "Run clang-tidy with the compiler" OFF)
 if(ENABLE_CLANG_TIDY)
   find_program(CLANG_TIDY_PROGRAM NAMES clang-tidy REQUIRED)
-  set(CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PROGRAM}")
 endif()
 
 option(ENABLE_IWYU "Run include-what-you-use with the compiler" OFF)
 if(ENABLE_IWYU)
   find_program(IWYU_PROGRAM NAMES include-what-you-use iwyu REQUIRED)
-  set(CMAKE_CXX_INCLUDE_WHAT_YOU_USE "${IWYU_PROGRAM}")
 endif()
 
 find_package(Python 3.10 REQUIRED COMPONENTS Interpreter Development.Module)
@@ -33,6 +31,14 @@ nanobind_add_module(_cpp NB_STATIC src/cpp/allocators/greedy.cpp
 
 target_include_directories(_cpp PRIVATE src/cpp)
 
+# Attach the optional analyzers to the _cpp target only.
+if(ENABLE_CLANG_TIDY)
+  set_property(TARGET _cpp PROPERTY CXX_CLANG_TIDY "${CLANG_TIDY_PROGRAM}")
+endif()
+if(ENABLE_IWYU)
+  set_property(TARGET _cpp PROPERTY CXX_INCLUDE_WHAT_YOU_USE "${IWYU_PROGRAM}")
+endif()
+
 nanobind_add_stub(
   _cpp_stub
   MODULE
diff --git a/pyproject.toml b/pyproject.toml
index 234e086..301b972 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "scikit_build_core.build"
 
 [project]
 name = "omnimalloc"
-version = "0.3.0" # Also update CMakeLists.txt
+version = "0.4.0" # Also update CMakeLists.txt
 description = "Your one-stop shop for static memory allocation."
 readme = { file = "README.md", content-type = "text/markdown" }
 requires-python = ">=3.10"
diff --git a/src/python/omnimalloc/allocators/__init__.py b/src/python/omnimalloc/allocators/__init__.py
index 5a022b0..2ee5105 100644
--- a/src/python/omnimalloc/allocators/__init__.py
+++ b/src/python/omnimalloc/allocators/__init__.py
@@ -5,11 +5,13 @@
 from .base import BaseAllocator as BaseAllocator
 from .genetic import GeneticAllocator as GeneticAllocator
 from .greedy import GreedyAllocator as GreedyAllocator
+from .greedy import GreedyByAllAllocator as GreedyByAllAllocator
 from .greedy import GreedyByAreaAllocator as GreedyByAreaAllocator
 from .greedy import GreedyByConflictAllocator as GreedyByConflictAllocator
 from .greedy import GreedyByDurationAllocator as GreedyByDurationAllocator
 from .greedy import GreedyBySizeAllocator as GreedyBySizeAllocator
 from .greedy_cpp import GreedyAllocatorCpp as GreedyAllocatorCpp
+from .greedy_cpp import GreedyByAllAllocatorCpp as GreedyByAllAllocatorCpp
 from .greedy_cpp import GreedyByAreaAllocatorCpp as GreedyByAreaAllocatorCpp
 from .greedy_cpp import GreedyByConflictAllocatorCpp as GreedyByConflictAllocatorCpp
 from .greedy_cpp import GreedyByDurationAllocatorCpp as GreedyByDurationAllocatorCpp
diff --git a/src/python/omnimalloc/allocators/greedy.py b/src/python/omnimalloc/allocators/greedy.py
index 6fe77e4..1173d18 100644
--- a/src/python/omnimalloc/allocators/greedy.py
+++ b/src/python/omnimalloc/allocators/greedy.py
@@ -80,3 +80,40 @@ class GreedyBySizeAllocator(GreedyAllocator):
     def allocate(self, allocations: tuple[Allocation, ...]) -> tuple[Allocation, ...]:
         sorted_allocs = sorted(allocations, key=lambda a: a.size, reverse=True)
         return super().allocate(tuple(sorted_allocs))
+
+
+def allocate_best_of(
+    variants: tuple[BaseAllocator, ...], allocations: tuple[Allocation, ...]
+) -> tuple[Allocation, ...]:
+    """Run each variant and return the result with the smallest peak memory.
+
+    A result's peak is its largest non-None ``height`` (0 if none is set); ties
+    keep the earliest variant. Empty ``allocations`` are returned unchanged.
+
+    Raises:
+        ValueError: If ``variants`` is empty while ``allocations`` is not.
+    """
+    if not allocations:
+        return allocations
+    if not variants:
+        raise ValueError("allocate_best_of requires at least one variant")
+
+    def peak_memory(result: tuple[Allocation, ...]) -> int:
+        return max((a.height for a in result if a.height is not None), default=0)
+
+    # min() returns the first minimal item, so earlier variants win ties.
+    return min((v.allocate(allocations) for v in variants), key=peak_memory)
+
+
+class GreedyByAllAllocator(GreedyAllocator):
+    """Greedy allocator that runs every variant and keeps the best result."""
+
+    def allocate(self, allocations: tuple[Allocation, ...]) -> tuple[Allocation, ...]:
+        variants: tuple[BaseAllocator, ...] = (
+            GreedyAllocator(),
+            GreedyBySizeAllocator(),
+            GreedyByDurationAllocator(),
+            GreedyByAreaAllocator(),
+            GreedyByConflictAllocator(),
+        )
+        return allocate_best_of(variants, allocations)
diff --git a/src/python/omnimalloc/allocators/greedy_cpp.py b/src/python/omnimalloc/allocators/greedy_cpp.py
index 651db9d..694804b 100644
--- a/src/python/omnimalloc/allocators/greedy_cpp.py
+++ b/src/python/omnimalloc/allocators/greedy_cpp.py
@@ -6,6 +6,7 @@
 from omnimalloc.primitives import Allocation
 
 from .base import BaseAllocator
+from .greedy import allocate_best_of
 
 
 class GreedyAllocatorCpp(BaseAllocator):
@@ -63,3 +64,17 @@ class GreedyBySizeAllocatorCpp(GreedyAllocatorCpp):
     def allocate(self, allocations: tuple[Allocation, ...]) -> tuple[Allocation, ...]:
         sorted_allocs = sorted(allocations, key=lambda a: a.size, reverse=True)
         return super().allocate(tuple(sorted_allocs))
+
+
+class GreedyByAllAllocatorCpp(GreedyAllocatorCpp):
+    """C++ greedy allocator that tries every variant and returns the best result."""
+
+    def allocate(self, allocations: tuple[Allocation, ...]) -> tuple[Allocation, ...]:
+        candidates: tuple[BaseAllocator, ...] = (
+            GreedyAllocatorCpp(),
+            GreedyBySizeAllocatorCpp(),
+            GreedyByDurationAllocatorCpp(),
+            GreedyByAreaAllocatorCpp(),
+            GreedyByConflictAllocatorCpp(),
+        )
+        return allocate_best_of(candidates, allocations)
diff --git a/tests/unit/allocators/test_greedy.py b/tests/unit/allocators/test_greedy.py
index 297c6e9..e69b6a8 100644
--- a/tests/unit/allocators/test_greedy.py
+++ b/tests/unit/allocators/test_greedy.py
@@ -4,6 +4,7 @@
 
 from omnimalloc.allocators.greedy import (
     GreedyAllocator,
+    GreedyByAllAllocator,
     GreedyByAreaAllocator,
     GreedyByConflictAllocator,
     GreedyByDurationAllocator,
@@ -254,3 +255,58 @@ def test_greedy_allocator_fits_in_gap() -> None:
     assert result[0].offset == 0
     assert result[1].offset == 50
     assert result[2].offset == 100
+
+
+def test_greedy_by_all_empty() -> None:
+    """Allocating an empty tuple yields an empty result."""
+    placed = GreedyByAllAllocator().allocate(())
+    assert len(placed) == 0
+
+
+def test_greedy_by_all_preserves_allocations() -> None:
+    """Every input allocation comes back exactly once, with an offset."""
+    requested = (
+        Allocation(id=1, size=100, start=0, end=10),
+        Allocation(id=2, size=50, start=5, end=15),
+    )
+    placed = GreedyByAllAllocator().allocate(requested)
+    assert len(placed) == len(requested)
+    assert {a.id for a in placed} == {1, 2}
+    assert all(a.offset is not None for a in placed)
+
+
+def test_greedy_by_all_picks_best_peak() -> None:
+    """The combined allocator's peak equals the best single-variant peak."""
+    allocs = (
+        Allocation(id=1, size=100, start=0, end=5),
+        Allocation(id=2, size=100, start=3, end=8),
+        Allocation(id=3, size=100, start=6, end=10),
+        Allocation(id=4, size=50, start=0, end=10),
+        Allocation(id=5, size=300, start=2, end=4),
+    )
+
+    def peak_of(placed: tuple[Allocation, ...]) -> int:
+        return max(a.height for a in placed if a.height is not None)
+
+    single_variants = (
+        GreedyAllocator(),
+        GreedyBySizeAllocator(),
+        GreedyByDurationAllocator(),
+        GreedyByAreaAllocator(),
+        GreedyByConflictAllocator(),
+    )
+    best_variant_peak = min(peak_of(v.allocate(allocs)) for v in single_variants)
+
+    combined_peak = peak_of(GreedyByAllAllocator().allocate(allocs))
+    assert combined_peak == best_variant_peak
+
+
+def test_greedy_by_all_deterministic() -> None:
+    """Two runs over the same input produce the same offsets, in order."""
+    allocs = tuple(
+        Allocation(id=i, size=(i % 5 + 1) * 100, start=0, end=i % 7 + 1)
+        for i in range(20)
+    )
+    allocator = GreedyByAllAllocator()
+    first_offsets = [a.offset for a in allocator.allocate(allocs)]
+    assert first_offsets == [a.offset for a in allocator.allocate(allocs)]
diff --git a/tests/unit/allocators/test_greedy_cpp.py b/tests/unit/allocators/test_greedy_cpp.py
index 991ec0f..36dd9aa 100644
--- a/tests/unit/allocators/test_greedy_cpp.py
+++ b/tests/unit/allocators/test_greedy_cpp.py
@@ -2,9 +2,10 @@
 # SPDX-License-Identifier: Apache-2.0
 #
 
-from omnimalloc.allocators.greedy import GreedyAllocator
+from omnimalloc.allocators.greedy import GreedyAllocator, GreedyByAllAllocator
 from omnimalloc.allocators.greedy_cpp import (
     GreedyAllocatorCpp,
+    GreedyByAllAllocatorCpp,
     GreedyByAreaAllocatorCpp,
     GreedyByConflictAllocatorCpp,
     GreedyByDurationAllocatorCpp,
@@ -285,3 +286,60 @@ def test_greedy_cpp_matches_python() -> None:
         for cpp_alloc, py_alloc in zip(cpp_result, py_result, strict=True):
             assert cpp_alloc.offset == py_alloc.offset
             assert cpp_alloc.id == py_alloc.id
+
+
+def test_greedy_cpp_by_all_empty() -> None:
+    """Allocating an empty tuple yields an empty result."""
+    placed = GreedyByAllAllocatorCpp().allocate(())
+    assert len(placed) == 0
+
+
+def test_greedy_cpp_by_all_preserves_allocations() -> None:
+    """Every input allocation comes back exactly once, with an offset."""
+    requested = (
+        Allocation(id=1, size=100, start=0, end=10),
+        Allocation(id=2, size=50, start=5, end=15),
+    )
+    placed = GreedyByAllAllocatorCpp().allocate(requested)
+    assert len(placed) == len(requested)
+    assert {a.id for a in placed} == {1, 2}
+    assert all(a.offset is not None for a in placed)
+
+
+def test_greedy_cpp_by_all_deterministic() -> None:
+    """Two runs over the same input produce the same offsets, in order."""
+    allocs = tuple(
+        Allocation(id=i, size=(i % 5 + 1) * 100, start=0, end=i % 7 + 1)
+        for i in range(20)
+    )
+    allocator = GreedyByAllAllocatorCpp()
+    first_offsets = [a.offset for a in allocator.allocate(allocs)]
+    assert first_offsets == [a.offset for a in allocator.allocate(allocs)]
+
+
+def test_greedy_cpp_by_all_matches_python() -> None:
+    """C++ greedy-by-all should reach the same peak memory as the Python one."""
+    cpp_allocator = GreedyByAllAllocatorCpp()
+    py_allocator = GreedyByAllAllocator()
+
+    scenarios = [
+        tuple(Allocation(id=i, size=100, start=0, end=10) for i in range(5)),
+        (
+            Allocation(id=1, size=100, start=0, end=5),
+            Allocation(id=2, size=100, start=3, end=8),
+            Allocation(id=3, size=100, start=6, end=10),
+            Allocation(id=4, size=50, start=0, end=10),
+            Allocation(id=5, size=300, start=2, end=4),
+        ),
+        tuple(
+            Allocation(id=i, size=(i % 5 + 1) * 100, start=0, end=i % 7 + 1)
+            for i in range(20)
+        ),
+    ]
+
+    for allocs in scenarios:
+        peaks = [
+            max(a.height for a in impl.allocate(allocs) if a.height is not None)
+            for impl in (cpp_allocator, py_allocator)
+        ]
+        assert peaks[0] == peaks[1]
diff --git a/uv.lock b/uv.lock
index fae105d..60b9870 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1551,7 +1551,7 @@ wheels = [
 
 [[package]]
 name = "omnimalloc"
-version = "0.3.0"
+version = "0.4.0"
 source = { editable = "." }
 dependencies = [
     { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },