diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index e393cb1..037e6ac 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -9,4 +9,6 @@ repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
     rev: v0.15.12
     hooks:
+      - id: ruff        # Linter
+        args: [ --fix ]
       - id: ruff-format # Formatter
diff --git a/docs/dev/internals.md b/docs/dev/internals.md
index 8280da9..3371adb 100644
--- a/docs/dev/internals.md
+++ b/docs/dev/internals.md
@@ -8,6 +8,7 @@ metricsqlite/
 ├── engine/
 │   ├── parser/        # Lexer, parser, AST nodes
 │   ├── executor.py    # AST evaluation, result types
+│   ├── functions.py   # Rollup, transformation, aggregation functions
 │   ├── sqlite.py      # SQLiteAdapter for raw data fetching
 │   └── query.py       # QueryEngine - public query interface
 └── fastapi/           # Optional FastAPI routes
@@ -68,8 +69,10 @@ Samples may have an `end` field indicating they span a time range `[start, end]`
 
 The executor handles overlap detection and boundary clamping when windowing.
 
-## Function Categories
+## Function Categories (`functions.py`)
 
-- **Rollup** (`avg_over_time`, `sum_over_time`, etc.): Aggregate samples within each window
+- **Rollup** (`avg_over_time`, `sum_over_time`, `integrate`, etc.): Aggregate samples within each window
 - **Transformation** (`abs`, `clamp_min`, `clamp_max`): Transform individual values
 - **Aggregation** (`sum`, `avg`, `min`, `max`, `count`): Aggregate across series
+
+Each category has a dictionary mapping function names to implementations: `ROLLUP_FUNCTIONS`, `TRANSFORMATION_FUNCTIONS`, and `AGGREGATION_FUNCTIONS`.
diff --git a/metricsqlite/client.py b/metricsqlite/client.py
index f299032..b58c0ef 100644
--- a/metricsqlite/client.py
+++ b/metricsqlite/client.py
@@ -4,7 +4,7 @@
 from datetime import datetime
 from pathlib import Path
 
-from metricsqlite.engine import MatrixResult, QueryEngine, QueryResult
+from metricsqlite.engine import MatrixResult, QueryEngine, QueryResult, sqlite_regexp
 from metricsqlite.exceptions import CompactedRangeError
 from metricsqlite.util import parse_interval, parse_timestamp
 
@@ -29,6 +29,7 @@ def __init__(
         db_path: str | Path | None,
         tables_prefix: str = "metricsqlite",
         enable_wal: bool = False,
+        register_regexp: bool = True,
     ) -> None:
         """
         Args:
@@ -37,10 +38,14 @@ def __init__(
             enable_wal: Enable WAL journal mode for better concurrent read/write
                         performance. Note: This is a database-level setting that
                         affects all connections to this database file.
+            register_regexp: Register a custom REGEXP function for regex label
+                        matching. Set to False if your database already has a
+                        REGEXP implementation (e.g., from sqlite3-pcre extension).
         """
         self._db_path = db_path
         self._tables_prefix = tables_prefix
         self._enable_wal = enable_wal
+        self._register_regexp = register_regexp
         self._lock = threading.Lock()
 
         self._connection: sqlite3.Connection | None = None
@@ -90,6 +95,8 @@ def connect(self) -> None:
         db_path = self._db_path if self._db_path is not None else ":memory:"
         self._connection = sqlite3.connect(db_path, check_same_thread=False)
         self._connection.row_factory = sqlite3.Row
+        if self._register_regexp:
+            self._connection.create_function("regexp", 2, sqlite_regexp)
         if self._enable_wal:
             self._get_connection().execute("PRAGMA journal_mode=WAL")
         self._engine = QueryEngine(
@@ -233,9 +240,8 @@ def __exit__(self, exc_type: object, exc_val: object, exc_tb: object) -> None:
     def query(
         self,
         query: str,
-        time: float | str | None = None,
+        time: float | str | datetime | None = None,
         step: float | str | None = None,
-        timeout: float | str | None = None,
     ) -> QueryResult:
         """Execute an instant query.
 
@@ -247,13 +253,12 @@ def query(
             step: Lookback window. If set, only samples within
                   [time - step, time] are considered. Accepts seconds or
                   duration strings like "5m", "1h".
-            timeout: Query timeout (not yet implemented).
 
         Returns:
             Query result (InstantVector, RangeVectorResult, or ScalarResult).
         """
         with self._lock:
-            return self._get_engine().query(query, eval_time=time, step=step, timeout=timeout)
+            return self._get_engine().query(query, eval_time=time, step=step)
 
     def query_range(
         self,
@@ -261,7 +266,6 @@ def query_range(
         start: float | str | datetime,
         end: float | str | datetime | None = None,
         step: float | str | None = None,
-        timeout: float | str | None = None,
     ) -> MatrixResult:
         """Execute a range query.
 
@@ -272,13 +276,12 @@ def query_range(
             start: Start timestamp (Unix seconds).
             end: End timestamp (Unix seconds). Defaults to current time.
             step: Query resolution step in seconds. Defaults to 5m (300s).
-            timeout: Query timeout (not yet implemented).
 
         Returns:
             MatrixResult containing series with multiple samples over time.
         """
         with self._lock:
-            return self._get_engine().query_range(query, start=start, end=end, step=step, timeout=timeout)
+            return self._get_engine().query_range(query, start=start, end=end, step=step)
 
     @staticmethod
     def _build_time_filter(
diff --git a/metricsqlite/engine/__init__.py b/metricsqlite/engine/__init__.py
index 28a54d7..8e2b43e 100644
--- a/metricsqlite/engine/__init__.py
+++ b/metricsqlite/engine/__init__.py
@@ -28,6 +28,7 @@
     tokenize,
 )
 from metricsqlite.engine.query import QueryEngine
+from metricsqlite.engine.sqlite import sqlite_regexp
 
 __all__ = [
     "BinaryExpr",
@@ -53,5 +54,6 @@
     "TokenType",
     "UnaryExpr",
     "parse",
+    "sqlite_regexp",
     "tokenize",
 ]
diff --git a/metricsqlite/engine/executor.py b/metricsqlite/engine/executor.py
index 01c8ec6..36ec676 100644
--- a/metricsqlite/engine/executor.py
+++ b/metricsqlite/engine/executor.py
@@ -10,6 +10,11 @@
 from collections.abc import Callable
 from dataclasses import dataclass
 
+from metricsqlite.engine.functions import (
+    AGGREGATION_FUNCTIONS,
+    ROLLUP_FUNCTIONS,
+    TRANSFORMATION_FUNCTIONS,
+)
 from metricsqlite.engine.parser import (
     BinaryExpr,
     Expr,
@@ -206,6 +211,24 @@ def execute(self, expr: Expr) -> QueryResult:
         # Convert RawSeriesSet to InstantVector
         return self._to_instant_vector(result)
 
+    def find_series(
+        self,
+        selector: MetricSelector,
+        start: float | None = None,
+        end: float | None = None,
+    ) -> list[tuple[str, str]]:
+        """Find series matching a selector.
+
+        Args:
+            selector: Parsed metric selector.
+            start: Start timestamp in milliseconds.
+            end: End timestamp in milliseconds.
+
+        Returns:
+            List of (name, labels_json) tuples for matching series.
+        """
+        return self._sql.find_series(selector, start, end)
+
     def execute_range(
         self,
         expr: Expr,
@@ -442,15 +465,15 @@ def _evaluate_function(
         name = func.name.lower()
 
         # Rollup functions - aggregate over time, require _WindowedSeriesSet
-        if name in _ROLLUP_FUNCTIONS:
+        if name in ROLLUP_FUNCTIONS:
             return self._apply_rollup(name, func.args, ctx)
 
         # Transformation functions - transform individual values
-        if name in _TRANSFORMATION_FUNCTIONS:
+        if name in TRANSFORMATION_FUNCTIONS:
             return self._apply_transformation(name, func.args, ctx)
 
         # Aggregation functions - aggregate across series
-        if name in _AGGREGATION_FUNCTIONS:
+        if name in AGGREGATION_FUNCTIONS:
             return self._apply_aggregation(name, func.args, ctx)
 
         raise ExecutionError(f"Unknown function: {func.name}")
@@ -470,7 +493,7 @@ def _apply_rollup(
         if not isinstance(inner, _WindowedSeriesSet):
             raise ExecutionError(f"{name} requires a range vector")
 
-        agg_fn = _ROLLUP_FUNCTIONS[name]
+        agg_fn = ROLLUP_FUNCTIONS[name]
         result_series = []
 
         for ws in inner.series:
@@ -501,35 +524,24 @@ def _apply_transformation(
             raise ExecutionError(f"{name} requires at least 1 argument")
 
         inner = self._evaluate(args[0], ctx)
+        transform_fn = TRANSFORMATION_FUNCTIONS[name]
 
-        # Get the transformation function
-        fn: Callable[[float], float]
+        # Build the value transformation function
         if name == "abs":
-            fn = abs
-        elif name == "clamp_min":
+            fn = transform_fn
+        elif name in ("clamp_min", "clamp_max"):
             if len(args) != 2:
-                raise ExecutionError("clamp_min requires 2 arguments")
-            min_val = self._evaluate(args[1], ctx)
-            if not isinstance(min_val, (int, float)):
-                raise ExecutionError("clamp_min second argument must be a scalar")
-            threshold = float(min_val)
+                raise ExecutionError(f"{name} requires 2 arguments")
+            bound = self._evaluate(args[1], ctx)
+            if not isinstance(bound, (int, float)):
+                raise ExecutionError(f"{name} second argument must be a scalar")
+            bound_val = float(bound)
+            clamp_fn = transform_fn
 
-            def _clamp_min(v: float, m: float = threshold) -> float:
-                return max(v, m)
-
-            fn = _clamp_min
-        elif name == "clamp_max":
-            if len(args) != 2:
-                raise ExecutionError("clamp_max requires 2 arguments")
-            max_val = self._evaluate(args[1], ctx)
-            if not isinstance(max_val, (int, float)):
-                raise ExecutionError("clamp_max second argument must be a scalar")
-            threshold = float(max_val)
+            def _apply_clamp(v: float, b: float = bound_val, f: Callable[[float, float], float] = clamp_fn) -> float:
+                return f(v, b)
 
-            def _clamp_max(v: float, m: float = threshold) -> float:
-                return min(v, m)
-
-            fn = _clamp_max
+            fn = _apply_clamp
         else:
             raise ExecutionError(f"Unknown transformation function: {name}")
 
@@ -567,7 +579,7 @@ def _apply_aggregation(
         if inner.is_empty():
             return RawSeriesSet([])
 
-        agg_fn = _AGGREGATION_FUNCTIONS[name]
+        agg_fn = AGGREGATION_FUNCTIONS[name]
 
         # Group samples by timestamp across all series
         samples_by_time: dict[float, list[float]] = {}
@@ -655,26 +667,30 @@ def _evaluate_binary(
 
         raise ExecutionError(f"Unsupported binary operation between {type(left)} and {type(right)}")
 
+    @staticmethod
     def _binary_vector_scalar(
-        self,
         vector: RawSeriesSet,
         scalar: float,
         op: str,
     ) -> RawSeriesSet:
         """Apply binary op between vector and scalar."""
+        if _is_comparison_op(op):
+            return vector.filter_values(lambda v: _compare(op, v, scalar))
         return vector.map_values(lambda v: _apply_binary_op(op, v, scalar))
 
+    @staticmethod
     def _binary_scalar_vector(
-        self,
         scalar: float,
         vector: RawSeriesSet,
         op: str,
     ) -> RawSeriesSet:
         """Apply binary op between scalar and vector."""
+        if _is_comparison_op(op):
+            return vector.filter_values(lambda v: _compare(op, scalar, v))
         return vector.map_values(lambda v: _apply_binary_op(op, scalar, v))
 
+    @staticmethod
     def _binary_vector_vector(
-        self,
         left: RawSeriesSet,
         right: RawSeriesSet,
         op: str,
@@ -702,17 +718,27 @@ def label_key(labels: dict) -> str:
             # Match samples by timestamp
             right_samples = {s.timestamp: s for s in right_series.samples}
             new_samples = []
+            is_comparison = _is_comparison_op(op)
 
             for left_sample in left_series.samples:
                 if left_sample.timestamp in right_samples:
                     right_sample = right_samples[left_sample.timestamp]
-                    new_val = _apply_binary_op(op, left_sample.value, right_sample.value)
-                    new_samples.append(
-                        RawSample(
-                            timestamp=left_sample.timestamp,
-                            value=new_val,
+                    if is_comparison:
+                        if _compare(op, left_sample.value, right_sample.value):
+                            new_samples.append(
+                                RawSample(
+                                    timestamp=left_sample.timestamp,
+                                    value=left_sample.value,
+                                )
+                            )
+                    else:
+                        new_val = _apply_binary_op(op, left_sample.value, right_sample.value)
+                        new_samples.append(
+                            RawSample(
+                                timestamp=left_sample.timestamp,
+                                value=new_val,
+                            )
                         )
-                    )
 
             if new_samples:
                 result.append(
@@ -861,100 +887,24 @@ def _apply_binary_op(op: str, left: float, right: float) -> float:
         raise ExecutionError(f"Unknown binary operator: {op}")
 
 
-# Rollup functions: take list of samples in a window, return single value
-def _avg(samples: list[RawSample]) -> float:
-    if not samples:
-        return float("nan")
-    return sum(s.value for s in samples) / len(samples)
-
-
-def _sum(samples: list[RawSample]) -> float:
-    return sum(s.value for s in samples)
-
-
-def _min(samples: list[RawSample]) -> float:
-    if not samples:
-        return float("nan")
-    return min(s.min if s.min is not None else s.value for s in samples)
-
-
-def _max(samples: list[RawSample]) -> float:
-    if not samples:
-        return float("nan")
-    return max(s.max if s.max is not None else s.value for s in samples)
+def _is_comparison_op(op: str) -> bool:
+    """Check if an operator is a comparison operator."""
+    return op in ("==", "!=", ">", "<", ">=", "<=")
 
 
-def _count(samples: list[RawSample]) -> float:
-    return float(len(samples))
-
-
-def _integrate(samples: list[RawSample]) -> float:
-    """Integrate values over time using trapezoidal rule.
-
-    Returns the integral in units of (value * seconds).
-    """
-    if len(samples) < 2:
-        return 0.0
-
-    # Sort by timestamp
-    sorted_samples = sorted(samples, key=lambda s: s.timestamp)
-
-    # Trapezoidal integration
-    integral = 0.0
-    for i in range(1, len(sorted_samples)):
-        s0 = sorted_samples[i - 1]
-        s1 = sorted_samples[i]
-        # Timestamps are in ms, convert to seconds for integration
-        dt = (s1.timestamp - s0.timestamp) / 1000
-        # Trapezoid area = (v0 + v1) / 2 * dt
-        integral += (s0.value + s1.value) / 2 * dt
-
-    return integral
-
-
-_ROLLUP_FUNCTIONS = {
-    "avg_over_time": _avg,
-    "sum_over_time": _sum,
-    "min_over_time": _min,
-    "max_over_time": _max,
-    "count_over_time": _count,
-    "integrate": _integrate,
-}
-
-_TRANSFORMATION_FUNCTIONS = {"abs", "clamp_min", "clamp_max"}
-
-
-# Aggregation functions: aggregate across series, take list of values
-def _agg_sum(values: list[float]) -> float:
-    return sum(values)
-
-
-def _agg_avg(values: list[float]) -> float:
-    if not values:
-        return float("nan")
-    return sum(values) / len(values)
-
-
-def _agg_min(values: list[float]) -> float:
-    if not values:
-        return float("nan")
-    return min(values)
-
-
-def _agg_max(values: list[float]) -> float:
-    if not values:
-        return float("nan")
-    return max(values)
-
-
-def _agg_count(values: list[float]) -> float:
-    return float(len(values))
-
-
-_AGGREGATION_FUNCTIONS = {
-    "sum": _agg_sum,
-    "avg": _agg_avg,
-    "min": _agg_min,
-    "max": _agg_max,
-    "count": _agg_count,
-}
+def _compare(op: str, left: float, right: float) -> bool:
+    """Evaluate a comparison operator, returning a boolean."""
+    if op == ">":
+        return left > right
+    elif op == "<":
+        return left < right
+    elif op == ">=":
+        return left >= right
+    elif op == "<=":
+        return left <= right
+    elif op == "==":
+        return left == right
+    elif op == "!=":
+        return left != right
+    else:
+        raise ExecutionError(f"Unknown comparison operator: {op}")
diff --git a/metricsqlite/engine/functions.py b/metricsqlite/engine/functions.py
new file mode 100644
index 0000000..45acc40
--- /dev/null
+++ b/metricsqlite/engine/functions.py
@@ -0,0 +1,139 @@
+"""MetricsQL function implementations.
+
+Three categories of functions:
+- Rollup: Aggregate samples within a time window (e.g., avg_over_time)
+- Transformation: Transform individual sample values (e.g., abs, clamp_min)
+- Aggregation: Aggregate across series at each timestamp (e.g., sum, avg)
+"""
+
+from collections.abc import Callable
+
+from metricsqlite.engine.sqlite import Sample as RawSample
+
+# =============================================================================
+# Rollup functions: take list of samples in a window, return single value
+# =============================================================================
+
+
+def avg_over_time(samples: list[RawSample]) -> float:
+    if not samples:
+        return float("nan")
+    return sum(s.value for s in samples) / len(samples)
+
+
+def sum_over_time(samples: list[RawSample]) -> float:
+    if not samples:
+        return float("nan")
+    return sum(s.value for s in samples)
+
+
+def min_over_time(samples: list[RawSample]) -> float:
+    if not samples:
+        return float("nan")
+    return min(s.min if s.min is not None else s.value for s in samples)
+
+
+def max_over_time(samples: list[RawSample]) -> float:
+    if not samples:
+        return float("nan")
+    return max(s.max if s.max is not None else s.value for s in samples)
+
+
+def count_over_time(samples: list[RawSample]) -> float:
+    return float(len(samples))
+
+
+def integrate(samples: list[RawSample]) -> float:
+    """Integrate values over time using trapezoidal rule.
+
+    Returns the integral in units of (value * seconds).
+    """
+    if len(samples) < 2:
+        return 0.0
+
+    sorted_samples = sorted(samples, key=lambda s: s.timestamp)
+
+    integral = 0.0
+    for i in range(1, len(sorted_samples)):
+        s0 = sorted_samples[i - 1]
+        s1 = sorted_samples[i]
+        dt = (s1.timestamp - s0.timestamp) / 1000  # ms to seconds
+        integral += (s0.value + s1.value) / 2 * dt
+
+    return integral
+
+
+ROLLUP_FUNCTIONS: dict[str, Callable[[list[RawSample]], float]] = {
+    "avg_over_time": avg_over_time,
+    "sum_over_time": sum_over_time,
+    "min_over_time": min_over_time,
+    "max_over_time": max_over_time,
+    "count_over_time": count_over_time,
+    "integrate": integrate,
+}
+
+
+# =============================================================================
+# Transformation functions: transform individual sample values
+# =============================================================================
+
+
+def transform_abs(value: float) -> float:
+    return abs(value)
+
+
+def transform_clamp_min(value: float, min_val: float) -> float:
+    return max(value, min_val)
+
+
+def transform_clamp_max(value: float, max_val: float) -> float:
+    return min(value, max_val)
+
+
+TRANSFORMATION_FUNCTIONS: dict[str, Callable] = {
+    "abs": transform_abs,
+    "clamp_min": transform_clamp_min,
+    "clamp_max": transform_clamp_max,
+}
+
+
+# =============================================================================
+# Aggregation functions: aggregate across series, take list of values
+# =============================================================================
+
+
+def agg_sum(values: list[float]) -> float:
+    if not values:
+        return float("nan")
+    return sum(values)
+
+
+def agg_avg(values: list[float]) -> float:
+    if not values:
+        return float("nan")
+    return sum(values) / len(values)
+
+
+def agg_min(values: list[float]) -> float:
+    if not values:
+        return float("nan")
+    return min(values)
+
+
+def agg_max(values: list[float]) -> float:
+    if not values:
+        return float("nan")
+    return max(values)
+
+
+def agg_count(values: list[float]) -> float:
+    return float(len(values))
+
+
+AGGREGATION_FUNCTIONS: dict[str, Callable[[list[float]], float]] = {
+    "sum": agg_sum,
+    "avg": agg_avg,
+    "min": agg_min,
+    "max": agg_max,
+    "count": agg_count,
+}
diff --git a/metricsqlite/engine/parser/lexer.py b/metricsqlite/engine/parser/lexer.py
index 40ef639..7e8ef80 100644
--- a/metricsqlite/engine/parser/lexer.py
+++ b/metricsqlite/engine/parser/lexer.py
@@ -22,11 +22,18 @@ class TokenType(Enum):
     COMMA = auto()  # ,
     COLON = auto()  # :
 
-    # Operators
-    EQ = auto()  # =
+    # Comparison and label-matching operators
+    EQEQ = auto()  # ==
     NEQ = auto()  # !=
+    GTE = auto()  # >=
+    LTE = auto()  # <=
     REGEX = auto()  # =~
     NREGEX = auto()  # !~
+    EQ = auto()  # =
+    GT = auto()  # >
+    LT = auto()  # <
+
+    # Math
     PLUS = auto()  # +
     MINUS = auto()  # -
     MUL = auto()  # *
@@ -34,12 +41,6 @@ class TokenType(Enum):
     MOD = auto()  # %
     POW = auto()  # ^
 
-    # Comparison
-    GT = auto()  # >
-    LT = auto()  # <
-    GTE = auto()  # >=
-    LTE = auto()  # <=
-
     # Special
     EOF = auto()
 
@@ -62,11 +63,12 @@ def __repr__(self) -> str:
     # Skip whitespace
     (re.compile(r"\s+"), None),
     # Multi-char operators (must come before single-char)
+    (re.compile(r"=="), TokenType.EQEQ),
     (re.compile(r"!="), TokenType.NEQ),
-    (re.compile(r"=~"), TokenType.REGEX),
-    (re.compile(r"!~"), TokenType.NREGEX),
     (re.compile(r">="), TokenType.GTE),
     (re.compile(r"<="), TokenType.LTE),
+    (re.compile(r"=~"), TokenType.REGEX),
+    (re.compile(r"!~"), TokenType.NREGEX),
     # Single-char operators and punctuation
     (re.compile(r"\("), TokenType.LPAREN),
     (re.compile(r"\)"), TokenType.RPAREN),
@@ -76,13 +78,14 @@ def __repr__(self) -> str:
     (re.compile(r"]"), TokenType.RBRACKET),
     (re.compile(r","), TokenType.COMMA),
     (re.compile(r":"), TokenType.COLON),
+    # Binary operators
     (re.compile(r"="), TokenType.EQ),
-    (re.compile(r"\+"), TokenType.PLUS),
-    (re.compile(r"-"), TokenType.MINUS),
+    (re.compile(r"\^"), TokenType.POW),
     (re.compile(r"\*"), TokenType.MUL),
     (re.compile(r"/"), TokenType.DIV),
     (re.compile(r"%"), TokenType.MOD),
-    (re.compile(r"\^"), TokenType.POW),
+    (re.compile(r"\+"), TokenType.PLUS),
+    (re.compile(r"-"), TokenType.MINUS),
     (re.compile(r">"), TokenType.GT),
     (re.compile(r"<"), TokenType.LT),
     # Strings (single or double quoted)
diff --git a/metricsqlite/engine/parser/parser.py b/metricsqlite/engine/parser/parser.py
index 0035178..12a2f99 100644
--- a/metricsqlite/engine/parser/parser.py
+++ b/metricsqlite/engine/parser/parser.py
@@ -35,13 +35,6 @@ def current(self) -> Token:
         """Current token."""
         return self.tokens[self.pos]
 
-    def peek(self, offset: int = 0) -> Token:
-        """Peek at token at current position + offset."""
-        idx = self.pos + offset
-        if idx < len(self.tokens):
-            return self.tokens[idx]
-        return self.tokens[-1]  # EOF
-
     def advance(self) -> Token:
         """Consume and return current token."""
         token = self.current
@@ -68,7 +61,28 @@ def parse(self) -> Expr:
 
     def parse_expr(self) -> Expr:
         """Parse an expression (handles binary operators)."""
-        return self.parse_additive()
+        return self.parse_comparison()
+
+    def parse_comparison(self) -> Expr:
+        """Parse comparison expressions (> < >= <= == !=).
+
+        Lowest precedence - comparisons are typically used for filtering.
+        """
+        left = self.parse_additive()
+
+        while self.match(
+            TokenType.EQEQ,
+            TokenType.NEQ,
+            TokenType.GT,
+            TokenType.LT,
+            TokenType.GTE,
+            TokenType.LTE,
+        ):
+            op = self.advance().value
+            right = self.parse_additive()
+            left = BinaryExpr(left, op, right)
+
+        return left
 
     def parse_additive(self) -> Expr:
         """Parse additive expressions (+ -)."""
@@ -82,21 +96,10 @@ def parse_additive(self) -> Expr:
         return left
 
     def parse_multiplicative(self) -> Expr:
-        """Parse multiplicative expressions (* / % ^)."""
-        left = self.parse_comparison()
-
-        while self.match(TokenType.MUL, TokenType.DIV, TokenType.MOD, TokenType.POW):
-            op = self.advance().value
-            right = self.parse_comparison()
-            left = BinaryExpr(left, op, right)
-
-        return left
-
-    def parse_comparison(self) -> Expr:
-        """Parse comparison expressions (> < >= <= == !=)."""
+        """Parse multiplicative expressions (* / %)."""
         left = self.parse_unary()
 
-        while self.match(TokenType.GT, TokenType.LT, TokenType.GTE, TokenType.LTE):
+        while self.match(TokenType.MUL, TokenType.DIV, TokenType.MOD):
             op = self.advance().value
             right = self.parse_unary()
             left = BinaryExpr(left, op, right)
@@ -104,13 +107,31 @@ def parse_comparison(self) -> Expr:
         return left
 
     def parse_unary(self) -> Expr:
-        """Parse unary expressions (- +)."""
+        """Parse unary expressions (- +).
+
+        Unary minus has lower precedence than power, so -2^2 = -(2^2) = -4.
+        """
         if self.match(TokenType.MINUS, TokenType.PLUS):
             op = self.advance().value
             expr = self.parse_unary()
             return UnaryExpr(op, expr)
 
-        return self.parse_postfix()
+        return self.parse_power()
+
+    def parse_power(self) -> Expr:
+        """Parse power expressions (^).
+
+        Right-associative: 2^3^2 = 2^(3^2) = 512.
+        Right operand allows unary: 2^-3 works.
+        """
+        left = self.parse_postfix()
+
+        if self.match(TokenType.POW):
+            op = self.advance().value
+            right = self.parse_unary()  # Allows unary on right, gives right-associativity
+            left = BinaryExpr(left, op, right)
+
+        return left
 
     def parse_postfix(self) -> Expr:
         """Parse postfix expressions (range vectors, offset)."""
diff --git a/metricsqlite/engine/query.py b/metricsqlite/engine/query.py
index 309d221..c888970 100644
--- a/metricsqlite/engine/query.py
+++ b/metricsqlite/engine/query.py
@@ -10,7 +10,7 @@
     QueryResult,
     raw_to_matrix,
 )
-from metricsqlite.engine.parser import LabelMatchType, MetricSelector, parse
+from metricsqlite.engine.parser import MetricSelector, parse
 from metricsqlite.util import parse_interval, parse_timestamp
 
 
@@ -40,11 +40,7 @@ def __init__(
         self._executor = Executor(connection, series_table, data_table)
 
     def query(
-        self,
-        query: str,
-        eval_time: float | str | None = None,
-        step: float | str | None = None,
-        timeout: float | str | None = None,
+        self, query: str, eval_time: float | str | datetime | None = None, step: float | str | None = None
     ) -> QueryResult:
         """Execute an instant query.
 
@@ -53,12 +49,10 @@ def query(
             eval_time: Evaluation timestamp. If None, uses current time.
             step: Lookback window. If set, only samples within
                   [eval_time - step, eval_time] are considered.
-            timeout: Query timeout (currently ignored).
 
         Returns:
             Query result (InstantVector, RangeVectorResult, or ScalarResult).
         """
-        del timeout  # Not implemented
         eval_time_ms = parse_timestamp(eval_time)
         if eval_time_ms is None:
             eval_time_ms = time.time() * 1000
@@ -146,42 +140,4 @@ def find_series(
         if not isinstance(ast, MetricSelector):
             raise ValueError(f"Expected metric selector, got {type(ast).__name__}")
 
-        # Build query
-        if start is not None or end is not None:
-            query = f"""
-                SELECT DISTINCT s.name, s.labels
-                FROM {self._series_table} s
-                JOIN {self._data_table} d USING (series_id)
-                WHERE s.name = ?
-            """
-        else:
-            query = f"SELECT DISTINCT name, labels FROM {self._series_table} WHERE name = ?"
-
-        params: list = [ast.name]
-
-        # Add time filters
-        if start is not None:
-            query += " AND d.start >= ?"
-            params.append(start)
-        if end is not None:
-            query += " AND d.start <= ?"
-            params.append(end)
-
-        # Add label matchers
-        label_col = "s.labels" if (start is not None or end is not None) else "labels"
-        for matcher in ast.matchers:
-            if matcher.match_type == LabelMatchType.EQ:
-                query += f" AND json_extract({label_col}, '$.{matcher.name}') = ?"
-                params.append(matcher.value)
-            elif matcher.match_type == LabelMatchType.NEQ:
-                query += f" AND (json_extract({label_col}, '$.{matcher.name}') IS NULL OR json_extract({label_col}, '$.{matcher.name}') != ?)"
-                params.append(matcher.value)
-            elif matcher.match_type == LabelMatchType.REGEX:
-                query += f" AND json_extract({label_col}, '$.{matcher.name}') REGEXP ?"
-                params.append(matcher.value)
-            elif matcher.match_type == LabelMatchType.NREGEX:
-                query += f" AND (json_extract({label_col}, '$.{matcher.name}') IS NULL OR json_extract({label_col}, '$.{matcher.name}') NOT REGEXP ?)"
-                params.append(matcher.value)
-
-        cursor = self._conn.execute(query, params)
-        return [(row["name"], row["labels"]) for row in cursor.fetchall()]
+        return self._executor.find_series(ast, start, end)
diff --git a/metricsqlite/engine/sqlite.py b/metricsqlite/engine/sqlite.py
index 8ac9baa..04a849e 100644
--- a/metricsqlite/engine/sqlite.py
+++ b/metricsqlite/engine/sqlite.py
@@ -1,12 +1,30 @@
 """SQLite query adapter for fetching raw time series data."""
 
 import json
+import re
 import sqlite3
 from collections.abc import Callable
 from dataclasses import dataclass, field
 
 from .parser import LabelMatchType, MetricSelector
 
+
+def sqlite_regexp(pattern: str, value: str | None) -> bool:
+    """SQLite REGEXP function for MetricsQL regex label matching.
+
+    MetricsQL/PromQL regex patterns are implicitly anchored to match the
+    entire string, i.e. ^(?:pattern)$. NULL values and invalid patterns never match.
+
+    Register with: connection.create_function("regexp", 2, sqlite_regexp)
+    """
+    if value is None:
+        return False
+    try:
+        return re.fullmatch(pattern, value) is not None
+    except re.error:
+        return False
+
+
 Labels = dict[str, str]
 
 
@@ -60,6 +78,21 @@ def map_values(self, fn: Callable[[float], float]) -> "RawSeriesSet":
             ]
         )
 
+    def filter_values(self, predicate: Callable[[float], bool]) -> "RawSeriesSet":
+        """Filter samples based on a predicate, returning original values."""
+        result = []
+        for s in self.series:
+            filtered_samples = [sample for sample in s.samples if predicate(sample.value)]
+            if filtered_samples:
+                result.append(
+                    RawSeries(
+                        series_id=s.series_id,
+                        labels=s.labels,
+                        samples=filtered_samples,
+                    )
+                )
+        return RawSeriesSet(result)
+
     def is_empty(self) -> bool:
         return len(self.series) == 0
 
@@ -77,6 +110,50 @@ def __init__(
         self._series_table = series_table
         self._data_table = data_table
 
+    @staticmethod
+    def _build_name_condition(
+        selector: MetricSelector,
+        name_col: str = "s.name",
+    ) -> tuple[str, dict, list]:
+        """Build SQL condition for metric name matching.
+
+        Handles both direct name matching and __name__ label matchers.
+
+        Returns:
+            Tuple of (sql_condition, params, labels) where labels is the
+            list of the selector's matchers other than __name__.
+        """
+        labels = []
+        name_matcher = None
+
+        # Look for __name__ matcher
+        for matcher in selector.matchers:
+            if matcher.name == "__name__":
+                name_matcher = matcher
+            else:
+                labels.append(matcher)
+
+        # If selector has a name, use exact match (unless overridden by __name__ matcher)
+        if selector.name and not name_matcher:
+            return f"{name_col} = :name", {"name": selector.name}, labels
+
+        # Use __name__ matcher if present
+        if name_matcher:
+            if name_matcher.match_type == LabelMatchType.EQ:
+                return f"{name_col} = :name", {"name": name_matcher.value}, labels
+            elif name_matcher.match_type == LabelMatchType.NEQ:
+                return f"{name_col} != :name", {"name": name_matcher.value}, labels
+            elif name_matcher.match_type == LabelMatchType.REGEX:
+                return f"{name_col} REGEXP :name", {"name": name_matcher.value}, labels
+            elif name_matcher.match_type == LabelMatchType.NREGEX:
+                return f"{name_col} NOT REGEXP :name", {"name": name_matcher.value}, labels
+
+        # Fallback: use selector.name if present, otherwise match all
+        if selector.name:
+            return f"{name_col} = :name", {"name": selector.name}, labels
+        else:
+            return "1=1", {}, labels  # Match all names
+
     def fetch_range(
         self,
         selector: MetricSelector,
@@ -96,6 +173,9 @@ def fetch_range(
         Returns:
             RawSeriesSet with all matching series and their samples.
         """
+        # Build name condition (handles __name__ matchers)
+        name_condition, name_params, remaining_matchers = self._build_name_condition(selector, name_col="s.name")
+
         # Include rows where:
         # - start is within (start, end], OR
         # - row spans into the range (start < query_start but end >= query_start)
@@ -112,19 +192,19 @@ def fetch_range(
                 d.sample_count
             FROM {self._data_table} d
             JOIN {self._series_table} s USING (series_id)
-            WHERE s.name = :name
+            WHERE {name_condition}
               AND d.start <= :end
               AND COALESCE(d.end, d.start) > :start
         """
 
         params: dict = {
-            "name": selector.name,
+            **name_params,
             "start": start,
             "end": end,
         }
 
-        # Add label matchers
-        sql, params = self._add_label_matchers(sql, params, selector.matchers)
+        # Add label matchers (excluding __name__ which was handled above)
+        sql, params = self._add_label_matchers(sql, params, remaining_matchers)
 
         sql += " ORDER BY s.series_id, d.start"
 
@@ -181,6 +261,9 @@ def fetch_instant(
         """
         lookback_start = time - lookback
 
+        # Build name condition (handles __name__ matchers)
+        name_condition, name_params, remaining_matchers = self._build_name_condition(selector, name_col="s.name")
+
         # Find the latest sample where:
         # - start <= eval_time (sample exists before or at eval time)
         # - effective end (COALESCE(end, start)) >= lookback_start (not stale)
@@ -197,7 +280,7 @@ def fetch_instant(
                 d.sample_count
             FROM {self._data_table} d
             JOIN {self._series_table} s USING (series_id)
-            WHERE s.name = :name
+            WHERE {name_condition}
               AND d.start <= :time
               AND COALESCE(d.end, d.start) >= :lookback_start
               AND d.start = (
@@ -210,12 +293,12 @@ def fetch_instant(
         """
 
         params: dict = {
-            "name": selector.name,
+            **name_params,
             "time": time,
             "lookback_start": lookback_start,
         }
 
-        sql, params = self._add_label_matchers(sql, params, selector.matchers)
+        sql, params = self._add_label_matchers(sql, params, remaining_matchers)
 
         cursor = self._conn.execute(sql, params)
 
@@ -249,21 +332,71 @@ def fetch_instant(
 
         return RawSeriesSet(series_list)
 
+    def find_series(
+        self,
+        selector: MetricSelector,
+        start: float | None = None,
+        end: float | None = None,
+    ) -> list[tuple[str, str]]:
+        """Find series matching a selector.
+
+        Args:
+            selector: Parsed metric selector.
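+            start: If given, only series with a sample starting at or after this timestamp (milliseconds).
+            end: If given, only series with a sample starting at or before this timestamp (milliseconds).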
+
+        Returns:
+            List of (name, labels_json) tuples for matching series.
+        """
+        # Join the data table only when a time bound is given; __name__ matchers go into the name condition
+        if start is not None or end is not None:
+            name_condition, name_params, remaining_matchers = self._build_name_condition(selector, name_col="s.name")
+            sql = f"""
+                SELECT DISTINCT s.name, s.labels
+                FROM {self._data_table} d
+                JOIN {self._series_table} s USING (series_id)
+                WHERE {name_condition}
+            """
+            labels_col = "s.labels"
+        else:
+            name_condition, name_params, remaining_matchers = self._build_name_condition(selector, name_col="name")
+            sql = f"SELECT DISTINCT name, labels FROM {self._series_table} WHERE {name_condition}"
+            labels_col = "labels"
+
+        params: dict = {**name_params}
+
+        if start is not None:
+            sql += " AND d.start >= :start"
+            params["start"] = start
+        if end is not None:
+            sql += " AND d.start <= :end"
+            params["end"] = end
+
+        sql, params = self._add_label_matchers(sql, params, remaining_matchers, labels_col)
+
+        cursor = self._conn.execute(sql, params)
+        return [(row["name"], row["labels"]) for row in cursor.fetchall()]
+
     @staticmethod
-    def _add_label_matchers(sql: str, params: dict, matchers: list) -> tuple[str, dict]:
+    def _add_label_matchers(
+        sql: str,
+        params: dict,
+        matchers: list,
+        labels_col: str = "s.labels",
+    ) -> tuple[str, dict]:
         """Add label matcher conditions to SQL query."""
         for i, matcher in enumerate(matchers):
             param_name = f"label_{i}"
             if matcher.match_type == LabelMatchType.EQ:
-                sql += f" AND json_extract(s.labels, '$.{matcher.name}') = :{param_name}"
+                sql += f" AND json_extract({labels_col}, '$.{matcher.name}') = :{param_name}"
                 params[param_name] = matcher.value
             elif matcher.match_type == LabelMatchType.NEQ:
-                sql += f" AND (json_extract(s.labels, '$.{matcher.name}') IS NULL OR json_extract(s.labels, '$.{matcher.name}') != :{param_name})"
+                sql += f" AND (json_extract({labels_col}, '$.{matcher.name}') IS NULL OR json_extract({labels_col}, '$.{matcher.name}') != :{param_name})"
                 params[param_name] = matcher.value
             elif matcher.match_type == LabelMatchType.REGEX:
-                sql += f" AND json_extract(s.labels, '$.{matcher.name}') REGEXP :{param_name}"
+                sql += f" AND json_extract({labels_col}, '$.{matcher.name}') REGEXP :{param_name}"
                 params[param_name] = matcher.value
             elif matcher.match_type == LabelMatchType.NREGEX:
-                sql += f" AND (json_extract(s.labels, '$.{matcher.name}') IS NULL OR json_extract(s.labels, '$.{matcher.name}') NOT REGEXP :{param_name})"
+                sql += f" AND (json_extract({labels_col}, '$.{matcher.name}') IS NULL OR json_extract({labels_col}, '$.{matcher.name}') NOT REGEXP :{param_name})"
                 params[param_name] = matcher.value
         return sql, params
diff --git a/metricsqlite/fastapi/routes.py b/metricsqlite/fastapi/routes.py
index 1098dcd..5df8cae 100644
--- a/metricsqlite/fastapi/routes.py
+++ b/metricsqlite/fastapi/routes.py
@@ -113,10 +113,9 @@ def get_query(
         query: str = Query(..., description="MetricsQL query string"),
         time: float | str | None = Query(None, description="Evaluation timestamp"),
         step: float | str | None = Query(None, description="Interval"),
-        timeout: float | str | None = Query(None, description="Query timeout"),
     ) -> JSONResponse:
         try:
-            result = client.query(query, time=time, step=step, timeout=timeout)
+            result = client.query(query, time=time, step=step)
             return _format_query_result(result)
         except (ParseError, ExecutionError, LexerError) as e:
             return _error_response("bad_data", str(e), 400)
@@ -133,10 +132,9 @@ def get_query_range(
         start: float | str = Query(..., description="Start timestamp"),
         end: float | str | None = Query(None, description="End timestamp"),
         step: float | str | None = Query(None, description="Query resolution step in seconds"),
-        timeout: float | str | None = Query(None, description="Query timeout"),
     ) -> JSONResponse:
         try:
-            result = client.query_range(query, start, end=end, step=step, timeout=timeout)
+            result = client.query_range(query, start, end=end, step=step)
             return _format_range_result(result)
         except (ParseError, ExecutionError, LexerError) as e:
             return _error_response("bad_data", str(e), 400)
@@ -196,7 +194,6 @@ def get_series(
         @router.post("/influx/write")
         async def influx_write(
             request: Request,
-            db: str | None = Query(None, description="Database name (ignored)"),
             precision: str = Query("ns", description="Timestamp precision: ns, us, ms, s"),
         ) -> PlainTextResponse:
             """Write data using InfluxDB line protocol.
diff --git a/tests/engine/test_executor.py b/tests/engine/test_executor.py
index c7725c2..60f5ed0 100644
--- a/tests/engine/test_executor.py
+++ b/tests/engine/test_executor.py
@@ -13,7 +13,7 @@
     parse,
 )
 
-EVAL_TIME = 946_681_200_000  # 2000-01-01 00:00:00 UTC
+EVAL_TIME = 946_684_800_000  # 2000-01-01 00:00:00 UTC
 
 
 @pytest.fixture
diff --git a/tests/fastapi/test_routes.py b/tests/fastapi/test_routes.py
index b9c21d8..cf0982e 100644
--- a/tests/fastapi/test_routes.py
+++ b/tests/fastapi/test_routes.py
@@ -5,7 +5,7 @@
 from metricsqlite import MetricsQLiteClient
 from metricsqlite.fastapi import create_router
 
-EVAL_TIME = 946_681_200_000  # 2000-01-01 00:00:00 UTC
+EVAL_TIME = 946_684_800_000  # 2000-01-01 00:00:00 UTC
 
 
 @pytest.fixture
diff --git a/tests/queries/conftest.py b/tests/queries/conftest.py
new file mode 100644
index 0000000..dd13435
--- /dev/null
+++ b/tests/queries/conftest.py
@@ -0,0 +1,78 @@
+"""Shared fixtures for query tests.
+
+This module provides reusable fixtures for testing MetricsQL queries.
+All fixtures are automatically available to tests in this directory.
+"""
+
+import pytest
+
+from metricsqlite import MetricsQLiteClient
+
+# Standard eval time used across tests: 2000-01-01 00:00:00 UTC
+EVAL_TIME = 946_684_800_000
+
+
+@pytest.fixture
+def client():
+    """Create an in-memory client with tables initialized.
+
+    This is the base fixture that other fixtures build upon.
+    Yields a connected client with tables created, closes on teardown.
+    """
+    client = MetricsQLiteClient(None)
+    client.connect()
+    client.create_tables()
+    yield client
+    client.close()
+
+
+@pytest.fixture
+def insert_gauge(client):
+    """Factory fixture to insert gauge data.
+
+    Usage:
+        insert_gauge("metric", value=42, time=EVAL_TIME)
+        insert_gauge("metric", value=42, time=EVAL_TIME, labels={"env": "prod"})
+    """
+
+    def _insert(name, value, time, labels=None):
+        client.insert_gauge(name, value, time, labels=labels)
+
+    return _insert
+
+
+@pytest.fixture
+def insert_counter(client):
+    """Factory fixture to insert counter data.
+
+    Usage:
+        insert_counter("requests_total", value=100, time=EVAL_TIME)
+    """
+
+    def _insert(name, value, time, labels=None):
+        client.insert_counter(name, value, time, labels=labels)
+
+    return _insert
+
+
+@pytest.fixture
+def insert_minute_series(client):
+    """Factory fixture to insert a series with 1-minute interval samples.
+
+    Creates samples from start_minute to end_minute (exclusive) relative to EVAL_TIME.
+    Value at each minute is: base_value + minute_offset
+
+    Usage:
+        # Insert metric from T-60m to T+59m (end_minute is exclusive) with values 100+minute
+        insert_minute_series("metric", base_value=100, start_minute=-60, end_minute=60)
+
+        # Insert with labels
+        insert_minute_series("metric", base_value=100, labels={"env": "prod"})
+    """
+
+    def _insert(name, base_value=0, start_minute=-60, end_minute=60, labels=None):
+        for minute in range(start_minute, end_minute):
+            ts = EVAL_TIME + 60_000 * minute
+            client.insert_gauge(name, base_value + minute, ts, labels=labels)
+
+    return _insert
diff --git a/tests/queries/test_aggregation.py b/tests/queries/test_aggregation.py
new file mode 100644
index 0000000..d256408
--- /dev/null
+++ b/tests/queries/test_aggregation.py
@@ -0,0 +1,432 @@
+"""Tests for aggregation functions.
+
+Aggregation functions aggregate values across multiple series at each timestamp,
+producing a single output series (or grouped series with `by`/`without` clauses).
+
+Functions tested:
+- sum(): Sum of all values
+- avg(): Average of all values
+- min(): Minimum value
+- max(): Maximum value
+- count(): Number of series
+
+VictoriaMetrics-specific behaviors:
+- NaN values are ignored in aggregations
+- Empty input produces an empty result (no series)
+- Supports `by()` and `without()` modifiers
+"""
+
+import pytest
+
+from metricsqlite.engine import InstantVector, MatrixResult
+
+# Standard eval time: 2000-01-01 00:00:00 UTC
+EVAL_TIME = 946_684_800_000
+
+
+# =============================================================================
+# Fixtures (client fixture inherited from conftest.py)
+# =============================================================================
+
+
+@pytest.fixture
+def standard_data(client):
+    """Standard test dataset with multiple series.
+
+    Creates data from T-60m to T+59m with 1-minute intervals.
+
+    Series created:
+    - metric{label="A"}: values = 100 + minute  (at T: 100, at T+10m: 110)
+    - metric{label="B"}: values = 200 + minute  (at T: 200, at T+10m: 210)
+    - metric{label="C"}: values = 300 + minute  (at T: 300, at T+10m: 310)
+    - other_metric:      values = 1000 + minute (at T: 1000)
+
+    At eval_time T:
+    - sum(metric) = 100 + 200 + 300 = 600
+    - avg(metric) = 600 / 3 = 200
+    - min(metric) = 100
+    - max(metric) = 300
+    - count(metric) = 3
+
+    At eval_time T+10m:
+    - sum(metric) = 110 + 210 + 310 = 630
+    - avg(metric) = 630 / 3 = 210
+    - min(metric) = 110
+    - max(metric) = 310
+    """
+    for minute in range(-60, 60):
+        ts = EVAL_TIME + 60_000 * minute
+        client.insert_gauge("metric", 100 + minute, ts, labels={"label": "A"})
+        client.insert_gauge("metric", 200 + minute, ts, labels={"label": "B"})
+        client.insert_gauge("metric", 300 + minute, ts, labels={"label": "C"})
+        client.insert_gauge("other_metric", 1000 + minute, ts)
+    return client
+
+
+@pytest.fixture
+def single_series_data(client):
+    """Dataset with only one series.
+
+    Series: metric (no labels), values = minute (at T: 0, at T+10m: 10)
+    """
+    for minute in range(-60, 60):
+        client.insert_gauge("metric", minute, EVAL_TIME + 60_000 * minute)
+    return client
+
+
+@pytest.fixture
+def compacted_data(client):
+    """Dataset with compacted gauge data.
+
+    Creates raw data, then compacts into 10-minute buckets.
+
+    Raw samples inserted (T-40m to T+29m, minute intervals):
+    - metric{label="A"}: values 100+minute
+    - metric{label="B"}: values 200+minute
+
+    After compaction (data older than T-10m):
+    - 10-minute buckets spanning T-40m to T-10m
+    - Each bucket: avg of 10 samples per series
+
+    Raw data (T-10m to T+29m) remains uncompacted.
+    """
+    for minute in range(-40, 30):
+        ts = EVAL_TIME + 60_000 * minute
+        client.insert_gauge("metric", 100 + minute, ts, labels={"label": "A"})
+        client.insert_gauge("metric", 200 + minute, ts, labels={"label": "B"})
+
+    # Compact data older than T-10m into 10-minute buckets
+    client.compact_gauges(older_than=EVAL_TIME - 600_000, interval="10m")
+    return client
+
+
+@pytest.fixture
+def negative_values_data(client):
+    """Dataset with negative values.
+
+    Series:
+    - metric{label="pos"}: value = 50
+    - metric{label="neg"}: value = -30
+    - metric{label="zero"}: value = 0
+
+    At T:
+    - sum(metric) = 50 + (-30) + 0 = 20
+    - avg(metric) = 20 / 3 ≈ 6.67
+    - min(metric) = -30
+    - max(metric) = 50
+    """
+    client.insert_gauge("metric", 50, EVAL_TIME, labels={"label": "pos"})
+    client.insert_gauge("metric", -30, EVAL_TIME, labels={"label": "neg"})
+    client.insert_gauge("metric", 0, EVAL_TIME, labels={"label": "zero"})
+    return client
+
+
+@pytest.fixture
+def stale_series_data(client):
+    """Dataset with one stale and one fresh series.
+
+    Series:
+    - metric{label="fresh"}: value = 100 at T-1m (within 5m lookback)
+    - metric{label="stale"}: value = 200 at T-10m (outside 5m lookback)
+
+    With default 5m lookback, only "fresh" series should be included.
+    """
+    client.insert_gauge("metric", 100, EVAL_TIME - 60_000, labels={"label": "fresh"})
+    client.insert_gauge("metric", 200, EVAL_TIME - 600_000, labels={"label": "stale"})
+    return client
+
+
+# =============================================================================
+# Helpers
+# =============================================================================
+
+
+def assert_instant_value(result, expected_value, expected_labels=None):
+    """Assert instant query returns expected single value."""
+    assert isinstance(result, InstantVector)
+    assert len(result.series) == 1
+    labels, sample = result.series[0]
+    if expected_labels is not None:
+        assert labels == expected_labels
+    assert sample.value == pytest.approx(expected_value)
+
+
+def assert_instant_empty(result):
+    """Assert instant query returns no series."""
+    assert isinstance(result, InstantVector)
+    assert len(result.series) == 0
+
+
+def assert_range_values(result, expected_values, expected_labels=None):
+    """Assert range query returns expected values at each step."""
+    assert isinstance(result, MatrixResult)
+    assert len(result.series) == 1
+    labels, series = result.series[0]
+    if expected_labels is not None:
+        assert labels == expected_labels
+    actual_values = [s.value for s in series]
+    assert actual_values == pytest.approx(expected_values)
+
+
+def assert_range_empty(result):
+    """Assert range query returns no series."""
+    assert isinstance(result, MatrixResult)
+    assert len(result.series) == 0
+
+
+# =============================================================================
+# Tests: Basic aggregation behavior
+# =============================================================================
+
+
+class TestAggregationSingleSeries:
+    """Aggregation of a single series returns that series' value."""
+
+    @pytest.mark.parametrize("func", ["sum", "avg", "min", "max"])
+    def test_instant_query(self, single_series_data, func):
+        """Single series: aggregation returns the series value."""
+        result = single_series_data.query(f"{func}(metric)", time=EVAL_TIME)
+        assert_instant_value(result, expected_value=0, expected_labels={})
+
+    @pytest.mark.parametrize("func", ["sum", "avg", "min", "max"])
+    def test_range_query(self, single_series_data, func):
+        """Single series: aggregation at each step returns series value."""
+        result = single_series_data.query_range(
+            f"{func}(metric)",
+            start=EVAL_TIME,
+            end=EVAL_TIME + 1_800_000,
+            step="10m",
+        )
+        assert_range_values(result, expected_values=[0, 10, 20, 30], expected_labels={})
+
+    def test_count_instant(self, single_series_data):
+        """count() of single series returns 1."""
+        result = single_series_data.query("count(metric)", time=EVAL_TIME)
+        assert_instant_value(result, expected_value=1, expected_labels={})
+
+    def test_count_range(self, single_series_data):
+        """count() at each step returns 1."""
+        result = single_series_data.query_range(
+            "count(metric)",
+            start=EVAL_TIME,
+            end=EVAL_TIME + 1_800_000,
+            step="10m",
+        )
+        assert_range_values(result, expected_values=[1, 1, 1, 1], expected_labels={})
+
+
+class TestAggregationMultipleSeries:
+    """Aggregation across multiple series."""
+
+    def test_sum_instant(self, standard_data):
+        """sum() adds values across all series."""
+        result = standard_data.query("sum(metric)", time=EVAL_TIME)
+        # 100 + 200 + 300 = 600
+        assert_instant_value(result, expected_value=600, expected_labels={})
+
+    def test_sum_range(self, standard_data):
+        """sum() at each step adds values across series."""
+        result = standard_data.query_range(
+            "sum(metric)",
+            start=EVAL_TIME,
+            end=EVAL_TIME + 1_800_000,
+            step="10m",
+        )
+        # At T: 600, T+10m: 630, T+20m: 660, T+30m: 690
+        assert_range_values(result, expected_values=[600, 630, 660, 690], expected_labels={})
+
+    def test_avg_instant(self, standard_data):
+        """avg() computes mean across all series."""
+        result = standard_data.query("avg(metric)", time=EVAL_TIME)
+        # (100 + 200 + 300) / 3 = 200
+        assert_instant_value(result, expected_value=200, expected_labels={})
+
+    def test_avg_range(self, standard_data):
+        """avg() at each step computes mean across series."""
+        result = standard_data.query_range(
+            "avg(metric)",
+            start=EVAL_TIME,
+            end=EVAL_TIME + 1_800_000,
+            step="10m",
+        )
+        # At T: 200, T+10m: 210, T+20m: 220, T+30m: 230
+        assert_range_values(result, expected_values=[200, 210, 220, 230], expected_labels={})
+
+    def test_min_instant(self, standard_data):
+        """min() returns smallest value across series."""
+        result = standard_data.query("min(metric)", time=EVAL_TIME)
+        assert_instant_value(result, expected_value=100, expected_labels={})
+
+    def test_min_range(self, standard_data):
+        """min() at each step returns smallest value."""
+        result = standard_data.query_range(
+            "min(metric)",
+            start=EVAL_TIME,
+            end=EVAL_TIME + 1_800_000,
+            step="10m",
+        )
+        assert_range_values(result, expected_values=[100, 110, 120, 130], expected_labels={})
+
+    def test_max_instant(self, standard_data):
+        """max() returns largest value across series."""
+        result = standard_data.query("max(metric)", time=EVAL_TIME)
+        assert_instant_value(result, expected_value=300, expected_labels={})
+
+    def test_max_range(self, standard_data):
+        """max() at each step returns largest value."""
+        result = standard_data.query_range(
+            "max(metric)",
+            start=EVAL_TIME,
+            end=EVAL_TIME + 1_800_000,
+            step="10m",
+        )
+        assert_range_values(result, expected_values=[300, 310, 320, 330], expected_labels={})
+
+    def test_count_instant(self, standard_data):
+        """count() returns number of series."""
+        result = standard_data.query("count(metric)", time=EVAL_TIME)
+        assert_instant_value(result, expected_value=3, expected_labels={})
+
+    def test_count_range(self, standard_data):
+        """count() at each step returns number of series."""
+        result = standard_data.query_range(
+            "count(metric)",
+            start=EVAL_TIME,
+            end=EVAL_TIME + 1_800_000,
+            step="10m",
+        )
+        assert_range_values(result, expected_values=[3, 3, 3, 3], expected_labels={})
+
+
+class TestAggregationEmptyResult:
+    """Aggregation with no matching series."""
+
+    @pytest.mark.parametrize("func", ["sum", "avg", "min", "max", "count"])
+    def test_instant_no_match(self, standard_data, func):
+        """Aggregation of non-existent metric returns empty result."""
+        result = standard_data.query(f"{func}(nonexistent)", time=EVAL_TIME)
+        assert_instant_empty(result)
+
+    @pytest.mark.parametrize("func", ["sum", "avg", "min", "max", "count"])
+    def test_range_no_match(self, standard_data, func):
+        """Aggregation of non-existent metric returns empty result."""
+        result = standard_data.query_range(
+            f"{func}(nonexistent)",
+            start=EVAL_TIME,
+            end=EVAL_TIME + 1_800_000,
+            step="10m",
+        )
+        assert_range_empty(result)
+
+
+# =============================================================================
+# Tests: Edge cases
+# =============================================================================
+
+
+class TestAggregationNegativeValues:
+    """Aggregation with negative values."""
+
+    def test_sum_with_negatives(self, negative_values_data):
+        """sum() correctly handles positive and negative values."""
+        result = negative_values_data.query("sum(metric)", time=EVAL_TIME)
+        # 50 + (-30) + 0 = 20
+        assert_instant_value(result, expected_value=20, expected_labels={})
+
+    def test_avg_with_negatives(self, negative_values_data):
+        """avg() correctly handles positive and negative values."""
+        result = negative_values_data.query("avg(metric)", time=EVAL_TIME)
+        # 20 / 3 ≈ 6.67
+        assert_instant_value(result, expected_value=20 / 3, expected_labels={})
+
+    def test_min_finds_negative(self, negative_values_data):
+        """min() finds the most negative value."""
+        result = negative_values_data.query("min(metric)", time=EVAL_TIME)
+        assert_instant_value(result, expected_value=-30, expected_labels={})
+
+    def test_max_with_negatives(self, negative_values_data):
+        """max() finds largest value when negatives present."""
+        result = negative_values_data.query("max(metric)", time=EVAL_TIME)
+        assert_instant_value(result, expected_value=50, expected_labels={})
+
+
+class TestAggregationStaleness:
+    """Aggregation respects staleness (stale series excluded)."""
+
+    def test_sum_excludes_stale(self, stale_series_data):
+        """sum() excludes stale series from calculation."""
+        result = stale_series_data.query("sum(metric)", time=EVAL_TIME)
+        # Only fresh series (100) included, stale (200) excluded
+        assert_instant_value(result, expected_value=100, expected_labels={})
+
+    def test_count_excludes_stale(self, stale_series_data):
+        """count() excludes stale series."""
+        result = stale_series_data.query("count(metric)", time=EVAL_TIME)
+        assert_instant_value(result, expected_value=1, expected_labels={})
+
+    def test_all_stale_returns_empty(self, client):
+        """When all series are stale, aggregation returns empty."""
+        client.insert_gauge("metric", 100, EVAL_TIME - 600_000)  # 10m ago, outside 5m lookback
+        result = client.query("sum(metric)", time=EVAL_TIME)
+        assert_instant_empty(result)
+
+
+class TestAggregationOnCompactedData:
+    """Aggregation on compacted gauge data."""
+
+    def test_sum_on_compacted(self, compacted_data):
+        """sum() works on compacted data using bucket averages."""
+        pytest.skip("TODO: Implement test")
+
+    def test_avg_on_compacted(self, compacted_data):
+        """avg() works on compacted data."""
+        pytest.skip("TODO: Implement test")
+
+    def test_count_on_compacted(self, compacted_data):
+        """count() counts series, not buckets."""
+        pytest.skip("TODO: Implement test")
+
+
+# =============================================================================
+# Tests: Grouping (by/without)
+# =============================================================================
+
+
+class TestAggregationWithGrouping:
+    """Aggregation with by() and without() modifiers."""
+
+    def test_sum_by_label(self, standard_data):
+        """sum() by(label) groups results by that label."""
+        pytest.skip("TODO: Implement test - may not be implemented yet")
+
+    def test_sum_without_label(self, standard_data):
+        """sum() without(label) aggregates across all other labels."""
+        pytest.skip("TODO: Implement test - may not be implemented yet")
+
+    def test_by_multiple_labels(self, client):
+        """by(l1, l2) groups by multiple labels."""
+        pytest.skip("TODO: Implement test - may not be implemented yet")
+
+
+# =============================================================================
+# Tests: Combined with other functions
+# =============================================================================
+
+
+class TestAggregationCombined:
+    """Aggregation combined with rollup and transformation functions."""
+
+    def test_sum_of_rollup(self, standard_data):
+        """sum(avg_over_time(metric[5m])) aggregates rollup results."""
+        pytest.skip("TODO: Implement test")
+
+    def test_aggregation_then_binary_op(self, standard_data):
+        """sum(metric) * 2 applies binary op to aggregation result."""
+        result = standard_data.query("sum(metric) * 2", time=EVAL_TIME)
+        assert_instant_value(result, expected_value=1200, expected_labels={})
+
+    def test_aggregation_of_transformation(self, negative_values_data):
+        """sum(abs(metric)) aggregates transformed values."""
+        result = negative_values_data.query("sum(abs(metric))", time=EVAL_TIME)
+        # abs(50) + abs(-30) + abs(0) = 50 + 30 + 0 = 80
+        assert_instant_value(result, expected_value=80, expected_labels={})
diff --git a/tests/queries/test_binary_operations.py b/tests/queries/test_binary_operations.py
new file mode 100644
index 0000000..11e5318
--- /dev/null
+++ b/tests/queries/test_binary_operations.py
@@ -0,0 +1,467 @@
+"""Tests for binary operations.
+
+Binary operations perform arithmetic between:
+- Two scalars: 5 + 3
+- Scalar and vector: metric * 2
+- Two vectors: metric1 + metric2
+
+VictoriaMetrics-specific behaviors:
+- Vector matching uses label comparison
+- Missing matches result in no output (unless `or` used)
+- Operator precedence follows math conventions
+"""
+
+import math
+
+import pytest
+
+from metricsqlite.engine import InstantVector, MatrixResult, ScalarResult
+
+# Standard eval time: 2000-01-01 00:00:00 UTC
+EVAL_TIME = 946_684_800_000
+
+
+# =============================================================================
+# Fixtures (client fixture inherited from conftest.py)
+# =============================================================================
+
+
+@pytest.fixture
+def single_series(client):
+    """Single unlabeled series with one sample per minute around T.
+
+    Series: metric (no labels), value = 100 + minute offset from T
+    - Samples every minute from T-60m through T+59m
+    - At T: value = 100
+    - At T+10m: value = 110, at T+20m: value = 120
+    """
+    for minute in range(-60, 60):
+        client.insert_gauge("metric", 100 + minute, EVAL_TIME + 60_000 * minute)
+    return client
+
+
+@pytest.fixture
+def multi_series(client):
+    """Three series of one metric, sampled every minute from T-60m to T+59m.
+
+    Series:
+    - metric{label="A"}: value = 10 + minute  (at T: 10)
+    - metric{label="B"}: value = 20 + minute  (at T: 20)
+    - metric{label="C"}: value = 30 + minute  (at T: 30)
+    """
+    for minute in range(-60, 60):
+        ts = EVAL_TIME + 60_000 * minute
+        client.insert_gauge("metric", 10 + minute, ts, labels={"label": "A"})
+        client.insert_gauge("metric", 20 + minute, ts, labels={"label": "B"})
+        client.insert_gauge("metric", 30 + minute, ts, labels={"label": "C"})
+    return client
+
+
+@pytest.fixture
+def two_metrics(client):
+    """Two metrics with partly overlapping label sets for vector-vector operations.
+
+    Series (one sample per minute, T-60m through T+59m):
+    - metric_a{label="X"}: value = 100 + minute (at T: 100)
+    - metric_a{label="Y"}: value = 200 + minute (at T: 200)
+    - metric_b{label="X"}: value = 10 + minute  (at T: 10)
+    - metric_b{label="Y"}: value = 20 + minute  (at T: 20)
+    - metric_b{label="Z"}: value = 30 + minute  (at T: 30, no match in metric_a)
+
+    At T, matching pairs (by label); label="Z" exists only in metric_b:
+    - label="X": metric_a=100, metric_b=10
+    - label="Y": metric_a=200, metric_b=20
+    """
+    for minute in range(-60, 60):
+        ts = EVAL_TIME + 60_000 * minute
+        client.insert_gauge("metric_a", 100 + minute, ts, labels={"label": "X"})
+        client.insert_gauge("metric_a", 200 + minute, ts, labels={"label": "Y"})
+        client.insert_gauge("metric_b", 10 + minute, ts, labels={"label": "X"})
+        client.insert_gauge("metric_b", 20 + minute, ts, labels={"label": "Y"})
+        client.insert_gauge("metric_b", 30 + minute, ts, labels={"label": "Z"})
+    return client
+
+
+# =============================================================================
+# Helpers
+# =============================================================================
+
+
+def assert_scalar_value(result, expected):
+    """Assert result is a ScalarResult whose value approximately equals expected."""
+    assert isinstance(result, ScalarResult)
+    assert result.value == pytest.approx(expected)  # tolerant float comparison
+
+
+def assert_instant_values(result, expected_values):
+    """Assert result is an InstantVector holding expected_values, ignoring order."""
+    assert isinstance(result, InstantVector)
+    actual = sorted([sample.value for _, sample in result.series])
+    assert actual == pytest.approx(sorted(expected_values))  # both sides sorted
+
+
+def assert_instant_single(result, expected_value, expected_labels=None):
+    """Assert result is an InstantVector with exactly one series of the given value."""
+    assert isinstance(result, InstantVector)
+    assert len(result.series) == 1
+    labels, sample = result.series[0]
+    if expected_labels is not None:  # labels are only checked when provided
+        assert labels == expected_labels
+    assert sample.value == pytest.approx(expected_value)
+
+
+def assert_instant_empty(result):
+    """Assert result is an InstantVector that contains no series."""
+    assert isinstance(result, InstantVector)
+    assert len(result.series) == 0
+
+
+# =============================================================================
+# Tests: Scalar-Scalar Operations
+# =============================================================================
+
+
+class TestScalarArithmetic:
+    """Arithmetic between two scalar literals; every result must be a ScalarResult."""
+
+    @pytest.mark.parametrize(
+        "expr,expected",
+        [
+            ("5 + 3", 8),
+            ("5 - 3", 2),
+            ("5 * 3", 15),
+            ("10 / 4", 2.5),  # true (float) division
+            ("10 % 3", 1),  # modulo
+            ("2 ^ 3", 8),  # exponentiation
+        ],
+    )
+    def test_basic_operations(self, client, expr, expected):
+        """Each binary arithmetic operator yields the expected scalar."""
+        result = client.query(expr, time=EVAL_TIME)
+        assert_scalar_value(result, expected)
+
+    def test_negative_numbers(self, client):
+        """-5 + 3 = -2"""
+        result = client.query("-5 + 3", time=EVAL_TIME)
+        assert_scalar_value(result, -2)
+
+    def test_negative_result(self, client):
+        """3 - 5 = -2"""
+        result = client.query("3 - 5", time=EVAL_TIME)
+        assert_scalar_value(result, -2)
+
+    def test_division_by_zero(self, client):
+        """10 / 0 produces NaN (NOTE(review): PromQL yields +Inf here; confirm)."""
+        result = client.query("10 / 0", time=EVAL_TIME)
+        assert isinstance(result, ScalarResult)
+        assert math.isnan(result.value)
+
+    def test_zero_division_by_zero(self, client):
+        """0 / 0 produces NaN."""
+        result = client.query("0 / 0", time=EVAL_TIME)
+        assert isinstance(result, ScalarResult)
+        assert math.isnan(result.value)
+
+
+class TestOperatorPrecedence:
+    """Operator precedence follows math conventions (^ above unary minus, * and /)."""
+
+    @pytest.mark.parametrize(
+        "expr,expected",
+        [
+            ("2 + 3 * 4", 14),  # multiplication before addition
+            ("10 - 6 / 2", 7),  # division before subtraction
+            ("(2 + 3) * 4", 20),  # parentheses override
+            ("2 * 3 + 4 * 5", 26),  # both products are evaluated before the sum
+            ("10 / 2 / 5", 1),  # left-to-right division
+            ("((1 + 2) * (3 + 4))", 21),  # nested parentheses
+            ("2 * 3 ^ 2", 18),  # power before multiplication
+            ("(2 * 3) ^ 2", 36),  # parentheses override power
+            ("-2 ^ 2", -4),  # power before unary minus: -(2 ^ 2)
+            ("(-2) ^ 2", 4),  # parenthesized negative base
+        ],
+    )
+    def test_precedence(self, client, expr, expected):
+        """Operator precedence is respected."""
+        result = client.query(expr, time=EVAL_TIME)
+        assert_scalar_value(result, expected)
+
+
+# =============================================================================
+# Tests: Vector-Scalar Operations
+# =============================================================================
+
+
+class TestVectorScalarArithmetic:
+    """Arithmetic between vector and scalar."""
+
+    def test_vector_times_scalar(self, single_series):
+        """metric * 2 doubles all values."""
+        result = single_series.query("metric * 2", time=EVAL_TIME)
+        assert_instant_single(result, expected_value=200)
+
+    def test_scalar_times_vector(self, single_series):
+        """2 * metric is same as metric * 2 (commutative)."""
+        result = single_series.query("2 * metric", time=EVAL_TIME)
+        assert_instant_single(result, expected_value=200)
+
+    def test_vector_plus_scalar(self, single_series):
+        """metric + 50 adds 50 to all values."""
+        result = single_series.query("metric + 50", time=EVAL_TIME)
+        assert_instant_single(result, expected_value=150)
+
+    def test_scalar_plus_vector(self, single_series):
+        """50 + metric is same as metric + 50."""
+        result = single_series.query("50 + metric", time=EVAL_TIME)
+        assert_instant_single(result, expected_value=150)
+
+    def test_vector_minus_scalar(self, single_series):
+        """metric - 30 subtracts 30 from all values."""
+        result = single_series.query("metric - 30", time=EVAL_TIME)
+        assert_instant_single(result, expected_value=70)
+
+    def test_scalar_minus_vector(self, single_series):
+        """150 - metric subtracts metric from 150."""
+        result = single_series.query("150 - metric", time=EVAL_TIME)
+        assert_instant_single(result, expected_value=50)
+
+    def test_vector_divided_by_scalar(self, single_series):
+        """metric / 4 divides all values by 4."""
+        result = single_series.query("metric / 4", time=EVAL_TIME)
+        assert_instant_single(result, expected_value=25)
+
+    def test_scalar_divided_by_vector(self, single_series):
+        """1000 / metric gives 1000/x for each value x."""
+        result = single_series.query("1000 / metric", time=EVAL_TIME)
+        assert_instant_single(result, expected_value=10)
+
+
+class TestVectorScalarMultipleSeries:
+    """Vector-scalar operations on multiple series."""
+
+    def test_multiply_all_series(self, multi_series):
+        """metric * 2 doubles each series independently."""
+        result = multi_series.query("metric * 2", time=EVAL_TIME)
+        # At T: A=10, B=20, C=30 -> doubled: 20, 40, 60
+        assert_instant_values(result, expected_values=[20, 40, 60])
+
+    def test_add_all_series(self, multi_series):
+        """metric + 100 adds to each series."""
+        result = multi_series.query("metric + 100", time=EVAL_TIME)
+        # At T: A=10, B=20, C=30 -> plus 100: 110, 120, 130
+        assert_instant_values(result, expected_values=[110, 120, 130])
+
+
+class TestVectorScalarPreservesMetadata:
+    """Vector-scalar operations preserve labels and timestamps."""
+
+    def test_preserves_labels(self, multi_series):
+        """Labels incl. __name__ are kept (NOTE(review): PromQL drops __name__; confirm)."""
+        result = multi_series.query("metric * 2", time=EVAL_TIME)
+        assert isinstance(result, InstantVector)
+        labels_set = {frozenset(labels.items()) for labels, _ in result.series}
+        expected = {
+            frozenset([("__name__", "metric"), ("label", "A")]),
+            frozenset([("__name__", "metric"), ("label", "B")]),
+            frozenset([("__name__", "metric"), ("label", "C")]),
+        }
+        assert labels_set == expected
+
+    def test_preserves_timestamp(self, single_series):
+        """Result keeps the sample timestamp (the fixture has a sample exactly at T)."""
+        result = single_series.query("metric * 2", time=EVAL_TIME)
+        _, sample = result.series[0]
+        assert sample.timestamp == EVAL_TIME
+
+
+# =============================================================================
+# Tests: Vector-Vector Operations
+# =============================================================================
+
+
+class TestVectorVectorArithmetic:
+    """Arithmetic between two vectors with label matching."""
+
+    def test_addition_matching_labels(self, two_metrics):
+        """metric_a + metric_b adds matching series."""
+        result = two_metrics.query("metric_a + metric_b", time=EVAL_TIME)
+        # X: 100+10=110, Y: 200+20=220, Z has no match
+        assert_instant_values(result, expected_values=[110, 220])
+
+    def test_subtraction_matching_labels(self, two_metrics):
+        """metric_a - metric_b subtracts matching series."""
+        result = two_metrics.query("metric_a - metric_b", time=EVAL_TIME)
+        # X: 100-10=90, Y: 200-20=180
+        assert_instant_values(result, expected_values=[90, 180])
+
+    def test_multiplication_matching_labels(self, two_metrics):
+        """metric_a * metric_b multiplies matching series."""
+        result = two_metrics.query("metric_a * metric_b", time=EVAL_TIME)
+        # X: 100*10=1000, Y: 200*20=4000
+        assert_instant_values(result, expected_values=[1000, 4000])
+
+    def test_division_matching_labels(self, two_metrics):
+        """metric_a / metric_b divides matching series."""
+        result = two_metrics.query("metric_a / metric_b", time=EVAL_TIME)
+        # X: 100/10=10, Y: 200/20=10
+        assert_instant_values(result, expected_values=[10, 10])
+
+
+class TestVectorVectorNoMatch:
+    """Vector-vector operations with no matching labels."""
+
+    def test_no_match_returns_empty(self, client):
+        """Non-matching series produce no output."""
+        client.insert_gauge("metric_a", 100, EVAL_TIME, labels={"env": "prod"})
+        client.insert_gauge("metric_b", 10, EVAL_TIME, labels={"env": "dev"})
+
+        result = client.query("metric_a + metric_b", time=EVAL_TIME)
+        assert_instant_empty(result)
+
+    def test_partial_match(self, two_metrics):
+        """Only matching pairs produce output."""
+        # metric_b has label="Z" with no match in metric_a
+        result = two_metrics.query("metric_a + metric_b", time=EVAL_TIME)
+        assert isinstance(result, InstantVector)
+        assert len(result.series) == 2  # Only X and Y match
+
+
+class TestVectorSameMetric:
+    """Operations on same metric (metric + metric)."""
+
+    def test_metric_plus_itself(self, single_series):
+        """metric + metric = 2 * metric."""
+        result = single_series.query("metric + metric", time=EVAL_TIME)
+        assert_instant_single(result, expected_value=200)
+
+    def test_metric_minus_itself(self, single_series):
+        """metric - metric = 0."""
+        result = single_series.query("metric - metric", time=EVAL_TIME)
+        assert_instant_single(result, expected_value=0)
+
+    def test_metric_divided_by_itself(self, single_series):
+        """metric / metric = 1."""
+        result = single_series.query("metric / metric", time=EVAL_TIME)
+        assert_instant_single(result, expected_value=1)
+
+
+# =============================================================================
+# Tests: Comparison Operations
+# =============================================================================
+
+
+class TestComparisonOperators:
+    """Comparison operators filter series."""
+
+    def test_greater_than(self, multi_series):
+        """metric > 15 filters to values > 15."""
+        result = multi_series.query("metric > 15", time=EVAL_TIME)
+        # At T: A=10 (excluded), B=20 (included), C=30 (included)
+        assert_instant_values(result, expected_values=[20, 30])
+
+    def test_less_than(self, multi_series):
+        """metric < 25 filters to values < 25."""
+        result = multi_series.query("metric < 25", time=EVAL_TIME)
+        # At T: A=10 (included), B=20 (included), C=30 (excluded)
+        assert_instant_values(result, expected_values=[10, 20])
+
+    def test_greater_equal(self, multi_series):
+        """metric >= 20 filters to values >= 20."""
+        result = multi_series.query("metric >= 20", time=EVAL_TIME)
+        # At T: A=10 (excluded), B=20 (included), C=30 (included)
+        assert_instant_values(result, expected_values=[20, 30])
+
+    def test_less_equal(self, multi_series):
+        """metric <= 20 filters to values <= 20."""
+        result = multi_series.query("metric <= 20", time=EVAL_TIME)
+        # At T: A=10 (included), B=20 (included), C=30 (excluded)
+        assert_instant_values(result, expected_values=[10, 20])
+
+    def test_equal(self, multi_series):
+        """metric == 20 filters to values == 20."""
+        result = multi_series.query("metric == 20", time=EVAL_TIME)
+        assert_instant_values(result, expected_values=[20])
+
+    def test_not_equal(self, multi_series):
+        """metric != 20 filters to values != 20."""
+        result = multi_series.query("metric != 20", time=EVAL_TIME)
+        # At T: A=10, C=30 (B=20 excluded)
+        assert_instant_values(result, expected_values=[10, 30])
+
+    def test_comparison_returns_original_value(self, single_series):
+        """Comparison returns original value, not 1/0."""
+        result = single_series.query("metric > 50", time=EVAL_TIME)
+        # At T: value=100 > 50, should return 100 (not 1)
+        assert_instant_single(result, expected_value=100)
+
+    def test_comparison_no_match_empty(self, single_series):
+        """Comparison with no matches returns empty."""
+        result = single_series.query("metric > 200", time=EVAL_TIME)
+        assert_instant_empty(result)
+
+
+# =============================================================================
+# Tests: Range Queries
+# =============================================================================
+
+
+class TestBinaryInRangeQuery:
+    """Binary operations in a range query, plus one instant query over a rollup."""
+
+    def test_scalar_operation_at_each_step(self, single_series):
+        """Operation is evaluated at each step of the range."""
+        result = single_series.query_range(
+            "metric * 2",
+            start=EVAL_TIME,
+            end=EVAL_TIME + 1_800_000,  # T+30m; four steps are expected
+            step="10m",
+        )
+        assert isinstance(result, MatrixResult)
+        assert len(result.series) == 1
+        _, series = result.series[0]
+        # Fixture value is 100 + minute -> T: 200, T+10m: 220, T+20m: 240, T+30m: 260
+        values = [s.value for s in series]
+        assert values == pytest.approx([200, 220, 240, 260])
+
+    def test_combined_with_rollup(self, single_series):
+        """avg_over_time(metric[5m]) * 2 applies the scalar to the rollup result."""
+        result = single_series.query("avg_over_time(metric[5m]) * 2", time=EVAL_TIME)
+        # avg of values in (T-5m, T] is avg of [-4,-3,-2,-1,0]+100 = 98
+        # 98 * 2 = 196
+        assert_instant_single(result, expected_value=196)
+
+
+# =============================================================================
+# Tests: Edge Cases
+# =============================================================================
+
+
+class TestBinaryEdgeCases:
+    """Edge cases for binary operations."""
+
+    def test_chained_operations(self, single_series):
+        """metric * 2 + 10 / 2 evaluates correctly."""
+        result = single_series.query("metric * 2 + 10 / 2", time=EVAL_TIME)
+        # (100 * 2) + (10 / 2) = 200 + 5 = 205
+        assert_instant_single(result, expected_value=205)
+
+    def test_deeply_nested_parentheses(self, client):
+        """((((1 + 2) * 3) - 4) / 5) evaluates correctly."""
+        result = client.query("((((1 + 2) * 3) - 4) / 5)", time=EVAL_TIME)
+        # ((3 * 3) - 4) / 5 = (9 - 4) / 5 = 5 / 5 = 1
+        assert_scalar_value(result, 1)
+
+    def test_operation_with_empty_vector(self, client):
+        """Operation with empty vector returns empty result."""
+        result = client.query("nonexistent * 2", time=EVAL_TIME)
+        assert_instant_empty(result)
+
+    def test_negative_scalar_multiplication(self, single_series):
+        """metric * -1 negates values."""
+        result = single_series.query("metric * -1", time=EVAL_TIME)
+        assert_instant_single(result, expected_value=-100)
+
+    def test_double_negation(self, client):
+        """--5 equals 5."""
+        result = client.query("--5", time=EVAL_TIME)
+        assert_scalar_value(result, 5)
diff --git a/tests/queries/test_compaction.py b/tests/queries/test_compaction.py
new file mode 100644
index 0000000..418a724
--- /dev/null
+++ b/tests/queries/test_compaction.py
@@ -0,0 +1,171 @@
+"""Tests for querying compacted data.
+
+Compacted data combines multiple samples into buckets with aggregate values.
+Gauge compaction stores min/max/avg/count. Counter compaction extends the
+end timestamp for unchanged values.
+
+Key behaviors:
+- Compacted gauge stores min, max, value (average), sample_count
+- Counter with unchanged value extends end timestamp instead of new row
+- min_over_time uses stored min from compacted data
+- max_over_time uses stored max from compacted data
+- Other rollups use the average value
+"""
+
+import pytest
+
+from metricsqlite import MetricsQLiteClient
+
+EVAL_TIME = 946_684_800_000  # 2000-01-01 00:00:00 UTC
+
+
+@pytest.fixture
+def client():  # NOTE(review): shadows the conftest.py client used by sibling modules
+    """Yield an in-memory client with tables created; closed again on teardown."""
+    client = MetricsQLiteClient(None)
+    client.connect()
+    client.create_tables()
+    yield client
+    client.close()  # runs after the test finishes
+
+
+class TestCompactedGaugeInstant:
+    """Tests for instant queries on compacted gauge data."""
+
+    def test_compacted_gauge_returns_average(self, client: MetricsQLiteClient):
+        """Instant query on compacted gauge returns the average value."""
+        pytest.skip("TODO: Implement test")
+
+    def test_compacted_gauge_timestamp_clamped(self, client: MetricsQLiteClient):
+        """Instant query timestamp is clamped to eval_time when bucket extends past it.
+
+        If bucket spans [T-10m, T+5m] and query is at T, timestamp should be T.
+        """
+        pytest.skip("TODO: Implement test")
+
+    def test_compacted_gauge_within_bucket(self, client: MetricsQLiteClient):
+        """Query eval_time within bucket returns bucket average at eval_time."""
+        pytest.skip("TODO: Implement test")
+
+    def test_compacted_gauge_at_bucket_start(self, client: MetricsQLiteClient):
+        """Query at bucket start timestamp returns that bucket."""
+        pytest.skip("TODO: Implement test")
+
+    def test_compacted_gauge_at_bucket_end(self, client: MetricsQLiteClient):
+        """Query at bucket end timestamp returns that bucket."""
+        pytest.skip("TODO: Implement test")
+
+    def test_multiple_buckets_returns_latest(self, client: MetricsQLiteClient):
+        """With multiple buckets, latest one covering eval_time is returned."""
+        pytest.skip("TODO: Implement test")
+
+
+class TestCompactedGaugeRangeVector:
+    """Tests for range vectors on compacted gauge data."""
+
+    def test_range_vector_bucket_fully_within(self, client: MetricsQLiteClient):
+        """Bucket fully within range returns two samples (start, end)."""
+        pytest.skip("TODO: Implement test")
+
+    def test_range_vector_bucket_start_outside(self, client: MetricsQLiteClient):
+        """Bucket starting before range has start clamped to range start."""
+        pytest.skip("TODO: Implement test")
+
+    def test_range_vector_bucket_end_outside(self, client: MetricsQLiteClient):
+        """Bucket ending after range has end clamped to range end."""
+        pytest.skip("TODO: Implement test")
+
+    def test_range_vector_bucket_spans_entire_range(self, client: MetricsQLiteClient):
+        """Bucket spanning entire range returns samples at range boundaries."""
+        pytest.skip("TODO: Implement test")
+
+    def test_range_vector_multiple_buckets(self, client: MetricsQLiteClient):
+        """Multiple buckets in range each contribute samples."""
+        pytest.skip("TODO: Implement test")
+
+
+class TestCompactedGaugeRollup:
+    """Tests for rollup functions on compacted gauge data."""
+
+    def test_min_over_time_uses_stored_min(self, client: MetricsQLiteClient):
+        """min_over_time uses the stored min value from compacted bucket."""
+        pytest.skip("TODO: Implement test")
+
+    def test_max_over_time_uses_stored_max(self, client: MetricsQLiteClient):
+        """max_over_time uses the stored max value from compacted bucket."""
+        pytest.skip("TODO: Implement test")
+
+    def test_avg_over_time_on_compacted(self, client: MetricsQLiteClient):
+        """avg_over_time uses the average value from compacted bucket."""
+        pytest.skip("TODO: Implement test")
+
+    def test_sum_over_time_on_compacted(self, client: MetricsQLiteClient):
+        """sum_over_time sums the average values (not original samples)."""
+        pytest.skip("TODO: Implement test")
+
+    def test_count_over_time_on_compacted(self, client: MetricsQLiteClient):
+        """count_over_time counts bucket points, not original samples.
+
+        A compacted bucket contributes 2 samples (start, end) to the range.
+        """
+        pytest.skip("TODO: Implement test")
+
+
+class TestCounterCompaction:
+    """Tests for counter data with extended end timestamps."""
+
+    def test_counter_instant_with_extended_end(self, client: MetricsQLiteClient):
+        """Counter with multiple inserts at same value shows extended end."""
+        pytest.skip("TODO: Implement test")
+
+    def test_counter_timestamp_clamped_to_eval_time(self, client: MetricsQLiteClient):
+        """If counter spans past eval_time, timestamp is clamped to eval_time."""
+        pytest.skip("TODO: Implement test")
+
+    def test_counter_range_vector_boundary_clamping(self, client: MetricsQLiteClient):
+        """Counter spanning range boundaries has timestamps clamped."""
+        pytest.skip("TODO: Implement test")
+
+    def test_counter_sample_count_preserved(self, client: MetricsQLiteClient):
+        """Sample count reflects number of original inserts."""
+        pytest.skip("TODO: Implement test")
+
+
+class TestMixedCompactedAndRaw:
+    """Tests for queries spanning compacted and raw data."""
+
+    def test_range_with_compacted_and_raw(self, client: MetricsQLiteClient):
+        """Range spanning compacted buckets and raw samples."""
+        pytest.skip("TODO: Implement test")
+
+    def test_rollup_on_mixed_data(self, client: MetricsQLiteClient):
+        """Rollup function on mix of compacted and raw data."""
+        pytest.skip("TODO: Implement test")
+
+    def test_boundary_between_compacted_and_raw(self, client: MetricsQLiteClient):
+        """Behavior at boundary between compacted and raw data."""
+        pytest.skip("TODO: Implement test")
+
+
+class TestCompactionEdgeCases:
+    """Edge cases for compacted data queries."""
+
+    def test_single_sample_bucket(self, client: MetricsQLiteClient):
+        """Bucket with single sample has start == end."""
+        pytest.skip("TODO: Implement test")
+
+    def test_bucket_with_all_same_values(self, client: MetricsQLiteClient):
+        """Bucket where all samples have same value: min == max == avg."""
+        pytest.skip("TODO: Implement test")
+
+    def test_bucket_with_extreme_values(self, client: MetricsQLiteClient):
+        """Bucket with very large/small values."""
+        pytest.skip("TODO: Implement test")
+
+    def test_adjacent_buckets(self, client: MetricsQLiteClient):
+        """Adjacent buckets with no gap between them."""
+        pytest.skip("TODO: Implement test")
+
+    def test_gap_between_buckets(self, client: MetricsQLiteClient):
+        """Buckets with time gap between them."""
+        pytest.skip("TODO: Implement test")
diff --git a/tests/queries/test_instant_query.py b/tests/queries/test_instant_query.py
new file mode 100644
index 0000000..b7cd597
--- /dev/null
+++ b/tests/queries/test_instant_query.py
@@ -0,0 +1,435 @@
+"""Tests for instant query (query()) behavior.
+
+Tests the client.query() endpoint which evaluates a MetricsQL expression
+at a single point in time, returning an InstantVector or ScalarResult.
+
+Key behaviors tested:
+- Latest sample selection within lookback window
+- Staleness handling (samples outside lookback are ignored)
+- Timestamp behavior (sample timestamp vs eval_time)
+- Label matching and filtering
+- Multiple series handling
+"""
+
+from datetime import datetime, timezone
+
+import pytest
+
+from metricsqlite.engine import InstantVector, ScalarResult
+
+# Evaluation instant shared by the tests: 2000-01-01 00:00:00 UTC (epoch milliseconds)
+EVAL_TIME = 946_684_800_000
+
+# Durations, expressed in milliseconds
+ONE_MINUTE = 60 * 1_000
+FIVE_MINUTES = 5 * ONE_MINUTE
+ONE_HOUR = 60 * ONE_MINUTE
+
+
+# =============================================================================
+# Fixtures (client fixture inherited from conftest.py)
+# =============================================================================
+
+
+@pytest.fixture
+def single_series(client):
+    """Populate ``metric`` (no labels) with four gauge samples.
+
+    Samples (value @ offset from EVAL_TIME):
+    - 10  @ T-10m
+    - 50  @ T-5m
+    - 80  @ T-2m
+    - 100 @ T
+
+    Returns the same client so tests can query it directly.
+    """
+    samples = ((10, 10), (50, 5), (80, 2), (100, 0))
+    for value, minutes_ago in samples:
+        client.insert_gauge("metric", value, EVAL_TIME - minutes_ago * ONE_MINUTE)
+    return client
+
+
+@pytest.fixture
+def multi_series(client):
+    """Three ``metric`` series distinguished by the ``env`` label.
+
+    Series:
+    - env="prod": 100 at T
+    - env="dev":  200 at T
+    - env="test": 300 at T-10m (stale with default 5m lookback)
+    """
+    fresh, stale = EVAL_TIME, EVAL_TIME - 10 * ONE_MINUTE
+    for env, value, ts in (("prod", 100, fresh), ("dev", 200, fresh), ("test", 300, stale)):
+        client.insert_gauge("metric", value, ts, labels={"env": env})
+    return client
+
+
+@pytest.fixture
+def minute_series(client):
+    """Series ``metric`` (no labels) sampled every minute from T-60m through T.
+
+    Each sample's value equals its minute offset from EVAL_TIME, so the sample
+    at T-5m has value -5 and the sample at T has value 0.
+    """
+    for offset in range(-60, 1):
+        timestamp = EVAL_TIME + offset * ONE_MINUTE
+        client.insert_gauge("metric", offset, timestamp)
+    return client
+
+
+# =============================================================================
+# Helpers
+# =============================================================================
+
+
+def assert_instant_value(result, expected_value, expected_labels=None):
+    """Check for an InstantVector with one series whose sample equals ``expected_value``."""
+    assert isinstance(result, InstantVector)
+    series = result.series
+    assert len(series) == 1
+    actual_labels, actual_sample = series[0]
+    assert expected_labels is None or actual_labels == expected_labels
+    assert actual_sample.value == pytest.approx(expected_value)
+
+
+def assert_instant_values(result, expected_values):
+    """Compare the vector's sample values to ``expected_values``, ignoring series order."""
+    assert isinstance(result, InstantVector)
+    observed = sorted(sample.value for _labels, sample in result.series)
+    assert observed == pytest.approx(sorted(expected_values))
+
+
+def assert_instant_empty(result):
+    """Assert that ``result`` is an InstantVector containing zero series."""
+    assert isinstance(result, InstantVector)
+    assert len(result.series) == 0
+
+
+def assert_instant_timestamp(result, expected_timestamp):
+    """Check that the single series in ``result`` carries ``expected_timestamp``."""
+    assert isinstance(result, InstantVector)
+    assert len(result.series) == 1
+    _labels, only_sample = result.series[0]
+    assert only_sample.timestamp == expected_timestamp
+
+
+# =============================================================================
+# Tests: Basic Instant Query Functionality
+# =============================================================================
+
+
+class TestInstantQueryBasics:
+    """Core behaviour of instant queries: sample selection and the lookback window."""
+
+    def test_returns_latest_sample_in_lookback(self, single_series):
+        """The newest sample inside the lookback window is the one returned."""
+        vector = single_series.query("metric", time=EVAL_TIME)
+        # Several samples fall inside the default 5m lookback;
+        # the newest one sits at T with value=100
+        assert_instant_value(vector, expected_value=100)
+
+    def test_empty_result_for_nonexistent_metric(self, client):
+        """An unknown metric name produces an InstantVector with no series."""
+        vector = client.query("nonexistent", time=EVAL_TIME)
+        assert_instant_empty(vector)
+
+    def test_empty_result_when_all_samples_stale(self, client):
+        """A lone sample older than the lookback window produces no series."""
+        # 10 minutes back is beyond the default 5m lookback
+        client.insert_gauge("metric", 100, EVAL_TIME - 10 * ONE_MINUTE)
+
+        vector = client.query("metric", time=EVAL_TIME)
+        assert_instant_empty(vector)
+
+    def test_sample_at_exact_eval_time(self, client):
+        """A sample stamped exactly at eval_time is part of the result."""
+        client.insert_gauge("metric", 42, EVAL_TIME)
+
+        vector = client.query("metric", time=EVAL_TIME)
+        assert_instant_value(vector, expected_value=42)
+
+    def test_sample_at_exact_lookback_boundary(self, client):
+        """The lookback window is inclusive at its older edge (T-lookback)."""
+        boundary = EVAL_TIME - FIVE_MINUTES  # oldest instant covered by the default lookback
+        client.insert_gauge("metric", 42, boundary)
+
+        vector = client.query("metric", time=EVAL_TIME)
+        assert_instant_value(vector, expected_value=42)
+
+    def test_sample_just_outside_lookback(self, client):
+        """One millisecond older than the lookback boundary is already excluded."""
+        just_too_old = EVAL_TIME - FIVE_MINUTES - 1  # 5 minutes + 1ms before eval_time
+        client.insert_gauge("metric", 42, just_too_old)
+
+        vector = client.query("metric", time=EVAL_TIME)
+        assert_instant_empty(vector)
+
+
+# =============================================================================
+# Tests: Timestamp Behavior
+# =============================================================================
+
+
+class TestInstantQueryTimestamps:
+    """Which timestamps instant query results carry and which samples qualify."""
+
+    def test_result_timestamp_is_sample_timestamp(self, client):
+        """The returned sample keeps its own timestamp rather than eval_time."""
+        stored_at = EVAL_TIME - 2 * ONE_MINUTE
+        client.insert_gauge("metric", 42, stored_at)
+
+        vector = client.query("metric", time=EVAL_TIME)
+        assert_instant_timestamp(vector, expected_timestamp=stored_at)
+
+    def test_future_sample_not_included(self, client):
+        """A sample later than eval_time is ignored in favour of an earlier one."""
+        # One sample an hour ahead of eval_time ...
+        client.insert_gauge("metric", 100, EVAL_TIME + ONE_HOUR)
+        # ... and one a minute behind it, which is the expected result
+        client.insert_gauge("metric", 42, EVAL_TIME - ONE_MINUTE)
+
+        vector = client.query("metric", time=EVAL_TIME)
+        assert_instant_value(vector, expected_value=42)
+
+    def test_future_sample_1ms_after_eval_time(self, client):
+        """A sample just 1ms past eval_time is still left out."""
+        client.insert_gauge("metric", 100, EVAL_TIME + 1)
+
+        vector = client.query("metric", time=EVAL_TIME)
+        assert_instant_empty(vector)
+
+    def test_latest_sample_selected(self, single_series):
+        """Among several samples in the lookback, the newest timestamp wins."""
+        vector = single_series.query("metric", time=EVAL_TIME)
+        # Expect the sample at T (100), not the one at T-2m (80)
+        assert_instant_timestamp(vector, expected_timestamp=EVAL_TIME)
+        assert_instant_value(vector, expected_value=100)
+
+
+# =============================================================================
+# Tests: Staleness Handling
+# =============================================================================
+
+
+class TestInstantQueryStaleness:
+    """Staleness handling for instant queries (``step`` sets the lookback window)."""
+
+    def test_default_lookback_is_5_minutes(self, minute_series):
+        """Default lookback (step) is 5 minutes (300 seconds)."""
+        result = minute_series.query("metric", time=EVAL_TIME)
+        # Should return sample at T (value=0), not older ones
+        assert_instant_value(result, expected_value=0)
+
+    def test_custom_lookback_as_seconds(self, single_series):
+        """Custom lookback can be specified as integer seconds."""
+        # At T+7m the newest sample (T) is stale for the default 5m lookback
+        result = single_series.query("metric", time=EVAL_TIME + 7 * ONE_MINUTE, step=600)
+        # A 600s lookback reaches back to T-3m, so the sample at T (100) is found
+        assert_instant_value(result, expected_value=100)
+
+    def test_custom_lookback_as_duration_string(self, single_series):
+        """Custom lookback can be specified as duration string (e.g., '10m')."""
+        result = single_series.query("metric", time=EVAL_TIME + 7 * ONE_MINUTE, step="10m")
+        # The default 5m lookback would miss the sample at T; "10m" reaches it (100)
+        assert_instant_value(result, expected_value=100)
+
+    def test_short_lookback_excludes_samples(self, single_series):
+        """Short lookback excludes samples that would otherwise be included."""
+        # At T-30s the default lookback would return T-2m (80); a 1m lookback must not
+        result = single_series.query("metric", time=EVAL_TIME - ONE_MINUTE // 2, step="1m")
+        assert_instant_empty(result)
+
+    def test_very_large_lookback(self, single_series):
+        """Very large lookback includes old samples."""
+        # Query 50 minutes after the newest sample with a 1 hour lookback
+        result = single_series.query("metric", time=EVAL_TIME + 50 * ONE_MINUTE, step="1h")
+        # The sample at T (100) is 50m old yet still inside the window
+        assert_instant_value(result, expected_value=100)
+
+
+# =============================================================================
+# Tests: Multiple Series
+# =============================================================================
+
+
+class TestInstantQueryMultipleSeries:
+    """Instant queries that match more than one series."""
+
+    def test_multiple_series_same_metric(self, multi_series):
+        """Every fresh series of the metric appears in the result."""
+        vector = multi_series.query("metric", time=EVAL_TIME)
+        # Fresh: prod (100), dev (200); stale: test (300)
+        assert_instant_values(vector, expected_values=[100, 200])
+
+    def test_each_series_independent_staleness(self, multi_series):
+        """Staleness is decided per series, not for the metric as a whole."""
+        # A 15m lookback also reaches the test series sampled at T-10m
+        vector = multi_series.query("metric", time=EVAL_TIME, step="15m")
+        assert_instant_values(vector, expected_values=[100, 200, 300])
+
+    def test_partial_stale_series(self, client):
+        """With one fresh and one stale series, only the fresh one is returned."""
+        client.insert_gauge("metric", 100, EVAL_TIME, labels={"status": "fresh"})
+        client.insert_gauge("metric", 200, EVAL_TIME - 10 * ONE_MINUTE, labels={"status": "stale"})
+
+        vector = client.query("metric", time=EVAL_TIME)
+        assert isinstance(vector, InstantVector)
+        assert len(vector.series) == 1
+        series_labels, latest = vector.series[0]
+        assert series_labels["status"] == "fresh"
+        assert latest.value == pytest.approx(100)
+
+    def test_labels_preserved_in_result(self, multi_series):
+        """Each returned series carries its label set, including ``__name__``."""
+        vector = multi_series.query("metric", time=EVAL_TIME)
+        seen = {frozenset(series_labels.items()) for series_labels, _ in vector.series}
+        assert frozenset({("__name__", "metric"), ("env", "prod")}) in seen
+        assert frozenset({("__name__", "metric"), ("env", "dev")}) in seen
+
+
+# =============================================================================
+# Tests: With Functions
+# =============================================================================
+
+
+class TestInstantQueryWithFunctions:
+    """Instant queries whose expression applies functions or operators."""
+
+    def test_transformation_on_instant_vector(self, client):
+        """abs() is applied to the value of an instant vector sample."""
+        client.insert_gauge("metric", -50, EVAL_TIME)
+
+        vector = client.query("abs(metric)", time=EVAL_TIME)
+        assert_instant_value(vector, expected_value=50)
+
+    def test_clamp_min_on_instant_vector(self, client):
+        """clamp_min raises a value below the floor up to the floor."""
+        client.insert_gauge("metric", 30, EVAL_TIME)
+
+        vector = client.query("clamp_min(metric, 50)", time=EVAL_TIME)
+        assert_instant_value(vector, expected_value=50)
+
+    def test_clamp_max_on_instant_vector(self, client):
+        """clamp_max lowers a value above the ceiling down to the ceiling."""
+        client.insert_gauge("metric", 100, EVAL_TIME)
+
+        vector = client.query("clamp_max(metric, 50)", time=EVAL_TIME)
+        assert_instant_value(vector, expected_value=50)
+
+    def test_rollup_on_range_selector(self, minute_series):
+        """avg_over_time over a range selector yields an instant vector."""
+        vector = minute_series.query("avg_over_time(metric[5m])", time=EVAL_TIME)
+        # The window (T-5m, T] holds the samples -4, -3, -2, -1, 0,
+        # whose mean is -2
+        assert_instant_value(vector, expected_value=-2)
+
+    def test_aggregation_across_series(self, multi_series):
+        """sum() combines the fresh series at eval_time into one unlabelled series."""
+        vector = multi_series.query("sum(metric)", time=EVAL_TIME)
+        # 100 (prod) + 200 (dev) = 300; the test series is stale
+        assert_instant_value(vector, expected_value=300, expected_labels={})
+
+    def test_binary_operation(self, single_series):
+        """A binary operation (metric * 2) is evaluated in an instant query."""
+        vector = single_series.query("metric * 2", time=EVAL_TIME)
+        assert_instant_value(vector, expected_value=200)
+
+    def test_comparison_filter(self, multi_series):
+        """A comparison keeps only the series whose value satisfies it."""
+        vector = multi_series.query("metric > 150", time=EVAL_TIME)
+        # dev (200) is the only fresh series above 150
+        assert_instant_values(vector, expected_values=[200])
+
+
+# =============================================================================
+# Tests: Edge Cases
+# =============================================================================
+
+
+class TestInstantQueryEdgeCases:
+    """Unusual inputs: alternative time formats, extreme values, scalar expressions."""
+
+    def test_eval_time_as_datetime(self, client):
+        """``time`` accepts a timezone-aware datetime."""
+        eval_dt = datetime(2000, 1, 1, tzinfo=timezone.utc)
+        client.insert_gauge("metric", 42, EVAL_TIME)
+
+        vector = client.query("metric", time=eval_dt)
+        assert_instant_value(vector, expected_value=42)
+
+    def test_eval_time_as_iso_string(self, client):
+        """``time`` accepts an ISO 8601 string."""
+        client.insert_gauge("metric", 42, EVAL_TIME)
+
+        vector = client.query("metric", time="2000-01-01T00:00:00Z")
+        assert_instant_value(vector, expected_value=42)
+
+    def test_negative_metric_values(self, client):
+        """A negative sample value is returned unchanged."""
+        client.insert_gauge("metric", -42, EVAL_TIME)
+
+        vector = client.query("metric", time=EVAL_TIME)
+        assert_instant_value(vector, expected_value=-42)
+
+    def test_zero_metric_value(self, client):
+        """A sample value of zero is returned as zero."""
+        client.insert_gauge("metric", 0, EVAL_TIME)
+
+        vector = client.query("metric", time=EVAL_TIME)
+        assert_instant_value(vector, expected_value=0)
+
+    def test_very_large_metric_value(self, client):
+        """A very large sample value (1e15) is returned unchanged."""
+        client.insert_gauge("metric", 1e15, EVAL_TIME)
+
+        vector = client.query("metric", time=EVAL_TIME)
+        assert_instant_value(vector, expected_value=1e15)
+
+    def test_very_small_metric_value(self, client):
+        """A very small sample value (1e-15) is returned unchanged."""
+        client.insert_gauge("metric", 1e-15, EVAL_TIME)
+
+        vector = client.query("metric", time=EVAL_TIME)
+        assert_instant_value(vector, expected_value=1e-15)
+
+    def test_scalar_query(self, client):
+        """A purely scalar expression evaluates to a ScalarResult."""
+        scalar = client.query("5 + 3", time=EVAL_TIME)
+        assert isinstance(scalar, ScalarResult)
+        assert scalar.value == pytest.approx(8)
+
+    def test_empty_metric_name_with_labels(self, client):
+        """A selector made only of label matchers (no bare metric name) works."""
+        client.insert_gauge("metric", 42, EVAL_TIME, labels={"env": "prod"})
+
+        vector = client.query('{__name__="metric", env="prod"}', time=EVAL_TIME)
+        assert_instant_value(vector, expected_value=42)
+
+
+# =============================================================================
+# Tests: Compacted/Counter Data
+# =============================================================================
+
+
+class TestInstantQueryCompactedData:
+    """Placeholder tests for instant queries on compacted gauge data."""
+
+    @pytest.mark.skip(reason="TODO: Implement once compaction is fully supported")
+    def test_compacted_gauge_returns_value(self, client):
+        """Compacted gauge bucket returns the average value."""
+
+    @pytest.mark.skip(reason="TODO: Implement once compaction is fully supported")
+    def test_compacted_end_timestamp_for_staleness(self, client):
+        """Compacted bucket uses end timestamp for staleness check."""
+
+
+class TestInstantQueryCounterData:
+    """Placeholder tests for instant queries on counter data."""
+
+    @pytest.mark.skip(reason="TODO: Implement once counter semantics are tested")
+    def test_counter_with_extended_end(self, client):
+        """Counter with extended end timestamp is not stale."""
+
+    @pytest.mark.skip(reason="TODO: Implement once counter semantics are tested")
+    def test_counter_timestamp_clamped(self, client):
+        """Counter timestamp is clamped to eval_time if end > eval_time."""
diff --git a/tests/queries/test_label_matching.py b/tests/queries/test_label_matching.py
new file mode 100644
index 0000000..a978efd
--- /dev/null
+++ b/tests/queries/test_label_matching.py
@@ -0,0 +1,515 @@
+"""Tests for label matching in selectors.
+
+MetricsQL supports various label matching operators:
+- =  : Exact equality
+- != : Not equal
+- =~ : Regex match
+- !~ : Regex not match
+
+VictoriaMetrics-specific behaviors:
+- __name__ is a special label containing metric name
+- Empty label value vs missing label
+"""
+
+import pytest
+
+from metricsqlite.engine import InstantVector
+
+# Evaluation instant shared by the tests: 2000-01-01 00:00:00 UTC (epoch milliseconds)
+EVAL_TIME = 946_684_800_000
+
+
+# =============================================================================
+# Fixtures (client fixture inherited from conftest.py)
+# =============================================================================
+
+
+@pytest.fixture
+def labeled_series(client):
+    """Six ``metric`` series at EVAL_TIME covering full, partial and absent label sets.
+
+    value -> labels:
+    - 100: env="prod", region="us"
+    - 200: env="prod", region="eu"
+    - 300: env="dev", region="us"
+    - 400: env="dev", region="eu"
+    - 500: env="staging" (no region label)
+    - 600: no labels at all
+    """
+    full = (("prod", "us", 100), ("prod", "eu", 200), ("dev", "us", 300), ("dev", "eu", 400))
+    for env, region, value in full:
+        client.insert_gauge("metric", value, EVAL_TIME, labels={"env": env, "region": region})
+    # The remaining two series lack the region label or every label
+    client.insert_gauge("metric", 500, EVAL_TIME, labels={"env": "staging"})
+    client.insert_gauge("metric", 600, EVAL_TIME)
+    return client
+
+
+@pytest.fixture
+def multi_metric(client):
+    """Four gauge series at EVAL_TIME spread over three metric names.
+
+    name{method} -> value:
+    - http_requests_total{method="GET"}  -> 100
+    - http_requests_total{method="POST"} -> 200
+    - http_errors_total{method="GET"}    -> 10
+    - grpc_requests_total{method="GET"}  -> 50
+    """
+    gauges = [("http_requests_total", "GET", 100), ("http_requests_total", "POST", 200)]
+    gauges += [("http_errors_total", "GET", 10), ("grpc_requests_total", "GET", 50)]
+    for name, method, value in gauges:
+        client.insert_gauge(name, value, EVAL_TIME, labels={"method": method})
+    return client
+
+
+@pytest.fixture
+def numeric_labels(client):
+    """Five ``metric`` series at EVAL_TIME keyed by a ``code`` label.
+
+    Four codes look numeric and one ("abc") does not, which lets regex
+    tests tell the two groups apart.
+
+    - code="200": value = 100
+    - code="201": value = 200
+    - code="404": value = 300
+    - code="500": value = 400
+    - code="abc": value = 500
+    """
+    value_by_code = {"200": 100, "201": 200, "404": 300, "500": 400, "abc": 500}
+    for code, value in value_by_code.items():
+        client.insert_gauge("metric", value, EVAL_TIME, labels={"code": code})
+    return client
+
+
+# =============================================================================
+# Helpers
+# =============================================================================
+
+
+def assert_instant_values(result, expected_values):
+    """Compare the vector's sample values to ``expected_values``, ignoring series order."""
+    assert isinstance(result, InstantVector)
+    observed = sorted(sample.value for _labels, sample in result.series)
+    assert observed == pytest.approx(sorted(expected_values))
+
+
+def assert_instant_empty(result):
+    """Assert that ``result`` is an InstantVector containing zero series."""
+    assert isinstance(result, InstantVector)
+    assert len(result.series) == 0
+
+
+def get_series_count(result):
+    """Return how many series ``result`` holds, asserting it is an InstantVector first."""
+    assert isinstance(result, InstantVector)
+    return len(result.series)
+
+
+# =============================================================================
+# Tests: Exact Equality Matching (=)
+# =============================================================================
+
+
+class TestEqualityMatch:
+    """Selectors using the ``=`` (exact equality) matcher."""
+
+    def test_exact_label_match(self, labeled_series):
+        """Only series whose env label equals prod are selected."""
+        matched = labeled_series.query('metric{env="prod"}', time=EVAL_TIME)
+        # Expected: prod/us (100) and prod/eu (200)
+        assert_instant_values(matched, expected_values=[100, 200])
+
+    def test_no_match_wrong_value(self, labeled_series):
+        """A value that no series carries selects nothing."""
+        matched = labeled_series.query('metric{env="test"}', time=EVAL_TIME)
+        assert_instant_empty(matched)
+
+    def test_no_match_missing_label(self, labeled_series):
+        """Series lacking the region label never satisfy a region equality matcher."""
+        matched = labeled_series.query('metric{region="us"}', time=EVAL_TIME)
+        # Selected: prod/us (100), dev/us (300)
+        # Left out: staging (500) and the unlabelled series (600)
+        assert_instant_values(matched, expected_values=[100, 300])
+
+    def test_multiple_label_matchers(self, labeled_series):
+        """All matchers in a selector must hold at once."""
+        matched = labeled_series.query('metric{env="prod", region="us"}', time=EVAL_TIME)
+        # prod/us (100) is the only series satisfying both
+        assert_instant_values(matched, expected_values=[100])
+
+    def test_partial_label_match(self, labeled_series):
+        """Labels not mentioned in the selector do not prevent a match."""
+        matched = labeled_series.query('metric{env="prod"}', time=EVAL_TIME)
+        # Both prod series carry an extra region label
+        assert get_series_count(matched) == 2
+
+    def test_empty_string_value(self, client):
+        """An empty-string matcher selects the series whose label value is empty."""
+        client.insert_gauge("metric", 100, EVAL_TIME, labels={"label": ""})
+        client.insert_gauge("metric", 200, EVAL_TIME, labels={"label": "value"})
+        client.insert_gauge("metric", 300, EVAL_TIME)  # label absent altogether
+
+        matched = client.query('metric{label=""}', time=EVAL_TIME)
+        # Only the series stored with an empty label value (100) is expected
+        assert_instant_values(matched, expected_values=[100])
+
+
+# =============================================================================
+# Tests: Not Equal Matching (!=)
+# =============================================================================
+
+
+class TestNotEqualMatch:
+    """Selectors using the ``!=`` (not equal) matcher."""
+
+    def test_not_equal_excludes_value(self, labeled_series):
+        """Series whose env is prod are dropped by a not-equal matcher on prod."""
+        matched = labeled_series.query('metric{env!="prod"}', time=EVAL_TIME)
+        # Dropped: prod/us (100), prod/eu (200)
+        # Kept: dev/us (300), dev/eu (400), staging (500), unlabelled (600)
+        assert_instant_values(matched, expected_values=[300, 400, 500, 600])
+
+    def test_not_equal_includes_other_values(self, labeled_series):
+        """Series with a different env value survive a not-equal matcher on prod."""
+        matched = labeled_series.query('metric{env!="prod"}', time=EVAL_TIME)
+        # The dev and staging series must be present
+        values = sorted(sample.value for _, sample in matched.series)
+        assert 300 in values  # dev/us
+        assert 400 in values  # dev/eu
+        assert 500 in values  # staging
+
+    def test_not_equal_includes_missing_label(self, labeled_series):
+        """Series without the env label survive a not-equal matcher on prod.
+
+        VictoriaMetrics treats missing label as not equal to any value.
+        """
+        matched = labeled_series.query('metric{env!="prod"}', time=EVAL_TIME)
+        # The unlabelled series (600) must be present
+        values = [sample.value for _, sample in matched.series]
+        assert 600 in values
+
+    def test_not_equal_empty_string(self, client):
+        """Not-equal against the empty string with empty, set and absent labels."""
+        client.insert_gauge("metric", 100, EVAL_TIME, labels={"label": ""})
+        client.insert_gauge("metric", 200, EVAL_TIME, labels={"label": "value"})
+        client.insert_gauge("metric", 300, EVAL_TIME)  # label absent altogether
+
+        matched = client.query('metric{label!=""}', time=EVAL_TIME)
+        # The non-empty value (200) must be kept; the absent-label series (300) may be too.
+        # Only the minimum guarantee is asserted because exact behavior may vary.
+        values = [sample.value for _, sample in matched.series]
+        assert 200 in values
+        assert 100 not in values  # empty string is excluded
+
+
+# =============================================================================
+# Tests: Regex Matching (=~)
+# =============================================================================
+
+
+class TestRegexMatch:
+    """Selectors using the ``=~`` (regex match) matcher."""
+
+    def test_regex_simple_pattern(self, labeled_series):
+        """A prefix pattern selects the env values that start with prod."""
+        matched = labeled_series.query('metric{env=~"prod.*"}', time=EVAL_TIME)
+        # Expected: prod/us (100), prod/eu (200)
+        assert_instant_values(matched, expected_values=[100, 200])
+
+    def test_regex_alternation(self, labeled_series):
+        """An alternation selects series matching either branch (prod or dev)."""
+        matched = labeled_series.query('metric{env=~"prod|dev"}', time=EVAL_TIME)
+        # Expected: prod/us (100), prod/eu (200), dev/us (300), dev/eu (400)
+        assert_instant_values(matched, expected_values=[100, 200, 300, 400])
+
+    def test_regex_anchored(self, labeled_series):
+        """The pattern is implicitly anchored at both ends (^...$).
+
+        A bare prod pattern therefore selects exactly prod and not production.
+        """
+        matched = labeled_series.query('metric{env=~"prod"}', time=EVAL_TIME)
+        # Both series whose env is exactly prod
+        assert_instant_values(matched, expected_values=[100, 200])
+
+    def test_regex_partial_match_needs_wildcard(self, client):
+        """A trailing wildcard is required to select longer values."""
+        client.insert_gauge("metric", 100, EVAL_TIME, labels={"env": "production"})
+        client.insert_gauge("metric", 200, EVAL_TIME, labels={"env": "prod"})
+
+        # No wildcard: only the exact value is selected
+        matched = client.query('metric{env=~"prod"}', time=EVAL_TIME)
+        assert_instant_values(matched, expected_values=[200])
+
+        # Trailing wildcard: both values are selected
+        matched = client.query('metric{env=~"prod.*"}', time=EVAL_TIME)
+        assert_instant_values(matched, expected_values=[100, 200])
+
+    def test_regex_any_value(self, labeled_series):
+        """The one-or-more-characters pattern selects every non-empty env value."""
+        matched = labeled_series.query('metric{env=~".+"}', time=EVAL_TIME)
+        # Every series with an env label (prod, dev, staging) is expected;
+        # the unlabelled series (600) is not
+        assert_instant_values(matched, expected_values=[100, 200, 300, 400, 500])
+
+    def test_regex_character_class(self, numeric_labels):
+        """A digit character class selects the all-digit code values."""
+        matched = numeric_labels.query('metric{code=~"[0-9]+"}', time=EVAL_TIME)
+        # Codes 200, 201, 404 and 500 are all digits;
+        # code abc is not
+        assert_instant_values(matched, expected_values=[100, 200, 300, 400])
+
+    def test_regex_character_class_specific(self, numeric_labels):
+        """A pattern of 2 followed by two characters selects the 2xx codes."""
+        matched = numeric_labels.query('metric{code=~"2.."}', time=EVAL_TIME)
+        # Expected: code 200 (100), code 201 (200)
+        assert_instant_values(matched, expected_values=[100, 200])
+
+    def test_regex_dot_matches_any(self, numeric_labels):
+        """A dot between two 4s selects code 404."""
+        matched = numeric_labels.query('metric{code=~"4.4"}', time=EVAL_TIME)
+        assert_instant_values(matched, expected_values=[300])  # 404
+
+
+# =============================================================================
+# Tests: Regex Not Match (!~)
+# =============================================================================
+
+
+class TestRegexNotMatch:
+    """Selectors using the ``!~`` (regex not match) matcher."""
+
+    def test_regex_not_match_excludes(self, labeled_series):
+        """Series whose env matches the pattern are dropped."""
+        matched = labeled_series.query('metric{env!~"prod.*"}', time=EVAL_TIME)
+        # Dropped: prod/us (100), prod/eu (200)
+        # Kept: dev/us (300), dev/eu (400), staging (500), unlabelled (600)
+        assert_instant_values(matched, expected_values=[300, 400, 500, 600])
+
+    def test_regex_not_match_includes_non_matching(self, labeled_series):
+        """Series whose env matches neither alternative are kept."""
+        matched = labeled_series.query('metric{env!~"prod|staging"}', time=EVAL_TIME)
+        # Dropped: prod/* (100, 200), staging (500)
+        # Kept: dev/* (300, 400), unlabelled (600)
+        assert_instant_values(matched, expected_values=[300, 400, 600])
+
+    def test_regex_not_match_missing_label(self, labeled_series):
+        """Series without the region label are kept by a negative region regex."""
+        matched = labeled_series.query('metric{region!~"us"}', time=EVAL_TIME)
+        # Dropped: */us (100, 300)
+        # Kept: */eu (200, 400), staging (500), unlabelled (600)
+        assert_instant_values(matched, expected_values=[200, 400, 500, 600])
+
+    def test_regex_not_match_any(self, labeled_series):
+        """Negating the one-or-more-characters pattern drops every non-empty env."""
+        matched = labeled_series.query('metric{env!~".+"}', time=EVAL_TIME)
+        # Every series with an env label is dropped;
+        # only the unlabelled series (600) remains
+        assert_instant_values(matched, expected_values=[600])
+
+
+# =============================================================================
+# Tests: __name__ Label Matching
+# =============================================================================
+
+
+class TestMetricNameMatching:
+    """Tests for __name__ label matching."""
+
+    def test_name_equality(self, multi_metric):
+        """{__name__="http_requests_total"} same as just http_requests_total."""
+        result1 = multi_metric.query("http_requests_total", time=EVAL_TIME)
+        result2 = multi_metric.query('{__name__="http_requests_total"}', time=EVAL_TIME)
+
+        values1 = sorted([sample.value for _, sample in result1.series])
+        values2 = sorted([sample.value for _, sample in result2.series])
+        assert values1 == values2
+        assert values1 == pytest.approx([100, 200])
+
+    def test_name_regex(self, multi_metric):
+        """{__name__=~"http.*"} matches metrics starting with 'http'."""
+        result = multi_metric.query('{__name__=~"http.*"}', time=EVAL_TIME)
+        # Matches: http_requests_total (100, 200), http_errors_total (10)
+        assert_instant_values(result, expected_values=[10, 100, 200])
+
+    def test_name_regex_suffix(self, multi_metric):
+        """{__name__=~".*_total"} matches metrics ending with '_total'."""
+        result = multi_metric.query('{__name__=~".*_total"}', time=EVAL_TIME)
+        # All metrics end with _total
+        assert_instant_values(result, expected_values=[10, 50, 100, 200])
+
+    def test_name_with_other_labels(self, multi_metric):
+        """{__name__="http_requests_total", method="GET"} matches name and label."""
+        result = multi_metric.query('{__name__="http_requests_total", method="GET"}', time=EVAL_TIME)
+        assert_instant_values(result, expected_values=[100])
+
+    def test_name_regex_with_labels(self, multi_metric):
+        """{__name__=~".*_requests_total", method="GET"} combines name regex and label."""
+        result = multi_metric.query('{__name__=~".*_requests_total", method="GET"}', time=EVAL_TIME)
+        # Matches: http_requests_total/GET (100), grpc_requests_total/GET (50)
+        assert_instant_values(result, expected_values=[50, 100])
+
+    def test_name_not_equal(self, multi_metric):
+        """{__name__!="http_requests_total", method="GET"} excludes specific metric."""
+        result = multi_metric.query('{__name__!="http_requests_total", method="GET"}', time=EVAL_TIME)
+        # Excludes http_requests_total, includes http_errors_total and grpc_requests_total
+        assert_instant_values(result, expected_values=[10, 50])
+
+
+# =============================================================================
+# Tests: Combined Matchers
+# =============================================================================
+
+
+class TestCombinedMatchers:
+    """Tests for combining multiple match types."""
+
+    def test_equal_and_not_equal(self, labeled_series):
+        """metric{env="prod", region!="eu"} combines equality and not-equal."""
+        result = labeled_series.query('metric{env="prod", region!="eu"}', time=EVAL_TIME)
+        # env="prod" AND region!="eu"
+        # Matches: prod/us (100)
+        assert_instant_values(result, expected_values=[100])
+
+    def test_equal_and_regex(self, labeled_series):
+        """metric{env="prod", region=~"u.*"} combines equality and regex."""
+        result = labeled_series.query('metric{env="prod", region=~"u.*"}', time=EVAL_TIME)
+        # env="prod" AND region starts with "u"
+        # Matches: prod/us (100)
+        assert_instant_values(result, expected_values=[100])
+
+    def test_not_equal_and_regex(self, labeled_series):
+        """metric{env!="staging", region=~".*"} combines not-equal and regex."""
+        result = labeled_series.query('metric{env!="staging", region=~".*"}', time=EVAL_TIME)
+        # env!="staging" AND has region label
+        # Matches: prod/us (100), prod/eu (200), dev/us (300), dev/eu (400)
+        assert_instant_values(result, expected_values=[100, 200, 300, 400])
+
+    def test_all_four_match_types(self, client):
+        """Combining all four matcher types."""
+        client.insert_gauge("metric", 100, EVAL_TIME, labels={"a": "1", "b": "2", "c": "prod", "d": "us"})
+        client.insert_gauge("metric", 200, EVAL_TIME, labels={"a": "1", "b": "3", "c": "prod", "d": "eu"})
+        client.insert_gauge("metric", 300, EVAL_TIME, labels={"a": "1", "b": "2", "c": "dev", "d": "us"})
+        client.insert_gauge("metric", 400, EVAL_TIME, labels={"a": "2", "b": "2", "c": "prod", "d": "us"})
+
+        result = client.query('metric{a="1", b!="3", c=~"prod.*", d!~"eu"}', time=EVAL_TIME)
+        # a="1" AND b!="3" AND c matches "prod.*" AND d doesn't match "eu"
+        # Only matches: first series (100)
+        assert_instant_values(result, expected_values=[100])
+
+
+# =============================================================================
+# Tests: Edge Cases
+# =============================================================================
+
+
+class TestLabelMatchingEdgeCases:
+    """Edge cases for label matching."""
+
+    def test_label_with_special_characters(self, client):
+        """Label names can contain underscores."""
+        client.insert_gauge("metric", 100, EVAL_TIME, labels={"app_name": "my_app"})
+        client.insert_gauge("metric", 200, EVAL_TIME, labels={"k8s_io_name": "pod"})
+
+        result = client.query('metric{app_name="my_app"}', time=EVAL_TIME)
+        assert_instant_values(result, expected_values=[100])
+
+        result = client.query('metric{k8s_io_name="pod"}', time=EVAL_TIME)
+        assert_instant_values(result, expected_values=[200])
+
+    def test_label_value_with_spaces(self, client):
+        """Label values can contain spaces."""
+        client.insert_gauge("metric", 100, EVAL_TIME, labels={"description": "hello world"})
+
+        result = client.query('metric{description="hello world"}', time=EVAL_TIME)
+        assert_instant_values(result, expected_values=[100])
+
+    def test_unicode_label_value(self, client):
+        """Unicode characters in label values."""
+        client.insert_gauge("metric", 100, EVAL_TIME, labels={"region": "日本"})
+        client.insert_gauge("metric", 200, EVAL_TIME, labels={"region": "europe"})
+
+        result = client.query('metric{region="日本"}', time=EVAL_TIME)
+        assert_instant_values(result, expected_values=[100])
+
+    def test_many_labels(self, client):
+        """Series with many labels still matches correctly."""
+        labels = {f"label{i}": f"value{i}" for i in range(10)}
+        client.insert_gauge("metric", 100, EVAL_TIME, labels=labels)
+
+        result = client.query('metric{label0="value0", label5="value5"}', time=EVAL_TIME)
+        assert_instant_values(result, expected_values=[100])
+
+    def test_case_sensitivity(self, client):
+        """Label matching is case-sensitive."""
+        client.insert_gauge("metric", 100, EVAL_TIME, labels={"env": "Prod"})
+        client.insert_gauge("metric", 200, EVAL_TIME, labels={"env": "prod"})
+        client.insert_gauge("metric", 300, EVAL_TIME, labels={"env": "PROD"})
+
+        result = client.query('metric{env="prod"}', time=EVAL_TIME)
+        assert_instant_values(result, expected_values=[200])
+
+        result = client.query('metric{env="Prod"}', time=EVAL_TIME)
+        assert_instant_values(result, expected_values=[100])
+
+    def test_case_insensitive_regex(self, client):
+        """Regex can use (?i) for case-insensitive matching."""
+        client.insert_gauge("metric", 100, EVAL_TIME, labels={"env": "Prod"})
+        client.insert_gauge("metric", 200, EVAL_TIME, labels={"env": "prod"})
+        client.insert_gauge("metric", 300, EVAL_TIME, labels={"env": "PROD"})
+
+        result = client.query('metric{env=~"(?i)prod"}', time=EVAL_TIME)
+        assert_instant_values(result, expected_values=[100, 200, 300])
+
+    def test_empty_selector_matches_all(self, labeled_series):
+        """metric{} matches all series of that metric."""
+        result = labeled_series.query("metric{}", time=EVAL_TIME)
+        # All 6 series
+        assert get_series_count(result) == 6
+
+    def test_regex_escape_special_chars(self, client):
+        """An unescaped '.' in a regex matches any single character."""
+        client.insert_gauge("metric", 100, EVAL_TIME, labels={"path": "/api/v1"})
+        client.insert_gauge("metric", 200, EVAL_TIME, labels={"path": "/api/v2"})
+        client.insert_gauge("metric", 300, EVAL_TIME, labels={"path": "xapixv1"})
+
+        # Without escaping, . matches any char
+        result = client.query('metric{path=~"/api/v."}', time=EVAL_TIME)
+        assert_instant_values(result, expected_values=[100, 200])
+
+    def test_label_name_with_number(self, client):
+        """Label names can contain numbers (not at start)."""
+        client.insert_gauge("metric", 100, EVAL_TIME, labels={"level1": "a"})
+        client.insert_gauge("metric", 200, EVAL_TIME, labels={"level2": "b"})
+
+        result = client.query('metric{level1="a"}', time=EVAL_TIME)
+        assert_instant_values(result, expected_values=[100])
+
+
+# =============================================================================
+# Tests: No Match Scenarios
+# =============================================================================
+
+
+class TestNoMatchScenarios:
+    """Tests for scenarios that should return no results."""
+
+    def test_no_series_exists(self, client):
+        """Query for non-existent metric returns empty."""
+        result = client.query('nonexistent{env="prod"}', time=EVAL_TIME)
+        assert_instant_empty(result)
+
+    def test_no_matching_labels(self, labeled_series):
+        """Query with impossible label combination returns empty."""
+        result = labeled_series.query('metric{env="prod", env="dev"}', time=EVAL_TIME)
+        # Can't have both env="prod" AND env="dev"
+        assert_instant_empty(result)
+
+    def test_contradictory_matchers(self, labeled_series):
+        """Contradictory matchers return empty."""
+        result = labeled_series.query('metric{env="prod", env!="prod"}', time=EVAL_TIME)
+        assert_instant_empty(result)
+
+    def test_regex_no_match(self, labeled_series):
+        """Regex that matches nothing returns empty."""
+        result = labeled_series.query('metric{env=~"xyz.*"}', time=EVAL_TIME)
+        assert_instant_empty(result)
diff --git a/tests/queries/test_query_range.py b/tests/queries/test_query_range.py
index c534bf2..bfd224d 100644
--- a/tests/queries/test_query_range.py
+++ b/tests/queries/test_query_range.py
@@ -3,7 +3,7 @@
 from metricsqlite import MetricsQLiteClient
 from metricsqlite.engine import MatrixResult
 
-START = 946_681_200_000  # 2000-01-01 00:00:00 UTC
+START = 946_684_800_000  # 2000-01-01 00:00:00 UTC
 
 
 @pytest.fixture
@@ -85,3 +85,111 @@ def test_only_latest_sample_in_result(self, client: MetricsQLiteClient):
         assert series[0].timestamp == START
         assert series[6].timestamp == START + 3_600_000
         assert [sample.value for sample in series] == [0, 10, 20, 30, 40, 50, 60]
+
+
+class TestRangeQueryStepBehavior:
+    """Tests for step behavior in range queries."""
+
+    def test_step_as_integer_seconds(self, client: MetricsQLiteClient):
+        """Step can be specified as integer seconds."""
+        pytest.skip("TODO: Implement test")
+
+    def test_step_as_duration_string(self, client: MetricsQLiteClient):
+        """Step can be specified as duration string like '5m'."""
+        pytest.skip("TODO: Implement test")
+
+    def test_step_default_is_5_minutes(self, client: MetricsQLiteClient):
+        """Default step is 5 minutes (300 seconds)."""
+        pytest.skip("TODO: Implement test")
+
+    def test_step_alignment(self, client: MetricsQLiteClient):
+        """Steps begin at start; the last point is the largest start + N*step <= end."""
+        pytest.skip("TODO: Implement test")
+
+    def test_step_1_second(self, client: MetricsQLiteClient):
+        """Very small step (1 second) produces many points."""
+        pytest.skip("TODO: Implement test")
+
+    def test_step_larger_than_range(self, client: MetricsQLiteClient):
+        """Step larger than query range produces a single point at start."""
+        pytest.skip("TODO: Implement test")
+
+
+class TestRangeQueryTimestamps:
+    """Tests for timestamp handling in range queries."""
+
+    def test_start_timestamp_formats(self, client: MetricsQLiteClient):
+        """Start can be float, string, or datetime."""
+        pytest.skip("TODO: Implement test")
+
+    def test_end_timestamp_formats(self, client: MetricsQLiteClient):
+        """End can be float, string, or datetime."""
+        pytest.skip("TODO: Implement test")
+
+    def test_end_default_is_now(self, client: MetricsQLiteClient):
+        """End defaults to current time if not specified."""
+        pytest.skip("TODO: Implement test")
+
+    def test_start_required(self, client: MetricsQLiteClient):
+        """Start timestamp is required, raises error if missing."""
+        pytest.skip("TODO: Implement test")
+
+
+class TestRangeQueryMultipleSeries:
+    """Tests for range queries with multiple series."""
+
+    def test_multiple_series_same_metric(self, client: MetricsQLiteClient):
+        """Multiple series with different labels in same query."""
+        pytest.skip("TODO: Implement test")
+
+    def test_series_with_different_data_ranges(self, client: MetricsQLiteClient):
+        """Series with data at different time ranges."""
+        pytest.skip("TODO: Implement test")
+
+    def test_series_appears_mid_range(self, client: MetricsQLiteClient):
+        """Series that starts producing data mid-query-range."""
+        pytest.skip("TODO: Implement test")
+
+    def test_series_disappears_mid_range(self, client: MetricsQLiteClient):
+        """Series that stops producing data mid-query-range."""
+        pytest.skip("TODO: Implement test")
+
+
+class TestRangeQueryWithCompaction:
+    """Tests for range queries on compacted data."""
+
+    def test_range_query_on_compacted_gauges(self, client: MetricsQLiteClient):
+        """Range query spanning compacted gauge buckets."""
+        pytest.skip("TODO: Implement test")
+
+    def test_range_query_on_counters(self, client: MetricsQLiteClient):
+        """Range query with counter data."""
+        pytest.skip("TODO: Implement test")
+
+    def test_range_query_mixed_compacted_raw(self, client: MetricsQLiteClient):
+        """Range query spanning both compacted and raw data."""
+        pytest.skip("TODO: Implement test")
+
+
+class TestRangeQueryEdgeCases:
+    """Edge cases for range queries."""
+
+    def test_start_equals_end(self, client: MetricsQLiteClient):
+        """Start == end produces single point result."""
+        pytest.skip("TODO: Implement test")
+
+    def test_very_long_range(self, client: MetricsQLiteClient):
+        """Query spanning very long time range (1 year)."""
+        pytest.skip("TODO: Implement test")
+
+    def test_no_data_in_range(self, client: MetricsQLiteClient):
+        """Query range with no data returns empty result."""
+        pytest.skip("TODO: Implement test")
+
+    def test_data_only_before_range(self, client: MetricsQLiteClient):
+        """Data exists but only before query range."""
+        pytest.skip("TODO: Implement test")
+
+    def test_data_only_after_range(self, client: MetricsQLiteClient):
+        """Data exists but only after query range."""
+        pytest.skip("TODO: Implement test")
diff --git a/tests/queries/test_rollup.py b/tests/queries/test_rollup.py
index 0d237ae..4385ef3 100644
--- a/tests/queries/test_rollup.py
+++ b/tests/queries/test_rollup.py
@@ -1,65 +1,548 @@
+"""Tests for rollup functions (aggregation over time).
+
+Rollup functions aggregate samples within a time range for each series.
+They take a range vector and produce an instant vector.
+
+Functions tested:
+- avg_over_time(): Average of samples in range
+- sum_over_time(): Sum of samples in range
+- min_over_time(): Minimum sample in range
+- max_over_time(): Maximum sample in range
+- count_over_time(): Number of samples in range
+- (integrate(): Area under curve - trapezoidal integration)
+
+Key behaviors:
+- Range is half-open: (T-range, T]
+- Each series evaluated independently
+- A series with no samples in the range is omitted from the result
+"""
+
+import math
+
 import pytest
 
-from metricsqlite import MetricsQLiteClient
+from metricsqlite.engine import InstantVector, MatrixResult
+
+# Standard eval time: 2000-01-01 00:00:00 UTC
+EVAL_TIME = 946_684_800_000
+
 
-START = 946_681_200_000  # 2000-01-01 00:00:00 UTC
+# =============================================================================
+# Fixtures (client fixture inherited from conftest.py)
+# =============================================================================
 
 
 @pytest.fixture
-def client():
-    """Create an in-memory client with tables initialized."""
-    client = MetricsQLiteClient(None)
-    client.connect()
-    client.create_tables()
-    yield client
-    client.close()
+def minute_series(client):
+    """Series with 1-minute interval samples.
 
+    Series: metric (no labels)
+    - Values: minute offset from EVAL_TIME
+    - At T-5m: -5, at T: 0, at T+5m: 5
 
-def insert_gauge_series(
-    client: MetricsQLiteClient, data: list[tuple[float, float]], metric_name: str = "metric", labels: dict | None = None
-):
-    for ts, v in data:
-        client.insert_gauge(metric_name, v, ts, labels=labels)
+    Range (T-10m, T] contains samples at minutes: -9, -8, ..., -1, 0
+    Values: -9, -8, ..., -1, 0 (10 samples)
+    - avg = -4.5
+    - sum = -45
+    - min = -9
+    - max = 0
+    - count = 10
+    """
+    for minute in range(-60, 60):
+        client.insert_gauge("metric", minute, EVAL_TIME + 60_000 * minute)
+    return client
 
 
-class TestFunctionOverTime:
-    def test_avg_over_time(self, client: MetricsQLiteClient):
-        for minute in range(-60, 60):
-            client.insert_gauge("metric", minute, START + 60_000 * minute)
+@pytest.fixture
+def multi_series(client):
+    """Multiple series with different values.
 
-        result = client.query_range(query="avg_over_time(metric[10m])", start=START, end=START + 1_800_000, step="10m")
-        labels, series = result.series[0]
-        assert labels == {"__name__": "metric"}
-        assert len(series) == 4
-        assert series[0].timestamp == START
-        assert series[3].timestamp == START + 1_800_000
-        # First bucket is (T-10m:T], so values are [-9, ... 0] of which average is -4.5.
-        assert [sample.value for sample in series] == [-4.5, 5.5, 15.5, 25.5]
+    Series:
+    - metric{label="A"}: values = 100 + minute
+    - metric{label="B"}: values = 200 + minute
+    - metric{label="C"}: values = 300 + minute
+    """
+    for minute in range(-60, 60):
+        ts = EVAL_TIME + 60_000 * minute
+        client.insert_gauge("metric", 100 + minute, ts, labels={"label": "A"})
+        client.insert_gauge("metric", 200 + minute, ts, labels={"label": "B"})
+        client.insert_gauge("metric", 300 + minute, ts, labels={"label": "C"})
+    return client
+
+
+@pytest.fixture
+def sparse_series(client):
+    """Series with gaps in data.
+
+    Samples only at T-30m, T-20m, T-10m, T
+    Values: 10, 20, 30, 40
+    """
+    client.insert_gauge("metric", 10, EVAL_TIME - 1_800_000)  # T-30m
+    client.insert_gauge("metric", 20, EVAL_TIME - 1_200_000)  # T-20m
+    client.insert_gauge("metric", 30, EVAL_TIME - 600_000)  # T-10m
+    client.insert_gauge("metric", 40, EVAL_TIME)  # T
+    return client
+
+
+# =============================================================================
+# Helpers
+# =============================================================================
+
+
+def assert_instant_value(result, expected_value, expected_labels=None):
+    """Assert instant query returns expected single value."""
+    assert isinstance(result, InstantVector)
+    assert len(result.series) == 1
+    labels, sample = result.series[0]
+    if expected_labels is not None:
+        assert labels == expected_labels
+    assert sample.value == pytest.approx(expected_value)
+
+
+def assert_instant_nan(result):
+    """Assert instant query returns NaN."""
+    assert isinstance(result, InstantVector)
+    assert len(result.series) == 1
+    _, sample = result.series[0]
+    assert math.isnan(sample.value)
+
+
+def assert_instant_empty(result):
+    """Assert instant query returns no series."""
+    assert isinstance(result, InstantVector)
+    assert len(result.series) == 0
+
+
+def assert_instant_values(result, expected_values):
+    """Assert instant vector holds exactly the expected values, ignoring series order."""
+    assert isinstance(result, InstantVector)
+    actual = sorted([sample.value for _, sample in result.series])
+    assert actual == pytest.approx(sorted(expected_values))
+
+
+def assert_range_values(result, expected_values, expected_labels=None):
+    """Assert range query returns expected values at each step."""
+    assert isinstance(result, MatrixResult)
+    assert len(result.series) == 1
+    labels, series = result.series[0]
+    if expected_labels is not None:
+        assert labels == expected_labels
+    actual_values = [s.value for s in series]
+    assert actual_values == pytest.approx(expected_values)
 
-    def test_avg_over_time_small_lookback(self, client: MetricsQLiteClient):
-        for minute in range(-60, 60):
-            client.insert_gauge("metric", minute, START + 60_000 * minute)
 
-        result = client.query_range(query="avg_over_time(metric[3m])", start=START, end=START + 1_800_000, step="10m")
+def assert_range_empty(result):
+    """Assert range query returns no series."""
+    assert isinstance(result, MatrixResult)
+    assert len(result.series) == 0
+
+
+# =============================================================================
+# Tests: Range Vector (query_range with rollup)
+# =============================================================================
+
+
+class TestRangeVector:
+    """Tests for avg_over_time lookback windows (3m, 15m) in range queries."""
+
+    def test_avg_over_time_small_lookback(self, minute_series):
+        """avg_over_time with 3m range."""
+        result = minute_series.query_range(
+            query="avg_over_time(metric[3m])",
+            start=EVAL_TIME,
+            end=EVAL_TIME + 1_800_000,
+            step="10m",
+        )
         labels, series = result.series[0]
         assert labels == {"__name__": "metric"}
         assert len(series) == 4
-        assert series[0].timestamp == START
-        assert series[3].timestamp == START + 1_800_000
-        # First bucket is (T-3m:T], so values are [-2, -1, 0] of which average is -1.
-        assert [sample.value for sample in series] == [-1, 9, 19, 29]
-
-    def test_avg_over_time_large_lookback(self, client: MetricsQLiteClient):
-        for minute in range(-60, 60):
-            client.insert_gauge("metric", minute, START + 60_000 * minute)
+        assert series[0].timestamp == EVAL_TIME
+        assert series[3].timestamp == EVAL_TIME + 1_800_000
+        # First bucket is (T-3m, T], so values are [-2, -1, 0], avg = -1
+        assert [sample.value for sample in series] == pytest.approx([-1, 9, 19, 29])
 
-        result = client.query_range(query="avg_over_time(metric[15m])", start=START, end=START + 1_800_000, step="10m")
+    def test_avg_over_time_large_lookback(self, minute_series):
+        """avg_over_time with 15m range."""
+        result = minute_series.query_range(
+            query="avg_over_time(metric[15m])",
+            start=EVAL_TIME,
+            end=EVAL_TIME + 1_800_000,
+            step="10m",
+        )
         labels, series = result.series[0]
         assert labels == {"__name__": "metric"}
         assert len(series) == 4
-        assert series[0].timestamp == START
-        assert series[3].timestamp == START + 1_800_000
-        # First bucket is (T-15m:T], so values are [-14, ... , 0] of which average is -7.
-        assert [sample.value for sample in series] == [-7, 3, 13, 23]
+        # First bucket is (T-15m, T], so values are [-14, ..., 0], avg = -7
+        assert [sample.value for sample in series] == pytest.approx([-7, 3, 13, 23])
+
+
+# =============================================================================
+# Tests: Function Over Time (instant queries)
+# =============================================================================
+
+
+class TestFunctionOverTime:
+    """Tests for rollup functions in instant queries."""
+
+    def test_avg_over_time(self, minute_series):
+        """avg_over_time computes average of samples in range."""
+        result = minute_series.query("avg_over_time(metric[10m])", time=EVAL_TIME)
+        # Range (T-10m, T] has samples at -9, -8, ..., 0, avg = -4.5
+        assert_instant_value(result, expected_value=-4.5)
+
+    def test_sum_over_time(self, minute_series):
+        """sum_over_time sums all samples in range."""
+        result = minute_series.query("sum_over_time(metric[10m])", time=EVAL_TIME)
+        # Range (T-10m, T] has samples -9+...+0 = -45
+        assert_instant_value(result, expected_value=-45)
+
+    def test_min_over_time(self, minute_series):
+        """min_over_time finds minimum sample in range."""
+        result = minute_series.query("min_over_time(metric[10m])", time=EVAL_TIME)
+        # Range (T-10m, T] has min = -9
+        assert_instant_value(result, expected_value=-9)
+
+    def test_max_over_time(self, minute_series):
+        """max_over_time finds maximum sample in range."""
+        result = minute_series.query("max_over_time(metric[10m])", time=EVAL_TIME)
+        # Range (T-10m, T] has max = 0
+        assert_instant_value(result, expected_value=0)
+
+    def test_count_over_time(self, minute_series):
+        """count_over_time counts samples in range."""
+        result = minute_series.query("count_over_time(metric[10m])", time=EVAL_TIME)
+        # Range (T-10m, T] has 10 samples
+        assert_instant_value(result, expected_value=10)
+
+
+class TestFunctionOverTimeRangeQuery:
+    """Tests for rollup functions in range queries."""
+
+    def test_avg_over_time_range(self, minute_series):
+        """avg_over_time evaluated at each step."""
+        result = minute_series.query_range(
+            query="avg_over_time(metric[10m])",
+            start=EVAL_TIME,
+            end=EVAL_TIME + 1_800_000,
+            step="10m",
+        )
+        # At each step T+N: avg of (T+N-10m, T+N]
+        # T: avg(-9..0) = -4.5
+        # T+10m: avg(1..10) = 5.5
+        # T+20m: avg(11..20) = 15.5
+        # T+30m: avg(21..30) = 25.5
+        assert_range_values(result, expected_values=[-4.5, 5.5, 15.5, 25.5])
+
+    def test_sum_over_time_range(self, minute_series):
+        """sum_over_time evaluated at each step."""
+        result = minute_series.query_range(
+            query="sum_over_time(metric[10m])",
+            start=EVAL_TIME,
+            end=EVAL_TIME + 1_800_000,
+            step="10m",
+        )
+        # T: sum(-9..0) = -45
+        # T+10m: sum(1..10) = 55
+        # T+20m: sum(11..20) = 155
+        # T+30m: sum(21..30) = 255
+        assert_range_values(result, expected_values=[-45, 55, 155, 255])
+
+    def test_min_over_time_range(self, minute_series):
+        """min_over_time evaluated at each step."""
+        result = minute_series.query_range(
+            query="min_over_time(metric[10m])",
+            start=EVAL_TIME,
+            end=EVAL_TIME + 1_800_000,
+            step="10m",
+        )
+        assert_range_values(result, expected_values=[-9, 1, 11, 21])
+
+    def test_max_over_time_range(self, minute_series):
+        """max_over_time evaluated at each step."""
+        result = minute_series.query_range(
+            query="max_over_time(metric[10m])",
+            start=EVAL_TIME,
+            end=EVAL_TIME + 1_800_000,
+            step="10m",
+        )
+        assert_range_values(result, expected_values=[0, 10, 20, 30])
+
+    def test_count_over_time_range(self, minute_series):
+        """count_over_time evaluated at each step."""
+        result = minute_series.query_range(
+            query="count_over_time(metric[10m])",
+            start=EVAL_TIME,
+            end=EVAL_TIME + 1_800_000,
+            step="10m",
+        )
+        assert_range_values(result, expected_values=[10, 10, 10, 10])
+
+
+# =============================================================================
+# Tests: Edge Cases
+# =============================================================================
+
+
+class TestRollupEdgeCases:
+    """Edge cases for rollup functions."""
+
+    def test_empty_range_returns_empty(self, client):
+        """Rollup on non-existent metric returns empty result."""
+        result = client.query("avg_over_time(nonexistent[10m])", time=EVAL_TIME)
+        assert_instant_empty(result)
+
+    def test_single_sample_in_range(self, client):
+        """Rollup with exactly one sample."""
+        client.insert_gauge("metric", 42, EVAL_TIME)
+        result = client.query("avg_over_time(metric[10m])", time=EVAL_TIME)
+        # Single sample: avg = sum = min = max = 42, count = 1
+        assert_instant_value(result, expected_value=42)
+
+    def test_single_sample_sum(self, client):
+        """sum_over_time with single sample equals that sample."""
+        client.insert_gauge("metric", 42, EVAL_TIME)
+        result = client.query("sum_over_time(metric[10m])", time=EVAL_TIME)
+        assert_instant_value(result, expected_value=42)
+
+    def test_single_sample_count(self, client):
+        """count_over_time with single sample returns 1."""
+        client.insert_gauge("metric", 42, EVAL_TIME)
+        result = client.query("count_over_time(metric[10m])", time=EVAL_TIME)
+        assert_instant_value(result, expected_value=1)
+
+    def test_sample_exactly_at_range_boundary_excluded(self, client):
+        """Sample at exactly T-range is excluded (half-open interval)."""
+        # Insert sample at exactly T-10m (should be excluded from (T-10m, T])
+        client.insert_gauge("metric", 100, EVAL_TIME - 600_000)  # T-10m
+        # Insert sample at T-9m (should be included)
+        client.insert_gauge("metric", 200, EVAL_TIME - 540_000)  # T-9m
+
+        result = client.query("sum_over_time(metric[10m])", time=EVAL_TIME)
+        # Only T-9m sample should be included
+        assert_instant_value(result, expected_value=200)
+
+    def test_sample_at_eval_time_included(self, client):
+        """Sample at exactly T (eval_time) is included."""
+        client.insert_gauge("metric", 42, EVAL_TIME)
+        result = client.query("count_over_time(metric[1m])", time=EVAL_TIME)
+        assert_instant_value(result, expected_value=1)
+
+    def test_all_samples_outside_range(self, client):
+        """All samples outside range returns empty."""
+        # Insert samples only outside the range
+        client.insert_gauge("metric", 100, EVAL_TIME - 1_200_000)  # T-20m
+        client.insert_gauge("metric", 200, EVAL_TIME + 600_000)  # T+10m
+
+        result = client.query("avg_over_time(metric[10m])", time=EVAL_TIME)
+        assert_instant_empty(result)
+
+    def test_sparse_data_rollup(self, sparse_series):
+        """With sparse data the rollup sees only the samples inside the window."""
+        # Fixture data: samples at T-30m, T-20m, T-10m, T carrying 10, 20, 30, 40
+        # The window (T-15m, T] therefore keeps T-10m (30) and T (40) only
+        averaged = sparse_series.query("avg_over_time(metric[15m])", time=EVAL_TIME)
+        assert_instant_value(averaged, expected_value=35)  # mean of 30 and 40
+
+    def test_sparse_data_count(self, sparse_series):
+        """count_over_time counts just the sparse samples inside the window."""
+        counted = sparse_series.query("count_over_time(metric[15m])", time=EVAL_TIME)
+        assert_instant_value(counted, expected_value=2)  # the T-10m and T samples
+
+
+# =============================================================================
+# Tests: Multiple Series
+# =============================================================================
+
+
+class TestRollupMultipleSeries:
+    """Rollup behaviour when a selector matches several series."""
+
+    def test_rollup_independent_per_series(self, multi_series):
+        """Every series gets its own rollup value."""
+        vector = multi_series.query("avg_over_time(metric[10m])", time=EVAL_TIME)
+        # Expected per-series averages:
+        #   A: avg(91..100) = 95.5, B: avg(191..200) = 195.5,
+        #   C: avg(291..300) = 295.5
+        assert isinstance(vector, InstantVector)
+        assert len(vector.series) == 3
+        averages = sorted(point.value for _, point in vector.series)
+        assert averages == pytest.approx([95.5, 195.5, 295.5])
+
+    def test_rollup_preserves_labels(self, multi_series):
+        """Label sets come out of the rollup unchanged."""
+        vector = multi_series.query("avg_over_time(metric[10m])", time=EVAL_TIME)
+        assert isinstance(vector, InstantVector)
+        seen = {frozenset(tags.items()) for tags, _ in vector.series}
+        wanted = {
+            frozenset({"__name__": "metric", "label": "A"}.items()),
+            frozenset({"__name__": "metric", "label": "B"}.items()),
+            frozenset({"__name__": "metric", "label": "C"}.items()),
+        }
+        assert seen == wanted
+
+    def test_partial_data_some_series(self, client):
+        """A series without samples in the window is left out of the result."""
+        # A: sample at the evaluation time, i.e. inside the 10m window
+        client.insert_gauge("metric", 100, EVAL_TIME, labels={"label": "A"})
+        # B: only a 20-minute-old sample, i.e. outside the 10m window
+        client.insert_gauge("metric", 200, EVAL_TIME - 1_200_000, labels={"label": "B"})
+
+        vector = client.query("avg_over_time(metric[10m])", time=EVAL_TIME)
+        # Series A alone is expected back
+        assert isinstance(vector, InstantVector)
+        assert len(vector.series) == 1
+        tags, point = vector.series[0]
+        assert tags["label"] == "A"
+        assert point.value == pytest.approx(100)
+
+    def test_sum_over_time_multiple_series(self, multi_series):
+        """sum_over_time yields one total per series."""
+        totals = multi_series.query("sum_over_time(metric[10m])", time=EVAL_TIME)
+        # A: sum(91..100) = 955
+        # B: sum(191..200) = 1955
+        # C: sum(291..300) = 2955
+        assert_instant_values(totals, expected_values=[955, 1955, 2955])
+
+    def test_count_over_time_multiple_series(self, multi_series):
+        """count_over_time reports an identical count for every series."""
+        counts = multi_series.query("count_over_time(metric[10m])", time=EVAL_TIME)
+        # All three series contribute 10 samples to the window
+        assert_instant_values(counts, expected_values=[10, 10, 10])
+
+
+# =============================================================================
+# Tests: Compacted Data
+# =============================================================================
+
+
+class TestRollupOnCompactedData:
+    """Rollup functions on compacted gauge data (placeholders, skipped before fixture setup)."""
+
+    @pytest.mark.skip(reason="TODO: Implement once compaction is fully supported")
+    def test_avg_over_time_compacted(self, client):
+        """avg_over_time uses average value from compacted bucket."""
+
+    @pytest.mark.skip(reason="TODO: Implement once compaction is fully supported")
+    def test_min_over_time_compacted_uses_stored_min(self, client):
+        """min_over_time should use stored min from compacted data."""
+
+    @pytest.mark.skip(reason="TODO: Implement once compaction is fully supported")
+    def test_max_over_time_compacted_uses_stored_max(self, client):
+        """max_over_time should use stored max from compacted data."""
+
+    @pytest.mark.skip(reason="TODO: Implement once compaction is fully supported")
+    def test_sum_over_time_compacted(self, client):
+        """sum_over_time sums average values from compacted buckets."""
+
+    @pytest.mark.skip(reason="TODO: Implement once compaction is fully supported")
+    def test_rollup_on_partially_compacted(self, client):
+        """Range spanning compacted and raw data."""
+
+
+# =============================================================================
+# Tests: Integrate Function
+# =============================================================================
+
+
+class TestIntegrateFunction:
+    """Tests for integrate() rollup function (placeholders, skipped before fixture setup)."""
+
+    @pytest.mark.skip(reason="TODO: Implement once integrate() is supported")
+    def test_integrate_constant_value(self, client):
+        """integrate() of constant value = value * duration."""
+
+    @pytest.mark.skip(reason="TODO: Implement once integrate() is supported")
+    def test_integrate_linear_increase(self, client):
+        """integrate() of linearly increasing values uses trapezoidal rule."""
+
+    @pytest.mark.skip(reason="TODO: Implement once integrate() is supported")
+    def test_integrate_single_sample(self, client):
+        """integrate() with single sample returns 0."""
+
+    @pytest.mark.skip(reason="TODO: Implement once integrate() is supported")
+    def test_integrate_empty_range(self, client):
+        """integrate() with no samples returns 0."""
+
+    @pytest.mark.skip(reason="TODO: Implement once integrate() is supported")
+    def test_integrate_returns_seconds(self, client):
+        """integrate() returns value in (value * seconds)."""
+
+
+# =============================================================================
+# Tests: Combined with Other Functions
+# =============================================================================
+
+
+class TestRollupCombined:
+    """Rollup functions composed with operators and other functions."""
+
+    def test_rollup_with_binary_op(self, minute_series):
+        """A binary operator is applied to the rollup's output."""
+        doubled = minute_series.query("avg_over_time(metric[10m]) * 2", time=EVAL_TIME)
+        # The average is -4.5; doubling it gives -9
+        assert_instant_value(doubled, expected_value=-9)
+
+    def test_rollup_with_transformation(self, minute_series):
+        """abs() wraps the rollup and transforms its output."""
+        magnitude = minute_series.query("abs(min_over_time(metric[10m]))", time=EVAL_TIME)
+        # The minimum is -9, whose absolute value is 9
+        assert_instant_value(magnitude, expected_value=9)
+
+    def test_aggregation_of_rollup(self, multi_series):
+        """sum() aggregates the per-series rollup outputs."""
+        combined = multi_series.query("sum(avg_over_time(metric[10m]))", time=EVAL_TIME)
+        # Per-series averages are 95.5, 195.5 and 295.5,
+        # which add up to 586.5
+        assert_instant_value(combined, expected_value=586.5)
+
+    def test_rollup_in_range_query(self, minute_series):
+        """A range query re-evaluates the rollup at every step."""
+        stepped = minute_series.query_range(
+            query="max_over_time(metric[5m])",
+            start=EVAL_TIME,
+            end=EVAL_TIME + 600_000,
+            step="5m",
+        )
+        # Step at T:     max(-4..0) = 0
+        # Step at T+5m:  max(1..5) = 5
+        # Step at T+10m: max(6..10) = 10
+        assert_range_values(stepped, expected_values=[0, 5, 10])
+
+
+# =============================================================================
+# Tests: Different Range Sizes
+# =============================================================================
+
+
+class TestDifferentRangeSizes:
+    """Tests for various range durations."""
+
+    def test_1_minute_range(self, minute_series):
+        """A 1m window holds exactly one sample."""
+        counted = minute_series.query("count_over_time(metric[1m])", time=EVAL_TIME)
+        # (T-1m, T] contains just the sample at T, whose value is 0
+        assert_instant_value(counted, expected_value=1)
+
+    def test_5_minute_range(self, minute_series):
+        """A 5m window holds five samples."""
+        counted = minute_series.query("count_over_time(metric[5m])", time=EVAL_TIME)
+        # (T-5m, T] contains the five values -4, -3, -2, -1 and 0
+        assert_instant_value(counted, expected_value=5)
+
+    def test_1_hour_range(self, minute_series):
+        """A 1h window holds sixty samples."""
+        counted = minute_series.query("count_over_time(metric[1h])", time=EVAL_TIME)
+        # (T-1h, T] contains the sixty values -59 through 0
+        assert_instant_value(counted, expected_value=60)
+
+    def test_avg_over_1_minute(self, minute_series):
+        """Averaging over a 1m window."""
+        averaged = minute_series.query("avg_over_time(metric[1m])", time=EVAL_TIME)
+        # The window holds a single sample, at T, with value 0
+        assert_instant_value(averaged, expected_value=0)
 
-    # TODO: query on compacted rows
+    def test_avg_over_5_minutes(self, minute_series):
+        """Averaging over a 5m window."""
+        averaged = minute_series.query("avg_over_time(metric[5m])", time=EVAL_TIME)
+        # (T-5m, T] holds -4, -3, -2, -1 and 0, whose mean is -2
+        assert_instant_value(averaged, expected_value=-2)
diff --git a/tests/queries/test_selectors.py b/tests/queries/test_selectors.py
index cdff5f4..4f8b21a 100644
--- a/tests/queries/test_selectors.py
+++ b/tests/queries/test_selectors.py
@@ -5,7 +5,7 @@
 from metricsqlite import MetricsQLiteClient
 from metricsqlite.engine import InstantVector, RangeVectorResult, Sample
 
-EVAL_TIME = 946_681_200_000  # 2000-01-01 00:00:00 UTC
+EVAL_TIME = 946_684_800_000  # 2000-01-01 00:00:00 UTC
 
 
 @pytest.fixture
@@ -390,3 +390,71 @@ def test_range_selector_singular_gauge(self, client: MetricsQLiteClient):
         assert len(samples) == 1
         assert samples[0].timestamp == EVAL_TIME - 120_000
         assert samples[0].value == 42
+
+
+class TestSelectorEdgeCases:
+    """Edge cases for metric selectors (placeholders, skipped before fixture setup)."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_metric_name_with_underscores(self, client: MetricsQLiteClient):
+        """Metric names can contain underscores."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_metric_name_with_colons(self, client: MetricsQLiteClient):
+        """Metric names can contain colons (recording rules)."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_very_long_metric_name(self, client: MetricsQLiteClient):
+        """Very long metric names are handled correctly."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_label_with_special_characters(self, client: MetricsQLiteClient):
+        """Label values with special characters."""
+
+
+class TestRangeVectorDurations:
+    """Range vector duration formats (placeholders, skipped before fixture setup)."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_range_seconds(self, client: MetricsQLiteClient):
+        """metric[30s] - seconds duration."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_range_minutes(self, client: MetricsQLiteClient):
+        """metric[5m] - minutes duration."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_range_hours(self, client: MetricsQLiteClient):
+        """metric[1h] - hours duration."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_range_days(self, client: MetricsQLiteClient):
+        """metric[1d] - days duration."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_range_weeks(self, client: MetricsQLiteClient):
+        """metric[1w] - weeks duration."""
+
+    @pytest.mark.skip(reason="TODO: Implement test - check if supported")
+    def test_range_combined(self, client: MetricsQLiteClient):
+        """metric[1h30m] - combined duration."""
+
+
+class TestSelectorLabelMatching:
+    """Label matching in selectors (placeholders, skipped before fixture setup)."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_equality_match(self, client: MetricsQLiteClient):
+        """metric{label="value"} exact match."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_not_equal_match(self, client: MetricsQLiteClient):
+        """metric{label!="value"} not equal match."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_regex_match(self, client: MetricsQLiteClient):
+        """metric{label=~"val.*"} regex match."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_regex_not_match(self, client: MetricsQLiteClient):
+        """metric{label!~"val.*"} regex not match."""
diff --git a/tests/queries/test_staleness.py b/tests/queries/test_staleness.py
new file mode 100644
index 0000000..9c4884f
--- /dev/null
+++ b/tests/queries/test_staleness.py
@@ -0,0 +1,155 @@
+"""Tests for staleness semantics.
+
+Staleness determines when a sample is considered too old to be included
+in query results. This is controlled by the lookback window.
+
+Key behaviors:
+- Default lookback is 5 minutes (300 seconds)
+- Samples outside lookback window are excluded
+- Each series evaluated independently
+- Compacted/counter data uses end timestamp for staleness
+"""
+
+import pytest
+
+from metricsqlite import MetricsQLiteClient
+
+EVAL_TIME = 946_684_800_000  # 2000-01-01 00:00:00 UTC
+
+
+@pytest.fixture
+def client():
+    """Yield a connected in-memory client with its tables created; close it afterwards."""
+    db = MetricsQLiteClient(None)
+    db.connect()
+    db.create_tables()
+    yield db
+    db.close()
+
+
+class TestDefaultStaleness:
+    """Default 5-minute staleness window (placeholders, skipped before fixture setup)."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_sample_4_minutes_ago_not_stale(self, client: MetricsQLiteClient):
+        """Sample 4 minutes before eval_time is not stale."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_sample_5_minutes_ago_not_stale(self, client: MetricsQLiteClient):
+        """Sample exactly 5 minutes before eval_time is not stale (boundary)."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_sample_5_minutes_1_second_ago_stale(self, client: MetricsQLiteClient):
+        """Sample 5m1s before eval_time is stale."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_sample_6_minutes_ago_stale(self, client: MetricsQLiteClient):
+        """Sample 6 minutes before eval_time is stale."""
+
+
+class TestCustomLookback:
+    """Custom lookback window (placeholders, skipped before fixture setup)."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_short_lookback_1_minute(self, client: MetricsQLiteClient):
+        """With 1m lookback, 2-minute-old sample is stale."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_long_lookback_1_hour(self, client: MetricsQLiteClient):
+        """With 1h lookback, 30-minute-old sample is not stale."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_lookback_boundary_included(self, client: MetricsQLiteClient):
+        """Sample at exactly eval_time - lookback is included."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_lookback_boundary_excluded(self, client: MetricsQLiteClient):
+        """Sample 1ms before eval_time - lookback is excluded."""
+
+
+class TestCounterStaleness:
+    """Counter staleness via end timestamp (placeholders, skipped before fixture setup)."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_counter_end_timestamp_determines_staleness(self, client: MetricsQLiteClient):
+        """Counter with old start but recent end is not stale.
+
+        If counter has start=T-10m but end=T-2m, and lookback is 5m,
+        the counter is NOT stale because end >= T-5m.
+        """
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_counter_start_before_end_after_lookback(self, client: MetricsQLiteClient):
+        """Counter starting before lookback but ending within is not stale."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_counter_end_before_lookback_is_stale(self, client: MetricsQLiteClient):
+        """Counter with end before lookback boundary is stale."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_counter_spans_entire_lookback(self, client: MetricsQLiteClient):
+        """Counter spanning T-10m to T+5m is not stale at T with 5m lookback."""
+
+
+class TestCompactedGaugeStaleness:
+    """Compacted gauge staleness (placeholders, skipped before fixture setup)."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_compacted_gauge_end_determines_staleness(self, client: MetricsQLiteClient):
+        """Compacted gauge uses end timestamp for staleness check."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_compacted_bucket_partially_in_lookback(self, client: MetricsQLiteClient):
+        """Bucket with end in lookback but start outside is not stale."""
+
+
+class TestMultiSeriesStaleness:
+    """Staleness with multiple series (placeholders, skipped before fixture setup)."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_each_series_independent_staleness(self, client: MetricsQLiteClient):
+        """Each series is checked for staleness independently."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_some_series_stale_others_not(self, client: MetricsQLiteClient):
+        """Only non-stale series are included in results."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_same_metric_different_labels_staleness(self, client: MetricsQLiteClient):
+        """Same metric with different labels can have different staleness."""
+
+
+class TestStalenessInRangeQuery:
+    """Staleness in range queries (placeholders, skipped before fixture setup)."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_staleness_checked_at_each_step(self, client: MetricsQLiteClient):
+        """Staleness is checked at each step time in range query."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_series_appears_when_no_longer_stale(self, client: MetricsQLiteClient):
+        """Series appears in results once a sample enters lookback window."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_series_disappears_when_becomes_stale(self, client: MetricsQLiteClient):
+        """Series disappears from results once all samples are stale."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_gap_in_series_data(self, client: MetricsQLiteClient):
+        """Series with gap has missing steps where data is stale."""
+
+
+class TestStalenessWithRollupFunctions:
+    """Staleness vs. rollup functions (placeholders, skipped before fixture setup)."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_avg_over_time_range_ignores_staleness(self, client: MetricsQLiteClient):
+        """Range vectors use explicit range, not lookback for sample selection.
+
+        avg_over_time(metric[10m]) includes samples in (T-10m, T] regardless
+        of lookback setting.
+        """
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_rollup_with_no_samples_returns_nan(self, client: MetricsQLiteClient):
+        """Rollup function on empty range returns NaN."""
diff --git a/tests/queries/test_subqueries.py b/tests/queries/test_subqueries.py
new file mode 100644
index 0000000..bb69cd4
--- /dev/null
+++ b/tests/queries/test_subqueries.py
@@ -0,0 +1,110 @@
+"""Tests for subqueries.
+
+Subqueries allow applying range query semantics inside an instant query.
+Syntax: func(metric[range:step]) or func(metric[range:])
+
+VictoriaMetrics-specific behaviors:
+- Subqueries create intermediate range evaluation
+- Result is processed by outer rollup function
+- Different from plain range vectors
+"""
+
+import pytest
+
+from metricsqlite import MetricsQLiteClient
+
+EVAL_TIME = 946_684_800_000  # 2000-01-01 00:00:00 UTC
+
+
+@pytest.fixture
+def client():
+    """Yield a connected in-memory client with its tables created; close it afterwards."""
+    db = MetricsQLiteClient(None)
+    db.connect()
+    db.create_tables()
+    yield db
+    db.close()
+
+
+class TestSubqueryBasics:
+    """Basic subquery functionality (placeholders, skipped before fixture setup)."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_subquery_syntax_with_step(self, client: MetricsQLiteClient):
+        """avg_over_time(metric[1h:5m]) averages over 1h with 5m step."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_subquery_syntax_default_step(self, client: MetricsQLiteClient):
+        """avg_over_time(metric[1h:]) uses default step."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_subquery_creates_multiple_samples(self, client: MetricsQLiteClient):
+        """Subquery evaluates metric at each step time, creating samples."""
+
+
+class TestSubqueryWithRollup:
+    """Subqueries with rollup functions (placeholders, skipped before fixture setup)."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_avg_of_subquery(self, client: MetricsQLiteClient):
+        """avg_over_time(metric[1h:5m]) averages the step results."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_max_of_subquery(self, client: MetricsQLiteClient):
+        """max_over_time(metric[1h:5m]) finds max of step results."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_count_of_subquery(self, client: MetricsQLiteClient):
+        """count_over_time(metric[1h:5m]) counts step results."""
+
+
+class TestSubqueryWithTransformation:
+    """Subqueries with transformation functions (placeholders, skipped before fixture setup)."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_transformation_in_subquery(self, client: MetricsQLiteClient):
+        """avg_over_time(clamp_min(metric, 0)[1h:5m]) clamps then averages."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_transformation_on_subquery_result(self, client: MetricsQLiteClient):
+        """abs(avg_over_time(metric[1h:5m])) takes abs of average."""
+
+
+class TestSubqueryWithRollupNesting:
+    """Nested rollups with subqueries (placeholder, skipped before fixture setup)."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_avg_of_sum_over_time(self, client: MetricsQLiteClient):
+        """avg_over_time(sum_over_time(metric[5m])[1h:5m]) nested rollups."""
+
+
+class TestSubqueryInRangeQuery:
+    """Subqueries within range queries (placeholders, skipped before fixture setup)."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_subquery_at_each_range_step(self, client: MetricsQLiteClient):
+        """Range query evaluates subquery at each step."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_subquery_lookback_in_range(self, client: MetricsQLiteClient):
+        """Subquery lookback is relative to each range step."""
+
+
+class TestSubqueryEdgeCases:
+    """Edge cases for subqueries (placeholders, skipped before fixture setup)."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_subquery_step_larger_than_range(self, client: MetricsQLiteClient):
+        """metric[5m:10m] where step > range - only one sample."""
+
+    @pytest.mark.skip(reason="TODO: Implement test - if offset is supported")
+    def test_subquery_with_offset(self, client: MetricsQLiteClient):
+        """metric[1h:5m] offset 1h shifts the window."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_subquery_empty_result(self, client: MetricsQLiteClient):
+        """Subquery with no data returns NaN for rollup."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_subquery_partial_data(self, client: MetricsQLiteClient):
+        """Subquery with some steps missing data handles correctly."""
diff --git a/tests/queries/test_transformation.py b/tests/queries/test_transformation.py
new file mode 100644
index 0000000..ec4411e
--- /dev/null
+++ b/tests/queries/test_transformation.py
@@ -0,0 +1,170 @@
+"""Tests for transformation functions.
+
+Transformation functions transform individual sample values without
+changing the series structure. They operate element-wise on each sample.
+
+Functions tested:
+- abs(): Absolute value
+- clamp_min(v, min): Clamp values to minimum
+- clamp_max(v, max): Clamp values to maximum
+
+VictoriaMetrics-specific behaviors:
+- Transformations preserve all labels
+- Applied before aggregation if nested
+"""
+
+import pytest
+
+from metricsqlite import MetricsQLiteClient
+
+EVAL_TIME = 946_684_800_000  # 2000-01-01 00:00:00 UTC
+
+
+@pytest.fixture
+def client():
+    """Yield a connected in-memory client with its tables created; close it afterwards."""
+    db = MetricsQLiteClient(None)
+    db.connect()
+    db.create_tables()
+    yield db
+    db.close()
+
+
+class TestAbsTransformation:
+    """Tests for abs() transformation (placeholders, skipped before fixture setup)."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_abs_positive_value(self, client: MetricsQLiteClient):
+        """abs() of positive value returns same value."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_abs_negative_value(self, client: MetricsQLiteClient):
+        """abs() of negative value returns positive value."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_abs_zero(self, client: MetricsQLiteClient):
+        """abs() of zero returns zero."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_abs_multiple_series(self, client: MetricsQLiteClient):
+        """abs() applied independently to each series."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_abs_preserves_labels(self, client: MetricsQLiteClient):
+        """abs() preserves all series labels."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_abs_preserves_timestamp(self, client: MetricsQLiteClient):
+        """abs() preserves sample timestamp."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_abs_empty_result(self, client: MetricsQLiteClient):
+        """abs() on no series returns empty InstantVector."""
+
+
+class TestClampMinTransformation:
+    """Tests for clamp_min() transformation (placeholders, skipped before fixture setup)."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_clamp_min_value_below_threshold(self, client: MetricsQLiteClient):
+        """Value below threshold is clamped to threshold."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_clamp_min_value_above_threshold(self, client: MetricsQLiteClient):
+        """Value above threshold is unchanged."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_clamp_min_value_equals_threshold(self, client: MetricsQLiteClient):
+        """Value equal to threshold is unchanged."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_clamp_min_negative_threshold(self, client: MetricsQLiteClient):
+        """Negative threshold values work correctly."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_clamp_min_zero_threshold(self, client: MetricsQLiteClient):
+        """Zero threshold clamps negative values to zero."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_clamp_min_multiple_series(self, client: MetricsQLiteClient):
+        """clamp_min() applied independently to each series."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_clamp_min_preserves_labels(self, client: MetricsQLiteClient):
+        """clamp_min() preserves all series labels."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_clamp_min_missing_argument_error(self, client: MetricsQLiteClient):
+        """clamp_min() without threshold argument raises error."""
+
+
+class TestClampMaxTransformation:
+    """Tests for clamp_max() transformation (placeholders, skipped before fixture setup)."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_clamp_max_value_above_threshold(self, client: MetricsQLiteClient):
+        """Value above threshold is clamped to threshold."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_clamp_max_value_below_threshold(self, client: MetricsQLiteClient):
+        """Value below threshold is unchanged."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_clamp_max_value_equals_threshold(self, client: MetricsQLiteClient):
+        """Value equal to threshold is unchanged."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_clamp_max_negative_threshold(self, client: MetricsQLiteClient):
+        """Negative threshold values work correctly."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_clamp_max_zero_threshold(self, client: MetricsQLiteClient):
+        """Zero threshold clamps positive values to zero."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_clamp_max_multiple_series(self, client: MetricsQLiteClient):
+        """clamp_max() applied independently to each series."""
+
+
+class TestTransformationChaining:
+    """Chained transformation functions (placeholders, skipped before fixture setup)."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_clamp_min_then_clamp_max(self, client: MetricsQLiteClient):
+        """clamp_max(clamp_min(v, min), max) clamps to range [min, max]."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_clamp_max_then_clamp_min(self, client: MetricsQLiteClient):
+        """clamp_min(clamp_max(v, max), min) clamps to range [min, max]."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_abs_then_clamp(self, client: MetricsQLiteClient):
+        """clamp_max(abs(v), max) caps absolute values."""
+
+
+class TestTransformationInRangeQuery:
+    """Transformations in range queries (placeholders, skipped before fixture setup)."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_transformation_at_each_step(self, client: MetricsQLiteClient):
+        """Transformation is applied at each step in range query."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_transformation_before_rollup(self, client: MetricsQLiteClient):
+        """avg_over_time(clamp_min(v, 0)[5m]) clamps before averaging."""
+
+
+class TestTransformationEdgeCases:
+    """Edge cases for transformation functions (placeholders, skipped before fixture setup)."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_transformation_on_nan(self, client: MetricsQLiteClient):
+        """Transformation functions handle NaN values."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_transformation_on_inf(self, client: MetricsQLiteClient):
+        """Transformation functions handle Infinity values."""
+
+    @pytest.mark.skip(reason="TODO: Implement test")
+    def test_clamp_with_scalar_expression(self, client: MetricsQLiteClient):
+        """clamp_min(v, 1+1) evaluates scalar expression for threshold."""
diff --git a/tests/queries/test_window_semantics.py b/tests/queries/test_window_semantics.py
new file mode 100644
index 0000000..8933325
--- /dev/null
+++ b/tests/queries/test_window_semantics.py
@@ -0,0 +1,148 @@
+"""Tests for window and interval semantics.
+
+Window semantics differ between range vectors and plain selectors:
+- Range vectors: Half-open interval (T-range, T] to avoid overlap
+- Plain selectors: Closed interval [T-lookback, T] for staleness
+
+This prevents samples from being counted twice in adjacent windows
+while still allowing staleness checks to include boundary samples.
+"""
+
+import pytest
+
+from metricsqlite import MetricsQLiteClient
+
+EVAL_TIME = 946_684_800_000  # 2000-01-01 00:00:00 UTC, in ms since the Unix epoch
+
+
+@pytest.fixture
+def client():
+    """Yield a connected in-memory client with tables created; close it afterwards."""
+    client = MetricsQLiteClient(None)
+    client.connect()
+    client.create_tables()
+    yield client
+    client.close()
+
+
+class TestRangeVectorIntervals:
+    """Tests for range vector half-open interval (T-range, T]."""
+
+    def test_sample_at_end_included(self, client: MetricsQLiteClient):
+        """Sample at exactly T (end of range) is included."""
+        pytest.skip("TODO: Implement test")
+
+    def test_sample_at_start_excluded(self, client: MetricsQLiteClient):
+        """Sample at exactly T-range (start of range) is EXCLUDED.
+
+        This is the half-open interval semantics: (T-range, T]
+        E.g. a sample at exactly T-10m is NOT included in a [10m] range.
+        """
+        pytest.skip("TODO: Implement test")
+
+    def test_sample_1ms_after_start_included(self, client: MetricsQLiteClient):
+        """Sample 1ms after T-range is included."""
+        pytest.skip("TODO: Implement test")
+
+    def test_adjacent_windows_no_overlap(self, client: MetricsQLiteClient):
+        """Adjacent range query windows do not count the same sample twice.
+
+        With step=10m and range=10m:
+        - Window at T=10m: (0m, 10m]
+        - Window at T=20m: (10m, 20m]
+        A sample at exactly 10m is in the first window, not the second.
+        """
+        pytest.skip("TODO: Implement test")
+
+
+class TestPlainSelectorIntervals:
+    """Tests for plain selector closed interval [T-lookback, T]."""
+
+    def test_sample_at_start_included(self, client: MetricsQLiteClient):
+        """Sample at exactly T-lookback is included (closed interval)."""
+        pytest.skip("TODO: Implement test")
+
+    def test_sample_at_end_included(self, client: MetricsQLiteClient):
+        """Sample at exactly T is included."""
+        pytest.skip("TODO: Implement test")
+
+    def test_sample_before_start_excluded(self, client: MetricsQLiteClient):
+        """Sample before T-lookback is excluded."""
+        pytest.skip("TODO: Implement test")
+
+
+class TestMixedIntervalSemantics:
+    """Tests verifying different semantics for different contexts."""
+
+    def test_range_vector_vs_plain_at_boundary(self, client: MetricsQLiteClient):
+        """Same sample is included by a plain selector but excluded by a range vector.
+
+        Sample at exactly T-5m:
+        - Plain selector `metric` with 5m lookback: INCLUDED
+        - Range vector `metric[5m]`: EXCLUDED
+        """
+        pytest.skip("TODO: Implement test")
+
+    def test_rollup_in_range_query_uses_half_open(self, client: MetricsQLiteClient):
+        """avg_over_time(metric[5m]) in a range query uses half-open intervals."""
+        pytest.skip("TODO: Implement test")
+
+
+class TestRangeQueryStepWindows:
+    """Tests for step windows in range queries."""
+
+    def test_step_aligned_windows(self, client: MetricsQLiteClient):
+        """Windows align with step times: T, T+step, T+2*step, ..."""
+        pytest.skip("TODO: Implement test")
+
+    def test_step_smaller_than_range(self, client: MetricsQLiteClient):
+        """With step < range, windows overlap (a sample may be in several windows)."""
+        pytest.skip("TODO: Implement test")
+
+    def test_step_equals_range(self, client: MetricsQLiteClient):
+        """With step == range, windows are adjacent (no overlap, no gap)."""
+        pytest.skip("TODO: Implement test")
+
+    def test_step_larger_than_range(self, client: MetricsQLiteClient):
+        """With step > range, there are gaps between windows."""
+        pytest.skip("TODO: Implement test")
+
+
+class TestWindowBoundaryCases:
+    """Edge cases for window boundaries."""
+
+    def test_sample_exactly_on_step_boundary(self, client: MetricsQLiteClient):
+        """Sample at a step boundary is in the earlier window only."""
+        pytest.skip("TODO: Implement test")
+
+    def test_1ms_difference_in_boundary(self, client: MetricsQLiteClient):
+        """A 1ms difference determines which window a sample falls into."""
+        pytest.skip("TODO: Implement test")
+
+    def test_range_start_equals_query_start(self, client: MetricsQLiteClient):
+        """First window in a range query starts at the query start time."""
+        pytest.skip("TODO: Implement test")
+
+    def test_range_end_equals_query_end(self, client: MetricsQLiteClient):
+        """Last window in a range query ends at the query end time."""
+        pytest.skip("TODO: Implement test")
+
+
+class TestCompactedDataWindowing:
+    """Tests for windowing with compacted data."""
+
+    def test_compacted_bucket_spans_window_boundary(self, client: MetricsQLiteClient):
+        """Compacted bucket spanning a window boundary is clamped correctly."""
+        pytest.skip("TODO: Implement test")
+
+    def test_counter_spans_multiple_windows(self, client: MetricsQLiteClient):
+        """Counter spanning multiple step windows appears in all of them."""
+        pytest.skip("TODO: Implement test")
+
+    def test_window_start_inside_bucket(self, client: MetricsQLiteClient):
+        """Window start falls inside a compacted bucket."""
+        pytest.skip("TODO: Implement test")
+
+    def test_window_end_inside_bucket(self, client: MetricsQLiteClient):
+        """Window end falls inside a compacted bucket."""
+        pytest.skip("TODO: Implement test")
diff --git a/tests/test_client.py b/tests/test_client.py
index 1b2b1bf..562a6ba 100644
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -4,7 +4,7 @@
 
 from metricsqlite import CompactedRangeError, MetricsQLiteClient
 
-EVAL_TIME = 946_681_200_000  # 2000-01-01 00:00:00 UTC
+EVAL_TIME = 946_684_800_000  # 2000-01-01 00:00:00 UTC, in ms since the Unix epoch
 
 
 @pytest.fixture