diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e393cb1..037e6ac 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -9,4 +9,6 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.15.12 hooks: + - id: ruff # Linter + args: [ --fix ] - id: ruff-format # Formatter diff --git a/docs/dev/internals.md b/docs/dev/internals.md index 8280da9..3371adb 100644 --- a/docs/dev/internals.md +++ b/docs/dev/internals.md @@ -8,6 +8,7 @@ metricsqlite/ ├── engine/ │ ├── parser/ # Lexer, parser, AST nodes │ ├── executor.py # AST evaluation, result types +│ ├── functions.py # Rollup, transformation, aggregation functions │ ├── sqlite.py # SQLiteAdapter for raw data fetching │ └── query.py # QueryEngine - public query interface └── fastapi/ # Optional FastAPI routes @@ -68,8 +69,10 @@ Samples may have an `end` field indicating they span a time range `[start, end]` The executor handles overlap detection and boundary clamping when windowing. -## Function Categories +## Function Categories (`functions.py`) -- **Rollup** (`avg_over_time`, `sum_over_time`, etc.): Aggregate samples within each window +- **Rollup** (`avg_over_time`, `sum_over_time`, `integrate`, etc.): Aggregate samples within each window - **Transformation** (`abs`, `clamp_min`, `clamp_max`): Transform individual values - **Aggregation** (`sum`, `avg`, `min`, `max`, `count`): Aggregate across series + +Each category has a dictionary mapping function names to implementations. 
diff --git a/metricsqlite/client.py b/metricsqlite/client.py index f299032..b58c0ef 100644 --- a/metricsqlite/client.py +++ b/metricsqlite/client.py @@ -4,7 +4,7 @@ from datetime import datetime from pathlib import Path -from metricsqlite.engine import MatrixResult, QueryEngine, QueryResult +from metricsqlite.engine import MatrixResult, QueryEngine, QueryResult, sqlite_regexp from metricsqlite.exceptions import CompactedRangeError from metricsqlite.util import parse_interval, parse_timestamp @@ -29,6 +29,7 @@ def __init__( db_path: str | Path | None, tables_prefix: str = "metricsqlite", enable_wal: bool = False, + register_regexp: bool = True, ) -> None: """ Args: @@ -37,10 +38,14 @@ def __init__( enable_wal: Enable WAL journal mode for better concurrent read/write performance. Note: This is a database-level setting that affects all connections to this database file. + register_regexp: Register a custom REGEXP function for regex label + matching. Set to False if your database already has a + REGEXP implementation (e.g., from sqlite3-pcre extension). 
""" self._db_path = db_path self._tables_prefix = tables_prefix self._enable_wal = enable_wal + self._register_regexp = register_regexp self._lock = threading.Lock() self._connection: sqlite3.Connection | None = None @@ -90,6 +95,8 @@ def connect(self) -> None: db_path = self._db_path if self._db_path is not None else ":memory:" self._connection = sqlite3.connect(db_path, check_same_thread=False) self._connection.row_factory = sqlite3.Row + if self._register_regexp: + self._connection.create_function("regexp", 2, sqlite_regexp) if self._enable_wal: self._get_connection().execute("PRAGMA journal_mode=WAL") self._engine = QueryEngine( @@ -233,9 +240,8 @@ def __exit__(self, exc_type: object, exc_val: object, exc_tb: object) -> None: def query( self, query: str, - time: float | str | None = None, + time: float | str | datetime | None = None, step: float | str | None = None, - timeout: float | str | None = None, ) -> QueryResult: """Execute an instant query. @@ -247,13 +253,12 @@ def query( step: Lookback window. If set, only samples within [time - step, time] are considered. Accepts seconds or duration strings like "5m", "1h". - timeout: Query timeout (not yet implemented). Returns: Query result (InstantVector, RangeVectorResult, or ScalarResult). """ with self._lock: - return self._get_engine().query(query, eval_time=time, step=step, timeout=timeout) + return self._get_engine().query(query, eval_time=time, step=step) def query_range( self, @@ -261,7 +266,6 @@ def query_range( start: float | str | datetime, end: float | str | datetime | None = None, step: float | str | None = None, - timeout: float | str | None = None, ) -> MatrixResult: """Execute a range query. @@ -272,13 +276,12 @@ def query_range( start: Start timestamp (Unix seconds). end: End timestamp (Unix seconds). Defaults to current time. step: Query resolution step in seconds. Defaults to 5m (300s). - timeout: Query timeout (not yet implemented). 
Returns: MatrixResult containing series with multiple samples over time. """ with self._lock: - return self._get_engine().query_range(query, start=start, end=end, step=step, timeout=timeout) + return self._get_engine().query_range(query, start=start, end=end, step=step) @staticmethod def _build_time_filter( diff --git a/metricsqlite/engine/__init__.py b/metricsqlite/engine/__init__.py index 28a54d7..8e2b43e 100644 --- a/metricsqlite/engine/__init__.py +++ b/metricsqlite/engine/__init__.py @@ -28,6 +28,7 @@ tokenize, ) from metricsqlite.engine.query import QueryEngine +from metricsqlite.engine.sqlite import sqlite_regexp __all__ = [ "BinaryExpr", @@ -53,5 +54,6 @@ "TokenType", "UnaryExpr", "parse", + "sqlite_regexp", "tokenize", ] diff --git a/metricsqlite/engine/executor.py b/metricsqlite/engine/executor.py index 01c8ec6..36ec676 100644 --- a/metricsqlite/engine/executor.py +++ b/metricsqlite/engine/executor.py @@ -10,6 +10,11 @@ from collections.abc import Callable from dataclasses import dataclass +from metricsqlite.engine.functions import ( + AGGREGATION_FUNCTIONS, + ROLLUP_FUNCTIONS, + TRANSFORMATION_FUNCTIONS, +) from metricsqlite.engine.parser import ( BinaryExpr, Expr, @@ -206,6 +211,24 @@ def execute(self, expr: Expr) -> QueryResult: # Convert RawSeriesSet to InstantVector return self._to_instant_vector(result) + def find_series( + self, + selector: MetricSelector, + start: float | None = None, + end: float | None = None, + ) -> list[tuple[str, str]]: + """Find series matching a selector. + + Args: + selector: Parsed metric selector. + start: Start timestamp in milliseconds. + end: End timestamp in milliseconds. + + Returns: + List of (name, labels_json) tuples for matching series. 
+ """ + return self._sql.find_series(selector, start, end) + def execute_range( self, expr: Expr, @@ -442,15 +465,15 @@ def _evaluate_function( name = func.name.lower() # Rollup functions - aggregate over time, require _WindowedSeriesSet - if name in _ROLLUP_FUNCTIONS: + if name in ROLLUP_FUNCTIONS: return self._apply_rollup(name, func.args, ctx) # Transformation functions - transform individual values - if name in _TRANSFORMATION_FUNCTIONS: + if name in TRANSFORMATION_FUNCTIONS: return self._apply_transformation(name, func.args, ctx) # Aggregation functions - aggregate across series - if name in _AGGREGATION_FUNCTIONS: + if name in AGGREGATION_FUNCTIONS: return self._apply_aggregation(name, func.args, ctx) raise ExecutionError(f"Unknown function: {func.name}") @@ -470,7 +493,7 @@ def _apply_rollup( if not isinstance(inner, _WindowedSeriesSet): raise ExecutionError(f"{name} requires a range vector") - agg_fn = _ROLLUP_FUNCTIONS[name] + agg_fn = ROLLUP_FUNCTIONS[name] result_series = [] for ws in inner.series: @@ -501,35 +524,24 @@ def _apply_transformation( raise ExecutionError(f"{name} requires at least 1 argument") inner = self._evaluate(args[0], ctx) + transform_fn = TRANSFORMATION_FUNCTIONS[name] - # Get the transformation function - fn: Callable[[float], float] + # Build the value transformation function if name == "abs": - fn = abs - elif name == "clamp_min": + fn = transform_fn + elif name in ("clamp_min", "clamp_max"): if len(args) != 2: - raise ExecutionError("clamp_min requires 2 arguments") - min_val = self._evaluate(args[1], ctx) - if not isinstance(min_val, (int, float)): - raise ExecutionError("clamp_min second argument must be a scalar") - threshold = float(min_val) + raise ExecutionError(f"{name} requires 2 arguments") + bound = self._evaluate(args[1], ctx) + if not isinstance(bound, (int, float)): + raise ExecutionError(f"{name} second argument must be a scalar") + bound_val = float(bound) + clamp_fn = transform_fn - def _clamp_min(v: float, m: 
float = threshold) -> float: - return max(v, m) - - fn = _clamp_min - elif name == "clamp_max": - if len(args) != 2: - raise ExecutionError("clamp_max requires 2 arguments") - max_val = self._evaluate(args[1], ctx) - if not isinstance(max_val, (int, float)): - raise ExecutionError("clamp_max second argument must be a scalar") - threshold = float(max_val) + def _apply_clamp(v: float, b: float = bound_val, f: Callable[[float, float], float] = clamp_fn) -> float: + return f(v, b) - def _clamp_max(v: float, m: float = threshold) -> float: - return min(v, m) - - fn = _clamp_max + fn = _apply_clamp else: raise ExecutionError(f"Unknown transformation function: {name}") @@ -567,7 +579,7 @@ def _apply_aggregation( if inner.is_empty(): return RawSeriesSet([]) - agg_fn = _AGGREGATION_FUNCTIONS[name] + agg_fn = AGGREGATION_FUNCTIONS[name] # Group samples by timestamp across all series samples_by_time: dict[float, list[float]] = {} @@ -655,26 +667,30 @@ def _evaluate_binary( raise ExecutionError(f"Unsupported binary operation between {type(left)} and {type(right)}") + @staticmethod def _binary_vector_scalar( - self, vector: RawSeriesSet, scalar: float, op: str, ) -> RawSeriesSet: """Apply binary op between vector and scalar.""" + if _is_comparison_op(op): + return vector.filter_values(lambda v: _compare(op, v, scalar)) return vector.map_values(lambda v: _apply_binary_op(op, v, scalar)) + @staticmethod def _binary_scalar_vector( - self, scalar: float, vector: RawSeriesSet, op: str, ) -> RawSeriesSet: """Apply binary op between scalar and vector.""" + if _is_comparison_op(op): + return vector.filter_values(lambda v: _compare(op, scalar, v)) return vector.map_values(lambda v: _apply_binary_op(op, scalar, v)) + @staticmethod def _binary_vector_vector( - self, left: RawSeriesSet, right: RawSeriesSet, op: str, @@ -702,17 +718,27 @@ def label_key(labels: dict) -> str: # Match samples by timestamp right_samples = {s.timestamp: s for s in right_series.samples} new_samples = [] + 
is_comparison = _is_comparison_op(op) for left_sample in left_series.samples: if left_sample.timestamp in right_samples: right_sample = right_samples[left_sample.timestamp] - new_val = _apply_binary_op(op, left_sample.value, right_sample.value) - new_samples.append( - RawSample( - timestamp=left_sample.timestamp, - value=new_val, + if is_comparison: + if _compare(op, left_sample.value, right_sample.value): + new_samples.append( + RawSample( + timestamp=left_sample.timestamp, + value=left_sample.value, + ) + ) + else: + new_val = _apply_binary_op(op, left_sample.value, right_sample.value) + new_samples.append( + RawSample( + timestamp=left_sample.timestamp, + value=new_val, + ) ) - ) if new_samples: result.append( @@ -861,100 +887,24 @@ def _apply_binary_op(op: str, left: float, right: float) -> float: raise ExecutionError(f"Unknown binary operator: {op}") -# Rollup functions: take list of samples in a window, return single value -def _avg(samples: list[RawSample]) -> float: - if not samples: - return float("nan") - return sum(s.value for s in samples) / len(samples) - - -def _sum(samples: list[RawSample]) -> float: - return sum(s.value for s in samples) - - -def _min(samples: list[RawSample]) -> float: - if not samples: - return float("nan") - return min(s.min if s.min is not None else s.value for s in samples) - - -def _max(samples: list[RawSample]) -> float: - if not samples: - return float("nan") - return max(s.max if s.max is not None else s.value for s in samples) +def _is_comparison_op(op: str) -> bool: + """Check if an operator is a comparison operator.""" + return op in ("==", "!=", ">", "<", ">=", "<=") -def _count(samples: list[RawSample]) -> float: - return float(len(samples)) - - -def _integrate(samples: list[RawSample]) -> float: - """Integrate values over time using trapezoidal rule. - - Returns the integral in units of (value * seconds). 
- """ - if len(samples) < 2: - return 0.0 - - # Sort by timestamp - sorted_samples = sorted(samples, key=lambda s: s.timestamp) - - # Trapezoidal integration - integral = 0.0 - for i in range(1, len(sorted_samples)): - s0 = sorted_samples[i - 1] - s1 = sorted_samples[i] - # Timestamps are in ms, convert to seconds for integration - dt = (s1.timestamp - s0.timestamp) / 1000 - # Trapezoid area = (v0 + v1) / 2 * dt - integral += (s0.value + s1.value) / 2 * dt - - return integral - - -_ROLLUP_FUNCTIONS = { - "avg_over_time": _avg, - "sum_over_time": _sum, - "min_over_time": _min, - "max_over_time": _max, - "count_over_time": _count, - "integrate": _integrate, -} - -_TRANSFORMATION_FUNCTIONS = {"abs", "clamp_min", "clamp_max"} - - -# Aggregation functions: aggregate across series, take list of values -def _agg_sum(values: list[float]) -> float: - return sum(values) - - -def _agg_avg(values: list[float]) -> float: - if not values: - return float("nan") - return sum(values) / len(values) - - -def _agg_min(values: list[float]) -> float: - if not values: - return float("nan") - return min(values) - - -def _agg_max(values: list[float]) -> float: - if not values: - return float("nan") - return max(values) - - -def _agg_count(values: list[float]) -> float: - return float(len(values)) - - -_AGGREGATION_FUNCTIONS = { - "sum": _agg_sum, - "avg": _agg_avg, - "min": _agg_min, - "max": _agg_max, - "count": _agg_count, -} +def _compare(op: str, left: float, right: float) -> bool: + """Evaluate a comparison operator, returning a boolean.""" + if op == ">": + return left > right + elif op == "<": + return left < right + elif op == ">=": + return left >= right + elif op == "<=": + return left <= right + elif op == "==": + return left == right + elif op == "!=": + return left != right + else: + raise ExecutionError(f"Unknown comparison operator: {op}") diff --git a/metricsqlite/engine/functions.py b/metricsqlite/engine/functions.py new file mode 100644 index 0000000..45acc40 --- 
/dev/null +++ b/metricsqlite/engine/functions.py @@ -0,0 +1,139 @@ +"""MetricsQL function implementations. + +Three categories of functions: +- Rollup: Aggregate samples within a time window (e.g., avg_over_time) +- Transformation: Transform individual sample values (e.g., abs, clamp_min) +- Aggregation: Aggregate across series at each timestamp (e.g., sum, avg) +""" + +from collections.abc import Callable + +from metricsqlite.engine.sqlite import Sample as RawSample + +# ============================================================================= +# Rollup functions: take list of samples in a window, return single value +# ============================================================================= + + +def avg_over_time(samples: list[RawSample]) -> float: + if not samples: + return float("nan") + return sum(s.value for s in samples) / len(samples) + + +def sum_over_time(samples: list[RawSample]) -> float: + if not samples: + return float("nan") + return sum(s.value for s in samples) + + +def min_over_time(samples: list[RawSample]) -> float: + if not samples: + return float("nan") + return min(s.min if s.min is not None else s.value for s in samples) + + +def max_over_time(samples: list[RawSample]) -> float: + if not samples: + return float("nan") + return max(s.max if s.max is not None else s.value for s in samples) + + +def count_over_time(samples: list[RawSample]) -> float: + return float(len(samples)) + + +def integrate(samples: list[RawSample]) -> float: + """Integrate values over time using trapezoidal rule. + + Returns the integral in units of (value * seconds). 
+ """ + if len(samples) < 2: + return 0.0 + + sorted_samples = sorted(samples, key=lambda s: s.timestamp) + + integral = 0.0 + for i in range(1, len(sorted_samples)): + s0 = sorted_samples[i - 1] + s1 = sorted_samples[i] + dt = (s1.timestamp - s0.timestamp) / 1000 # ms to seconds + integral += (s0.value + s1.value) / 2 * dt + + return integral + + +ROLLUP_FUNCTIONS: dict[str, Callable[[list[RawSample]], float]] = { + "avg_over_time": avg_over_time, + "sum_over_time": sum_over_time, + "min_over_time": min_over_time, + "max_over_time": max_over_time, + "count_over_time": count_over_time, + "integrate": integrate, +} + + +# ============================================================================= +# Transformation functions: transform individual sample values +# ============================================================================= + + +def transform_abs(value: float) -> float: + return abs(value) + + +def transform_clamp_min(value: float, min_val: float) -> float: + return max(value, min_val) + + +def transform_clamp_max(value: float, max_val: float) -> float: + return min(value, max_val) + + +TRANSFORMATION_FUNCTIONS: dict[str, Callable] = { + "abs": transform_abs, + "clamp_min": transform_clamp_min, + "clamp_max": transform_clamp_max, +} + + +# ============================================================================= +# Aggregation functions: aggregate across series, take list of values +# ============================================================================= + + +def agg_sum(values: list[float]) -> float: + if not values: + return float("nan") + return sum(values) + + +def agg_avg(values: list[float]) -> float: + if not values: + return float("nan") + return sum(values) / len(values) + + +def agg_min(values: list[float]) -> float: + if not values: + return float("nan") + return min(values) + + +def agg_max(values: list[float]) -> float: + if not values: + return float("nan") + return max(values) + + +def agg_count(values: list[float]) -> 
float: + return float(len(values)) + + +AGGREGATION_FUNCTIONS: dict[str, Callable[[list[float]], float]] = { + "sum": agg_sum, + "avg": agg_avg, + "min": agg_min, + "max": agg_max, + "count": agg_count, +} diff --git a/metricsqlite/engine/parser/lexer.py b/metricsqlite/engine/parser/lexer.py index 40ef639..7e8ef80 100644 --- a/metricsqlite/engine/parser/lexer.py +++ b/metricsqlite/engine/parser/lexer.py @@ -22,11 +22,18 @@ class TokenType(Enum): COMMA = auto() # , COLON = auto() # : - # Operators - EQ = auto() # = + # Comparison + EQEQ = auto() # == NEQ = auto() # != + GTE = auto() # >= + LTE = auto() # <= REGEX = auto() # =~ NREGEX = auto() # !~ + EQ = auto() # = + GT = auto() # > + LT = auto() # < + + # Math PLUS = auto() # + MINUS = auto() # - MUL = auto() # * @@ -34,12 +41,6 @@ class TokenType(Enum): MOD = auto() # % POW = auto() # ^ - # Comparison - GT = auto() # > - LT = auto() # < - GTE = auto() # >= - LTE = auto() # <= - # Special EOF = auto() @@ -62,11 +63,12 @@ def __repr__(self) -> str: # Skip whitespace (re.compile(r"\s+"), None), # Multi-char operators (must come before single-char) + (re.compile(r"=="), TokenType.EQEQ), (re.compile(r"!="), TokenType.NEQ), - (re.compile(r"=~"), TokenType.REGEX), - (re.compile(r"!~"), TokenType.NREGEX), (re.compile(r">="), TokenType.GTE), (re.compile(r"<="), TokenType.LTE), + (re.compile(r"=~"), TokenType.REGEX), + (re.compile(r"!~"), TokenType.NREGEX), # Single-char operators and punctuation (re.compile(r"\("), TokenType.LPAREN), (re.compile(r"\)"), TokenType.RPAREN), @@ -76,13 +78,14 @@ def __repr__(self) -> str: (re.compile(r"]"), TokenType.RBRACKET), (re.compile(r","), TokenType.COMMA), (re.compile(r":"), TokenType.COLON), + # Binary operators (re.compile(r"="), TokenType.EQ), - (re.compile(r"\+"), TokenType.PLUS), - (re.compile(r"-"), TokenType.MINUS), + (re.compile(r"\^"), TokenType.POW), (re.compile(r"\*"), TokenType.MUL), (re.compile(r"/"), TokenType.DIV), (re.compile(r"%"), TokenType.MOD), - (re.compile(r"\^"), 
TokenType.POW), + (re.compile(r"\+"), TokenType.PLUS), + (re.compile(r"-"), TokenType.MINUS), (re.compile(r">"), TokenType.GT), (re.compile(r"<"), TokenType.LT), # Strings (single or double quoted) diff --git a/metricsqlite/engine/parser/parser.py b/metricsqlite/engine/parser/parser.py index 0035178..12a2f99 100644 --- a/metricsqlite/engine/parser/parser.py +++ b/metricsqlite/engine/parser/parser.py @@ -35,13 +35,6 @@ def current(self) -> Token: """Current token.""" return self.tokens[self.pos] - def peek(self, offset: int = 0) -> Token: - """Peek at token at current position + offset.""" - idx = self.pos + offset - if idx < len(self.tokens): - return self.tokens[idx] - return self.tokens[-1] # EOF - def advance(self) -> Token: """Consume and return current token.""" token = self.current @@ -68,7 +61,28 @@ def parse(self) -> Expr: def parse_expr(self) -> Expr: """Parse an expression (handles binary operators).""" - return self.parse_additive() + return self.parse_comparison() + + def parse_comparison(self) -> Expr: + """Parse comparison expressions (> < >= <= == !=). + + Lowest precedence - comparisons are typically used for filtering. 
+ """ + left = self.parse_additive() + + while self.match( + TokenType.EQEQ, + TokenType.NEQ, + TokenType.GT, + TokenType.LT, + TokenType.GTE, + TokenType.LTE, + ): + op = self.advance().value + right = self.parse_additive() + left = BinaryExpr(left, op, right) + + return left def parse_additive(self) -> Expr: """Parse additive expressions (+ -).""" @@ -82,21 +96,10 @@ def parse_additive(self) -> Expr: return left def parse_multiplicative(self) -> Expr: - """Parse multiplicative expressions (* / % ^).""" - left = self.parse_comparison() - - while self.match(TokenType.MUL, TokenType.DIV, TokenType.MOD, TokenType.POW): - op = self.advance().value - right = self.parse_comparison() - left = BinaryExpr(left, op, right) - - return left - - def parse_comparison(self) -> Expr: - """Parse comparison expressions (> < >= <= == !=).""" + """Parse multiplicative expressions (* / %).""" left = self.parse_unary() - while self.match(TokenType.GT, TokenType.LT, TokenType.GTE, TokenType.LTE): + while self.match(TokenType.MUL, TokenType.DIV, TokenType.MOD): op = self.advance().value right = self.parse_unary() left = BinaryExpr(left, op, right) @@ -104,13 +107,31 @@ def parse_comparison(self) -> Expr: return left def parse_unary(self) -> Expr: - """Parse unary expressions (- +).""" + """Parse unary expressions (- +). + + Unary minus has lower precedence than power, so -2^2 = -(2^2) = -4. + """ if self.match(TokenType.MINUS, TokenType.PLUS): op = self.advance().value expr = self.parse_unary() return UnaryExpr(op, expr) - return self.parse_postfix() + return self.parse_power() + + def parse_power(self) -> Expr: + """Parse power expressions (^). + + Right-associative: 2^3^2 = 2^(3^2) = 512. + Right operand allows unary: 2^-3 works. 
+ """ + left = self.parse_postfix() + + if self.match(TokenType.POW): + op = self.advance().value + right = self.parse_unary() # Allows unary on right, gives right-associativity + left = BinaryExpr(left, op, right) + + return left def parse_postfix(self) -> Expr: """Parse postfix expressions (range vectors, offset).""" diff --git a/metricsqlite/engine/query.py b/metricsqlite/engine/query.py index 309d221..c888970 100644 --- a/metricsqlite/engine/query.py +++ b/metricsqlite/engine/query.py @@ -10,7 +10,7 @@ QueryResult, raw_to_matrix, ) -from metricsqlite.engine.parser import LabelMatchType, MetricSelector, parse +from metricsqlite.engine.parser import MetricSelector, parse from metricsqlite.util import parse_interval, parse_timestamp @@ -40,11 +40,7 @@ def __init__( self._executor = Executor(connection, series_table, data_table) def query( - self, - query: str, - eval_time: float | str | None = None, - step: float | str | None = None, - timeout: float | str | None = None, + self, query: str, eval_time: float | str | datetime | None = None, step: float | str | None = None ) -> QueryResult: """Execute an instant query. @@ -53,12 +49,10 @@ def query( eval_time: Evaluation timestamp. If None, uses current time. step: Lookback window. If set, only samples within [eval_time - step, eval_time] are considered. - timeout: Query timeout (currently ignored). Returns: Query result (InstantVector, RangeVectorResult, or ScalarResult). """ - del timeout # Not implemented eval_time_ms = parse_timestamp(eval_time) if eval_time_ms is None: eval_time_ms = time.time() * 1000 @@ -146,42 +140,4 @@ def find_series( if not isinstance(ast, MetricSelector): raise ValueError(f"Expected metric selector, got {type(ast).__name__}") - # Build query - if start is not None or end is not None: - query = f""" - SELECT DISTINCT s.name, s.labels - FROM {self._series_table} s - JOIN {self._data_table} d USING (series_id) - WHERE s.name = ? 
- """ - else: - query = f"SELECT DISTINCT name, labels FROM {self._series_table} WHERE name = ?" - - params: list = [ast.name] - - # Add time filters - if start is not None: - query += " AND d.start >= ?" - params.append(start) - if end is not None: - query += " AND d.start <= ?" - params.append(end) - - # Add label matchers - label_col = "s.labels" if (start is not None or end is not None) else "labels" - for matcher in ast.matchers: - if matcher.match_type == LabelMatchType.EQ: - query += f" AND json_extract({label_col}, '$.{matcher.name}') = ?" - params.append(matcher.value) - elif matcher.match_type == LabelMatchType.NEQ: - query += f" AND (json_extract({label_col}, '$.{matcher.name}') IS NULL OR json_extract({label_col}, '$.{matcher.name}') != ?)" - params.append(matcher.value) - elif matcher.match_type == LabelMatchType.REGEX: - query += f" AND json_extract({label_col}, '$.{matcher.name}') REGEXP ?" - params.append(matcher.value) - elif matcher.match_type == LabelMatchType.NREGEX: - query += f" AND (json_extract({label_col}, '$.{matcher.name}') IS NULL OR json_extract({label_col}, '$.{matcher.name}') NOT REGEXP ?)" - params.append(matcher.value) - - cursor = self._conn.execute(query, params) - return [(row["name"], row["labels"]) for row in cursor.fetchall()] + return self._executor.find_series(ast, start, end) diff --git a/metricsqlite/engine/sqlite.py b/metricsqlite/engine/sqlite.py index 8ac9baa..04a849e 100644 --- a/metricsqlite/engine/sqlite.py +++ b/metricsqlite/engine/sqlite.py @@ -1,12 +1,30 @@ """SQLite query adapter for fetching raw time series data.""" import json +import re import sqlite3 from collections.abc import Callable from dataclasses import dataclass, field from .parser import LabelMatchType, MetricSelector + +def sqlite_regexp(pattern: str, value: str | None) -> bool: + """SQLite REGEXP function for MetricsQL regex label matching. 
+ + MetricsQL/PromQL regex patterns are implicitly anchored to match + the entire string (equivalent to ^pattern$). + + Register with: connection.create_function("regexp", 2, sqlite_regexp) + """ + if value is None: + return False + try: + return re.fullmatch(pattern, value) is not None + except re.error: + return False + + Labels = dict[str, str] @@ -60,6 +78,21 @@ def map_values(self, fn: Callable[[float], float]) -> "RawSeriesSet": ] ) + def filter_values(self, predicate: Callable[[float], bool]) -> "RawSeriesSet": + """Filter samples based on a predicate, returning original values.""" + result = [] + for s in self.series: + filtered_samples = [sample for sample in s.samples if predicate(sample.value)] + if filtered_samples: + result.append( + RawSeries( + series_id=s.series_id, + labels=s.labels, + samples=filtered_samples, + ) + ) + return RawSeriesSet(result) + def is_empty(self) -> bool: return len(self.series) == 0 @@ -77,6 +110,50 @@ def __init__( self._series_table = series_table self._data_table = data_table + @staticmethod + def _build_name_condition( + selector: MetricSelector, + name_col: str = "s.name", + ) -> tuple[str, dict, list]: + """Build SQL condition for metric name matching. + + Handles both direct name matching and __name__ label matchers. + + Returns: + Tuple of (sql_condition, params, labels) where + labels excludes any __name__ matchers. 
+ """ + labels = [] + name_matcher = None + + # Look for __name__ matcher + for matcher in selector.matchers: + if matcher.name == "__name__": + name_matcher = matcher + else: + labels.append(matcher) + + # If selector has a name, use exact match (unless overridden by __name__ matcher) + if selector.name and not name_matcher: + return f"{name_col} = :name", {"name": selector.name}, labels + + # Use __name__ matcher if present + if name_matcher: + if name_matcher.match_type == LabelMatchType.EQ: + return f"{name_col} = :name", {"name": name_matcher.value}, labels + elif name_matcher.match_type == LabelMatchType.NEQ: + return f"{name_col} != :name", {"name": name_matcher.value}, labels + elif name_matcher.match_type == LabelMatchType.REGEX: + return f"{name_col} REGEXP :name", {"name": name_matcher.value}, labels + elif name_matcher.match_type == LabelMatchType.NREGEX: + return f"{name_col} NOT REGEXP :name", {"name": name_matcher.value}, labels + + # Fallback: use selector.name if present, otherwise match all + if selector.name: + return f"{name_col} = :name", {"name": selector.name}, labels + else: + return "1=1", {}, labels # Match all names + def fetch_range( self, selector: MetricSelector, @@ -96,6 +173,9 @@ def fetch_range( Returns: RawSeriesSet with all matching series and their samples. 
""" + # Build name condition (handles __name__ matchers) + name_condition, name_params, remaining_matchers = self._build_name_condition(selector, name_col="s.name") + # Include rows where: # - start is within (start, end], OR # - row spans into the range (start < query_start but end >= query_start) @@ -112,19 +192,19 @@ def fetch_range( d.sample_count FROM {self._data_table} d JOIN {self._series_table} s USING (series_id) - WHERE s.name = :name + WHERE {name_condition} AND d.start <= :end AND COALESCE(d.end, d.start) > :start """ params: dict = { - "name": selector.name, + **name_params, "start": start, "end": end, } - # Add label matchers - sql, params = self._add_label_matchers(sql, params, selector.matchers) + # Add label matchers (excluding __name__ which was handled above) + sql, params = self._add_label_matchers(sql, params, remaining_matchers) sql += " ORDER BY s.series_id, d.start" @@ -181,6 +261,9 @@ def fetch_instant( """ lookback_start = time - lookback + # Build name condition (handles __name__ matchers) + name_condition, name_params, remaining_matchers = self._build_name_condition(selector, name_col="s.name") + # Find the latest sample where: # - start <= eval_time (sample exists before or at eval time) # - effective end (COALESCE(end, start)) >= lookback_start (not stale) @@ -197,7 +280,7 @@ def fetch_instant( d.sample_count FROM {self._data_table} d JOIN {self._series_table} s USING (series_id) - WHERE s.name = :name + WHERE {name_condition} AND d.start <= :time AND COALESCE(d.end, d.start) >= :lookback_start AND d.start = ( @@ -210,12 +293,12 @@ def fetch_instant( """ params: dict = { - "name": selector.name, + **name_params, "time": time, "lookback_start": lookback_start, } - sql, params = self._add_label_matchers(sql, params, selector.matchers) + sql, params = self._add_label_matchers(sql, params, remaining_matchers) cursor = self._conn.execute(sql, params) @@ -249,21 +332,71 @@ def fetch_instant( return RawSeriesSet(series_list) + def 
find_series( + self, + selector: MetricSelector, + start: float | None = None, + end: float | None = None, + ) -> list[tuple[str, str]]: + """Find series matching a selector. + + Args: + selector: Parsed metric selector. + start: Start timestamp in milliseconds. + end: End timestamp in milliseconds. + + Returns: + List of (name, labels_json) tuples for matching series. + """ + # Build name condition (handles __name__ matchers) + if start is not None or end is not None: + name_condition, name_params, remaining_matchers = self._build_name_condition(selector, name_col="s.name") + sql = f""" + SELECT DISTINCT s.name, s.labels + FROM {self._data_table} d + JOIN {self._series_table} s USING (series_id) + WHERE {name_condition} + """ + labels_col = "s.labels" + else: + name_condition, name_params, remaining_matchers = self._build_name_condition(selector, name_col="name") + sql = f"SELECT DISTINCT name, labels FROM {self._series_table} WHERE {name_condition}" + labels_col = "labels" + + params: dict = {**name_params} + + if start is not None: + sql += " AND d.start >= :start" + params["start"] = start + if end is not None: + sql += " AND d.start <= :end" + params["end"] = end + + sql, params = self._add_label_matchers(sql, params, remaining_matchers, labels_col) + + cursor = self._conn.execute(sql, params) + return [(row["name"], row["labels"]) for row in cursor.fetchall()] + @staticmethod - def _add_label_matchers(sql: str, params: dict, matchers: list) -> tuple[str, dict]: + def _add_label_matchers( + sql: str, + params: dict, + matchers: list, + labels_col: str = "s.labels", + ) -> tuple[str, dict]: """Add label matcher conditions to SQL query.""" for i, matcher in enumerate(matchers): param_name = f"label_{i}" if matcher.match_type == LabelMatchType.EQ: - sql += f" AND json_extract(s.labels, '$.{matcher.name}') = :{param_name}" + sql += f" AND json_extract({labels_col}, '$.{matcher.name}') = :{param_name}" params[param_name] = matcher.value elif matcher.match_type == 
LabelMatchType.NEQ: - sql += f" AND (json_extract(s.labels, '$.{matcher.name}') IS NULL OR json_extract(s.labels, '$.{matcher.name}') != :{param_name})" + sql += f" AND (json_extract({labels_col}, '$.{matcher.name}') IS NULL OR json_extract({labels_col}, '$.{matcher.name}') != :{param_name})" params[param_name] = matcher.value elif matcher.match_type == LabelMatchType.REGEX: - sql += f" AND json_extract(s.labels, '$.{matcher.name}') REGEXP :{param_name}" + sql += f" AND json_extract({labels_col}, '$.{matcher.name}') REGEXP :{param_name}" params[param_name] = matcher.value elif matcher.match_type == LabelMatchType.NREGEX: - sql += f" AND (json_extract(s.labels, '$.{matcher.name}') IS NULL OR json_extract(s.labels, '$.{matcher.name}') NOT REGEXP :{param_name})" + sql += f" AND (json_extract({labels_col}, '$.{matcher.name}') IS NULL OR json_extract({labels_col}, '$.{matcher.name}') NOT REGEXP :{param_name})" params[param_name] = matcher.value return sql, params diff --git a/metricsqlite/fastapi/routes.py b/metricsqlite/fastapi/routes.py index 1098dcd..5df8cae 100644 --- a/metricsqlite/fastapi/routes.py +++ b/metricsqlite/fastapi/routes.py @@ -113,10 +113,9 @@ def get_query( query: str = Query(..., description="MetricsQL query string"), time: float | str | None = Query(None, description="Evaluation timestamp"), step: float | str | None = Query(None, description="Interval"), - timeout: float | str | None = Query(None, description="Query timeout"), ) -> JSONResponse: try: - result = client.query(query, time=time, step=step, timeout=timeout) + result = client.query(query, time=time, step=step) return _format_query_result(result) except (ParseError, ExecutionError, LexerError) as e: return _error_response("bad_data", str(e), 400) @@ -133,10 +132,9 @@ def get_query_range( start: float | str = Query(..., description="Start timestamp"), end: float | str | None = Query(None, description="End timestamp"), step: float | str | None = Query(None, description="Query resolution step 
in seconds"), - timeout: float | str | None = Query(None, description="Query timeout"), ) -> JSONResponse: try: - result = client.query_range(query, start, end=end, step=step, timeout=timeout) + result = client.query_range(query, start, end=end, step=step) return _format_range_result(result) except (ParseError, ExecutionError, LexerError) as e: return _error_response("bad_data", str(e), 400) @@ -196,7 +194,6 @@ def get_series( @router.post("/influx/write") async def influx_write( request: Request, - db: str | None = Query(None, description="Database name (ignored)"), precision: str = Query("ns", description="Timestamp precision: ns, us, ms, s"), ) -> PlainTextResponse: """Write data using InfluxDB line protocol. diff --git a/tests/engine/test_executor.py b/tests/engine/test_executor.py index c7725c2..60f5ed0 100644 --- a/tests/engine/test_executor.py +++ b/tests/engine/test_executor.py @@ -13,7 +13,7 @@ parse, ) -EVAL_TIME = 946_681_200_000 # 2000-01-01 00:00:00 UTC +EVAL_TIME = 946_684_800_000 # 2000-01-01 00:00:00 UTC @pytest.fixture diff --git a/tests/fastapi/test_routes.py b/tests/fastapi/test_routes.py index b9c21d8..cf0982e 100644 --- a/tests/fastapi/test_routes.py +++ b/tests/fastapi/test_routes.py @@ -5,7 +5,7 @@ from metricsqlite import MetricsQLiteClient from metricsqlite.fastapi import create_router -EVAL_TIME = 946_681_200_000 # 2000-01-01 00:00:00 UTC +EVAL_TIME = 946_684_800_000 # 2000-01-01 00:00:00 UTC @pytest.fixture diff --git a/tests/queries/conftest.py b/tests/queries/conftest.py new file mode 100644 index 0000000..dd13435 --- /dev/null +++ b/tests/queries/conftest.py @@ -0,0 +1,78 @@ +"""Shared fixtures for query tests. + +This module provides reusable fixtures for testing MetricsQL queries. +All fixtures are automatically available to tests in this directory. 
+""" + +import pytest + +from metricsqlite import MetricsQLiteClient + +# Standard eval time used across tests: 2000-01-01 00:00:00 UTC +EVAL_TIME = 946_684_800_000 + + +@pytest.fixture +def client(): + """Create an in-memory client with tables initialized. + + This is the base fixture that other fixtures build upon. + Yields a connected client with tables created, closes on teardown. + """ + client = MetricsQLiteClient(None) + client.connect() + client.create_tables() + yield client + client.close() + + +@pytest.fixture +def insert_gauge(client): + """Factory fixture to insert gauge data. + + Usage: + insert_gauge("metric", value=42, time=EVAL_TIME) + insert_gauge("metric", value=42, time=EVAL_TIME, labels={"env": "prod"}) + """ + + def _insert(name, value, time, labels=None): + client.insert_gauge(name, value, time, labels=labels) + + return _insert + + +@pytest.fixture +def insert_counter(client): + """Factory fixture to insert counter data. + + Usage: + insert_counter("requests_total", value=100, time=EVAL_TIME) + """ + + def _insert(name, value, time, labels=None): + client.insert_counter(name, value, time, labels=labels) + + return _insert + + +@pytest.fixture +def insert_minute_series(client): + """Factory fixture to insert a series with 1-minute interval samples. + + Creates samples from start_minute to end_minute (exclusive) relative to EVAL_TIME. 
+ Value at each minute is: base_value + minute_offset + + Usage: + # Insert metric from T-60m to T+60m with values 100+minute + insert_minute_series("metric", base_value=100, start_minute=-60, end_minute=60) + + # Insert with labels + insert_minute_series("metric", base_value=100, labels={"env": "prod"}) + """ + + def _insert(name, base_value=0, start_minute=-60, end_minute=60, labels=None): + for minute in range(start_minute, end_minute): + ts = EVAL_TIME + 60_000 * minute + client.insert_gauge(name, base_value + minute, ts, labels=labels) + + return _insert diff --git a/tests/queries/test_aggregation.py b/tests/queries/test_aggregation.py new file mode 100644 index 0000000..d256408 --- /dev/null +++ b/tests/queries/test_aggregation.py @@ -0,0 +1,432 @@ +"""Tests for aggregation functions. + +Aggregation functions aggregate values across multiple series at each timestamp, +producing a single output series (or grouped series with `by`/`without` clauses). + +Functions tested: +- sum(): Sum of all values +- avg(): Average of all values +- min(): Minimum value +- max(): Maximum value +- count(): Number of series + +VictoriaMetrics-specific behaviors: +- NaN values are ignored in aggregations +- Empty input produces NaN +- Supports `by()` and `without()` modifiers +""" + +import pytest + +from metricsqlite.engine import InstantVector, MatrixResult + +# Standard eval time: 2000-01-01 00:00:00 UTC +EVAL_TIME = 946_684_800_000 + + +# ============================================================================= +# Fixtures (client fixture inherited from conftest.py) +# ============================================================================= + + +@pytest.fixture +def standard_data(client): + """Standard test dataset with multiple series. + + Creates data from T-60m to T+60m with 1-minute intervals. 
+ + Series created: + - metric{label="A"}: values = 100 + minute (at T: 100, at T+10m: 110) + - metric{label="B"}: values = 200 + minute (at T: 200, at T+10m: 210) + - metric{label="C"}: values = 300 + minute (at T: 300, at T+10m: 310) + - other_metric: values = 1000 + minute (at T: 1000) + + At eval_time T: + - sum(metric) = 100 + 200 + 300 = 600 + - avg(metric) = 600 / 3 = 200 + - min(metric) = 100 + - max(metric) = 300 + - count(metric) = 3 + + At eval_time T+10m: + - sum(metric) = 110 + 210 + 310 = 630 + - avg(metric) = 630 / 3 = 210 + - min(metric) = 110 + - max(metric) = 310 + """ + for minute in range(-60, 60): + ts = EVAL_TIME + 60_000 * minute + client.insert_gauge("metric", 100 + minute, ts, labels={"label": "A"}) + client.insert_gauge("metric", 200 + minute, ts, labels={"label": "B"}) + client.insert_gauge("metric", 300 + minute, ts, labels={"label": "C"}) + client.insert_gauge("other_metric", 1000 + minute, ts) + return client + + +@pytest.fixture +def single_series_data(client): + """Dataset with only one series. + + Series: metric (no labels), values = minute (at T: 0, at T+10m: 10) + """ + for minute in range(-60, 60): + client.insert_gauge("metric", minute, EVAL_TIME + 60_000 * minute) + return client + + +@pytest.fixture +def compacted_data(client): + """Dataset with compacted gauge data. + + Creates raw data, then compacts into 10-minute buckets. + + Before compaction (T-30m to T-10m, minute intervals): + - metric{label="A"}: values 100+minute + - metric{label="B"}: values 200+minute + + After compaction: + - Bucket [T-30m, T-20m]: avg of 10 samples each + - Bucket [T-20m, T-10m]: avg of 10 samples each + + Raw data (T-10m to T+10m) remains uncompacted. 
+ """ + for minute in range(-40, 30): + ts = EVAL_TIME + 60_000 * minute + client.insert_gauge("metric", 100 + minute, ts, labels={"label": "A"}) + client.insert_gauge("metric", 200 + minute, ts, labels={"label": "B"}) + + # Compact data older than T-10m into 10-minute buckets + client.compact_gauges(older_than=EVAL_TIME - 600_000, interval="10m") + return client + + +@pytest.fixture +def negative_values_data(client): + """Dataset with negative values. + + Series: + - metric{label="pos"}: value = 50 + - metric{label="neg"}: value = -30 + - metric{label="zero"}: value = 0 + + At T: + - sum(metric) = 50 + (-30) + 0 = 20 + - avg(metric) = 20 / 3 ≈ 6.67 + - min(metric) = -30 + - max(metric) = 50 + """ + client.insert_gauge("metric", 50, EVAL_TIME, labels={"label": "pos"}) + client.insert_gauge("metric", -30, EVAL_TIME, labels={"label": "neg"}) + client.insert_gauge("metric", 0, EVAL_TIME, labels={"label": "zero"}) + return client + + +@pytest.fixture +def stale_series_data(client): + """Dataset with one stale and one fresh series. + + Series: + - metric{label="fresh"}: value = 100 at T-1m (within 5m lookback) + - metric{label="stale"}: value = 200 at T-10m (outside 5m lookback) + + With default 5m lookback, only "fresh" series should be included. 
+ """ + client.insert_gauge("metric", 100, EVAL_TIME - 60_000, labels={"label": "fresh"}) + client.insert_gauge("metric", 200, EVAL_TIME - 600_000, labels={"label": "stale"}) + return client + + +# ============================================================================= +# Helpers +# ============================================================================= + + +def assert_instant_value(result, expected_value, expected_labels=None): + """Assert instant query returns expected single value.""" + assert isinstance(result, InstantVector) + assert len(result.series) == 1 + labels, sample = result.series[0] + if expected_labels is not None: + assert labels == expected_labels + assert sample.value == pytest.approx(expected_value) + + +def assert_instant_empty(result): + """Assert instant query returns no series.""" + assert isinstance(result, InstantVector) + assert len(result.series) == 0 + + +def assert_range_values(result, expected_values, expected_labels=None): + """Assert range query returns expected values at each step.""" + assert isinstance(result, MatrixResult) + assert len(result.series) == 1 + labels, series = result.series[0] + if expected_labels is not None: + assert labels == expected_labels + actual_values = [s.value for s in series] + assert actual_values == pytest.approx(expected_values) + + +def assert_range_empty(result): + """Assert range query returns no series.""" + assert isinstance(result, MatrixResult) + assert len(result.series) == 0 + + +# ============================================================================= +# Tests: Basic aggregation behavior +# ============================================================================= + + +class TestAggregationSingleSeries: + """Aggregation of a single series returns that series' value.""" + + @pytest.mark.parametrize("func", ["sum", "avg", "min", "max"]) + def test_instant_query(self, single_series_data, func): + """Single series: aggregation returns the series value.""" + result = 
single_series_data.query(f"{func}(metric)", time=EVAL_TIME) + assert_instant_value(result, expected_value=0, expected_labels={}) + + @pytest.mark.parametrize("func", ["sum", "avg", "min", "max"]) + def test_range_query(self, single_series_data, func): + """Single series: aggregation at each step returns series value.""" + result = single_series_data.query_range( + f"{func}(metric)", + start=EVAL_TIME, + end=EVAL_TIME + 1_800_000, + step="10m", + ) + assert_range_values(result, expected_values=[0, 10, 20, 30], expected_labels={}) + + def test_count_instant(self, single_series_data): + """count() of single series returns 1.""" + result = single_series_data.query("count(metric)", time=EVAL_TIME) + assert_instant_value(result, expected_value=1, expected_labels={}) + + def test_count_range(self, single_series_data): + """count() at each step returns 1.""" + result = single_series_data.query_range( + "count(metric)", + start=EVAL_TIME, + end=EVAL_TIME + 1_800_000, + step="10m", + ) + assert_range_values(result, expected_values=[1, 1, 1, 1], expected_labels={}) + + +class TestAggregationMultipleSeries: + """Aggregation across multiple series.""" + + def test_sum_instant(self, standard_data): + """sum() adds values across all series.""" + result = standard_data.query("sum(metric)", time=EVAL_TIME) + # 100 + 200 + 300 = 600 + assert_instant_value(result, expected_value=600, expected_labels={}) + + def test_sum_range(self, standard_data): + """sum() at each step adds values across series.""" + result = standard_data.query_range( + "sum(metric)", + start=EVAL_TIME, + end=EVAL_TIME + 1_800_000, + step="10m", + ) + # At T: 600, T+10m: 630, T+20m: 660, T+30m: 690 + assert_range_values(result, expected_values=[600, 630, 660, 690], expected_labels={}) + + def test_avg_instant(self, standard_data): + """avg() computes mean across all series.""" + result = standard_data.query("avg(metric)", time=EVAL_TIME) + # (100 + 200 + 300) / 3 = 200 + assert_instant_value(result, 
expected_value=200, expected_labels={}) + + def test_avg_range(self, standard_data): + """avg() at each step computes mean across series.""" + result = standard_data.query_range( + "avg(metric)", + start=EVAL_TIME, + end=EVAL_TIME + 1_800_000, + step="10m", + ) + # At T: 200, T+10m: 210, T+20m: 220, T+30m: 230 + assert_range_values(result, expected_values=[200, 210, 220, 230], expected_labels={}) + + def test_min_instant(self, standard_data): + """min() returns smallest value across series.""" + result = standard_data.query("min(metric)", time=EVAL_TIME) + assert_instant_value(result, expected_value=100, expected_labels={}) + + def test_min_range(self, standard_data): + """min() at each step returns smallest value.""" + result = standard_data.query_range( + "min(metric)", + start=EVAL_TIME, + end=EVAL_TIME + 1_800_000, + step="10m", + ) + assert_range_values(result, expected_values=[100, 110, 120, 130], expected_labels={}) + + def test_max_instant(self, standard_data): + """max() returns largest value across series.""" + result = standard_data.query("max(metric)", time=EVAL_TIME) + assert_instant_value(result, expected_value=300, expected_labels={}) + + def test_max_range(self, standard_data): + """max() at each step returns largest value.""" + result = standard_data.query_range( + "max(metric)", + start=EVAL_TIME, + end=EVAL_TIME + 1_800_000, + step="10m", + ) + assert_range_values(result, expected_values=[300, 310, 320, 330], expected_labels={}) + + def test_count_instant(self, standard_data): + """count() returns number of series.""" + result = standard_data.query("count(metric)", time=EVAL_TIME) + assert_instant_value(result, expected_value=3, expected_labels={}) + + def test_count_range(self, standard_data): + """count() at each step returns number of series.""" + result = standard_data.query_range( + "count(metric)", + start=EVAL_TIME, + end=EVAL_TIME + 1_800_000, + step="10m", + ) + assert_range_values(result, expected_values=[3, 3, 3, 3], 
expected_labels={}) + + +class TestAggregationEmptyResult: + """Aggregation with no matching series.""" + + @pytest.mark.parametrize("func", ["sum", "avg", "min", "max", "count"]) + def test_instant_no_match(self, standard_data, func): + """Aggregation of non-existent metric returns empty result.""" + result = standard_data.query(f"{func}(nonexistent)", time=EVAL_TIME) + assert_instant_empty(result) + + @pytest.mark.parametrize("func", ["sum", "avg", "min", "max", "count"]) + def test_range_no_match(self, standard_data, func): + """Aggregation of non-existent metric returns empty result.""" + result = standard_data.query_range( + f"{func}(nonexistent)", + start=EVAL_TIME, + end=EVAL_TIME + 1_800_000, + step="10m", + ) + assert_range_empty(result) + + +# ============================================================================= +# Tests: Edge cases +# ============================================================================= + + +class TestAggregationNegativeValues: + """Aggregation with negative values.""" + + def test_sum_with_negatives(self, negative_values_data): + """sum() correctly handles positive and negative values.""" + result = negative_values_data.query("sum(metric)", time=EVAL_TIME) + # 50 + (-30) + 0 = 20 + assert_instant_value(result, expected_value=20, expected_labels={}) + + def test_avg_with_negatives(self, negative_values_data): + """avg() correctly handles positive and negative values.""" + result = negative_values_data.query("avg(metric)", time=EVAL_TIME) + # 20 / 3 ≈ 6.67 + assert_instant_value(result, expected_value=20 / 3, expected_labels={}) + + def test_min_finds_negative(self, negative_values_data): + """min() finds the most negative value.""" + result = negative_values_data.query("min(metric)", time=EVAL_TIME) + assert_instant_value(result, expected_value=-30, expected_labels={}) + + def test_max_with_negatives(self, negative_values_data): + """max() finds largest value when negatives present.""" + result = 
negative_values_data.query("max(metric)", time=EVAL_TIME) + assert_instant_value(result, expected_value=50, expected_labels={}) + + +class TestAggregationStaleness: + """Aggregation respects staleness (stale series excluded).""" + + def test_sum_excludes_stale(self, stale_series_data): + """sum() excludes stale series from calculation.""" + result = stale_series_data.query("sum(metric)", time=EVAL_TIME) + # Only fresh series (100) included, stale (200) excluded + assert_instant_value(result, expected_value=100, expected_labels={}) + + def test_count_excludes_stale(self, stale_series_data): + """count() excludes stale series.""" + result = stale_series_data.query("count(metric)", time=EVAL_TIME) + assert_instant_value(result, expected_value=1, expected_labels={}) + + def test_all_stale_returns_empty(self, client): + """When all series are stale, aggregation returns empty.""" + client.insert_gauge("metric", 100, EVAL_TIME - 600_000) # 10m ago, outside 5m lookback + result = client.query("sum(metric)", time=EVAL_TIME) + assert_instant_empty(result) + + +class TestAggregationOnCompactedData: + """Aggregation on compacted gauge data.""" + + def test_sum_on_compacted(self, compacted_data): + """sum() works on compacted data using bucket averages.""" + pytest.skip("TODO: Implement test") + + def test_avg_on_compacted(self, compacted_data): + """avg() works on compacted data.""" + pytest.skip("TODO: Implement test") + + def test_count_on_compacted(self, compacted_data): + """count() counts series, not buckets.""" + pytest.skip("TODO: Implement test") + + +# ============================================================================= +# Tests: Grouping (by/without) +# ============================================================================= + + +class TestAggregationWithGrouping: + """Aggregation with by() and without() modifiers.""" + + def test_sum_by_label(self, standard_data): + """sum() by(label) groups results by that label.""" + pytest.skip("TODO: Implement test 
- may not be implemented yet") + + def test_sum_without_label(self, standard_data): + """sum() without(label) aggregates across all other labels.""" + pytest.skip("TODO: Implement test - may not be implemented yet") + + def test_by_multiple_labels(self, client): + """by(l1, l2) groups by multiple labels.""" + pytest.skip("TODO: Implement test - may not be implemented yet") + + +# ============================================================================= +# Tests: Combined with other functions +# ============================================================================= + + +class TestAggregationCombined: + """Aggregation combined with rollup and transformation functions.""" + + def test_sum_of_rollup(self, standard_data): + """sum(avg_over_time(metric[5m])) aggregates rollup results.""" + pytest.skip("TODO: Implement test") + + def test_aggregation_then_binary_op(self, standard_data): + """sum(metric) * 2 applies binary op to aggregation result.""" + result = standard_data.query("sum(metric) * 2", time=EVAL_TIME) + assert_instant_value(result, expected_value=1200, expected_labels={}) + + def test_aggregation_of_transformation(self, negative_values_data): + """sum(abs(metric)) aggregates transformed values.""" + result = negative_values_data.query("sum(abs(metric))", time=EVAL_TIME) + # abs(50) + abs(-30) + abs(0) = 50 + 30 + 0 = 80 + assert_instant_value(result, expected_value=80, expected_labels={}) diff --git a/tests/queries/test_binary_operations.py b/tests/queries/test_binary_operations.py new file mode 100644 index 0000000..11e5318 --- /dev/null +++ b/tests/queries/test_binary_operations.py @@ -0,0 +1,467 @@ +"""Tests for binary operations. 
+ +Binary operations perform arithmetic between: +- Two scalars: 5 + 3 +- Scalar and vector: metric * 2 +- Two vectors: metric1 + metric2 + +VictoriaMetrics-specific behaviors: +- Vector matching uses label comparison +- Missing matches result in no output (unless `or` used) +- Operator precedence follows math conventions +""" + +import math + +import pytest + +from metricsqlite.engine import InstantVector, MatrixResult, ScalarResult + +# Standard eval time: 2000-01-01 00:00:00 UTC +EVAL_TIME = 946_684_800_000 + + +# ============================================================================= +# Fixtures (client fixture inherited from conftest.py) +# ============================================================================= + + +@pytest.fixture +def single_series(client): + """Single series for vector-scalar operations. + + Series: metric (no labels) + - At T: value = 100 + - At T+10m: value = 110 + - At T+20m: value = 120 + """ + for minute in range(-60, 60): + client.insert_gauge("metric", 100 + minute, EVAL_TIME + 60_000 * minute) + return client + + +@pytest.fixture +def multi_series(client): + """Multiple series with different labels. + + Series: + - metric{label="A"}: value = 10 + minute (at T: 10) + - metric{label="B"}: value = 20 + minute (at T: 20) + - metric{label="C"}: value = 30 + minute (at T: 30) + """ + for minute in range(-60, 60): + ts = EVAL_TIME + 60_000 * minute + client.insert_gauge("metric", 10 + minute, ts, labels={"label": "A"}) + client.insert_gauge("metric", 20 + minute, ts, labels={"label": "B"}) + client.insert_gauge("metric", 30 + minute, ts, labels={"label": "C"}) + return client + + +@pytest.fixture +def two_metrics(client): + """Two different metrics for vector-vector operations. 
+ + Series: + - metric_a{label="X"}: value = 100 + minute (at T: 100) + - metric_a{label="Y"}: value = 200 + minute (at T: 200) + - metric_b{label="X"}: value = 10 + minute (at T: 10) + - metric_b{label="Y"}: value = 20 + minute (at T: 20) + - metric_b{label="Z"}: value = 30 + minute (at T: 30, no match in metric_a) + + At T, matching pairs (by label): + - label="X": metric_a=100, metric_b=10 + - label="Y": metric_a=200, metric_b=20 + """ + for minute in range(-60, 60): + ts = EVAL_TIME + 60_000 * minute + client.insert_gauge("metric_a", 100 + minute, ts, labels={"label": "X"}) + client.insert_gauge("metric_a", 200 + minute, ts, labels={"label": "Y"}) + client.insert_gauge("metric_b", 10 + minute, ts, labels={"label": "X"}) + client.insert_gauge("metric_b", 20 + minute, ts, labels={"label": "Y"}) + client.insert_gauge("metric_b", 30 + minute, ts, labels={"label": "Z"}) + return client + + +# ============================================================================= +# Helpers +# ============================================================================= + + +def assert_scalar_value(result, expected): + """Assert result is a scalar with expected value.""" + assert isinstance(result, ScalarResult) + assert result.value == pytest.approx(expected) + + +def assert_instant_values(result, expected_values): + """Assert instant vector contains expected values (sorted).""" + assert isinstance(result, InstantVector) + actual = sorted([sample.value for _, sample in result.series]) + assert actual == pytest.approx(sorted(expected_values)) + + +def assert_instant_single(result, expected_value, expected_labels=None): + """Assert instant vector has single series with expected value.""" + assert isinstance(result, InstantVector) + assert len(result.series) == 1 + labels, sample = result.series[0] + if expected_labels is not None: + assert labels == expected_labels + assert sample.value == pytest.approx(expected_value) + + +def assert_instant_empty(result): + """Assert instant 
vector is empty.""" + assert isinstance(result, InstantVector) + assert len(result.series) == 0 + + +# ============================================================================= +# Tests: Scalar-Scalar Operations +# ============================================================================= + + +class TestScalarArithmetic: + """Basic arithmetic between two scalars.""" + + @pytest.mark.parametrize( + "expr,expected", + [ + ("5 + 3", 8), + ("5 - 3", 2), + ("5 * 3", 15), + ("10 / 4", 2.5), + ("10 % 3", 1), + ("2 ^ 3", 8), + ], + ) + def test_basic_operations(self, client, expr, expected): + """Scalar arithmetic operations.""" + result = client.query(expr, time=EVAL_TIME) + assert_scalar_value(result, expected) + + def test_negative_numbers(self, client): + """-5 + 3 = -2""" + result = client.query("-5 + 3", time=EVAL_TIME) + assert_scalar_value(result, -2) + + def test_negative_result(self, client): + """3 - 5 = -2""" + result = client.query("3 - 5", time=EVAL_TIME) + assert_scalar_value(result, -2) + + def test_division_by_zero(self, client): + """10 / 0 produces NaN.""" + result = client.query("10 / 0", time=EVAL_TIME) + assert isinstance(result, ScalarResult) + assert math.isnan(result.value) + + def test_zero_division_by_zero(self, client): + """0 / 0 produces NaN.""" + result = client.query("0 / 0", time=EVAL_TIME) + assert isinstance(result, ScalarResult) + assert math.isnan(result.value) + + +class TestOperatorPrecedence: + """Operator precedence follows math conventions.""" + + @pytest.mark.parametrize( + "expr,expected", + [ + ("2 + 3 * 4", 14), # multiplication before addition + ("10 - 6 / 2", 7), # division before subtraction + ("(2 + 3) * 4", 20), # parentheses override + ("2 * 3 + 4 * 5", 26), # left-to-right for same precedence + ("10 / 2 / 5", 1), # left-to-right division + ("((1 + 2) * (3 + 4))", 21), # nested parentheses + ("2 * 3 ^ 2", 18), + ("(2 * 3) ^ 2", 36), + ("-2 ^ 2", -4), + ("(-2) ^ 2", 4), + ], + ) + def test_precedence(self, client, 
expr, expected): + """Operator precedence is respected.""" + result = client.query(expr, time=EVAL_TIME) + assert_scalar_value(result, expected) + + +# ============================================================================= +# Tests: Vector-Scalar Operations +# ============================================================================= + + +class TestVectorScalarArithmetic: + """Arithmetic between vector and scalar.""" + + def test_vector_times_scalar(self, single_series): + """metric * 2 doubles all values.""" + result = single_series.query("metric * 2", time=EVAL_TIME) + assert_instant_single(result, expected_value=200) + + def test_scalar_times_vector(self, single_series): + """2 * metric is same as metric * 2 (commutative).""" + result = single_series.query("2 * metric", time=EVAL_TIME) + assert_instant_single(result, expected_value=200) + + def test_vector_plus_scalar(self, single_series): + """metric + 50 adds 50 to all values.""" + result = single_series.query("metric + 50", time=EVAL_TIME) + assert_instant_single(result, expected_value=150) + + def test_scalar_plus_vector(self, single_series): + """50 + metric is same as metric + 50.""" + result = single_series.query("50 + metric", time=EVAL_TIME) + assert_instant_single(result, expected_value=150) + + def test_vector_minus_scalar(self, single_series): + """metric - 30 subtracts 30 from all values.""" + result = single_series.query("metric - 30", time=EVAL_TIME) + assert_instant_single(result, expected_value=70) + + def test_scalar_minus_vector(self, single_series): + """150 - metric subtracts metric from 150.""" + result = single_series.query("150 - metric", time=EVAL_TIME) + assert_instant_single(result, expected_value=50) + + def test_vector_divided_by_scalar(self, single_series): + """metric / 4 divides all values by 4.""" + result = single_series.query("metric / 4", time=EVAL_TIME) + assert_instant_single(result, expected_value=25) + + def test_scalar_divided_by_vector(self, single_series): + 
"""1000 / metric gives 1000/x for each value x.""" + result = single_series.query("1000 / metric", time=EVAL_TIME) + assert_instant_single(result, expected_value=10) + + +class TestVectorScalarMultipleSeries: + """Vector-scalar operations on multiple series.""" + + def test_multiply_all_series(self, multi_series): + """metric * 2 doubles each series independently.""" + result = multi_series.query("metric * 2", time=EVAL_TIME) + # At T: A=10, B=20, C=30 -> doubled: 20, 40, 60 + assert_instant_values(result, expected_values=[20, 40, 60]) + + def test_add_all_series(self, multi_series): + """metric + 100 adds to each series.""" + result = multi_series.query("metric + 100", time=EVAL_TIME) + # At T: A=10, B=20, C=30 -> plus 100: 110, 120, 130 + assert_instant_values(result, expected_values=[110, 120, 130]) + + +class TestVectorScalarPreservesMetadata: + """Vector-scalar operations preserve labels and timestamps.""" + + def test_preserves_labels(self, multi_series): + """Labels are preserved after operation.""" + result = multi_series.query("metric * 2", time=EVAL_TIME) + assert isinstance(result, InstantVector) + labels_set = {frozenset(labels.items()) for labels, _ in result.series} + expected = { + frozenset([("__name__", "metric"), ("label", "A")]), + frozenset([("__name__", "metric"), ("label", "B")]), + frozenset([("__name__", "metric"), ("label", "C")]), + } + assert labels_set == expected + + def test_preserves_timestamp(self, single_series): + """Sample timestamp is preserved.""" + result = single_series.query("metric * 2", time=EVAL_TIME) + _, sample = result.series[0] + assert sample.timestamp == EVAL_TIME + + +# ============================================================================= +# Tests: Vector-Vector Operations +# ============================================================================= + + +class TestVectorVectorArithmetic: + """Arithmetic between two vectors with label matching.""" + + def test_addition_matching_labels(self, two_metrics): + 
"""metric_a + metric_b adds matching series.""" + result = two_metrics.query("metric_a + metric_b", time=EVAL_TIME) + # X: 100+10=110, Y: 200+20=220, Z has no match + assert_instant_values(result, expected_values=[110, 220]) + + def test_subtraction_matching_labels(self, two_metrics): + """metric_a - metric_b subtracts matching series.""" + result = two_metrics.query("metric_a - metric_b", time=EVAL_TIME) + # X: 100-10=90, Y: 200-20=180 + assert_instant_values(result, expected_values=[90, 180]) + + def test_multiplication_matching_labels(self, two_metrics): + """metric_a * metric_b multiplies matching series.""" + result = two_metrics.query("metric_a * metric_b", time=EVAL_TIME) + # X: 100*10=1000, Y: 200*20=4000 + assert_instant_values(result, expected_values=[1000, 4000]) + + def test_division_matching_labels(self, two_metrics): + """metric_a / metric_b divides matching series.""" + result = two_metrics.query("metric_a / metric_b", time=EVAL_TIME) + # X: 100/10=10, Y: 200/20=10 + assert_instant_values(result, expected_values=[10, 10]) + + +class TestVectorVectorNoMatch: + """Vector-vector operations with no matching labels.""" + + def test_no_match_returns_empty(self, client): + """Non-matching series produce no output.""" + client.insert_gauge("metric_a", 100, EVAL_TIME, labels={"env": "prod"}) + client.insert_gauge("metric_b", 10, EVAL_TIME, labels={"env": "dev"}) + + result = client.query("metric_a + metric_b", time=EVAL_TIME) + assert_instant_empty(result) + + def test_partial_match(self, two_metrics): + """Only matching pairs produce output.""" + # metric_b has label="Z" with no match in metric_a + result = two_metrics.query("metric_a + metric_b", time=EVAL_TIME) + assert isinstance(result, InstantVector) + assert len(result.series) == 2 # Only X and Y match + + +class TestVectorSameMetric: + """Operations on same metric (metric + metric).""" + + def test_metric_plus_itself(self, single_series): + """metric + metric = 2 * metric.""" + result = 
single_series.query("metric + metric", time=EVAL_TIME) + assert_instant_single(result, expected_value=200) + + def test_metric_minus_itself(self, single_series): + """metric - metric = 0.""" + result = single_series.query("metric - metric", time=EVAL_TIME) + assert_instant_single(result, expected_value=0) + + def test_metric_divided_by_itself(self, single_series): + """metric / metric = 1.""" + result = single_series.query("metric / metric", time=EVAL_TIME) + assert_instant_single(result, expected_value=1) + + +# ============================================================================= +# Tests: Comparison Operations +# ============================================================================= + + +class TestComparisonOperators: + """Comparison operators filter series.""" + + def test_greater_than(self, multi_series): + """metric > 15 filters to values > 15.""" + result = multi_series.query("metric > 15", time=EVAL_TIME) + # At T: A=10 (excluded), B=20 (included), C=30 (included) + assert_instant_values(result, expected_values=[20, 30]) + + def test_less_than(self, multi_series): + """metric < 25 filters to values < 25.""" + result = multi_series.query("metric < 25", time=EVAL_TIME) + # At T: A=10 (included), B=20 (included), C=30 (excluded) + assert_instant_values(result, expected_values=[10, 20]) + + def test_greater_equal(self, multi_series): + """metric >= 20 filters to values >= 20.""" + result = multi_series.query("metric >= 20", time=EVAL_TIME) + # At T: A=10 (excluded), B=20 (included), C=30 (included) + assert_instant_values(result, expected_values=[20, 30]) + + def test_less_equal(self, multi_series): + """metric <= 20 filters to values <= 20.""" + result = multi_series.query("metric <= 20", time=EVAL_TIME) + # At T: A=10 (included), B=20 (included), C=30 (excluded) + assert_instant_values(result, expected_values=[10, 20]) + + def test_equal(self, multi_series): + """metric == 20 filters to values == 20.""" + result = multi_series.query("metric == 
20", time=EVAL_TIME) + assert_instant_values(result, expected_values=[20]) + + def test_not_equal(self, multi_series): + """metric != 20 filters to values != 20.""" + result = multi_series.query("metric != 20", time=EVAL_TIME) + # At T: A=10, C=30 (B=20 excluded) + assert_instant_values(result, expected_values=[10, 30]) + + def test_comparison_returns_original_value(self, single_series): + """Comparison returns original value, not 1/0.""" + result = single_series.query("metric > 50", time=EVAL_TIME) + # At T: value=100 > 50, should return 100 (not 1) + assert_instant_single(result, expected_value=100) + + def test_comparison_no_match_empty(self, single_series): + """Comparison with no matches returns empty.""" + result = single_series.query("metric > 200", time=EVAL_TIME) + assert_instant_empty(result) + + +# ============================================================================= +# Tests: Range Queries +# ============================================================================= + + +class TestBinaryInRangeQuery: + """Binary operations in range queries.""" + + def test_scalar_operation_at_each_step(self, single_series): + """Operation is evaluated at each step.""" + result = single_series.query_range( + "metric * 2", + start=EVAL_TIME, + end=EVAL_TIME + 1_800_000, + step="10m", + ) + assert isinstance(result, MatrixResult) + assert len(result.series) == 1 + _, series = result.series[0] + # At T: 200, T+10m: 220, T+20m: 240, T+30m: 260 + values = [s.value for s in series] + assert values == pytest.approx([200, 220, 240, 260]) + + def test_combined_with_rollup(self, single_series): + """avg_over_time(metric[5m]) * 2 works correctly.""" + result = single_series.query("avg_over_time(metric[5m]) * 2", time=EVAL_TIME) + # avg of values in (T-5m, T] is avg of [-4,-3,-2,-1,0]+100 = 98 + # 98 * 2 = 196 + assert_instant_single(result, expected_value=196) + + +# ============================================================================= +# Tests: Edge Cases +# 
============================================================================= + + +class TestBinaryEdgeCases: + """Edge cases for binary operations.""" + + def test_chained_operations(self, single_series): + """metric * 2 + 10 / 2 evaluates correctly.""" + result = single_series.query("metric * 2 + 10 / 2", time=EVAL_TIME) + # (100 * 2) + (10 / 2) = 200 + 5 = 205 + assert_instant_single(result, expected_value=205) + + def test_deeply_nested_parentheses(self, client): + """((((1 + 2) * 3) - 4) / 5) evaluates correctly.""" + result = client.query("((((1 + 2) * 3) - 4) / 5)", time=EVAL_TIME) + # ((3 * 3) - 4) / 5 = (9 - 4) / 5 = 5 / 5 = 1 + assert_scalar_value(result, 1) + + def test_operation_with_empty_vector(self, client): + """Operation with empty vector returns empty result.""" + result = client.query("nonexistent * 2", time=EVAL_TIME) + assert_instant_empty(result) + + def test_negative_scalar_multiplication(self, single_series): + """metric * -1 negates values.""" + result = single_series.query("metric * -1", time=EVAL_TIME) + assert_instant_single(result, expected_value=-100) + + def test_double_negation(self, client): + """--5 equals 5.""" + result = client.query("--5", time=EVAL_TIME) + assert_scalar_value(result, 5) diff --git a/tests/queries/test_compaction.py b/tests/queries/test_compaction.py new file mode 100644 index 0000000..418a724 --- /dev/null +++ b/tests/queries/test_compaction.py @@ -0,0 +1,171 @@ +"""Tests for querying compacted data. + +Compacted data combines multiple samples into buckets with aggregate values. +Gauge compaction stores min/max/avg/count. Counter compaction extends the +end timestamp for unchanged values. 
+ +Key behaviors: +- Compacted gauge stores min, max, value (average), sample_count +- Counter with unchanged value extends end timestamp instead of new row +- min_over_time uses stored min from compacted data +- max_over_time uses stored max from compacted data +- Other rollups use the average value +""" + +import pytest + +from metricsqlite import MetricsQLiteClient + +EVAL_TIME = 946_684_800_000 # 2000-01-01 00:00:00 UTC + + +@pytest.fixture +def client(): + """Create an in-memory client with tables initialized.""" + client = MetricsQLiteClient(None) + client.connect() + client.create_tables() + yield client + client.close() + + +class TestCompactedGaugeInstant: + """Tests for instant queries on compacted gauge data.""" + + def test_compacted_gauge_returns_average(self, client: MetricsQLiteClient): + """Instant query on compacted gauge returns the average value.""" + pytest.skip("TODO: Implement test") + + def test_compacted_gauge_timestamp_clamped(self, client: MetricsQLiteClient): + """Instant query timestamp is clamped to eval_time when bucket extends past it. + + If bucket spans [T-10m, T+5m] and query is at T, timestamp should be T. 
+ """ + pytest.skip("TODO: Implement test") + + def test_compacted_gauge_within_bucket(self, client: MetricsQLiteClient): + """Query eval_time within bucket returns bucket average at eval_time.""" + pytest.skip("TODO: Implement test") + + def test_compacted_gauge_at_bucket_start(self, client: MetricsQLiteClient): + """Query at bucket start timestamp returns that bucket.""" + pytest.skip("TODO: Implement test") + + def test_compacted_gauge_at_bucket_end(self, client: MetricsQLiteClient): + """Query at bucket end timestamp returns that bucket.""" + pytest.skip("TODO: Implement test") + + def test_multiple_buckets_returns_latest(self, client: MetricsQLiteClient): + """With multiple buckets, latest one covering eval_time is returned.""" + pytest.skip("TODO: Implement test") + + +class TestCompactedGaugeRangeVector: + """Tests for range vectors on compacted gauge data.""" + + def test_range_vector_bucket_fully_within(self, client: MetricsQLiteClient): + """Bucket fully within range returns two samples (start, end).""" + pytest.skip("TODO: Implement test") + + def test_range_vector_bucket_start_outside(self, client: MetricsQLiteClient): + """Bucket starting before range has start clamped to range start.""" + pytest.skip("TODO: Implement test") + + def test_range_vector_bucket_end_outside(self, client: MetricsQLiteClient): + """Bucket ending after range has end clamped to range end.""" + pytest.skip("TODO: Implement test") + + def test_range_vector_bucket_spans_entire_range(self, client: MetricsQLiteClient): + """Bucket spanning entire range returns samples at range boundaries.""" + pytest.skip("TODO: Implement test") + + def test_range_vector_multiple_buckets(self, client: MetricsQLiteClient): + """Multiple buckets in range each contribute samples.""" + pytest.skip("TODO: Implement test") + + +class TestCompactedGaugeRollup: + """Tests for rollup functions on compacted gauge data.""" + + def test_min_over_time_uses_stored_min(self, client: MetricsQLiteClient): + 
"""min_over_time uses the stored min value from compacted bucket.""" + pytest.skip("TODO: Implement test") + + def test_max_over_time_uses_stored_max(self, client: MetricsQLiteClient): + """max_over_time uses the stored max value from compacted bucket.""" + pytest.skip("TODO: Implement test") + + def test_avg_over_time_on_compacted(self, client: MetricsQLiteClient): + """avg_over_time uses the average value from compacted bucket.""" + pytest.skip("TODO: Implement test") + + def test_sum_over_time_on_compacted(self, client: MetricsQLiteClient): + """sum_over_time sums the average values (not original samples).""" + pytest.skip("TODO: Implement test") + + def test_count_over_time_on_compacted(self, client: MetricsQLiteClient): + """count_over_time counts bucket points, not original samples. + + A compacted bucket contributes 2 samples (start, end) to the range. + """ + pytest.skip("TODO: Implement test") + + +class TestCounterCompaction: + """Tests for counter data with extended end timestamps.""" + + def test_counter_instant_with_extended_end(self, client: MetricsQLiteClient): + """Counter with multiple inserts at same value shows extended end.""" + pytest.skip("TODO: Implement test") + + def test_counter_timestamp_clamped_to_eval_time(self, client: MetricsQLiteClient): + """If counter spans past eval_time, timestamp is clamped to eval_time.""" + pytest.skip("TODO: Implement test") + + def test_counter_range_vector_boundary_clamping(self, client: MetricsQLiteClient): + """Counter spanning range boundaries has timestamps clamped.""" + pytest.skip("TODO: Implement test") + + def test_counter_sample_count_preserved(self, client: MetricsQLiteClient): + """Sample count reflects number of original inserts.""" + pytest.skip("TODO: Implement test") + + +class TestMixedCompactedAndRaw: + """Tests for queries spanning compacted and raw data.""" + + def test_range_with_compacted_and_raw(self, client: MetricsQLiteClient): + """Range spanning compacted buckets and raw 
samples.""" + pytest.skip("TODO: Implement test") + + def test_rollup_on_mixed_data(self, client: MetricsQLiteClient): + """Rollup function on mix of compacted and raw data.""" + pytest.skip("TODO: Implement test") + + def test_boundary_between_compacted_and_raw(self, client: MetricsQLiteClient): + """Behavior at boundary between compacted and raw data.""" + pytest.skip("TODO: Implement test") + + +class TestCompactionEdgeCases: + """Edge cases for compacted data queries.""" + + def test_single_sample_bucket(self, client: MetricsQLiteClient): + """Bucket with single sample has start == end.""" + pytest.skip("TODO: Implement test") + + def test_bucket_with_all_same_values(self, client: MetricsQLiteClient): + """Bucket where all samples have same value: min == max == avg.""" + pytest.skip("TODO: Implement test") + + def test_bucket_with_extreme_values(self, client: MetricsQLiteClient): + """Bucket with very large/small values.""" + pytest.skip("TODO: Implement test") + + def test_adjacent_buckets(self, client: MetricsQLiteClient): + """Adjacent buckets with no gap between them.""" + pytest.skip("TODO: Implement test") + + def test_gap_between_buckets(self, client: MetricsQLiteClient): + """Buckets with time gap between them.""" + pytest.skip("TODO: Implement test") diff --git a/tests/queries/test_instant_query.py b/tests/queries/test_instant_query.py new file mode 100644 index 0000000..b7cd597 --- /dev/null +++ b/tests/queries/test_instant_query.py @@ -0,0 +1,435 @@ +"""Tests for instant query (query()) behavior. + +Tests the client.query() endpoint which evaluates a MetricsQL expression +at a single point in time, returning an InstantVector or ScalarResult. 
+ +Key behaviors tested: +- Latest sample selection within lookback window +- Staleness handling (samples outside lookback are ignored) +- Timestamp behavior (sample timestamp vs eval_time) +- Label matching and filtering +- Multiple series handling +""" + +from datetime import datetime, timezone + +import pytest + +from metricsqlite.engine import InstantVector, ScalarResult + +# Standard eval time: 2000-01-01 00:00:00 UTC +EVAL_TIME = 946_684_800_000 + +# Time constants in milliseconds +ONE_MINUTE = 60_000 +FIVE_MINUTES = 300_000 +ONE_HOUR = 3_600_000 + + +# ============================================================================= +# Fixtures (client fixture inherited from conftest.py) +# ============================================================================= + + +@pytest.fixture +def single_series(client): + """Single series with samples at various times. + + Series: metric (no labels) + Samples: + - T-10m: value = 10 + - T-5m: value = 50 + - T-2m: value = 80 + - T: value = 100 + """ + client.insert_gauge("metric", 10, EVAL_TIME - 10 * ONE_MINUTE) + client.insert_gauge("metric", 50, EVAL_TIME - 5 * ONE_MINUTE) + client.insert_gauge("metric", 80, EVAL_TIME - 2 * ONE_MINUTE) + client.insert_gauge("metric", 100, EVAL_TIME) + return client + + +@pytest.fixture +def multi_series(client): + """Multiple series with different labels. + + Series: + - metric{env="prod"}: value = 100 at T + - metric{env="dev"}: value = 200 at T + - metric{env="test"}: value = 300 at T-10m (stale with default 5m lookback) + """ + client.insert_gauge("metric", 100, EVAL_TIME, labels={"env": "prod"}) + client.insert_gauge("metric", 200, EVAL_TIME, labels={"env": "dev"}) + client.insert_gauge("metric", 300, EVAL_TIME - 10 * ONE_MINUTE, labels={"env": "test"}) + return client + + +@pytest.fixture +def minute_series(client): + """Series with 1-minute interval samples for staleness tests. + + Series: metric (no labels) + Samples from T-60m to T at 1-minute intervals. 
+ Value at each minute = minute offset (e.g., T-5m has value -5, T has value 0) + """ + for minute in range(-60, 1): + client.insert_gauge("metric", minute, EVAL_TIME + minute * ONE_MINUTE) + return client + + +# ============================================================================= +# Helpers +# ============================================================================= + + +def assert_instant_value(result, expected_value, expected_labels=None): + """Assert instant query returns expected single value.""" + assert isinstance(result, InstantVector) + assert len(result.series) == 1 + labels, sample = result.series[0] + if expected_labels is not None: + assert labels == expected_labels + assert sample.value == pytest.approx(expected_value) + + +def assert_instant_values(result, expected_values): + """Assert instant vector contains expected values (sorted).""" + assert isinstance(result, InstantVector) + actual = sorted([sample.value for _, sample in result.series]) + assert actual == pytest.approx(sorted(expected_values)) + + +def assert_instant_empty(result): + """Assert instant query returns no series.""" + assert isinstance(result, InstantVector) + assert len(result.series) == 0 + + +def assert_instant_timestamp(result, expected_timestamp): + """Assert the sample timestamp in the result.""" + assert isinstance(result, InstantVector) + assert len(result.series) == 1 + _, sample = result.series[0] + assert sample.timestamp == expected_timestamp + + +# ============================================================================= +# Tests: Basic Instant Query Functionality +# ============================================================================= + + +class TestInstantQueryBasics: + """Basic instant query functionality.""" + + def test_returns_latest_sample_in_lookback(self, single_series): + """When multiple samples exist within lookback, return the latest.""" + result = single_series.query("metric", time=EVAL_TIME) + # With default 5m lookback, samples 
at T-5m, T-2m and T are in range + # Should return the latest (T, value=100) + assert_instant_value(result, expected_value=100) + + def test_empty_result_for_nonexistent_metric(self, client): + """Querying a metric that doesn't exist returns empty InstantVector.""" + result = client.query("nonexistent", time=EVAL_TIME) + assert_instant_empty(result) + + def test_empty_result_when_all_samples_stale(self, client): + """Querying when all samples are outside lookback returns empty.""" + # Insert sample 10 minutes ago (outside default 5m lookback) + client.insert_gauge("metric", 100, EVAL_TIME - 10 * ONE_MINUTE) + + result = client.query("metric", time=EVAL_TIME) + assert_instant_empty(result) + + def test_sample_at_exact_eval_time(self, client): + """Sample with timestamp exactly at eval_time should be included.""" + client.insert_gauge("metric", 42, EVAL_TIME) + + result = client.query("metric", time=EVAL_TIME) + assert_instant_value(result, expected_value=42) + + def test_sample_at_exact_lookback_boundary(self, client): + """Sample at T-lookback boundary is included.""" + # Insert sample exactly 5 minutes ago (at boundary of default lookback) + client.insert_gauge("metric", 42, EVAL_TIME - FIVE_MINUTES) + + result = client.query("metric", time=EVAL_TIME) + assert_instant_value(result, expected_value=42) + + def test_sample_just_outside_lookback(self, client): + """Sample 1ms before lookback boundary is excluded.""" + # Insert sample 5 minutes + 1ms ago (just outside default lookback) + client.insert_gauge("metric", 42, EVAL_TIME - FIVE_MINUTES - 1) + + result = client.query("metric", time=EVAL_TIME) + assert_instant_empty(result) + + +# ============================================================================= +# Tests: Timestamp Behavior +# ============================================================================= + + +class TestInstantQueryTimestamps: + """Timestamp behavior in instant query results.""" + + def test_result_timestamp_is_sample_timestamp(self, 
client): + """InstantVector sample timestamp should be the actual sample timestamp.""" + sample_time = EVAL_TIME - 2 * ONE_MINUTE + client.insert_gauge("metric", 42, sample_time) + + result = client.query("metric", time=EVAL_TIME) + assert_instant_timestamp(result, expected_timestamp=sample_time) + + def test_future_sample_not_included(self, client): + """Samples with timestamp after eval_time are never included.""" + # Insert sample in the future + client.insert_gauge("metric", 100, EVAL_TIME + ONE_HOUR) + # Insert sample in the past (should be returned) + client.insert_gauge("metric", 42, EVAL_TIME - ONE_MINUTE) + + result = client.query("metric", time=EVAL_TIME) + assert_instant_value(result, expected_value=42) + + def test_future_sample_1ms_after_eval_time(self, client): + """Even 1ms in the future excludes the sample.""" + client.insert_gauge("metric", 100, EVAL_TIME + 1) + + result = client.query("metric", time=EVAL_TIME) + assert_instant_empty(result) + + def test_latest_sample_selected(self, single_series): + """When multiple samples in lookback, latest timestamp wins.""" + result = single_series.query("metric", time=EVAL_TIME) + # Sample at T (100) should be selected over T-2m (80) + assert_instant_timestamp(result, expected_timestamp=EVAL_TIME) + assert_instant_value(result, expected_value=100) + + +# ============================================================================= +# Tests: Staleness Handling +# ============================================================================= + + +class TestInstantQueryStaleness: + """Staleness handling for instant queries.""" + + def test_default_lookback_is_5_minutes(self, minute_series): + """Default lookback (step) is 5 minutes (300 seconds).""" + result = minute_series.query("metric", time=EVAL_TIME) + # Should return sample at T (value=0), not older ones + assert_instant_value(result, expected_value=0) + + def test_custom_lookback_as_seconds(self, minute_series): + """Custom lookback can be specified as 
integer seconds.""" + # Query at T-6m with a 10 minute (600 s) lookback + result = minute_series.query("metric", time=EVAL_TIME - 6 * ONE_MINUTE, step=600) + # At T-6m, latest sample is T-6m (value=-6) + assert_instant_value(result, expected_value=-6) + + def test_custom_lookback_as_duration_string(self, minute_series): + """Custom lookback can be specified as duration string (e.g., '10m').""" + result = minute_series.query("metric", time=EVAL_TIME, step="10m") + # With 10m lookback, sample at T (value=0) is still the latest + assert_instant_value(result, expected_value=0) + + def test_short_lookback_excludes_samples(self, single_series): + """Short lookback excludes samples that would otherwise be included.""" + # With 1m lookback, only sample at T is included (T-2m is excluded) + result = single_series.query("metric", time=EVAL_TIME, step="1m") + assert_instant_value(result, expected_value=100) + + def test_very_large_lookback(self, minute_series): + """Very large lookback includes old samples.""" + # Query at T-30m with 1 hour lookback + result = minute_series.query("metric", time=EVAL_TIME - 30 * ONE_MINUTE, step="1h") + # Latest sample at or before T-30m is T-30m (value=-30) + assert_instant_value(result, expected_value=-30) + + +# ============================================================================= +# Tests: Multiple Series +# ============================================================================= + + +class TestInstantQueryMultipleSeries: + """Handling multiple series in instant queries.""" + + def test_multiple_series_same_metric(self, multi_series): + """Query returns all matching series with different labels.""" + result = multi_series.query("metric", time=EVAL_TIME) + # prod (100) and dev (200) are fresh, test (300) is stale + assert_instant_values(result, expected_values=[100, 200]) + + def test_each_series_independent_staleness(self, multi_series): + """Each series is evaluated independently for staleness.""" + # With longer 
lookback, test series should also be included + result = multi_series.query("metric", time=EVAL_TIME, step="15m") + assert_instant_values(result, expected_values=[100, 200, 300]) + + def test_partial_stale_series(self, client): + """When some series are stale and others aren't, only non-stale returned.""" + client.insert_gauge("metric", 100, EVAL_TIME, labels={"status": "fresh"}) + client.insert_gauge("metric", 200, EVAL_TIME - 10 * ONE_MINUTE, labels={"status": "stale"}) + + result = client.query("metric", time=EVAL_TIME) + assert isinstance(result, InstantVector) + assert len(result.series) == 1 + labels, sample = result.series[0] + assert labels["status"] == "fresh" + assert sample.value == pytest.approx(100) + + def test_labels_preserved_in_result(self, multi_series): + """Labels are preserved in the result.""" + result = multi_series.query("metric", time=EVAL_TIME) + labels_set = {frozenset(labels.items()) for labels, _ in result.series} + assert frozenset([("__name__", "metric"), ("env", "prod")]) in labels_set + assert frozenset([("__name__", "metric"), ("env", "dev")]) in labels_set + + +# ============================================================================= +# Tests: With Functions +# ============================================================================= + + +class TestInstantQueryWithFunctions: + """Instant queries with functions applied.""" + + def test_transformation_on_instant_vector(self, client): + """Transformation functions (abs) work on instant vectors.""" + client.insert_gauge("metric", -50, EVAL_TIME) + + result = client.query("abs(metric)", time=EVAL_TIME) + assert_instant_value(result, expected_value=50) + + def test_clamp_min_on_instant_vector(self, client): + """clamp_min transformation works on instant vectors.""" + client.insert_gauge("metric", 30, EVAL_TIME) + + result = client.query("clamp_min(metric, 50)", time=EVAL_TIME) + assert_instant_value(result, expected_value=50) + + def test_clamp_max_on_instant_vector(self, 
client): + """clamp_max transformation works on instant vectors.""" + client.insert_gauge("metric", 100, EVAL_TIME) + + result = client.query("clamp_max(metric, 50)", time=EVAL_TIME) + assert_instant_value(result, expected_value=50) + + def test_rollup_on_range_selector(self, minute_series): + """Rollup functions (avg_over_time) on range selector return instant vector.""" + result = minute_series.query("avg_over_time(metric[5m])", time=EVAL_TIME) + # Range (T-5m, T] has samples at -4, -3, -2, -1, 0 + # avg = -2 + assert_instant_value(result, expected_value=-2) + + def test_aggregation_across_series(self, multi_series): + """Aggregation functions (sum) aggregate across series at eval_time.""" + result = multi_series.query("sum(metric)", time=EVAL_TIME) + # prod (100) + dev (200) = 300 (test is stale) + assert_instant_value(result, expected_value=300, expected_labels={}) + + def test_binary_operation(self, single_series): + """Binary operations work on instant queries.""" + result = single_series.query("metric * 2", time=EVAL_TIME) + assert_instant_value(result, expected_value=200) + + def test_comparison_filter(self, multi_series): + """Comparison operators filter instant vectors.""" + result = multi_series.query("metric > 150", time=EVAL_TIME) + # Only dev (200) passes the filter + assert_instant_values(result, expected_values=[200]) + + +# ============================================================================= +# Tests: Edge Cases +# ============================================================================= + + +class TestInstantQueryEdgeCases: + """Edge cases for instant queries.""" + + def test_eval_time_as_datetime(self, client): + """eval_time can be specified as datetime object.""" + dt = datetime(2000, 1, 1, 0, 0, 0, tzinfo=timezone.utc) + client.insert_gauge("metric", 42, EVAL_TIME) + + result = client.query("metric", time=dt) + assert_instant_value(result, expected_value=42) + + def test_eval_time_as_iso_string(self, client): + """eval_time can be 
specified as ISO 8601 string.""" + client.insert_gauge("metric", 42, EVAL_TIME) + + result = client.query("metric", time="2000-01-01T00:00:00Z") + assert_instant_value(result, expected_value=42) + + def test_negative_metric_values(self, client): + """Negative metric values are handled correctly.""" + client.insert_gauge("metric", -42, EVAL_TIME) + + result = client.query("metric", time=EVAL_TIME) + assert_instant_value(result, expected_value=-42) + + def test_zero_metric_value(self, client): + """Zero metric value is handled correctly.""" + client.insert_gauge("metric", 0, EVAL_TIME) + + result = client.query("metric", time=EVAL_TIME) + assert_instant_value(result, expected_value=0) + + def test_very_large_metric_value(self, client): + """Very large metric values are handled correctly.""" + client.insert_gauge("metric", 1e15, EVAL_TIME) + + result = client.query("metric", time=EVAL_TIME) + assert_instant_value(result, expected_value=1e15) + + def test_very_small_metric_value(self, client): + """Very small metric values are handled correctly.""" + client.insert_gauge("metric", 1e-15, EVAL_TIME) + + result = client.query("metric", time=EVAL_TIME) + assert_instant_value(result, expected_value=1e-15) + + def test_scalar_query(self, client): + """Scalar expressions return ScalarResult.""" + result = client.query("5 + 3", time=EVAL_TIME) + assert isinstance(result, ScalarResult) + assert result.value == pytest.approx(8) + + def test_empty_metric_name_with_labels(self, client): + """Query with only label matchers works.""" + client.insert_gauge("metric", 42, EVAL_TIME, labels={"env": "prod"}) + + result = client.query('{__name__="metric", env="prod"}', time=EVAL_TIME) + assert_instant_value(result, expected_value=42) + + +# ============================================================================= +# Tests: Compacted/Counter Data +# ============================================================================= + + +class TestInstantQueryCompactedData: + """Instant 
queries on compacted gauge data.""" + + def test_compacted_gauge_returns_value(self, client): + """Compacted gauge bucket returns the average value.""" + pytest.skip("TODO: Implement once compaction is fully supported") + + def test_compacted_end_timestamp_for_staleness(self, client): + """Compacted bucket uses end timestamp for staleness check.""" + pytest.skip("TODO: Implement once compaction is fully supported") + + +class TestInstantQueryCounterData: + """Instant queries on counter data.""" + + def test_counter_with_extended_end(self, client): + """Counter with extended end timestamp is not stale.""" + pytest.skip("TODO: Implement once counter semantics are tested") + + def test_counter_timestamp_clamped(self, client): + """Counter timestamp is clamped to eval_time if end > eval_time.""" + pytest.skip("TODO: Implement once counter semantics are tested") diff --git a/tests/queries/test_label_matching.py b/tests/queries/test_label_matching.py new file mode 100644 index 0000000..a978efd --- /dev/null +++ b/tests/queries/test_label_matching.py @@ -0,0 +1,515 @@ +"""Tests for label matching in selectors. + +MetricsQL supports various label matching operators: +- = : Exact equality +- != : Not equal +- =~ : Regex match +- !~ : Regex not match + +VictoriaMetrics-specific behaviors: +- __name__ is a special label containing metric name +- Empty label value vs missing label +""" + +import pytest + +from metricsqlite.engine import InstantVector + +# Standard eval time: 2000-01-01 00:00:00 UTC +EVAL_TIME = 946_684_800_000 + + +# ============================================================================= +# Fixtures (client fixture inherited from conftest.py) +# ============================================================================= + + +@pytest.fixture +def labeled_series(client): + """Multiple series with various label combinations. 
+ + Series created: + - metric{env="prod", region="us"}: value = 100 + - metric{env="prod", region="eu"}: value = 200 + - metric{env="dev", region="us"}: value = 300 + - metric{env="dev", region="eu"}: value = 400 + - metric{env="staging"}: value = 500 (no region label) + - metric (no labels): value = 600 + """ + client.insert_gauge("metric", 100, EVAL_TIME, labels={"env": "prod", "region": "us"}) + client.insert_gauge("metric", 200, EVAL_TIME, labels={"env": "prod", "region": "eu"}) + client.insert_gauge("metric", 300, EVAL_TIME, labels={"env": "dev", "region": "us"}) + client.insert_gauge("metric", 400, EVAL_TIME, labels={"env": "dev", "region": "eu"}) + client.insert_gauge("metric", 500, EVAL_TIME, labels={"env": "staging"}) + client.insert_gauge("metric", 600, EVAL_TIME) + return client + + +@pytest.fixture +def multi_metric(client): + """Multiple metrics with various names. + + Metrics created: + - http_requests_total{method="GET"}: value = 100 + - http_requests_total{method="POST"}: value = 200 + - http_errors_total{method="GET"}: value = 10 + - grpc_requests_total{method="GET"}: value = 50 + """ + client.insert_gauge("http_requests_total", 100, EVAL_TIME, labels={"method": "GET"}) + client.insert_gauge("http_requests_total", 200, EVAL_TIME, labels={"method": "POST"}) + client.insert_gauge("http_errors_total", 10, EVAL_TIME, labels={"method": "GET"}) + client.insert_gauge("grpc_requests_total", 50, EVAL_TIME, labels={"method": "GET"}) + return client + + +@pytest.fixture +def numeric_labels(client): + """Series with numeric-looking label values. 
+ + Series created: + - metric{code="200"}: value = 100 + - metric{code="201"}: value = 200 + - metric{code="404"}: value = 300 + - metric{code="500"}: value = 400 + - metric{code="abc"}: value = 500 + """ + client.insert_gauge("metric", 100, EVAL_TIME, labels={"code": "200"}) + client.insert_gauge("metric", 200, EVAL_TIME, labels={"code": "201"}) + client.insert_gauge("metric", 300, EVAL_TIME, labels={"code": "404"}) + client.insert_gauge("metric", 400, EVAL_TIME, labels={"code": "500"}) + client.insert_gauge("metric", 500, EVAL_TIME, labels={"code": "abc"}) + return client + + +# ============================================================================= +# Helpers +# ============================================================================= + + +def assert_instant_values(result, expected_values): + """Assert instant vector contains expected values (sorted).""" + assert isinstance(result, InstantVector) + actual = sorted([sample.value for _, sample in result.series]) + assert actual == pytest.approx(sorted(expected_values)) + + +def assert_instant_empty(result): + """Assert instant query returns no series.""" + assert isinstance(result, InstantVector) + assert len(result.series) == 0 + + +def get_series_count(result): + """Get the number of series in the result.""" + assert isinstance(result, InstantVector) + return len(result.series) + + +# ============================================================================= +# Tests: Exact Equality Matching (=) +# ============================================================================= + + +class TestEqualityMatch: + """Tests for exact equality matching (=).""" + + def test_exact_label_match(self, labeled_series): + """metric{env="prod"} matches only series with env="prod".""" + result = labeled_series.query('metric{env="prod"}', time=EVAL_TIME) + # Should match: prod/us (100), prod/eu (200) + assert_instant_values(result, expected_values=[100, 200]) + + def test_no_match_wrong_value(self, labeled_series): + 
"""metric{env="test"} doesn't match when no series has that value.""" + result = labeled_series.query('metric{env="test"}', time=EVAL_TIME) + assert_instant_empty(result) + + def test_no_match_missing_label(self, labeled_series): + """metric{region="us"} doesn't match series without region label.""" + result = labeled_series.query('metric{region="us"}', time=EVAL_TIME) + # Should match: prod/us (100), dev/us (300) + # Should NOT match: staging (500), no labels (600) + assert_instant_values(result, expected_values=[100, 300]) + + def test_multiple_label_matchers(self, labeled_series): + """metric{env="prod", region="us"} requires both labels to match.""" + result = labeled_series.query('metric{env="prod", region="us"}', time=EVAL_TIME) + # Only prod/us (100) matches both + assert_instant_values(result, expected_values=[100]) + + def test_partial_label_match(self, labeled_series): + """metric{env="prod"} matches series with additional labels.""" + result = labeled_series.query('metric{env="prod"}', time=EVAL_TIME) + # prod/us and prod/eu both have env="prod" plus additional labels + assert get_series_count(result) == 2 + + def test_empty_string_value(self, client): + """metric{label=""} matches series with empty label value.""" + client.insert_gauge("metric", 100, EVAL_TIME, labels={"label": ""}) + client.insert_gauge("metric", 200, EVAL_TIME, labels={"label": "value"}) + client.insert_gauge("metric", 300, EVAL_TIME) # no label at all + + result = client.query('metric{label=""}', time=EVAL_TIME) + # Only matches series with label="" (100) + assert_instant_values(result, expected_values=[100]) + + +# ============================================================================= +# Tests: Not Equal Matching (!=) +# ============================================================================= + + +class TestNotEqualMatch: + """Tests for not equal matching (!=).""" + + def test_not_equal_excludes_value(self, labeled_series): + """metric{env!="prod"} excludes series with 
env="prod".""" + result = labeled_series.query('metric{env!="prod"}', time=EVAL_TIME) + # Excludes: prod/us (100), prod/eu (200) + # Includes: dev/us (300), dev/eu (400), staging (500), no labels (600) + assert_instant_values(result, expected_values=[300, 400, 500, 600]) + + def test_not_equal_includes_other_values(self, labeled_series): + """metric{env!="prod"} includes series with different values.""" + result = labeled_series.query('metric{env!="prod"}', time=EVAL_TIME) + # dev and staging are included + values = sorted([sample.value for _, sample in result.series]) + assert 300 in values # dev/us + assert 400 in values # dev/eu + assert 500 in values # staging + + def test_not_equal_includes_missing_label(self, labeled_series): + """metric{env!="prod"} includes series without that label. + + VictoriaMetrics treats missing label as not equal to any value. + """ + result = labeled_series.query('metric{env!="prod"}', time=EVAL_TIME) + # Series with no labels (600) should be included + values = [sample.value for _, sample in result.series] + assert 600 in values + + def test_not_equal_empty_string(self, client): + """metric{label!=""} behavior with empty value and missing label.""" + client.insert_gauge("metric", 100, EVAL_TIME, labels={"label": ""}) + client.insert_gauge("metric", 200, EVAL_TIME, labels={"label": "value"}) + client.insert_gauge("metric", 300, EVAL_TIME) # no label + + result = client.query('metric{label!=""}', time=EVAL_TIME) + # Should include: series with non-empty value (200) and possibly missing label (300) + # Exact behavior may vary - at minimum 200 should be included + values = [sample.value for _, sample in result.series] + assert 200 in values + assert 100 not in values # empty string is excluded + + +# ============================================================================= +# Tests: Regex Matching (=~) +# ============================================================================= + + +class TestRegexMatch: + """Tests for regex 
matching (=~).""" + + def test_regex_simple_pattern(self, labeled_series): + """metric{env=~"prod.*"} matches values starting with 'prod'.""" + result = labeled_series.query('metric{env=~"prod.*"}', time=EVAL_TIME) + # Matches: prod/us (100), prod/eu (200) + assert_instant_values(result, expected_values=[100, 200]) + + def test_regex_alternation(self, labeled_series): + """metric{env=~"prod|dev"} matches 'prod' or 'dev'.""" + result = labeled_series.query('metric{env=~"prod|dev"}', time=EVAL_TIME) + # Matches: prod/us (100), prod/eu (200), dev/us (300), dev/eu (400) + assert_instant_values(result, expected_values=[100, 200, 300, 400]) + + def test_regex_anchored(self, labeled_series): + """Regex is implicitly anchored (^...$). + + metric{env=~"prod"} only matches exactly "prod", not "production". + """ + result = labeled_series.query('metric{env=~"prod"}', time=EVAL_TIME) + # Matches exactly "prod" + assert_instant_values(result, expected_values=[100, 200]) + + def test_regex_partial_match_needs_wildcard(self, client): + """Regex needs .* to match partial strings.""" + client.insert_gauge("metric", 100, EVAL_TIME, labels={"env": "production"}) + client.insert_gauge("metric", 200, EVAL_TIME, labels={"env": "prod"}) + + # Without wildcard - only exact match + result = client.query('metric{env=~"prod"}', time=EVAL_TIME) + assert_instant_values(result, expected_values=[200]) + + # With wildcard - matches both + result = client.query('metric{env=~"prod.*"}', time=EVAL_TIME) + assert_instant_values(result, expected_values=[100, 200]) + + def test_regex_any_value(self, labeled_series): + """metric{env=~".+"} matches any non-empty value.""" + result = labeled_series.query('metric{env=~".+"}', time=EVAL_TIME) + # Matches all series with env label (prod, dev, staging) + # Excludes: no labels (600) + assert_instant_values(result, expected_values=[100, 200, 300, 400, 500]) + + def test_regex_character_class(self, numeric_labels): + """metric{code=~"[0-9]+"} matches numeric 
values.""" + result = numeric_labels.query('metric{code=~"[0-9]+"}', time=EVAL_TIME) + # Matches: 200, 201, 404, 500 (all numeric codes) + # Excludes: abc + assert_instant_values(result, expected_values=[100, 200, 300, 400]) + + def test_regex_character_class_specific(self, numeric_labels): + """metric{code=~"2.."} matches codes starting with 2.""" + result = numeric_labels.query('metric{code=~"2.."}', time=EVAL_TIME) + # Matches: 200 (100), 201 (200) + assert_instant_values(result, expected_values=[100, 200]) + + def test_regex_dot_matches_any(self, numeric_labels): + """metric{code=~"4.4"} matches 404.""" + result = numeric_labels.query('metric{code=~"4.4"}', time=EVAL_TIME) + assert_instant_values(result, expected_values=[300]) # 404 + + +# ============================================================================= +# Tests: Regex Not Match (!~) +# ============================================================================= + + +class TestRegexNotMatch: + """Tests for regex not match (!~).""" + + def test_regex_not_match_excludes(self, labeled_series): + """metric{env!~"prod.*"} excludes values matching pattern.""" + result = labeled_series.query('metric{env!~"prod.*"}', time=EVAL_TIME) + # Excludes: prod/us (100), prod/eu (200) + # Includes: dev/us (300), dev/eu (400), staging (500), no labels (600) + assert_instant_values(result, expected_values=[300, 400, 500, 600]) + + def test_regex_not_match_includes_non_matching(self, labeled_series): + """metric{env!~"prod|staging"} includes values not matching.""" + result = labeled_series.query('metric{env!~"prod|staging"}', time=EVAL_TIME) + # Excludes: prod/* (100, 200), staging (500) + # Includes: dev/* (300, 400), no labels (600) + assert_instant_values(result, expected_values=[300, 400, 600]) + + def test_regex_not_match_missing_label(self, labeled_series): + """metric{region!~"us"} includes series without region label.""" + result = labeled_series.query('metric{region!~"us"}', time=EVAL_TIME) + # Excludes: 
*/us (100, 300) + # Includes: */eu (200, 400), staging (500), no labels (600) + assert_instant_values(result, expected_values=[200, 400, 500, 600]) + + def test_regex_not_match_any(self, labeled_series): + """metric{env!~".+"} excludes any non-empty env value.""" + result = labeled_series.query('metric{env!~".+"}', time=EVAL_TIME) + # Excludes all with env label + # Includes only: no labels (600) + assert_instant_values(result, expected_values=[600]) + + +# ============================================================================= +# Tests: __name__ Label Matching +# ============================================================================= + + +class TestMetricNameMatching: + """Tests for __name__ label matching.""" + + def test_name_equality(self, multi_metric): + """{__name__="http_requests_total"} same as just http_requests_total.""" + result1 = multi_metric.query("http_requests_total", time=EVAL_TIME) + result2 = multi_metric.query('{__name__="http_requests_total"}', time=EVAL_TIME) + + values1 = sorted([sample.value for _, sample in result1.series]) + values2 = sorted([sample.value for _, sample in result2.series]) + assert values1 == values2 + assert values1 == pytest.approx([100, 200]) + + def test_name_regex(self, multi_metric): + """{__name__=~"http.*"} matches metrics starting with 'http'.""" + result = multi_metric.query('{__name__=~"http.*"}', time=EVAL_TIME) + # Matches: http_requests_total (100, 200), http_errors_total (10) + assert_instant_values(result, expected_values=[10, 100, 200]) + + def test_name_regex_suffix(self, multi_metric): + """{__name__=~".*_total"} matches metrics ending with '_total'.""" + result = multi_metric.query('{__name__=~".*_total"}', time=EVAL_TIME) + # All metrics end with _total + assert_instant_values(result, expected_values=[10, 50, 100, 200]) + + def test_name_with_other_labels(self, multi_metric): + """{__name__="http_requests_total", method="GET"} matches name and label.""" + result = 
multi_metric.query('{__name__="http_requests_total", method="GET"}', time=EVAL_TIME) + assert_instant_values(result, expected_values=[100]) + + def test_name_regex_with_labels(self, multi_metric): + """{__name__=~".*_requests_total", method="GET"} combines name regex and label.""" + result = multi_metric.query('{__name__=~".*_requests_total", method="GET"}', time=EVAL_TIME) + # Matches: http_requests_total/GET (100), grpc_requests_total/GET (50) + assert_instant_values(result, expected_values=[50, 100]) + + def test_name_not_equal(self, multi_metric): + """{__name__!="http_requests_total", method="GET"} excludes specific metric.""" + result = multi_metric.query('{__name__!="http_requests_total", method="GET"}', time=EVAL_TIME) + # Excludes http_requests_total, includes http_errors_total and grpc_requests_total + assert_instant_values(result, expected_values=[10, 50]) + + +# ============================================================================= +# Tests: Combined Matchers +# ============================================================================= + + +class TestCombinedMatchers: + """Tests for combining multiple match types.""" + + def test_equal_and_not_equal(self, labeled_series): + """metric{env="prod", region!="eu"} combines equality and not-equal.""" + result = labeled_series.query('metric{env="prod", region!="eu"}', time=EVAL_TIME) + # env="prod" AND region!="eu" + # Matches: prod/us (100) + assert_instant_values(result, expected_values=[100]) + + def test_equal_and_regex(self, labeled_series): + """metric{env="prod", region=~"u.*"} combines equality and regex.""" + result = labeled_series.query('metric{env="prod", region=~"u.*"}', time=EVAL_TIME) + # env="prod" AND region starts with "u" + # Matches: prod/us (100) + assert_instant_values(result, expected_values=[100]) + + def test_not_equal_and_regex(self, labeled_series): + """metric{env!="staging", region=~".*"} combines not-equal and regex.""" + result = 
labeled_series.query('metric{env!="staging", region=~".*"}', time=EVAL_TIME) + # env!="staging" AND has region label + # Matches: prod/us (100), prod/eu (200), dev/us (300), dev/eu (400) + assert_instant_values(result, expected_values=[100, 200, 300, 400]) + + def test_all_four_match_types(self, client): + """Combining all four matcher types.""" + client.insert_gauge("metric", 100, EVAL_TIME, labels={"a": "1", "b": "2", "c": "prod", "d": "us"}) + client.insert_gauge("metric", 200, EVAL_TIME, labels={"a": "1", "b": "3", "c": "prod", "d": "eu"}) + client.insert_gauge("metric", 300, EVAL_TIME, labels={"a": "1", "b": "2", "c": "dev", "d": "us"}) + client.insert_gauge("metric", 400, EVAL_TIME, labels={"a": "2", "b": "2", "c": "prod", "d": "us"}) + + result = client.query('metric{a="1", b!="3", c=~"prod.*", d!~"eu"}', time=EVAL_TIME) + # a="1" AND b!="3" AND c matches "prod.*" AND d doesn't match "eu" + # Only matches: first series (100) + assert_instant_values(result, expected_values=[100]) + + +# ============================================================================= +# Tests: Edge Cases +# ============================================================================= + + +class TestLabelMatchingEdgeCases: + """Edge cases for label matching.""" + + def test_label_with_special_characters(self, client): + """Label names can contain underscores.""" + client.insert_gauge("metric", 100, EVAL_TIME, labels={"app_name": "my_app"}) + client.insert_gauge("metric", 200, EVAL_TIME, labels={"k8s_io_name": "pod"}) + + result = client.query('metric{app_name="my_app"}', time=EVAL_TIME) + assert_instant_values(result, expected_values=[100]) + + result = client.query('metric{k8s_io_name="pod"}', time=EVAL_TIME) + assert_instant_values(result, expected_values=[200]) + + def test_label_value_with_spaces(self, client): + """Label values can contain spaces.""" + client.insert_gauge("metric", 100, EVAL_TIME, labels={"description": "hello world"}) + + result = 
client.query('metric{description="hello world"}', time=EVAL_TIME) + assert_instant_values(result, expected_values=[100]) + + def test_unicode_label_value(self, client): + """Unicode characters in label values.""" + client.insert_gauge("metric", 100, EVAL_TIME, labels={"region": "日本"}) + client.insert_gauge("metric", 200, EVAL_TIME, labels={"region": "europe"}) + + result = client.query('metric{region="日本"}', time=EVAL_TIME) + assert_instant_values(result, expected_values=[100]) + + def test_many_labels(self, client): + """Series with many labels still matches correctly.""" + labels = {f"label{i}": f"value{i}" for i in range(10)} + client.insert_gauge("metric", 100, EVAL_TIME, labels=labels) + + result = client.query('metric{label0="value0", label5="value5"}', time=EVAL_TIME) + assert_instant_values(result, expected_values=[100]) + + def test_case_sensitivity(self, client): + """Label matching is case-sensitive.""" + client.insert_gauge("metric", 100, EVAL_TIME, labels={"env": "Prod"}) + client.insert_gauge("metric", 200, EVAL_TIME, labels={"env": "prod"}) + client.insert_gauge("metric", 300, EVAL_TIME, labels={"env": "PROD"}) + + result = client.query('metric{env="prod"}', time=EVAL_TIME) + assert_instant_values(result, expected_values=[200]) + + result = client.query('metric{env="Prod"}', time=EVAL_TIME) + assert_instant_values(result, expected_values=[100]) + + def test_case_insensitive_regex(self, client): + """Regex can use (?i) for case-insensitive matching.""" + client.insert_gauge("metric", 100, EVAL_TIME, labels={"env": "Prod"}) + client.insert_gauge("metric", 200, EVAL_TIME, labels={"env": "prod"}) + client.insert_gauge("metric", 300, EVAL_TIME, labels={"env": "PROD"}) + + result = client.query('metric{env=~"(?i)prod"}', time=EVAL_TIME) + assert_instant_values(result, expected_values=[100, 200, 300]) + + def test_empty_selector_matches_all(self, labeled_series): + """metric{} matches all series of that metric.""" + result = labeled_series.query("metric{}", 
time=EVAL_TIME) + # All 6 series + assert get_series_count(result) == 6 + + def test_regex_escape_special_chars(self, client): + """Special regex characters need escaping.""" + client.insert_gauge("metric", 100, EVAL_TIME, labels={"path": "/api/v1"}) + client.insert_gauge("metric", 200, EVAL_TIME, labels={"path": "/api/v2"}) + client.insert_gauge("metric", 300, EVAL_TIME, labels={"path": "xapixv1"}) + + # Without escaping, . matches any char + result = client.query('metric{path=~"/api/v."}', time=EVAL_TIME) + assert_instant_values(result, expected_values=[100, 200]) + + def test_label_name_with_number(self, client): + """Label names can contain numbers (not at start).""" + client.insert_gauge("metric", 100, EVAL_TIME, labels={"level1": "a"}) + client.insert_gauge("metric", 200, EVAL_TIME, labels={"level2": "b"}) + + result = client.query('metric{level1="a"}', time=EVAL_TIME) + assert_instant_values(result, expected_values=[100]) + + +# ============================================================================= +# Tests: No Match Scenarios +# ============================================================================= + + +class TestNoMatchScenarios: + """Tests for scenarios that should return no results.""" + + def test_no_series_exists(self, client): + """Query for non-existent metric returns empty.""" + result = client.query('nonexistent{env="prod"}', time=EVAL_TIME) + assert_instant_empty(result) + + def test_no_matching_labels(self, labeled_series): + """Query with impossible label combination returns empty.""" + result = labeled_series.query('metric{env="prod", env="dev"}', time=EVAL_TIME) + # Can't have both env="prod" AND env="dev" + assert_instant_empty(result) + + def test_contradictory_matchers(self, labeled_series): + """Contradictory matchers return empty.""" + result = labeled_series.query('metric{env="prod", env!="prod"}', time=EVAL_TIME) + assert_instant_empty(result) + + def test_regex_no_match(self, labeled_series): + """Regex that matches 
nothing returns empty.""" + result = labeled_series.query('metric{env=~"xyz.*"}', time=EVAL_TIME) + assert_instant_empty(result) diff --git a/tests/queries/test_query_range.py b/tests/queries/test_query_range.py index c534bf2..bfd224d 100644 --- a/tests/queries/test_query_range.py +++ b/tests/queries/test_query_range.py @@ -3,7 +3,7 @@ from metricsqlite import MetricsQLiteClient from metricsqlite.engine import MatrixResult -START = 946_681_200_000 # 2000-01-01 00:00:00 UTC +START = 946_684_800_000 # 2000-01-01 00:00:00 UTC @pytest.fixture @@ -85,3 +85,111 @@ def test_only_latest_sample_in_result(self, client: MetricsQLiteClient): assert series[0].timestamp == START assert series[6].timestamp == START + 3_600_000 assert [sample.value for sample in series] == [0, 10, 20, 30, 40, 50, 60] + + +class TestRangeQueryStepBehavior: + """Tests for step behavior in range queries.""" + + def test_step_as_integer_seconds(self, client: MetricsQLiteClient): + """Step can be specified as integer seconds.""" + pytest.skip("TODO: Implement test") + + def test_step_as_duration_string(self, client: MetricsQLiteClient): + """Step can be specified as duration string like '5m'.""" + pytest.skip("TODO: Implement test") + + def test_step_default_is_5_minutes(self, client: MetricsQLiteClient): + """Default step is 5 minutes (300 seconds).""" + pytest.skip("TODO: Implement test") + + def test_step_alignment(self, client: MetricsQLiteClient): + """Steps are aligned to start, end at start + N*step <= end.""" + pytest.skip("TODO: Implement test") + + def test_step_1_second(self, client: MetricsQLiteClient): + """Very small step (1 second) produces many points.""" + pytest.skip("TODO: Implement test") + + def test_step_larger_than_range(self, client: MetricsQLiteClient): + """Step larger than query range produces start and end only.""" + pytest.skip("TODO: Implement test") + + +class TestRangeQueryTimestamps: + """Tests for timestamp handling in range queries.""" + + def 
test_start_timestamp_formats(self, client: MetricsQLiteClient): + """Start can be float, string, or datetime.""" + pytest.skip("TODO: Implement test") + + def test_end_timestamp_formats(self, client: MetricsQLiteClient): + """End can be float, string, or datetime.""" + pytest.skip("TODO: Implement test") + + def test_end_default_is_now(self, client: MetricsQLiteClient): + """End defaults to current time if not specified.""" + pytest.skip("TODO: Implement test") + + def test_start_required(self, client: MetricsQLiteClient): + """Start timestamp is required, raises error if missing.""" + pytest.skip("TODO: Implement test") + + +class TestRangeQueryMultipleSeries: + """Tests for range queries with multiple series.""" + + def test_multiple_series_same_metric(self, client: MetricsQLiteClient): + """Multiple series with different labels in same query.""" + pytest.skip("TODO: Implement test") + + def test_series_with_different_data_ranges(self, client: MetricsQLiteClient): + """Series with data at different time ranges.""" + pytest.skip("TODO: Implement test") + + def test_series_appears_mid_range(self, client: MetricsQLiteClient): + """Series that starts producing data mid-query-range.""" + pytest.skip("TODO: Implement test") + + def test_series_disappears_mid_range(self, client: MetricsQLiteClient): + """Series that stops producing data mid-query-range.""" + pytest.skip("TODO: Implement test") + + +class TestRangeQueryWithCompaction: + """Tests for range queries on compacted data.""" + + def test_range_query_on_compacted_gauges(self, client: MetricsQLiteClient): + """Range query spanning compacted gauge buckets.""" + pytest.skip("TODO: Implement test") + + def test_range_query_on_counters(self, client: MetricsQLiteClient): + """Range query with counter data.""" + pytest.skip("TODO: Implement test") + + def test_range_query_mixed_compacted_raw(self, client: MetricsQLiteClient): + """Range query spanning both compacted and raw data.""" + pytest.skip("TODO: Implement 
test") + + +class TestRangeQueryEdgeCases: + """Edge cases for range queries.""" + + def test_start_equals_end(self, client: MetricsQLiteClient): + """Start == end produces single point result.""" + pytest.skip("TODO: Implement test") + + def test_very_long_range(self, client: MetricsQLiteClient): + """Query spanning very long time range (1 year).""" + pytest.skip("TODO: Implement test") + + def test_no_data_in_range(self, client: MetricsQLiteClient): + """Query range with no data returns empty result.""" + pytest.skip("TODO: Implement test") + + def test_data_only_before_range(self, client: MetricsQLiteClient): + """Data exists but only before query range.""" + pytest.skip("TODO: Implement test") + + def test_data_only_after_range(self, client: MetricsQLiteClient): + """Data exists but only after query range.""" + pytest.skip("TODO: Implement test") diff --git a/tests/queries/test_rollup.py b/tests/queries/test_rollup.py index 0d237ae..4385ef3 100644 --- a/tests/queries/test_rollup.py +++ b/tests/queries/test_rollup.py @@ -1,65 +1,548 @@ +"""Tests for rollup functions (aggregation over time). + +Rollup functions aggregate samples within a time range for each series. +They take a range vector and produce an instant vector. 
+ +Functions tested: +- avg_over_time(): Average of samples in range +- sum_over_time(): Sum of samples in range +- min_over_time(): Minimum sample in range +- max_over_time(): Maximum sample in range +- count_over_time(): Number of samples in range +- (integrate(): Area under curve - trapezoidal integration) + +Key behaviors: +- Range is half-open: (T-range, T] +- Each series evaluated independently +- Empty range produces NaN +""" + +import math + import pytest -from metricsqlite import MetricsQLiteClient +from metricsqlite.engine import InstantVector, MatrixResult + +# Standard eval time: 2000-01-01 00:00:00 UTC +EVAL_TIME = 946_684_800_000 + -START = 946_681_200_000 # 2000-01-01 00:00:00 UTC +# ============================================================================= +# Fixtures (client fixture inherited from conftest.py) +# ============================================================================= @pytest.fixture -def client(): - """Create an in-memory client with tables initialized.""" - client = MetricsQLiteClient(None) - client.connect() - client.create_tables() - yield client - client.close() +def minute_series(client): + """Series with 1-minute interval samples. 
+ Series: metric (no labels) + - Values: minute offset from EVAL_TIME + - At T-5m: -5, at T: 0, at T+5m: 5 -def insert_gauge_series( - client: MetricsQLiteClient, data: list[tuple[float, float]], metric_name: str = "metric", labels: dict | None = None -): - for ts, v in data: - client.insert_gauge(metric_name, v, ts, labels=labels) + Range (T-10m, T] contains samples at minutes: -9, -8, ..., -1, 0 + Values: -9, -8, ..., -1, 0 (10 samples) + - avg = -4.5 + - sum = -45 + - min = -9 + - max = 0 + - count = 10 + """ + for minute in range(-60, 60): + client.insert_gauge("metric", minute, EVAL_TIME + 60_000 * minute) + return client -class TestFunctionOverTime: - def test_avg_over_time(self, client: MetricsQLiteClient): - for minute in range(-60, 60): - client.insert_gauge("metric", minute, START + 60_000 * minute) +@pytest.fixture +def multi_series(client): + """Multiple series with different values. - result = client.query_range(query="avg_over_time(metric[10m])", start=START, end=START + 1_800_000, step="10m") - labels, series = result.series[0] - assert labels == {"__name__": "metric"} - assert len(series) == 4 - assert series[0].timestamp == START - assert series[3].timestamp == START + 1_800_000 - # First bucket is (T-10m:T], so values are [-9, ... 0] of which average is -4.5. - assert [sample.value for sample in series] == [-4.5, 5.5, 15.5, 25.5] + Series: + - metric{label="A"}: values = 100 + minute + - metric{label="B"}: values = 200 + minute + - metric{label="C"}: values = 300 + minute + """ + for minute in range(-60, 60): + ts = EVAL_TIME + 60_000 * minute + client.insert_gauge("metric", 100 + minute, ts, labels={"label": "A"}) + client.insert_gauge("metric", 200 + minute, ts, labels={"label": "B"}) + client.insert_gauge("metric", 300 + minute, ts, labels={"label": "C"}) + return client + + +@pytest.fixture +def sparse_series(client): + """Series with gaps in data. 
+ + Samples only at T-30m, T-20m, T-10m, T + Values: 10, 20, 30, 40 + """ + client.insert_gauge("metric", 10, EVAL_TIME - 1_800_000) # T-30m + client.insert_gauge("metric", 20, EVAL_TIME - 1_200_000) # T-20m + client.insert_gauge("metric", 30, EVAL_TIME - 600_000) # T-10m + client.insert_gauge("metric", 40, EVAL_TIME) # T + return client + + +# ============================================================================= +# Helpers +# ============================================================================= + + +def assert_instant_value(result, expected_value, expected_labels=None): + """Assert instant query returns expected single value.""" + assert isinstance(result, InstantVector) + assert len(result.series) == 1 + labels, sample = result.series[0] + if expected_labels is not None: + assert labels == expected_labels + assert sample.value == pytest.approx(expected_value) + + +def assert_instant_nan(result): + """Assert instant query returns NaN.""" + assert isinstance(result, InstantVector) + assert len(result.series) == 1 + _, sample = result.series[0] + assert math.isnan(sample.value) + + +def assert_instant_empty(result): + """Assert instant query returns no series.""" + assert isinstance(result, InstantVector) + assert len(result.series) == 0 + + +def assert_instant_values(result, expected_values): + """Assert instant vector contains expected values (sorted).""" + assert isinstance(result, InstantVector) + actual = sorted([sample.value for _, sample in result.series]) + assert actual == pytest.approx(sorted(expected_values)) + + +def assert_range_values(result, expected_values, expected_labels=None): + """Assert range query returns expected values at each step.""" + assert isinstance(result, MatrixResult) + assert len(result.series) == 1 + labels, series = result.series[0] + if expected_labels is not None: + assert labels == expected_labels + actual_values = [s.value for s in series] + assert actual_values == pytest.approx(expected_values) - def 
test_avg_over_time_small_lookback(self, client: MetricsQLiteClient): - for minute in range(-60, 60): - client.insert_gauge("metric", minute, START + 60_000 * minute) - result = client.query_range(query="avg_over_time(metric[3m])", start=START, end=START + 1_800_000, step="10m") +def assert_range_empty(result): + """Assert range query returns no series.""" + assert isinstance(result, MatrixResult) + assert len(result.series) == 0 + + +# ============================================================================= +# Tests: Range Vector (query_range with rollup) +# ============================================================================= + + +class TestRangeVector: + """Tests for rollup functions in range queries.""" + + def test_avg_over_time_small_lookback(self, minute_series): + """avg_over_time with 3m range.""" + result = minute_series.query_range( + query="avg_over_time(metric[3m])", + start=EVAL_TIME, + end=EVAL_TIME + 1_800_000, + step="10m", + ) labels, series = result.series[0] assert labels == {"__name__": "metric"} assert len(series) == 4 - assert series[0].timestamp == START - assert series[3].timestamp == START + 1_800_000 - # First bucket is (T-3m:T], so values are [-2, -1, 0] of which average is -1. 
- assert [sample.value for sample in series] == [-1, 9, 19, 29] - - def test_avg_over_time_large_lookback(self, client: MetricsQLiteClient): - for minute in range(-60, 60): - client.insert_gauge("metric", minute, START + 60_000 * minute) + assert series[0].timestamp == EVAL_TIME + assert series[3].timestamp == EVAL_TIME + 1_800_000 + # First bucket is (T-3m, T], so values are [-2, -1, 0], avg = -1 + assert [sample.value for sample in series] == pytest.approx([-1, 9, 19, 29]) - result = client.query_range(query="avg_over_time(metric[15m])", start=START, end=START + 1_800_000, step="10m") + def test_avg_over_time_large_lookback(self, minute_series): + """avg_over_time with 15m range.""" + result = minute_series.query_range( + query="avg_over_time(metric[15m])", + start=EVAL_TIME, + end=EVAL_TIME + 1_800_000, + step="10m", + ) labels, series = result.series[0] assert labels == {"__name__": "metric"} assert len(series) == 4 - assert series[0].timestamp == START - assert series[3].timestamp == START + 1_800_000 - # First bucket is (T-15m:T], so values are [-14, ... , 0] of which average is -7. 
- assert [sample.value for sample in series] == [-7, 3, 13, 23] + # First bucket is (T-15m, T], so values are [-14, ..., 0], avg = -7 + assert [sample.value for sample in series] == pytest.approx([-7, 3, 13, 23]) + + +# ============================================================================= +# Tests: Function Over Time (instant queries) +# ============================================================================= + + +class TestFunctionOverTime: + """Tests for rollup functions in instant queries.""" + + def test_avg_over_time(self, minute_series): + """avg_over_time computes average of samples in range.""" + result = minute_series.query("avg_over_time(metric[10m])", time=EVAL_TIME) + # Range (T-10m, T] has samples at -9, -8, ..., 0, avg = -4.5 + assert_instant_value(result, expected_value=-4.5) + + def test_sum_over_time(self, minute_series): + """sum_over_time sums all samples in range.""" + result = minute_series.query("sum_over_time(metric[10m])", time=EVAL_TIME) + # Range (T-10m, T] has samples -9+...+0 = -45 + assert_instant_value(result, expected_value=-45) + + def test_min_over_time(self, minute_series): + """min_over_time finds minimum sample in range.""" + result = minute_series.query("min_over_time(metric[10m])", time=EVAL_TIME) + # Range (T-10m, T] has min = -9 + assert_instant_value(result, expected_value=-9) + + def test_max_over_time(self, minute_series): + """max_over_time finds maximum sample in range.""" + result = minute_series.query("max_over_time(metric[10m])", time=EVAL_TIME) + # Range (T-10m, T] has max = 0 + assert_instant_value(result, expected_value=0) + + def test_count_over_time(self, minute_series): + """count_over_time counts samples in range.""" + result = minute_series.query("count_over_time(metric[10m])", time=EVAL_TIME) + # Range (T-10m, T] has 10 samples + assert_instant_value(result, expected_value=10) + + +class TestFunctionOverTimeRangeQuery: + """Tests for rollup functions in range queries.""" + + def 
test_avg_over_time_range(self, minute_series): + """avg_over_time evaluated at each step.""" + result = minute_series.query_range( + query="avg_over_time(metric[10m])", + start=EVAL_TIME, + end=EVAL_TIME + 1_800_000, + step="10m", + ) + # At each step T+N: avg of (T+N-10m, T+N] + # T: avg(-9..0) = -4.5 + # T+10m: avg(1..10) = 5.5 + # T+20m: avg(11..20) = 15.5 + # T+30m: avg(21..30) = 25.5 + assert_range_values(result, expected_values=[-4.5, 5.5, 15.5, 25.5]) + + def test_sum_over_time_range(self, minute_series): + """sum_over_time evaluated at each step.""" + result = minute_series.query_range( + query="sum_over_time(metric[10m])", + start=EVAL_TIME, + end=EVAL_TIME + 1_800_000, + step="10m", + ) + # T: sum(-9..0) = -45 + # T+10m: sum(1..10) = 55 + # T+20m: sum(11..20) = 155 + # T+30m: sum(21..30) = 255 + assert_range_values(result, expected_values=[-45, 55, 155, 255]) + + def test_min_over_time_range(self, minute_series): + """min_over_time evaluated at each step.""" + result = minute_series.query_range( + query="min_over_time(metric[10m])", + start=EVAL_TIME, + end=EVAL_TIME + 1_800_000, + step="10m", + ) + assert_range_values(result, expected_values=[-9, 1, 11, 21]) + + def test_max_over_time_range(self, minute_series): + """max_over_time evaluated at each step.""" + result = minute_series.query_range( + query="max_over_time(metric[10m])", + start=EVAL_TIME, + end=EVAL_TIME + 1_800_000, + step="10m", + ) + assert_range_values(result, expected_values=[0, 10, 20, 30]) + + def test_count_over_time_range(self, minute_series): + """count_over_time evaluated at each step.""" + result = minute_series.query_range( + query="count_over_time(metric[10m])", + start=EVAL_TIME, + end=EVAL_TIME + 1_800_000, + step="10m", + ) + assert_range_values(result, expected_values=[10, 10, 10, 10]) + + +# ============================================================================= +# Tests: Edge Cases +# ============================================================================= + + 
+class TestRollupEdgeCases: + """Edge cases for rollup functions.""" + + def test_empty_range_returns_empty(self, client): + """Rollup on non-existent metric returns empty result.""" + result = client.query("avg_over_time(nonexistent[10m])", time=EVAL_TIME) + assert_instant_empty(result) + + def test_single_sample_in_range(self, client): + """Rollup with exactly one sample.""" + client.insert_gauge("metric", 42, EVAL_TIME) + result = client.query("avg_over_time(metric[10m])", time=EVAL_TIME) + # Single sample: avg = sum = min = max = 42, count = 1 + assert_instant_value(result, expected_value=42) + + def test_single_sample_sum(self, client): + """sum_over_time with single sample equals that sample.""" + client.insert_gauge("metric", 42, EVAL_TIME) + result = client.query("sum_over_time(metric[10m])", time=EVAL_TIME) + assert_instant_value(result, expected_value=42) + + def test_single_sample_count(self, client): + """count_over_time with single sample returns 1.""" + client.insert_gauge("metric", 42, EVAL_TIME) + result = client.query("count_over_time(metric[10m])", time=EVAL_TIME) + assert_instant_value(result, expected_value=1) + + def test_sample_exactly_at_range_boundary_excluded(self, client): + """Sample at exactly T-range is excluded (half-open interval).""" + # Insert sample at exactly T-10m (should be excluded from (T-10m, T]) + client.insert_gauge("metric", 100, EVAL_TIME - 600_000) # T-10m + # Insert sample at T-9m (should be included) + client.insert_gauge("metric", 200, EVAL_TIME - 540_000) # T-9m + + result = client.query("sum_over_time(metric[10m])", time=EVAL_TIME) + # Only T-9m sample should be included + assert_instant_value(result, expected_value=200) + + def test_sample_at_eval_time_included(self, client): + """Sample at exactly T (eval_time) is included.""" + client.insert_gauge("metric", 42, EVAL_TIME) + result = client.query("count_over_time(metric[1m])", time=EVAL_TIME) + assert_instant_value(result, expected_value=1) + + def 
test_all_samples_outside_range(self, client): + """All samples outside range returns empty.""" + # Insert samples only outside the range + client.insert_gauge("metric", 100, EVAL_TIME - 1_200_000) # T-20m + client.insert_gauge("metric", 200, EVAL_TIME + 600_000) # T+10m + + result = client.query("avg_over_time(metric[10m])", time=EVAL_TIME) + assert_instant_empty(result) + + def test_sparse_data_rollup(self, sparse_series): + """Rollup on sparse data only includes samples in range.""" + # sparse_series has samples at T-30m, T-20m, T-10m, T with values 10, 20, 30, 40 + # Range (T-15m, T] should only include T-10m (30) and T (40) + result = sparse_series.query("avg_over_time(metric[15m])", time=EVAL_TIME) + assert_instant_value(result, expected_value=35) # (30 + 40) / 2 + + def test_sparse_data_count(self, sparse_series): + """count_over_time on sparse data.""" + result = sparse_series.query("count_over_time(metric[15m])", time=EVAL_TIME) + assert_instant_value(result, expected_value=2) # Only T-10m and T + + +# ============================================================================= +# Tests: Multiple Series +# ============================================================================= + + +class TestRollupMultipleSeries: + """Rollup functions with multiple series.""" + + def test_rollup_independent_per_series(self, multi_series): + """Each series is evaluated independently.""" + result = multi_series.query("avg_over_time(metric[10m])", time=EVAL_TIME) + # Series A: avg(91..100) = 95.5 + # Series B: avg(191..200) = 195.5 + # Series C: avg(291..300) = 295.5 + assert isinstance(result, InstantVector) + assert len(result.series) == 3 + values = sorted([sample.value for _, sample in result.series]) + assert values == pytest.approx([95.5, 195.5, 295.5]) + + def test_rollup_preserves_labels(self, multi_series): + """Rollup preserves series labels.""" + result = multi_series.query("avg_over_time(metric[10m])", time=EVAL_TIME) + assert isinstance(result, 
InstantVector) + labels_set = {frozenset(labels.items()) for labels, _ in result.series} + expected = { + frozenset([("__name__", "metric"), ("label", "A")]), + frozenset([("__name__", "metric"), ("label", "B")]), + frozenset([("__name__", "metric"), ("label", "C")]), + } + assert labels_set == expected + + def test_partial_data_some_series(self, client): + """Some series may have no data in range.""" + # Series A has data in range + client.insert_gauge("metric", 100, EVAL_TIME, labels={"label": "A"}) + # Series B has no data in range (only old data) + client.insert_gauge("metric", 200, EVAL_TIME - 1_200_000, labels={"label": "B"}) + + result = client.query("avg_over_time(metric[10m])", time=EVAL_TIME) + # Only series A should be in result + assert isinstance(result, InstantVector) + assert len(result.series) == 1 + labels, sample = result.series[0] + assert labels["label"] == "A" + assert sample.value == pytest.approx(100) + + def test_sum_over_time_multiple_series(self, multi_series): + """sum_over_time on multiple series.""" + result = multi_series.query("sum_over_time(metric[10m])", time=EVAL_TIME) + # Series A: sum(91..100) = 955 + # Series B: sum(191..200) = 1955 + # Series C: sum(291..300) = 2955 + assert_instant_values(result, expected_values=[955, 1955, 2955]) + + def test_count_over_time_multiple_series(self, multi_series): + """count_over_time returns same count for each series.""" + result = multi_series.query("count_over_time(metric[10m])", time=EVAL_TIME) + # Each series has 10 samples in range + assert_instant_values(result, expected_values=[10, 10, 10]) + + +# ============================================================================= +# Tests: Compacted Data +# ============================================================================= + + +class TestRollupOnCompactedData: + """Rollup functions on compacted gauge data.""" + + def test_avg_over_time_compacted(self, client): + """avg_over_time uses average value from compacted bucket.""" + 
pytest.skip("TODO: Implement once compaction is fully supported") + + def test_min_over_time_compacted_uses_stored_min(self, client): + """min_over_time should use stored min from compacted data.""" + pytest.skip("TODO: Implement once compaction is fully supported") + + def test_max_over_time_compacted_uses_stored_max(self, client): + """max_over_time should use stored max from compacted data.""" + pytest.skip("TODO: Implement once compaction is fully supported") + + def test_sum_over_time_compacted(self, client): + """sum_over_time sums average values from compacted buckets.""" + pytest.skip("TODO: Implement once compaction is fully supported") + + def test_rollup_on_partially_compacted(self, client): + """Range spanning compacted and raw data.""" + pytest.skip("TODO: Implement once compaction is fully supported") + + +# ============================================================================= +# Tests: Integrate Function +# ============================================================================= + + +class TestIntegrateFunction: + """Tests for integrate() rollup function.""" + + def test_integrate_constant_value(self, client): + """integrate() of constant value = value * duration.""" + pytest.skip("TODO: Implement once integrate() is supported") + + def test_integrate_linear_increase(self, client): + """integrate() of linearly increasing values uses trapezoidal rule.""" + pytest.skip("TODO: Implement once integrate() is supported") + + def test_integrate_single_sample(self, client): + """integrate() with single sample returns 0.""" + pytest.skip("TODO: Implement once integrate() is supported") + + def test_integrate_empty_range(self, client): + """integrate() with no samples returns 0.""" + pytest.skip("TODO: Implement once integrate() is supported") + + def test_integrate_returns_seconds(self, client): + """integrate() returns value in (value * seconds).""" + pytest.skip("TODO: Implement once integrate() is supported") + + +# 
============================================================================= +# Tests: Combined with Other Functions +# ============================================================================= + + +class TestRollupCombined: + """Rollup combined with other functions.""" + + def test_rollup_with_binary_op(self, minute_series): + """avg_over_time(metric[10m]) * 2 applies operation to result.""" + result = minute_series.query("avg_over_time(metric[10m]) * 2", time=EVAL_TIME) + # avg = -4.5, * 2 = -9 + assert_instant_value(result, expected_value=-9) + + def test_rollup_with_transformation(self, minute_series): + """abs(min_over_time(metric[10m])) applies abs to result.""" + result = minute_series.query("abs(min_over_time(metric[10m]))", time=EVAL_TIME) + # min = -9, abs(-9) = 9 + assert_instant_value(result, expected_value=9) + + def test_aggregation_of_rollup(self, multi_series): + """sum(avg_over_time(metric[10m])) aggregates rollup results.""" + result = multi_series.query("sum(avg_over_time(metric[10m]))", time=EVAL_TIME) + # Series averages: 95.5, 195.5, 295.5 + # Sum = 586.5 + assert_instant_value(result, expected_value=586.5) + + def test_rollup_in_range_query(self, minute_series): + """Rollup function evaluated at each range query step.""" + result = minute_series.query_range( + query="max_over_time(metric[5m])", + start=EVAL_TIME, + end=EVAL_TIME + 600_000, + step="5m", + ) + # At T: max(-4..0) = 0 + # At T+5m: max(1..5) = 5 + # At T+10m: max(6..10) = 10 + assert_range_values(result, expected_values=[0, 5, 10]) + + +# ============================================================================= +# Tests: Different Range Sizes +# ============================================================================= + + +class TestDifferentRangeSizes: + """Tests for various range durations.""" + + def test_1_minute_range(self, minute_series): + """1m range includes 1 sample.""" + result = minute_series.query("count_over_time(metric[1m])", time=EVAL_TIME) + # Range 
(T-1m, T] includes only T (value 0) + assert_instant_value(result, expected_value=1) + + def test_5_minute_range(self, minute_series): + """5m range includes 5 samples.""" + result = minute_series.query("count_over_time(metric[5m])", time=EVAL_TIME) + # Range (T-5m, T] includes -4, -3, -2, -1, 0 (5 samples) + assert_instant_value(result, expected_value=5) + + def test_1_hour_range(self, minute_series): + """1h range includes 60 samples.""" + result = minute_series.query("count_over_time(metric[1h])", time=EVAL_TIME) + # Range (T-1h, T] includes -59, ..., 0 (60 samples) + assert_instant_value(result, expected_value=60) + + def test_avg_over_1_minute(self, minute_series): + """avg_over_time with 1m range.""" + result = minute_series.query("avg_over_time(metric[1m])", time=EVAL_TIME) + # Only sample at T with value 0 + assert_instant_value(result, expected_value=0) - # TODO: query on compacted rows + def test_avg_over_5_minutes(self, minute_series): + """avg_over_time with 5m range.""" + result = minute_series.query("avg_over_time(metric[5m])", time=EVAL_TIME) + # Range (T-5m, T]: values -4, -3, -2, -1, 0, avg = -2 + assert_instant_value(result, expected_value=-2) diff --git a/tests/queries/test_selectors.py b/tests/queries/test_selectors.py index cdff5f4..4f8b21a 100644 --- a/tests/queries/test_selectors.py +++ b/tests/queries/test_selectors.py @@ -5,7 +5,7 @@ from metricsqlite import MetricsQLiteClient from metricsqlite.engine import InstantVector, RangeVectorResult, Sample -EVAL_TIME = 946_681_200_000 # 2000-01-01 00:00:00 UTC +EVAL_TIME = 946_684_800_000 # 2000-01-01 00:00:00 UTC @pytest.fixture @@ -390,3 +390,71 @@ def test_range_selector_singular_gauge(self, client: MetricsQLiteClient): assert len(samples) == 1 assert samples[0].timestamp == EVAL_TIME - 120_000 assert samples[0].value == 42 + + +class TestSelectorEdgeCases: + """Edge cases for metric selectors.""" + + def test_metric_name_with_underscores(self, client: MetricsQLiteClient): + """Metric names can 
contain underscores.""" + pytest.skip("TODO: Implement test") + + def test_metric_name_with_colons(self, client: MetricsQLiteClient): + """Metric names can contain colons (recording rules).""" + pytest.skip("TODO: Implement test") + + def test_very_long_metric_name(self, client: MetricsQLiteClient): + """Very long metric names are handled correctly.""" + pytest.skip("TODO: Implement test") + + def test_label_with_special_characters(self, client: MetricsQLiteClient): + """Label values with special characters.""" + pytest.skip("TODO: Implement test") + + +class TestRangeVectorDurations: + """Tests for various range vector duration formats.""" + + def test_range_seconds(self, client: MetricsQLiteClient): + """metric[30s] - seconds duration.""" + pytest.skip("TODO: Implement test") + + def test_range_minutes(self, client: MetricsQLiteClient): + """metric[5m] - minutes duration.""" + pytest.skip("TODO: Implement test") + + def test_range_hours(self, client: MetricsQLiteClient): + """metric[1h] - hours duration.""" + pytest.skip("TODO: Implement test") + + def test_range_days(self, client: MetricsQLiteClient): + """metric[1d] - days duration.""" + pytest.skip("TODO: Implement test") + + def test_range_weeks(self, client: MetricsQLiteClient): + """metric[1w] - weeks duration.""" + pytest.skip("TODO: Implement test") + + def test_range_combined(self, client: MetricsQLiteClient): + """metric[1h30m] - combined duration.""" + pytest.skip("TODO: Implement test - check if supported") + + +class TestSelectorLabelMatching: + """Tests for label matching in selectors.""" + + def test_equality_match(self, client: MetricsQLiteClient): + """metric{label="value"} exact match.""" + pytest.skip("TODO: Implement test") + + def test_not_equal_match(self, client: MetricsQLiteClient): + """metric{label!="value"} not equal match.""" + pytest.skip("TODO: Implement test") + + def test_regex_match(self, client: MetricsQLiteClient): + """metric{label=~"val.*"} regex match.""" + pytest.skip("TODO: 
Implement test") + + def test_regex_not_match(self, client: MetricsQLiteClient): + """metric{label!~"val.*"} regex not match.""" + pytest.skip("TODO: Implement test") diff --git a/tests/queries/test_staleness.py b/tests/queries/test_staleness.py new file mode 100644 index 0000000..9c4884f --- /dev/null +++ b/tests/queries/test_staleness.py @@ -0,0 +1,155 @@ +"""Tests for staleness semantics. + +Staleness determines when a sample is considered too old to be included +in query results. This is controlled by the lookback window. + +Key behaviors: +- Default lookback is 5 minutes (300 seconds) +- Samples outside lookback window are excluded +- Each series evaluated independently +- Compacted/counter data uses end timestamp for staleness +""" + +import pytest + +from metricsqlite import MetricsQLiteClient + +EVAL_TIME = 946_684_800_000 # 2000-01-01 00:00:00 UTC + + +@pytest.fixture +def client(): + """Create an in-memory client with tables initialized.""" + client = MetricsQLiteClient(None) + client.connect() + client.create_tables() + yield client + client.close() + + +class TestDefaultStaleness: + """Tests for default 5-minute staleness window.""" + + def test_sample_4_minutes_ago_not_stale(self, client: MetricsQLiteClient): + """Sample 4 minutes before eval_time is not stale.""" + pytest.skip("TODO: Implement test") + + def test_sample_5_minutes_ago_not_stale(self, client: MetricsQLiteClient): + """Sample exactly 5 minutes before eval_time is not stale (boundary).""" + pytest.skip("TODO: Implement test") + + def test_sample_5_minutes_1_second_ago_stale(self, client: MetricsQLiteClient): + """Sample 5m1s before eval_time is stale.""" + pytest.skip("TODO: Implement test") + + def test_sample_6_minutes_ago_stale(self, client: MetricsQLiteClient): + """Sample 6 minutes before eval_time is stale.""" + pytest.skip("TODO: Implement test") + + +class TestCustomLookback: + """Tests for custom lookback window.""" + + def test_short_lookback_1_minute(self, client: 
MetricsQLiteClient): + """With 1m lookback, 2-minute-old sample is stale.""" + pytest.skip("TODO: Implement test") + + def test_long_lookback_1_hour(self, client: MetricsQLiteClient): + """With 1h lookback, 30-minute-old sample is not stale.""" + pytest.skip("TODO: Implement test") + + def test_lookback_boundary_included(self, client: MetricsQLiteClient): + """Sample at exactly eval_time - lookback is included.""" + pytest.skip("TODO: Implement test") + + def test_lookback_boundary_excluded(self, client: MetricsQLiteClient): + """Sample 1ms before eval_time - lookback is excluded.""" + pytest.skip("TODO: Implement test") + + +class TestCounterStaleness: + """Tests for counter staleness using end timestamp.""" + + def test_counter_end_timestamp_determines_staleness(self, client: MetricsQLiteClient): + """Counter with old start but recent end is not stale. + + If counter has start=T-10m but end=T-2m, and lookback is 5m, + the counter is NOT stale because end >= T-5m. + """ + pytest.skip("TODO: Implement test") + + def test_counter_start_before_end_after_lookback(self, client: MetricsQLiteClient): + """Counter starting before lookback but ending within is not stale.""" + pytest.skip("TODO: Implement test") + + def test_counter_end_before_lookback_is_stale(self, client: MetricsQLiteClient): + """Counter with end before lookback boundary is stale.""" + pytest.skip("TODO: Implement test") + + def test_counter_spans_entire_lookback(self, client: MetricsQLiteClient): + """Counter spanning T-10m to T+5m is not stale at T with 5m lookback.""" + pytest.skip("TODO: Implement test") + + +class TestCompactedGaugeStaleness: + """Tests for compacted gauge staleness.""" + + def test_compacted_gauge_end_determines_staleness(self, client: MetricsQLiteClient): + """Compacted gauge uses end timestamp for staleness check.""" + pytest.skip("TODO: Implement test") + + def test_compacted_bucket_partially_in_lookback(self, client: MetricsQLiteClient): + """Bucket with end in lookback but 
start outside is not stale.""" + pytest.skip("TODO: Implement test") + + +class TestMultiSeriesStaleness: + """Tests for staleness with multiple series.""" + + def test_each_series_independent_staleness(self, client: MetricsQLiteClient): + """Each series is checked for staleness independently.""" + pytest.skip("TODO: Implement test") + + def test_some_series_stale_others_not(self, client: MetricsQLiteClient): + """Only non-stale series are included in results.""" + pytest.skip("TODO: Implement test") + + def test_same_metric_different_labels_staleness(self, client: MetricsQLiteClient): + """Same metric with different labels can have different staleness.""" + pytest.skip("TODO: Implement test") + + +class TestStalenessInRangeQuery: + """Tests for staleness in range queries.""" + + def test_staleness_checked_at_each_step(self, client: MetricsQLiteClient): + """Staleness is checked at each step time in range query.""" + pytest.skip("TODO: Implement test") + + def test_series_appears_when_no_longer_stale(self, client: MetricsQLiteClient): + """Series appears in results once a sample enters lookback window.""" + pytest.skip("TODO: Implement test") + + def test_series_disappears_when_becomes_stale(self, client: MetricsQLiteClient): + """Series disappears from results once all samples are stale.""" + pytest.skip("TODO: Implement test") + + def test_gap_in_series_data(self, client: MetricsQLiteClient): + """Series with gap has missing steps where data is stale.""" + pytest.skip("TODO: Implement test") + + +class TestStalenessWithRollupFunctions: + """Tests for staleness interaction with rollup functions.""" + + def test_avg_over_time_range_ignores_staleness(self, client: MetricsQLiteClient): + """Range vectors use explicit range, not lookback for sample selection. + + avg_over_time(metric[10m]) includes samples in (T-10m, T] regardless + of lookback setting. 
+ """ + pytest.skip("TODO: Implement test") + + def test_rollup_with_no_samples_returns_nan(self, client: MetricsQLiteClient): + """Rollup function on empty range returns NaN.""" + pytest.skip("TODO: Implement test") diff --git a/tests/queries/test_subqueries.py b/tests/queries/test_subqueries.py new file mode 100644 index 0000000..bb69cd4 --- /dev/null +++ b/tests/queries/test_subqueries.py @@ -0,0 +1,110 @@ +"""Tests for subqueries. + +Subqueries allow applying range query semantics inside an instant query. +Syntax: func(metric[range:step]) or func(metric[range:]) + +VictoriaMetrics-specific behaviors: +- Subqueries create intermediate range evaluation +- Result is processed by outer rollup function +- Different from plain range vectors +""" + +import pytest + +from metricsqlite import MetricsQLiteClient + +EVAL_TIME = 946_684_800_000 # 2000-01-01 00:00:00 UTC + + +@pytest.fixture +def client(): + """Create an in-memory client with tables initialized.""" + client = MetricsQLiteClient(None) + client.connect() + client.create_tables() + yield client + client.close() + + +class TestSubqueryBasics: + """Basic subquery functionality.""" + + def test_subquery_syntax_with_step(self, client: MetricsQLiteClient): + """avg_over_time(metric[1h:5m]) averages over 1h with 5m step.""" + pytest.skip("TODO: Implement test") + + def test_subquery_syntax_default_step(self, client: MetricsQLiteClient): + """avg_over_time(metric[1h:]) uses default step.""" + pytest.skip("TODO: Implement test") + + def test_subquery_creates_multiple_samples(self, client: MetricsQLiteClient): + """Subquery evaluates metric at each step time, creating samples.""" + pytest.skip("TODO: Implement test") + + +class TestSubqueryWithRollup: + """Subqueries with rollup functions.""" + + def test_avg_of_subquery(self, client: MetricsQLiteClient): + """avg_over_time(metric[1h:5m]) averages the step results.""" + pytest.skip("TODO: Implement test") + + def test_max_of_subquery(self, client: MetricsQLiteClient): 
+ """max_over_time(metric[1h:5m]) finds max of step results.""" + pytest.skip("TODO: Implement test") + + def test_count_of_subquery(self, client: MetricsQLiteClient): + """count_over_time(metric[1h:5m]) counts step results.""" + pytest.skip("TODO: Implement test") + + +class TestSubqueryWithTransformation: + """Subqueries with transformation functions.""" + + def test_transformation_in_subquery(self, client: MetricsQLiteClient): + """avg_over_time(clamp_min(metric, 0)[1h:5m]) clamps then averages.""" + pytest.skip("TODO: Implement test") + + def test_transformation_on_subquery_result(self, client: MetricsQLiteClient): + """abs(avg_over_time(metric[1h:5m])) takes abs of average.""" + pytest.skip("TODO: Implement test") + + +class TestSubqueryWithRollupNesting: + """Nested rollup functions with subqueries.""" + + def test_avg_of_sum_over_time(self, client: MetricsQLiteClient): + """avg_over_time(sum_over_time(metric[5m])[1h:5m]) nested rollups.""" + pytest.skip("TODO: Implement test") + + +class TestSubqueryInRangeQuery: + """Subqueries within range queries.""" + + def test_subquery_at_each_range_step(self, client: MetricsQLiteClient): + """Range query evaluates subquery at each step.""" + pytest.skip("TODO: Implement test") + + def test_subquery_lookback_in_range(self, client: MetricsQLiteClient): + """Subquery lookback is relative to each range step.""" + pytest.skip("TODO: Implement test") + + +class TestSubqueryEdgeCases: + """Edge cases for subqueries.""" + + def test_subquery_step_larger_than_range(self, client: MetricsQLiteClient): + """metric[5m:10m] where step > range - only one sample.""" + pytest.skip("TODO: Implement test") + + def test_subquery_with_offset(self, client: MetricsQLiteClient): + """metric[1h:5m] offset 1h shifts the window.""" + pytest.skip("TODO: Implement test - if offset is supported") + + def test_subquery_empty_result(self, client: MetricsQLiteClient): + """Subquery with no data returns NaN for rollup.""" + pytest.skip("TODO: 
Implement test") + + def test_subquery_partial_data(self, client: MetricsQLiteClient): + """Subquery with some steps missing data handles correctly.""" + pytest.skip("TODO: Implement test") diff --git a/tests/queries/test_transformation.py b/tests/queries/test_transformation.py new file mode 100644 index 0000000..ec4411e --- /dev/null +++ b/tests/queries/test_transformation.py @@ -0,0 +1,170 @@ +"""Tests for transformation functions. + +Transformation functions transform individual sample values without +changing the series structure. They operate element-wise on each sample. + +Functions tested: +- abs(): Absolute value +- clamp_min(v, min): Clamp values to minimum +- clamp_max(v, max): Clamp values to maximum + +VictoriaMetrics-specific behaviors: +- Transformations preserve all labels +- Applied before aggregation if nested +""" + +import pytest + +from metricsqlite import MetricsQLiteClient + +EVAL_TIME = 946_684_800_000 # 2000-01-01 00:00:00 UTC + + +@pytest.fixture +def client(): + """Create an in-memory client with tables initialized.""" + client = MetricsQLiteClient(None) + client.connect() + client.create_tables() + yield client + client.close() + + +class TestAbsTransformation: + """Tests for abs() transformation function.""" + + def test_abs_positive_value(self, client: MetricsQLiteClient): + """abs() of positive value returns same value.""" + pytest.skip("TODO: Implement test") + + def test_abs_negative_value(self, client: MetricsQLiteClient): + """abs() of negative value returns positive value.""" + pytest.skip("TODO: Implement test") + + def test_abs_zero(self, client: MetricsQLiteClient): + """abs() of zero returns zero.""" + pytest.skip("TODO: Implement test") + + def test_abs_multiple_series(self, client: MetricsQLiteClient): + """abs() applied independently to each series.""" + pytest.skip("TODO: Implement test") + + def test_abs_preserves_labels(self, client: MetricsQLiteClient): + """abs() preserves all series labels.""" + pytest.skip("TODO: 
Implement test") + + def test_abs_preserves_timestamp(self, client: MetricsQLiteClient): + """abs() preserves sample timestamp.""" + pytest.skip("TODO: Implement test") + + def test_abs_empty_result(self, client: MetricsQLiteClient): + """abs() on no series returns empty InstantVector.""" + pytest.skip("TODO: Implement test") + + +class TestClampMinTransformation: + """Tests for clamp_min() transformation function.""" + + def test_clamp_min_value_below_threshold(self, client: MetricsQLiteClient): + """Value below threshold is clamped to threshold.""" + pytest.skip("TODO: Implement test") + + def test_clamp_min_value_above_threshold(self, client: MetricsQLiteClient): + """Value above threshold is unchanged.""" + pytest.skip("TODO: Implement test") + + def test_clamp_min_value_equals_threshold(self, client: MetricsQLiteClient): + """Value equal to threshold is unchanged.""" + pytest.skip("TODO: Implement test") + + def test_clamp_min_negative_threshold(self, client: MetricsQLiteClient): + """Negative threshold values work correctly.""" + pytest.skip("TODO: Implement test") + + def test_clamp_min_zero_threshold(self, client: MetricsQLiteClient): + """Zero threshold clamps negative values to zero.""" + pytest.skip("TODO: Implement test") + + def test_clamp_min_multiple_series(self, client: MetricsQLiteClient): + """clamp_min() applied independently to each series.""" + pytest.skip("TODO: Implement test") + + def test_clamp_min_preserves_labels(self, client: MetricsQLiteClient): + """clamp_min() preserves all series labels.""" + pytest.skip("TODO: Implement test") + + def test_clamp_min_missing_argument_error(self, client: MetricsQLiteClient): + """clamp_min() without threshold argument raises error.""" + pytest.skip("TODO: Implement test") + + +class TestClampMaxTransformation: + """Tests for clamp_max() transformation function.""" + + def test_clamp_max_value_above_threshold(self, client: MetricsQLiteClient): + """Value above threshold is clamped to threshold.""" + 
pytest.skip("TODO: Implement test") + + def test_clamp_max_value_below_threshold(self, client: MetricsQLiteClient): + """Value below threshold is unchanged.""" + pytest.skip("TODO: Implement test") + + def test_clamp_max_value_equals_threshold(self, client: MetricsQLiteClient): + """Value equal to threshold is unchanged.""" + pytest.skip("TODO: Implement test") + + def test_clamp_max_negative_threshold(self, client: MetricsQLiteClient): + """Negative threshold values work correctly.""" + pytest.skip("TODO: Implement test") + + def test_clamp_max_zero_threshold(self, client: MetricsQLiteClient): + """Zero threshold clamps positive values to zero.""" + pytest.skip("TODO: Implement test") + + def test_clamp_max_multiple_series(self, client: MetricsQLiteClient): + """clamp_max() applied independently to each series.""" + pytest.skip("TODO: Implement test") + + +class TestTransformationChaining: + """Tests for chaining transformation functions.""" + + def test_clamp_min_then_clamp_max(self, client: MetricsQLiteClient): + """clamp_max(clamp_min(v, min), max) clamps to range [min, max].""" + pytest.skip("TODO: Implement test") + + def test_clamp_max_then_clamp_min(self, client: MetricsQLiteClient): + """clamp_min(clamp_max(v, max), min) clamps to range [min, max].""" + pytest.skip("TODO: Implement test") + + def test_abs_then_clamp(self, client: MetricsQLiteClient): + """clamp_max(abs(v), max) caps absolute values.""" + pytest.skip("TODO: Implement test") + + +class TestTransformationInRangeQuery: + """Tests for transformation functions in range queries.""" + + def test_transformation_at_each_step(self, client: MetricsQLiteClient): + """Transformation is applied at each step in range query.""" + pytest.skip("TODO: Implement test") + + def test_transformation_before_rollup(self, client: MetricsQLiteClient): + """avg_over_time(clamp_min(v, 0)[5m]) clamps before averaging.""" + pytest.skip("TODO: Implement test") + + +class TestTransformationEdgeCases: + """Edge cases for 
transformation functions.""" + + def test_transformation_on_nan(self, client: MetricsQLiteClient): + """Transformation functions handle NaN values.""" + pytest.skip("TODO: Implement test") + + def test_transformation_on_inf(self, client: MetricsQLiteClient): + """Transformation functions handle Infinity values.""" + pytest.skip("TODO: Implement test") + + def test_clamp_with_scalar_expression(self, client: MetricsQLiteClient): + """clamp_min(v, 1+1) evaluates scalar expression for threshold.""" + pytest.skip("TODO: Implement test") diff --git a/tests/queries/test_window_semantics.py b/tests/queries/test_window_semantics.py new file mode 100644 index 0000000..8933325 --- /dev/null +++ b/tests/queries/test_window_semantics.py @@ -0,0 +1,148 @@ +"""Tests for window and interval semantics. + +Window semantics differ between range vectors and plain selectors: +- Range vectors: Half-open interval (T-range, T] to avoid overlap +- Plain selectors: Closed interval [T-lookback, T] for staleness + +This prevents samples from being counted twice in adjacent windows +while still allowing staleness checks to include boundary samples. +""" + +import pytest + +from metricsqlite import MetricsQLiteClient + +EVAL_TIME = 946_684_800_000 # 2000-01-01 00:00:00 UTC + + +@pytest.fixture +def client(): + """Create an in-memory client with tables initialized.""" + client = MetricsQLiteClient(None) + client.connect() + client.create_tables() + yield client + client.close() + + +class TestRangeVectorIntervals: + """Tests for range vector half-open interval (T-range, T].""" + + def test_sample_at_end_included(self, client: MetricsQLiteClient): + """Sample at exactly T (end of range) is included.""" + pytest.skip("TODO: Implement test") + + def test_sample_at_start_excluded(self, client: MetricsQLiteClient): + """Sample at exactly T-range (start of range) is EXCLUDED. + + This is the half-open interval semantics: (T-range, T] + Sample at exactly T-10m is NOT included in [10m] range. 
+ """ + pytest.skip("TODO: Implement test") + + def test_sample_1ms_after_start_included(self, client: MetricsQLiteClient): + """Sample 1ms after T-range is included.""" + pytest.skip("TODO: Implement test") + + def test_adjacent_windows_no_overlap(self, client: MetricsQLiteClient): + """Adjacent range query windows do not count same sample twice. + + With step=10m and range=10m: + - Window at T=10m: (0m, 10m] + - Window at T=20m: (10m, 20m] + Sample at exactly T=10m is in first window, not second. + """ + pytest.skip("TODO: Implement test") + + +class TestPlainSelectorIntervals: + """Tests for plain selector closed interval [T-lookback, T].""" + + def test_sample_at_start_included(self, client: MetricsQLiteClient): + """Sample at T-lookback is included (closed interval).""" + pytest.skip("TODO: Implement test") + + def test_sample_at_end_included(self, client: MetricsQLiteClient): + """Sample at T is included.""" + pytest.skip("TODO: Implement test") + + def test_sample_before_start_excluded(self, client: MetricsQLiteClient): + """Sample before T-lookback is excluded.""" + pytest.skip("TODO: Implement test") + + +class TestMixedIntervalSemantics: + """Tests verifying different semantics for different contexts.""" + + def test_range_vector_vs_plain_at_boundary(self, client: MetricsQLiteClient): + """Same sample included by plain selector but excluded by range vector. 
+ + Sample at exactly T-5m: + - Plain selector metric with 5m lookback: INCLUDED + - Range vector metric[5m]: EXCLUDED + """ + pytest.skip("TODO: Implement test") + + def test_rollup_in_range_query_uses_half_open(self, client: MetricsQLiteClient): + """avg_over_time(metric[5m]) in range query uses half-open intervals.""" + pytest.skip("TODO: Implement test") + + +class TestRangeQueryStepWindows: + """Tests for step windows in range queries.""" + + def test_step_aligned_windows(self, client: MetricsQLiteClient): + """Windows align with step times: T, T+step, T+2*step, ...""" + pytest.skip("TODO: Implement test") + + def test_step_smaller_than_range(self, client: MetricsQLiteClient): + """With step < range, windows overlap (each sample counted multiple times).""" + pytest.skip("TODO: Implement test") + + def test_step_equals_range(self, client: MetricsQLiteClient): + """With step == range, windows are adjacent (no overlap, no gap).""" + pytest.skip("TODO: Implement test") + + def test_step_larger_than_range(self, client: MetricsQLiteClient): + """With step > range, there are gaps between windows.""" + pytest.skip("TODO: Implement test") + + +class TestWindowBoundaryCases: + """Edge cases for window boundaries.""" + + def test_sample_exactly_on_step_boundary(self, client: MetricsQLiteClient): + """Sample at step boundary is in the earlier window only.""" + pytest.skip("TODO: Implement test") + + def test_1ms_difference_in_boundary(self, client: MetricsQLiteClient): + """1ms difference determines which window a sample falls into.""" + pytest.skip("TODO: Implement test") + + def test_range_start_equals_query_start(self, client: MetricsQLiteClient): + """First window in range query starts at query start time.""" + pytest.skip("TODO: Implement test") + + def test_range_end_equals_query_end(self, client: MetricsQLiteClient): + """Last window in range query ends at query end time.""" + pytest.skip("TODO: Implement test") + + +class TestCompactedDataWindowing: + """Tests for 
windowing with compacted data.""" + + def test_compacted_bucket_spans_window_boundary(self, client: MetricsQLiteClient): + """Compacted bucket spanning window boundary is clamped correctly.""" + pytest.skip("TODO: Implement test") + + def test_counter_spans_multiple_windows(self, client: MetricsQLiteClient): + """Counter spanning multiple step windows appears in all of them.""" + pytest.skip("TODO: Implement test") + + def test_window_start_inside_bucket(self, client: MetricsQLiteClient): + """Window start falls inside a compacted bucket.""" + pytest.skip("TODO: Implement test") + + def test_window_end_inside_bucket(self, client: MetricsQLiteClient): + """Window end falls inside a compacted bucket.""" + pytest.skip("TODO: Implement test") diff --git a/tests/test_client.py b/tests/test_client.py index 1b2b1bf..562a6ba 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -4,7 +4,7 @@ from metricsqlite import CompactedRangeError, MetricsQLiteClient -EVAL_TIME = 946_681_200_000 # 2000-01-01 00:00:00 UTC +EVAL_TIME = 946_684_800_000 # 2000-01-01 00:00:00 UTC @pytest.fixture