From b4a833ea2911bc24fd1fcafdf4c297215b0dae22 Mon Sep 17 00:00:00 2001 From: Concode0 Date: Fri, 8 May 2026 15:56:04 +0900 Subject: [PATCH 1/3] chore: remove legacy lqa tasks --- conf/task/lqa.yaml | 41 ---- datalib/lqa.py | 481 ----------------------------------------- models/lqa/__init__.py | 9 - models/lqa/glr_net.py | 244 --------------------- models/lqa/heads.py | 232 -------------------- tasks/lqa.py | 364 ------------------------------- 6 files changed, 1371 deletions(-) delete mode 100644 conf/task/lqa.yaml delete mode 100644 datalib/lqa.py delete mode 100644 models/lqa/__init__.py delete mode 100644 models/lqa/glr_net.py delete mode 100644 models/lqa/heads.py delete mode 100644 tasks/lqa.py diff --git a/conf/task/lqa.yaml b/conf/task/lqa.yaml deleted file mode 100644 index 6f2d057..0000000 --- a/conf/task/lqa.yaml +++ /dev/null @@ -1,41 +0,0 @@ -# @package _global_ -name: lqa - -# Probe selection: chain | entailment | negation -probe: chain - -algebra: - p: 4 - q: 1 - r: 0 - device: auto - -model: - channels: 16 - num_layers: 3 - num_heads: 4 - num_rotors: 8 - dropout: 0.1 - encoder: "sentence-transformers/all-MiniLM-L6-v2" - encoder_dim: 384 - max_seq_len: 64 - use_entropy_gating: true - num_relations: 18 - -dataset: - data_root: data - # For entailment: recommend n_train=100000 - n_train: null - n_test: null - num_workers: 0 - -training: - epochs: 50 - lr: 0.001 - batch_size: 64 - optimizer_type: riemannian_adam - # Auxiliary loss weights - isometry_weight: 0.01 - asymmetry_weight: 0.1 - asymmetry_margin: 0.1 - involution_weight: 0.1 diff --git a/datalib/lqa.py b/datalib/lqa.py deleted file mode 100644 index de663f6..0000000 --- a/datalib/lqa.py +++ /dev/null @@ -1,481 +0,0 @@ -# Versor: Universal Geometric Algebra Neural Network -# Copyright (C) 2026 Eunkyum Kim -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# - -"""Real HuggingFace datasets for Geometric Latent Reasoning (GLR) probes. - -Three dataset classes for the three LQA probes: -1. CLUTRRDataset -- CLUTRR/v1 compositional chain reasoning -2. HANSDataset -- SNLI (train) + HANS (eval) asymmetric entailment -3. BoolQNegDataset -- google/boolq with rule-based negation augmentation - -All follow the pattern: - 1. Check for cached .pt embeddings in data_root/lqa/ - 2. If missing: download from HuggingFace, encode with frozen sentence-transformers, cache - 3. Return (embeddings, labels) -""" - -import re -from pathlib import Path - -import torch -from torch.nn.utils.rnn import pad_sequence -from torch.utils.data import DataLoader, Dataset - -from log import get_logger - -logger = get_logger(__name__) - -# Helpers - - -def _get_encoder(encoder_name: str = "sentence-transformers/all-MiniLM-L6-v2"): - """Load a frozen sentence-transformer encoder.""" - try: - from sentence_transformers import SentenceTransformer - except ImportError: - raise ImportError( - "sentence-transformers is required for LQA datasets. 
Install via: uv pip install sentence-transformers" - ) - model = SentenceTransformer(encoder_name) - return model - - -def _encode_texts(texts: list[str], encoder, batch_size: int = 256) -> torch.Tensor: - """Encode a list of texts into embeddings [N, encoder_dim].""" - embeddings = encoder.encode(texts, batch_size=batch_size, show_progress_bar=True, convert_to_tensor=True) - return embeddings.cpu() - - -def _cache_path(data_root: str, name: str) -> Path: - return Path(data_root) / "lqa" / f"{name}.pt" - - -def _ensure_dir(path: Path): - path.parent.mkdir(parents=True, exist_ok=True) - - -def _load_hf_dataset(path: str, name: str = None, split: str = "train", trust_remote_code: bool = False): - """Load a HuggingFace dataset.""" - from datasets import load_dataset - - kwargs = {"path": path, "split": split, "trust_remote_code": trust_remote_code} - if name is not None: - kwargs["name"] = name - return load_dataset(**kwargs) - - -# CLUTRR Dataset -- Compositional Chain Reasoning (Real) - -# CLUTRR relation types (18 total in CLUTRR/v1) -CLUTRR_RELATIONS = [ - "father", - "mother", - "son", - "daughter", - "brother", - "sister", - "grandfather", - "grandmother", - "grandson", - "granddaughter", - "uncle", - "aunt", - "nephew", - "niece", - "father-in-law", - "mother-in-law", - "son-in-law", - "daughter-in-law", -] - - -def _split_story_sentences(story: str) -> list[str]: - """Split a CLUTRR story into individual sentences.""" - # Split on sentence boundaries: '. 
' or '.\n' or end-of-string period - sentences = re.split(r"(?<=[.!?])\s+", story.strip()) - return [s.strip() for s in sentences if s.strip()] - - -def _clutrr_collate_fn(batch: list[dict]) -> dict: - """Collate variable-length chain samples with padding.""" - emb_list = [sample["sentence_embeddings"] for sample in batch] - padded_embs = pad_sequence(emb_list, batch_first=True, padding_value=0.0) - - lengths = torch.tensor([sample["chain_length"] for sample in batch]) - labels = torch.stack([sample["label"] for sample in batch]) - - return { - "sentence_embeddings": padded_embs, # [B, L_max, encoder_dim] - "chain_length": lengths, # [B] - "label": labels, # [B] - } - - -class CLUTRRDataset(Dataset): - """CLUTRR compositional chain reasoning dataset (real HuggingFace data). - - Uses CLUTRR/v1 from HuggingFace: stories with multi-hop kinship reasoning. - Each sample: per-sentence embeddings [L, 384] + final relation label. - """ - - def __init__( - self, - data_root: str, - split: str = "train", - encoder_name: str = "sentence-transformers/all-MiniLM-L6-v2", - n_samples: int = None, - ): - cache_name = f"clutrr_hf_{split}" + (f"_{n_samples}" if n_samples else "") - cache = _cache_path(data_root, cache_name) - if cache.exists(): - logger.info("Loading cached CLUTRR %s from %s", split, cache) - cached = torch.load(cache, weights_only=False) - self.sentence_embeddings = cached["sentence_embeddings"] - self.chain_lengths = cached["chain_lengths"] - self.labels = cached["labels"] - else: - logger.info("Downloading CLUTRR %s from HuggingFace...", split) - hf_split = "train" if split == "train" else "test" - ds = _load_hf_dataset("CLUTRR/v1", "gen_train234_test2to10", hf_split, trust_remote_code=True) - - if n_samples is not None and n_samples < len(ds): - ds = ds.select(range(n_samples)) - - # Build relation label mapping from data - relation_to_idx = {r: i for i, r in enumerate(CLUTRR_RELATIONS)} - - all_sentences = [] - sentence_offsets = [] - labels = [] - 
chain_lengths = [] - - for row in ds: - story = row.get("story", row.get("clean_story", "")) - target = row.get("target", row.get("target_text", "")) - - # Split story into sentences - sents = _split_story_sentences(story) - if not sents: - continue - - # Map target relation to index - target_lower = target.lower().strip() if isinstance(target, str) else "" - label_idx = relation_to_idx.get(target_lower, -1) - if label_idx == -1: - # Try partial match - for r, idx in relation_to_idx.items(): - if r in target_lower or target_lower in r: - label_idx = idx - break - if label_idx == -1: - label_idx = 0 # fallback - - start = len(all_sentences) - all_sentences.extend(sents) - sentence_offsets.append((start, len(all_sentences))) - labels.append(label_idx) - chain_lengths.append(len(sents)) - - logger.info("Encoding %d sentences with %s...", len(all_sentences), encoder_name) - encoder = _get_encoder(encoder_name) - all_embs = _encode_texts(all_sentences, encoder) - - self.sentence_embeddings = [] - for s, e in sentence_offsets: - self.sentence_embeddings.append(all_embs[s:e]) - - self.chain_lengths = torch.tensor(chain_lengths, dtype=torch.long) - self.labels = torch.tensor(labels, dtype=torch.long) - - _ensure_dir(cache) - torch.save( - { - "sentence_embeddings": self.sentence_embeddings, - "chain_lengths": self.chain_lengths, - "labels": self.labels, - }, - cache, - ) - logger.info("Cached CLUTRR %s to %s (%d samples)", split, cache, len(self.labels)) - - self.num_relations = len(CLUTRR_RELATIONS) - - def __len__(self): - return len(self.labels) - - def __getitem__(self, idx): - return { - "sentence_embeddings": self.sentence_embeddings[idx], - "chain_length": self.chain_lengths[idx], - "label": self.labels[idx], - } - - -# HANS Dataset -- Asymmetric Entailment (SNLI train + HANS eval) - - -class HANSDataset(Dataset): - """SNLI (train) + HANS (eval) for entailment asymmetry testing. - - Train: stanfordnlp/snli -- 550k premise/hypothesis pairs, 3-way labels. 
- Eval: jhu-cogsci/hans -- 30k adversarial NLI examples. - """ - - def __init__( - self, - data_root: str, - split: str = "train", - encoder_name: str = "sentence-transformers/all-MiniLM-L6-v2", - n_samples: int = None, - ): - cache_name = f"hans_bin_{split}" + (f"_{n_samples}" if n_samples else "") - cache = _cache_path(data_root, cache_name) - if cache.exists(): - logger.info("Loading cached HANS %s from %s", split, cache) - cached = torch.load(cache, weights_only=False) - self.premise_emb = cached["premise_emb"] - self.hypothesis_emb = cached["hypothesis_emb"] - self.labels = cached["labels"] - else: - if split == "train": - logger.info("Downloading SNLI train from HuggingFace...") - ds = _load_hf_dataset("stanfordnlp/snli", split="train") - # Filter invalid labels (-1) - ds = ds.filter(lambda x: x["label"] != -1) - if n_samples is not None and n_samples < len(ds): - ds = ds.select(range(n_samples)) - - premises = ds["premise"] - hypotheses = ds["hypothesis"] - # Binary: 0=entailment -> 1.0, {1=neutral, 2=contradiction} -> 0.0 - labels = [1.0 if l == 0 else 0.0 for l in ds["label"]] - else: - logger.info("Downloading HANS validation from HuggingFace...") - ds = _load_hf_dataset("jhu-cogsci/hans", split="validation", trust_remote_code=True) - if n_samples is not None and n_samples < len(ds): - ds = ds.select(range(n_samples)) - - premises = ds["premise"] - hypotheses = ds["hypothesis"] - # HANS: 0=entailment -> 1.0, 1=non-entailment -> 0.0 - labels = [1.0 if l == 0 else 0.0 for l in ds["label"]] - - logger.info("Encoding %d premise/hypothesis pairs...", len(premises)) - encoder = _get_encoder(encoder_name) - self.premise_emb = _encode_texts(list(premises), encoder) - self.hypothesis_emb = _encode_texts(list(hypotheses), encoder) - self.labels = torch.tensor(labels, dtype=torch.float32) - - _ensure_dir(cache) - torch.save( - { - "premise_emb": self.premise_emb, - "hypothesis_emb": self.hypothesis_emb, - "labels": self.labels, - }, - cache, - ) - 
logger.info("Cached HANS %s to %s (%d samples)", split, cache, len(self.labels)) - - def __len__(self): - return len(self.labels) - - def __getitem__(self, idx): - return { - "premise_emb": self.premise_emb[idx], - "hypothesis_emb": self.hypothesis_emb[idx], - "label": self.labels[idx], - } - - -# BoolQ-Neg Dataset -- Negation Sensitivity (Real) - -_NEGATION_PREFIXES = [ - ("Is", "Isn't"), - ("Can", "Can't"), - ("Does", "Doesn't"), - ("Do", "Don't"), - ("Was", "Wasn't"), - ("Were", "Weren't"), - ("Has", "Hasn't"), - ("Have", "Haven't"), - ("Will", "Won't"), - ("Are", "Aren't"), - ("Did", "Didn't"), - ("Could", "Couldn't"), - ("Would", "Wouldn't"), - ("Should", "Shouldn't"), -] - - -def _negate_question(q: str) -> str: - """Apply rule-based negation to a question.""" - for pos, neg in _NEGATION_PREFIXES: - if q.startswith(pos + " "): - return neg + q[len(pos) :] - if q.startswith(neg + " "): - return pos + q[len(neg) :] - parts = q.split(" ", 2) - if len(parts) >= 2: - return parts[0] + " not " + " ".join(parts[1:]) - return "not " + q - - -class BoolQNegDataset(Dataset): - """BoolQ with negation augmentation for negation sensitivity testing. - - Uses google/boolq from HuggingFace. Each sample produces 2 entries: - original question + negated question (answer flipped). 
- """ - - def __init__( - self, - data_root: str, - split: str = "train", - encoder_name: str = "sentence-transformers/all-MiniLM-L6-v2", - n_samples: int = None, - ): - cache_name = f"boolqneg_hf_{split}" + (f"_{n_samples}" if n_samples else "") - cache = _cache_path(data_root, cache_name) - if cache.exists(): - logger.info("Loading cached BoolQ-Neg %s from %s", split, cache) - cached = torch.load(cache, weights_only=False) - self.passage_emb = cached["passage_emb"] - self.question_emb = cached["question_emb"] - self.is_negated = cached["is_negated"] - self.answers = cached["answers"] - else: - hf_split = "train" if split == "train" else "validation" - logger.info("Downloading BoolQ %s from HuggingFace...", hf_split) - ds = _load_hf_dataset("google/boolq", split=hf_split) - - if n_samples is not None and n_samples < len(ds): - ds = ds.select(range(n_samples)) - - passages = [] - questions = [] - is_negated = [] - answers = [] - - for row in ds: - passage = row["passage"] - question = row["question"] - answer = row["answer"] - - # Original - passages.append(passage) - questions.append(question) - is_negated.append(False) - answers.append(int(answer)) - - # Negated version - neg_q = _negate_question(question) - passages.append(passage) - questions.append(neg_q) - is_negated.append(True) - answers.append(int(not answer)) - - logger.info("Encoding %d passage/question pairs...", len(passages)) - encoder = _get_encoder(encoder_name) - self.passage_emb = _encode_texts(passages, encoder) - self.question_emb = _encode_texts(questions, encoder) - self.is_negated = torch.tensor(is_negated, dtype=torch.bool) - self.answers = torch.tensor(answers, dtype=torch.float32) - - _ensure_dir(cache) - torch.save( - { - "passage_emb": self.passage_emb, - "question_emb": self.question_emb, - "is_negated": self.is_negated, - "answers": self.answers, - }, - cache, - ) - logger.info("Cached BoolQ-Neg %s to %s (%d samples)", split, cache, len(self.answers)) - - def __len__(self): - return 
len(self.answers) - - def __getitem__(self, idx): - return { - "passage_emb": self.passage_emb[idx], - "question_emb": self.question_emb[idx], - "is_negated": self.is_negated[idx], - "answer": self.answers[idx], - } - - -# Loader helper - - -def get_lqa_loaders( - data_root: str = "data", - probe: str = "chain", - batch_size: int = 64, - encoder_name: str = "sentence-transformers/all-MiniLM-L6-v2", - n_train: int = None, - n_test: int = None, - num_workers: int = 0, - pin_memory: bool = False, -) -> tuple[DataLoader, DataLoader]: - """Get train/test DataLoaders for a specific LQA probe. - - Args: - data_root: Root data directory. - probe: "chain", "entailment", or "negation". - batch_size: Batch size. - encoder_name: Sentence transformer model name. - n_train: Max training samples (None = use full dataset). - n_test: Max test samples (None = use full dataset). - num_workers: DataLoader workers. - pin_memory: Pin memory for CUDA. - - Returns: - (train_loader, test_loader) - """ - if probe == "chain": - train_ds = CLUTRRDataset(data_root, "train", encoder_name, n_train) - test_ds = CLUTRRDataset(data_root, "test", encoder_name, n_test) - train_loader = DataLoader( - train_ds, - batch_size=batch_size, - shuffle=True, - num_workers=num_workers, - pin_memory=pin_memory, - collate_fn=_clutrr_collate_fn, - ) - test_loader = DataLoader( - test_ds, - batch_size=batch_size, - shuffle=False, - num_workers=num_workers, - pin_memory=pin_memory, - collate_fn=_clutrr_collate_fn, - ) - elif probe == "entailment": - train_ds = HANSDataset(data_root, "train", encoder_name, n_train) - test_ds = HANSDataset(data_root, "test", encoder_name, n_test) - train_loader = DataLoader( - train_ds, batch_size=batch_size, shuffle=True, num_workers=num_workers, pin_memory=pin_memory - ) - test_loader = DataLoader( - test_ds, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=pin_memory - ) - elif probe == "negation": - train_ds = BoolQNegDataset(data_root, "train", 
encoder_name, n_train) - test_ds = BoolQNegDataset(data_root, "test", encoder_name, n_test) - train_loader = DataLoader( - train_ds, batch_size=batch_size, shuffle=True, num_workers=num_workers, pin_memory=pin_memory - ) - test_loader = DataLoader( - test_ds, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=pin_memory - ) - else: - raise ValueError(f"Unknown probe: {probe}. Use 'chain', 'entailment', or 'negation'.") - - return train_loader, test_loader diff --git a/models/lqa/__init__.py b/models/lqa/__init__.py deleted file mode 100644 index 2aeb3b3..0000000 --- a/models/lqa/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -from .glr_net import GLRNet -from .heads import ChainReasoningHead, EntailmentHead, NegationHead - -__all__ = [ - "GLRNet", - "ChainReasoningHead", - "EntailmentHead", - "NegationHead", -] diff --git a/models/lqa/glr_net.py b/models/lqa/glr_net.py deleted file mode 100644 index 74969a3..0000000 --- a/models/lqa/glr_net.py +++ /dev/null @@ -1,244 +0,0 @@ -# Versor: Universal Geometric Algebra Neural Network -# Copyright (C) 2026 Eunkyum Kim -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# - -"""Geometric Latent Reasoning Network (GLRNet). - -A small geometric post-processor (~300K params) on frozen LLM embeddings. 
-Uses Cl(4,1) conformal GA to provide algebraic structure for: -- Non-commutative products (asymmetry) -- Rotor composition (exact multi-hop) -- Grade involution (negation as automorphism) -- Grade-0 invariance (truth preserved under transformations) - -Data flow ensures L >= 2 for every probe so the transformer has -real sequence structure to attend over: - Chain: L = chain_length (each sentence is a token) - Entailment: L = 2 (premise, hypothesis stacked) - Negation: L = 2 (passage, question stacked) -""" - -import torch -import torch.nn as nn - -from core.algebra import CliffordAlgebra -from core.module import CliffordModule -from layers.adapters.embedding import RotaryBivectorPE -from layers.adapters.mother import MotherEmbedding -from layers.blocks.transformer import GeometricTransformerBlock -from layers.primitives.normalization import CliffordLayerNorm -from layers.primitives.projection import GeometricNeutralizer - -from .heads import ChainReasoningHead, EntailmentHead, NegationHead - - -class GLRNet(CliffordModule): - """Geometric Latent Reasoning Network. - - Architecture: - frozen_embeddings [B, L, encoder_dim] - -> MotherEmbedding(encoder_dim -> C x 2^n) per token - -> RotaryBivectorPE position-dependent rotors - -> GeometricTransformerBlock x N cross-token attention - -> GeometricNeutralizer grade-0/grade-2 separation - -> TaskHead (probe-specific) - -> prediction - - Every probe constructs a multi-token sequence (L >= 2) so the - transformer always has cross-token attention to work with. - """ - - def __init__( - self, - algebra: CliffordAlgebra, - encoder_dim: int = 384, - channels: int = 16, - num_layers: int = 3, - num_heads: int = 4, - num_rotors: int = 8, - dropout: float = 0.1, - probe: str = "chain", - max_seq_len: int = 64, - use_entropy_gating: bool = True, - num_relations: int = 10, - ): - super().__init__(algebra) - self.channels = channels - self.probe = probe - self.encoder_dim = encoder_dim - - # 1. 
Lift frozen embeddings to multivector space - self.mother = MotherEmbedding(algebra, encoder_dim, channels) - - # 2. Positional encoding via bivector rotors - self.pe = RotaryBivectorPE(algebra, channels, max_seq_len) - - # 3. Geometric transformer backbone - self.blocks = nn.ModuleList( - [ - GeometricTransformerBlock( - algebra, - channels, - num_heads, - num_rotors, - dropout=dropout, - use_entropy_gating=use_entropy_gating, - ) - for _ in range(num_layers) - ] - ) - - # 4. Final normalization + neutralization - self.final_norm = CliffordLayerNorm(algebra, channels) - self.neutralizer = GeometricNeutralizer(algebra, channels) - - # 5. Probe-specific head - if probe == "chain": - self.head = ChainReasoningHead(algebra, channels, num_relations=num_relations) - elif probe == "entailment": - self.head = EntailmentHead(algebra, channels) - elif probe == "negation": - self.head = NegationHead(algebra, channels) - else: - raise ValueError(f"Unknown probe: {probe}") - - def _lift_and_attend(self, embeddings: torch.Tensor, key_padding_mask: torch.Tensor = None) -> torch.Tensor: - """Lift a sequence of embeddings through the full backbone. - - Args: - embeddings: [B, L, encoder_dim] with L >= 2. - key_padding_mask: Optional [B, L] bool mask where True = padded. - - Returns: - Attended multivectors [B, L, C, D] (sequence preserved). 
- """ - B, L, _ = embeddings.shape - D = self.algebra.dim - - # Lift per-token: [B*L, encoder_dim] -> [B*L, C, D] - mv = self.mother(embeddings.reshape(B * L, -1)) - mv = mv.reshape(B, L, self.channels, D) - - # Positional encoding (rotor per position) - mv = self.pe(mv) - - # Transformer blocks -- cross-token attention happens here - for block in self.blocks: - mv = block(mv, key_padding_mask=key_padding_mask) - - return mv # [B, L, C, D] - - def forward(self, batch: dict) -> dict: - """Forward pass dispatching to probe-specific logic.""" - if self.probe == "chain": - return self._forward_chain(batch) - elif self.probe == "entailment": - return self._forward_entailment(batch) - elif self.probe == "negation": - return self._forward_negation(batch) - - def _forward_chain(self, batch: dict) -> dict: - """Chain reasoning: per-sentence embeddings -> rotor composition -> classify. - - Each sentence in the chain is a separate token. The transformer - attends across all chain steps, then the head applies soft-gated - rotor bank and classifies from the composed result. 
- - Args: - batch: { - "sentence_embeddings": [B, L, encoder_dim], padded - "chain_length": [B], - "label": [B], - } - """ - embs = batch["sentence_embeddings"] # [B, L_max, encoder_dim] - chain_lengths = batch["chain_length"] # [B] - - B, L_max, _ = embs.shape - - # Build padding mask: True = padded position - positions = torch.arange(L_max, device=embs.device).unsqueeze(0) # [1, L_max] - key_padding_mask = positions >= chain_lengths.unsqueeze(1) # [B, L_max] - - # Full backbone with padding mask - mv = self._lift_and_attend(embs, key_padding_mask=key_padding_mask) # [B, L, C, D] - - B, L, C, D = mv.shape - - # Masked mean pooling: exclude padded positions - valid_mask = ~key_padding_mask # [B, L], True = valid - valid_count = valid_mask.sum(dim=1, keepdim=True).clamp(min=1) # [B, 1] - valid_mask_expanded = valid_mask.unsqueeze(-1).unsqueeze(-1) # [B, L, 1, 1] - mv_pooled = (mv * valid_mask_expanded).sum(dim=1) / valid_count.unsqueeze(-1) # [B, C, D] - - mv_pooled = self.final_norm(mv_pooled) - mv_pooled = self.neutralizer(mv_pooled) - - logits = self.head(mv_pooled) - return {"logits": logits} - - def _forward_entailment(self, batch: dict) -> dict: - """Entailment: stack premise + hypothesis as 2-token sequence. - - The transformer sees both as a length-2 sequence, enabling - cross-attention between premise and hypothesis. After attention, - we split back and feed into the asymmetric head. 
- - Args: - batch: { - "premise_emb": [B, encoder_dim], - "hypothesis_emb": [B, encoder_dim], - } - """ - # Stack as 2-token sequence: [B, 2, encoder_dim] - seq = torch.stack([batch["premise_emb"], batch["hypothesis_emb"]], dim=1) - - # Full backbone - mv = self._lift_and_attend(seq) # [B, 2, C, D] - - # Split back into premise/hypothesis multivectors - premise_mv = mv[:, 0, :, :] # [B, C, D] - hypothesis_mv = mv[:, 1, :, :] # [B, C, D] - - # Normalize + neutralize each - premise_mv = self.neutralizer(self.final_norm(premise_mv)) - hypothesis_mv = self.neutralizer(self.final_norm(hypothesis_mv)) - - logits = self.head(premise_mv, hypothesis_mv) - return { - "logits": logits, - "premise_mv": premise_mv, - "hypothesis_mv": hypothesis_mv, - } - - def _forward_negation(self, batch: dict) -> dict: - """Negation: stack passage + question as 2-token sequence. - - Same principle as entailment -- the transformer cross-attends - between passage and question, then the head uses grade involution. - - Args: - batch: { - "passage_emb": [B, encoder_dim], - "question_emb": [B, encoder_dim], - } - """ - seq = torch.stack([batch["passage_emb"], batch["question_emb"]], dim=1) - - mv = self._lift_and_attend(seq) # [B, 2, C, D] - - passage_mv = mv[:, 0, :, :] - question_mv = mv[:, 1, :, :] - - passage_mv = self.neutralizer(self.final_norm(passage_mv)) - question_mv = self.neutralizer(self.final_norm(question_mv)) - - logits = self.head(passage_mv, question_mv) - return { - "logits": logits, - "passage_mv": passage_mv, - "question_mv": question_mv, - } diff --git a/models/lqa/heads.py b/models/lqa/heads.py deleted file mode 100644 index 7eb985d..0000000 --- a/models/lqa/heads.py +++ /dev/null @@ -1,232 +0,0 @@ -# Versor: Universal Geometric Algebra Neural Network -# Copyright (C) 2026 Eunkyum Kim -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# - -"""Probe-specific task heads for Geometric Latent Reasoning. - -Three CliffordModule heads: -- ChainReasoningHead: soft-gated rotor bank, classify from weighted rotor composition -- EntailmentHead: geometric product P*rev(H) -> asymmetric features -> 3-way -- NegationHead: grade involution + GeometricNeutralizer -> binary -""" - -import torch -import torch.nn as nn -import torch.nn.functional as F - -from core.algebra import CliffordAlgebra -from core.module import CliffordModule -from layers.primitives.projection import GeometricNeutralizer -from layers.primitives.rotor import RotorLayer - - -class ChainReasoningHead(CliffordModule): - """Soft-gated rotor bank for compositional chain reasoning. - - Learns K relation rotors as a geometric basis. A gating MLP maps - the pooled grade-0 features to softmax weights over K rotors. - All K rotors are applied, and the weighted-sum of transformed - grade-0 features is classified. - - The key insight: the rotor bank provides a learned geometric basis - for relation composition, with soft gating selecting the composition. - """ - - def __init__(self, algebra: CliffordAlgebra, channels: int, num_relations: int = 18, hidden_dim: int = 64): - super().__init__(algebra) - self.channels = channels - self.num_relations = num_relations - - # Learned relation rotors -- each captures a geometric transformation - self.relation_rotors = nn.ModuleList([RotorLayer(algebra, channels) for _ in range(num_relations)]) - - # Gating MLP: grade-0 features -> softmax weights over K rotors - self.gate = nn.Sequential( - nn.Linear(channels, hidden_dim), - nn.ReLU(), - nn.Linear(hidden_dim, num_relations), - ) - - # Grade-0 (scalar) features -> classifier - self.classifier = nn.Sequential( - nn.Linear(channels, hidden_dim), - nn.ReLU(), - nn.Dropout(0.1), - nn.Linear(hidden_dim, num_relations), - ) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Apply soft-gated rotor bank and classify. 
- - Args: - x: Pooled multivectors [B, C, D] from backbone. - - Returns: - Logits [B, num_relations]. - """ - B, C, D = x.shape - - # Compute gating weights from grade-0 features - g0 = x[..., 0] # [B, C] - gate_weights = F.softmax(self.gate(g0), dim=-1) # [B, K] - - # Apply all K rotors and weighted-sum their grade-0 outputs - transformed_g0 = torch.zeros(B, self.num_relations, C, device=x.device, dtype=x.dtype) - for k, rotor_layer in enumerate(self.relation_rotors): - R, R_rev = rotor_layer._compute_versors(x.device, x.dtype) - Rx = self.algebra.geometric_product(R.unsqueeze(0), x) - RxRr = self.algebra.geometric_product(Rx, R_rev.unsqueeze(0)) - transformed_g0[:, k] = RxRr[..., 0] # [B, C] - - # Weighted sum: [B, K, C] * [B, K, 1] -> [B, C] - weighted = (transformed_g0 * gate_weights.unsqueeze(-1)).sum(dim=1) # [B, C] - - return self.classifier(weighted) - - def isometry_loss(self) -> torch.Tensor: - """Regularization: relation rotors should preserve norms.""" - loss = torch.tensor(0.0, device=next(self.parameters()).device) - for rotor in self.relation_rotors: - bw = rotor.bivector_weights # [C, num_bv] - loss = loss + (bw.pow(2).sum(dim=-1) - 1.0).pow(2).mean() - return loss / max(len(self.relation_rotors), 1) - - -class EntailmentHead(CliffordModule): - """Asymmetric entailment via geometric product structure (binary). - - Computes P * rev(H) where P=premise, H=hypothesis multivectors. - The grade-0 part (symmetric alignment) and grade-2 part (antisymmetric - orientation) provide naturally asymmetric features. - - Binary output: entailment (1) vs non-entailment (0), matching HANS protocol. - - Key:

_2 flips sign when P <-> H are swapped, - giving the model asymmetry for free from the algebra. - """ - - def __init__(self, algebra: CliffordAlgebra, channels: int, hidden_dim: int = 64): - super().__init__(algebra) - self.channels = channels - - g2_mask = algebra.grade_masks[2] - self.register_buffer("g2_idx", g2_mask.nonzero(as_tuple=False).squeeze(-1)) - self.d2 = len(self.g2_idx) - - # Features: grade-0 (1) + grade-2 norm (1) + grade-2 direction (min(d2, 4)) - feature_dim = channels * (1 + 1 + min(self.d2, 4)) - self.classifier = nn.Sequential( - nn.Linear(feature_dim, hidden_dim), - nn.ReLU(), - nn.Dropout(0.1), - nn.Linear(hidden_dim, hidden_dim // 2), - nn.ReLU(), - nn.Linear(hidden_dim // 2, 1), - ) - - def _compute_product_features(self, premise: torch.Tensor, hypothesis: torch.Tensor): - """Compute geometric product and extract grade features. - - Returns: - (g0, g2, g2_norm, g2_dir, features) tuple. - """ - H_rev = self.algebra.reverse(hypothesis) - product = self.algebra.geometric_product(premise, H_rev) # [B, C, D] - - g0 = product[..., 0] # [B, C] -- symmetric - g2 = product[..., self.g2_idx] # [B, C, d2] -- antisymmetric - g2_norm = g2.norm(dim=-1) # [B, C] - - k = min(self.d2, 4) - g2_dir = g2[..., :k] # [B, C, k] - - features = torch.cat( - [ - g0, - g2_norm, - g2_dir.reshape(g2_dir.shape[0], -1), - ], - dim=-1, - ) - - return g0, g2, g2_norm, g2_dir, features - - def forward(self, premise: torch.Tensor, hypothesis: torch.Tensor) -> torch.Tensor: - """Compute binary entailment logits from geometric product features. - - Args: - premise: Premise multivectors [B, C, D]. - hypothesis: Hypothesis multivectors [B, C, D]. - - Returns: - Logits [B, 1]. - """ - _, _, _, _, features = self._compute_product_features(premise, hypothesis) - return self.classifier(features) - - def get_grade2_stats(self, premise: torch.Tensor, hypothesis: torch.Tensor) -> dict: - """Diagnostic: grade-2 signal statistics. 
- - Returns dict with g2_norm mean/std/max across the batch. - """ - _, _, g2_norm, _, _ = self._compute_product_features(premise, hypothesis) - g2_flat = g2_norm.flatten() - return { - "g2_norm_mean": g2_flat.mean().item(), - "g2_norm_std": g2_flat.std().item(), - "g2_norm_max": g2_flat.max().item(), - } - - -class NegationHead(CliffordModule): - """Negation sensitivity via grade involution. - - Grade involution x^ = sum (-1)^k _k flips odd grades and - preserves even grades. This is an algebraic automorphism. - - The head measures the involution-distance between features and - uses GeometricNeutralizer to separate truth (grade-0) from - relational noise (grade-2). - """ - - def __init__(self, algebra: CliffordAlgebra, channels: int, hidden_dim: int = 64): - super().__init__(algebra) - self.channels = channels - self.neutralizer = GeometricNeutralizer(algebra, channels) - - # Features: grade-0 (neutralized) + involution distance + original grade-0 - feature_dim = channels * 3 - self.classifier = nn.Sequential( - nn.Linear(feature_dim, hidden_dim), - nn.ReLU(), - nn.Dropout(0.1), - nn.Linear(hidden_dim, 1), - ) - - def forward(self, passage_mv: torch.Tensor, question_mv: torch.Tensor) -> torch.Tensor: - """Predict answer using grade involution structure. - - Args: - passage_mv: Passage multivectors [B, C, D]. - question_mv: Question multivectors [B, C, D]. - - Returns: - Logits [B, 1]. 
- """ - combined = self.algebra.geometric_product(passage_mv, question_mv) - involuted = self.algebra.grade_involution(combined) - - inv_dist = (combined - involuted).norm(dim=-1) # [B, C] - neutralized = self.neutralizer(combined) - g0_neutralized = neutralized[..., 0] # [B, C] - g0_original = combined[..., 0] # [B, C] - - features = torch.cat([g0_neutralized, g0_original, inv_dist], dim=-1) - return self.classifier(features) - - def get_features(self, passage_mv: torch.Tensor, question_mv: torch.Tensor) -> torch.Tensor: - """Get intermediate multivector features (for InvolutionConsistencyLoss).""" - return self.algebra.geometric_product(passage_mv, question_mv) diff --git a/tasks/lqa.py b/tasks/lqa.py deleted file mode 100644 index 2d049e5..0000000 --- a/tasks/lqa.py +++ /dev/null @@ -1,364 +0,0 @@ -# Versor: Universal Geometric Algebra Neural Network -# Copyright (C) 2026 Eunkyum Kim -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# - -"""Geometric Latent Reasoning (GLR) Task -- LQA Redesign. - -Three probes testing structural blind spots of flat embeddings: - 1. Chain: Compositional multi-hop reasoning (CLUTRR-style) - 2. Entailment: Asymmetric entailment (HANS-style) - 3. Negation: Negation sensitivity (BoolQ-Neg-style) - -Each probe demonstrates a specific algebraic advantage of Clifford algebra -over flat inner-product spaces. -""" - -import torch -import torch.nn as nn -import torch.nn.functional as F -from omegaconf import DictConfig - -from core.algebra import CliffordAlgebra -from datalib.lqa import get_lqa_loaders -from functional.loss import AsymmetryLoss, InvolutionConsistencyLoss -from log import get_logger -from models.lqa.glr_net import GLRNet -from tasks.base import BaseTask - -logger = get_logger(__name__) - - -class LQATask(BaseTask): - """Geometric Latent Reasoning via three algebraic probes. 
- - Demonstrates that a small geometric post-processor (~300K params) on - frozen LLM embeddings outperforms equivalently-sized flat models on - tasks requiring composition, asymmetry, or negation. - - Config keys: - probe: "chain" | "entailment" | "negation" - algebra: {p, q, r, device} - model: {channels, num_layers, num_heads, num_rotors, ...} - training: {epochs, lr, batch_size, optimizer_type, ...} - dataset: {data_root, n_train, n_test} - """ - - def __init__(self, cfg: DictConfig): - self.probe = cfg.get("probe", "chain") - self._train_loader = None - self._test_loader = None - super().__init__(cfg) - - def setup_algebra(self): - """Cl(4,1) -- conformal GA, dim=32, 6 grades.""" - p = self.cfg.algebra.get("p", 4) - q = self.cfg.algebra.get("q", 1) - r = self.cfg.algebra.get("r", 0) - return CliffordAlgebra(p, q, r, device=self.device) - - def setup_model(self): - """GLRNet with probe-specific head.""" - mcfg = self.cfg.model - return GLRNet( - algebra=self.algebra, - encoder_dim=mcfg.get("encoder_dim", 384), - channels=mcfg.get("channels", 16), - num_layers=mcfg.get("num_layers", 3), - num_heads=mcfg.get("num_heads", 4), - num_rotors=mcfg.get("num_rotors", 8), - dropout=mcfg.get("dropout", 0.1), - probe=self.probe, - max_seq_len=mcfg.get("max_seq_len", 64), - use_entropy_gating=mcfg.get("use_entropy_gating", True), - num_relations=mcfg.get("num_relations", 10), - ) - - def setup_criterion(self): - """Multi-loss per probe: primary + auxiliary.""" - if self.probe == "chain": - return nn.CrossEntropyLoss() - elif self.probe == "entailment": - return nn.BCEWithLogitsLoss() - elif self.probe == "negation": - return nn.BCEWithLogitsLoss() - else: - raise ValueError(f"Unknown probe: {self.probe}") - - def get_data(self): - """Load probe-specific datasets.""" - dcfg = self.cfg.dataset - self._train_loader, self._test_loader = get_lqa_loaders( - data_root=dcfg.get("data_root", "data"), - probe=self.probe, - batch_size=self.cfg.training.get("batch_size", 64), - 
encoder_name=self.cfg.model.get("encoder", "sentence-transformers/all-MiniLM-L6-v2"), - n_train=dcfg.get("n_train", None), - n_test=dcfg.get("n_test", None), - num_workers=dcfg.get("num_workers", 0), - pin_memory=self.device_config.pin_memory, - ) - return self._train_loader - - def _to_device(self, batch: dict) -> dict: - """Move batch tensors to device.""" - return {k: v.to(self.device) if isinstance(v, torch.Tensor) else v for k, v in batch.items()} - - def train_step(self, batch): - """Forward + multi-loss backward.""" - batch = self._to_device(batch) - self.optimizer.zero_grad() - - output = self.model(batch) - logits = output["logits"] - - # Primary loss - if self.probe == "negation": - labels = batch["answer"].float().unsqueeze(-1) - loss = self.criterion(logits, labels) - elif self.probe == "entailment": - labels = batch["label"].float().unsqueeze(-1) # [B, 1] - loss = self.criterion(logits, labels) - else: - labels = batch["label"] - loss = self.criterion(logits, labels) - - # Auxiliary losses - aux_losses = {} - tcfg = self.cfg.training - - if self.probe == "chain": - iso_w = tcfg.get("isometry_weight", 0.01) - if iso_w > 0: - iso_loss = self.model.head.isometry_loss() - loss = loss + iso_w * iso_loss - aux_losses["Iso"] = iso_loss.item() - - elif self.probe == "entailment": - asym_w = tcfg.get("asymmetry_weight", 0.1) - if asym_w > 0: - # Compute reverse-order logits for asymmetry penalty - rev_batch = { - "premise_emb": batch["hypothesis_emb"], - "hypothesis_emb": batch["premise_emb"], - } - with torch.no_grad(): - rev_output = self.model(rev_batch) - asym_loss_fn = AsymmetryLoss(margin=tcfg.get("asymmetry_margin", 0.1)) - asym_loss = asym_loss_fn(output["logits"], rev_output["logits"]) - loss = loss + asym_w * asym_loss - aux_losses["Asym"] = asym_loss.item() - - elif self.probe == "negation": - inv_w = tcfg.get("involution_weight", 0.1) - if inv_w > 0 and "passage_mv" in output: - # For pairs of (original, negated) samples in the batch - is_neg = 
batch["is_negated"] - orig_mask = ~is_neg - neg_mask = is_neg - n_pairs = min(orig_mask.sum(), neg_mask.sum()) - if n_pairs > 0: - orig_features = self.model.head.get_features( - output["passage_mv"][orig_mask][:n_pairs], - output["question_mv"][orig_mask][:n_pairs] - if "question_mv" in output - else output["passage_mv"][orig_mask][:n_pairs], - ) - neg_features = self.model.head.get_features( - output["passage_mv"][neg_mask][:n_pairs], - output["question_mv"][neg_mask][:n_pairs] - if "question_mv" in output - else output["passage_mv"][neg_mask][:n_pairs], - ) - inv_loss_fn = InvolutionConsistencyLoss() - inv_loss = inv_loss_fn(orig_features, neg_features, self.algebra) - loss = loss + inv_w * inv_loss - aux_losses["Inv"] = inv_loss.item() - - self._backward(loss) - self._optimizer_step() - - logs = {"Loss": loss.item()} - logs.update(aux_losses) - return loss.item(), logs - - def evaluate(self, data=None): - """Per-probe evaluation metrics.""" - loader = self._test_loader if self._test_loader is not None else data - if loader is None: - logger.warning("No test loader available for evaluation.") - return {"Accuracy": 0.0} - - self.model.eval() - all_preds = [] - all_labels = [] - all_chain_lengths = [] - all_confidences = [] - all_g2_norms = [] - - with torch.no_grad(): - for batch in loader: - batch = self._to_device(batch) - output = self.model(batch) - logits = output["logits"] - - if self.probe in ("negation", "entailment"): - preds = (logits.squeeze(-1) > 0).long() - if self.probe == "negation": - labels = batch["answer"].long() - else: - labels = batch["label"].long() - # Confidence via sigmoid - conf = torch.sigmoid(logits.squeeze(-1)) - all_confidences.append(conf.cpu()) - # Grade-2 diagnostics for entailment - if self.probe == "entailment" and "premise_mv" in output: - _, _, g2_norm, _, _ = self.model.head._compute_product_features( - output["premise_mv"], output["hypothesis_mv"] - ) - all_g2_norms.append(g2_norm.cpu()) - else: - preds = 
logits.argmax(dim=-1) - labels = batch["label"] - - all_preds.append(preds.cpu()) - all_labels.append(labels.cpu()) - - if self.probe == "chain" and "chain_length" in batch: - all_chain_lengths.append(batch["chain_length"].cpu()) - - all_preds = torch.cat(all_preds) - all_labels = torch.cat(all_labels) - correct = (all_preds == all_labels).float() - accuracy = correct.mean().item() - - metrics = {"Accuracy": accuracy} - - # Probe-specific metrics - if self.probe == "chain" and all_chain_lengths: - all_chain_lengths = torch.cat(all_chain_lengths) - # Accuracy by chain length - for length in sorted(all_chain_lengths.unique().tolist()): - mask = all_chain_lengths == length - if mask.sum() > 0: - acc = correct[mask].mean().item() - metrics[f"Acc@len{int(length)}"] = acc - logger.info("Chain accuracy by length:") - for k, v in metrics.items(): - if k.startswith("Acc@"): - logger.info(" %s: %.4f", k, v) - - elif self.probe == "entailment": - # Per-class accuracy (binary: 1=entailment, 0=non-entailment) - ent_mask = all_labels == 1 - nonent_mask = all_labels == 0 - if ent_mask.sum() > 0: - metrics["Acc_Entailment"] = correct[ent_mask].mean().item() - if nonent_mask.sum() > 0: - metrics["Acc_NonEntailment"] = correct[nonent_mask].mean().item() - - # Prediction distribution - n_total = len(all_preds) - metrics["Pred_Entailment_Frac"] = (all_preds == 1).float().mean().item() - metrics["Pred_NonEntailment_Frac"] = (all_preds == 0).float().mean().item() - - # Confidence statistics - if all_confidences: - all_conf = torch.cat(all_confidences) - metrics["Confidence_Mean"] = all_conf.mean().item() - metrics["Confidence_Std"] = all_conf.std().item() - # Confidence for correct vs incorrect predictions - if correct.sum() > 0: - metrics["Confidence_Correct"] = all_conf[correct.bool()].mean().item() - if (1 - correct).sum() > 0: - metrics["Confidence_Incorrect"] = all_conf[~correct.bool()].mean().item() - - # Grade-2 signal diagnostics - if all_g2_norms: - g2_all = 
torch.cat([g.flatten() for g in all_g2_norms]) - metrics["Grade2_Norm_Mean"] = g2_all.mean().item() - metrics["Grade2_Norm_Std"] = g2_all.std().item() - metrics["Grade2_Norm_Max"] = g2_all.max().item() - - elif self.probe == "negation": - # Negation robustness - all_chain_lengths_or_neg = [] - for batch in loader: - if "is_negated" in batch: - all_chain_lengths_or_neg.append(batch["is_negated"].cpu()) - if all_chain_lengths_or_neg: - is_neg = torch.cat(all_chain_lengths_or_neg) - acc_orig = correct[~is_neg].mean().item() if (~is_neg).sum() > 0 else 0.0 - acc_neg = correct[is_neg].mean().item() if is_neg.sum() > 0 else 0.0 - metrics["Acc_Original"] = acc_orig - metrics["Acc_Negated"] = acc_neg - metrics["Negation_Robustness"] = min(acc_orig, acc_neg) - - logger.info("Evaluation metrics:") - for k, v in metrics.items(): - logger.info(" %s: %.4f", k, v) - - self.model.train() - return metrics - - def visualize(self, data=None): - """Visualization placeholder for GLR probes.""" - logger.info("GLR probe=%s -- visualization not implemented (use evaluate() for metrics)", self.probe) - - def run(self): - """Full training loop with validation.""" - logger.info( - "Starting GLR Task: probe=%s, algebra=Cl(%d,%d,%d)", - self.probe, - self.algebra.p, - self.algebra.q, - self.algebra.r, - ) - - train_loader = self.get_data() - - # Count parameters - n_params = sum(p.numel() for p in self.model.parameters() if p.requires_grad) - logger.info("Model parameters: %d (%.1fK)", n_params, n_params / 1000) - - # Training loop - self.model.train() - best_acc = 0.0 - - from tqdm import tqdm - - pbar = tqdm(range(self.epochs)) - - for epoch in pbar: - total_loss = 0.0 - n_batches = 0 - for batch in train_loader: - loss, logs = self.train_step(batch) - total_loss += loss - n_batches += 1 - - avg_loss = total_loss / max(n_batches, 1) - self.scheduler.step(avg_loss) - - # Periodic evaluation - if (epoch + 1) % 5 == 0 or epoch == self.epochs - 1: - metrics = self.evaluate() - acc = 
metrics.get("Accuracy", 0.0) - if acc > best_acc: - best_acc = acc - logs["ValAcc"] = acc - logs["BestAcc"] = best_acc - - current_lr = self.optimizer.param_groups[0]["lr"] - logs["Loss"] = avg_loss - logs["LR"] = current_lr - desc = " | ".join([f"{k}: {v:.4f}" for k, v in logs.items()]) - pbar.set_description(desc) - - logger.info("Training complete. Best accuracy: %.4f", best_acc) - - # Final evaluation - final_metrics = self.evaluate() - return final_metrics From a5594f72191ceda79f68e76dd9c33e1fc6bc7e88 Mon Sep 17 00:00:00 2001 From: Concode0 Date: Fri, 8 May 2026 16:53:14 +0900 Subject: [PATCH 2/3] feat: brief concepts for slm structure that replace lqa task --- conf/task/slm.yaml | 91 +++++++++ datalib/__init__.py | 2 +- datalib/slm.py | 213 +++++++++++++++++++++ main.py | 2 +- models/__init__.py | 2 +- models/slm/__init__.py | 13 ++ models/slm/model.py | 179 ++++++++++++++++++ models/slm/reasoning.py | 193 +++++++++++++++++++ models/slm/tokenizer.py | 174 +++++++++++++++++ pyproject.toml | 8 +- tasks/__init__.py | 8 +- tasks/slm.py | 408 ++++++++++++++++++++++++++++++++++++++++ uv.lock | 315 ++----------------------------- 13 files changed, 1293 insertions(+), 315 deletions(-) create mode 100644 conf/task/slm.yaml create mode 100644 datalib/slm.py create mode 100644 models/slm/__init__.py create mode 100644 models/slm/model.py create mode 100644 models/slm/reasoning.py create mode 100644 models/slm/tokenizer.py create mode 100644 tasks/slm.py diff --git a/conf/task/slm.yaml b/conf/task/slm.yaml new file mode 100644 index 0000000..0fd8b3e --- /dev/null +++ b/conf/task/slm.yaml @@ -0,0 +1,91 @@ +# @package _global_ +name: slm # task key registered in main.py + +algebra: + p: 4 # Cl(p,q,r) positive basis dimensions + q: 1 # Cl(p,q,r) negative basis dimensions + r: 1 # Cl(p,q,r) null basis dimensions + device: cuda:0 # auto | cpu | cuda | cuda:0 | mps + +model: + channels: 16 # multivector channels; must be divisible by num_heads + num_layers: 4 # model-local geometric
SLM blocks + num_heads: 4 # causal geometric attention heads + num_rotors: 8 # rotors inside MultiRotorFFN + ffn_mult: 4 # FFN expansion multiplier + dropout: 0.1 # dropout in attention/block paths + bivector_weight: 0.5 # grade-2 orientation contribution to attention scores + max_seq_len: 256 # token context length for CUDA throughput runs + attn_block_size: 128 # 64 | 128 | 256; larger is faster when VRAM allows + tie_embeddings: true # true | false; tied token embedding decoder vs untied linear head + use_neutralizer: true # true | false; final grade-0/grade-2 neutralization + +tokenizer: + mode: subword # subword | word; subword uses WordPiece when tokenizers is installed + vocab_size: 8192 # vocabulary size including special tokens + min_frequency: 2 # minimum frequency for tokenizer training + lowercase: true # true | false + +dataset: + name: HuggingFaceTB/cosmopedia # HuggingFace dataset path + config: stories # Cosmopedia config, e.g. stories | web_samples_v1 | web_samples_v2 | stanford + split: train # HuggingFace split name + text_field: text # row field to train on; null falls back through common text field names + sample_size: 8192 # sampled documents for the first CUDA-scale run; null streams all + streaming: true # true | false; streaming avoids full local materialization + shuffle: true # true | false; shuffle corpus before sampling + shuffle_train: true # true | false; shuffle DataLoader batches + seed: 0 # dataset shuffle seed + shuffle_buffer: 20000 # streaming shuffle buffer + max_chars_per_sample: 12000 # truncate long documents before tokenizer training + eval_fraction: 0.05 # held-out fraction from sampled texts + chunk_long_texts: true # true | false; split long documents into multiple training chunks + stride: 256 # chunk stride; null defaults to max_seq_len + num_workers: null # null uses training.num_workers + pin_memory: null # null uses training.pin_memory + +analysis: + enabled: true # true | false; run core.analysis on hidden 
multivector states + run_on: final # final | eval | both; when to run analysis + max_batches: 1 # loader batches inspected by analysis + max_samples: 256 # valid token states passed to analyzers + sampling_strategy: passthrough # passthrough | random | stratified | bootstrap + run_spectral: true # true | false; grade energy, bivector spectrum, GP operator spectrum + run_symmetry: true # true | false; null directions, involution/reflection symmetries + run_commutator: true # true | false; commutativity and Lie-bracket closure summaries + run_dimension: false # true | false; ignored for pre-embedded hidden states + run_signature: false # true | false; ignored for pre-embedded hidden states + energy_threshold: 0.05 # active-energy/null/symmetry threshold + k_neighbors: 8 # reserved for raw-data analysis tools + save_summary: true # true | false; write analysis_summary.txt when checkpointing is enabled + +inference: + enabled: false # true | false; sample text after training/evaluation + prompt: "The" # string; prompt used for optional generation + max_new_tokens: 32 # generated tokens for preview + temperature: 1.0 # lower is sharper + top_k: 50 # top-k sampling cutoff; null disables + sample: true # true | false; multinomial sampling vs greedy argmax + +checkpointing: + enabled: true # true | false; automatic model saving + dir: checkpoints/slm # output directory, relative to the Hydra run dir unless absolute + save_final: true # true | false; save final checkpoint at end of training + save_best: true # true | false; save best checkpoint according to monitor/mode + monitor: EvalPPL # EvalPPL | EvalAcc | Loss; metric used for save_best + mode: min # min | max + filename: slm_final.pt # final checkpoint filename + best_filename: slm_best.pt # best checkpoint filename + +training: + epochs: 10 # training epochs + lr: 0.001 # optimizer learning rate + batch_size: 16 # batch size + optimizer_type: riemannian_adam # riemannian_adam | exponential_sgd | adamw + 
eval_interval: 1 # evaluate every N epochs when eval split exists + num_workers: 4 # DataLoader workers for CUDA training + pin_memory: true # true | false | null; true speeds CPU-to-CUDA transfer + compile: true # true | false; torch.compile wrapper + compile_backend: inductor # null | inductor | aot_eager; inductor for CUDA + amp: true # true | false; CUDA autocast plus GradScaler + cudnn_benchmark: true # true | false | null; tune kernels for stable shapes diff --git a/datalib/__init__.py b/datalib/__init__.py index 0d750c4..a4c5481 100644 --- a/datalib/__init__.py +++ b/datalib/__init__.py @@ -8,6 +8,6 @@ from datalib.md17 import get_md17_loaders from datalib.symbolic_regression import get_sr_loaders - from datalib.lqa import get_lqa_loaders + from datalib.slm import build_causal_lm_loaders from datalib.deap import get_deap_loaders """ diff --git a/datalib/slm.py b/datalib/slm.py new file mode 100644 index 0000000..4ef4d87 --- /dev/null +++ b/datalib/slm.py @@ -0,0 +1,213 @@ +"""Data utilities for raw-text SLM training.""" + +from dataclasses import dataclass +from typing import Iterable, List, Optional, Protocol + +import torch +from torch.utils.data import DataLoader, Dataset + +from log import get_logger + +logger = get_logger(__name__) + +IGNORE_INDEX = -100 + + +class TokenizerLike(Protocol): + pad_id: int + + def encode(self, text: str, max_length: Optional[int] = None, add_special_tokens: bool = True) -> List[int]: + """Encode text into token ids.""" + + +@dataclass +class TextCorpusConfig: + """HuggingFace text corpus loading options.""" + + name: str = "HuggingFaceTB/cosmopedia" + config: Optional[str] = "stories" + split: str = "train" + text_field: Optional[str] = "text" + sample_size: Optional[int] = 2048 + streaming: bool = True + shuffle: bool = True + seed: int = 0 + shuffle_buffer: int = 10_000 + max_chars_per_sample: int = 12_000 + eval_fraction: float = 0.05 + + @classmethod + def from_mapping(cls, values: dict) -> "TextCorpusConfig": + keys = 
cls.__dataclass_fields__.keys() + return cls(**{key: values.get(key, getattr(cls, key)) for key in keys}) + + +@dataclass +class CausalLoaderConfig: + """Causal LM dataloader options.""" + + batch_size: int = 8 + max_seq_len: int = 128 + chunk_long_texts: bool = False + stride: Optional[int] = None + shuffle_train: bool = True + num_workers: int = 0 + pin_memory: bool = False + + +def extract_text(row: dict, text_field: Optional[str] = None) -> str: + """Extract a text field from a HuggingFace dataset row.""" + if text_field is not None: + return str(row.get(text_field, "")) + for candidate in ("text", "content", "article", "story", "generated_text"): + if candidate in row and row[candidate]: + return str(row[candidate]) + return "" + + +def load_text_samples(config: TextCorpusConfig) -> List[str]: + """Load a sampled raw-text corpus from HuggingFace datasets.""" + try: + from datasets import load_dataset + except ImportError: + raise ImportError("The SLM data pipeline requires the optional 'datasets' dependency.") + + kwargs = {"path": config.name, "split": config.split, "streaming": config.streaming} + if config.config: + kwargs["name"] = config.config + + logger.info( + "Loading dataset=%s config=%s split=%s streaming=%s sample_size=%s", + config.name, + config.config, + config.split, + config.streaming, + config.sample_size, + ) + dataset = load_dataset(**kwargs) + + if config.streaming: + if config.shuffle: + dataset = dataset.shuffle(seed=config.seed, buffer_size=config.shuffle_buffer) + iterator = iter(dataset) + else: + if config.shuffle: + dataset = dataset.shuffle(seed=config.seed) + if config.sample_size is not None: + dataset = dataset.select(range(min(int(config.sample_size), len(dataset)))) + iterator = iter(dataset) + + texts = [] + for row in iterator: + text = extract_text(row, config.text_field).strip() + if text: + texts.append(text[: config.max_chars_per_sample]) + if config.sample_size is not None and len(texts) >= int(config.sample_size): + 
break + + if not texts: + raise RuntimeError("No text samples were loaded. Check dataset config and text_field.") + logger.info("Loaded %d text samples for SLM training.", len(texts)) + return texts + + +def split_train_eval(texts: List[str], eval_fraction: float) -> tuple: + """Split sampled texts into train/eval partitions.""" + if len(texts) < 2 or eval_fraction <= 0: + return texts, [] + eval_size = max(1, int(len(texts) * eval_fraction)) + return texts[:-eval_size], texts[-eval_size:] + + +class CausalTextDataset(Dataset): + """Fixed-length causal LM examples from tokenized text.""" + + def __init__( + self, + texts: Iterable[str], + tokenizer: TokenizerLike, + max_seq_len: int, + chunk_long_texts: bool = False, + stride: Optional[int] = None, + ): + self.tokenizer = tokenizer + self.max_seq_len = max_seq_len + self.pad_id = tokenizer.pad_id + self.examples = [] + + stride = stride or max_seq_len + for text in texts: + ids = tokenizer.encode(text, add_special_tokens=True) + if len(ids) < 2: + continue + if chunk_long_texts: + starts = range(0, max(len(ids) - 1, 1), stride) + for start in starts: + chunk = ids[start : start + max_seq_len + 1] + if len(chunk) >= 2: + self.examples.append(self._make_example(chunk)) + else: + self.examples.append(self._make_example(ids[: max_seq_len + 1])) + + if not self.examples: + raise RuntimeError("Tokenizer produced no causal LM examples.") + + def _make_example(self, ids: List[int]) -> dict: + input_ids = ids[:-1][: self.max_seq_len] + labels = ids[1:][: self.max_seq_len] + valid_len = len(input_ids) + pad_len = self.max_seq_len - valid_len + + if pad_len > 0: + input_ids = input_ids + [self.pad_id] * pad_len + labels = labels + [IGNORE_INDEX] * pad_len + + attention_mask = [1] * valid_len + [0] * pad_len + return { + "input_ids": torch.tensor(input_ids, dtype=torch.long), + "labels": torch.tensor(labels, dtype=torch.long), + "attention_mask": torch.tensor(attention_mask, dtype=torch.long), + } + + def __len__(self): + 
return len(self.examples) + + def __getitem__(self, idx): + return self.examples[idx] + + +def build_causal_lm_loaders( + train_texts: Iterable[str], + eval_texts: Iterable[str], + tokenizer: TokenizerLike, + config: CausalLoaderConfig, +) -> tuple: + """Build train/eval dataloaders for causal LM training.""" + train_dataset = CausalTextDataset( + train_texts, + tokenizer, + max_seq_len=config.max_seq_len, + chunk_long_texts=config.chunk_long_texts, + stride=config.stride, + ) + train_loader = DataLoader( + train_dataset, + batch_size=config.batch_size, + shuffle=config.shuffle_train, + num_workers=config.num_workers, + pin_memory=config.pin_memory, + ) + + eval_loader = None + eval_texts = list(eval_texts) + if eval_texts: + eval_dataset = CausalTextDataset(eval_texts, tokenizer, max_seq_len=config.max_seq_len) + eval_loader = DataLoader( + eval_dataset, + batch_size=config.batch_size, + shuffle=False, + num_workers=config.num_workers, + pin_memory=config.pin_memory, + ) + + return train_loader, eval_loader diff --git a/main.py b/main.py index cb99c11..4c58f0f 100644 --- a/main.py +++ b/main.py @@ -18,7 +18,7 @@ _TASK_MODULES = { "md17": ("tasks.md17", "MD17Task"), "sr": ("tasks.symbolic_regression", "SRTask"), - "lqa": ("tasks.lqa", "LQATask"), + "slm": ("tasks.slm", "SLMTask"), "deap_eeg": ("tasks.deap_eeg", "DEAPEEGTask"), } diff --git a/models/__init__.py b/models/__init__.py index 8915e8a..37ae79a 100644 --- a/models/__init__.py +++ b/models/__init__.py @@ -8,7 +8,7 @@ from models.md17 import MD17ForceNet # requires --extra md17 from models.sr import SRGBN # requires --extra sr - from models.lqa import GLRNet # requires --extra lqa + from models.slm import GeometricSLM # requires --extra slm from models.deap import EEGNet from models.blocks import GeometricBladeNetwork """ diff --git a/models/slm/__init__.py b/models/slm/__init__.py new file mode 100644 index 0000000..77af037 --- /dev/null +++ b/models/slm/__init__.py @@ -0,0 +1,13 @@ +# Versor: Universal
Geometric Algebra Neural Network +# Copyright (C) 2026 Eunkyum Kim +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# + +"""Small geometric language model components.""" + +from .model import GeometricSLM +from .tokenizer import SubwordTokenizer + +__all__ = ["GeometricSLM", "SubwordTokenizer"] diff --git a/models/slm/model.py b/models/slm/model.py new file mode 100644 index 0000000..4b36880 --- /dev/null +++ b/models/slm/model.py @@ -0,0 +1,179 @@ +# Versor: Universal Geometric Algebra Neural Network +# Copyright (C) 2026 Eunkyum Kim +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# + +"""Raw-text geometric SLM.""" + +import math +from typing import Optional + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from core.algebra import CliffordAlgebra +from core.module import CliffordModule +from layers.adapters.embedding import MultivectorEmbedding, RotaryBivectorPE +from layers.primitives.normalization import CliffordLayerNorm +from layers.primitives.projection import GeometricNeutralizer + +from .reasoning import GeometricSLMBlock + + +class GeometricSLM(CliffordModule): + """Small causal language model with multivector token states.""" + + def __init__( + self, + algebra: CliffordAlgebra, + vocab_size: int, + channels: int = 16, + num_layers: int = 4, + num_heads: int = 4, + num_rotors: int = 8, + ffn_mult: int = 4, + max_seq_len: int = 128, + dropout: float = 0.1, + bivector_weight: float = 0.5, + attn_block_size: int = 128, + tie_embeddings: bool = True, + use_neutralizer: bool = True, + pad_token_id: int = 0, + ): + super().__init__(algebra) + self.vocab_size = vocab_size + self.channels = channels + self.max_seq_len = max_seq_len + self.tie_embeddings = tie_embeddings + self.use_neutralizer = use_neutralizer + self.pad_token_id = pad_token_id + + 
self.token_embedding = MultivectorEmbedding(algebra, vocab_size, channels) + with torch.no_grad(): + self.token_embedding.embedding.weight[pad_token_id].zero_() + + self.position_embedding = RotaryBivectorPE(algebra, channels, max_seq_len) + self.blocks = nn.ModuleList( + [ + GeometricSLMBlock( + algebra=algebra, + channels=channels, + num_heads=num_heads, + num_rotors=num_rotors, + ffn_mult=ffn_mult, + dropout=dropout, + bivector_weight=bivector_weight, + attn_block_size=attn_block_size, + ) + for _ in range(num_layers) + ] + ) + self.final_norm = CliffordLayerNorm(algebra, channels) + self.neutralizer = GeometricNeutralizer(algebra, channels) if use_neutralizer else None + + if tie_embeddings: + self.lm_head = None + self.output_bias = nn.Parameter(torch.zeros(vocab_size)) + else: + self.lm_head = nn.Linear(channels * algebra.dim, vocab_size) + self.output_bias = None + + def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor = None) -> dict: + """Return next-token logits for ``input_ids``. + + Args: + input_ids: Token ids ``[B, L]``. + attention_mask: Optional mask ``[B, L]`` where 1/True means valid. 
+ """ + B, L = input_ids.shape + if L > self.max_seq_len: + raise ValueError(f"Sequence length {L} exceeds max_seq_len={self.max_seq_len}") + + key_padding_mask = None + if attention_mask is not None: + key_padding_mask = attention_mask == 0 + + h = self.token_embedding(input_ids) + h = self.position_embedding(h) + + if key_padding_mask is not None: + h = h.masked_fill(key_padding_mask.unsqueeze(-1).unsqueeze(-1), 0.0) + + for block in self.blocks: + h = block(h, key_padding_mask=key_padding_mask) + + h = self.final_norm(h.reshape(B * L, self.channels, self.algebra.dim)) + if self.neutralizer is not None: + h = self.neutralizer(h) + + flat = h.reshape(B * L, self.channels * self.algebra.dim) + if self.tie_embeddings: + logits = flat @ self.token_embedding.embedding.weight.t() + logits = logits / math.sqrt(self.channels * self.algebra.dim) + logits = logits + self.output_bias + else: + logits = self.lm_head(flat) + + return { + "logits": logits.reshape(B, L, self.vocab_size), + "hidden_states": h.reshape(B, L, self.channels, self.algebra.dim), + } + + def reasoner_parameter_count(self) -> int: + """Count trainable parameters outside token embedding and decoder head.""" + total = 0 + for name, param in self.named_parameters(): + if not param.requires_grad: + continue + if name.startswith("token_embedding."): + continue + if name.startswith("lm_head.") or name == "output_bias": + continue + total += param.numel() + return total + + @torch.no_grad() + def generate( + self, + input_ids: torch.Tensor, + max_new_tokens: int = 32, + temperature: float = 1.0, + top_k: Optional[int] = None, + sample: bool = True, + eos_token_id: Optional[int] = None, + ) -> torch.Tensor: + """Naive autoregressive generation for small local inference checks.""" + if temperature <= 0: + raise ValueError("temperature must be > 0") + + was_training = self.training + self.eval() + generated = input_ids + + for _ in range(max_new_tokens): + context = generated[:, -self.max_seq_len :] + 
attention_mask = torch.ones_like(context) + logits = self(context, attention_mask=attention_mask)["logits"][:, -1, :] + logits = logits / temperature + + if top_k is not None and top_k > 0 and top_k < logits.size(-1): + values, indices = logits.topk(top_k, dim=-1) + filtered = torch.full_like(logits, float("-inf")) + logits = filtered.scatter(-1, indices, values) + + if sample: + probs = F.softmax(logits, dim=-1) + next_token = torch.multinomial(probs, num_samples=1) + else: + next_token = logits.argmax(dim=-1, keepdim=True) + + generated = torch.cat([generated, next_token], dim=1) + if eos_token_id is not None and (next_token == eos_token_id).all(): + break + + if was_training: + self.train() + return generated diff --git a/models/slm/reasoning.py b/models/slm/reasoning.py new file mode 100644 index 0000000..c6a7c35 --- /dev/null +++ b/models/slm/reasoning.py @@ -0,0 +1,193 @@ +# Versor: Universal Geometric Algebra Neural Network +# Copyright (C) 2026 Eunkyum Kim +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# + +"""Geometric reasoning layers for SLM experiments.""" + +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from core.algebra import CliffordAlgebra +from core.module import CliffordModule +from layers.blocks.multi_rotor_ffn import MultiRotorFFN +from layers.primitives.linear import CliffordLinear +from layers.primitives.normalization import CliffordLayerNorm + + +class CausalGeometricAttention(CliffordModule): + """Causal self-attention scored by scalar and bivector product evidence. + + This is intentionally model-local rather than using ``GeometricTransformerBlock``, + so SLM-specific attention changes can happen inside ``models/slm``. 
+ """ + + def __init__( + self, + algebra: CliffordAlgebra, + channels: int, + num_heads: int = 4, + bivector_weight: float = 0.5, + dropout: float = 0.0, + block_size: int = 128, + ): + super().__init__(algebra) + if channels % num_heads != 0: + raise ValueError(f"channels ({channels}) must be divisible by num_heads ({num_heads})") + if block_size <= 0: + raise ValueError(f"block_size ({block_size}) must be > 0") + + self.channels = channels + self.num_heads = num_heads + self.head_channels = channels // num_heads + self.bivector_weight = bivector_weight + self.block_size = block_size + + self.q_proj = CliffordLinear(algebra, channels, channels) + self.k_proj = CliffordLinear(algebra, channels, channels) + self.v_proj = CliffordLinear(algebra, channels, channels) + self.out_proj = CliffordLinear(algebra, channels, channels) + self.dropout = nn.Dropout(dropout) if dropout > 0 else None + + self._precompute_score_tables() + + def _precompute_score_tables(self): + """Precompute bilinear tables for ``Q * reverse(K)`` attention scores.""" + algebra = self.algebra + D = algebra.dim + + metric_rev = algebra.gp_signs[:, 0].float() * algebra.rev_signs.float() + self.register_buffer("_metric_rev", metric_rev) + + g2_blades = [idx for idx in range(D) if bin(idx).count("1") == 2] + self.n_g2 = len(g2_blades) + + if g2_blades: + a_idx = torch.arange(D, device=algebra.device) + r_vals = torch.tensor(g2_blades, dtype=torch.long, device=algebra.device) + b_idx = a_idx.unsqueeze(0) ^ r_vals.unsqueeze(1) + rev_b = algebra.rev_signs.float()[b_idx] + gp_ar = algebra.gp_signs[:, r_vals].float().T + g2_sign = rev_b * gp_ar + else: + b_idx = torch.zeros(0, D, dtype=torch.long, device=algebra.device) + g2_sign = torch.zeros(0, D, device=algebra.device) + + self.register_buffer("_g2_b_idx", b_idx) + self.register_buffer("_g2_sign", g2_sign) + + def _compute_score(self, q_head: torch.Tensor, k_head: torch.Tensor, k_g2: torch.Tensor) -> torch.Tensor: + """Compute grade-0 plus grade-2 
attention scores without materializing GP tensors.""" + B, H, Lq, Hc, D = q_head.shape + Lk = k_head.shape[2] + + metric_rev = self._metric_rev.to(dtype=q_head.dtype) + q_flat = (q_head * metric_rev).reshape(B, H, Lq, Hc * D) + k_flat = k_head.reshape(B, H, Lk, Hc * D) + score_g0 = torch.matmul(q_flat, k_flat.transpose(-2, -1)) + + if self.n_g2 > 0: + q_2d = q_head.permute(0, 1, 3, 2, 4).reshape(B * H * Hc, Lq, D) + k_g2_2d = k_g2.permute(0, 1, 3, 2, 4, 5).reshape(B * H * Hc, Lk * self.n_g2, D) + comp = torch.bmm(q_2d, k_g2_2d.transpose(-2, -1)) + comp_sq = comp.reshape(B * H * Hc, Lq, Lk, self.n_g2).pow(2).sum(-1) + score_g2 = comp_sq.reshape(B, H, Hc, Lq, Lk).sum(2).clamp(min=self.algebra.eps).sqrt() + else: + score_g2 = torch.zeros_like(score_g0) + + return (score_g0 + self.bivector_weight * score_g2) / math.sqrt(Hc * D) + + def forward(self, x: torch.Tensor, key_padding_mask: torch.Tensor = None) -> torch.Tensor: + """Apply causal geometric attention. + + Args: + x: Token multivectors ``[B, L, C, D]``. + key_padding_mask: Optional bool mask ``[B, L]`` where ``True`` means padded. 
+ """ + B, L, C, D = x.shape + H = self.num_heads + Hc = self.head_channels + + flat = x.reshape(B * L, C, D) + Q = self.q_proj(flat).reshape(B, L, H, Hc, D).permute(0, 2, 1, 3, 4) + K = self.k_proj(flat).reshape(B, L, H, Hc, D).permute(0, 2, 1, 3, 4) + V = self.v_proj(flat).reshape(B, L, H, Hc, D).permute(0, 2, 1, 3, 4) + + causal_mask = torch.triu(torch.ones(L, L, device=x.device, dtype=torch.bool), diagonal=1) + + g2_sign = self._g2_sign.to(dtype=K.dtype) + K_g2 = K[..., self._g2_b_idx] * g2_sign + + output_chunks = [] + for q_start in range(0, L, self.block_size): + q_end = min(q_start + self.block_size, L) + Q_block = Q[:, :, q_start:q_end] + scores = self._compute_score(Q_block, K, K_g2) + + mask_block = causal_mask[q_start:q_end, :] + scores = scores.masked_fill(mask_block.unsqueeze(0).unsqueeze(0), float("-inf")) + + if key_padding_mask is not None: + scores = scores.masked_fill(key_padding_mask.unsqueeze(1).unsqueeze(2), float("-inf")) + + weights = F.softmax(scores, dim=-1) + if self.dropout is not None: + weights = self.dropout(weights) + + output_chunks.append(torch.einsum("bhij,bhjcd->bhicd", weights, V)) + + out = torch.cat(output_chunks, dim=2).permute(0, 2, 1, 3, 4).reshape(B, L, C, D) + out = self.out_proj(out.reshape(B * L, C, D)).reshape(B, L, C, D) + + if key_padding_mask is not None: + out = out.masked_fill(key_padding_mask.unsqueeze(-1).unsqueeze(-1), 0.0) + return out + + +class GeometricSLMBlock(CliffordModule): + """A model-local causal block with geometric attention and multi-rotor FFN.""" + + def __init__( + self, + algebra: CliffordAlgebra, + channels: int, + num_heads: int = 4, + num_rotors: int = 8, + ffn_mult: int = 4, + dropout: float = 0.0, + bivector_weight: float = 0.5, + attn_block_size: int = 128, + ): + super().__init__(algebra) + self.norm1 = CliffordLayerNorm(algebra, channels) + self.attn = CausalGeometricAttention( + algebra=algebra, + channels=channels, + num_heads=num_heads, + bivector_weight=bivector_weight, + 
dropout=dropout, + block_size=attn_block_size, + ) + self.norm2 = CliffordLayerNorm(algebra, channels) + self.ffn = MultiRotorFFN(algebra, channels, ffn_mult=ffn_mult, num_rotors=num_rotors) + self.dropout = nn.Dropout(dropout) if dropout > 0 else None + + def forward(self, x: torch.Tensor, key_padding_mask: torch.Tensor = None) -> torch.Tensor: + B, L, C, D = x.shape + + h = self.norm1(x.reshape(B * L, C, D)).reshape(B, L, C, D) + h = self.attn(h, key_padding_mask=key_padding_mask) + if self.dropout is not None: + h = self.dropout(h) + x = x + h + + h = self.norm2(x.reshape(B * L, C, D)).reshape(B, L, C, D) + h = self.ffn(h.reshape(B * L, C, D)).reshape(B, L, C, D) + if self.dropout is not None: + h = self.dropout(h) + return x + h diff --git a/models/slm/tokenizer.py b/models/slm/tokenizer.py new file mode 100644 index 0000000..c1eed24 --- /dev/null +++ b/models/slm/tokenizer.py @@ -0,0 +1,174 @@ +# Versor: Universal Geometric Algebra Neural Network +# Copyright (C) 2026 Eunkyum Kim +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# + +"""Tokenizer utilities for raw-text SLM experiments. + +The default path uses a WordPiece-style subword tokenizer from the optional +``tokenizers`` package. A small regex tokenizer is kept as a fallback so local +shape tests do not depend on the compiled tokenizer package being available. +""" + +import re +from collections import Counter +from typing import Iterable, List, Optional + +SPECIAL_TOKENS = ["[PAD]", "[UNK]", "[BOS]", "[EOS]"] + + +class SubwordTokenizer: + """Small trainable tokenizer with fixed special token ids. + + Special ids: + [PAD] = 0, [UNK] = 1, [BOS] = 2, [EOS] = 3. 
+ """ + + pad_token = "[PAD]" + unk_token = "[UNK]" + bos_token = "[BOS]" + eos_token = "[EOS]" + + pad_id = 0 + unk_id = 1 + bos_id = 2 + eos_id = 3 + + def __init__( + self, + vocab_size: int = 8192, + min_frequency: int = 2, + lowercase: bool = True, + mode: str = "subword", + ): + self.vocab_size = vocab_size + self.min_frequency = min_frequency + self.lowercase = lowercase + self.mode = mode + self.backend = None + self.vocab = {token: idx for idx, token in enumerate(SPECIAL_TOKENS)} + self.id_to_token = {idx: token for token, idx in self.vocab.items()} + + def train(self, texts: Iterable[str]) -> "SubwordTokenizer": + """Train tokenizer state from a text iterator.""" + texts = list(texts) + if self.mode == "subword" and self._train_wordpiece(texts): + return self + self._train_regex_vocab(texts) + return self + + def _train_wordpiece(self, texts: List[str]) -> bool: + try: + from tokenizers import Tokenizer + from tokenizers.decoders import WordPiece as WordPieceDecoder + from tokenizers.models import WordPiece + from tokenizers.normalizers import NFD, Lowercase, Sequence, StripAccents + from tokenizers.pre_tokenizers import BertPreTokenizer + from tokenizers.trainers import WordPieceTrainer + except ImportError: + return False + + tokenizer = Tokenizer(WordPiece(unk_token=self.unk_token)) + if self.lowercase: + tokenizer.normalizer = Sequence([NFD(), Lowercase(), StripAccents()]) + tokenizer.pre_tokenizer = BertPreTokenizer() + tokenizer.decoder = WordPieceDecoder(prefix="##") + + trainer = WordPieceTrainer( + vocab_size=self.vocab_size, + min_frequency=self.min_frequency, + special_tokens=SPECIAL_TOKENS, + ) + tokenizer.train_from_iterator(texts, trainer=trainer) + special_ids = [tokenizer.token_to_id(token) for token in SPECIAL_TOKENS] + if special_ids != list(range(len(SPECIAL_TOKENS))): + raise RuntimeError(f"Unexpected tokenizer special ids: {dict(zip(SPECIAL_TOKENS, special_ids))}") + self.backend = tokenizer + self.vocab = tokenizer.get_vocab() + 
self.id_to_token = {idx: token for token, idx in self.vocab.items()} + return True + + def _train_regex_vocab(self, texts: List[str]) -> None: + counter = Counter() + for text in texts: + counter.update(self._split_regex(text)) + + keep = max(self.vocab_size - len(SPECIAL_TOKENS), 0) + for token, count in counter.most_common(keep): + if count >= self.min_frequency and token not in self.vocab: + self.vocab[token] = len(self.vocab) + self.id_to_token = {idx: token for token, idx in self.vocab.items()} + + def encode(self, text: str, max_length: Optional[int] = None, add_special_tokens: bool = True) -> List[int]: + """Encode text to token ids.""" + if self.backend is not None: + ids = self.backend.encode(text).ids + else: + ids = [self.vocab.get(token, self.unk_id) for token in self._split_regex(text)] + + if add_special_tokens: + ids = [self.bos_id] + ids + [self.eos_id] + if max_length is not None: + ids = ids[:max_length] + if add_special_tokens and ids and ids[-1] != self.eos_id: + ids[-1] = self.eos_id + return ids + + def decode(self, ids: List[int], skip_special_tokens: bool = True) -> str: + """Decode token ids back to text.""" + if self.backend is not None: + return self.backend.decode(ids, skip_special_tokens=skip_special_tokens) + + tokens = [] + special = set(SPECIAL_TOKENS) + for idx in ids: + token = self.id_to_token.get(int(idx), self.unk_token) + if skip_special_tokens and token in special: + continue + tokens.append(token) + text = " ".join(tokens) + text = re.sub(r"\s+([^\w\s])", r"\1", text) + return text.strip() + + def to_state(self) -> dict: + """Serializable tokenizer state for SLM checkpoints.""" + backend_json = self.backend.to_str() if self.backend is not None else None + return { + "vocab_size": self.vocab_size, + "min_frequency": self.min_frequency, + "lowercase": self.lowercase, + "mode": self.mode, + "vocab": self.vocab, + "backend_json": backend_json, + } + + @classmethod + def from_state(cls, state: dict) -> "SubwordTokenizer": + 
"""Restore tokenizer state from a checkpoint payload.""" + tokenizer = cls( + vocab_size=state.get("vocab_size", 8192), + min_frequency=state.get("min_frequency", 2), + lowercase=state.get("lowercase", True), + mode=state.get("mode", "subword"), + ) + tokenizer.vocab = dict(state.get("vocab", tokenizer.vocab)) + tokenizer.id_to_token = {idx: token for token, idx in tokenizer.vocab.items()} + backend_json = state.get("backend_json") + if backend_json is not None: + try: + from tokenizers import Tokenizer + + tokenizer.backend = Tokenizer.from_str(backend_json) + except ImportError: + tokenizer.backend = None + return tokenizer + + def _split_regex(self, text: str) -> List[str]: + if self.lowercase: + text = text.lower() + return re.findall(r"\w+|[^\w\s]", text, flags=re.UNICODE) + + def __len__(self) -> int: + return len(self.vocab) diff --git a/pyproject.toml b/pyproject.toml index 40c6321..eec8ee0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,9 +49,9 @@ sr = [ md17 = [ "torch-geometric>=2.6.1", ] -lqa = [ - "sentence-transformers>=5.1.2", +slm = [ "datasets>=2.16.0,<4.0", + "tokenizers>=0.22.0", ] viz = [ "matplotlib>=3.8.0", @@ -67,10 +67,10 @@ dev = [ "ruff>=0.8.0", ] all_tasks = [ - "versor[sr,md17,lqa]", + "versor[sr,md17,slm]", ] all = [ - "versor[sr,md17,lqa,viz,demo,dev]", + "versor[sr,md17,slm,viz,demo,dev]", ] [tool.setuptools.packages.find] diff --git a/tasks/__init__.py b/tasks/__init__.py index 18b3e8e..2d0af98 100644 --- a/tasks/__init__.py +++ b/tasks/__init__.py @@ -10,7 +10,7 @@ "BaseTask", "SRTask", "MD17Task", - "LQATask", + "SLMTask", "DEAPEEGTask", ] @@ -24,10 +24,10 @@ def __getattr__(name): from .md17 import MD17Task return MD17Task - if name == "LQATask": - from .lqa import LQATask + if name == "SLMTask": + from .slm import SLMTask - return LQATask + return SLMTask if name == "DEAPEEGTask": from .deap_eeg import DEAPEEGTask diff --git a/tasks/slm.py b/tasks/slm.py new file mode 100644 index 0000000..fa5c9cc --- /dev/null +++ 
b/tasks/slm.py @@ -0,0 +1,408 @@ +# Versor: Universal Geometric Algebra Neural Network +# Copyright (C) 2026 Eunkyum Kim +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# + +"""Raw-text Small Language Model task. + +The SLM task intentionally owns dataset sampling, tokenizer training, and +experiment choices. The model package stays focused on the geometric forward +path and can be reused by later logical reasoning evaluations. +""" + +import math +from pathlib import Path + +import torch +import torch.nn as nn +from omegaconf import DictConfig +from tqdm import tqdm + +from core.algebra import CliffordAlgebra +from core.analysis import AnalysisConfig, GeometricAnalyzer, SamplingConfig +from datalib.slm import ( + IGNORE_INDEX, + CausalLoaderConfig, + TextCorpusConfig, + build_causal_lm_loaders, + load_text_samples, + split_train_eval, +) +from log import get_logger +from models.slm import GeometricSLM, SubwordTokenizer +from tasks.base import BaseTask + +logger = get_logger(__name__) + + +def _section(cfg: DictConfig, name: str) -> dict: + value = cfg.get(name, {}) + return value if value is not None else {} + + +class SLMTask(BaseTask): + """Train a raw-text geometric SLM on a sampled Cosmopedia split.""" + + def __init__(self, cfg: DictConfig): + self.tokenizer = None + self._train_loader = None + self._eval_loader = None + self._last_analysis_summary = None + super().__init__(cfg) + + def setup_algebra(self): + algebra_cfg = _section(self.cfg, "algebra") + p = algebra_cfg.get("p", 4) + q = algebra_cfg.get("q", 1) + r = algebra_cfg.get("r", 1) + return CliffordAlgebra(p, q, r, device=self.device) + + def setup_model(self): + model_cfg = _section(self.cfg, "model") + tokenizer_cfg = _section(self.cfg, "tokenizer") + return GeometricSLM( + algebra=self.algebra, + vocab_size=tokenizer_cfg.get("vocab_size", 8192), + channels=model_cfg.get("channels", 16), + 
num_layers=model_cfg.get("num_layers", 4), + num_heads=model_cfg.get("num_heads", 4), + num_rotors=model_cfg.get("num_rotors", 8), + ffn_mult=model_cfg.get("ffn_mult", 4), + max_seq_len=model_cfg.get("max_seq_len", 128), + dropout=model_cfg.get("dropout", 0.1), + bivector_weight=model_cfg.get("bivector_weight", 0.5), + attn_block_size=model_cfg.get("attn_block_size", 128), + tie_embeddings=model_cfg.get("tie_embeddings", True), + use_neutralizer=model_cfg.get("use_neutralizer", True), + pad_token_id=SubwordTokenizer.pad_id, + ) + + def setup_criterion(self): + return nn.CrossEntropyLoss(ignore_index=IGNORE_INDEX) + + def get_data(self): + dataset_cfg = _section(self.cfg, "dataset") + tokenizer_cfg = _section(self.cfg, "tokenizer") + model_cfg = _section(self.cfg, "model") + training_cfg = _section(self.cfg, "training") + + corpus_cfg = TextCorpusConfig.from_mapping(dataset_cfg) + texts = load_text_samples(corpus_cfg) + train_texts, eval_texts = split_train_eval(texts, corpus_cfg.eval_fraction) + + self.tokenizer = SubwordTokenizer( + vocab_size=tokenizer_cfg.get("vocab_size", 8192), + min_frequency=tokenizer_cfg.get("min_frequency", 2), + lowercase=tokenizer_cfg.get("lowercase", True), + mode=tokenizer_cfg.get("mode", "subword"), + ).train(train_texts) + logger.info("Tokenizer mode=%s vocab=%d", tokenizer_cfg.get("mode", "subword"), len(self.tokenizer)) + + num_workers = dataset_cfg.get("num_workers", self.device_config.num_workers) + if num_workers is None: + num_workers = self.device_config.num_workers + pin_memory = dataset_cfg.get("pin_memory", self.device_config.pin_memory) + if pin_memory is None: + pin_memory = self.device_config.pin_memory + + loader_cfg = CausalLoaderConfig( + batch_size=training_cfg.get("batch_size", 8), + max_seq_len=model_cfg.get("max_seq_len", 128), + chunk_long_texts=dataset_cfg.get("chunk_long_texts", False), + stride=dataset_cfg.get("stride", None), + shuffle_train=dataset_cfg.get("shuffle_train", True), + num_workers=num_workers, + 
pin_memory=pin_memory, + ) + self._train_loader, self._eval_loader = build_causal_lm_loaders( + train_texts, + eval_texts, + self.tokenizer, + loader_cfg, + ) + + return self._train_loader + + def _to_device(self, batch: dict) -> dict: + return { + key: value.to(self.device) if isinstance(value, torch.Tensor) else value + for key, value in batch.items() + } + + def train_step(self, batch): + batch = self._to_device(batch) + self.optimizer.zero_grad() + + with self.device_config.autocast_context(): + output = self.model(batch["input_ids"], attention_mask=batch["attention_mask"]) + logits = output["logits"] + loss = self.criterion(logits.reshape(-1, logits.size(-1)), batch["labels"].reshape(-1)) + + self._backward(loss) + self._optimizer_step() + + with torch.no_grad(): + metrics = self._batch_metrics(logits, batch["labels"], loss) + return loss.item(), metrics + + def _batch_metrics(self, logits: torch.Tensor, labels: torch.Tensor, loss: torch.Tensor) -> dict: + valid = labels != IGNORE_INDEX + if valid.any(): + preds = logits.argmax(dim=-1) + acc = (preds[valid] == labels[valid]).float().mean().item() + else: + acc = 0.0 + ppl = math.exp(min(loss.item(), 20.0)) + return {"Loss": loss.item(), "PPL": ppl, "TokenAcc": acc} + + def evaluate(self, data=None): + loader = data if data is not None else self._eval_loader + if loader is None: + logger.info("No evaluation split available.") + return {"Loss": 0.0, "PPL": 1.0, "TokenAcc": 0.0} + + batches = [loader] if isinstance(loader, dict) else loader + self.model.eval() + total_loss = 0.0 + total_tokens = 0 + total_correct = 0 + + with torch.no_grad(): + for batch in batches: + batch = self._to_device(batch) + with self.device_config.autocast_context(): + output = self.model(batch["input_ids"], attention_mask=batch["attention_mask"]) + logits = output["logits"] + labels = batch["labels"] + loss = self.criterion(logits.reshape(-1, logits.size(-1)), labels.reshape(-1)) + + valid = labels != IGNORE_INDEX + tokens = 
int(valid.sum().item()) + total_loss += loss.item() * max(tokens, 1) + total_tokens += tokens + if tokens > 0: + total_correct += int((logits.argmax(dim=-1)[valid] == labels[valid]).sum().item()) + + denom = max(total_tokens, 1) + avg_loss = total_loss / denom + metrics = { + "Loss": avg_loss, + "PPL": math.exp(min(avg_loss, 20.0)), + "TokenAcc": total_correct / denom, + } + logger.info( + "Evaluation: Loss=%.4f PPL=%.2f TokenAcc=%.4f", + metrics["Loss"], + metrics["PPL"], + metrics["TokenAcc"], + ) + self.model.train() + return metrics + + def visualize(self, data=None): + logger.info("SLM visualization is not implemented; use evaluate() for perplexity and token accuracy.") + + def _model_core(self): + """Return the original module when torch.compile wraps the model.""" + return getattr(self.model, "_orig_mod", self.model) + + def _checkpoint_dir(self) -> Path: + checkpoint_cfg = _section(self.cfg, "checkpointing") + return Path(checkpoint_cfg.get("dir", "checkpoints/slm")) + + def save_checkpoint(self, path: str): + """Save SLM model state together with tokenizer state.""" + path_obj = Path(path) + path_obj.parent.mkdir(parents=True, exist_ok=True) + checkpoint = { + "model_state_dict": self.model.state_dict(), + "optimizer_state_dict": self.optimizer.state_dict(), + "scheduler_state_dict": self.scheduler.state_dict(), + "config": self.cfg, + "tokenizer_state": self.tokenizer.to_state() if self.tokenizer is not None else None, + "analysis_summary": self._last_analysis_summary, + } + torch.save(checkpoint, path_obj) + logger.info("Checkpoint saved to %s", path_obj) + + def load_checkpoint(self, path: str): + """Restore model, optimizer, scheduler, and tokenizer state when available.""" + try: + checkpoint = torch.load(path, map_location=self.device, weights_only=False) + except TypeError: + checkpoint = torch.load(path, map_location=self.device) + + self.model.load_state_dict(checkpoint["model_state_dict"]) + 
self.optimizer.load_state_dict(checkpoint["optimizer_state_dict"]) + self.scheduler.load_state_dict(checkpoint["scheduler_state_dict"]) + tokenizer_state = checkpoint.get("tokenizer_state") + if tokenizer_state is not None: + self.tokenizer = SubwordTokenizer.from_state(tokenizer_state) + self._last_analysis_summary = checkpoint.get("analysis_summary") + logger.info("Checkpoint loaded from %s", path) + + def _metric_is_better(self, value: float, best: float, mode: str) -> bool: + return value < best if mode == "min" else value > best + + def _maybe_save_best(self, logs: dict, best_value): + checkpoint_cfg = _section(self.cfg, "checkpointing") + if not checkpoint_cfg.get("enabled", True) or not checkpoint_cfg.get("save_best", True): + return best_value + + monitor = checkpoint_cfg.get("monitor", "EvalPPL") + metric = logs.get(monitor) + if metric is None: + return best_value + + mode = checkpoint_cfg.get("mode", "min") + if best_value is None or self._metric_is_better(metric, best_value, mode): + best_value = metric + self.save_checkpoint(self._checkpoint_dir() / checkpoint_cfg.get("best_filename", "slm_best.pt")) + return best_value + + def _maybe_save_final(self): + checkpoint_cfg = _section(self.cfg, "checkpointing") + if checkpoint_cfg.get("enabled", True) and checkpoint_cfg.get("save_final", True): + self.save_checkpoint(self._checkpoint_dir() / checkpoint_cfg.get("filename", "slm_final.pt")) + + def _analysis_should_run(self, stage: str) -> bool: + analysis_cfg = _section(self.cfg, "analysis") + if not analysis_cfg.get("enabled", False): + return False + run_on = analysis_cfg.get("run_on", "final") + return run_on == stage or run_on == "both" + + def _collect_analysis_states(self, loader, max_batches: int, max_samples: int): + if loader is None: + return None + + states = [] + self.model.eval() + with torch.no_grad(): + for batch_idx, batch in enumerate(loader): + if batch_idx >= max_batches: + break + batch = self._to_device(batch) + output = 
self.model(batch["input_ids"], attention_mask=batch["attention_mask"]) + hidden = output["hidden_states"] # [B, L, C, D] + valid = batch["attention_mask"].bool() + states.append(hidden[valid].detach()) + + total = sum(part.shape[0] for part in states) + if total >= max_samples: + break + + if not states: + return None + return torch.cat(states, dim=0)[:max_samples] + + def _run_model_analysis(self, stage: str): + analysis_cfg = _section(self.cfg, "analysis") + if not self._analysis_should_run(stage): + return None + + loader = self._eval_loader if self._eval_loader is not None else self._train_loader + max_batches = analysis_cfg.get("max_batches", 1) + max_samples = analysis_cfg.get("max_samples", 256) + mv_states = self._collect_analysis_states(loader, max_batches=max_batches, max_samples=max_samples) + if mv_states is None: + logger.info("Skipping SLM analysis: no hidden states collected.") + return None + + config = AnalysisConfig( + device=self.device, + sampling=SamplingConfig( + strategy=analysis_cfg.get("sampling_strategy", "passthrough"), + max_samples=max_samples, + seed=_section(self.cfg, "dataset").get("seed", 0), + ), + run_dimension=analysis_cfg.get("run_dimension", False), + run_signature=analysis_cfg.get("run_signature", False), + run_spectral=analysis_cfg.get("run_spectral", True), + run_symmetry=analysis_cfg.get("run_symmetry", True), + run_commutator=analysis_cfg.get("run_commutator", True), + energy_threshold=analysis_cfg.get("energy_threshold", 0.05), + k_neighbors=analysis_cfg.get("k_neighbors", 8), + ) + report = GeometricAnalyzer(config).analyze(mv_states, algebra=self.algebra) + summary = report.summary() + self._last_analysis_summary = summary + logger.info("SLM %s analysis:\n%s", stage, summary) + + checkpoint_cfg = _section(self.cfg, "checkpointing") + if analysis_cfg.get("save_summary", True) and checkpoint_cfg.get("enabled", True): + out = self._checkpoint_dir() / f"analysis_{stage}_summary.txt" + out.parent.mkdir(parents=True, 
exist_ok=True) + out.write_text(summary + "\n", encoding="utf-8") + logger.info("Analysis summary saved to %s", out) + + self.model.train() + return report + + def _run_inference_preview(self): + inference_cfg = _section(self.cfg, "inference") + if not inference_cfg.get("enabled", False): + return None + if self.tokenizer is None: + logger.info("Skipping SLM inference preview: tokenizer is not available.") + return None + + prompt = inference_cfg.get("prompt", "") + prompt_ids = [self.tokenizer.bos_id] + self.tokenizer.encode(prompt, add_special_tokens=False) + if not prompt_ids: + prompt_ids = [self.tokenizer.bos_id] + + input_ids = torch.tensor([prompt_ids], dtype=torch.long, device=self.device) + generated = self._model_core().generate( + input_ids=input_ids, + max_new_tokens=inference_cfg.get("max_new_tokens", 32), + temperature=inference_cfg.get("temperature", 1.0), + top_k=inference_cfg.get("top_k", 50), + sample=inference_cfg.get("sample", True), + eos_token_id=self.tokenizer.eos_id, + ) + text = self.tokenizer.decode(generated[0].tolist()) + logger.info("SLM inference preview: %s", text) + return text + + def run(self): + logger.info("Starting SLM task.") + train_loader = self.get_data() + + total_params = sum(param.numel() for param in self.model.parameters() if param.requires_grad) + reasoner_params = self._model_core().reasoner_parameter_count() + logger.info("Model parameters: total=%d reasoner=%d", total_params, reasoner_params) + + eval_interval = _section(self.cfg, "training").get("eval_interval", 1) + best_value = None + pbar = tqdm(range(self.epochs)) + for epoch in pbar: + total_loss = 0.0 + logs = {"Loss": 0.0, "PPL": 1.0, "TokenAcc": 0.0} + for batch in train_loader: + loss, logs = self.train_step(batch) + total_loss += loss + + avg_loss = total_loss / max(len(train_loader), 1) + self.scheduler.step(avg_loss) + + if self._eval_loader is not None and (epoch + 1) % eval_interval == 0: + eval_metrics = self.evaluate() + logs["EvalPPL"] = 
eval_metrics["PPL"] + logs["EvalAcc"] = eval_metrics["TokenAcc"] + if self._analysis_should_run("eval"): + self._run_model_analysis("eval") + + logs["Loss"] = avg_loss + logs["LR"] = self.optimizer.param_groups[0]["lr"] + best_value = self._maybe_save_best(logs, best_value) + pbar.set_description(" | ".join(f"{key}: {value:.4f}" for key, value in logs.items())) + + logger.info("SLM training complete.") + final_metrics = self.evaluate() if self._eval_loader is not None else {} + self._run_model_analysis("final") + self._run_inference_preview() + self._maybe_save_final() + return final_metrics diff --git a/uv.lock b/uv.lock index 1cd172e..6944d29 100644 --- a/uv.lock +++ b/uv.lock @@ -3842,144 +3842,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2c/58/ca301544e1fa93ed4f80d724bf5b194f6e4b945841c5bfd555878eea9fcb/referencing-0.37.0-py3-none-any.whl", hash = "sha256:381329a9f99628c9069361716891d34ad94af76e461dcb0335825aecc7692231", size = 26766, upload-time = "2025-10-13T15:30:47.625Z" }, ] -[[package]] -name = "regex" -version = "2026.1.15" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/0b/86/07d5056945f9ec4590b518171c4254a5925832eb727b56d3c38a7476f316/regex-2026.1.15.tar.gz", hash = "sha256:164759aa25575cbc0651bef59a0b18353e54300d79ace8084c818ad8ac72b7d5", size = 414811, upload-time = "2026-01-14T23:18:02.775Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ea/d2/e6ee96b7dff201a83f650241c52db8e5bd080967cb93211f57aa448dc9d6/regex-2026.1.15-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:4e3dd93c8f9abe8aa4b6c652016da9a3afa190df5ad822907efe6b206c09896e", size = 488166, upload-time = "2026-01-14T23:13:46.408Z" }, - { url = "https://files.pythonhosted.org/packages/23/8a/819e9ce14c9f87af026d0690901b3931f3101160833e5d4c8061fa3a1b67/regex-2026.1.15-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:97499ff7862e868b1977107873dd1a06e151467129159a6ffd07b66706ba3a9f", size = 
290632, upload-time = "2026-01-14T23:13:48.688Z" }, - { url = "https://files.pythonhosted.org/packages/d5/c3/23dfe15af25d1d45b07dfd4caa6003ad710dcdcb4c4b279909bdfe7a2de8/regex-2026.1.15-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0bda75ebcac38d884240914c6c43d8ab5fb82e74cde6da94b43b17c411aa4c2b", size = 288500, upload-time = "2026-01-14T23:13:50.503Z" }, - { url = "https://files.pythonhosted.org/packages/c6/31/1adc33e2f717df30d2f4d973f8776d2ba6ecf939301efab29fca57505c95/regex-2026.1.15-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7dcc02368585334f5bc81fc73a2a6a0bbade60e7d83da21cead622faf408f32c", size = 781670, upload-time = "2026-01-14T23:13:52.453Z" }, - { url = "https://files.pythonhosted.org/packages/23/ce/21a8a22d13bc4adcb927c27b840c948f15fc973e21ed2346c1bd0eae22dc/regex-2026.1.15-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:693b465171707bbe882a7a05de5e866f33c76aa449750bee94a8d90463533cc9", size = 850820, upload-time = "2026-01-14T23:13:54.894Z" }, - { url = "https://files.pythonhosted.org/packages/6c/4f/3eeacdf587a4705a44484cd0b30e9230a0e602811fb3e2cc32268c70d509/regex-2026.1.15-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b0d190e6f013ea938623a58706d1469a62103fb2a241ce2873a9906e0386582c", size = 898777, upload-time = "2026-01-14T23:13:56.908Z" }, - { url = "https://files.pythonhosted.org/packages/79/a9/1898a077e2965c35fc22796488141a22676eed2d73701e37c73ad7c0b459/regex-2026.1.15-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5ff818702440a5878a81886f127b80127f5d50563753a28211482867f8318106", size = 791750, upload-time = "2026-01-14T23:13:58.527Z" }, - { url = "https://files.pythonhosted.org/packages/4c/84/e31f9d149a178889b3817212827f5e0e8c827a049ff31b4b381e76b26e2d/regex-2026.1.15-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:f052d1be37ef35a54e394de66136e30fa1191fab64f71fc06ac7bc98c9a84618", size = 782674, upload-time = "2026-01-14T23:13:59.874Z" }, - { url = "https://files.pythonhosted.org/packages/d2/ff/adf60063db24532add6a1676943754a5654dcac8237af024ede38244fd12/regex-2026.1.15-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:6bfc31a37fd1592f0c4fc4bfc674b5c42e52efe45b4b7a6a14f334cca4bcebe4", size = 767906, upload-time = "2026-01-14T23:14:01.298Z" }, - { url = "https://files.pythonhosted.org/packages/af/3e/e6a216cee1e2780fec11afe7fc47b6f3925d7264e8149c607ac389fd9b1a/regex-2026.1.15-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:3d6ce5ae80066b319ae3bc62fd55a557c9491baa5efd0d355f0de08c4ba54e79", size = 774798, upload-time = "2026-01-14T23:14:02.715Z" }, - { url = "https://files.pythonhosted.org/packages/0f/98/23a4a8378a9208514ed3efc7e7850c27fa01e00ed8557c958df0335edc4a/regex-2026.1.15-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:1704d204bd42b6bb80167df0e4554f35c255b579ba99616def38f69e14a5ccb9", size = 845861, upload-time = "2026-01-14T23:14:04.824Z" }, - { url = "https://files.pythonhosted.org/packages/f8/57/d7605a9d53bd07421a8785d349cd29677fe660e13674fa4c6cbd624ae354/regex-2026.1.15-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:e3174a5ed4171570dc8318afada56373aa9289eb6dc0d96cceb48e7358b0e220", size = 755648, upload-time = "2026-01-14T23:14:06.371Z" }, - { url = "https://files.pythonhosted.org/packages/6f/76/6f2e24aa192da1e299cc1101674a60579d3912391867ce0b946ba83e2194/regex-2026.1.15-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:87adf5bd6d72e3e17c9cb59ac4096b1faaf84b7eb3037a5ffa61c4b4370f0f13", size = 836250, upload-time = "2026-01-14T23:14:08.343Z" }, - { url = "https://files.pythonhosted.org/packages/11/3a/1f2a1d29453299a7858eab7759045fc3d9d1b429b088dec2dc85b6fa16a2/regex-2026.1.15-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:e85dc94595f4d766bd7d872a9de5ede1ca8d3063f3bdf1e2c725f5eb411159e3", size = 
779919, upload-time = "2026-01-14T23:14:09.954Z" }, - { url = "https://files.pythonhosted.org/packages/c0/67/eab9bc955c9dcc58e9b222c801e39cff7ca0b04261792a2149166ce7e792/regex-2026.1.15-cp310-cp310-win32.whl", hash = "sha256:21ca32c28c30d5d65fc9886ff576fc9b59bbca08933e844fa2363e530f4c8218", size = 265888, upload-time = "2026-01-14T23:14:11.35Z" }, - { url = "https://files.pythonhosted.org/packages/1d/62/31d16ae24e1f8803bddb0885508acecaec997fcdcde9c243787103119ae4/regex-2026.1.15-cp310-cp310-win_amd64.whl", hash = "sha256:3038a62fc7d6e5547b8915a3d927a0fbeef84cdbe0b1deb8c99bbd4a8961b52a", size = 277830, upload-time = "2026-01-14T23:14:12.908Z" }, - { url = "https://files.pythonhosted.org/packages/e5/36/5d9972bccd6417ecd5a8be319cebfd80b296875e7f116c37fb2a2deecebf/regex-2026.1.15-cp310-cp310-win_arm64.whl", hash = "sha256:505831646c945e3e63552cc1b1b9b514f0e93232972a2d5bedbcc32f15bc82e3", size = 270376, upload-time = "2026-01-14T23:14:14.782Z" }, - { url = "https://files.pythonhosted.org/packages/d0/c9/0c80c96eab96948363d270143138d671d5731c3a692b417629bf3492a9d6/regex-2026.1.15-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:1ae6020fb311f68d753b7efa9d4b9a5d47a5d6466ea0d5e3b5a471a960ea6e4a", size = 488168, upload-time = "2026-01-14T23:14:16.129Z" }, - { url = "https://files.pythonhosted.org/packages/17/f0/271c92f5389a552494c429e5cc38d76d1322eb142fb5db3c8ccc47751468/regex-2026.1.15-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:eddf73f41225942c1f994914742afa53dc0d01a6e20fe14b878a1b1edc74151f", size = 290636, upload-time = "2026-01-14T23:14:17.715Z" }, - { url = "https://files.pythonhosted.org/packages/a0/f9/5f1fd077d106ca5655a0f9ff8f25a1ab55b92128b5713a91ed7134ff688e/regex-2026.1.15-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1e8cd52557603f5c66a548f69421310886b28b7066853089e1a71ee710e1cdc1", size = 288496, upload-time = "2026-01-14T23:14:19.326Z" }, - { url = 
"https://files.pythonhosted.org/packages/b5/e1/8f43b03a4968c748858ec77f746c286d81f896c2e437ccf050ebc5d3128c/regex-2026.1.15-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5170907244b14303edc5978f522f16c974f32d3aa92109fabc2af52411c9433b", size = 793503, upload-time = "2026-01-14T23:14:20.922Z" }, - { url = "https://files.pythonhosted.org/packages/8d/4e/a39a5e8edc5377a46a7c875c2f9a626ed3338cb3bb06931be461c3e1a34a/regex-2026.1.15-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2748c1ec0663580b4510bd89941a31560b4b439a0b428b49472a3d9944d11cd8", size = 860535, upload-time = "2026-01-14T23:14:22.405Z" }, - { url = "https://files.pythonhosted.org/packages/dc/1c/9dce667a32a9477f7a2869c1c767dc00727284a9fa3ff5c09a5c6c03575e/regex-2026.1.15-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2f2775843ca49360508d080eaa87f94fa248e2c946bbcd963bb3aae14f333413", size = 907225, upload-time = "2026-01-14T23:14:23.897Z" }, - { url = "https://files.pythonhosted.org/packages/a4/3c/87ca0a02736d16b6262921425e84b48984e77d8e4e572c9072ce96e66c30/regex-2026.1.15-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d9ea2604370efc9a174c1b5dcc81784fb040044232150f7f33756049edfc9026", size = 800526, upload-time = "2026-01-14T23:14:26.039Z" }, - { url = "https://files.pythonhosted.org/packages/4b/ff/647d5715aeea7c87bdcbd2f578f47b415f55c24e361e639fe8c0cc88878f/regex-2026.1.15-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:0dcd31594264029b57bf16f37fd7248a70b3b764ed9e0839a8f271b2d22c0785", size = 773446, upload-time = "2026-01-14T23:14:28.109Z" }, - { url = "https://files.pythonhosted.org/packages/af/89/bf22cac25cb4ba0fe6bff52ebedbb65b77a179052a9d6037136ae93f42f4/regex-2026.1.15-cp311-cp311-musllinux_1_2_aarch64.whl", hash = 
"sha256:c08c1f3e34338256732bd6938747daa3c0d5b251e04b6e43b5813e94d503076e", size = 783051, upload-time = "2026-01-14T23:14:29.929Z" }, - { url = "https://files.pythonhosted.org/packages/1e/f4/6ed03e71dca6348a5188363a34f5e26ffd5db1404780288ff0d79513bce4/regex-2026.1.15-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:e43a55f378df1e7a4fa3547c88d9a5a9b7113f653a66821bcea4718fe6c58763", size = 854485, upload-time = "2026-01-14T23:14:31.366Z" }, - { url = "https://files.pythonhosted.org/packages/d9/9a/8e8560bd78caded8eb137e3e47612430a05b9a772caf60876435192d670a/regex-2026.1.15-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:f82110ab962a541737bd0ce87978d4c658f06e7591ba899192e2712a517badbb", size = 762195, upload-time = "2026-01-14T23:14:32.802Z" }, - { url = "https://files.pythonhosted.org/packages/38/6b/61fc710f9aa8dfcd764fe27d37edfaa023b1a23305a0d84fccd5adb346ea/regex-2026.1.15-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:27618391db7bdaf87ac6c92b31e8f0dfb83a9de0075855152b720140bda177a2", size = 845986, upload-time = "2026-01-14T23:14:34.898Z" }, - { url = "https://files.pythonhosted.org/packages/fd/2e/fbee4cb93f9d686901a7ca8d94285b80405e8c34fe4107f63ffcbfb56379/regex-2026.1.15-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bfb0d6be01fbae8d6655c8ca21b3b72458606c4aec9bbc932db758d47aba6db1", size = 788992, upload-time = "2026-01-14T23:14:37.116Z" }, - { url = "https://files.pythonhosted.org/packages/ed/14/3076348f3f586de64b1ab75a3fbabdaab7684af7f308ad43be7ef1849e55/regex-2026.1.15-cp311-cp311-win32.whl", hash = "sha256:b10e42a6de0e32559a92f2f8dc908478cc0fa02838d7dbe764c44dca3fa13569", size = 265893, upload-time = "2026-01-14T23:14:38.426Z" }, - { url = "https://files.pythonhosted.org/packages/0f/19/772cf8b5fc803f5c89ba85d8b1870a1ca580dc482aa030383a9289c82e44/regex-2026.1.15-cp311-cp311-win_amd64.whl", hash = "sha256:e9bf3f0bbdb56633c07d7116ae60a576f846efdd86a8848f8d62b749e1209ca7", size = 277840, upload-time = "2026-01-14T23:14:39.785Z" }, - { 
url = "https://files.pythonhosted.org/packages/78/84/d05f61142709474da3c0853222d91086d3e1372bcdab516c6fd8d80f3297/regex-2026.1.15-cp311-cp311-win_arm64.whl", hash = "sha256:41aef6f953283291c4e4e6850607bd71502be67779586a61472beacb315c97ec", size = 270374, upload-time = "2026-01-14T23:14:41.592Z" }, - { url = "https://files.pythonhosted.org/packages/92/81/10d8cf43c807d0326efe874c1b79f22bfb0fb226027b0b19ebc26d301408/regex-2026.1.15-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:4c8fcc5793dde01641a35905d6731ee1548f02b956815f8f1cab89e515a5bdf1", size = 489398, upload-time = "2026-01-14T23:14:43.741Z" }, - { url = "https://files.pythonhosted.org/packages/90/b0/7c2a74e74ef2a7c32de724658a69a862880e3e4155cba992ba04d1c70400/regex-2026.1.15-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:bfd876041a956e6a90ad7cdb3f6a630c07d491280bfeed4544053cd434901681", size = 291339, upload-time = "2026-01-14T23:14:45.183Z" }, - { url = "https://files.pythonhosted.org/packages/19/4d/16d0773d0c818417f4cc20aa0da90064b966d22cd62a8c46765b5bd2d643/regex-2026.1.15-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9250d087bc92b7d4899ccd5539a1b2334e44eee85d848c4c1aef8e221d3f8c8f", size = 289003, upload-time = "2026-01-14T23:14:47.25Z" }, - { url = "https://files.pythonhosted.org/packages/c6/e4/1fc4599450c9f0863d9406e944592d968b8d6dfd0d552a7d569e43bceada/regex-2026.1.15-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c8a154cf6537ebbc110e24dabe53095e714245c272da9c1be05734bdad4a61aa", size = 798656, upload-time = "2026-01-14T23:14:48.77Z" }, - { url = "https://files.pythonhosted.org/packages/b2/e6/59650d73a73fa8a60b3a590545bfcf1172b4384a7df2e7fe7b9aab4e2da9/regex-2026.1.15-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8050ba2e3ea1d8731a549e83c18d2f0999fbc99a5f6bd06b4c91449f55291804", size = 864252, upload-time = "2026-01-14T23:14:50.528Z" }, - { url = 
"https://files.pythonhosted.org/packages/6e/ab/1d0f4d50a1638849a97d731364c9a80fa304fec46325e48330c170ee8e80/regex-2026.1.15-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0bf065240704cb8951cc04972cf107063917022511273e0969bdb34fc173456c", size = 912268, upload-time = "2026-01-14T23:14:52.952Z" }, - { url = "https://files.pythonhosted.org/packages/dd/df/0d722c030c82faa1d331d1921ee268a4e8fb55ca8b9042c9341c352f17fa/regex-2026.1.15-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c32bef3e7aeee75746748643667668ef941d28b003bfc89994ecf09a10f7a1b5", size = 803589, upload-time = "2026-01-14T23:14:55.182Z" }, - { url = "https://files.pythonhosted.org/packages/66/23/33289beba7ccb8b805c6610a8913d0131f834928afc555b241caabd422a9/regex-2026.1.15-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d5eaa4a4c5b1906bd0d2508d68927f15b81821f85092e06f1a34a4254b0e1af3", size = 775700, upload-time = "2026-01-14T23:14:56.707Z" }, - { url = "https://files.pythonhosted.org/packages/e7/65/bf3a42fa6897a0d3afa81acb25c42f4b71c274f698ceabd75523259f6688/regex-2026.1.15-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:86c1077a3cc60d453d4084d5b9649065f3bf1184e22992bd322e1f081d3117fb", size = 787928, upload-time = "2026-01-14T23:14:58.312Z" }, - { url = "https://files.pythonhosted.org/packages/f4/f5/13bf65864fc314f68cdd6d8ca94adcab064d4d39dbd0b10fef29a9da48fc/regex-2026.1.15-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:2b091aefc05c78d286657cd4db95f2e6313375ff65dcf085e42e4c04d9c8d410", size = 858607, upload-time = "2026-01-14T23:15:00.657Z" }, - { url = "https://files.pythonhosted.org/packages/a3/31/040e589834d7a439ee43fb0e1e902bc81bd58a5ba81acffe586bb3321d35/regex-2026.1.15-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:57e7d17f59f9ebfa9667e6e5a1c0127b96b87cb9cede8335482451ed00788ba4", size = 763729, upload-time = "2026-01-14T23:15:02.248Z" }, - { url = 
"https://files.pythonhosted.org/packages/9b/84/6921e8129687a427edf25a34a5594b588b6d88f491320b9de5b6339a4fcb/regex-2026.1.15-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:c6c4dcdfff2c08509faa15d36ba7e5ef5fcfab25f1e8f85a0c8f45bc3a30725d", size = 850697, upload-time = "2026-01-14T23:15:03.878Z" }, - { url = "https://files.pythonhosted.org/packages/8a/87/3d06143d4b128f4229158f2de5de6c8f2485170c7221e61bf381313314b2/regex-2026.1.15-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:cf8ff04c642716a7f2048713ddc6278c5fd41faa3b9cab12607c7abecd012c22", size = 789849, upload-time = "2026-01-14T23:15:06.102Z" }, - { url = "https://files.pythonhosted.org/packages/77/69/c50a63842b6bd48850ebc7ab22d46e7a2a32d824ad6c605b218441814639/regex-2026.1.15-cp312-cp312-win32.whl", hash = "sha256:82345326b1d8d56afbe41d881fdf62f1926d7264b2fc1537f99ae5da9aad7913", size = 266279, upload-time = "2026-01-14T23:15:07.678Z" }, - { url = "https://files.pythonhosted.org/packages/f2/36/39d0b29d087e2b11fd8191e15e81cce1b635fcc845297c67f11d0d19274d/regex-2026.1.15-cp312-cp312-win_amd64.whl", hash = "sha256:4def140aa6156bc64ee9912383d4038f3fdd18fee03a6f222abd4de6357ce42a", size = 277166, upload-time = "2026-01-14T23:15:09.257Z" }, - { url = "https://files.pythonhosted.org/packages/28/32/5b8e476a12262748851fa8ab1b0be540360692325975b094e594dfebbb52/regex-2026.1.15-cp312-cp312-win_arm64.whl", hash = "sha256:c6c565d9a6e1a8d783c1948937ffc377dd5771e83bd56de8317c450a954d2056", size = 270415, upload-time = "2026-01-14T23:15:10.743Z" }, - { url = "https://files.pythonhosted.org/packages/f8/2e/6870bb16e982669b674cce3ee9ff2d1d46ab80528ee6bcc20fb2292efb60/regex-2026.1.15-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e69d0deeb977ffe7ed3d2e4439360089f9c3f217ada608f0f88ebd67afb6385e", size = 489164, upload-time = "2026-01-14T23:15:13.962Z" }, - { url = 
"https://files.pythonhosted.org/packages/dc/67/9774542e203849b0286badf67199970a44ebdb0cc5fb739f06e47ada72f8/regex-2026.1.15-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3601ffb5375de85a16f407854d11cca8fe3f5febbe3ac78fb2866bb220c74d10", size = 291218, upload-time = "2026-01-14T23:15:15.647Z" }, - { url = "https://files.pythonhosted.org/packages/b2/87/b0cda79f22b8dee05f774922a214da109f9a4c0eca5da2c9d72d77ea062c/regex-2026.1.15-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4c5ef43b5c2d4114eb8ea424bb8c9cec01d5d17f242af88b2448f5ee81caadbc", size = 288895, upload-time = "2026-01-14T23:15:17.788Z" }, - { url = "https://files.pythonhosted.org/packages/3b/6a/0041f0a2170d32be01ab981d6346c83a8934277d82c780d60b127331f264/regex-2026.1.15-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:968c14d4f03e10b2fd960f1d5168c1f0ac969381d3c1fcc973bc45fb06346599", size = 798680, upload-time = "2026-01-14T23:15:19.342Z" }, - { url = "https://files.pythonhosted.org/packages/58/de/30e1cfcdbe3e891324aa7568b7c968771f82190df5524fabc1138cb2d45a/regex-2026.1.15-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:56a5595d0f892f214609c9f76b41b7428bed439d98dc961efafdd1354d42baae", size = 864210, upload-time = "2026-01-14T23:15:22.005Z" }, - { url = "https://files.pythonhosted.org/packages/64/44/4db2f5c5ca0ccd40ff052ae7b1e9731352fcdad946c2b812285a7505ca75/regex-2026.1.15-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0bf650f26087363434c4e560011f8e4e738f6f3e029b85d4904c50135b86cfa5", size = 912358, upload-time = "2026-01-14T23:15:24.569Z" }, - { url = "https://files.pythonhosted.org/packages/79/b6/e6a5665d43a7c42467138c8a2549be432bad22cbd206f5ec87162de74bd7/regex-2026.1.15-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:18388a62989c72ac24de75f1449d0fb0b04dfccd0a1a7c1c43af5eb503d890f6", size = 
803583, upload-time = "2026-01-14T23:15:26.526Z" }, - { url = "https://files.pythonhosted.org/packages/e7/53/7cd478222169d85d74d7437e74750005e993f52f335f7c04ff7adfda3310/regex-2026.1.15-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:6d220a2517f5893f55daac983bfa9fe998a7dbcaee4f5d27a88500f8b7873788", size = 775782, upload-time = "2026-01-14T23:15:29.352Z" }, - { url = "https://files.pythonhosted.org/packages/ca/b5/75f9a9ee4b03a7c009fe60500fe550b45df94f0955ca29af16333ef557c5/regex-2026.1.15-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c9c08c2fbc6120e70abff5d7f28ffb4d969e14294fb2143b4b5c7d20e46d1714", size = 787978, upload-time = "2026-01-14T23:15:31.295Z" }, - { url = "https://files.pythonhosted.org/packages/72/b3/79821c826245bbe9ccbb54f6eadb7879c722fd3e0248c17bfc90bf54e123/regex-2026.1.15-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:7ef7d5d4bd49ec7364315167a4134a015f61e8266c6d446fc116a9ac4456e10d", size = 858550, upload-time = "2026-01-14T23:15:33.558Z" }, - { url = "https://files.pythonhosted.org/packages/4a/85/2ab5f77a1c465745bfbfcb3ad63178a58337ae8d5274315e2cc623a822fa/regex-2026.1.15-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:6e42844ad64194fa08d5ccb75fe6a459b9b08e6d7296bd704460168d58a388f3", size = 763747, upload-time = "2026-01-14T23:15:35.206Z" }, - { url = "https://files.pythonhosted.org/packages/6d/84/c27df502d4bfe2873a3e3a7cf1bdb2b9cc10284d1a44797cf38bed790470/regex-2026.1.15-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:cfecdaa4b19f9ca534746eb3b55a5195d5c95b88cac32a205e981ec0a22b7d31", size = 850615, upload-time = "2026-01-14T23:15:37.523Z" }, - { url = "https://files.pythonhosted.org/packages/7d/b7/658a9782fb253680aa8ecb5ccbb51f69e088ed48142c46d9f0c99b46c575/regex-2026.1.15-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:08df9722d9b87834a3d701f3fca570b2be115654dbfd30179f30ab2f39d606d3", size = 789951, upload-time = "2026-01-14T23:15:39.582Z" }, - { url = 
"https://files.pythonhosted.org/packages/fc/2a/5928af114441e059f15b2f63e188bd00c6529b3051c974ade7444b85fcda/regex-2026.1.15-cp313-cp313-win32.whl", hash = "sha256:d426616dae0967ca225ab12c22274eb816558f2f99ccb4a1d52ca92e8baf180f", size = 266275, upload-time = "2026-01-14T23:15:42.108Z" }, - { url = "https://files.pythonhosted.org/packages/4f/16/5bfbb89e435897bff28cf0352a992ca719d9e55ebf8b629203c96b6ce4f7/regex-2026.1.15-cp313-cp313-win_amd64.whl", hash = "sha256:febd38857b09867d3ed3f4f1af7d241c5c50362e25ef43034995b77a50df494e", size = 277145, upload-time = "2026-01-14T23:15:44.244Z" }, - { url = "https://files.pythonhosted.org/packages/56/c1/a09ff7392ef4233296e821aec5f78c51be5e91ffde0d163059e50fd75835/regex-2026.1.15-cp313-cp313-win_arm64.whl", hash = "sha256:8e32f7896f83774f91499d239e24cebfadbc07639c1494bb7213983842348337", size = 270411, upload-time = "2026-01-14T23:15:45.858Z" }, - { url = "https://files.pythonhosted.org/packages/3c/38/0cfd5a78e5c6db00e6782fdae70458f89850ce95baa5e8694ab91d89744f/regex-2026.1.15-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:ec94c04149b6a7b8120f9f44565722c7ae31b7a6d2275569d2eefa76b83da3be", size = 492068, upload-time = "2026-01-14T23:15:47.616Z" }, - { url = "https://files.pythonhosted.org/packages/50/72/6c86acff16cb7c959c4355826bbf06aad670682d07c8f3998d9ef4fee7cd/regex-2026.1.15-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:40c86d8046915bb9aeb15d3f3f15b6fd500b8ea4485b30e1bbc799dab3fe29f8", size = 292756, upload-time = "2026-01-14T23:15:49.307Z" }, - { url = "https://files.pythonhosted.org/packages/4e/58/df7fb69eadfe76526ddfce28abdc0af09ffe65f20c2c90932e89d705153f/regex-2026.1.15-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:726ea4e727aba21643205edad8f2187ec682d3305d790f73b7a51c7587b64bdd", size = 291114, upload-time = "2026-01-14T23:15:51.484Z" }, - { url = 
"https://files.pythonhosted.org/packages/ed/6c/a4011cd1cf96b90d2cdc7e156f91efbd26531e822a7fbb82a43c1016678e/regex-2026.1.15-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1cb740d044aff31898804e7bf1181cc72c03d11dfd19932b9911ffc19a79070a", size = 807524, upload-time = "2026-01-14T23:15:53.102Z" }, - { url = "https://files.pythonhosted.org/packages/1d/25/a53ffb73183f69c3e9f4355c4922b76d2840aee160af6af5fac229b6201d/regex-2026.1.15-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:05d75a668e9ea16f832390d22131fe1e8acc8389a694c8febc3e340b0f810b93", size = 873455, upload-time = "2026-01-14T23:15:54.956Z" }, - { url = "https://files.pythonhosted.org/packages/66/0b/8b47fc2e8f97d9b4a851736f3890a5f786443aa8901061c55f24c955f45b/regex-2026.1.15-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d991483606f3dbec93287b9f35596f41aa2e92b7c2ebbb935b63f409e243c9af", size = 915007, upload-time = "2026-01-14T23:15:57.041Z" }, - { url = "https://files.pythonhosted.org/packages/c2/fa/97de0d681e6d26fabe71968dbee06dd52819e9a22fdce5dac7256c31ed84/regex-2026.1.15-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:194312a14819d3e44628a44ed6fea6898fdbecb0550089d84c403475138d0a09", size = 812794, upload-time = "2026-01-14T23:15:58.916Z" }, - { url = "https://files.pythonhosted.org/packages/22/38/e752f94e860d429654aa2b1c51880bff8dfe8f084268258adf9151cf1f53/regex-2026.1.15-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fe2fda4110a3d0bc163c2e0664be44657431440722c5c5315c65155cab92f9e5", size = 781159, upload-time = "2026-01-14T23:16:00.817Z" }, - { url = "https://files.pythonhosted.org/packages/e9/a7/d739ffaef33c378fc888302a018d7f81080393d96c476b058b8c64fd2b0d/regex-2026.1.15-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = 
"sha256:124dc36c85d34ef2d9164da41a53c1c8c122cfb1f6e1ec377a1f27ee81deb794", size = 795558, upload-time = "2026-01-14T23:16:03.267Z" }, - { url = "https://files.pythonhosted.org/packages/3e/c4/542876f9a0ac576100fc73e9c75b779f5c31e3527576cfc9cb3009dcc58a/regex-2026.1.15-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:a1774cd1981cd212506a23a14dba7fdeaee259f5deba2df6229966d9911e767a", size = 868427, upload-time = "2026-01-14T23:16:05.646Z" }, - { url = "https://files.pythonhosted.org/packages/fc/0f/d5655bea5b22069e32ae85a947aa564912f23758e112cdb74212848a1a1b/regex-2026.1.15-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:b5f7d8d2867152cdb625e72a530d2ccb48a3d199159144cbdd63870882fb6f80", size = 769939, upload-time = "2026-01-14T23:16:07.542Z" }, - { url = "https://files.pythonhosted.org/packages/20/06/7e18a4fa9d326daeda46d471a44ef94201c46eaa26dbbb780b5d92cbfdda/regex-2026.1.15-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:492534a0ab925d1db998defc3c302dae3616a2fc3fe2e08db1472348f096ddf2", size = 854753, upload-time = "2026-01-14T23:16:10.395Z" }, - { url = "https://files.pythonhosted.org/packages/3b/67/dc8946ef3965e166f558ef3b47f492bc364e96a265eb4a2bb3ca765c8e46/regex-2026.1.15-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c661fc820cfb33e166bf2450d3dadbda47c8d8981898adb9b6fe24e5e582ba60", size = 799559, upload-time = "2026-01-14T23:16:12.347Z" }, - { url = "https://files.pythonhosted.org/packages/a5/61/1bba81ff6d50c86c65d9fd84ce9699dd106438ee4cdb105bf60374ee8412/regex-2026.1.15-cp313-cp313t-win32.whl", hash = "sha256:99ad739c3686085e614bf77a508e26954ff1b8f14da0e3765ff7abbf7799f952", size = 268879, upload-time = "2026-01-14T23:16:14.049Z" }, - { url = "https://files.pythonhosted.org/packages/e9/5e/cef7d4c5fb0ea3ac5c775fd37db5747f7378b29526cc83f572198924ff47/regex-2026.1.15-cp313-cp313t-win_amd64.whl", hash = "sha256:32655d17905e7ff8ba5c764c43cb124e34a9245e45b83c22e81041e1071aee10", size = 280317, upload-time = "2026-01-14T23:16:15.718Z" }, 
- { url = "https://files.pythonhosted.org/packages/b4/52/4317f7a5988544e34ab57b4bde0f04944c4786128c933fb09825924d3e82/regex-2026.1.15-cp313-cp313t-win_arm64.whl", hash = "sha256:b2a13dd6a95e95a489ca242319d18fc02e07ceb28fa9ad146385194d95b3c829", size = 271551, upload-time = "2026-01-14T23:16:17.533Z" }, - { url = "https://files.pythonhosted.org/packages/52/0a/47fa888ec7cbbc7d62c5f2a6a888878e76169170ead271a35239edd8f0e8/regex-2026.1.15-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:d920392a6b1f353f4aa54328c867fec3320fa50657e25f64abf17af054fc97ac", size = 489170, upload-time = "2026-01-14T23:16:19.835Z" }, - { url = "https://files.pythonhosted.org/packages/ac/c4/d000e9b7296c15737c9301708e9e7fbdea009f8e93541b6b43bdb8219646/regex-2026.1.15-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:b5a28980a926fa810dbbed059547b02783952e2efd9c636412345232ddb87ff6", size = 291146, upload-time = "2026-01-14T23:16:21.541Z" }, - { url = "https://files.pythonhosted.org/packages/f9/b6/921cc61982e538682bdf3bdf5b2c6ab6b34368da1f8e98a6c1ddc503c9cf/regex-2026.1.15-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:621f73a07595d83f28952d7bd1e91e9d1ed7625fb7af0064d3516674ec93a2a2", size = 288986, upload-time = "2026-01-14T23:16:23.381Z" }, - { url = "https://files.pythonhosted.org/packages/ca/33/eb7383dde0bbc93f4fb9d03453aab97e18ad4024ac7e26cef8d1f0a2cff0/regex-2026.1.15-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3d7d92495f47567a9b1669c51fc8d6d809821849063d168121ef801bbc213846", size = 799098, upload-time = "2026-01-14T23:16:25.088Z" }, - { url = "https://files.pythonhosted.org/packages/27/56/b664dccae898fc8d8b4c23accd853f723bde0f026c747b6f6262b688029c/regex-2026.1.15-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8dd16fba2758db7a3780a051f245539c4451ca20910f5a5e6ea1c08d06d4a76b", size = 864980, upload-time = "2026-01-14T23:16:27.297Z" }, - { url = 
"https://files.pythonhosted.org/packages/16/40/0999e064a170eddd237bae9ccfcd8f28b3aa98a38bf727a086425542a4fc/regex-2026.1.15-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:1e1808471fbe44c1a63e5f577a1d5f02fe5d66031dcbdf12f093ffc1305a858e", size = 911607, upload-time = "2026-01-14T23:16:29.235Z" }, - { url = "https://files.pythonhosted.org/packages/07/78/c77f644b68ab054e5a674fb4da40ff7bffb2c88df58afa82dbf86573092d/regex-2026.1.15-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0751a26ad39d4f2ade8fe16c59b2bf5cb19eb3d2cd543e709e583d559bd9efde", size = 803358, upload-time = "2026-01-14T23:16:31.369Z" }, - { url = "https://files.pythonhosted.org/packages/27/31/d4292ea8566eaa551fafc07797961c5963cf5235c797cc2ae19b85dfd04d/regex-2026.1.15-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:0f0c7684c7f9ca241344ff95a1de964f257a5251968484270e91c25a755532c5", size = 775833, upload-time = "2026-01-14T23:16:33.141Z" }, - { url = "https://files.pythonhosted.org/packages/ce/b2/cff3bf2fea4133aa6fb0d1e370b37544d18c8350a2fa118c7e11d1db0e14/regex-2026.1.15-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:74f45d170a21df41508cb67165456538425185baaf686281fa210d7e729abc34", size = 788045, upload-time = "2026-01-14T23:16:35.005Z" }, - { url = "https://files.pythonhosted.org/packages/8d/99/2cb9b69045372ec877b6f5124bda4eb4253bc58b8fe5848c973f752bc52c/regex-2026.1.15-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:f1862739a1ffb50615c0fde6bae6569b5efbe08d98e59ce009f68a336f64da75", size = 859374, upload-time = "2026-01-14T23:16:36.919Z" }, - { url = "https://files.pythonhosted.org/packages/09/16/710b0a5abe8e077b1729a562d2f297224ad079f3a66dce46844c193416c8/regex-2026.1.15-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:453078802f1b9e2b7303fb79222c054cb18e76f7bdc220f7530fdc85d319f99e", size = 763940, upload-time = "2026-01-14T23:16:38.685Z" }, - { url = 
"https://files.pythonhosted.org/packages/dd/d1/7585c8e744e40eb3d32f119191969b91de04c073fca98ec14299041f6e7e/regex-2026.1.15-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:a30a68e89e5a218b8b23a52292924c1f4b245cb0c68d1cce9aec9bbda6e2c160", size = 850112, upload-time = "2026-01-14T23:16:40.646Z" }, - { url = "https://files.pythonhosted.org/packages/af/d6/43e1dd85df86c49a347aa57c1f69d12c652c7b60e37ec162e3096194a278/regex-2026.1.15-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:9479cae874c81bf610d72b85bb681a94c95722c127b55445285fb0e2c82db8e1", size = 789586, upload-time = "2026-01-14T23:16:42.799Z" }, - { url = "https://files.pythonhosted.org/packages/93/38/77142422f631e013f316aaae83234c629555729a9fbc952b8a63ac91462a/regex-2026.1.15-cp314-cp314-win32.whl", hash = "sha256:d639a750223132afbfb8f429c60d9d318aeba03281a5f1ab49f877456448dcf1", size = 271691, upload-time = "2026-01-14T23:16:44.671Z" }, - { url = "https://files.pythonhosted.org/packages/4a/a9/ab16b4649524ca9e05213c1cdbb7faa85cc2aa90a0230d2f796cbaf22736/regex-2026.1.15-cp314-cp314-win_amd64.whl", hash = "sha256:4161d87f85fa831e31469bfd82c186923070fc970b9de75339b68f0c75b51903", size = 280422, upload-time = "2026-01-14T23:16:46.607Z" }, - { url = "https://files.pythonhosted.org/packages/be/2a/20fd057bf3521cb4791f69f869635f73e0aaf2b9ad2d260f728144f9047c/regex-2026.1.15-cp314-cp314-win_arm64.whl", hash = "sha256:91c5036ebb62663a6b3999bdd2e559fd8456d17e2b485bf509784cd31a8b1705", size = 273467, upload-time = "2026-01-14T23:16:48.967Z" }, - { url = "https://files.pythonhosted.org/packages/ad/77/0b1e81857060b92b9cad239104c46507dd481b3ff1fa79f8e7f865aae38a/regex-2026.1.15-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:ee6854c9000a10938c79238de2379bea30c82e4925a371711af45387df35cab8", size = 492073, upload-time = "2026-01-14T23:16:51.154Z" }, - { url = 
"https://files.pythonhosted.org/packages/70/f3/f8302b0c208b22c1e4f423147e1913fd475ddd6230565b299925353de644/regex-2026.1.15-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2c2b80399a422348ce5de4fe40c418d6299a0fa2803dd61dc0b1a2f28e280fcf", size = 292757, upload-time = "2026-01-14T23:16:53.08Z" }, - { url = "https://files.pythonhosted.org/packages/bf/f0/ef55de2460f3b4a6da9d9e7daacd0cb79d4ef75c64a2af316e68447f0df0/regex-2026.1.15-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:dca3582bca82596609959ac39e12b7dad98385b4fefccb1151b937383cec547d", size = 291122, upload-time = "2026-01-14T23:16:55.383Z" }, - { url = "https://files.pythonhosted.org/packages/cf/55/bb8ccbacabbc3a11d863ee62a9f18b160a83084ea95cdfc5d207bfc3dd75/regex-2026.1.15-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ef71d476caa6692eea743ae5ea23cde3260677f70122c4d258ca952e5c2d4e84", size = 807761, upload-time = "2026-01-14T23:16:57.251Z" }, - { url = "https://files.pythonhosted.org/packages/8f/84/f75d937f17f81e55679a0509e86176e29caa7298c38bd1db7ce9c0bf6075/regex-2026.1.15-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c243da3436354f4af6c3058a3f81a97d47ea52c9bd874b52fd30274853a1d5df", size = 873538, upload-time = "2026-01-14T23:16:59.349Z" }, - { url = "https://files.pythonhosted.org/packages/b8/d9/0da86327df70349aa8d86390da91171bd3ca4f0e7c1d1d453a9c10344da3/regex-2026.1.15-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8355ad842a7c7e9e5e55653eade3b7d1885ba86f124dd8ab1f722f9be6627434", size = 915066, upload-time = "2026-01-14T23:17:01.607Z" }, - { url = "https://files.pythonhosted.org/packages/2a/5e/f660fb23fc77baa2a61aa1f1fe3a4eea2bbb8a286ddec148030672e18834/regex-2026.1.15-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f192a831d9575271a22d804ff1a5355355723f94f31d9eef25f0d45a152fdc1a", size 
= 812938, upload-time = "2026-01-14T23:17:04.366Z" }, - { url = "https://files.pythonhosted.org/packages/69/33/a47a29bfecebbbfd1e5cd3f26b28020a97e4820f1c5148e66e3b7d4b4992/regex-2026.1.15-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:166551807ec20d47ceaeec380081f843e88c8949780cd42c40f18d16168bed10", size = 781314, upload-time = "2026-01-14T23:17:06.378Z" }, - { url = "https://files.pythonhosted.org/packages/65/ec/7ec2bbfd4c3f4e494a24dec4c6943a668e2030426b1b8b949a6462d2c17b/regex-2026.1.15-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:f9ca1cbdc0fbfe5e6e6f8221ef2309988db5bcede52443aeaee9a4ad555e0dac", size = 795652, upload-time = "2026-01-14T23:17:08.521Z" }, - { url = "https://files.pythonhosted.org/packages/46/79/a5d8651ae131fe27d7c521ad300aa7f1c7be1dbeee4d446498af5411b8a9/regex-2026.1.15-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:b30bcbd1e1221783c721483953d9e4f3ab9c5d165aa709693d3f3946747b1aea", size = 868550, upload-time = "2026-01-14T23:17:10.573Z" }, - { url = "https://files.pythonhosted.org/packages/06/b7/25635d2809664b79f183070786a5552dd4e627e5aedb0065f4e3cf8ee37d/regex-2026.1.15-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:2a8d7b50c34578d0d3bf7ad58cde9652b7d683691876f83aedc002862a35dc5e", size = 769981, upload-time = "2026-01-14T23:17:12.871Z" }, - { url = "https://files.pythonhosted.org/packages/16/8b/fc3fcbb2393dcfa4a6c5ffad92dc498e842df4581ea9d14309fcd3c55fb9/regex-2026.1.15-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:9d787e3310c6a6425eb346be4ff2ccf6eece63017916fd77fe8328c57be83521", size = 854780, upload-time = "2026-01-14T23:17:14.837Z" }, - { url = "https://files.pythonhosted.org/packages/d0/38/dde117c76c624713c8a2842530be9c93ca8b606c0f6102d86e8cd1ce8bea/regex-2026.1.15-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:619843841e220adca114118533a574a9cd183ed8a28b85627d2844c500a2b0db", size = 799778, upload-time = "2026-01-14T23:17:17.369Z" }, - { url = 
"https://files.pythonhosted.org/packages/e3/0d/3a6cfa9ae99606afb612d8fb7a66b245a9d5ff0f29bb347c8a30b6ad561b/regex-2026.1.15-cp314-cp314t-win32.whl", hash = "sha256:e90b8db97f6f2c97eb045b51a6b2c5ed69cedd8392459e0642d4199b94fabd7e", size = 274667, upload-time = "2026-01-14T23:17:19.301Z" }, - { url = "https://files.pythonhosted.org/packages/5b/b2/297293bb0742fd06b8d8e2572db41a855cdf1cae0bf009b1cb74fe07e196/regex-2026.1.15-cp314-cp314t-win_amd64.whl", hash = "sha256:5ef19071f4ac9f0834793af85bd04a920b4407715624e40cb7a0631a11137cdf", size = 284386, upload-time = "2026-01-14T23:17:21.231Z" }, - { url = "https://files.pythonhosted.org/packages/95/e4/a3b9480c78cf8ee86626cb06f8d931d74d775897d44201ccb813097ae697/regex-2026.1.15-cp314-cp314t-win_arm64.whl", hash = "sha256:ca89c5e596fc05b015f27561b3793dc2fa0917ea0d7507eebb448efd35274a70", size = 274837, upload-time = "2026-01-14T23:17:23.146Z" }, - { url = "https://files.pythonhosted.org/packages/a2/e7/0e1913dc52eee9c5cf8417c9813c4c55972a3f37d27cfa2e623b79b63dbc/regex-2026.1.15-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:55b4ea996a8e4458dd7b584a2f89863b1655dd3d17b88b46cbb9becc495a0ec5", size = 488185, upload-time = "2026-01-14T23:17:25.2Z" }, - { url = "https://files.pythonhosted.org/packages/78/df/c52c1ff4221529faad0953e197982fe9508c6dbb42327e31bf98ea07472a/regex-2026.1.15-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7e1e28be779884189cdd57735e997f282b64fd7ccf6e2eef3e16e57d7a34a815", size = 290628, upload-time = "2026-01-14T23:17:27.125Z" }, - { url = "https://files.pythonhosted.org/packages/4b/d2/a2fef3717deaff647d7de2bccf899a576c7eaf042b6b271fc4474515fe97/regex-2026.1.15-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0057de9eaef45783ff69fa94ae9f0fd906d629d0bd4c3217048f46d1daa32e9b", size = 288509, upload-time = "2026-01-14T23:17:29.017Z" }, - { url = 
"https://files.pythonhosted.org/packages/70/89/faf5ee5c69168753c845a3d58b4683f61c899d162bfe1264fca88d5b3924/regex-2026.1.15-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cc7cd0b2be0f0269283a45c0d8b2c35e149d1319dcb4a43c9c3689fa935c1ee6", size = 781088, upload-time = "2026-01-14T23:17:30.961Z" }, - { url = "https://files.pythonhosted.org/packages/7d/2c/707e5c380ad547c93686e21144e7e24dc2064dd84ec5b751b6dbdfc9be2b/regex-2026.1.15-cp39-cp39-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8db052bbd981e1666f09e957f3790ed74080c2229007c1dd67afdbf0b469c48b", size = 850516, upload-time = "2026-01-14T23:17:32.946Z" }, - { url = "https://files.pythonhosted.org/packages/5d/3b/baa816cdcad1c0f8195f9f40ab2b2a2246c8a2989dcd90641c0c6559e3fd/regex-2026.1.15-cp39-cp39-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:343db82cb3712c31ddf720f097ef17c11dab2f67f7a3e7be976c4f82eba4e6df", size = 898124, upload-time = "2026-01-14T23:17:36.019Z" }, - { url = "https://files.pythonhosted.org/packages/e7/74/1eb46bde30899825ed9fdf645eba16b7b97c49d12d300f5177989b9a09a4/regex-2026.1.15-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:55e9d0118d97794367309635df398bdfd7c33b93e2fdfa0b239661cd74b4c14e", size = 791290, upload-time = "2026-01-14T23:17:38.097Z" }, - { url = "https://files.pythonhosted.org/packages/c4/5d/b72e176fb21e2ec248baed01151a342d1f44dd43c2b6bb6a41ad183b274e/regex-2026.1.15-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:008b185f235acd1e53787333e5690082e4f156c44c87d894f880056089e9bc7c", size = 781996, upload-time = "2026-01-14T23:17:40.109Z" }, - { url = "https://files.pythonhosted.org/packages/61/0e/d3b3710eaafd994a4a71205d114abc38cda8691692a2ce2313abe68e7eb7/regex-2026.1.15-cp39-cp39-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = 
"sha256:fd65af65e2aaf9474e468f9e571bd7b189e1df3a61caa59dcbabd0000e4ea839", size = 767578, upload-time = "2026-01-14T23:17:42.134Z" }, - { url = "https://files.pythonhosted.org/packages/09/51/c6a6311833e040f95d229a34d82ac1cec2af8a5c00d58b244f2fceecef87/regex-2026.1.15-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:f42e68301ff4afee63e365a5fc302b81bb8ba31af625a671d7acb19d10168a8c", size = 774354, upload-time = "2026-01-14T23:17:44.392Z" }, - { url = "https://files.pythonhosted.org/packages/cc/97/c522d1f19fb2c549aaf680b115c110cd124c02062bc8c95f33db8583b4bb/regex-2026.1.15-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:f7792f27d3ee6e0244ea4697d92b825f9a329ab5230a78c1a68bd274e64b5077", size = 845297, upload-time = "2026-01-14T23:17:47.145Z" }, - { url = "https://files.pythonhosted.org/packages/99/a0/99468c386ab68a5e24c946c5c353c29c33a95523e275c17839f2446db15d/regex-2026.1.15-cp39-cp39-musllinux_1_2_riscv64.whl", hash = "sha256:dbaf3c3c37ef190439981648ccbf0c02ed99ae066087dd117fcb616d80b010a4", size = 755132, upload-time = "2026-01-14T23:17:49.796Z" }, - { url = "https://files.pythonhosted.org/packages/70/33/d5748c7b6c9d3621f12570583561ba529e2d1b12e4f70b8f17979b133e65/regex-2026.1.15-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:adc97a9077c2696501443d8ad3fa1b4fc6d131fc8fd7dfefd1a723f89071cf0a", size = 835662, upload-time = "2026-01-14T23:17:52.559Z" }, - { url = "https://files.pythonhosted.org/packages/ad/15/1986972c276672505437f1ba3c9706c2d91f321cfb9b2f4d06e8bff1b999/regex-2026.1.15-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:069f56a7bf71d286a6ff932a9e6fb878f151c998ebb2519a9f6d1cee4bffdba3", size = 779513, upload-time = "2026-01-14T23:17:54.711Z" }, - { url = "https://files.pythonhosted.org/packages/bc/f9/124f6a5cb3969d8e30471ed4f46cfc17c47aef1a9863ee8b4ba1d98b1bc4/regex-2026.1.15-cp39-cp39-win32.whl", hash = "sha256:ea4e6b3566127fda5e007e90a8fd5a4169f0cf0619506ed426db647f19c8454a", size = 265923, upload-time = "2026-01-14T23:17:56.69Z" }, - { url 
= "https://files.pythonhosted.org/packages/7b/c2/bb8fad7d27f1d71fc9772befd544bccd22eddc62a6735f57b003b4aff005/regex-2026.1.15-cp39-cp39-win_amd64.whl", hash = "sha256:cda1ed70d2b264952e88adaa52eea653a33a1b98ac907ae2f86508eb44f65cdc", size = 277900, upload-time = "2026-01-14T23:17:58.72Z" }, - { url = "https://files.pythonhosted.org/packages/f7/fa/4e033327c1d8350bc812cac906d873984d3d4b39529252f392a47ccc356d/regex-2026.1.15-cp39-cp39-win_arm64.whl", hash = "sha256:b325d4714c3c48277bfea1accd94e193ad6ed42b4bad79ad64f3b8f8a31260a5", size = 270413, upload-time = "2026-01-14T23:18:00.764Z" }, -] - [[package]] name = "requests" version = "2.32.5" @@ -4323,36 +4185,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c0/98/6beb4b351e472e5f4c4613f7c35a5290b8be2497e183825310c4c3a3984b/ruff-0.15.12-py3-none-win_arm64.whl", hash = "sha256:a538f7a82d061cee7be55542aca1d86d1393d55d81d4fcc314370f4340930d4f", size = 11120821, upload-time = "2026-04-24T18:16:57.979Z" }, ] -[[package]] -name = "safetensors" -version = "0.7.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/29/9c/6e74567782559a63bd040a236edca26fd71bc7ba88de2ef35d75df3bca5e/safetensors-0.7.0.tar.gz", hash = "sha256:07663963b67e8bd9f0b8ad15bb9163606cd27cc5a1b96235a50d8369803b96b0", size = 200878, upload-time = "2025-11-19T15:18:43.199Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/fa/47/aef6c06649039accf914afef490268e1067ed82be62bcfa5b7e886ad15e8/safetensors-0.7.0-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:c82f4d474cf725255d9e6acf17252991c3c8aac038d6ef363a4bf8be2f6db517", size = 467781, upload-time = "2025-11-19T15:18:35.84Z" }, - { url = "https://files.pythonhosted.org/packages/e8/00/374c0c068e30cd31f1e1b46b4b5738168ec79e7689ca82ee93ddfea05109/safetensors-0.7.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:94fd4858284736bb67a897a41608b5b0c2496c9bdb3bf2af1fa3409127f20d57", size = 447058, upload-time = 
"2025-11-19T15:18:34.416Z" }, - { url = "https://files.pythonhosted.org/packages/f1/06/578ffed52c2296f93d7fd2d844cabfa92be51a587c38c8afbb8ae449ca89/safetensors-0.7.0-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e07d91d0c92a31200f25351f4acb2bc6aff7f48094e13ebb1d0fb995b54b6542", size = 491748, upload-time = "2025-11-19T15:18:09.79Z" }, - { url = "https://files.pythonhosted.org/packages/ae/33/1debbbb70e4791dde185edb9413d1fe01619255abb64b300157d7f15dddd/safetensors-0.7.0-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8469155f4cb518bafb4acf4865e8bb9d6804110d2d9bdcaa78564b9fd841e104", size = 503881, upload-time = "2025-11-19T15:18:16.145Z" }, - { url = "https://files.pythonhosted.org/packages/8e/1c/40c2ca924d60792c3be509833df711b553c60effbd91da6f5284a83f7122/safetensors-0.7.0-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:54bef08bf00a2bff599982f6b08e8770e09cc012d7bba00783fc7ea38f1fb37d", size = 623463, upload-time = "2025-11-19T15:18:21.11Z" }, - { url = "https://files.pythonhosted.org/packages/9b/3a/13784a9364bd43b0d61eef4bea2845039bc2030458b16594a1bd787ae26e/safetensors-0.7.0-cp38-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:42cb091236206bb2016d245c377ed383aa7f78691748f3bb6ee1bfa51ae2ce6a", size = 532855, upload-time = "2025-11-19T15:18:25.719Z" }, - { url = "https://files.pythonhosted.org/packages/a0/60/429e9b1cb3fc651937727befe258ea24122d9663e4d5709a48c9cbfceecb/safetensors-0.7.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dac7252938f0696ddea46f5e855dd3138444e82236e3be475f54929f0c510d48", size = 507152, upload-time = "2025-11-19T15:18:33.023Z" }, - { url = "https://files.pythonhosted.org/packages/3c/a8/4b45e4e059270d17af60359713ffd83f97900d45a6afa73aaa0d737d48b6/safetensors-0.7.0-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1d060c70284127fa805085d8f10fbd0962792aed71879d00864acda69dbab981", size = 541856, 
upload-time = "2025-11-19T15:18:31.075Z" }, - { url = "https://files.pythonhosted.org/packages/06/87/d26d8407c44175d8ae164a95b5a62707fcc445f3c0c56108e37d98070a3d/safetensors-0.7.0-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:cdab83a366799fa730f90a4ebb563e494f28e9e92c4819e556152ad55e43591b", size = 674060, upload-time = "2025-11-19T15:18:37.211Z" }, - { url = "https://files.pythonhosted.org/packages/11/f5/57644a2ff08dc6325816ba7217e5095f17269dada2554b658442c66aed51/safetensors-0.7.0-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:672132907fcad9f2aedcb705b2d7b3b93354a2aec1b2f706c4db852abe338f85", size = 771715, upload-time = "2025-11-19T15:18:38.689Z" }, - { url = "https://files.pythonhosted.org/packages/86/31/17883e13a814bd278ae6e266b13282a01049b0c81341da7fd0e3e71a80a3/safetensors-0.7.0-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:5d72abdb8a4d56d4020713724ba81dac065fedb7f3667151c4a637f1d3fb26c0", size = 714377, upload-time = "2025-11-19T15:18:40.162Z" }, - { url = "https://files.pythonhosted.org/packages/4a/d8/0c8a7dc9b41dcac53c4cbf9df2b9c83e0e0097203de8b37a712b345c0be5/safetensors-0.7.0-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b0f6d66c1c538d5a94a73aa9ddca8ccc4227e6c9ff555322ea40bdd142391dd4", size = 677368, upload-time = "2025-11-19T15:18:41.627Z" }, - { url = "https://files.pythonhosted.org/packages/05/e5/cb4b713c8a93469e3c5be7c3f8d77d307e65fe89673e731f5c2bfd0a9237/safetensors-0.7.0-cp38-abi3-win32.whl", hash = "sha256:c74af94bf3ac15ac4d0f2a7c7b4663a15f8c2ab15ed0fc7531ca61d0835eccba", size = 326423, upload-time = "2025-11-19T15:18:45.74Z" }, - { url = "https://files.pythonhosted.org/packages/5d/e6/ec8471c8072382cb91233ba7267fd931219753bb43814cbc71757bfd4dab/safetensors-0.7.0-cp38-abi3-win_amd64.whl", hash = "sha256:d1239932053f56f3456f32eb9625590cc7582e905021f94636202a864d470755", size = 341380, upload-time = "2025-11-19T15:18:44.427Z" }, - { url = 
"https://files.pythonhosted.org/packages/a7/6a/4d08d89a6fcbe905c5ae68b8b34f0791850882fc19782d0d02c65abbdf3b/safetensors-0.7.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f4729811a6640d019a4b7ba8638ee2fd21fa5ca8c7e7bdf0fed62068fcaac737", size = 492430, upload-time = "2025-11-19T15:18:11.884Z" }, - { url = "https://files.pythonhosted.org/packages/dd/29/59ed8152b30f72c42d00d241e58eaca558ae9dbfa5695206e2e0f54c7063/safetensors-0.7.0-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:12f49080303fa6bb424b362149a12949dfbbf1e06811a88f2307276b0c131afd", size = 503977, upload-time = "2025-11-19T15:18:17.523Z" }, - { url = "https://files.pythonhosted.org/packages/d3/0b/4811bfec67fa260e791369b16dab105e4bae82686120554cc484064e22b4/safetensors-0.7.0-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0071bffba4150c2f46cae1432d31995d77acfd9f8db598b5d1a2ce67e8440ad2", size = 623890, upload-time = "2025-11-19T15:18:22.666Z" }, - { url = "https://files.pythonhosted.org/packages/58/5b/632a58724221ef03d78ab65062e82a1010e1bef8e8e0b9d7c6d7b8044841/safetensors-0.7.0-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:473b32699f4200e69801bf5abf93f1a4ecd432a70984df164fc22ccf39c4a6f3", size = 531885, upload-time = "2025-11-19T15:18:27.146Z" }, - { url = "https://files.pythonhosted.org/packages/94/60/13ccb63ea85bfe2e4fe6af602cf1272155f048906556d5ec8509da9dba42/safetensors-0.7.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7b95a3fa7b3abb9b5b0e07668e808364d0d40f6bbbf9ae0faa8b5b210c97b140", size = 492627, upload-time = "2025-11-19T15:18:14.661Z" }, - { url = "https://files.pythonhosted.org/packages/2e/2b/e2fde0d6334439908b0b0c4cba18b8ad76ea6a03b569d4a3388f423b4046/safetensors-0.7.0-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:cfdead2f57330d76aa7234051dadfa7d4eedc0e5a27fd08e6f96714a92b00f09", size = 503861, upload-time = "2025-11-19T15:18:19.418Z" }, - { url = "https://files.pythonhosted.org/packages/f0/71/566e3dd559a9cef1b4775c239daae09e6b6a32ca8b45eb1db9a4dfa1ba81/safetensors-0.7.0-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc92bc2db7b45bda4510e4f51c59b00fe80b2d6be88928346e4294ce1c2abe7c", size = 623577, upload-time = "2025-11-19T15:18:24.275Z" }, - { url = "https://files.pythonhosted.org/packages/82/fc/3035c5c30c8a5a82c31c6b2ad6f8bcd45ea2ddd9a8088840406bcf997413/safetensors-0.7.0-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6999421eb8ba9df4450a16d9184fcb7bef26240b9f98e95401f17af6c2210b71", size = 532524, upload-time = "2025-11-19T15:18:29.334Z" }, -] - [[package]] name = "scikit-learn" version = "1.6.1" @@ -4711,66 +4543,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/83/11/00d3c3dfc25ad54e731d91449895a79e4bf2384dc3ac01809010ba88f6d5/seaborn-0.13.2-py3-none-any.whl", hash = "sha256:636f8336facf092165e27924f223d3c62ca560b1f2bb5dff7ab7fad265361987", size = 294914, upload-time = "2024-01-25T13:21:49.598Z" }, ] -[[package]] -name = "sentence-transformers" -version = "5.1.2" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.10'", -] -dependencies = [ - { name = "huggingface-hub", version = "0.36.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, - { name = "pillow", version = "11.3.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, - { name = "scikit-learn", version = "1.6.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, - { name = "scipy", version = "1.13.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, - { name = "torch", version = "2.8.0", source = 
{ registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, - { name = "tqdm", marker = "python_full_version < '3.10'" }, - { name = "transformers", version = "4.57.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, - { name = "typing-extensions", marker = "python_full_version < '3.10'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/0f/96/f3f3409179d14dbfdbea8622e2e9eaa3c8836ddcaecd2cd5ff0a11731d20/sentence_transformers-5.1.2.tar.gz", hash = "sha256:0f6c8bd916a78dc65b366feb8d22fd885efdb37432e7630020d113233af2b856", size = 375185, upload-time = "2025-10-22T12:47:55.019Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/bb/a6/a607a737dc1a00b7afe267b9bfde101b8cee2529e197e57471d23137d4e5/sentence_transformers-5.1.2-py3-none-any.whl", hash = "sha256:724ce0ea62200f413f1a5059712aff66495bc4e815a1493f7f9bca242414c333", size = 488009, upload-time = "2025-10-22T12:47:53.433Z" }, -] - -[[package]] -name = "sentence-transformers" -version = "5.2.3" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.14' and platform_machine == 'ARM64' and sys_platform == 'win32'", - "python_full_version >= '3.14' and platform_machine != 'ARM64' and sys_platform == 'win32'", - "python_full_version >= '3.14' and sys_platform == 'emscripten'", - "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'", - "python_full_version >= '3.12' and python_full_version < '3.14' and platform_machine == 'ARM64' and sys_platform == 'win32'", - "python_full_version >= '3.12' and python_full_version < '3.14' and platform_machine != 'ARM64' and sys_platform == 'win32'", - "python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform == 'emscripten'", - "python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'", - "python_full_version == 
'3.11.*' and platform_machine == 'ARM64' and sys_platform == 'win32'", - "python_full_version == '3.11.*' and platform_machine != 'ARM64' and sys_platform == 'win32'", - "python_full_version == '3.11.*' and sys_platform == 'emscripten'", - "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'win32'", - "python_full_version == '3.10.*' and platform_machine == 'ARM64' and sys_platform == 'win32'", - "(python_full_version == '3.10.*' and platform_machine != 'ARM64') or (python_full_version == '3.10.*' and sys_platform != 'win32')", -] -dependencies = [ - { name = "huggingface-hub", version = "1.3.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "scikit-learn", version = "1.7.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" }, - { name = "scikit-learn", version = "1.8.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" }, - { name = "scipy", version = "1.17.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "torch", version = "2.10.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, - { name = "tqdm", marker = "python_full_version >= '3.10'" }, - { name = "transformers", version = "5.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, - { name = "typing-extensions", marker = "python_full_version >= '3.10'" }, -] -sdist = { url = 
"https://files.pythonhosted.org/packages/5b/30/21664028fc0776eb1ca024879480bbbab36f02923a8ff9e4cae5a150fa35/sentence_transformers-5.2.3.tar.gz", hash = "sha256:3cd3044e1f3fe859b6a1b66336aac502eaae5d3dd7d5c8fc237f37fbf58137c7", size = 381623, upload-time = "2026-02-17T14:05:20.238Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/46/9f/dba4b3e18ebbe1eaa29d9f1764fbc7da0cd91937b83f2b7928d15c5d2d36/sentence_transformers-5.2.3-py3-none-any.whl", hash = "sha256:6437c62d4112b615ddebda362dfc16a4308d604c5b68125ed586e3e95d5b2e30", size = 494225, upload-time = "2026-02-17T14:05:18.596Z" }, -] - [[package]] name = "setuptools" version = "80.10.2" @@ -5249,68 +5021,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/16/e1/3079a9ff9b8e11b846c6ac5c8b5bfb7ff225eee721825310c91b3b50304f/tqdm-4.67.3-py3-none-any.whl", hash = "sha256:ee1e4c0e59148062281c49d80b25b67771a127c85fc9676d3be5f243206826bf", size = 78374, upload-time = "2026-02-03T17:35:50.982Z" }, ] -[[package]] -name = "transformers" -version = "4.57.6" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.10'", -] -dependencies = [ - { name = "filelock", version = "3.19.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, - { name = "huggingface-hub", version = "0.36.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, - { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, - { name = "packaging", version = "25.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, - { name = "pyyaml", marker = "python_full_version < '3.10'" }, - { name = "regex", marker = "python_full_version < '3.10'" }, - { name = "requests", marker = "python_full_version < '3.10'" }, - { name = "safetensors", marker = "python_full_version < '3.10'" }, - { name = 
"tokenizers", marker = "python_full_version < '3.10'" }, - { name = "tqdm", marker = "python_full_version < '3.10'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/c4/35/67252acc1b929dc88b6602e8c4a982e64f31e733b804c14bc24b47da35e6/transformers-4.57.6.tar.gz", hash = "sha256:55e44126ece9dc0a291521b7e5492b572e6ef2766338a610b9ab5afbb70689d3", size = 10134912, upload-time = "2026-01-16T10:38:39.284Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/03/b8/e484ef633af3887baeeb4b6ad12743363af7cce68ae51e938e00aaa0529d/transformers-4.57.6-py3-none-any.whl", hash = "sha256:4c9e9de11333ddfe5114bc872c9f370509198acf0b87a832a0ab9458e2bd0550", size = 11993498, upload-time = "2026-01-16T10:38:31.289Z" }, -] - -[[package]] -name = "transformers" -version = "5.0.0" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.14' and platform_machine == 'ARM64' and sys_platform == 'win32'", - "python_full_version >= '3.14' and platform_machine != 'ARM64' and sys_platform == 'win32'", - "python_full_version >= '3.14' and sys_platform == 'emscripten'", - "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'", - "python_full_version >= '3.12' and python_full_version < '3.14' and platform_machine == 'ARM64' and sys_platform == 'win32'", - "python_full_version >= '3.12' and python_full_version < '3.14' and platform_machine != 'ARM64' and sys_platform == 'win32'", - "python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform == 'emscripten'", - "python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'", - "python_full_version == '3.11.*' and platform_machine == 'ARM64' and sys_platform == 'win32'", - "python_full_version == '3.11.*' and platform_machine != 'ARM64' and sys_platform == 'win32'", - "python_full_version == '3.11.*' and sys_platform == 'emscripten'", - "python_full_version == 
'3.11.*' and sys_platform != 'emscripten' and sys_platform != 'win32'", - "python_full_version == '3.10.*' and platform_machine == 'ARM64' and sys_platform == 'win32'", - "(python_full_version == '3.10.*' and platform_machine != 'ARM64') or (python_full_version == '3.10.*' and sys_platform != 'win32')", -] -dependencies = [ - { name = "filelock", version = "3.20.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, - { name = "huggingface-hub", version = "1.3.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "packaging", version = "26.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, - { name = "pyyaml", marker = "python_full_version >= '3.10'" }, - { name = "regex", marker = "python_full_version >= '3.10'" }, - { name = "safetensors", marker = "python_full_version >= '3.10'" }, - { name = "tokenizers", marker = "python_full_version >= '3.10'" }, - { name = "tqdm", marker = "python_full_version >= '3.10'" }, - { name = "typer-slim", marker = "python_full_version >= '3.10'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/bc/79/845941711811789c85fb7e2599cea425a14a07eda40f50896b9d3fda7492/transformers-5.0.0.tar.gz", hash = "sha256:5f5634efed6cf76ad068cc5834c7adbc32db78bbd6211fb70df2325a9c37dec8", size = 8424830, upload-time = "2026-01-26T10:46:46.813Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/52/f3/ac976fa8e305c9e49772527e09fbdc27cc6831b8a2f6b6063406626be5dd/transformers-5.0.0-py3-none-any.whl", hash = "sha256:587086f249ce64c817213cf36afdb318d087f790723e9b3d4500b97832afd52d", size = 10142091, upload-time = 
"2026-01-26T10:46:43.88Z" }, -] - [[package]] name = "triton" version = "3.4.0" @@ -5420,11 +5130,10 @@ all = [ { name = "scikit-learn", version = "1.7.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" }, { name = "scikit-learn", version = "1.8.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "seaborn" }, - { name = "sentence-transformers", version = "5.1.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, - { name = "sentence-transformers", version = "5.2.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, { name = "streamlit", version = "1.50.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, { name = "streamlit", version = "1.53.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, { name = "sympy" }, + { name = "tokenizers" }, { name = "torch-geometric", version = "2.6.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, { name = "torch-geometric", version = "2.7.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, ] @@ -5434,9 +5143,8 @@ all-tasks = [ { name = "scikit-learn", version = "1.6.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, { name = "scikit-learn", version = "1.7.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" }, { name = "scikit-learn", version = "1.8.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "sentence-transformers", version = "5.1.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, - { name = "sentence-transformers", version = "5.2.3", source = { registry = 
"https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, { name = "sympy" }, + { name = "tokenizers" }, { name = "torch-geometric", version = "2.6.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, { name = "torch-geometric", version = "2.7.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, ] @@ -5451,15 +5159,14 @@ dev = [ { name = "pytest-xdist" }, { name = "ruff" }, ] -lqa = [ - { name = "datasets" }, - { name = "sentence-transformers", version = "5.1.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, - { name = "sentence-transformers", version = "5.2.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, -] md17 = [ { name = "torch-geometric", version = "2.6.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, { name = "torch-geometric", version = "2.7.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, ] +slm = [ + { name = "datasets" }, + { name = "tokenizers" }, +] sr = [ { name = "pmlb" }, { name = "scikit-learn", version = "1.6.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, @@ -5484,7 +5191,7 @@ docs = [ [package.metadata] requires-dist = [ - { name = "datasets", marker = "extra == 'lqa'", specifier = ">=2.16.0,<4.0" }, + { name = "datasets", marker = "extra == 'slm'", specifier = ">=2.16.0,<4.0" }, { name = "hydra-core", specifier = ">=1.3.2" }, { name = "matplotlib", marker = "extra == 'viz'", specifier = ">=3.8.0" }, { name = "numpy", specifier = ">=1.26.0" }, @@ -5495,16 +5202,16 @@ requires-dist = [ { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.8.0" }, { name = "scikit-learn", marker = "extra == 'sr'", specifier = ">=1.3.0" }, { name = "seaborn", marker = "extra == 'viz'", specifier = ">=0.13.0" }, 
- { name = "sentence-transformers", marker = "extra == 'lqa'", specifier = ">=5.1.2" }, { name = "streamlit", marker = "extra == 'demo'", specifier = ">=1.50.0" }, { name = "sympy", marker = "extra == 'sr'", specifier = ">=1.12" }, + { name = "tokenizers", marker = "extra == 'slm'", specifier = ">=0.22.0" }, { name = "torch", specifier = ">=2.0.0" }, { name = "torch-geometric", marker = "extra == 'md17'", specifier = ">=2.6.1" }, { name = "tqdm", specifier = ">=4.67.3" }, - { name = "versor", extras = ["sr", "md17", "lqa"], marker = "extra == 'all-tasks'" }, - { name = "versor", extras = ["sr", "md17", "lqa", "viz", "demo", "dev"], marker = "extra == 'all'" }, + { name = "versor", extras = ["sr", "md17", "slm"], marker = "extra == 'all-tasks'" }, + { name = "versor", extras = ["sr", "md17", "slm", "viz", "demo", "dev"], marker = "extra == 'all'" }, ] -provides-extras = ["sr", "md17", "lqa", "viz", "demo", "dev", "all-tasks", "all"] +provides-extras = ["sr", "md17", "slm", "viz", "demo", "dev", "all-tasks", "all"] [package.metadata.requires-dev] docs = [ From 20e87caf253dead0c49b2b6ec1febe1a4562ff17 Mon Sep 17 00:00:00 2001 From: Concode0 Date: Fri, 8 May 2026 16:53:48 +0900 Subject: [PATCH 3/3] fix: mps compat fix about linalg_eigvals in analysis ( commutator and spectral ) --- core/analysis/commutator.py | 3 ++- core/analysis/spectral.py | 3 ++- utils/compat.py | 6 ++++++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/core/analysis/commutator.py b/core/analysis/commutator.py index 4c46dd3..04be3a8 100644 --- a/core/analysis/commutator.py +++ b/core/analysis/commutator.py @@ -17,6 +17,7 @@ import torch from core.algebra import CliffordAlgebra +from utils.compat import safe_linalg_eigvals from ._types import CONSTANTS, CommutatorResult @@ -143,7 +144,7 @@ def exchange_spectrum(self, mv_data: torch.Tensor) -> torch.Tensor: # Batched commutator: [dim, dim] x [dim, dim] -> [dim, dim], transpose ad_mu = self.algebra.commutator(mu.unsqueeze(0).expand(dim, 
-1), basis).T - eigvals = torch.linalg.eigvals(ad_mu) # complex + eigvals = safe_linalg_eigvals(ad_mu) # complex magnitudes = eigvals.abs() return magnitudes.sort(descending=True).values diff --git a/core/analysis/spectral.py b/core/analysis/spectral.py index 4065c37..812db64 100644 --- a/core/analysis/spectral.py +++ b/core/analysis/spectral.py @@ -18,6 +18,7 @@ from core.algebra import CliffordAlgebra from core.decomposition import differentiable_invariant_decomposition from core.metric import hermitian_grade_spectrum +from utils.compat import safe_linalg_eigvals from ._types import CONSTANTS, SpectralResult @@ -186,6 +187,6 @@ def gp_operator_spectrum(self, mv_data: torch.Tensor, n_samples: Optional[int] = # Result[j, :] = gp(mean_x, e_j) = L[:, j], so transpose L = self.algebra.geometric_product(mean_x.unsqueeze(0).expand(dim, -1), basis).T - eigvals = torch.linalg.eigvals(L) # complex + eigvals = safe_linalg_eigvals(L) # complex magnitudes = eigvals.abs() return magnitudes.sort(descending=True).values diff --git a/utils/compat.py b/utils/compat.py index 0e09909..aba959a 100644 --- a/utils/compat.py +++ b/utils/compat.py @@ -22,5 +22,11 @@ def safe_linalg_solve(A: torch.Tensor, B: torch.Tensor) -> torch.Tensor: if A.is_mps: return torch.linalg.solve(A.cpu(), B.cpu()).to(A.device) return torch.linalg.solve(A, B) + + def safe_linalg_eigvals(A: torch.Tensor) -> torch.Tensor: + if A.is_mps: + return torch.linalg.eigvals(A.cpu()).to(A.device) + return torch.linalg.eigvals(A) else: safe_linalg_solve = torch.linalg.solve + safe_linalg_eigvals = torch.linalg.eigvals