Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions crates/aft/src/cli/warmup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,7 @@ fn semantic_index_state(ctx: &AppContext) -> SubsystemState {
files,
entries_done,
entries_total,
..
} => {
let mut detail = stage;
if let Some(files) = files {
Expand Down Expand Up @@ -453,12 +454,14 @@ fn drain_semantic_index_events(ctx: &AppContext) {
files,
entries_done,
entries_total,
stats,
} => {
*ctx.semantic_index_status().borrow_mut() = SemanticIndexStatus::Building {
stage,
files,
entries_done,
entries_total,
stats,
};
}
SemanticIndexEvent::Ready(index) => {
Expand Down
83 changes: 79 additions & 4 deletions crates/aft/src/commands/configure.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ use std::collections::{HashMap, HashSet};
use crate::callgraph::CallGraph;
use crate::config::{SemanticBackend, SemanticBackendConfig, UserServerDef};
use crate::context::{
AppContext, SemanticIndexEvent, SemanticIndexStatus, SemanticRefreshEvent,
SemanticRefreshRequest, SemanticRefreshWorkerSlot,
AppContext, SemanticIndexEvent, SemanticIndexStatus, SemanticProgressStats,
SemanticRefreshEvent, SemanticRefreshRequest, SemanticRefreshWorkerSlot,
};
use crate::harness::Harness;
use crate::log_ctx;
Expand All @@ -27,7 +27,9 @@ use crate::search_index::{
build_path_filters, current_git_head, project_cache_key, resolve_cache_dir,
walk_project_files_bounded_matching, CacheLock, SearchIndex,
};
use crate::semantic_index::{is_semantic_indexed_extension, SemanticIndex, SemanticIndexLock};
use crate::semantic_index::{
is_semantic_indexed_extension, SemanticIndex, SemanticIndexLock, SemanticMemoryEstimate,
};
use crate::{slog_info, slog_warn};

static WATCHER_GENERATION: AtomicU64 = AtomicU64::new(0);
Expand All @@ -38,6 +40,39 @@ const SEMANTIC_REFRESH_MAX_BATCH_PATHS: usize = 50;
const MAX_SEMANTIC_TIMEOUT_MS: u64 = 120_000;
const MAX_SEMANTIC_BATCH_SIZE: usize = 1_024;

fn semantic_progress_stats_from_estimate(
estimate: SemanticMemoryEstimate,
) -> SemanticProgressStats {
SemanticProgressStats {
indexed_files: Some(estimate.indexed_files),
entries: Some(estimate.entries),
vector_bytes: Some(estimate.vector_bytes),
snippet_bytes: Some(estimate.snippet_bytes),
embed_text_bytes: Some(estimate.embed_text_bytes),
metadata_bytes: Some(estimate.metadata_bytes),
estimated_payload_bytes: Some(estimate.estimated_payload_bytes),
cache_bytes: None,
clone_estimated_bytes: None,
}
}

/// Builds progress diagnostics for an index that is about to be (or was just) cloned.
///
/// Identical to `semantic_progress_stats_from_estimate`, except that
/// `clone_estimated_bytes` is set to the estimate's `estimated_payload_bytes`.
fn semantic_clone_stats_from_estimate(estimate: SemanticMemoryEstimate) -> SemanticProgressStats {
    // Read the payload size before the estimate is handed to the base conversion.
    let payload_bytes = estimate.estimated_payload_bytes;
    let mut stats = semantic_progress_stats_from_estimate(estimate);
    stats.clone_estimated_bytes = Some(payload_bytes);
    stats
}

/// Builds progress diagnostics for `index` and records a cache size alongside them.
///
/// The counters come from `index.memory_estimate()`; `cache_bytes` is set to the
/// caller-supplied value, and `clone_estimated_bytes` stays `None`.
fn semantic_progress_stats_with_cache(
    index: &SemanticIndex,
    cache_bytes: u64,
) -> SemanticProgressStats {
    let mut stats = semantic_progress_stats_from_estimate(index.memory_estimate());
    stats.cache_bytes = Some(cache_bytes);
    stats
}

fn resolve_home_dir() -> Option<PathBuf> {
let raw = std::env::var_os("HOME")
.or_else(|| std::env::var_os("USERPROFILE"))
Expand Down Expand Up @@ -196,9 +231,18 @@ fn spawn_semantic_refresh_worker(
summary.total_processed,
);
}
let clone_estimate = index.memory_estimate();
let clone_started = Instant::now();
let refreshed_index = index.clone();
slog_info!(
"semantic corpus refresh clone: {} entries, {} estimated payload bytes, {} ms",
clone_estimate.entries,
clone_estimate.estimated_payload_bytes,
clone_started.elapsed().as_millis(),
);
if event_tx
.send(SemanticRefreshEvent::CorpusCompleted {
index: index.clone(),
index: refreshed_index,
changed: summary.changed,
added: summary.added,
deleted: summary.deleted,
Expand Down Expand Up @@ -1934,6 +1978,7 @@ pub fn handle_configure(req: &RawRequest, ctx: &AppContext) -> Response {
files: None,
entries_done: None,
entries_total: None,
stats: None,
};
let (tx, rx): (
crossbeam_channel::Sender<SemanticIndexEvent>,
Expand Down Expand Up @@ -1974,6 +2019,7 @@ pub fn handle_configure(req: &RawRequest, ctx: &AppContext) -> Response {
files: None,
entries_done: None,
entries_total: None,
stats: None,
});
let mut model =
crate::semantic_index::EmbeddingModel::from_config(&semantic_config)?;
Expand Down Expand Up @@ -2034,13 +2080,15 @@ pub fn handle_configure(req: &RawRequest, ctx: &AppContext) -> Response {
files: None,
entries_done: None,
entries_total: None,
stats: None,
});
let mut progress = |done: usize, total: usize| {
let _ = tx_progress.send(SemanticIndexEvent::Progress {
stage: "embedding_stale_symbols".to_string(),
files: None,
entries_done: Some(done),
entries_total: Some(total),
stats: None,
});
};

Expand Down Expand Up @@ -2079,6 +2127,10 @@ pub fn handle_configure(req: &RawRequest, ctx: &AppContext) -> Response {
files: None,
entries_done: Some(cached.entry_count()),
entries_total: Some(cached.entry_count()),
stats: Some(semantic_progress_stats_with_cache(
&cached,
cached.serialized_size_estimate(),
)),
});
return Ok((cached, model));
}
Expand All @@ -2104,6 +2156,7 @@ pub fn handle_configure(req: &RawRequest, ctx: &AppContext) -> Response {
files: Some(files.len()),
entries_done: None,
entries_total: None,
stats: None,
});
files
}
Expand All @@ -2113,6 +2166,7 @@ pub fn handle_configure(req: &RawRequest, ctx: &AppContext) -> Response {
files: Some(observed),
entries_done: None,
entries_total: None,
stats: None,
});
slog_warn!(
"skipping semantic index: more than {} files exceeds limit of {}. \
Expand All @@ -2134,13 +2188,15 @@ pub fn handle_configure(req: &RawRequest, ctx: &AppContext) -> Response {
files: Some(files.len()),
entries_done: None,
entries_total: None,
stats: None,
});
let mut progress = |done: usize, total: usize| {
let _ = tx_progress.send(SemanticIndexEvent::Progress {
stage: "embedding_symbols".to_string(),
files: Some(files.len()),
entries_done: Some(done),
entries_total: Some(total),
stats: None,
});
};
let index = SemanticIndex::build_with_progress(
Expand All @@ -2162,6 +2218,10 @@ pub fn handle_configure(req: &RawRequest, ctx: &AppContext) -> Response {
files: Some(files.len()),
entries_done: Some(index.len()),
entries_total: Some(index.len()),
stats: Some(semantic_progress_stats_with_cache(
&index,
index.serialized_size_estimate(),
)),
});

if !is_worktree_bridge_for_semantic {
Expand All @@ -2176,7 +2236,22 @@ pub fn handle_configure(req: &RawRequest, ctx: &AppContext) -> Response {

let event = match build_result {
Ok(Ok((index, model))) => {
let clone_estimate = index.memory_estimate();
let clone_started = Instant::now();
let worker_index = index.clone();
slog_info!(
"semantic index clone for refresh worker: {} entries, {} estimated payload bytes, {} ms",
clone_estimate.entries,
clone_estimate.estimated_payload_bytes,
clone_started.elapsed().as_millis(),
);
let _ = tx_progress.send(SemanticIndexEvent::Progress {
stage: "starting_refresh_worker".to_string(),
files: Some(clone_estimate.indexed_files),
entries_done: Some(clone_estimate.entries),
entries_total: Some(clone_estimate.entries),
stats: Some(semantic_clone_stats_from_estimate(clone_estimate)),
});
let worker_handle = spawn_semantic_refresh_worker(
root_clone.clone(),
worker_index,
Expand Down
2 changes: 2 additions & 0 deletions crates/aft/src/commands/semantic_search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,7 @@ fn handle_semantic_or_hybrid_search(
files,
entries_done,
entries_total,
..
} => {
let mut detail = format!("Semantic index is still building (stage: {}).", stage);
if let Some(files) = files {
Expand Down Expand Up @@ -1851,6 +1852,7 @@ mod tests {
files: Some(1),
entries_done: Some(0),
entries_total: Some(1),
stats: None,
};

let response = response_value(handle_semantic_search(
Expand Down
59 changes: 57 additions & 2 deletions crates/aft/src/commands/status.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//! AFT status command — returns the current state of indexes, features, and configuration.

use crate::context::AppContext;
use crate::context::SemanticIndexStatus;
use crate::context::{SemanticIndexStatus, SemanticProgressStats};
use crate::db::compression_events::CompressionAggregate;
use crate::protocol::{RawRequest, Response, StatusPayload, DEFAULT_SESSION_ID};

Expand All @@ -19,6 +19,23 @@ pub struct CompressionAggregateSerde {
pub savings_tokens: u64,
}

/// Renders optional progress diagnostics as JSON for the status payload.
///
/// Returns JSON `null` when no stats are attached. Otherwise returns an object
/// with one key per counter; counters that are `None` serialize as `null`.
fn semantic_progress_stats_json(stats: Option<SemanticProgressStats>) -> serde_json::Value {
    match stats {
        None => serde_json::Value::Null,
        Some(counters) => serde_json::json!({
            "indexed_files": counters.indexed_files,
            "entries": counters.entries,
            "vector_bytes": counters.vector_bytes,
            "snippet_bytes": counters.snippet_bytes,
            "embed_text_bytes": counters.embed_text_bytes,
            "metadata_bytes": counters.metadata_bytes,
            "estimated_payload_bytes": counters.estimated_payload_bytes,
            "cache_bytes": counters.cache_bytes,
            "clone_estimated_bytes": counters.clone_estimated_bytes,
        }),
    }
}

impl From<CompressionAggregate> for CompressionAggregateSerde {
fn from(agg: CompressionAggregate) -> Self {
Self {
Expand Down Expand Up @@ -104,6 +121,7 @@ impl AppContext {
files,
entries_done,
entries_total,
stats,
} => serde_json::json!({
"status": "loading",
"state": "loading",
Expand All @@ -112,6 +130,7 @@ impl AppContext {
"files": files,
"entries_done": entries_done,
"entries_total": entries_total,
"stats": semantic_progress_stats_json(stats),
"backend": config.semantic_backend_label(),
"model": config.semantic.model.as_str(),
}),
Expand Down Expand Up @@ -296,7 +315,7 @@ fn dir_size_recursive(path: &std::path::Path) -> u64 {
mod tests {
use super::handle_status;
use crate::config::Config;
use crate::context::AppContext;
use crate::context::{AppContext, SemanticIndexStatus, SemanticProgressStats};
use crate::parser::TreeSitterProvider;
use crate::protocol::RawRequest;
use serde_json::json;
Expand Down Expand Up @@ -330,4 +349,40 @@ mod tests {
let response = handle_status(&request(), &ctx);
assert_eq!(response.data["cache_role"], "worktree");
}

/// A `Building` status that carries stats must surface every counter under
/// `semantic_index.stats` in the status response, with unset counters as `null`.
#[test]
fn status_exposes_semantic_progress_stats() {
    // Fixture: all counters populated except the clone estimate.
    let progress_stats = SemanticProgressStats {
        indexed_files: Some(3),
        entries: Some(5),
        vector_bytes: Some(7680),
        snippet_bytes: Some(1200),
        embed_text_bytes: Some(3400),
        metadata_bytes: Some(260),
        estimated_payload_bytes: Some(12540),
        cache_bytes: Some(15000),
        clone_estimated_bytes: None,
    };

    let ctx = AppContext::new(Box::new(TreeSitterProvider::new()), Config::default());
    ctx.config_mut().semantic_search = true;
    *ctx.semantic_index_status().borrow_mut() = SemanticIndexStatus::Building {
        stage: "persisting_index".to_string(),
        files: Some(3),
        entries_done: Some(5),
        entries_total: Some(5),
        stats: Some(progress_stats),
    };

    let response = handle_status(&request(), &ctx);
    let semantic = &response.data["semantic_index"];
    let reported = &semantic["stats"];
    assert_eq!(semantic["stage"], "persisting_index");

    // Each populated counter must round-trip unchanged.
    let expected_counters: [(&str, u64); 8] = [
        ("indexed_files", 3),
        ("entries", 5),
        ("vector_bytes", 7680),
        ("snippet_bytes", 1200),
        ("embed_text_bytes", 3400),
        ("metadata_bytes", 260),
        ("estimated_payload_bytes", 12540),
        ("cache_bytes", 15000),
    ];
    for &(key, expected) in &expected_counters {
        assert_eq!(reported[key], expected);
    }

    // The counter left as `None` is emitted as JSON null rather than omitted or zeroed.
    assert!(reported["clone_estimated_bytes"].is_null());
}
}
19 changes: 19 additions & 0 deletions crates/aft/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ pub enum SemanticIndexStatus {
files: Option<usize>,
entries_done: Option<usize>,
entries_total: Option<usize>,
stats: Option<SemanticProgressStats>,
},
Ready {
/// Files currently being re-embedded after recent edits. The index is
Expand Down Expand Up @@ -267,11 +268,29 @@ pub enum SemanticIndexEvent {
files: Option<usize>,
entries_done: Option<usize>,
entries_total: Option<usize>,
stats: Option<SemanticProgressStats>,
},
Ready(SemanticIndex),
Failed(String),
}

/// Optional diagnostic counters attached to semantic-index progress updates.
///
/// These are intentionally estimates: they expose payload/cache/clone pressure
/// without requiring a platform-specific heap profiler in the Rust worker.
///
/// Every field is optional; `None` means the producer of the progress update
/// did not supply that counter.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct SemanticProgressStats {
/// Count of indexed files.
pub indexed_files: Option<usize>,
/// Count of index entries.
pub entries: Option<usize>,
/// Byte estimate for vectors.
pub vector_bytes: Option<u64>,
/// Byte estimate for snippets.
pub snippet_bytes: Option<u64>,
/// Byte estimate for embed text.
pub embed_text_bytes: Option<u64>,
/// Byte estimate for metadata.
pub metadata_bytes: Option<u64>,
/// Byte estimate for the index payload.
/// NOTE(review): presumably the sum of the component estimates above — confirm.
pub estimated_payload_bytes: Option<u64>,
/// Byte estimate for the cached index, when a producer supplies one.
pub cache_bytes: Option<u64>,
/// Byte estimate for cloning the index, when a producer supplies one.
pub clone_estimated_bytes: Option<u64>,
}

#[derive(Debug, Clone)]
pub enum SemanticRefreshRequest {
Files { paths: Vec<PathBuf> },
Expand Down
4 changes: 4 additions & 0 deletions crates/aft/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1034,6 +1034,7 @@ fn refresh_corpus_after_ignore_change(ctx: &AppContext) {
files: Some(file_count),
entries_done: None,
entries_total: None,
stats: None,
};
match sender.send(SemanticRefreshRequest::Corpus { current_files }) {
Ok(()) => {
Expand Down Expand Up @@ -1332,12 +1333,14 @@ fn drain_semantic_index_events(ctx: &AppContext) {
files,
entries_done,
entries_total,
stats,
} => {
*ctx.semantic_index_status().borrow_mut() = SemanticIndexStatus::Building {
stage,
files,
entries_done,
entries_total,
stats,
};
// Push progress to the sidebar. Without this, a long rebuild
// (e.g. a slow local embedding backend re-indexing after a prior
Expand Down Expand Up @@ -1388,6 +1391,7 @@ fn drain_semantic_index_events(ctx: &AppContext) {
files: Some(file_count),
entries_done: None,
entries_total: None,
stats: None,
};
let sent = ctx.semantic_refresh_sender().is_some_and(|sender| {
sender
Expand Down
Loading
Loading