From 1ead7f8de35490f66c76382d0dc446428de551bf Mon Sep 17 00:00:00 2001 From: Brendan <2bndy5@gmail.com> Date: Wed, 10 Jun 2026 00:59:22 -0700 Subject: [PATCH 1/2] feat: use diff instead of clang-format XML output This avoids discrepancies with clang-format's XML output.; see cpp-linter/cpp-linter#168. Instead, we now save the clang-format fixed output to a project-specific cache folder. Then we take the diff between the formatted output and the original file's content. The resulting diff is how we determine the lines changed by clang-format. This actually lays the ground work for other improvements, specifically how to assemble changes for an auto-fix commit. Additionally, this also removes the dependency used for XML parsing . --- Cargo.lock | 11 -- cpp-linter/Cargo.toml | 1 - cpp-linter/src/clang_tools/clang_format.rs | 205 +++++++-------------- cpp-linter/src/clang_tools/clang_tidy.rs | 1 + cpp-linter/src/cli/structs.rs | 3 + cpp-linter/src/common_fs/mod.rs | 46 +---- cpp-linter/src/error.rs | 10 +- cpp-linter/src/rest_client/mod.rs | 23 ++- cpp-linter/src/run.rs | 11 +- 9 files changed, 102 insertions(+), 209 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 10d31bd..d4a8c68 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -390,7 +390,6 @@ dependencies = [ "gix-imara-diff", "log", "mockito", - "quick-xml", "regex", "reqwest", "semver", @@ -1499,16 +1498,6 @@ dependencies = [ "syn", ] -[[package]] -name = "quick-xml" -version = "0.40.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2474bd2e5029e7ccb6abb2ba48cf2383a333851dedf495901544281590c7da7f" -dependencies = [ - "memchr", - "serde", -] - [[package]] name = "quinn" version = "0.11.9" diff --git a/cpp-linter/Cargo.toml b/cpp-linter/Cargo.toml index 0403dfc..6ea5939 100644 --- a/cpp-linter/Cargo.toml +++ b/cpp-linter/Cargo.toml @@ -24,7 +24,6 @@ fast-glob = "1.0.1" futures = "0.3.32" gix-imara-diff = { version = "0.2.2", features = ["unified_diff"] } log = { workspace = true } -quick-xml = { version = "0.40.1", features = ["serialize"] } regex = { workspace = true } reqwest = { workspace = true } semver = { workspace = true } diff --git a/cpp-linter/src/clang_tools/clang_format.rs b/cpp-linter/src/clang_tools/clang_format.rs index f67cf3a..2eed065 100644 --- a/cpp-linter/src/clang_tools/clang_format.rs +++ b/cpp-linter/src/clang_tools/clang_format.rs @@ -3,28 +3,25 @@ use std::{ fs, + ops::Range, + path::PathBuf, process::Command, sync::{Arc, Mutex, MutexGuard}, }; +use gix_imara_diff::{Diff, InternedInput}; use log::Level; -use serde::Deserialize; // project-specific crates/modules use super::MakeSuggestions; -use crate::{ - cli::ClangParams, - common_fs::{FileObj, get_line_count_from_offset}, - error::ClangCaptureError, -}; +use crate::{cli::ClangParams, common_fs::FileObj, error::ClangCaptureError}; -#[derive(Debug, Clone, Deserialize, PartialEq, Eq, Default)] +#[derive(Debug, Clone, PartialEq, Eq, Default)] pub struct FormatAdvice { /// A list of [`Replacement`]s that clang-tidy wants to make. - #[serde(rename(deserialize = "replacement"), default)] - pub replacements: Vec, + pub replacements: Vec>, - pub patched: Option>, + pub patched: PathBuf, } impl MakeSuggestions for FormatAdvice { @@ -37,21 +34,6 @@ impl MakeSuggestions for FormatAdvice { } } -/// A single replacement that clang-format wants to make. -#[derive(Debug, PartialEq, Eq, Default, Clone, Copy, Deserialize)] -pub struct Replacement { - /// The byte offset where the replacement will start. - #[serde(rename = "@offset")] - pub offset: u32, - - /// The line number described by the [`Replacement::offset`]. - /// - /// This value is not provided by the XML output, but we calculate it after - /// deserialization. - #[serde(default)] - pub line: u32, -} - /// Get a string that summarizes the given `--style` pub fn summarize_style(style: &str) -> String { let mut char_iter = style.chars(); @@ -97,39 +79,23 @@ pub fn run_clang_format( for range in &ranges { cmd.arg(format!("--lines={}:{}", range.start(), range.end())); } + let cache_path = clang_params.project_cache_dir.join("patches"); + let cache_format_fixes = cache_path.join(file.name.with_added_extension("format")); + fs::create_dir_all( + cache_format_fixes + .parent() + .ok_or(ClangCaptureError::UnknownCacheParentPath)?, + ) + .map_err(ClangCaptureError::MkDirFailed)?; let file_name = file.name.to_string_lossy().to_string(); cmd.arg(file.name.to_path_buf().as_os_str()); - let patched = if !clang_params.format_review { - None - } else { - logs.push(( - Level::Info, - format!( - "Getting format fixes with \"{} {}\"", - cmd.get_program().to_string_lossy(), - cmd.get_args() - .map(|a| a.to_string_lossy()) - .collect::>() - .join(" ") - ), - )); - Some( - cmd.output() - .map_err(|e| ClangCaptureError::FailedToRunCommand { - task: format!("get fixes from clang-format {file_name}"), - source: e, - })? - .stdout, - ) - }; - cmd.arg("--output-replacements-xml"); logs.push(( - log::Level::Info, + Level::Info, format!( - "Running \"{} {}\"", + "Getting format fixes with \"{} {}\"", cmd.get_program().to_string_lossy(), cmd.get_args() - .map(|x| x.to_string_lossy()) + .map(|a| a.to_string_lossy()) .collect::>() .join(" ") ), @@ -137,9 +103,16 @@ pub fn run_clang_format( let output = cmd .output() .map_err(|e| ClangCaptureError::FailedToRunCommand { - task: format!("Failed to get replacements from clang-format: {file_name}"), + task: format!("get fixes from clang-format {file_name}"), source: e, })?; + fs::write(&cache_format_fixes, &output.stdout).map_err(|e| { + ClangCaptureError::WriteFileFailed { + file_name: cache_format_fixes.to_string_lossy().to_string(), + source: e, + } + })?; + if !output.stderr.is_empty() || !output.status.success() { logs.push(( log::Level::Debug, @@ -149,46 +122,43 @@ pub fn run_clang_format( ), )); } - let mut format_advice = if !output.stdout.is_empty() { - let xml = - String::from_utf8(output.stdout).map_err(|e| ClangCaptureError::NonUtf8Output { - task: format!("XML output from clang-format (for {file_name})"), - source: e, - })?; - quick_xml::de::from_str::(&xml).map_err(|e| { - ClangCaptureError::XmlParsingFailed { - file_name: file_name.clone(), - source: e, - } - })? - } else { - FormatAdvice::default() - }; - format_advice.patched = patched; - if !format_advice.replacements.is_empty() { - let original_contents = - fs::read(&file.name).map_err(|e| ClangCaptureError::ReadFileFailed { - file_name: file_name.clone(), - source: e, - })?; - // get line and column numbers from format_advice.offset - let mut filtered_replacements = Vec::new(); - for replacement in &mut format_advice.replacements { - let line_number = get_line_count_from_offset(&original_contents, replacement.offset); - replacement.line = line_number; - for range in &ranges { - if range.contains(&line_number) { - filtered_replacements.push(*replacement); - break; - } - } - if ranges.is_empty() { - // lines_changed_only is disabled - filtered_replacements.push(*replacement); - } + + // use a diff between patched and original contents to get format results + let original_contents = + fs::read_to_string(&file.name).map_err(|e| ClangCaptureError::ReadFileFailed { + file_name: file_name.clone(), + source: e, + })?; + let patched_contents = String::from_utf8(output.stdout.to_vec()).map_err(|e| { + ClangCaptureError::NonUtf8Output { + task: "clang-format".to_string(), + source: e, } - format_advice.replacements = filtered_replacements; - } + })?; + let input = InternedInput::new(original_contents.as_str(), patched_contents.as_str()); + let mut diff = Diff::compute(gix_imara_diff::Algorithm::Histogram, &input); + diff.postprocess_lines(&input); + let format_advice = FormatAdvice { + replacements: diff + .hunks() + .filter_map(|hunk| { + if ranges.is_empty() { + Some(hunk.before) + } else { + // only include replacements that fall within the specified line ranges + if ranges.iter().any(|range| { + range.contains(&hunk.before.start) + && range.contains(&hunk.before.end.saturating_sub(1)) + }) { + Some(hunk.before) + } else { + None + } + } + }) + .collect(), + patched: cache_format_fixes, + }; file.format_advice = Some(format_advice); Ok(logs) } @@ -197,56 +167,7 @@ pub fn run_clang_format( mod tests { #![allow(clippy::unwrap_used)] - use super::{FormatAdvice, Replacement, summarize_style}; - - #[test] - fn parse_blank_xml() { - let xml = String::new(); - let result = quick_xml::de::from_str::(&xml); - assert!(result.is_err()); - } - - #[test] - fn parse_xml_no_replacements() { - let xml_raw = r#" - -"# - .as_bytes() - .to_vec(); - let expected = FormatAdvice::default(); - let xml = String::from_utf8(xml_raw).unwrap(); - let document = quick_xml::de::from_str::(&xml).unwrap(); - assert_eq!(expected, document); - } - - #[test] - fn parse_xml() { - let xml_raw = r#" - - - - - -"# - .as_bytes() - .to_vec(); - - let expected = FormatAdvice { - replacements: [113, 147, 161, 165] - .iter() - .map(|offset| Replacement { - offset: *offset, - ..Default::default() - }) - .collect(), - patched: None, - }; - - let xml = String::from_utf8(xml_raw).unwrap(); - - let document = quick_xml::de::from_str::(&xml).unwrap(); - assert_eq!(expected, document); - } + use super::summarize_style; fn formalize_style(style: &str, expected: &str) { assert_eq!(summarize_style(style), expected); diff --git a/cpp-linter/src/clang_tools/clang_tidy.rs b/cpp-linter/src/clang_tools/clang_tidy.rs index 74be74e..111767e 100644 --- a/cpp-linter/src/clang_tools/clang_tidy.rs +++ b/cpp-linter/src/clang_tools/clang_tidy.rs @@ -496,6 +496,7 @@ mod test { format_review: false, clang_tidy_command: Some(exe_path), clang_format_command: None, + project_cache_dir: PathBuf::from(".").join(".cpp-linter-cache"), }; let mut file_lock = arc_file.lock().unwrap(); let logs = run_clang_tidy(&mut file_lock, &clang_params) diff --git a/cpp-linter/src/cli/structs.rs b/cpp-linter/src/cli/structs.rs index b975ebc..48e473f 100644 --- a/cpp-linter/src/cli/structs.rs +++ b/cpp-linter/src/cli/structs.rs @@ -178,6 +178,7 @@ pub struct ClangParams { pub format_filter: Option, pub tidy_review: bool, pub format_review: bool, + pub project_cache_dir: PathBuf, } #[cfg(feature = "bin")] @@ -215,6 +216,8 @@ impl From<&Cli> for ClangParams { format_filter, tidy_review: args.feedback_options.tidy_review, format_review: args.feedback_options.format_review, + project_cache_dir: PathBuf::from(&args.source_options.repo_root) + .join(".cpp-linter-cache"), } } } diff --git a/cpp-linter/src/common_fs/mod.rs b/cpp-linter/src/common_fs/mod.rs index c24753e..51ab07f 100644 --- a/cpp-linter/src/common_fs/mod.rs +++ b/cpp-linter/src/common_fs/mod.rs @@ -143,11 +143,8 @@ impl FileObj { ) -> Result<(), FileObjError> { let original_content = fs::read_to_string(&self.name).map_err(FileObjError::ReadFile)?; let file_name = self.name.to_str().unwrap_or_default().replace("\\", "/"); - if let Some(advice) = &self.format_advice - && let Some(patched) = &advice.patched - { - let patched = String::from_utf8(patched.to_vec()) - .map_err(|e| FileObjError::FromUtf8Error(file_name.clone(), e))?; + if let Some(advice) = &self.format_advice { + let patched = fs::read_to_string(&advice.patched).map_err(FileObjError::ReadFile)?; let (diff, input) = make_patch(patched.as_str(), &original_content); advice.get_suggestions(review_comments, self, &diff, &input, summary_only); } @@ -219,48 +216,13 @@ impl FileObj { } } -/// Gets the line number for a given `offset` (of bytes) from the given -/// buffer `contents`. -/// -/// The `offset` given to this function is expected to originate from -/// diagnostic information provided by clang-format. Any `offset` out of -/// bounds is clamped to the given `contents` buffer's length. -pub fn get_line_count_from_offset(contents: &[u8], offset: u32) -> u32 { - let offset = (offset as usize).min(contents.len()); - let lines = contents[0..offset].split(|byte| byte == &b'\n'); - lines.count() as u32 -} - #[cfg(test)] mod test { - use std::{fs, path::PathBuf}; + use std::path::PathBuf; - use super::{FileObj, get_line_count_from_offset}; + use super::FileObj; use crate::cli::LinesChangedOnly; - // *********************** tests for translating byte offset into line/column - - #[test] - fn translate_byte_offset() { - let contents = fs::read(PathBuf::from("tests/demo/demo.cpp")).unwrap(); - let lines = get_line_count_from_offset(&contents, 144); - assert_eq!(lines, 13); - } - - #[test] - fn get_line_count_edge_cases() { - // Empty content - assert_eq!(get_line_count_from_offset(&[], 0), 1); - - // No newlines - assert_eq!(get_line_count_from_offset(b"abc", 3), 1); - - // Consecutive newlines - assert_eq!(get_line_count_from_offset(b"a\n\nb", 3), 3); - - // Offset beyond content length - assert_eq!(get_line_count_from_offset(b"a\nb\n", 10), 3); - } // *********************** tests for FileObj::get_ranges() #[test] diff --git a/cpp-linter/src/error.rs b/cpp-linter/src/error.rs index b31cde5..e92fcfa 100644 --- a/cpp-linter/src/error.rs +++ b/cpp-linter/src/error.rs @@ -39,12 +39,6 @@ pub enum ClangCaptureError { #[source] source: std::string::FromUtf8Error, }, - #[error("Failed to parse XML output from clang-format for {file_name}: {source}")] - XmlParsingFailed { - file_name: String, - #[source] - source: quick_xml::DeError, - }, #[error("Failed to read contents of file '{file_name}': {source}")] ReadFileFailed { file_name: String, @@ -63,6 +57,10 @@ pub enum ClangCaptureError { UnknownWorkingDirectory(#[source] std::io::Error), #[error("Failed to parse integer from string: {0}")] ParseIntError(#[from] std::num::ParseIntError), + #[error("Failed to determine the parent directory for caching purposes")] + UnknownCacheParentPath, + #[error("Failed to create directory for caching patches: {0}")] + MkDirFailed(#[source] std::io::Error), } #[derive(Debug, thiserror::Error)] diff --git a/cpp-linter/src/rest_client/mod.rs b/cpp-linter/src/rest_client/mod.rs index 28bd766..52c35d7 100644 --- a/cpp-linter/src/rest_client/mod.rs +++ b/cpp-linter/src/rest_client/mod.rs @@ -223,8 +223,10 @@ impl RestClient { // assemble a list of line numbers let mut lines = Vec::new(); for replacement in &format_advice.replacements { - if !lines.contains(&replacement.line) { - lines.push(replacement.line); + for i in replacement.start..replacement.end { + if !lines.contains(&i) { + lines.push(i); + } } } // post annotation if any applicable lines were formatted @@ -355,7 +357,15 @@ fn make_format_comment( && !format_advice.replacements.is_empty() && *remaining_length > 0 { - let note = format!("- {}\n", file.name.to_string_lossy().replace('\\', "/")); + let line_count = format_advice + .replacements + .iter() + .fold(0, |acc, r| acc + r.len()); + let note = format!( + "- {} ({line_count} line{})\n", + file.name.to_string_lossy().replace('\\', "/"), + if line_count > 1 { "s" } else { "" } + ); if (note.len() as u64) < *remaining_length { format_comment.push_str(¬e.to_string()); *remaining_length -= note.len() as u64; @@ -433,7 +443,7 @@ mod test { use crate::{ clang_tools::{ ClangVersions, - clang_format::{FormatAdvice, Replacement}, + clang_format::FormatAdvice, clang_tidy::{TidyAdvice, TidyNotification}, }, cli::FeedbackInput, @@ -477,8 +487,9 @@ mod test { patched: None, }); file.format_advice = Some(FormatAdvice { - replacements: vec![Replacement { offset: 0, line: 1 }], - patched: None, + #[allow(clippy::single_range_in_vec_init)] + replacements: vec![1..2], + patched: PathBuf::new(), }); files.push(Arc::new(Mutex::new(file))); } diff --git a/cpp-linter/src/run.rs b/cpp-linter/src/run.rs index fc2768a..9746a74 100644 --- a/cpp-linter/src/run.rs +++ b/cpp-linter/src/run.rs @@ -11,7 +11,7 @@ use std::{ }; // non-std crates -use anyhow::{Result, anyhow}; +use anyhow::{Context, Result, anyhow}; use clang_tools_manager::RequestedVersion; use clap::Parser; use log::{LevelFilter, set_max_level}; @@ -159,6 +159,15 @@ pub async fn run_main(args: Vec) -> Result<()> { rest_api_client.end_log_group("Get list of specified source files"); let mut clang_params = ClangParams::from(&cli); + // mkdir -p .cpp-linter-cache/ + std::fs::create_dir_all(&clang_params.project_cache_dir) + .with_context(|| "Failed to create a local cache directory.")?; + // add gitignore file in project cache dir + std::fs::write( + clang_params.project_cache_dir.join(".gitignore"), + "# Automatically created by cpp-linter\n*\n", + ) + .with_context(|| "Failed to write .cpp-linter-cache/.gitignore file")?; clang_params.format_review &= is_pr; clang_params.tidy_review &= is_pr; let user_inputs = FeedbackInput::from(&cli); From 646316c42668cf4208403adc42caab70269c9614 Mon Sep 17 00:00:00 2001 From: Brendan <2bndy5@gmail.com> Date: Wed, 10 Jun 2026 03:18:13 -0700 Subject: [PATCH 2/2] use inclusive range and consider pure insertions --- cpp-linter/src/clang_tools/clang_format.rs | 16 ++++++++++------ cpp-linter/src/rest_client/mod.rs | 6 +++--- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/cpp-linter/src/clang_tools/clang_format.rs b/cpp-linter/src/clang_tools/clang_format.rs index 2eed065..0ef7810 100644 --- a/cpp-linter/src/clang_tools/clang_format.rs +++ b/cpp-linter/src/clang_tools/clang_format.rs @@ -3,7 +3,7 @@ use std::{ fs, - ops::Range, + ops::RangeInclusive, path::PathBuf, process::Command, sync::{Arc, Mutex, MutexGuard}, @@ -19,7 +19,7 @@ use crate::{cli::ClangParams, common_fs::FileObj, error::ClangCaptureError}; #[derive(Debug, Clone, PartialEq, Eq, Default)] pub struct FormatAdvice { /// A list of [`Replacement`]s that clang-tidy wants to make. - pub replacements: Vec>, + pub replacements: Vec>, pub patched: PathBuf, } @@ -142,15 +142,19 @@ pub fn run_clang_format( replacements: diff .hunks() .filter_map(|hunk| { + let replacement = if hunk.is_pure_insertion() { + RangeInclusive::new(hunk.after.start, hunk.after.end.saturating_sub(1)) + } else { + RangeInclusive::new(hunk.before.start, hunk.before.end.saturating_sub(1)) + }; if ranges.is_empty() { - Some(hunk.before) + Some(replacement) } else { // only include replacements that fall within the specified line ranges if ranges.iter().any(|range| { - range.contains(&hunk.before.start) - && range.contains(&hunk.before.end.saturating_sub(1)) + range.contains(replacement.start()) && range.contains(replacement.end()) }) { - Some(hunk.before) + Some(replacement) } else { None } diff --git a/cpp-linter/src/rest_client/mod.rs b/cpp-linter/src/rest_client/mod.rs index 52c35d7..cec9af4 100644 --- a/cpp-linter/src/rest_client/mod.rs +++ b/cpp-linter/src/rest_client/mod.rs @@ -223,7 +223,7 @@ impl RestClient { // assemble a list of line numbers let mut lines = Vec::new(); for replacement in &format_advice.replacements { - for i in replacement.start..replacement.end { + for i in replacement.clone() { if !lines.contains(&i) { lines.push(i); } @@ -360,7 +360,7 @@ fn make_format_comment( let line_count = format_advice .replacements .iter() - .fold(0, |acc, r| acc + r.len()); + .fold(0, |acc, r| acc + r.clone().count()); let note = format!( "- {} ({line_count} line{})\n", file.name.to_string_lossy().replace('\\', "/"), @@ -488,7 +488,7 @@ mod test { }); file.format_advice = Some(FormatAdvice { #[allow(clippy::single_range_in_vec_init)] - replacements: vec![1..2], + replacements: vec![1..=2], patched: PathBuf::new(), }); files.push(Arc::new(Mutex::new(file)));