diff --git a/crates/trusted-server-core/src/creative.rs b/crates/trusted-server-core/src/creative.rs
index 245af0e1..1a118d30 100644
--- a/crates/trusted-server-core/src/creative.rs
+++ b/crates/trusted-server-core/src/creative.rs
@@ -775,6 +775,49 @@ mod tests {
rewrite_creative_html, rewrite_srcset, rewrite_style_urls, sanitize_creative_html, to_abs,
};
+ fn rewrite_srcset_attr(attr_name: &str, attr_value: &str) -> String {
+ let settings = crate::test_support::tests::create_test_settings();
+ let html = format!(r#"
"#, attr_name, attr_value);
+ rewrite_creative_html(&settings, &html)
+ }
+
+ fn assert_rewritten_srcset_attr_case(
+ case_name: &str,
+ attr_name: &str,
+ attr_value: &str,
+ expected_relative: &str,
+ expected_descriptors: &[&str],
+ ) {
+ let out = rewrite_srcset_attr(attr_name, attr_value);
+
+ assert!(
+ out.matches("/first-party/proxy?tsurl=").count() >= 2,
+ "case `{}` expected two rewritten {} candidates: {}",
+ case_name,
+ attr_name,
+ out
+ );
+ assert!(
+ out.contains(expected_relative),
+ "case `{}` expected relative {} candidate `{}` to be preserved in {}",
+ case_name,
+ attr_name,
+ expected_relative,
+ out
+ );
+
+ for descriptor in expected_descriptors {
+ assert!(
+ out.contains(descriptor),
+ "case `{}` expected {} descriptor `{}` in {}",
+ case_name,
+ attr_name,
+ descriptor,
+ out
+ );
+ }
+ }
+
#[test]
fn rewrites_width_height_attrs() {
use crate::http_util::encode_url;
@@ -941,26 +984,50 @@ mod tests {
}
#[test]
- fn rewrites_srcset_absolute_candidates_and_preserves_descriptors() {
- // Absolute + protocol-relative get rewritten to /first-party/proxy?tsurl=..., relative remains
- let settings = crate::test_support::tests::create_test_settings();
- let html = r#"
"#;
- let out = rewrite_creative_html(&settings, html);
-
- // Should have at least two proxied candidates
- let cnt = out.matches("/first-party/proxy?tsurl=").count();
- assert!(
- cnt >= 2,
- "expected at least two rewritten candidates: {}",
- out
- );
-
- // Descriptors preserved
- assert!(out.contains(" 1x"));
- assert!(out.contains(" 2x"));
+ fn rewrites_srcset_attribute_cases() {
+ struct SrcsetCase<'a> {
+ name: &'a str,
+ attr_value: &'a str,
+ expected_relative: &'a str,
+ expected_descriptors: &'a [&'a str],
+ }
- // Relative left as-is
- assert!(out.contains("/local/img.png 1x"));
+ let cases = [
+ SrcsetCase {
+ name: "absolute and protocol-relative candidates",
+ attr_value: "https://cdn.example/img-1x.png 1x, //cdn.example/img-2x.png 2x, /local/img.png 1x",
+ expected_relative: "/local/img.png 1x",
+ expected_descriptors: &[" 1x", " 2x"],
+ },
+ SrcsetCase {
+ name: "no-space commas with fractional density",
+ attr_value: "https://cdn.example/img-1x.png 1x,//cdn.example/img-1_5x.png 1.5x,/local/img.png 2x",
+ expected_relative: "/local/img.png 2x",
+ expected_descriptors: &[" 1x", " 1.5x"],
+ },
+ SrcsetCase {
+ name: "relative middle candidate without leading slash",
+ attr_value: "https://cdn.example/a.png 1x,local/b.png 2x,//cdn.example/c.png 3x",
+ expected_relative: "local/b.png 2x",
+ expected_descriptors: &[" 1x", " 2x", " 3x"],
+ },
+ SrcsetCase {
+ name: "extra spaces normalize but preserve semantics",
+ attr_value: " https://cdn.example/a.png 1x , //cdn.example/b.png 2x , /local/c.png 1x ",
+ expected_relative: "/local/c.png 1x",
+ expected_descriptors: &[" 1x", " 2x"],
+ },
+ ];
+
+ for case in cases {
+ assert_rewritten_srcset_attr_case(
+ case.name,
+ "srcset",
+ case.attr_value,
+ case.expected_relative,
+ case.expected_descriptors,
+ );
+ }
}
#[test]
@@ -986,59 +1053,6 @@ mod tests {
assert!(out.contains("
"#;
- let out = rewrite_creative_html(&settings, html);
- // Absolute and protocol-relative candidates rewritten
- assert!(
- out.matches("/first-party/proxy?tsurl=").count() >= 2,
- "{}",
- out
- );
- // Descriptors preserved (including fractional)
- assert!(out.contains(" 1x"));
- assert!(out.contains(" 1.5x"));
- // Relative left unchanged
- assert!(out.contains("/local/img.png 2x"));
- }
-
- #[test]
- fn rewrites_srcset_relative_no_space_middle() {
- let settings = crate::test_support::tests::create_test_settings();
- // Relative candidate (no leading slash) in the middle, no space after commas
- let html =
- r#"
"#;
- let out = rewrite_creative_html(&settings, html);
- // Two absolute/protocol-relative rewritten
- assert!(
- out.matches("/first-party/proxy?tsurl=").count() >= 2,
- "{}",
- out
- );
- // Relative preserved as-is
- assert!(out.contains("local/b.png 2x"));
- }
-
- #[test]
- fn rewrites_srcset_with_extra_spaces() {
- let settings = crate::test_support::tests::create_test_settings();
- let html = r#"
"#;
- let out = rewrite_creative_html(&settings, html);
- // Two absolute/protocol-relative rewritten
- assert!(
- out.matches("/first-party/proxy?tsurl=").count() >= 2,
- "{}",
- out
- );
- // Relative preserved
- assert!(out.contains("/local/c.png 1x"));
- // Normalized spacing: single space before descriptor is acceptable
- assert!(out.contains(" 1x"));
- assert!(out.contains(" 2x"));
- }
-
#[test]
fn rewrites_script_src_and_leaves_relative() {
let settings = crate::test_support::tests::create_test_settings();
@@ -1077,66 +1091,50 @@ mod tests {
}
#[test]
- fn rewrites_imagesrcset_absolute_candidates_and_preserves_descriptors() {
- let settings = crate::test_support::tests::create_test_settings();
- // Use a valid quoted attribute; the previous string had malformed escapes
- let html = r#"
"#;
- let out = rewrite_creative_html(&settings, html);
- let cnt = out.matches("/first-party/proxy?tsurl=").count();
- assert!(
- cnt >= 1,
- "expected at least one rewritten imagesrcset candidate: {}",
- out
- );
- assert!(out.contains("/local/img.png 1x"));
- }
-
- #[test]
- fn rewrites_imagesrcset_no_space_commas() {
- let settings = crate::test_support::tests::create_test_settings();
- let html = r#""#;
- let out = rewrite_creative_html(&settings, html);
- // At least two rewritten
- assert!(
- out.matches("/first-party/proxy?tsurl=").count() >= 2,
- "{}",
- out
- );
- // Relative preserved
- assert!(out.contains("/local/c.png 1x"));
- }
-
- #[test]
- fn rewrites_imagesrcset_relative_no_space_middle() {
- let settings = crate::test_support::tests::create_test_settings();
- let html = r#""#;
- let out = rewrite_creative_html(&settings, html);
- // Two absolute/protocol-relative rewritten
- assert!(
- out.matches("/first-party/proxy?tsurl=").count() >= 2,
- "{}",
- out
- );
- // Relative preserved
- assert!(out.contains("local/b.png 2x"));
- }
+ fn rewrites_imagesrcset_attribute_cases() {
+ struct SrcsetCase<'a> {
+ name: &'a str,
+ attr_value: &'a str,
+ expected_relative: &'a str,
+ expected_descriptors: &'a [&'a str],
+ }
- #[test]
- fn rewrites_imagesrcset_with_extra_spaces() {
- let settings = crate::test_support::tests::create_test_settings();
- let html = r#""#;
- let out = rewrite_creative_html(&settings, html);
- // Two absolute/protocol-relative rewritten
- assert!(
- out.matches("/first-party/proxy?tsurl=").count() >= 2,
- "{}",
- out
- );
- // Relative preserved
- assert!(out.contains("/local/c.png 1x"));
- // Normalized spacing present
- assert!(out.contains(" 1x"));
- assert!(out.contains(" 2x"));
+ let cases = [
+ SrcsetCase {
+ name: "absolute and protocol-relative candidates",
+ attr_value: "https://cdn.example/img-1x.png 1x, //cdn.example/img-2x.png 2x, /local/img.png 1x",
+ expected_relative: "/local/img.png 1x",
+ expected_descriptors: &[" 1x", " 2x"],
+ },
+ SrcsetCase {
+ name: "no-space commas",
+ attr_value: "https://cdn.example/a.png 1x,//cdn.example/b.png 2x,/local/c.png 1x",
+ expected_relative: "/local/c.png 1x",
+ expected_descriptors: &[" 1x", " 2x"],
+ },
+ SrcsetCase {
+ name: "relative middle candidate without leading slash",
+ attr_value: "https://cdn.example/a.png 1x,local/b.png 2x,//cdn.example/c.png 3x",
+ expected_relative: "local/b.png 2x",
+ expected_descriptors: &[" 1x", " 2x", " 3x"],
+ },
+ SrcsetCase {
+ name: "extra spaces normalize but preserve semantics",
+ attr_value: " https://cdn.example/a.png 1x , //cdn.example/b.png 2x , /local/c.png 1x ",
+ expected_relative: "/local/c.png 1x",
+ expected_descriptors: &[" 1x", " 2x"],
+ },
+ ];
+
+ for case in cases {
+ assert_rewritten_srcset_attr_case(
+ case.name,
+ "imagesrcset",
+ case.attr_value,
+ case.expected_relative,
+ case.expected_descriptors,
+ );
+ }
}
#[test]
diff --git a/crates/trusted-server-core/src/streaming_replacer.rs b/crates/trusted-server-core/src/streaming_replacer.rs
index 1e7291e5..4c6e64c5 100644
--- a/crates/trusted-server-core/src/streaming_replacer.rs
+++ b/crates/trusted-server-core/src/streaming_replacer.rs
@@ -199,6 +199,39 @@ pub fn create_url_replacer(
mod tests {
use super::*;
+ fn process_with_fixed_chunk_size(
+ mut replacer: StreamingReplacer,
+ content: &[u8],
+ chunk_size: usize,
+ ) -> String {
+ let chunks: Vec<_> = content.chunks(chunk_size).collect();
+ let mut result = Vec::new();
+
+ for (index, chunk) in chunks.iter().enumerate() {
+ let is_last_chunk = index == chunks.len() - 1;
+ result.extend(replacer.process_chunk(chunk, is_last_chunk));
+ }
+
+ String::from_utf8(result).expect("output should be valid UTF-8")
+ }
+
+ fn process_with_explicit_splits(
+ mut replacer: StreamingReplacer,
+ content: &[u8],
+ split_points: &[usize],
+ ) -> String {
+ let mut result = Vec::new();
+ let mut start = 0usize;
+
+ for (index, end) in split_points.iter().copied().enumerate() {
+ let is_last_chunk = index == split_points.len() - 1;
+ result.extend(replacer.process_chunk(&content[start..end], is_last_chunk));
+ start = end;
+ }
+
+ String::from_utf8(result).expect("output should be valid UTF-8")
+ }
+
#[test]
fn test_streaming_replacer_basic() {
let mut replacer =
@@ -371,61 +404,85 @@ mod tests {
}
#[test]
- fn test_process_chunk_utf8_boundary() {
- let mut replacer =
- create_url_replacer("origin.com", "https://origin.com", "test.com", "https");
-
- // Create content with multi-byte UTF-8 characters that could cause boundary issues
- let content = "https://origin.com/test 思怙ᕏ测试 https://origin.com/more".as_bytes();
+ fn test_process_chunk_utf8_boundary_cases() {
+ struct Utf8BoundaryCase<'a> {
+ name: &'a str,
+ replacer: StreamingReplacer,
+ content: &'a str,
+ chunk_size: usize,
+ expected_fragments: &'a [&'a str],
+ }
- // Process in small chunks to force potential boundary issues
- let chunk_size = 20;
- let mut result = Vec::new();
+ let cases = [
+ Utf8BoundaryCase {
+ name: "multibyte text with chunked url replacements",
+ replacer: create_url_replacer(
+ "origin.com",
+ "https://origin.com",
+ "test.com",
+ "https",
+ ),
+ content: "https://origin.com/test 思怙ᕏ测试 https://origin.com/more",
+ chunk_size: 20,
+ expected_fragments: &[
+ "https://test.com/test",
+ "https://test.com/more",
+ "思怙ᕏ测试",
+ ],
+ },
+ Utf8BoundaryCase {
+ name: "small chunks preserve utf8 without replacements",
+ replacer: create_url_replacer("test.com", "https://test.com", "new.com", "https"),
+ content: "Some text 思怙ᕏ测试 more text with 🎉 emoji",
+ chunk_size: 8,
+ expected_fragments: &["Some text", "思怙ᕏ测试", "🎉 emoji"],
+ },
+ ];
- for (i, chunk) in content.chunks(chunk_size).enumerate() {
- let is_last = i == content.chunks(chunk_size).count() - 1;
- result.extend(replacer.process_chunk(chunk, is_last));
+ for case in cases {
+ let result = process_with_fixed_chunk_size(
+ case.replacer,
+ case.content.as_bytes(),
+ case.chunk_size,
+ );
+
+ for expected_fragment in case.expected_fragments {
+ assert!(
+ result.contains(expected_fragment),
+ "case `{}` should contain `{}` but was `{}`",
+ case.name,
+ expected_fragment,
+ result
+ );
+ }
}
-
- let result_str = String::from_utf8(result).expect("output should be valid UTF-8");
- assert!(result_str.contains("https://test.com/test"));
- assert!(result_str.contains("https://test.com/more"));
- assert!(result_str.contains("思怙ᕏ测试"));
}
#[test]
fn test_process_chunk_boundary_in_multibyte_char() {
- let mut replacer =
- create_url_replacer("example.com", "https://example.com", "new.com", "https");
-
- // Create a scenario where chunk boundary falls in the middle of a multi-byte character
let content = "https://example.com/før/bår/test".as_bytes();
- // Split at byte 23, which should be in the middle of 'ø' (2-byte character)
- let chunk1 = &content[..23];
- let chunk2 = &content[23..];
-
- let mut result = Vec::new();
- result.extend(replacer.process_chunk(chunk1, false));
- result.extend(replacer.process_chunk(chunk2, true));
+ let result = process_with_explicit_splits(
+ create_url_replacer("example.com", "https://example.com", "new.com", "https"),
+ content,
+ &[23, content.len()],
+ );
- let result_str = String::from_utf8(result).expect("output should be valid UTF-8");
- assert!(result_str.contains("https://new.com/før/bår/test"));
+ assert!(result.contains("https://new.com/før/bår/test"));
}
#[test]
- fn test_process_chunk_emoji_boundary() {
- let mut replacer =
- create_url_replacer("emoji.com", "https://emoji.com", "test.com", "https");
+ fn test_process_chunk_boundary_in_emoji() {
+ let content = "🎉🎊🎋 https://emoji.com/more".as_bytes();
- // Test with 4-byte emoji characters
- let content = "https://emoji.com/test 🎉🎊🎋 https://emoji.com/more".as_bytes();
+ let result = process_with_explicit_splits(
+ create_url_replacer("emoji.com", "https://emoji.com", "test.com", "https"),
+ content,
+ &[2, content.len()],
+ );
- // Process the entire content at once to verify it works
- let all_at_once = replacer.process_chunk(content, true);
- let expected = String::from_utf8(all_at_once).expect("output should be valid UTF-8");
- assert!(expected.contains("https://test.com/test"));
- assert!(expected.contains("https://test.com/more"));
+ assert!(result.contains("🎉🎊🎋"));
+ assert!(result.contains("https://test.com/more"));
}
#[test]
@@ -452,29 +509,6 @@ mod tests {
assert!(result_str.contains("https://test.com/page2"));
}
- #[test]
- fn test_process_chunk_utf8_boundary_small_chunks() {
- let mut replacer = create_url_replacer("test.com", "https://test.com", "new.com", "https");
-
- // Test with multi-byte characters and very small chunks to stress UTF-8 boundaries
- let content = "Some text 思怙ᕏ测试 more text with 🎉 emoji".as_bytes();
-
- // Use very small chunks to force UTF-8 boundary handling
- let chunk_size = 8;
- let mut result = Vec::new();
- let chunks: Vec<_> = content.chunks(chunk_size).collect();
-
- for (i, chunk) in chunks.iter().enumerate() {
- let is_last = i == chunks.len() - 1;
- result.extend(replacer.process_chunk(chunk, is_last));
- }
-
- let result_str = String::from_utf8(result).expect("output should be valid UTF-8");
- // Just verify the content is preserved correctly
- assert!(result_str.contains("思怙ᕏ测试"));
- assert!(result_str.contains("🎉"));
- }
-
#[test]
fn test_generic_replacements() {
// Test replacing arbitrary strings