diff --git a/crates/trusted-server-core/src/creative.rs b/crates/trusted-server-core/src/creative.rs index 245af0e1..1a118d30 100644 --- a/crates/trusted-server-core/src/creative.rs +++ b/crates/trusted-server-core/src/creative.rs @@ -775,6 +775,49 @@ mod tests { rewrite_creative_html, rewrite_srcset, rewrite_style_urls, sanitize_creative_html, to_abs, }; + fn rewrite_srcset_attr(attr_name: &str, attr_value: &str) -> String { + let settings = crate::test_support::tests::create_test_settings(); + let html = format!(r#""#, attr_name, attr_value); + rewrite_creative_html(&settings, &html) + } + + fn assert_rewritten_srcset_attr_case( + case_name: &str, + attr_name: &str, + attr_value: &str, + expected_relative: &str, + expected_descriptors: &[&str], + ) { + let out = rewrite_srcset_attr(attr_name, attr_value); + + assert!( + out.matches("/first-party/proxy?tsurl=").count() >= 2, + "case `{}` expected two rewritten {} candidates: {}", + case_name, + attr_name, + out + ); + assert!( + out.contains(expected_relative), + "case `{}` expected relative {} candidate `{}` to be preserved in {}", + case_name, + attr_name, + expected_relative, + out + ); + + for descriptor in expected_descriptors { + assert!( + out.contains(descriptor), + "case `{}` expected {} descriptor `{}` in {}", + case_name, + attr_name, + descriptor, + out + ); + } + } + #[test] fn rewrites_width_height_attrs() { use crate::http_util::encode_url; @@ -941,26 +984,50 @@ mod tests { } #[test] - fn rewrites_srcset_absolute_candidates_and_preserves_descriptors() { - // Absolute + protocol-relative get rewritten to /first-party/proxy?tsurl=..., relative remains - let settings = crate::test_support::tests::create_test_settings(); - let html = r#""#; - let out = rewrite_creative_html(&settings, html); - - // Should have at least two proxied candidates - let cnt = out.matches("/first-party/proxy?tsurl=").count(); - assert!( - cnt >= 2, - "expected at least two rewritten candidates: {}", - out - ); - - // Descriptors preserved - assert!(out.contains(" 1x")); - assert!(out.contains(" 2x")); + fn rewrites_srcset_attribute_cases() { + struct SrcsetCase<'a> { + name: &'a str, + attr_value: &'a str, + expected_relative: &'a str, + expected_descriptors: &'a [&'a str], + } - // Relative left as-is - assert!(out.contains("/local/img.png 1x")); + let cases = [ + SrcsetCase { + name: "absolute and protocol-relative candidates", + attr_value: "https://cdn.example/img-1x.png 1x, //cdn.example/img-2x.png 2x, /local/img.png 1x", + expected_relative: "/local/img.png 1x", + expected_descriptors: &[" 1x", " 2x"], + }, + SrcsetCase { + name: "no-space commas with fractional density", + attr_value: "https://cdn.example/img-1x.png 1x,//cdn.example/img-1_5x.png 1.5x,/local/img.png 2x", + expected_relative: "/local/img.png 2x", + expected_descriptors: &[" 1x", " 1.5x"], + }, + SrcsetCase { + name: "relative middle candidate without leading slash", + attr_value: "https://cdn.example/a.png 1x,local/b.png 2x,//cdn.example/c.png 3x", + expected_relative: "local/b.png 2x", + expected_descriptors: &[" 1x", " 2x", " 3x"], + }, + SrcsetCase { + name: "extra spaces normalize but preserve semantics", + attr_value: " https://cdn.example/a.png 1x , //cdn.example/b.png 2x , /local/c.png 1x ", + expected_relative: "/local/c.png 1x", + expected_descriptors: &[" 1x", " 2x"], + }, + ]; + + for case in cases { + assert_rewritten_srcset_attr_case( + case.name, + "srcset", + case.attr_value, + case.expected_relative, + case.expected_descriptors, + ); + } } #[test] @@ -986,59 +1053,6 @@ mod tests { assert!(out.contains(""#; - let out = rewrite_creative_html(&settings, html); - // Absolute and protocol-relative candidates rewritten - assert!( - out.matches("/first-party/proxy?tsurl=").count() >= 2, - "{}", - out - ); - // Descriptors preserved (including fractional) - assert!(out.contains(" 1x")); - assert!(out.contains(" 1.5x")); - // Relative left unchanged - assert!(out.contains("/local/img.png 2x")); - } - - #[test] - fn rewrites_srcset_relative_no_space_middle() { - let settings = crate::test_support::tests::create_test_settings(); - // Relative candidate (no leading slash) in the middle, no space after commas - let html = - r#""#; - let out = rewrite_creative_html(&settings, html); - // Two absolute/protocol-relative rewritten - assert!( - out.matches("/first-party/proxy?tsurl=").count() >= 2, - "{}", - out - ); - // Relative preserved as-is - assert!(out.contains("local/b.png 2x")); - } - - #[test] - fn rewrites_srcset_with_extra_spaces() { - let settings = crate::test_support::tests::create_test_settings(); - let html = r#""#; - let out = rewrite_creative_html(&settings, html); - // Two absolute/protocol-relative rewritten - assert!( - out.matches("/first-party/proxy?tsurl=").count() >= 2, - "{}", - out - ); - // Relative preserved - assert!(out.contains("/local/c.png 1x")); - // Normalized spacing: single space before descriptor is acceptable - assert!(out.contains(" 1x")); - assert!(out.contains(" 2x")); - } - #[test] fn rewrites_script_src_and_leaves_relative() { let settings = crate::test_support::tests::create_test_settings(); @@ -1077,66 +1091,50 @@ mod tests { } #[test] - fn rewrites_imagesrcset_absolute_candidates_and_preserves_descriptors() { - let settings = crate::test_support::tests::create_test_settings(); - // Use a valid quoted attribute; the previous string had malformed escapes - let html = r#"
"#; - let out = rewrite_creative_html(&settings, html); - let cnt = out.matches("/first-party/proxy?tsurl=").count(); - assert!( - cnt >= 1, - "expected at least one rewritten imagesrcset candidate: {}", - out - ); - assert!(out.contains("/local/img.png 1x")); - } - - #[test] - fn rewrites_imagesrcset_no_space_commas() { - let settings = crate::test_support::tests::create_test_settings(); - let html = r#"
"#; - let out = rewrite_creative_html(&settings, html); - // At least two rewritten - assert!( - out.matches("/first-party/proxy?tsurl=").count() >= 2, - "{}", - out - ); - // Relative preserved - assert!(out.contains("/local/c.png 1x")); - } - - #[test] - fn rewrites_imagesrcset_relative_no_space_middle() { - let settings = crate::test_support::tests::create_test_settings(); - let html = r#"
"#; - let out = rewrite_creative_html(&settings, html); - // Two absolute/protocol-relative rewritten - assert!( - out.matches("/first-party/proxy?tsurl=").count() >= 2, - "{}", - out - ); - // Relative preserved - assert!(out.contains("local/b.png 2x")); - } + fn rewrites_imagesrcset_attribute_cases() { + struct SrcsetCase<'a> { + name: &'a str, + attr_value: &'a str, + expected_relative: &'a str, + expected_descriptors: &'a [&'a str], + } - #[test] - fn rewrites_imagesrcset_with_extra_spaces() { - let settings = crate::test_support::tests::create_test_settings(); - let html = r#"
"#; - let out = rewrite_creative_html(&settings, html); - // Two absolute/protocol-relative rewritten - assert!( - out.matches("/first-party/proxy?tsurl=").count() >= 2, - "{}", - out - ); - // Relative preserved - assert!(out.contains("/local/c.png 1x")); - // Normalized spacing present - assert!(out.contains(" 1x")); - assert!(out.contains(" 2x")); + let cases = [ + SrcsetCase { + name: "absolute and protocol-relative candidates", + attr_value: "https://cdn.example/img-1x.png 1x, //cdn.example/img-2x.png 2x, /local/img.png 1x", + expected_relative: "/local/img.png 1x", + expected_descriptors: &[" 1x", " 2x"], + }, + SrcsetCase { + name: "no-space commas", + attr_value: "https://cdn.example/a.png 1x,//cdn.example/b.png 2x,/local/c.png 1x", + expected_relative: "/local/c.png 1x", + expected_descriptors: &[" 1x", " 2x"], + }, + SrcsetCase { + name: "relative middle candidate without leading slash", + attr_value: "https://cdn.example/a.png 1x,local/b.png 2x,//cdn.example/c.png 3x", + expected_relative: "local/b.png 2x", + expected_descriptors: &[" 1x", " 2x", " 3x"], + }, + SrcsetCase { + name: "extra spaces normalize but preserve semantics", + attr_value: " https://cdn.example/a.png 1x , //cdn.example/b.png 2x , /local/c.png 1x ", + expected_relative: "/local/c.png 1x", + expected_descriptors: &[" 1x", " 2x"], + }, + ]; + + for case in cases { + assert_rewritten_srcset_attr_case( + case.name, + "imagesrcset", + case.attr_value, + case.expected_relative, + case.expected_descriptors, + ); + } } #[test] diff --git a/crates/trusted-server-core/src/streaming_replacer.rs b/crates/trusted-server-core/src/streaming_replacer.rs index 1e7291e5..4c6e64c5 100644 --- a/crates/trusted-server-core/src/streaming_replacer.rs +++ b/crates/trusted-server-core/src/streaming_replacer.rs @@ -199,6 +199,39 @@ pub fn create_url_replacer( mod tests { use super::*; + fn process_with_fixed_chunk_size( + mut replacer: StreamingReplacer, + content: &[u8], + chunk_size: usize, + ) -> String { + let chunks: Vec<_> = content.chunks(chunk_size).collect(); + let mut result = Vec::new(); + + for (index, chunk) in chunks.iter().enumerate() { + let is_last_chunk = index == chunks.len() - 1; + result.extend(replacer.process_chunk(chunk, is_last_chunk)); + } + + String::from_utf8(result).expect("output should be valid UTF-8") + } + + fn process_with_explicit_splits( + mut replacer: StreamingReplacer, + content: &[u8], + split_points: &[usize], + ) -> String { + let mut result = Vec::new(); + let mut start = 0usize; + + for (index, end) in split_points.iter().copied().enumerate() { + let is_last_chunk = index == split_points.len() - 1; + result.extend(replacer.process_chunk(&content[start..end], is_last_chunk)); + start = end; + } + + String::from_utf8(result).expect("output should be valid UTF-8") + } + #[test] fn test_streaming_replacer_basic() { let mut replacer = @@ -371,61 +404,85 @@ mod tests { } #[test] - fn test_process_chunk_utf8_boundary() { - let mut replacer = - create_url_replacer("origin.com", "https://origin.com", "test.com", "https"); - - // Create content with multi-byte UTF-8 characters that could cause boundary issues - let content = "https://origin.com/test 思怙ᕏ测试 https://origin.com/more".as_bytes(); + fn test_process_chunk_utf8_boundary_cases() { + struct Utf8BoundaryCase<'a> { + name: &'a str, + replacer: StreamingReplacer, + content: &'a str, + chunk_size: usize, + expected_fragments: &'a [&'a str], + } - // Process in small chunks to force potential boundary issues - let chunk_size = 20; - let mut result = Vec::new(); + let cases = [ + Utf8BoundaryCase { + name: "multibyte text with chunked url replacements", + replacer: create_url_replacer( + "origin.com", + "https://origin.com", + "test.com", + "https", + ), + content: "https://origin.com/test 思怙ᕏ测试 https://origin.com/more", + chunk_size: 20, + expected_fragments: &[ + "https://test.com/test", + "https://test.com/more", + "思怙ᕏ测试", + ], + }, + Utf8BoundaryCase { + name: "small chunks preserve utf8 without replacements", + replacer: create_url_replacer("test.com", "https://test.com", "new.com", "https"), + content: "Some text 思怙ᕏ测试 more text with 🎉 emoji", + chunk_size: 8, + expected_fragments: &["Some text", "思怙ᕏ测试", "🎉 emoji"], + }, + ]; - for (i, chunk) in content.chunks(chunk_size).enumerate() { - let is_last = i == content.chunks(chunk_size).count() - 1; - result.extend(replacer.process_chunk(chunk, is_last)); + for case in cases { + let result = process_with_fixed_chunk_size( + case.replacer, + case.content.as_bytes(), + case.chunk_size, + ); + + for expected_fragment in case.expected_fragments { + assert!( + result.contains(expected_fragment), + "case `{}` should contain `{}` but was `{}`", + case.name, + expected_fragment, + result + ); + } } - - let result_str = String::from_utf8(result).expect("output should be valid UTF-8"); - assert!(result_str.contains("https://test.com/test")); - assert!(result_str.contains("https://test.com/more")); - assert!(result_str.contains("思怙ᕏ测试")); } #[test] fn test_process_chunk_boundary_in_multibyte_char() { - let mut replacer = - create_url_replacer("example.com", "https://example.com", "new.com", "https"); - - // Create a scenario where chunk boundary falls in the middle of a multi-byte character let content = "https://example.com/før/bår/test".as_bytes(); - // Split at byte 23, which should be in the middle of 'ø' (2-byte character) - let chunk1 = &content[..23]; - let chunk2 = &content[23..]; - - let mut result = Vec::new(); - result.extend(replacer.process_chunk(chunk1, false)); - result.extend(replacer.process_chunk(chunk2, true)); + let result = process_with_explicit_splits( + create_url_replacer("example.com", "https://example.com", "new.com", "https"), + content, + &[23, content.len()], + ); - let result_str = String::from_utf8(result).expect("output should be valid UTF-8"); - assert!(result_str.contains("https://new.com/før/bår/test")); + assert!(result.contains("https://new.com/før/bår/test")); } #[test] - fn test_process_chunk_emoji_boundary() { - let mut replacer = - create_url_replacer("emoji.com", "https://emoji.com", "test.com", "https"); + fn test_process_chunk_boundary_in_emoji() { + let content = "🎉🎊🎋 https://emoji.com/more".as_bytes(); - // Test with 4-byte emoji characters - let content = "https://emoji.com/test 🎉🎊🎋 https://emoji.com/more".as_bytes(); + let result = process_with_explicit_splits( + create_url_replacer("emoji.com", "https://emoji.com", "test.com", "https"), + content, + &[2, content.len()], + ); - // Process the entire content at once to verify it works - let all_at_once = replacer.process_chunk(content, true); - let expected = String::from_utf8(all_at_once).expect("output should be valid UTF-8"); - assert!(expected.contains("https://test.com/test")); - assert!(expected.contains("https://test.com/more")); + assert!(result.contains("🎉🎊🎋")); + assert!(result.contains("https://test.com/more")); } #[test] @@ -452,29 +509,6 @@ mod tests { assert!(result_str.contains("https://test.com/page2")); } - #[test] - fn test_process_chunk_utf8_boundary_small_chunks() { - let mut replacer = create_url_replacer("test.com", "https://test.com", "new.com", "https"); - - // Test with multi-byte characters and very small chunks to stress UTF-8 boundaries - let content = "Some text 思怙ᕏ测试 more text with 🎉 emoji".as_bytes(); - - // Use very small chunks to force UTF-8 boundary handling - let chunk_size = 8; - let mut result = Vec::new(); - let chunks: Vec<_> = content.chunks(chunk_size).collect(); - - for (i, chunk) in chunks.iter().enumerate() { - let is_last = i == chunks.len() - 1; - result.extend(replacer.process_chunk(chunk, is_last)); - } - - let result_str = String::from_utf8(result).expect("output should be valid UTF-8"); - // Just verify the content is preserved correctly - assert!(result_str.contains("思怙ᕏ测试")); - assert!(result_str.contains("🎉")); - } - #[test] fn test_generic_replacements() { // Test replacing arbitrary strings