turkdevops · pull · Jun 23, 2026 · Jun 23, 2026 · Jun 23, 2026 · Jun 22, 2026
diff --git a/benchmark/string_casecmp_p.yml b/benchmark/string_casecmp_p.yml
@@ -15,6 +15,7 @@ prelude: |
   unonascii10 = unonascii1 * 10
   unonascii100 = unonascii10 * 10
   unonascii1000 = unonascii100 * 10
+  GC.disable # GC causes a lot of variance
 benchmark:
   casecmp_p-1: lstr1.casecmp?(ustr1)
   casecmp_p-10: lstr10.casecmp?(ustr10)

diff --git a/prism/prism.c b/prism/prism.c
@@ -11373,6 +11373,8 @@ parser_lex(pm_parser_t *parser) {
             // First we'll set the beginning of the token.
             parser->current.start = parser->current.end;
 
+            pm_lex_mode_t *lex_mode = parser->lex_modes.current;
+
             // If there's any whitespace at the start of the list, then we're
             // going to trim it off the beginning and create a new token.
             size_t whitespace;
@@ -11382,6 +11384,12 @@ parser_lex(pm_parser_t *parser) {
                 if (peek_offset(parser, (ptrdiff_t)whitespace) == '\n') {
                     whitespace += 1;
                 }
+            } else if (lex_mode->as.list.terminator == '\n') {
+                // When the list delimiter is a newline (e.g. `%w` followed by a
+                // newline), the newline is the terminator rather than a word
+                // separator. We only trim inline whitespace here so that the
+                // terminating newline is left for the terminator handling below.
+                whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
             } else {
                 whitespace = pm_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
             }
@@ -11403,7 +11411,6 @@ parser_lex(pm_parser_t *parser) {
 
             // Here we'll get a list of the places where strpbrk should break,
             // and then find the first one.
-            pm_lex_mode_t *lex_mode = parser->lex_modes.current;
             const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
             const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
 
@@ -11413,8 +11420,10 @@ parser_lex(pm_parser_t *parser) {
 
             while (breakpoint != NULL) {
                 // If we hit whitespace, then we must have received content by
-                // now, so we can return an element of the list.
-                if (pm_char_is_whitespace(*breakpoint)) {
+                // now, so we can return an element of the list. A whitespace
+                // character that is also the terminator (e.g. a newline
+                // delimiter) is handled by the terminator check below, not here.
+                if (pm_char_is_whitespace(*breakpoint) && *breakpoint != lex_mode->as.list.terminator) {
                     parser->current.end = breakpoint;
                     pm_token_buffer_flush(parser, &token_buffer);
                     LEX(PM_TOKEN_STRING_CONTENT);
@@ -11443,6 +11452,14 @@ parser_lex(pm_parser_t *parser) {
                     // Otherwise, switch back to the default state and return
                     // the end of the list.
                     parser->current.end = breakpoint + 1;
+
+                    // If the terminator is a newline (i.e. the list delimiter
+                    // was a newline), then we need to record it so that line
+                    // numbers after the list remain accurate.
+                    if (*breakpoint == '\n') {
+                        pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
+                    }
+
                     lex_mode_pop(parser);
                     lex_state_set(parser, PM_LEX_STATE_END);
                     LEX(PM_TOKEN_STRING_END);

diff --git a/string.c b/string.c
@@ -4510,6 +4510,20 @@ str_casecmp_p(VALUE str1, VALUE str2)
         return Qnil;
     }
 
+    if (is_ascii_string(str1) && is_ascii_string(str2)) {
+        if (RSTRING_LEN(str1) != RSTRING_LEN(str2)) return Qfalse;
+        const char *p1 = RSTRING_PTR(str1), *p1end = RSTRING_END(str1);
+        const char *p2 = RSTRING_PTR(str2);
+        while (p1 < p1end) {
+            if (*p1 != *p2 && TOLOWER((unsigned char)*p1) != TOLOWER((unsigned char)*p2)) {
+                return Qfalse;
+            }
+            p1++;
+            p2++;
+        }
+        return Qtrue;
+    }
+
     folded_str1 = rb_str_downcase(1, &fold_opt, str1);
     folded_str2 = rb_str_downcase(1, &fold_opt, str2);
 

diff --git a/test/prism/fixtures/percent_array_newline_delimiter.txt b/test/prism/fixtures/percent_array_newline_delimiter.txt
@@ -0,0 +1,11 @@
+%w
+foo bar
+
+%i
+baz qux
+
+%W
+a b
+
+%I
+c d
diff --git a/test/prism/ruby/ruby_parser_test.rb b/test/prism/ruby/ruby_parser_test.rb
@@ -46,6 +46,7 @@ class RubyParserTest < TestCase
       "multi_write.txt",
       "not.txt",
       "patterns.txt",
+      "percent_array_newline_delimiter.txt",
       "regex.txt",
       "seattlerb/and_multi.txt",
       "seattlerb/heredoc__backslash_dos_format.txt",

diff --git a/test/prism/snippets_test.rb b/test/prism/snippets_test.rb
@@ -7,6 +7,7 @@ class SnippetsTest < TestCase
     except = [
       "encoding_binary.txt",
       "newline_terminated.txt",
+      "percent_array_newline_delimiter.txt",
       "seattlerb/begin_rescue_else_ensure_no_bodies.txt",
       "seattlerb/case_in.txt",
       "seattlerb/parse_line_defn_no_parens.txt",
Original file line number | Diff line change
@@ -0,0 +1,11 @@
+    %w
+    foo bar
+    %i
+    baz qux
+    %W
+    a b
+    %I
+    c d