Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions benchmark/string_casecmp_p.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ prelude: |
unonascii10 = unonascii1 * 10
unonascii100 = unonascii10 * 10
unonascii1000 = unonascii100 * 10
GC.disable # GC causes a lot of variance
benchmark:
casecmp_p-1: lstr1.casecmp?(ustr1)
casecmp_p-10: lstr10.casecmp?(ustr10)
Expand Down
23 changes: 20 additions & 3 deletions prism/prism.c
Original file line number Diff line number Diff line change
Expand Up @@ -11373,6 +11373,8 @@ parser_lex(pm_parser_t *parser) {
// First we'll set the beginning of the token.
parser->current.start = parser->current.end;

pm_lex_mode_t *lex_mode = parser->lex_modes.current;

// If there's any whitespace at the start of the list, then we're
// going to trim it off the beginning and create a new token.
size_t whitespace;
Expand All @@ -11382,6 +11384,12 @@ parser_lex(pm_parser_t *parser) {
if (peek_offset(parser, (ptrdiff_t)whitespace) == '\n') {
whitespace += 1;
}
} else if (lex_mode->as.list.terminator == '\n') {
// When the list delimiter is a newline (e.g. `%w` followed by a
// newline), the newline is the terminator rather than a word
// separator. We only trim inline whitespace here so that the
// terminating newline is left for the terminator handling below.
whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
} else {
whitespace = pm_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
}
Expand All @@ -11403,7 +11411,6 @@ parser_lex(pm_parser_t *parser) {

// Here we'll get a list of the places where strpbrk should break,
// and then find the first one.
pm_lex_mode_t *lex_mode = parser->lex_modes.current;
const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);

Expand All @@ -11413,8 +11420,10 @@ parser_lex(pm_parser_t *parser) {

while (breakpoint != NULL) {
// If we hit whitespace, then we must have received content by
// now, so we can return an element of the list.
if (pm_char_is_whitespace(*breakpoint)) {
// now, so we can return an element of the list. A whitespace
// character that is also the terminator (e.g. a newline
// delimiter) is handled by the terminator check below, not here.
if (pm_char_is_whitespace(*breakpoint) && *breakpoint != lex_mode->as.list.terminator) {
parser->current.end = breakpoint;
pm_token_buffer_flush(parser, &token_buffer);
LEX(PM_TOKEN_STRING_CONTENT);
Expand Down Expand Up @@ -11443,6 +11452,14 @@ parser_lex(pm_parser_t *parser) {
// Otherwise, switch back to the default state and return
// the end of the list.
parser->current.end = breakpoint + 1;

// If the terminator is a newline (i.e. the list delimiter
// was a newline), then we need to record it so that line
// numbers after the list remain accurate.
if (*breakpoint == '\n') {
pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
}

lex_mode_pop(parser);
lex_state_set(parser, PM_LEX_STATE_END);
LEX(PM_TOKEN_STRING_END);
Expand Down
14 changes: 14 additions & 0 deletions string.c
Original file line number Diff line number Diff line change
Expand Up @@ -4510,6 +4510,20 @@ str_casecmp_p(VALUE str1, VALUE str2)
return Qnil;
}

if (is_ascii_string(str1) && is_ascii_string(str2)) {
if (RSTRING_LEN(str1) != RSTRING_LEN(str2)) return Qfalse;
const char *p1 = RSTRING_PTR(str1), *p1end = RSTRING_END(str1);
const char *p2 = RSTRING_PTR(str2);
while (p1 < p1end) {
if (*p1 != *p2 && TOLOWER((unsigned char)*p1) != TOLOWER((unsigned char)*p2)) {
return Qfalse;
}
p1++;
p2++;
}
return Qtrue;
}

folded_str1 = rb_str_downcase(1, &fold_opt, str1);
folded_str2 = rb_str_downcase(1, &fold_opt, str2);

Expand Down
11 changes: 11 additions & 0 deletions test/prism/fixtures/percent_array_newline_delimiter.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
%w
foo bar

%i
baz qux

%W
a b

%I
c d
1 change: 1 addition & 0 deletions test/prism/ruby/ruby_parser_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ class RubyParserTest < TestCase
"multi_write.txt",
"not.txt",
"patterns.txt",
"percent_array_newline_delimiter.txt",
"regex.txt",
"seattlerb/and_multi.txt",
"seattlerb/heredoc__backslash_dos_format.txt",
Expand Down
1 change: 1 addition & 0 deletions test/prism/snippets_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ class SnippetsTest < TestCase
except = [
"encoding_binary.txt",
"newline_terminated.txt",
"percent_array_newline_delimiter.txt",
"seattlerb/begin_rescue_else_ensure_no_bodies.txt",
"seattlerb/case_in.txt",
"seattlerb/parse_line_defn_no_parens.txt",
Expand Down
Loading