LibRegex: Use the *actually* correct repeat start offset for Repeat

Fixes #2931 and various frequent crashes.

(cherry picked from commit 50733c564c3715cac570af6ee5b2907b4aa09b2a)
This commit is contained in:
Ali Mohammad Pur 2024-12-23 12:16:08 +01:00 committed by Nico Weber
parent 7c3331f93f
commit cbafa3ad19
2 changed files with 6 additions and 1 deletion

View file

@@ -710,6 +710,11 @@ TEST_CASE(ECMA262_match)
"?category=54&shippable=1&baby_age=p,0,1,3"sv, false }, // ladybird#968, ?+ should not loop forever.
{ "([^\\s]+):\\s*([^;]+);"sv, "font-family: 'Inter';"sv, true }, // optimizer bug, blindly accepting inverted char classes [^x] as atomic rewrite opportunities.
{ "(a)(?=a*\\1)"sv, "aaaa"sv, true, global_multiline.value() }, // Optimizer bug, ignoring references that weren't bound in the current or past block, ladybird#2281
{ "[ a](b{2})"sv, "abb"sv, true }, // Optimizer bug, wrong Repeat basic block splits.
{ "^ {0,3}(([\\`\\~])\\2{2,})\\s*([\\*_]*)\\s*([^\\*_\\s]*).*$"sv, ""sv, false }, // See above.
{ "^(\\d{4}|[+-]\\d{6})(?:-?(\\d{2})(?:-?(\\d{2}))?)?(?:[ T]?(\\d{2}):?(\\d{2})(?::?(\\d{2})(?:[,.](\\d{1,}))?)?(?:(Z)|([+-])(\\d{2})(?::?(\\d{2}))?)?)?$"sv,
""sv,
false, }, // See above, also ladybird#2931.
};
// clang-format on

View file

@@ -92,7 +92,7 @@ typename Regex<Parser>::BasicBlockList Regex<Parser>::split_basic_blocks(ByteCod
case OpCodeId::Repeat: {
// Repeat produces two blocks, one containing its repeated expr, and one after that.
auto& repeat = static_cast<OpCode_Repeat const&>(opcode);
-auto repeat_start = state.instruction_position - repeat.offset() - repeat.size();
+auto repeat_start = state.instruction_position - repeat.offset();
if (repeat_start > end_of_last_block)
block_boundaries.append({ end_of_last_block, repeat_start, "Repeat"sv });
block_boundaries.append({ repeat_start, state.instruction_position, "Repeat after"sv });