mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-01-23 09:46:04 -05:00
LibRegex: Make FailForks fail all forks up to the last save point
This makes negative lookarounds with more than one fork behave correctly. Fixes #11350.
This commit is contained in:
parent
105d558922
commit
1a35e27490
Notes:
sideshowbarker
2024-07-17 22:11:15 +09:00
Author: https://github.com/alimpfard Commit: https://github.com/SerenityOS/serenity/commit/1a35e27490b Pull-request: https://github.com/SerenityOS/serenity/pull/11422 Issue: https://github.com/SerenityOS/serenity/issues/11350
4 changed files with 30 additions and 15 deletions
|
@ -973,3 +973,14 @@ TEST_CASE(posix_basic_dollar_is_literal)
|
|||
EXPECT_EQ(re.match("123abc$", PosixFlags::Global).success, true);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE(negative_lookahead)
|
||||
{
|
||||
{
|
||||
// Negative lookahead whose body creates more than 2 forks between the start and the end of the lookahead.
|
||||
Regex<ECMA262> re(":(?!\\^\\)|1)", ECMAScriptFlags::Global);
|
||||
EXPECT_EQ(re.match(":^)").success, false);
|
||||
EXPECT_EQ(re.match(":1").success, false);
|
||||
EXPECT_EQ(re.match(":foobar").success, true);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -139,6 +139,7 @@ static void reverse_string_position(MatchState& state, RegexStringView view, siz
|
|||
static void save_string_position(MatchInput const& input, MatchState const& state)
|
||||
{
|
||||
input.saved_positions.append(state.string_position);
|
||||
input.saved_forks_since_last_save.append(state.forks_since_last_save);
|
||||
input.saved_code_unit_positions.append(state.string_position_in_code_units);
|
||||
}
|
||||
|
||||
|
@ -149,6 +150,7 @@ static bool restore_string_position(MatchInput const& input, MatchState& state)
|
|||
|
||||
state.string_position = input.saved_positions.take_last();
|
||||
state.string_position_in_code_units = input.saved_code_unit_positions.take_last();
|
||||
state.forks_since_last_save = input.saved_forks_since_last_save.take_last();
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -207,6 +209,7 @@ ALWAYS_INLINE ExecutionResult OpCode_Exit::execute(MatchInput const& input, Matc
|
|||
ALWAYS_INLINE ExecutionResult OpCode_Save::execute(MatchInput const& input, MatchState& state) const
|
||||
{
|
||||
save_string_position(input, state);
|
||||
state.forks_since_last_save = 0;
|
||||
return ExecutionResult::Continue;
|
||||
}
|
||||
|
||||
|
@ -226,11 +229,9 @@ ALWAYS_INLINE ExecutionResult OpCode_GoBack::execute(MatchInput const& input, Ma
|
|||
return ExecutionResult::Continue;
|
||||
}
|
||||
|
||||
ALWAYS_INLINE ExecutionResult OpCode_FailForks::execute(MatchInput const& input, MatchState&) const
|
||||
ALWAYS_INLINE ExecutionResult OpCode_FailForks::execute(MatchInput const& input, MatchState& state) const
|
||||
{
|
||||
VERIFY(count() > 0);
|
||||
|
||||
input.fail_counter += count() - 1;
|
||||
input.fail_counter += state.forks_since_last_save;
|
||||
return ExecutionResult::Failed_ExecuteLowPrioForks;
|
||||
}
|
||||
|
||||
|
@ -243,6 +244,7 @@ ALWAYS_INLINE ExecutionResult OpCode_Jump::execute(MatchInput const&, MatchState
|
|||
ALWAYS_INLINE ExecutionResult OpCode_ForkJump::execute(MatchInput const&, MatchState& state) const
|
||||
{
|
||||
state.fork_at_position = state.instruction_position + size() + offset();
|
||||
state.forks_since_last_save++;
|
||||
return ExecutionResult::Fork_PrioHigh;
|
||||
}
|
||||
|
||||
|
@ -250,12 +252,14 @@ ALWAYS_INLINE ExecutionResult OpCode_ForkReplaceJump::execute(MatchInput const&
|
|||
{
|
||||
state.fork_at_position = state.instruction_position + size() + offset();
|
||||
input.fork_to_replace = state.instruction_position;
|
||||
state.forks_since_last_save++;
|
||||
return ExecutionResult::Fork_PrioHigh;
|
||||
}
|
||||
|
||||
ALWAYS_INLINE ExecutionResult OpCode_ForkStay::execute(MatchInput const&, MatchState& state) const
|
||||
{
|
||||
state.fork_at_position = state.instruction_position + size() + offset();
|
||||
state.forks_since_last_save++;
|
||||
return ExecutionResult::Fork_PrioLow;
|
||||
}
|
||||
|
||||
|
@ -263,6 +267,7 @@ ALWAYS_INLINE ExecutionResult OpCode_ForkReplaceStay::execute(MatchInput const&
|
|||
{
|
||||
state.fork_at_position = state.instruction_position + size() + offset();
|
||||
input.fork_to_replace = state.instruction_position;
|
||||
state.forks_since_last_save++;
|
||||
return ExecutionResult::Fork_PrioLow;
|
||||
}
|
||||
|
||||
|
|
|
@ -267,20 +267,19 @@ public:
|
|||
// JUMP _A
|
||||
// LABEL _L
|
||||
// REGEXP BODY
|
||||
// FAIL 2
|
||||
// FAIL
|
||||
// LABEL _A
|
||||
// SAVE
|
||||
// FORKJUMP _L
|
||||
// RESTORE
|
||||
auto body_length = lookaround_body.size();
|
||||
empend((ByteCodeValueType)OpCodeId::Jump);
|
||||
empend((ByteCodeValueType)body_length + 2); // JUMP to label _A
|
||||
empend((ByteCodeValueType)body_length + 1); // JUMP to label _A
|
||||
extend(move(lookaround_body));
|
||||
empend((ByteCodeValueType)OpCodeId::FailForks);
|
||||
empend((ByteCodeValueType)2); // Fail two forks
|
||||
empend((ByteCodeValueType)OpCodeId::Save);
|
||||
empend((ByteCodeValueType)OpCodeId::ForkJump);
|
||||
empend((ByteCodeValueType) - (body_length + 5)); // JUMP to label _L
|
||||
empend((ByteCodeValueType) - (body_length + 4)); // JUMP to label _L
|
||||
empend((ByteCodeValueType)OpCodeId::Restore);
|
||||
return;
|
||||
}
|
||||
|
@ -300,22 +299,21 @@ public:
|
|||
// LABEL _L
|
||||
// GOBACK match_length(BODY)
|
||||
// REGEXP BODY
|
||||
// FAIL 2
|
||||
// FAIL
|
||||
// LABEL _A
|
||||
// SAVE
|
||||
// FORKJUMP _L
|
||||
// RESTORE
|
||||
auto body_length = lookaround_body.size();
|
||||
empend((ByteCodeValueType)OpCodeId::Jump);
|
||||
empend((ByteCodeValueType)body_length + 4); // JUMP to label _A
|
||||
empend((ByteCodeValueType)body_length + 3); // JUMP to label _A
|
||||
empend((ByteCodeValueType)OpCodeId::GoBack);
|
||||
empend((ByteCodeValueType)match_length);
|
||||
extend(move(lookaround_body));
|
||||
empend((ByteCodeValueType)OpCodeId::FailForks);
|
||||
empend((ByteCodeValueType)2); // Fail two forks
|
||||
empend((ByteCodeValueType)OpCodeId::Save);
|
||||
empend((ByteCodeValueType)OpCodeId::ForkJump);
|
||||
empend((ByteCodeValueType) - (body_length + 7)); // JUMP to label _L
|
||||
empend((ByteCodeValueType) - (body_length + 6)); // JUMP to label _L
|
||||
empend((ByteCodeValueType)OpCodeId::Restore);
|
||||
return;
|
||||
}
|
||||
|
@ -593,9 +591,8 @@ class OpCode_FailForks final : public OpCode {
|
|||
public:
|
||||
ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
|
||||
ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::FailForks; }
|
||||
ALWAYS_INLINE size_t size() const override { return 2; }
|
||||
ALWAYS_INLINE size_t count() const { return argument(0); }
|
||||
String arguments_string() const override { return String::formatted("count={}", count()); }
|
||||
ALWAYS_INLINE size_t size() const override { return 1; }
|
||||
String arguments_string() const override { return String::empty(); }
|
||||
};
|
||||
|
||||
class OpCode_Save final : public OpCode {
|
||||
|
|
|
@ -514,6 +514,7 @@ struct MatchInput {
|
|||
mutable size_t fail_counter { 0 };
|
||||
mutable Vector<size_t> saved_positions;
|
||||
mutable Vector<size_t> saved_code_unit_positions;
|
||||
mutable Vector<size_t> saved_forks_since_last_save;
|
||||
mutable HashMap<u64, u64> checkpoints;
|
||||
mutable Optional<size_t> fork_to_replace;
|
||||
};
|
||||
|
@ -524,6 +525,7 @@ struct MatchState {
|
|||
size_t string_position_in_code_units { 0 };
|
||||
size_t instruction_position { 0 };
|
||||
size_t fork_at_position { 0 };
|
||||
size_t forks_since_last_save { 0 };
|
||||
Optional<size_t> initiating_fork;
|
||||
Vector<Match> matches;
|
||||
Vector<Vector<Match>> capture_group_matches;
|
||||
|
|
Loading…
Add table
Reference in a new issue