LibRegex: Make FailForks fail all forks up to the last save point

This makes negative lookarounds with more than one fork behave
correctly.
Fixes #11350.
This commit is contained in:
Ali Mohammad Pur 2021-12-25 05:35:09 +03:30 committed by Andreas Kling
parent 105d558922
commit 1a35e27490
Notes: sideshowbarker 2024-07-17 22:11:15 +09:00
4 changed files with 30 additions and 15 deletions

View file

@ -973,3 +973,14 @@ TEST_CASE(posix_basic_dollar_is_literal)
EXPECT_EQ(re.match("123abc$", PosixFlags::Global).success, true);
}
}
TEST_CASE(negative_lookahead)
{
    {
        // Regression case: the lookahead body is an alternation (caret-paren or '1'),
        // which, per the original note on this case, puts more than 2 forks between
        // the point where the lookahead starts and the point where it finishes.
        Regex<ECMA262> pattern(":(?!\\^\\)|1)", ECMAScriptFlags::Global);

        // Inputs where one of the lookahead alternatives follows ':' must not match.
        EXPECT_EQ(pattern.match(":^)").success, false);
        EXPECT_EQ(pattern.match(":1").success, false);

        // Input where neither alternative follows ':' must match.
        EXPECT_EQ(pattern.match(":foobar").success, true);
    }
}

View file

@ -139,6 +139,7 @@ static void reverse_string_position(MatchState& state, RegexStringView view, siz
// Pushes a snapshot of the matcher's position bookkeeping onto the per-input
// stacks: the string position, the fork counter, and the code-unit position.
// Each value goes onto its own vector in `input`.
static void save_string_position(MatchInput const& input, MatchState const& state)
{
    auto const& position = state.string_position;
    auto const& fork_count = state.forks_since_last_save;
    auto const& position_in_code_units = state.string_position_in_code_units;

    input.saved_positions.append(position);
    input.saved_forks_since_last_save.append(fork_count);
    input.saved_code_unit_positions.append(position_in_code_units);
}
@ -149,6 +150,7 @@ static bool restore_string_position(MatchInput const& input, MatchState& state)
state.string_position = input.saved_positions.take_last();
state.string_position_in_code_units = input.saved_code_unit_positions.take_last();
state.forks_since_last_save = input.saved_forks_since_last_save.take_last();
return true;
}
@ -207,6 +209,7 @@ ALWAYS_INLINE ExecutionResult OpCode_Exit::execute(MatchInput const& input, Matc
// Executes a SAVE instruction: snapshots the current position state and then
// starts a fresh fork count for the new save point. Always returns Continue.
ALWAYS_INLINE ExecutionResult OpCode_Save::execute(MatchInput const& input, MatchState& state) const
{
// Must run before the reset below: save_string_position() also stores the
// current value of state.forks_since_last_save.
save_string_position(input, state);
// Forks are counted relative to the most recent save point, so start over.
state.forks_since_last_save = 0;
return ExecutionResult::Continue;
}
@ -226,11 +229,9 @@ ALWAYS_INLINE ExecutionResult OpCode_GoBack::execute(MatchInput const& input, Ma
return ExecutionResult::Continue;
}
// NOTE(review): this span comes from a diff rendered without +/- markers, so two
// versions of the function are interleaved. Judging by the hunk sizes and by
// count() being dropped from the OpCode_FailForks class, the first signature,
// the VERIFY and the `count() - 1` statement look like the removed version, and
// the second signature plus the forks_since_last_save statement like the added
// one. Confirm against the repository before relying on this.
ALWAYS_INLINE ExecutionResult OpCode_FailForks::execute(MatchInput const& input, MatchState&) const
ALWAYS_INLINE ExecutionResult OpCode_FailForks::execute(MatchInput const& input, MatchState& state) const
{
VERIFY(count() > 0);
input.fail_counter += count() - 1;
// Add the number of forks counted since the last save point to the fail counter
// kept on the match input.
input.fail_counter += state.forks_since_last_save;
return ExecutionResult::Failed_ExecuteLowPrioForks;
}
@ -243,6 +244,7 @@ ALWAYS_INLINE ExecutionResult OpCode_Jump::execute(MatchInput const&, MatchState
// Executes a FORKJUMP instruction: records where the fork should go, counts the
// fork, and reports a high-priority fork to the caller.
ALWAYS_INLINE ExecutionResult OpCode_ForkJump::execute(MatchInput const&, MatchState& state) const
{
    // Count this fork towards the tally kept since the last save point.
    state.forks_since_last_save += 1;

    // Fork target: this instruction's position plus its own size plus offset().
    size_t const fork_target = state.instruction_position + size() + offset();
    state.fork_at_position = fork_target;

    return ExecutionResult::Fork_PrioHigh;
}
@ -250,12 +252,14 @@ ALWAYS_INLINE ExecutionResult OpCode_ForkReplaceJump::execute(MatchInput const&
{
state.fork_at_position = state.instruction_position + size() + offset();
input.fork_to_replace = state.instruction_position;
state.forks_since_last_save++;
return ExecutionResult::Fork_PrioHigh;
}
// Executes a FORKSTAY instruction: records where the fork should go, counts the
// fork, and reports a low-priority fork to the caller.
ALWAYS_INLINE ExecutionResult OpCode_ForkStay::execute(MatchInput const&, MatchState& state) const
{
    // Count this fork towards the tally kept since the last save point.
    state.forks_since_last_save += 1;

    // Fork target: this instruction's position plus its own size plus offset().
    size_t const fork_target = state.instruction_position + size() + offset();
    state.fork_at_position = fork_target;

    return ExecutionResult::Fork_PrioLow;
}
@ -263,6 +267,7 @@ ALWAYS_INLINE ExecutionResult OpCode_ForkReplaceStay::execute(MatchInput const&
{
state.fork_at_position = state.instruction_position + size() + offset();
input.fork_to_replace = state.instruction_position;
state.forks_since_last_save++;
return ExecutionResult::Fork_PrioLow;
}

View file

@ -267,20 +267,19 @@ public:
// JUMP _A
// LABEL _L
// REGEXP BODY
// FAIL 2
// FAIL
// LABEL _A
// SAVE
// FORKJUMP _L
// RESTORE
auto body_length = lookaround_body.size();
empend((ByteCodeValueType)OpCodeId::Jump);
empend((ByteCodeValueType)body_length + 2); // JUMP to label _A
empend((ByteCodeValueType)body_length + 1); // JUMP to label _A
extend(move(lookaround_body));
empend((ByteCodeValueType)OpCodeId::FailForks);
empend((ByteCodeValueType)2); // Fail two forks
empend((ByteCodeValueType)OpCodeId::Save);
empend((ByteCodeValueType)OpCodeId::ForkJump);
empend((ByteCodeValueType) - (body_length + 5)); // JUMP to label _L
empend((ByteCodeValueType) - (body_length + 4)); // JUMP to label _L
empend((ByteCodeValueType)OpCodeId::Restore);
return;
}
@ -300,22 +299,21 @@ public:
// LABEL _L
// GOBACK match_length(BODY)
// REGEXP BODY
// FAIL 2
// FAIL
// LABEL _A
// SAVE
// FORKJUMP _L
// RESTORE
auto body_length = lookaround_body.size();
empend((ByteCodeValueType)OpCodeId::Jump);
empend((ByteCodeValueType)body_length + 4); // JUMP to label _A
empend((ByteCodeValueType)body_length + 3); // JUMP to label _A
empend((ByteCodeValueType)OpCodeId::GoBack);
empend((ByteCodeValueType)match_length);
extend(move(lookaround_body));
empend((ByteCodeValueType)OpCodeId::FailForks);
empend((ByteCodeValueType)2); // Fail two forks
empend((ByteCodeValueType)OpCodeId::Save);
empend((ByteCodeValueType)OpCodeId::ForkJump);
empend((ByteCodeValueType) - (body_length + 7)); // JUMP to label _L
empend((ByteCodeValueType) - (body_length + 6)); // JUMP to label _L
empend((ByteCodeValueType)OpCodeId::Restore);
return;
}
@ -593,9 +591,8 @@ class OpCode_FailForks final : public OpCode {
public:
ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::FailForks; }
ALWAYS_INLINE size_t size() const override { return 2; }
ALWAYS_INLINE size_t count() const { return argument(0); }
String arguments_string() const override { return String::formatted("count={}", count()); }
ALWAYS_INLINE size_t size() const override { return 1; }
String arguments_string() const override { return String::empty(); }
};
class OpCode_Save final : public OpCode {

View file

@ -514,6 +514,7 @@ struct MatchInput {
mutable size_t fail_counter { 0 };
mutable Vector<size_t> saved_positions;
mutable Vector<size_t> saved_code_unit_positions;
mutable Vector<size_t> saved_forks_since_last_save;
mutable HashMap<u64, u64> checkpoints;
mutable Optional<size_t> fork_to_replace;
};
@ -524,6 +525,7 @@ struct MatchState {
size_t string_position_in_code_units { 0 };
size_t instruction_position { 0 };
size_t fork_at_position { 0 };
size_t forks_since_last_save { 0 };
Optional<size_t> initiating_fork;
Vector<Match> matches;
Vector<Vector<Match>> capture_group_matches;