LibRegex: Treat inverted Compare entries as disjunctions

[^XYZ] is not(X | Y | Z), we used to translate this to
not(X) | not(Y) | not(Z), this commit makes LibRegex interpret this
pattern as not(X) & not(Y) & not(Z).
This commit is contained in:
Ali Mohammad Pur 2022-07-10 11:36:34 +04:30 committed by Andreas Kling
parent fe46b2c141
commit d348eaf305
2 changed files with 33 additions and 13 deletions

View file

@ -1040,11 +1040,23 @@ TEST_CASE(single_match_flag)
TEST_CASE(inversion_state_in_char_class)
{
// #13755, /[\S\s]/.exec("hello") should be [ "h" ], not null.
Regex<ECMA262> re("[\\S\\s]", ECMAScriptFlags::Global | (ECMAScriptFlags)regex::AllFlags::SingleMatch);
{
// #13755, /[\S\s]/.exec("hello") should be [ "h" ], not null.
Regex<ECMA262> re("[\\S\\s]", ECMAScriptFlags::Global | (ECMAScriptFlags)regex::AllFlags::SingleMatch);
auto result = re.match("hello");
EXPECT_EQ(result.success, true);
EXPECT_EQ(result.matches.size(), 1u);
EXPECT_EQ(result.matches.first().view.to_string(), "h"sv);
auto result = re.match("hello");
EXPECT_EQ(result.success, true);
EXPECT_EQ(result.matches.size(), 1u);
EXPECT_EQ(result.matches.first().view.to_string(), "h"sv);
}
{
Regex<ECMA262> re("^(?:([^\\s!\"#%-,\\./;->@\\[-\\^`\\{-~]+(?=([=~}\\s/.)|]))))"sv, ECMAScriptFlags::Global);
auto result = re.match("slideNumbers}}"sv);
EXPECT_EQ(result.success, true);
EXPECT_EQ(result.matches.size(), 1u);
EXPECT_EQ(result.matches.first().view.to_string(), "slideNumbers"sv);
EXPECT_EQ(result.capture_group_matches.first()[0].view.to_string(), "slideNumbers"sv);
EXPECT_EQ(result.capture_group_matches.first()[1].view.to_string(), "}"sv);
}
}

View file

@ -486,8 +486,12 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
return ExecutionResult::Failed_ExecuteLowPrioForks;
auto input_view = input.view.substring_view(state.string_position, 1)[0];
if (input_view != '\n' || (input.regex_options.has_flag_set(AllFlags::SingleLine) && input.regex_options.has_flag_set(AllFlags::Internal_ConsiderNewline)))
advance_string_position(state, input.view, input_view);
if (input_view != '\n' || (input.regex_options.has_flag_set(AllFlags::SingleLine) && input.regex_options.has_flag_set(AllFlags::Internal_ConsiderNewline))) {
if (current_inversion_state())
inverse_matched = true;
else
advance_string_position(state, input.view, input_view);
}
} else if (compare_type == CharacterCompareType::String) {
VERIFY(!current_inversion_state());
@ -507,8 +511,10 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
auto view = input.view.construct_as_same(data, str, utf16);
offset += length;
if (!compare_string(input, state, view, had_zero_length_match))
return ExecutionResult::Failed_ExecuteLowPrioForks;
if (compare_string(input, state, view, had_zero_length_match)) {
if (current_inversion_state())
inverse_matched = true;
}
} else if (compare_type == CharacterCompareType::CharClass) {
@ -575,8 +581,10 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
if (input.view.length() < state.string_position + str.length())
return ExecutionResult::Failed_ExecuteLowPrioForks;
if (!compare_string(input, state, str, had_zero_length_match))
return ExecutionResult::Failed_ExecuteLowPrioForks;
if (compare_string(input, state, str, had_zero_length_match)) {
if (current_inversion_state())
inverse_matched = true;
}
} else if (compare_type == CharacterCompareType::Property) {
auto property = static_cast<Unicode::Property>(m_bytecode->at(offset++));
@ -600,7 +608,7 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
break;
}
if (current_inversion_state() && !inverse_matched) {
if (current_inversion_state() && !inverse && !inverse_matched) {
advance_string_position(state, input.view);
inverse_matched = true;
}