From daa6d99e6e91144838839860b0ee4c8c9fc50f8f Mon Sep 17 00:00:00 2001 From: Ali Mohammad Pur Date: Sat, 10 Jul 2021 13:18:49 +0430 Subject: [PATCH] LibRegex: Add support for non-extended regular expressions in regcomp() Fixes part of #8506. --- Tests/LibRegex/RegexLibC.cpp | 23 +++++++++++++++++++++ Userland/Libraries/LibRegex/C/Regex.cpp | 27 +++++++++++++++---------- 2 files changed, 39 insertions(+), 11 deletions(-) diff --git a/Tests/LibRegex/RegexLibC.cpp b/Tests/LibRegex/RegexLibC.cpp index 3df98370312..e922e4525e7 100644 --- a/Tests/LibRegex/RegexLibC.cpp +++ b/Tests/LibRegex/RegexLibC.cpp @@ -1127,3 +1127,26 @@ TEST_CASE(simple_notbol_noteol) regfree(&regex); regfree(&regex2); } + +TEST_CASE(bre_basic) +{ + regex_t regex; + EXPECT_EQ(regcomp(&regex, "hello friends", REG_NOSUB | REG_ICASE), REG_NOERR); + EXPECT_EQ(regexec(&regex, "hello friends", 0, NULL, 0), REG_NOERR); + regfree(&regex); + + EXPECT_EQ(regcomp(&regex, "\\(15\\)\\1", REG_NOSUB | REG_ICASE), REG_NOERR); + EXPECT_EQ(regexec(&regex, "1515", 0, NULL, 0), REG_NOERR); + EXPECT_EQ(regexec(&regex, "55", 0, NULL, 0), REG_NOMATCH); + regfree(&regex); + + EXPECT_EQ(regcomp(&regex, "15\\{1,2\\}", REG_NOSUB | REG_ICASE), REG_NOERR); + EXPECT_EQ(regexec(&regex, "15", 0, NULL, 0), REG_NOERR); + EXPECT_EQ(regexec(&regex, "1515", 0, NULL, 0), REG_NOMATCH); + EXPECT_EQ(regexec(&regex, "55", 0, NULL, 0), REG_NOMATCH); + regfree(&regex); + + EXPECT_EQ(regcomp(&regex, "15{1,2}", REG_NOSUB | REG_ICASE), REG_NOERR); + EXPECT_EQ(regexec(&regex, "15{1,2}", 0, NULL, 0), REG_NOERR); + regfree(&regex); +} diff --git a/Userland/Libraries/LibRegex/C/Regex.cpp b/Userland/Libraries/LibRegex/C/Regex.cpp index d945ce69fe3..bb45131e041 100644 --- a/Userland/Libraries/LibRegex/C/Regex.cpp +++ b/Userland/Libraries/LibRegex/C/Regex.cpp @@ -17,10 +17,12 @@ # include #endif +#include + struct internal_regex_t { u8 cflags; u8 eflags; - OwnPtr> re; + Optional>, NonnullOwnPtr>>> re; size_t re_pat_errpos; ReError re_pat_err; String re_pat; @@ -52,16 +54,18 @@ int regcomp(regex_t* 
reg, const char* pattern, int cflags) reg->__data = new internal_regex_t { 0, 0, {}, 0, ReError::REG_NOERR, {}, 0 }; auto preg = impl_from(reg); - - if (!(cflags & REG_EXTENDED)) - return REG_ENOSYS; + bool is_extended = cflags & REG_EXTENDED; preg->cflags = cflags; String pattern_str(pattern); - preg->re = make>(pattern_str, PosixOptions {} | (PosixFlags)cflags | PosixFlags::SkipTrimEmptyMatches); + if (is_extended) + preg->re = make>(pattern_str, PosixOptions {} | (PosixFlags)cflags | PosixFlags::SkipTrimEmptyMatches); + else + preg->re = make>(pattern_str, PosixOptions {} | (PosixFlags)cflags | PosixFlags::SkipTrimEmptyMatches); + + auto parser_result = preg->re->visit([](auto& re) { return re->parser_result; }); - auto parser_result = preg->re->parser_result; if (parser_result.error != regex::Error::NoError) { preg->re_pat_errpos = parser_result.error_token.position(); preg->re_pat_err = (ReError)parser_result.error; @@ -81,7 +85,7 @@ int regexec(const regex_t* reg, const char* string, size_t nmatch, regmatch_t pm { auto preg = impl_from(reg); - if (!preg->re || preg->re_pat_err) { + if (!preg->re.has_value() || preg->re_pat_err) { if (preg->re_pat_err) return preg->re_pat_err; return REG_BADPAT; @@ -89,11 +93,12 @@ int regexec(const regex_t* reg, const char* string, size_t nmatch, regmatch_t pm RegexResult result; if (eflags & REG_SEARCH) - result = preg->re->search(string, PosixOptions {} | (PosixFlags)eflags); + result = preg->re->visit([&](auto& re) { return re->search(string, PosixOptions {} | (PosixFlags)eflags); }); else - result = preg->re->match(string, PosixOptions {} | (PosixFlags)eflags); + result = preg->re->visit([&](auto& re) { return re->match(string, PosixOptions {} | (PosixFlags)eflags); }); if (result.success) { + auto capture_groups_count = preg->re->visit([](auto& re) { return re->parser_result.capture_groups_count; }); auto size = result.matches.size(); if (size && nmatch && pmatch) { pmatch[0].rm_cnt = size; @@ -111,7 +116,7 @@ int 
regexec(const regex_t* reg, const char* string, size_t nmatch, regmatch_t pm if (i < result.capture_group_matches.size()) { auto capture_groups_size = result.capture_group_matches.at(i).size(); - for (size_t j = 0; j < preg->re->parser_result.capture_groups_count; ++j) { + for (size_t j = 0; j < capture_groups_count; ++j) { if (j >= capture_groups_size || !result.capture_group_matches.at(i).at(j).view.length()) { pmatch[match_index].rm_so = -1; pmatch[match_index].rm_eo = -1; @@ -214,7 +219,7 @@ size_t regerror(int errcode, const regex_t* reg, char* errbuf, size_t errbuf_siz if (!preg) error = get_error((ReError)errcode); else - error = preg->re->error_string(get_error(preg->re_pat_err)); + error = preg->re->visit([&](auto& re) { return re->error_string(get_error(preg->re_pat_err)); }); if (!errbuf_size) return error.length();