mirror of
https://github.com/SerenityOS/serenity.git
synced 2025-01-22 09:21:57 -05:00
AK/FuzzyMatch: Return the best possible score in case of equality
This fixes cases where fuzzy matching would give a better score to a haystack that only partially matches the needle than to a haystack that the needle matches perfectly. For example, when searching for "fire" in emojis, "Fire Engine" scored 168, while "Fire" scored only 160. This patch gives the perfect match the best possible score.
This commit is contained in:
parent
79431198ac
commit
3fc784c442
2 changed files with 24 additions and 19 deletions
|
@ -14,13 +14,14 @@ static constexpr int RECURSION_LIMIT = 10;
|
|||
static constexpr int MAX_MATCHES = 256;
|
||||
|
||||
// Bonuses and penalties are used to build up a final score for the match.
|
||||
static constexpr int SEQUENTIAL_BONUS = 15; // bonus for adjacent matches (needle: 'ca', haystack: 'cat')
|
||||
static constexpr int SEPARATOR_BONUS = 30; // bonus if match occurs after a separator ('_' or ' ')
|
||||
static constexpr int CAMEL_BONUS = 30; // bonus if match is uppercase and prev is lower (needle: 'myF' haystack: '/path/to/myFile.txt')
|
||||
static constexpr int FIRST_LETTER_BONUS = 15; // bonus if the first letter is matched (needle: 'c' haystack: 'cat')
|
||||
static constexpr int LEADING_LETTER_PENALTY = -5; // penalty applied for every letter in str before the first match
|
||||
static constexpr int MAX_LEADING_LETTER_PENALTY = -15; // maximum penalty for leading letters
|
||||
static constexpr int UNMATCHED_LETTER_PENALTY = -1; // penalty for every letter that doesn't match
|
||||
static constexpr int SEQUENTIAL_BONUS = 15; // bonus for adjacent matches (needle: 'ca', haystack: 'cat')
|
||||
static constexpr int SEPARATOR_BONUS = 30; // bonus if match occurs after a separator ('_' or ' ')
|
||||
static constexpr int CAMEL_BONUS = 30; // bonus if match is uppercase and prev is lower (needle: 'myF' haystack: '/path/to/myFile.txt')
|
||||
static constexpr int FIRST_LETTER_BONUS = 15; // bonus if the first letter is matched (needle: 'c' haystack: 'cat')
|
||||
static constexpr int LEADING_LETTER_PENALTY = -5; // penalty applied for every letter in str before the first match
|
||||
static constexpr int MAX_LEADING_LETTER_PENALTY = -15; // maximum penalty for leading letters
|
||||
static constexpr int UNMATCHED_LETTER_PENALTY = -1; // penalty for every letter that doesn't match
|
||||
static constexpr int EQUALITY_SCORE = NumericLimits<int>::max(); // Score set on perfect equality
|
||||
|
||||
static int calculate_score(StringView string, u8 const* index_points, size_t index_points_size)
|
||||
{
|
||||
|
@ -34,6 +35,9 @@ static int calculate_score(StringView string, u8 const* index_points, size_t ind
|
|||
int unmatched = string.length() - index_points_size;
|
||||
out_score += UNMATCHED_LETTER_PENALTY * unmatched;
|
||||
|
||||
if (unmatched == 0)
|
||||
return EQUALITY_SCORE;
|
||||
|
||||
for (size_t i = 0; i < index_points_size; i++) {
|
||||
u8 current_idx = index_points[i];
|
||||
|
||||
|
|
|
@ -20,9 +20,9 @@ TEST_CASE(is_leading_letter_penalty_correctly_applied)
|
|||
TEST_CASE(is_first_letter_bonus_applied_correctly)
|
||||
{
|
||||
// First letter bonus is +15 if the first letter matches.
|
||||
EXPECT_EQ(fuzzy_match("a"sv, "a"sv).score, 115);
|
||||
EXPECT_EQ(fuzzy_match("a"sv, "A"sv).score, 115);
|
||||
EXPECT_EQ(fuzzy_match(" "sv, " "sv).score, 115);
|
||||
EXPECT_EQ(fuzzy_match("a"sv, "ab"sv).score, 114);
|
||||
EXPECT_EQ(fuzzy_match("a"sv, "Ab"sv).score, 114);
|
||||
EXPECT_EQ(fuzzy_match(" "sv, " b"sv).score, 114);
|
||||
}
|
||||
|
||||
TEST_CASE(is_sequential_bonus_applied_correctly)
|
||||
|
@ -41,17 +41,15 @@ TEST_CASE(is_camel_case_bonus_applied_correctly)
|
|||
// Camel case bonus is +30 if the matching character is uppercase and the preceding character is lowercase.
|
||||
// These cases get no camel case bonus.
|
||||
EXPECT_EQ(fuzzy_match("b"sv, "Ab"sv).score, 94);
|
||||
EXPECT_EQ(fuzzy_match("abc"sv, "ABc"sv).score, 145);
|
||||
EXPECT_EQ(fuzzy_match("abc"sv, "ABC"sv).score, 145);
|
||||
EXPECT_EQ(fuzzy_match("abc"sv, "Abc"sv).score, 145);
|
||||
EXPECT_EQ(fuzzy_match("abcd"sv, "abcd"sv).score, 160);
|
||||
EXPECT_EQ(fuzzy_match("abc"sv, "ABcd"sv).score, 144);
|
||||
EXPECT_EQ(fuzzy_match("abc"sv, "ABCd"sv).score, 144);
|
||||
EXPECT_EQ(fuzzy_match("abc"sv, "Abcd"sv).score, 144);
|
||||
EXPECT_EQ(fuzzy_match("abcd"sv, "abcde"sv).score, 159);
|
||||
|
||||
// These cases get a camel case bonus.
|
||||
EXPECT_EQ(fuzzy_match("b"sv, "aB"sv).score, 124);
|
||||
EXPECT_EQ(fuzzy_match("abc"sv, "aBc"sv).score, 175);
|
||||
EXPECT_EQ(fuzzy_match("abc"sv, "aBC"sv).score, 175);
|
||||
EXPECT_EQ(fuzzy_match("abc"sv, "aBcd"sv).score, 174);
|
||||
EXPECT_EQ(fuzzy_match("abc"sv, "aBC-"sv).score, 174);
|
||||
EXPECT_EQ(fuzzy_match("abcd"sv, "aBcD"sv).score, 220);
|
||||
EXPECT_EQ(fuzzy_match("abcd"sv, "aBcD-"sv).score, 219);
|
||||
}
|
||||
|
||||
|
@ -60,11 +58,14 @@ TEST_CASE(is_separator_bonus_applied_correctly)
|
|||
// Separator bonus is +30 if the character preceding the matching character is a space or an underscore.
|
||||
EXPECT_EQ(fuzzy_match("b"sv, "a b"sv).score, 118);
|
||||
EXPECT_EQ(fuzzy_match("bc"sv, "a b c"sv).score, 147);
|
||||
EXPECT_EQ(fuzzy_match("ab cd"sv, "ab cd"sv).score, 205);
|
||||
EXPECT_EQ(fuzzy_match("ab_cd"sv, "ab_cd"sv).score, 205);
|
||||
EXPECT_EQ(fuzzy_match("abcd"sv, "a b c d"sv).score, 202);
|
||||
EXPECT_EQ(fuzzy_match("abcd"sv, "a_b_c_d"sv).score, 202);
|
||||
EXPECT_EQ(fuzzy_match("b c"sv, "ab cd"sv).score, 153);
|
||||
EXPECT_EQ(fuzzy_match("b_c"sv, "ab_cd"sv).score, 153);
|
||||
EXPECT_EQ(fuzzy_match("bc"sv, "ab cd"sv).score, 122);
|
||||
}
|
||||
|
||||
TEST_CASE(equality)
|
||||
{
|
||||
EXPECT(fuzzy_match("abc"sv, "abc"sv).score > fuzzy_match("abc"sv, "a b c"sv).score);
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue