AK/FuzzyMatch: Return the best possible score in case of equality

This fixes cases where fuzzy matching would give a better score to a
haystack that only contains the needle than to a haystack that matches
the needle exactly.

As an example, when searching for "fire" in emojis, "Fire Engine" would
have scored 168, while "Fire" was giving only 160.
This patch makes the latter have the best possible score.
This commit is contained in:
Lucas CHOLLET 2024-08-20 23:02:09 -04:00 committed by Nico Weber
parent 79431198ac
commit 3fc784c442
2 changed files with 24 additions and 19 deletions

View file

@ -14,13 +14,14 @@ static constexpr int RECURSION_LIMIT = 10;
static constexpr int MAX_MATCHES = 256;
// Bonuses and penalties are used to build up a final score for the match.
static constexpr int SEQUENTIAL_BONUS = 15; // bonus for adjacent matches (needle: 'ca', haystack: 'cat')
static constexpr int SEPARATOR_BONUS = 30; // bonus if match occurs after a separator ('_' or ' ')
static constexpr int CAMEL_BONUS = 30; // bonus if match is uppercase and prev is lower (needle: 'myF' haystack: '/path/to/myFile.txt')
static constexpr int FIRST_LETTER_BONUS = 15; // bonus if the first letter is matched (needle: 'c' haystack: 'cat')
static constexpr int LEADING_LETTER_PENALTY = -5; // penalty applied for every letter in str before the first match
static constexpr int MAX_LEADING_LETTER_PENALTY = -15; // maximum penalty for leading letters
static constexpr int UNMATCHED_LETTER_PENALTY = -1; // penalty for every letter that doesn't match
static constexpr int SEQUENTIAL_BONUS = 15; // bonus for adjacent matches (needle: 'ca', haystack: 'cat')
static constexpr int SEPARATOR_BONUS = 30; // bonus if match occurs after a separator ('_' or ' ')
static constexpr int CAMEL_BONUS = 30; // bonus if match is uppercase and prev is lower (needle: 'myF' haystack: '/path/to/myFile.txt')
static constexpr int FIRST_LETTER_BONUS = 15; // bonus if the first letter is matched (needle: 'c' haystack: 'cat')
static constexpr int LEADING_LETTER_PENALTY = -5; // penalty applied for every letter in str before the first match
static constexpr int MAX_LEADING_LETTER_PENALTY = -15; // maximum penalty for leading letters
static constexpr int UNMATCHED_LETTER_PENALTY = -1; // penalty for every letter that doesn't match
static constexpr int EQUALITY_SCORE = NumericLimits<int>::max(); // score returned on perfect equality, i.e. when no letter of the string is left unmatched
static int calculate_score(StringView string, u8 const* index_points, size_t index_points_size)
{
@ -34,6 +35,9 @@ static int calculate_score(StringView string, u8 const* index_points, size_t ind
int unmatched = string.length() - index_points_size;
out_score += UNMATCHED_LETTER_PENALTY * unmatched;
if (unmatched == 0)
return EQUALITY_SCORE;
for (size_t i = 0; i < index_points_size; i++) {
u8 current_idx = index_points[i];

View file

@ -20,9 +20,9 @@ TEST_CASE(is_leading_letter_penalty_correctly_applied)
TEST_CASE(is_first_letter_bonus_applied_correctly)
{
// First letter bonus is +15 if the first letter matches.
EXPECT_EQ(fuzzy_match("a"sv, "a"sv).score, 115);
EXPECT_EQ(fuzzy_match("a"sv, "A"sv).score, 115);
EXPECT_EQ(fuzzy_match(" "sv, " "sv).score, 115);
EXPECT_EQ(fuzzy_match("a"sv, "ab"sv).score, 114);
EXPECT_EQ(fuzzy_match("a"sv, "Ab"sv).score, 114);
EXPECT_EQ(fuzzy_match(" "sv, " b"sv).score, 114);
}
TEST_CASE(is_sequential_bonus_applied_correctly)
@ -41,17 +41,15 @@ TEST_CASE(is_camel_case_bonus_applied_correctly)
// Camel case bonus is +30 if the matching character is uppercase and the preceding character is lowercase.
// These cases get no camel case bonus.
EXPECT_EQ(fuzzy_match("b"sv, "Ab"sv).score, 94);
EXPECT_EQ(fuzzy_match("abc"sv, "ABc"sv).score, 145);
EXPECT_EQ(fuzzy_match("abc"sv, "ABC"sv).score, 145);
EXPECT_EQ(fuzzy_match("abc"sv, "Abc"sv).score, 145);
EXPECT_EQ(fuzzy_match("abcd"sv, "abcd"sv).score, 160);
EXPECT_EQ(fuzzy_match("abc"sv, "ABcd"sv).score, 144);
EXPECT_EQ(fuzzy_match("abc"sv, "ABCd"sv).score, 144);
EXPECT_EQ(fuzzy_match("abc"sv, "Abcd"sv).score, 144);
EXPECT_EQ(fuzzy_match("abcd"sv, "abcde"sv).score, 159);
// These cases get a camel case bonus.
EXPECT_EQ(fuzzy_match("b"sv, "aB"sv).score, 124);
EXPECT_EQ(fuzzy_match("abc"sv, "aBc"sv).score, 175);
EXPECT_EQ(fuzzy_match("abc"sv, "aBC"sv).score, 175);
EXPECT_EQ(fuzzy_match("abc"sv, "aBcd"sv).score, 174);
EXPECT_EQ(fuzzy_match("abc"sv, "aBC-"sv).score, 174);
EXPECT_EQ(fuzzy_match("abcd"sv, "aBcD"sv).score, 220);
EXPECT_EQ(fuzzy_match("abcd"sv, "aBcD-"sv).score, 219);
}
@ -60,11 +58,14 @@ TEST_CASE(is_separator_bonus_applied_correctly)
// Separator bonus is +30 if the character preceding the matching character is a space or an underscore.
EXPECT_EQ(fuzzy_match("b"sv, "a b"sv).score, 118);
EXPECT_EQ(fuzzy_match("bc"sv, "a b c"sv).score, 147);
EXPECT_EQ(fuzzy_match("ab cd"sv, "ab cd"sv).score, 205);
EXPECT_EQ(fuzzy_match("ab_cd"sv, "ab_cd"sv).score, 205);
EXPECT_EQ(fuzzy_match("abcd"sv, "a b c d"sv).score, 202);
EXPECT_EQ(fuzzy_match("abcd"sv, "a_b_c_d"sv).score, 202);
EXPECT_EQ(fuzzy_match("b c"sv, "ab cd"sv).score, 153);
EXPECT_EQ(fuzzy_match("b_c"sv, "ab_cd"sv).score, 153);
EXPECT_EQ(fuzzy_match("bc"sv, "ab cd"sv).score, 122);
}
TEST_CASE(equality)
{
    // A haystack identical to the needle must score strictly higher than one
    // that only contains the needle's letters spread across separators.
    auto const exact_score = fuzzy_match("abc"sv, "abc"sv).score;
    auto const spread_score = fuzzy_match("abc"sv, "a b c"sv).score;
    EXPECT(spread_score < exact_score);
}