mirror of
https://github.com/SerenityOS/serenity.git
synced 2025-01-22 09:21:57 -05:00
AK: Implement slugify
function for URL slug generation
The slugify function converts its input into a URL-friendly slug. It processes each code point of the input: ASCII alphanumeric characters are kept after being converted to lowercase, whitespace is replaced with the glue character (consecutive whitespace is collapsed into a single glue character), and all other characters are dropped. The resulting string is trimmed of leading and trailing whitespace. It is currently used in the LibMarkdown heading generation code.
This commit is contained in:
parent
670925a84f
commit
f1b79e0cd3
5 changed files with 95 additions and 0 deletions
|
@ -24,6 +24,7 @@ set(AK_SOURCES
|
|||
OptionParser.cpp
|
||||
Random.cpp
|
||||
SipHash.cpp
|
||||
Slugify.cpp
|
||||
StackInfo.cpp
|
||||
Stream.cpp
|
||||
String.cpp
|
||||
|
|
33
AK/Slugify.cpp
Normal file
33
AK/Slugify.cpp
Normal file
|
@ -0,0 +1,33 @@
|
|||
/*
|
||||
* Copyright (c) 2023, Gurkirat Singh <tbhaxor@gmail.com>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <AK/CharacterTypes.h>
|
||||
#include <AK/Slugify.h>
|
||||
#include <AK/StringView.h>
|
||||
|
||||
namespace AK {
|
||||
// Converts `input` into a URL-friendly slug.
//
// - ASCII letters and digits are kept, converted to lowercase.
// - Each run of ASCII whitespace and/or `glue` characters is squeezed into a single `glue`.
// - Every other code point (punctuation, non-ASCII text, emoji, ...) is dropped.
// - The result never starts or ends with `glue`.
//
// Returns the slug, or propagates the error if building or trimming the resulting String fails.
ErrorOr<String> slugify(String const& input, char const glue)
{
    StringBuilder sb;

    // Deliberately starts out true: treating the beginning of the input as if a
    // separator had just been emitted prevents leading whitespace/glue from
    // producing a leading glue character in the slug (e.g. " Hello" -> "hello").
    bool just_processed_space = true;

    for (auto const& code_point : input.code_points()) {
        if (is_ascii_alphanumeric(code_point)) {
            sb.append_code_point(to_ascii_lowercase(code_point));
            just_processed_space = false;
        } else if ((code_point == static_cast<u32>(glue) || is_ascii_space(code_point)) && !just_processed_space) {
            // First separator after slug content; further separators in the same
            // run are skipped because the flag stays set until the next alphanumeric.
            sb.append_code_point(glue);
            just_processed_space = true;
        }
        // Anything else is dropped and does not affect the separator state.
    }

    auto output = TRY(sb.to_string());

    // Separator runs are squeezed above, so at most one glue can be left dangling
    // at the end (when the input ended with whitespace/glue); strip it.
    if (output.ends_with(static_cast<u32>(glue))) {
        return output.trim(StringView { &glue, 1 }, TrimMode::Right);
    }
    return output;
}
|
||||
}
|
17
AK/Slugify.h
Normal file
17
AK/Slugify.h
Normal file
|
@ -0,0 +1,17 @@
|
|||
/*
|
||||
* Copyright (c) 2023, Gurkirat Singh <tbhaxor@gmail.com>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/String.h>
|
||||
|
||||
namespace AK {
|
||||
// Builds a URL-friendly slug from `input`: ASCII alphanumerics are kept in lowercase,
// runs of ASCII whitespace and/or `glue` collapse into a single `glue`, all other
// code points are dropped, and a trailing `glue` is stripped from the result.
// `glue` defaults to '-'. Errors from building the resulting String are propagated.
ErrorOr<String> slugify(String const& input, char glue = '-');
|
||||
}
|
||||
|
||||
#if USING_AK_GLOBALLY
|
||||
using AK::slugify;
|
||||
#endif
|
|
@ -65,6 +65,7 @@ set(AK_TEST_SOURCES
|
|||
TestRefPtr.cpp
|
||||
TestSIMD.cpp
|
||||
TestSinglyLinkedList.cpp
|
||||
TestSlugify.cpp
|
||||
TestSourceGenerator.cpp
|
||||
TestSourceLocation.cpp
|
||||
TestSpan.cpp
|
||||
|
|
43
Tests/AK/TestSlugify.cpp
Normal file
43
Tests/AK/TestSlugify.cpp
Normal file
|
@ -0,0 +1,43 @@
|
|||
/*
|
||||
* Copyright (c) 2023, Gurkirat Singh <tbhaxor@gmail.com>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <AK/Slugify.h>
|
||||
#include <LibTest/TestCase.h>
|
||||
|
||||
TEST_CASE(ignore_unicode_characters)
{
    // Punctuation and non-ASCII code points (here: '!' and the emoji) must not
    // show up in the slug at all.
    auto const slug = MUST(slugify("Hello World!🎉"_string));
    EXPECT_EQ(slug, "hello-world"_string);
}
|
||||
|
||||
TEST_CASE(all_whitespace_empty_string)
{
    // An input made up solely of whitespace yields an empty slug.
    // NOTE(review): the literal holds a single space here; the upstream test may
    // use a longer run of spaces - confirm against the original file.
    auto const slug = MUST(slugify(" "_string));
    EXPECT_EQ(slug, ""_string);
}
|
||||
|
||||
TEST_CASE(squeeze_multiple_whitespace)
{
    // Whitespace between words ends up as exactly one glue character.
    // NOTE(review): the test name implies several consecutive spaces between the
    // words; the literal shows one - confirm against the original file.
    auto const slug = MUST(slugify("Hello World"_string));
    EXPECT_EQ(slug, "hello-world"_string);
}
|
||||
|
||||
TEST_CASE(trim_trailing_whitelist)
{
    // Trailing whitespace must not leave a dangling glue at the end of the slug.
    auto const slug = MUST(slugify("Hello World "_string));
    EXPECT_EQ(slug, "hello-world"_string);
}
|
||||
|
||||
TEST_CASE(lowercase_all_result)
{
    // Uppercase ASCII letters are folded to lowercase; nothing else changes.
    auto const slug = MUST(slugify("HelloWorld"_string));
    EXPECT_EQ(slug, "helloworld"_string);
}
|
||||
|
||||
TEST_CASE(slug_glue_change)
{
    // A caller-supplied glue character replaces the default '-'.
    auto const slug = MUST(slugify("Hello World"_string, '|'));
    EXPECT_EQ(slug, "hello|world"_string);
}
|
||||
|
||||
TEST_CASE(multiple_glue_squeeze)
{
    // A glue character already present in the input, followed by whitespace,
    // counts as one separator run and produces a single glue.
    auto const slug = MUST(slugify("Hello_ World"_string, '_'));
    EXPECT_EQ(slug, "hello_world"_string);
}
|
Loading…
Reference in a new issue