LibPDF+Clients: Make Document::text_string_to_utf8() return String

Document::text_string_to_utf8() returns UTF-8 text, so make that explicit
in its return type (String instead of ByteString), and propagate the new type to clients.

No behavior change.
This commit is contained in:
Nico Weber 2024-10-16 18:16:01 -04:00
parent 66378471a0
commit 0d7de7681c
4 changed files with 37 additions and 40 deletions

View file

@ -60,7 +60,10 @@
{
if (_groupName)
return _groupName;
NSString* title = [NSString stringWithUTF8String:_item->title.characters()];
auto title_view = _item->title.bytes_as_string_view();
NSData* title_data = [NSData dataWithBytes:title_view.characters_without_null_termination() length:title_view.length()];
NSString* title = [[NSString alloc] initWithData:title_data encoding:NSUTF8StringEncoding];
// Newlines confuse NSOutlineView, at least in sidebar style (even with `usesSingleLineMode` set to YES on the cell view's text field).
title = [[title componentsSeparatedByCharactersInSet:[NSCharacterSet newlineCharacterSet]] componentsJoinedByString:@" "];

View file

@ -509,23 +509,21 @@ ErrorOr<void> PropertiesWindow::create_pdf_tab(GUI::TabWidget& tab_widget, Nonnu
if (maybe_info_dict.is_error()) {
warnln("Failed to read InfoDict from '{}': {}", m_path, maybe_info_dict.error().message());
} else if (maybe_info_dict.value().has_value()) {
auto get_info_string = [](PDF::PDFErrorOr<Optional<ByteString>> input) -> ErrorOr<String> {
auto get_info_string = []<typename T>(PDF::PDFErrorOr<Optional<T>> input) -> T {
if (input.is_error())
return String {};
if (!input.value().has_value())
return String {};
return String::from_byte_string(input.value().value());
return T {};
return input.value().value_or({});
};
auto info_dict = maybe_info_dict.release_value().release_value();
tab.find_descendant_of_type_named<GUI::Label>("pdf_title")->set_text(TRY(get_info_string(info_dict.title())));
tab.find_descendant_of_type_named<GUI::Label>("pdf_author")->set_text(TRY(get_info_string(info_dict.author())));
tab.find_descendant_of_type_named<GUI::Label>("pdf_subject")->set_text(TRY(get_info_string(info_dict.subject())));
tab.find_descendant_of_type_named<GUI::Label>("pdf_keywords")->set_text(TRY(get_info_string(info_dict.keywords())));
tab.find_descendant_of_type_named<GUI::Label>("pdf_creator")->set_text(TRY(get_info_string(info_dict.creator())));
tab.find_descendant_of_type_named<GUI::Label>("pdf_producer")->set_text(TRY(get_info_string(info_dict.producer())));
tab.find_descendant_of_type_named<GUI::Label>("pdf_creation_date")->set_text(TRY(get_info_string(info_dict.creation_date())));
tab.find_descendant_of_type_named<GUI::Label>("pdf_modification_date")->set_text(TRY(get_info_string(info_dict.modification_date())));
tab.find_descendant_of_type_named<GUI::Label>("pdf_title")->set_text(get_info_string(info_dict.title()));
tab.find_descendant_of_type_named<GUI::Label>("pdf_author")->set_text(get_info_string(info_dict.author()));
tab.find_descendant_of_type_named<GUI::Label>("pdf_subject")->set_text(get_info_string(info_dict.subject()));
tab.find_descendant_of_type_named<GUI::Label>("pdf_keywords")->set_text(get_info_string(info_dict.keywords()));
tab.find_descendant_of_type_named<GUI::Label>("pdf_creator")->set_text(get_info_string(info_dict.creator()));
tab.find_descendant_of_type_named<GUI::Label>("pdf_producer")->set_text(get_info_string(info_dict.producer()));
tab.find_descendant_of_type_named<GUI::Label>("pdf_creation_date")->set_text(TRY(String::from_byte_string((get_info_string(info_dict.creation_date())))));
tab.find_descendant_of_type_named<GUI::Label>("pdf_modification_date")->set_text(TRY(String::from_byte_string(get_info_string(info_dict.modification_date()))));
}
return {};

View file

@ -35,32 +35,32 @@ ByteString OutlineItem::to_byte_string(int indent) const
return builder.to_byte_string();
}
PDFErrorOr<Optional<ByteString>> InfoDict::title() const
PDFErrorOr<Optional<String>> InfoDict::title() const
{
return get_text(CommonNames::Title);
}
PDFErrorOr<Optional<ByteString>> InfoDict::author() const
PDFErrorOr<Optional<String>> InfoDict::author() const
{
return get_text(CommonNames::Author);
}
PDFErrorOr<Optional<ByteString>> InfoDict::subject() const
PDFErrorOr<Optional<String>> InfoDict::subject() const
{
return get_text(CommonNames::Subject);
}
PDFErrorOr<Optional<ByteString>> InfoDict::keywords() const
PDFErrorOr<Optional<String>> InfoDict::keywords() const
{
return get_text(CommonNames::Keywords);
}
PDFErrorOr<Optional<ByteString>> InfoDict::creator() const
PDFErrorOr<Optional<String>> InfoDict::creator() const
{
return get_text(CommonNames::Creator);
}
PDFErrorOr<Optional<ByteString>> InfoDict::producer() const
PDFErrorOr<Optional<String>> InfoDict::producer() const
{
return get_text(CommonNames::Producer);
}
@ -75,24 +75,20 @@ PDFErrorOr<Optional<ByteString>> InfoDict::modification_date() const
return get(CommonNames::ModDate);
}
PDFErrorOr<Optional<ByteString>> InfoDict::get_text(DeprecatedFlyString const& name) const
PDFErrorOr<Optional<String>> InfoDict::get_text(DeprecatedFlyString const& name) const
{
return TRY(get(name)).map(Document::text_string_to_utf8);
}
ByteString Document::text_string_to_utf8(ByteString const& text_string)
String Document::text_string_to_utf8(ByteString const& text_string)
{
if (text_string.bytes().starts_with(Array<u8, 2> { 0xfe, 0xff })) {
// The string is encoded in UTF16-BE
return TextCodec::decoder_for("utf-16be"sv)->to_utf8(text_string).release_value_but_fixme_should_propagate_errors().to_byte_string();
}
if (text_string.bytes().starts_with(Array<u8, 2> { 0xfe, 0xff }))
return TextCodec::decoder_for("utf-16be"sv)->to_utf8(text_string).release_value_but_fixme_should_propagate_errors();
if (text_string.bytes().starts_with(Array<u8, 3> { 239, 187, 191 })) {
// The string is encoded in UTF-8.
return text_string.substring(3);
}
if (text_string.bytes().starts_with(Array<u8, 3> { 239, 187, 191 }))
return TextCodec::decoder_for("utf-8"sv)->to_utf8(text_string).release_value_but_fixme_should_propagate_errors();
return TextCodec::decoder_for("PDFDocEncoding"sv)->to_utf8(text_string).release_value_but_fixme_should_propagate_errors().to_byte_string();
return TextCodec::decoder_for("PDFDocEncoding"sv)->to_utf8(text_string).release_value_but_fixme_should_propagate_errors();
}
PDFErrorOr<NonnullRefPtr<Document>> Document::create(ReadonlyBytes bytes)

View file

@ -40,7 +40,7 @@ struct OutlineItem final : public RefCounted<OutlineItem>
, public Weakable<OutlineItem> {
WeakPtr<OutlineItem> parent;
Vector<NonnullRefPtr<OutlineItem>> children;
ByteString title; // Already converted to UTF-8.
String title;
i32 count { 0 };
Destination dest;
Gfx::Color color { Color::NamedColor::Black }; // 'C' in the PDF spec
@ -69,16 +69,16 @@ public:
// These all return strings that are already converted to UTF-8.
PDFErrorOr<Optional<ByteString>> title() const;
PDFErrorOr<Optional<ByteString>> author() const;
PDFErrorOr<Optional<ByteString>> subject() const;
PDFErrorOr<Optional<ByteString>> keywords() const;
PDFErrorOr<Optional<String>> title() const;
PDFErrorOr<Optional<String>> author() const;
PDFErrorOr<Optional<String>> subject() const;
PDFErrorOr<Optional<String>> keywords() const;
// Name of the program that created the original, non-PDF file.
PDFErrorOr<Optional<ByteString>> creator() const;
PDFErrorOr<Optional<String>> creator() const;
// Name of the program that converted the file to PDF.
PDFErrorOr<Optional<ByteString>> producer() const;
PDFErrorOr<Optional<String>> producer() const;
// FIXME: Provide some helper for parsing the date strings returned by these two methods.
PDFErrorOr<Optional<ByteString>> creation_date() const;
@ -92,7 +92,7 @@ private:
return TRY(m_info_dict->get_string(m_document, name))->string();
}
PDFErrorOr<Optional<ByteString>> get_text(DeprecatedFlyString const& name) const;
PDFErrorOr<Optional<String>> get_text(DeprecatedFlyString const& name) const;
WeakPtr<Document> m_document;
NonnullRefPtr<DictObject> m_info_dict;
@ -103,7 +103,7 @@ class Document final
, public Weakable<Document> {
public:
// Converts a text string (PDF 1.7 spec, 3.8.1. "String Types") to UTF-8.
static ByteString text_string_to_utf8(ByteString const&);
static String text_string_to_utf8(ByteString const&);
static PDFErrorOr<NonnullRefPtr<Document>> create(ReadonlyBytes bytes);