LibPDF+Clients: Make Document::text_string_to_utf8() return String

It returns UTF-8 text, so make that explicit in its return type
(String instead of ByteString), and propagate the change to clients.

No behavior change.
This commit is contained in:
Nico Weber 2024-10-16 18:16:01 -04:00
parent 66378471a0
commit 0d7de7681c
4 changed files with 37 additions and 40 deletions

View file

@ -60,7 +60,10 @@
{ {
if (_groupName) if (_groupName)
return _groupName; return _groupName;
NSString* title = [NSString stringWithUTF8String:_item->title.characters()];
auto title_view = _item->title.bytes_as_string_view();
NSData* title_data = [NSData dataWithBytes:title_view.characters_without_null_termination() length:title_view.length()];
NSString* title = [[NSString alloc] initWithData:title_data encoding:NSUTF8StringEncoding];
// Newlines confuse NSOutlineView, at least in sidebar style (even with `usesSingleLineMode` set to YES on the cell view's text field). // Newlines confuse NSOutlineView, at least in sidebar style (even with `usesSingleLineMode` set to YES on the cell view's text field).
title = [[title componentsSeparatedByCharactersInSet:[NSCharacterSet newlineCharacterSet]] componentsJoinedByString:@" "]; title = [[title componentsSeparatedByCharactersInSet:[NSCharacterSet newlineCharacterSet]] componentsJoinedByString:@" "];

View file

@ -509,23 +509,21 @@ ErrorOr<void> PropertiesWindow::create_pdf_tab(GUI::TabWidget& tab_widget, Nonnu
if (maybe_info_dict.is_error()) { if (maybe_info_dict.is_error()) {
warnln("Failed to read InfoDict from '{}': {}", m_path, maybe_info_dict.error().message()); warnln("Failed to read InfoDict from '{}': {}", m_path, maybe_info_dict.error().message());
} else if (maybe_info_dict.value().has_value()) { } else if (maybe_info_dict.value().has_value()) {
auto get_info_string = [](PDF::PDFErrorOr<Optional<ByteString>> input) -> ErrorOr<String> { auto get_info_string = []<typename T>(PDF::PDFErrorOr<Optional<T>> input) -> T {
if (input.is_error()) if (input.is_error())
return String {}; return T {};
if (!input.value().has_value()) return input.value().value_or({});
return String {};
return String::from_byte_string(input.value().value());
}; };
auto info_dict = maybe_info_dict.release_value().release_value(); auto info_dict = maybe_info_dict.release_value().release_value();
tab.find_descendant_of_type_named<GUI::Label>("pdf_title")->set_text(TRY(get_info_string(info_dict.title()))); tab.find_descendant_of_type_named<GUI::Label>("pdf_title")->set_text(get_info_string(info_dict.title()));
tab.find_descendant_of_type_named<GUI::Label>("pdf_author")->set_text(TRY(get_info_string(info_dict.author()))); tab.find_descendant_of_type_named<GUI::Label>("pdf_author")->set_text(get_info_string(info_dict.author()));
tab.find_descendant_of_type_named<GUI::Label>("pdf_subject")->set_text(TRY(get_info_string(info_dict.subject()))); tab.find_descendant_of_type_named<GUI::Label>("pdf_subject")->set_text(get_info_string(info_dict.subject()));
tab.find_descendant_of_type_named<GUI::Label>("pdf_keywords")->set_text(TRY(get_info_string(info_dict.keywords()))); tab.find_descendant_of_type_named<GUI::Label>("pdf_keywords")->set_text(get_info_string(info_dict.keywords()));
tab.find_descendant_of_type_named<GUI::Label>("pdf_creator")->set_text(TRY(get_info_string(info_dict.creator()))); tab.find_descendant_of_type_named<GUI::Label>("pdf_creator")->set_text(get_info_string(info_dict.creator()));
tab.find_descendant_of_type_named<GUI::Label>("pdf_producer")->set_text(TRY(get_info_string(info_dict.producer()))); tab.find_descendant_of_type_named<GUI::Label>("pdf_producer")->set_text(get_info_string(info_dict.producer()));
tab.find_descendant_of_type_named<GUI::Label>("pdf_creation_date")->set_text(TRY(get_info_string(info_dict.creation_date()))); tab.find_descendant_of_type_named<GUI::Label>("pdf_creation_date")->set_text(TRY(String::from_byte_string((get_info_string(info_dict.creation_date())))));
tab.find_descendant_of_type_named<GUI::Label>("pdf_modification_date")->set_text(TRY(get_info_string(info_dict.modification_date()))); tab.find_descendant_of_type_named<GUI::Label>("pdf_modification_date")->set_text(TRY(String::from_byte_string(get_info_string(info_dict.modification_date()))));
} }
return {}; return {};

View file

@ -35,32 +35,32 @@ ByteString OutlineItem::to_byte_string(int indent) const
return builder.to_byte_string(); return builder.to_byte_string();
} }
PDFErrorOr<Optional<ByteString>> InfoDict::title() const PDFErrorOr<Optional<String>> InfoDict::title() const
{ {
return get_text(CommonNames::Title); return get_text(CommonNames::Title);
} }
PDFErrorOr<Optional<ByteString>> InfoDict::author() const PDFErrorOr<Optional<String>> InfoDict::author() const
{ {
return get_text(CommonNames::Author); return get_text(CommonNames::Author);
} }
PDFErrorOr<Optional<ByteString>> InfoDict::subject() const PDFErrorOr<Optional<String>> InfoDict::subject() const
{ {
return get_text(CommonNames::Subject); return get_text(CommonNames::Subject);
} }
PDFErrorOr<Optional<ByteString>> InfoDict::keywords() const PDFErrorOr<Optional<String>> InfoDict::keywords() const
{ {
return get_text(CommonNames::Keywords); return get_text(CommonNames::Keywords);
} }
PDFErrorOr<Optional<ByteString>> InfoDict::creator() const PDFErrorOr<Optional<String>> InfoDict::creator() const
{ {
return get_text(CommonNames::Creator); return get_text(CommonNames::Creator);
} }
PDFErrorOr<Optional<ByteString>> InfoDict::producer() const PDFErrorOr<Optional<String>> InfoDict::producer() const
{ {
return get_text(CommonNames::Producer); return get_text(CommonNames::Producer);
} }
@ -75,24 +75,20 @@ PDFErrorOr<Optional<ByteString>> InfoDict::modification_date() const
return get(CommonNames::ModDate); return get(CommonNames::ModDate);
} }
PDFErrorOr<Optional<ByteString>> InfoDict::get_text(DeprecatedFlyString const& name) const PDFErrorOr<Optional<String>> InfoDict::get_text(DeprecatedFlyString const& name) const
{ {
return TRY(get(name)).map(Document::text_string_to_utf8); return TRY(get(name)).map(Document::text_string_to_utf8);
} }
ByteString Document::text_string_to_utf8(ByteString const& text_string) String Document::text_string_to_utf8(ByteString const& text_string)
{ {
if (text_string.bytes().starts_with(Array<u8, 2> { 0xfe, 0xff })) { if (text_string.bytes().starts_with(Array<u8, 2> { 0xfe, 0xff }))
// The string is encoded in UTF16-BE return TextCodec::decoder_for("utf-16be"sv)->to_utf8(text_string).release_value_but_fixme_should_propagate_errors();
return TextCodec::decoder_for("utf-16be"sv)->to_utf8(text_string).release_value_but_fixme_should_propagate_errors().to_byte_string();
}
if (text_string.bytes().starts_with(Array<u8, 3> { 239, 187, 191 })) { if (text_string.bytes().starts_with(Array<u8, 3> { 239, 187, 191 }))
// The string is encoded in UTF-8. return TextCodec::decoder_for("utf-8"sv)->to_utf8(text_string).release_value_but_fixme_should_propagate_errors();
return text_string.substring(3);
}
return TextCodec::decoder_for("PDFDocEncoding"sv)->to_utf8(text_string).release_value_but_fixme_should_propagate_errors().to_byte_string(); return TextCodec::decoder_for("PDFDocEncoding"sv)->to_utf8(text_string).release_value_but_fixme_should_propagate_errors();
} }
PDFErrorOr<NonnullRefPtr<Document>> Document::create(ReadonlyBytes bytes) PDFErrorOr<NonnullRefPtr<Document>> Document::create(ReadonlyBytes bytes)

View file

@ -40,7 +40,7 @@ struct OutlineItem final : public RefCounted<OutlineItem>
, public Weakable<OutlineItem> { , public Weakable<OutlineItem> {
WeakPtr<OutlineItem> parent; WeakPtr<OutlineItem> parent;
Vector<NonnullRefPtr<OutlineItem>> children; Vector<NonnullRefPtr<OutlineItem>> children;
ByteString title; // Already converted to UTF-8. String title;
i32 count { 0 }; i32 count { 0 };
Destination dest; Destination dest;
Gfx::Color color { Color::NamedColor::Black }; // 'C' in the PDF spec Gfx::Color color { Color::NamedColor::Black }; // 'C' in the PDF spec
@ -69,16 +69,16 @@ public:
// These all return strings that are already converted to UTF-8. // These all return strings that are already converted to UTF-8.
PDFErrorOr<Optional<ByteString>> title() const; PDFErrorOr<Optional<String>> title() const;
PDFErrorOr<Optional<ByteString>> author() const; PDFErrorOr<Optional<String>> author() const;
PDFErrorOr<Optional<ByteString>> subject() const; PDFErrorOr<Optional<String>> subject() const;
PDFErrorOr<Optional<ByteString>> keywords() const; PDFErrorOr<Optional<String>> keywords() const;
// Name of the program that created the original, non-PDF file. // Name of the program that created the original, non-PDF file.
PDFErrorOr<Optional<ByteString>> creator() const; PDFErrorOr<Optional<String>> creator() const;
// Name of the program that converted the file to PDF. // Name of the program that converted the file to PDF.
PDFErrorOr<Optional<ByteString>> producer() const; PDFErrorOr<Optional<String>> producer() const;
// FIXME: Provide some helper for parsing the date strings returned by these two methods. // FIXME: Provide some helper for parsing the date strings returned by these two methods.
PDFErrorOr<Optional<ByteString>> creation_date() const; PDFErrorOr<Optional<ByteString>> creation_date() const;
@ -92,7 +92,7 @@ private:
return TRY(m_info_dict->get_string(m_document, name))->string(); return TRY(m_info_dict->get_string(m_document, name))->string();
} }
PDFErrorOr<Optional<ByteString>> get_text(DeprecatedFlyString const& name) const; PDFErrorOr<Optional<String>> get_text(DeprecatedFlyString const& name) const;
WeakPtr<Document> m_document; WeakPtr<Document> m_document;
NonnullRefPtr<DictObject> m_info_dict; NonnullRefPtr<DictObject> m_info_dict;
@ -103,7 +103,7 @@ class Document final
, public Weakable<Document> { , public Weakable<Document> {
public: public:
// Converts a text string (PDF 1.7 spec, 3.8.1. "String Types") to UTF-8. // Converts a text string (PDF 1.7 spec, 3.8.1. "String Types") to UTF-8.
static ByteString text_string_to_utf8(ByteString const&); static String text_string_to_utf8(ByteString const&);
static PDFErrorOr<NonnullRefPtr<Document>> create(ReadonlyBytes bytes); static PDFErrorOr<NonnullRefPtr<Document>> create(ReadonlyBytes bytes);