From 5b316462b292b24a9fa5e641b45d7dc7133cf33a Mon Sep 17 00:00:00 2001 From: Matthew Olsson Date: Sun, 20 Mar 2022 14:24:23 -0700 Subject: [PATCH] LibPDF: Add implementation of the Standard security handler Security handlers manage encryption and decryption of PDF files. The standard security handler uses RC4/MD5 to perform its crypto (AES as well, but that is not yet implemented). --- Tests/LibPDF/TestPDF.cpp | 3 + .../PDFViewer/PDFViewerWidget.cpp | 12 + Userland/Libraries/LibPDF/CMakeLists.txt | 3 +- Userland/Libraries/LibPDF/CommonNames.h | 5 + Userland/Libraries/LibPDF/Document.cpp | 24 +- Userland/Libraries/LibPDF/Document.h | 12 + Userland/Libraries/LibPDF/Encryption.cpp | 394 ++++++++++++++++++ Userland/Libraries/LibPDF/Encryption.h | 70 ++++ Userland/Libraries/LibPDF/ObjectDerivatives.h | 2 + 9 files changed, 522 insertions(+), 3 deletions(-) create mode 100644 Userland/Libraries/LibPDF/Encryption.cpp create mode 100644 Userland/Libraries/LibPDF/Encryption.h diff --git a/Tests/LibPDF/TestPDF.cpp b/Tests/LibPDF/TestPDF.cpp index 559f3554a4c..e666b7ec9c9 100644 --- a/Tests/LibPDF/TestPDF.cpp +++ b/Tests/LibPDF/TestPDF.cpp @@ -16,6 +16,7 @@ TEST_CASE(linearized_pdf) auto file = Core::MappedFile::map("linearized.pdf").release_value(); auto document = PDF::Document::create(file->bytes()); EXPECT(!document.is_error()); + EXPECT(!document.value()->initialize().is_error()); EXPECT_EQ(document.value()->get_page_count(), 1U); } @@ -24,6 +25,7 @@ TEST_CASE(non_linearized_pdf) auto file = Core::MappedFile::map("non-linearized.pdf").release_value(); auto document = PDF::Document::create(file->bytes()); EXPECT(!document.is_error()); + EXPECT(!document.value()->initialize().is_error()); EXPECT_EQ(document.value()->get_page_count(), 1U); } @@ -32,6 +34,7 @@ TEST_CASE(complex_pdf) auto file = Core::MappedFile::map("complex.pdf").release_value(); auto document = PDF::Document::create(file->bytes()); EXPECT(!document.is_error()); + 
EXPECT(!document.value()->initialize().is_error()); EXPECT_EQ(document.value()->get_page_count(), 3U); } diff --git a/Userland/Applications/PDFViewer/PDFViewerWidget.cpp b/Userland/Applications/PDFViewer/PDFViewerWidget.cpp index 579b74f8267..07c9f00b5e1 100644 --- a/Userland/Applications/PDFViewer/PDFViewerWidget.cpp +++ b/Userland/Applications/PDFViewer/PDFViewerWidget.cpp @@ -160,6 +160,18 @@ void PDFViewerWidget::open_file(Core::File& file) auto document = maybe_document.release_value(); + if (auto sh = document->security_handler(); sh && !sh->has_user_password()) { + // FIXME: Prompt the user for a password + VERIFY_NOT_REACHED(); + } + + auto result = document->initialize(); + if (result.is_error()) { + auto error = result.release_error(); + GUI::MessageBox::show_error(nullptr, String::formatted("Couldn't load PDF {}:\n{}", file.filename(), error.message())); + return; + } + m_viewer->set_document(document); m_total_page_label->set_text(String::formatted("of {}", document->get_page_count())); diff --git a/Userland/Libraries/LibPDF/CMakeLists.txt b/Userland/Libraries/LibPDF/CMakeLists.txt index 3b5fa712f0d..363fc55b898 100644 --- a/Userland/Libraries/LibPDF/CMakeLists.txt +++ b/Userland/Libraries/LibPDF/CMakeLists.txt @@ -3,6 +3,7 @@ set(SOURCES CommonNames.cpp Document.cpp Encoding.cpp + Encryption.cpp Filter.cpp Fonts.cpp ObjectDerivatives.cpp @@ -12,4 +13,4 @@ set(SOURCES ) serenity_lib(LibPDF pdf) -target_link_libraries(LibPDF LibC LibCore LibIPC LibGfx LibTextCodec) +target_link_libraries(LibPDF LibC LibCore LibIPC LibGfx LibTextCodec LibCrypto) diff --git a/Userland/Libraries/LibPDF/CommonNames.h b/Userland/Libraries/LibPDF/CommonNames.h index 7b7f24a58ae..b10980f3cee 100644 --- a/Userland/Libraries/LibPDF/CommonNames.h +++ b/Userland/Libraries/LibPDF/CommonNames.h @@ -37,6 +37,8 @@ A(Differences) \ A(E) \ A(Encoding) \ + A(Encrypt) \ + A(EncryptMetadata) \ A(ExtGState) \ A(F) \ A(FL) \ @@ -61,6 +63,7 @@ A(H) \ A(HT) \ A(HTO) \ + A(ID) \ A(JBIG2Decode) \ 
A(JPXDecode) \ A(Kids) \ @@ -87,6 +90,7 @@ A(Parent) \ A(Pattern) \ A(Prev) \ + A(R) \ A(RI) \ A(Resources) \ A(Root) \ @@ -103,6 +107,7 @@ A(Title) \ A(ToUnicode) \ A(Type) \ + A(U) \ A(UCR) \ A(UseBlackPTComp) \ A(UserUnit) \ diff --git a/Userland/Libraries/LibPDF/Document.cpp b/Userland/Libraries/LibPDF/Document.cpp index 51a60afa6cd..2a3aefce9e5 100644 --- a/Userland/Libraries/LibPDF/Document.cpp +++ b/Userland/Libraries/LibPDF/Document.cpp @@ -41,9 +41,18 @@ PDFErrorOr> Document::create(ReadonlyBytes bytes) TRY(parser->initialize()); + document->m_trailer = parser->trailer(); document->m_catalog = TRY(parser->trailer()->get_dict(document, CommonNames::Root)); - TRY(document->build_page_tree()); - TRY(document->build_outline()); + + if (document->m_trailer->contains(CommonNames::Encrypt)) { + auto encryption_dict = TRY(document->m_trailer->get_dict(document, CommonNames::Encrypt)); + document->m_security_handler = TRY(SecurityHandler::create(document, encryption_dict)); + + // Automatically attempt to unencrypt the document with the empty string. The + // result is not important; it is the caller's responsibility to ensure the + // document is unencrypted before calling initialize(). 
+ document->m_security_handler->try_provide_user_password(""); + } return document; } @@ -54,6 +63,17 @@ Document::Document(NonnullRefPtr const& parser) m_parser->set_document(this); } +PDFErrorOr Document::initialize() +{ + if (m_security_handler) + VERIFY(m_security_handler->has_user_password()); + + TRY(build_page_tree()); + TRY(build_outline()); + + return {}; +} + PDFErrorOr Document::get_or_load_value(u32 index) { auto value = get_value(index); diff --git a/Userland/Libraries/LibPDF/Document.h b/Userland/Libraries/LibPDF/Document.h index bebde8aa4ea..127577de110 100644 --- a/Userland/Libraries/LibPDF/Document.h +++ b/Userland/Libraries/LibPDF/Document.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -78,8 +79,17 @@ class Document final public: static PDFErrorOr> create(ReadonlyBytes bytes); + // If a security handler is present, it is the caller's responsibility to ensure + // this document is unencrypted before calling this function. The user does not + // need to handle the case where the user password is the empty string. 
+ PDFErrorOr initialize(); + + ALWAYS_INLINE RefPtr const& security_handler() const { return m_security_handler; } + ALWAYS_INLINE RefPtr const& outline() const { return m_outline; } + ALWAYS_INLINE RefPtr const& trailer() const { return m_trailer; } + [[nodiscard]] PDFErrorOr get_or_load_value(u32 index); [[nodiscard]] u32 get_first_page_index() const; @@ -139,10 +149,12 @@ private: NonnullRefPtr m_parser; RefPtr m_catalog; + RefPtr m_trailer; Vector m_page_object_indices; HashMap m_pages; HashMap m_values; RefPtr m_outline; + RefPtr m_security_handler; }; } diff --git a/Userland/Libraries/LibPDF/Encryption.cpp b/Userland/Libraries/LibPDF/Encryption.cpp new file mode 100644 index 00000000000..07ee33b1327 --- /dev/null +++ b/Userland/Libraries/LibPDF/Encryption.cpp @@ -0,0 +1,394 @@ +/* + * Copyright (c) 2022, Matthew Olsson + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include +#include +#include +#include + +namespace PDF { + +static constexpr Array standard_encryption_key_padding_bytes = { + 0x28, + 0xBF, + 0x4E, + 0x5E, + 0x4E, + 0x75, + 0x8A, + 0x41, + 0x64, + 0x00, + 0x4E, + 0x56, + 0xFF, + 0xFA, + 0x01, + 0x08, + 0x2E, + 0x2E, + 0x00, + 0xB6, + 0xD0, + 0x68, + 0x3E, + 0x80, + 0x2F, + 0x0C, + 0xA9, + 0xFE, + 0x64, + 0x53, + 0x69, + 0x7A, +}; + +PDFErrorOr> SecurityHandler::create(Document* document, NonnullRefPtr encryption_dict) +{ + auto filter = TRY(encryption_dict->get_name(document, CommonNames::Filter))->name(); + if (filter == "Standard") + return TRY(StandardSecurityHandler::create(document, encryption_dict)); + + dbgln("Unrecognized security handler filter: {}", filter); + TODO(); +} + +PDFErrorOr> StandardSecurityHandler::create(Document* document, NonnullRefPtr encryption_dict) +{ + auto revision = encryption_dict->get_value(CommonNames::R).get(); + auto o = TRY(encryption_dict->get_string(document, CommonNames::O))->string(); + auto u = TRY(encryption_dict->get_string(document, CommonNames::U))->string(); + auto p = 
encryption_dict->get_value(CommonNames::P).get(); + auto length = encryption_dict->get_value(CommonNames::Length).get() / 8; + bool encrypt_metadata = true; + if (encryption_dict->contains(CommonNames::EncryptMetadata)) + encrypt_metadata = encryption_dict->get_value(CommonNames::EncryptMetadata).get<bool>(); + return adopt_ref(*new StandardSecurityHandler(document, revision, o, u, p, encrypt_metadata, length)); +} + +StandardSecurityHandler::StandardSecurityHandler(Document* document, size_t revision, String const& o_entry, String const& u_entry, u32 flags, bool encrypt_metadata, size_t length) + : m_document(document) + , m_revision(revision) + , m_o_entry(o_entry) + , m_u_entry(u_entry) + , m_flags(flags) + , m_encrypt_metadata(encrypt_metadata) + , m_length(length) +{ +} + +template<> +ByteBuffer StandardSecurityHandler::compute_user_password_value(ByteBuffer password_string) +{ + // Algorithm 4: Computing the encryption dictionary's U (user password) + // value (Security handlers of revision 2) + + // a) Create an encryption key based on the user password string, as + // described in [Algorithm 2] + auto encryption_key = compute_encryption_key(password_string); + + // b) Encrypt the 32-byte padding string shown in step (a) of [Algorithm 2], + // using an RC4 encryption function with the encryption key from the + // preceding step. + RC4 rc4(encryption_key); + auto output = rc4.encrypt(standard_encryption_key_padding_bytes); + + // c) Store the result of step (b) as the value of the U entry in the + // encryption dictionary. 
+ return output; +} + +template<> +ByteBuffer StandardSecurityHandler::compute_user_password_value(ByteBuffer password_string) +{ + // Algorithm 5: Computing the encryption dictionary's U (user password) + // value (Security handlers of revision 3 or greater) + + // a) Create an encryption key based on the user password string, as + // described in [Algorithm 2] + auto encryption_key = compute_encryption_key(password_string); + + // b) Initialize the MD5 hash function and pass the 32-byte padding string + // shown in step (a) of [Algorithm 2] as input to this function + Crypto::Hash::MD5 md5; + md5.update(standard_encryption_key_padding_bytes); + + // c) Pass the first element of the file's file identifier array to the MD5 + // hash function. + auto id_array = MUST(m_document->trailer()->get_array(m_document, CommonNames::ID)); + auto first_element_string = MUST(id_array->get_string_at(m_document, 0))->string(); + md5.update(first_element_string); + + // d) Encrypt the 16-byte result of the hash, using an RC4 encryption function + // with the encryption key from step (a). + RC4 rc4(encryption_key); + auto out = md5.peek(); + auto buffer = rc4.encrypt(out.bytes()); + + // e) Do the following 19 times: + // + // Take the output from the previous invocation of the RC4 function and pass + // it as input to a new invocation of the function; use an encryption key generated + // by taking each byte of the original encryption key obtained in step (a) and + // performing an XOR operation between that byte and the single-byte value of + // the iteration counter (from 1 to 19). 
+ auto new_encryption_key = MUST(ByteBuffer::create_uninitialized(encryption_key.size())); + for (size_t i = 1; i <= 19; i++) { + for (size_t j = 0; j < encryption_key.size(); j++) + new_encryption_key[j] = encryption_key[j] ^ i; + + RC4 new_rc4(new_encryption_key); + buffer = new_rc4.encrypt(buffer); + } + + // f) Append 16 bytes of the arbitrary padding to the output from the final invocation + // of the RC4 function and store the 32-byte result as the value of the U entry in + // the encryption dictionary. + VERIFY(buffer.size() == 16); + for (size_t i = 0; i < 16; i++) + buffer.append(0xab); + + return buffer; +} + +bool StandardSecurityHandler::try_provide_user_password(StringView password_string) +{ + // Algorithm 6: Authenticating the user password + + // a) Perform all but the last step of [Algorithm 4] or [Algorithm 5] using the + // supplied password string. + ByteBuffer password_buffer = MUST(ByteBuffer::copy(password_string.bytes())); + if (m_revision == 2) { + password_buffer = compute_user_password_value(password_buffer); + } else { + password_buffer = compute_user_password_value(password_buffer); + } + + // b) If the result of step (a) is equal to the value of the encryption + // dictionary's "U" entry (comparing the first 16 bytes in the case of security + // handlers of revision 3 or greater), the password supplied is the correct user + // password. + auto u_bytes = m_u_entry.bytes(); + bool password_is_correct; + if (m_revision >= 3) + password_is_correct = u_bytes.slice(0, 16) == password_buffer.bytes().slice(0, 16); + else + password_is_correct = u_bytes == password_buffer.bytes(); + + // compute_encryption_key() caches the key it derives even when the password turns + // out to be wrong. Drop that key on a mismatch so has_user_password() keeps + // reporting false and a later attempt does not trip the VERIFY in + // compute_encryption_key(). + if (!password_is_correct) + m_encryption_key = {}; + + return password_is_correct; +} + +ByteBuffer StandardSecurityHandler::compute_encryption_key(ByteBuffer password_string) +{ + // This function should never be called after we have a valid encryption key. + VERIFY(!m_encryption_key.has_value()); + + // 7.6.3.3 Encryption Key Algorithm + + // Algorithm 2: Computing an encryption key + + // a) Pad or truncate the password string to exactly 32 bytes. 
If the password string + // is more than 32 bytes long, use only its first 32 bytes; if it is less than 32 + // bytes long, pad it by appending the required number of additional bytes from the + // beginning of the following padding string: [omitted] + + if (password_string.size() > 32) { + password_string.resize(32); + } else { + password_string.append(standard_encryption_key_padding_bytes.data(), 32 - password_string.size()); + } + + // b) Initialize the MD5 hash function and pass the result of step (a) as input to + // this function. + Crypto::Hash::MD5 md5; + md5.update(password_string); + + // c) Pass the value of the encryption dictionary's "O" entry to the MD5 hash function. + md5.update(m_o_entry); + + // d) Convert the integer value of the P entry to a 32-bit unsigned binary number and pass + // these bytes to the MD5 hash function, low-order byte first. + md5.update(reinterpret_cast(&m_flags), sizeof(m_flags)); + + // e) Pass the first element of the file's file identifier array to the MD5 hash function. + auto id_array = MUST(m_document->trailer()->get_array(m_document, CommonNames::ID)); + auto first_element_string = MUST(id_array->get_string_at(m_document, 0))->string(); + md5.update(first_element_string); + + // f) (Security handlers of revision 4 or greater) if the document metadata is not being + // encrypted, pass 4 bytes with the value 0xffffffff to the MD5 hash function. + if (m_revision >= 4 && !m_encrypt_metadata) { + u32 value = 0xffffffff; + md5.update(reinterpret_cast(&value), 4); + } + + // g) Finish the hash. + // h) (Security handlers of revision 3 or greater) Do the following 50 times: + // + // Take the output from the previous MD5 hash and pass the first n bytes + // of the output as input into a new MD5 hash, where n is the number of + // bytes of the encryption key as defined by the value of the encryption + // dictionary's Length entry. 
+ if (m_revision >= 3) { + ByteBuffer n_bytes; + + for (u32 i = 0; i < 50; i++) { + Crypto::Hash::MD5 new_md5; + n_bytes.ensure_capacity(m_length); + + while (n_bytes.size() < m_length) { + auto out = md5.peek().bytes(); + for (size_t j = 0; j < out.size() && n_bytes.size() < m_length; j++) + n_bytes.append(out[j]); + } + + VERIFY(n_bytes.size() == m_length); + new_md5.update(n_bytes); + md5 = move(new_md5); + n_bytes.clear(); + } + } + + // i) Set the encryption key to the first n bytes of the output from the final MD5 + // hash, where n shall always be 5 for security handlers of revision 2 but, for + // security handlers of revision 3 or greater, shall depend on the value of the + // encryption dictionary's Length entry. + size_t n; + if (m_revision == 2) { + n = 5; + } else if (m_revision >= 3) { + n = m_length; + } else { + VERIFY_NOT_REACHED(); + } + + ByteBuffer encryption_key; + encryption_key.ensure_capacity(n); + while (encryption_key.size() < n) { + auto out = md5.peek(); + for (size_t i = 0; encryption_key.size() < n && i < out.data_length(); i++) + encryption_key.append(out.bytes()[i]); + } + + m_encryption_key = encryption_key; + + return encryption_key; +} + +void StandardSecurityHandler::encrypt(NonnullRefPtr object, Reference reference) const +{ + // 7.6.2 General Encryption Algorithm + // Algorithm 1: Encryption of data using the RC4 or AES algorithms + + // FIXME: Support AES + + VERIFY(m_encryption_key.has_value()); + + // a) Obtain the object number and generation number from the object identifier of + // the string or stream to be encrypted. If the string is a direct object, use + // the identifier of the indirect object containing it. + // + // Note: This is always passed in at parse time because objects don't know their own + // object number. 
+ + // b) For all strings and streams with crypt filter specifier; treating the object + // number as binary integers, extends the origin n-byte encryption key to n + 5 + // bytes by appending the low-order 3 bytes of the object number and the low-order + // 2 bytes of the generation number in that order, low-order byte first. ... + + auto encryption_key = m_encryption_key.value(); + ReadonlyBytes bytes; + Function assign; + + if (object->is()) { + auto stream = object->cast(); + bytes = stream->bytes(); + + assign = [&stream](ByteBuffer const& buffer) { + stream->buffer() = buffer; + }; + + if (stream->dict()->contains(CommonNames::Filter)) { + auto filter = MUST(stream->dict()->get_name(m_document, CommonNames::Filter))->name(); + if (filter == "Crypt") + TODO(); + } + } else if (object->is()) { + auto string = object->cast(); + bytes = string->string().bytes(); + assign = [&string](ByteBuffer const& buffer) { + string->set_string(String(buffer.bytes())); + }; + } else { + VERIFY_NOT_REACHED(); + } + + auto index = reference.as_ref_index(); + auto generation = reference.as_ref_generation_index(); + + encryption_key.append(index & 0xff); + encryption_key.append((index >> 8) & 0xff); + encryption_key.append((index >> 16) & 0xff); + encryption_key.append(generation & 0xff); + encryption_key.append((generation >> 8) & 0xff); + + // c) Initialize the MD5 hash function and pass the result of step (b) as input to this + // function. + Crypto::Hash::MD5 md5; + md5.update(encryption_key); + + // d) Use the first (n + 5) bytes, up to a maximum of 16, of the output from the MD5 + // hash as the key for the RC4 or AES symmetric key algorithms, along with the string + // or stream data to be encrypted. 
+ auto key = MUST(ByteBuffer::copy(md5.peek().bytes())); + + if (key.size() > min(encryption_key.size(), 16)) + key.resize(encryption_key.size()); + + RC4 rc4(key); + auto output = rc4.encrypt(bytes); + + assign(output); +} + +void StandardSecurityHandler::decrypt(NonnullRefPtr object, Reference reference) const +{ + // AES and RC4 are both symmetric, so decryption is the same as encryption + encrypt(object, reference); +} + +static constexpr auto identity_permutation = iota_array(0); + +RC4::RC4(ReadonlyBytes key) + : m_bytes(identity_permutation) +{ + size_t j = 0; + for (size_t i = 0; i < 256; i++) { + j = (j + m_bytes[i] + key[i % key.size()]) & 0xff; + swap(m_bytes[i], m_bytes[j]); + } +} + +void RC4::generate_bytes(ByteBuffer& bytes) +{ + size_t i = 0; + size_t j = 0; + + for (size_t count = 0; count < bytes.size(); count++) { + i = (i + 1) % 256; + j = (j + m_bytes[i]) % 256; + swap(m_bytes[i], m_bytes[j]); + bytes[count] = m_bytes[(m_bytes[i] + m_bytes[j]) % 256]; + } +} + +ByteBuffer RC4::encrypt(ReadonlyBytes bytes) +{ + auto output = MUST(ByteBuffer::create_uninitialized(bytes.size())); + generate_bytes(output); + for (size_t i = 0; i < bytes.size(); i++) + output[i] ^= bytes[i]; + return output; +} + +} diff --git a/Userland/Libraries/LibPDF/Encryption.h b/Userland/Libraries/LibPDF/Encryption.h new file mode 100644 index 00000000000..e01848b66fd --- /dev/null +++ b/Userland/Libraries/LibPDF/Encryption.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2022, Matthew Olsson + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include +#include + +namespace PDF { + +class SecurityHandler : public RefCounted { +public: + static PDFErrorOr> create(Document*, NonnullRefPtr encryption_dict); + + virtual ~SecurityHandler() = default; + + virtual bool try_provide_user_password(StringView password) = 0; + virtual bool has_user_password() const = 0; + + virtual void encrypt(NonnullRefPtr, Reference reference) const = 0; + virtual void 
decrypt(NonnullRefPtr, Reference reference) const = 0; +}; + +class StandardSecurityHandler : public SecurityHandler { +public: + static PDFErrorOr> create(Document*, NonnullRefPtr encryption_dict); + + StandardSecurityHandler(Document*, size_t revision, String const& o_entry, String const& u_entry, u32 flags, bool encrypt_metadata, size_t length); + + ~StandardSecurityHandler() override = default; + + bool try_provide_user_password(StringView password_string) override; + + bool has_user_password() const override { return m_encryption_key.has_value(); } + +protected: + void encrypt(NonnullRefPtr, Reference reference) const override; + void decrypt(NonnullRefPtr, Reference reference) const override; + +private: + template + ByteBuffer compute_user_password_value(ByteBuffer password_string); + + ByteBuffer compute_encryption_key(ByteBuffer password_string); + + Document* m_document; + size_t m_revision; + Optional m_encryption_key; + String m_o_entry; + String m_u_entry; + u32 m_flags; + bool m_encrypt_metadata; + size_t m_length; +}; + +class RC4 { +public: + RC4(ReadonlyBytes key); + + void generate_bytes(ByteBuffer&); + ByteBuffer encrypt(ReadonlyBytes bytes); + +private: + Array m_bytes; +}; + +} diff --git a/Userland/Libraries/LibPDF/ObjectDerivatives.h b/Userland/Libraries/LibPDF/ObjectDerivatives.h index 99e93b133fb..769d0c2d402 100644 --- a/Userland/Libraries/LibPDF/ObjectDerivatives.h +++ b/Userland/Libraries/LibPDF/ObjectDerivatives.h @@ -29,6 +29,7 @@ public: [[nodiscard]] ALWAYS_INLINE String const& string() const { return m_string; } [[nodiscard]] ALWAYS_INLINE bool is_binary() const { return m_is_binary; } + void set_string(String string) { m_string = move(string); } const char* type_name() const override { return "string"; } String to_string(int indent) const override; @@ -153,6 +154,7 @@ public: [[nodiscard]] ALWAYS_INLINE NonnullRefPtr dict() const { return m_dict; } [[nodiscard]] ReadonlyBytes bytes() const { return m_buffer.bytes(); }; + 
[[nodiscard]] ByteBuffer& buffer() { return m_buffer; }; const char* type_name() const override { return "stream"; } String to_string(int indent) const override;