mirror of
https://github.com/SerenityOS/serenity.git
synced 2025-01-23 09:51:57 -05:00
LibPDF: Do not crash on encrypted files that start unluckily
PDF files can be linearized. In that case, they start with a "linearization dict" that stores the key `/Linearized` and the value `1`. To check if a file is linearized, we just read the first dict and then check if it has that key. If the first object of a PDF was a stream with a compression filter and the input PDF was encrypted and not linearized, then our attempt to decode the linearization dict could crash because the stream contents were encrypted, decryption state was not yet initialized, and we tried to decompress the stream data before decrypting it. To prevent this, disable decompression when parsing the first object to determine if it's a linearization dictionary. (A linearization dict never stores string values, so decryption not yet being initialized is not a problem. Integer values aren't encrypted in encrypted PDF files.)
This commit is contained in:
parent
c781686198
commit
39b2eed3f6
3 changed files with 19 additions and 1 deletions
|
@ -124,7 +124,19 @@ PDFErrorOr<DocumentParser::LinearizationResult> DocumentParser::initialize_linea
|
|||
return LinearizationResult::NotLinearized;
|
||||
|
||||
// At this point, we still don't know for sure if we are dealing with a valid object.
|
||||
|
||||
// The linearization dict is read before decryption state is initialized.
|
||||
// A linearization dict only contains numbers, so the decryption dictionary is not needed (only strings and streams get decrypted, and only streams get unfiltered).
|
||||
// But we don't know if the first object is a linearization dictionary until after parsing it, so the object might be a stream.
|
||||
// If that stream is encrypted and filtered, we'd try to unfilter it while it's still encrypted, handing encrypted data to the unfiltering algorithms.
|
||||
// This makes them assert, since they can't make sense of the encrypted data.
|
||||
// So read the first object without unfiltering.
|
||||
// If it is a linearization dict, there's no stream data and this has no effect.
|
||||
// If it is a stream, this isn't a linearized file and the object will be read on demand (and unfiltered) later, when the object is lazily read via an xref entry.
|
||||
set_filters_enabled(false);
|
||||
auto indirect_value_or_error = parse_indirect_value();
|
||||
set_filters_enabled(true);
|
||||
|
||||
if (indirect_value_or_error.is_error())
|
||||
return LinearizationResult::NotLinearized;
|
||||
|
||||
|
|
|
@ -474,7 +474,7 @@ PDFErrorOr<NonnullRefPtr<StreamObject>> Parser::parse_stream(NonnullRefPtr<DictO
|
|||
if (m_document->security_handler() && m_enable_encryption)
|
||||
m_document->security_handler()->decrypt(stream_object, m_current_reference_stack.last());
|
||||
|
||||
if (dict->contains(CommonNames::Filter)) {
|
||||
if (dict->contains(CommonNames::Filter) && m_enable_filters) {
|
||||
Vector<DeprecatedFlyString> filters;
|
||||
|
||||
// We may either get a single filter or an array of cascading filters
|
||||
|
|
|
@ -57,6 +57,11 @@ public:
|
|||
PDFErrorOr<NonnullRefPtr<StreamObject>> parse_stream(NonnullRefPtr<DictObject> dict);
|
||||
PDFErrorOr<Vector<Operator>> parse_operators();
|
||||
|
||||
void set_filters_enabled(bool enabled)
|
||||
{
|
||||
m_enable_filters = enabled;
|
||||
}
|
||||
|
||||
protected:
|
||||
// Appends `ref` to the stack of references currently being parsed.
// parse_stream() hands the last entry of this stack to the security handler
// when it decrypts a stream.
void push_reference(Reference const& ref)
{
    m_current_reference_stack.append(ref);
}
|
||||
// Takes the last entry off the reference stack; the taken value is discarded.
void pop_reference()
{
    m_current_reference_stack.take_last();
}
|
||||
|
@ -73,6 +78,7 @@ protected:
|
|||
WeakPtr<Document> m_document;
|
||||
Vector<Reference> m_current_reference_stack;
|
||||
bool m_enable_encryption { true };
|
||||
// Whether parse_stream() processes a stream dictionary's /Filter entry.
// Must default to true: filtering is only meant to be switched off temporarily
// (via set_filters_enabled(false)) while probing the first object for a
// linearization dict. A parser that never toggles the flag, or that returns
// before re-enabling it, must still decode filtered streams.
bool m_enable_filters { true };
|
||||
};
|
||||
|
||||
};
|
||||
|
|
Loading…
Add table
Reference in a new issue