LibPDF+LibGfx: Support JBIG2s with /JBIG2Globals set

Several ramifications:

* /JBIG2Globals is an indirect reference, which means we now need
  a Document for unfiltering. (Technically, other decode parameters
  can also be indirect objects and we should use the Document to
  resolve() those too, but in practice it only seems to be needed
  for /JBIG2Globals.)

* Since /JBIG2Globals are so rare, we just parse them once for each
  image that uses them, and decode_embedded() now receives a
  Vector<ReadonlyBytes> with all sections of sequences of
  segments.

* Internally, decode_segment_headers() is now called several times
  for embedded JBIG2s with multiple such sections (e.g. PDFs with
  /JBIG2Globals).

* That means `data` is no longer part of JBIG2LoadingContext,
  and things get slightly reshuffled due to this.

This completes the LibPDF part of JBIG2 support. Once LibGfx
implements actual decoding of JBIG2s, things should start to
Just Work in PDFs.
This commit is contained in:
Nico Weber 2024-03-03 21:54:00 -05:00
parent ad886d6e17
commit ead02da98a
5 changed files with 32 additions and 28 deletions

View file

@ -73,7 +73,6 @@ struct JBIG2LoadingContext {
Error,
};
State state { State::NotDecoded };
ReadonlyBytes data;
Organization organization { Organization::Sequential };
IntSize size;
@ -83,12 +82,12 @@ struct JBIG2LoadingContext {
Vector<SegmentData> segments;
};
static ErrorOr<void> decode_jbig2_header(JBIG2LoadingContext& context)
static ErrorOr<void> decode_jbig2_header(JBIG2LoadingContext& context, ReadonlyBytes data)
{
if (!JBIG2ImageDecoderPlugin::sniff(context.data))
if (!JBIG2ImageDecoderPlugin::sniff(data))
return Error::from_string_literal("JBIG2LoadingContext: Invalid JBIG2 header");
FixedMemoryStream stream(context.data.slice(sizeof(id_string)));
FixedMemoryStream stream(data.slice(sizeof(id_string)));
// D.4.2 File header flags
u8 header_flags = TRY(stream.read_value<u8>());
@ -219,11 +218,8 @@ static ErrorOr<size_t> scan_for_immediate_generic_region_size(ReadonlyBytes data
return size;
}
static ErrorOr<void> decode_segment_headers(JBIG2LoadingContext& context)
static ErrorOr<void> decode_segment_headers(JBIG2LoadingContext& context, ReadonlyBytes data)
{
ReadonlyBytes data = context.data;
if (context.organization != Organization::Embedded)
data = data.slice(sizeof(id_string) + sizeof(u8) + (context.number_of_pages.has_value() ? sizeof(u32) : 0));
FixedMemoryStream stream(data);
Vector<ReadonlyBytes> segment_datas;
@ -270,10 +266,9 @@ static ErrorOr<void> decode_segment_headers(JBIG2LoadingContext& context)
return {};
}
JBIG2ImageDecoderPlugin::JBIG2ImageDecoderPlugin(ReadonlyBytes data)
JBIG2ImageDecoderPlugin::JBIG2ImageDecoderPlugin()
{
m_context = make<JBIG2LoadingContext>();
m_context->data = data;
}
IntSize JBIG2ImageDecoderPlugin::size()
@ -288,9 +283,12 @@ bool JBIG2ImageDecoderPlugin::sniff(ReadonlyBytes data)
ErrorOr<NonnullOwnPtr<ImageDecoderPlugin>> JBIG2ImageDecoderPlugin::create(ReadonlyBytes data)
{
auto plugin = TRY(adopt_nonnull_own_or_enomem(new (nothrow) JBIG2ImageDecoderPlugin(data)));
TRY(decode_jbig2_header(*plugin->m_context));
TRY(decode_segment_headers(*plugin->m_context));
auto plugin = TRY(adopt_nonnull_own_or_enomem(new (nothrow) JBIG2ImageDecoderPlugin()));
TRY(decode_jbig2_header(*plugin->m_context, data));
data = data.slice(sizeof(id_string) + sizeof(u8) + (plugin->m_context->number_of_pages.has_value() ? sizeof(u32) : 0));
TRY(decode_segment_headers(*plugin->m_context, data));
return plugin;
}
@ -306,12 +304,14 @@ ErrorOr<ImageFrameDescriptor> JBIG2ImageDecoderPlugin::frame(size_t index, Optio
return Error::from_string_literal("JBIG2ImageDecoderPlugin: Draw the rest of the owl");
}
ErrorOr<ByteBuffer> JBIG2ImageDecoderPlugin::decode_embedded(ReadonlyBytes data)
ErrorOr<ByteBuffer> JBIG2ImageDecoderPlugin::decode_embedded(Vector<ReadonlyBytes> data)
{
dbgln_if(JBIG2_DEBUG, "JBIG2ImageDecoderPlugin: Decoding embedded JBIG2 of size {}", data.size());
auto plugin = TRY(adopt_nonnull_own_or_enomem(new (nothrow) JBIG2ImageDecoderPlugin(data)));
auto plugin = TRY(adopt_nonnull_own_or_enomem(new (nothrow) JBIG2ImageDecoderPlugin()));
plugin->m_context->organization = Organization::Embedded;
TRY(decode_segment_headers(*plugin->m_context));
for (auto const& segment_data : data)
TRY(decode_segment_headers(*plugin->m_context, segment_data));
return Error::from_string_literal("JBIG2ImageDecoderPlugin: Cannot decode embedded JBIG2 yet");
}

View file

@ -25,10 +25,10 @@ public:
virtual ErrorOr<ImageFrameDescriptor> frame(size_t index, Optional<IntSize> ideal_size = {}) override;
static ErrorOr<ByteBuffer> decode_embedded(ReadonlyBytes);
static ErrorOr<ByteBuffer> decode_embedded(Vector<ReadonlyBytes>);
private:
JBIG2ImageDecoderPlugin(ReadonlyBytes);
JBIG2ImageDecoderPlugin();
OwnPtr<JBIG2LoadingContext> m_context;
};

View file

@ -19,7 +19,7 @@
namespace PDF {
PDFErrorOr<ByteBuffer> Filter::decode(ReadonlyBytes bytes, DeprecatedFlyString const& encoding_type, RefPtr<DictObject> decode_parms)
PDFErrorOr<ByteBuffer> Filter::decode(Document* document, ReadonlyBytes bytes, DeprecatedFlyString const& encoding_type, RefPtr<DictObject> decode_parms)
{
if (encoding_type == CommonNames::ASCIIHexDecode)
return decode_ascii_hex(bytes);
@ -34,7 +34,7 @@ PDFErrorOr<ByteBuffer> Filter::decode(ReadonlyBytes bytes, DeprecatedFlyString c
if (encoding_type == CommonNames::CCITTFaxDecode)
return decode_ccitt(bytes, decode_parms);
if (encoding_type == CommonNames::JBIG2Decode)
return decode_jbig2(bytes, decode_parms);
return decode_jbig2(document, bytes, decode_parms);
if (encoding_type == CommonNames::DCTDecode)
return decode_dct(bytes);
if (encoding_type == CommonNames::JPXDecode)
@ -334,15 +334,19 @@ PDFErrorOr<ByteBuffer> Filter::decode_ccitt(ReadonlyBytes bytes, RefPtr<DictObje
return decoded;
}
PDFErrorOr<ByteBuffer> Filter::decode_jbig2(ReadonlyBytes bytes, RefPtr<DictObject> decode_parms)
PDFErrorOr<ByteBuffer> Filter::decode_jbig2(Document* document, ReadonlyBytes bytes, RefPtr<DictObject> decode_parms)
{
// 3.3.6 JBIG2Decode Filter
Vector<ReadonlyBytes> segments;
if (decode_parms) {
if (decode_parms->contains(CommonNames::JBIG2Globals))
return Error::rendering_unsupported_error("JBIG2Globals is not yet supported");
if (decode_parms->contains(CommonNames::JBIG2Globals)) {
auto globals = TRY(decode_parms->get_stream(document, CommonNames::JBIG2Globals));
segments.append(globals->bytes());
}
}
return TRY(Gfx::JBIG2ImageDecoderPlugin::decode_embedded(bytes));
segments.append(bytes);
return TRY(Gfx::JBIG2ImageDecoderPlugin::decode_embedded(segments));
}
PDFErrorOr<ByteBuffer> Filter::decode_dct(ReadonlyBytes bytes)

View file

@ -15,7 +15,7 @@ namespace PDF {
class Filter {
public:
static PDFErrorOr<ByteBuffer> decode(ReadonlyBytes bytes, DeprecatedFlyString const& encoding_type, RefPtr<DictObject> decode_parms);
static PDFErrorOr<ByteBuffer> decode(Document* document, ReadonlyBytes bytes, DeprecatedFlyString const& encoding_type, RefPtr<DictObject> decode_parms);
private:
static PDFErrorOr<ByteBuffer> decode_ascii_hex(ReadonlyBytes bytes);
@ -26,7 +26,7 @@ private:
static PDFErrorOr<ByteBuffer> decode_flate(ReadonlyBytes bytes, RefPtr<DictObject> decode_parms);
static PDFErrorOr<ByteBuffer> decode_run_length(ReadonlyBytes bytes);
static PDFErrorOr<ByteBuffer> decode_ccitt(ReadonlyBytes bytes, RefPtr<DictObject> decode_parms);
static PDFErrorOr<ByteBuffer> decode_jbig2(ReadonlyBytes bytes, RefPtr<DictObject> decode_parms);
static PDFErrorOr<ByteBuffer> decode_jbig2(Document* document, ReadonlyBytes bytes, RefPtr<DictObject> decode_parms);
static PDFErrorOr<ByteBuffer> decode_dct(ReadonlyBytes bytes);
static PDFErrorOr<ByteBuffer> decode_jpx(ReadonlyBytes bytes);
static PDFErrorOr<ByteBuffer> decode_crypt(ReadonlyBytes bytes);

View file

@ -460,7 +460,7 @@ PDFErrorOr<void> Parser::unfilter_stream(NonnullRefPtr<StreamObject> stream_obje
if (!decode_parms_vector.is_empty())
decode_parms = decode_parms_vector.at(i);
stream_object->buffer() = TRY(Filter::decode(stream_object->bytes(), filters.at(i), decode_parms));
stream_object->buffer() = TRY(Filter::decode(m_document, stream_object->bytes(), filters.at(i), decode_parms));
}
return {};