mirror of
https://github.com/SerenityOS/serenity.git
synced 2025-01-22 09:21:57 -05:00
7fc4ea5495
Since we're parsing segment headers for random-access jbig2 inputs already, just always do that and get the image dimensions from the PageInformation segment data. Not all that much more code, and it makes this script much more pleasant to use.
241 lines
6.1 KiB
Python
Executable file
241 lines
6.1 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
|
|
"""
|
|
Creates a PDF that embeds a jbig2 image. Useful for viewing .jbig2 files in
|
|
PDF viewers, since all PDF viewers support .jbig2 but few image viewers do.
|
|
|
|
Usage :
|
|
% Meta/jbig2_to_pdf.py -o foo.pdf path/to/bitmap.jbig2
|
|
% open foo.pdf
|
|
"""
|
|
|
|
from dataclasses import dataclass
from typing import Optional
import argparse
import struct
import textwrap
|
|
|
|
|
|
# Segment type values, as stored in the low six bits of a segment header's
# flags byte and compared against SegmentHeader.type.
PageInformation = 48  # segment whose data starts with the page width/height
EndOfFile = 51  # last segment header; parsing stops after it
|
|
|
|
|
|
def dedent(b):
    """Remove the common leading whitespace from every line of a bytes object.

    textwrap.dedent() only works on str, so the bytes are round-tripped
    through latin1, which maps each byte value to exactly one code point and
    back without loss.
    """
    text = b.decode('latin1')
    stripped = textwrap.dedent(text)
    return stripped.encode('latin1')
|
|
|
|
|
|
@dataclass
class SegmentHeader:
    """A parsed jbig2 segment header together with the segment's data.

    The field names `type` and `bytes` shadow builtins, but they are the
    attribute names the rest of this script reads, so they are kept.
    """

    # Total size of the header in bytes, including the 4-byte data length.
    segment_header_size: int

    # Segment type: the low six bits of the header's flags byte.
    type: int

    # The raw header bytes, exactly as they appear in the input.
    bytes: bytes

    # Size in bytes of the segment data described by this header.
    data_size: int

    # The segment data. It is None right after the header has been parsed
    # and is filled in once the position of the data is known.
    data: Optional[bytes] = None
|
|
|
|
|
|
def read_segment_header(data, offset):
    """Parse the jbig2 segment header that starts at data[offset].

    Args:
        data: Buffer holding jbig2 segment headers (and possibly data).
        offset: Index into `data` of the first byte of the header.

    Returns:
        A SegmentHeader whose `data` is None; the caller fills it in once it
        knows where the segment data lives.

    Raises:
        Exception: If the header is truncated, lists more than 4 referred-to
            segments, or declares an indeterminate data length.
    """
    # Fixed-size prefix: segment number (4 bytes), flags (1 byte) and the
    # referred-to segment count byte (1 byte).
    if offset + 6 > len(data):
        raise Exception('truncated segment header')

    segment_number, = struct.unpack_from('>I', data, offset)
    flags = data[offset + 4]
    segment_page_association_size_is_32_bits = (flags & 0b100_0000) != 0
    segment_type = flags & 0b11_1111

    # The count is stored in the top three bits of the byte. Larger values
    # would need a longer encoding of this field, which is not implemented.
    referred_segments_count = data[offset + 5] >> 5
    if referred_segments_count > 4:
        raise Exception('cannot handle more than 4 referred-to segments')

    # The width of each referred-to segment number depends on this segment's
    # own number.
    if segment_number <= 256:
        ref_size = 1
    elif segment_number <= 65536:
        ref_size = 2
    else:
        ref_size = 4
    segment_header_size = 4 + 1 + 1 + ref_size * referred_segments_count

    # The page association field is either one or four bytes wide.
    segment_header_size += 4 if segment_page_association_size_is_32_bits else 1

    # The header ends with the 4-byte length of the segment data.
    if offset + segment_header_size + 4 > len(data):
        raise Exception('truncated segment header')
    data_size, = struct.unpack_from('>I', data, offset + segment_header_size)
    if data_size == 0xffff_ffff:
        raise Exception('cannot handle indeterminate length')
    segment_header_size += 4

    header_bytes = data[offset:offset + segment_header_size]
    return SegmentHeader(segment_header_size, segment_type, header_bytes,
                         data_size, None)
|
|
|
|
|
|
def read_segment_headers(data, is_random_access):
    """Parse all segment headers in `data` and attach each segment's data.

    With sequential organization every header is directly followed by its
    data. With random-access organization all headers come first and the
    data of all segments follows, in the same order as the headers.

    Args:
        data: The jbig2 segments, without the file header.
        is_random_access: Whether `data` uses random-access organization.

    Returns:
        A list of SegmentHeader objects with their `data` filled in. Parsing
        stops after the EndOfFile segment, or at the end of `data`.
    """
    headers = []
    pos = 0
    total = len(data)

    while pos < total:
        header = read_segment_header(data, pos)
        pos += header.segment_header_size

        if not is_random_access:
            # Sequential: the data sits right behind its header.
            end = pos + header.data_size
            header.data = data[pos:end]
            pos = end

        headers.append(header)
        if header.type == EndOfFile:
            break

    if is_random_access:
        # Random access: `pos` now points just past the last header, which
        # is where the data of the first segment begins.
        for header in headers:
            end = pos + header.data_size
            header.data = data[pos:end]
            pos = end

    return headers
|
|
|
|
|
|
def random_access_to_sequential(segment_headers):
    """Serialize segments so that each header is followed by its own data.

    Args:
        segment_headers: Iterable of objects with `bytes` (the raw header)
            and `data` (the segment data) attributes.

    Returns:
        A bytes object with header, data, header, data, ... in input order.
    """
    # Collect the pieces and join once; repeated `bytes +=` copies the
    # accumulated output on every iteration.
    chunks = []
    for segment_header in segment_headers:
        chunks.append(segment_header.bytes)
        chunks.append(segment_header.data)
    return b''.join(chunks)
|
|
|
|
|
|
def get_dimensions(segment_headers):
    """Return (width, height) from the first PageInformation segment.

    Args:
        segment_headers: Iterable of segment headers with `type` and `data`.

    Returns:
        A 2-tuple of ints read from the first eight bytes of the segment's
        data as two big-endian unsigned 32-bit values.

    Raises:
        Exception: If no segment has type PageInformation.
    """
    for header in segment_headers:
        if header.type == PageInformation:
            width_and_height = struct.unpack_from('>II', header.data)
            return width_and_height
    raise Exception('did not find PageInformation')
|
|
|
|
|
|
def main():
    """Wrap the jbig2 file named on the command line in a single-page PDF.

    Reads the input, strips the jbig2 file header, converts random-access
    input to sequential organization, and writes a PDF whose only page shows
    the image at one PDF unit per pixel.

    Raises:
        Exception: If the input does not start with the jbig2 file header ID
            string, or if parsing its segments fails.
    """
    parser = argparse.ArgumentParser(
        epilog=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument("image", help="Input image")
    # The output path is mandatory: let argparse report a missing -o instead
    # of crashing later in open(None, 'wb').
    parser.add_argument("-o", "--output", required=True,
                        help="Path to output PDF")
    args = parser.parse_args()

    with open(args.image, 'rb') as f:
        image_data = f.read()

    # A jbig2 file starts with an 8-byte ID string followed by a flags byte.
    jbig2_id_string = b'\x97JB2\r\n\x1a\n'
    if (len(image_data) <= len(jbig2_id_string)
            or not image_data.startswith(jbig2_id_string)):
        raise Exception('not a jbig2 file')

    # strip jbig2 header
    flags = image_data[len(jbig2_id_string)]
    is_random_access = flags & 1 == 0
    header_size = len(jbig2_id_string) + 1
    if flags & 2 == 0:
        # The 4-byte number-of-pages field is present.
        header_size += 4
    image_data = image_data[header_size:]

    segment_headers = read_segment_headers(image_data, is_random_access)

    width, height = get_dimensions(segment_headers)
    print(f'dims {width}x{height}')

    if is_random_access:
        image_data = random_access_to_sequential(segment_headers)

    # PDF header, followed by a comment line containing non-ASCII bytes.
    start = dedent(b'''\
        %PDF-1.4
        %\265\266

        ''')

    # Content stream: scale the unit square to the image size and draw /Im.
    operators = dedent(b'''\
        %d 0 0 %d 0 0 cm
        /Im Do''' % (width, height))

    objs = [
        dedent(b'''\
            1 0 obj
            <<
            /Type /Catalog
            /Pages 2 0 R
            >>
            endobj
            '''),

        dedent(b'''\
            2 0 obj
            <<
            /Type /Pages
            /Kids [3 0 R]
            /Count 1
            >>
            endobj
            '''),

        dedent(b'''\
            3 0 obj
            <<
            /Type /Page
            /Parent 2 0 R
            /MediaBox [0 0 %d %d]
            /Contents 4 0 R
            /Resources <<
            /XObject <<
            /Im 5 0 R
            >>
            >>
            >>
            endobj
            ''' % (width, height)),

        dedent(b'''\
            4 0 obj
            <</Length %d>>
            stream
            ''' % len(operators)) +
        operators +
        dedent(b'''
            endstream
            endobj
            '''),

        # The image bytes are appended outside dedent() so that the binary
        # data is never altered.
        dedent(b'''\
            5 0 obj
            <<
            /Length %d
            /Type /XObject
            /Subtype /Image
            /Width %d
            /Height %d
            /ColorSpace /DeviceGray
            /Filter /JBIG2Decode
            /BitsPerComponent 1
            >>
            stream
            ''' % (len(image_data), width, height)) +
        image_data +
        dedent(b'''
            endstream
            endobj
            '''),
    ]

    with open(args.output, 'wb') as f:
        f.write(start)

        # Remember where each object starts for the cross-reference table.
        offsets = []
        for obj in objs:
            offsets.append(f.tell())
            f.write(obj)
            f.write(b'\n')

        xref_offset = f.tell()
        f.write(b'xref\n')
        f.write(b'0 %d\n' % (len(objs) + 1))
        # Object 0 is the head of the free list; its generation number must
        # be 65535. Every xref entry is exactly 20 bytes long.
        f.write(b'0000000000 65535 f \n')
        for offset in offsets:
            f.write(b'%010d 00000 n \n' % offset)
        f.write(b'\n')

        # '%%%%' collapses to '%%' during %-formatting, giving '%%EOF'.
        f.write(dedent(b'''\
            trailer
            <<
            /Size %d
            /Root 1 0 R
            >>
            startxref
            %d
            %%%%EOF
            ''' % (len(objs) + 1, xref_offset)))
|
|
|
|
|
|
# Run the conversion only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|