Spaces:
Runtime error
Runtime error
# Copyright (c) 2006, Mathieu Fenniak
# Copyright (c) 2007, Ashish Kulkarni <[email protected]>
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
"""Anything related to encryption / decryption.""" | |
import struct | |
from hashlib import md5 | |
from typing import Tuple, Union | |
from ._utils import b_, ord_, str_ | |
from .generic import ByteStringObject | |
try: | |
from typing import Literal # type: ignore[attr-defined] | |
except ImportError: | |
# PEP 586 introduced typing.Literal with Python 3.8 | |
# For older Python versions, the backport typing_extensions is necessary: | |
from typing_extensions import Literal # type: ignore[misc] | |
# ref: pdf1.8 spec section 3.5.2 algorithm 3.2 | |
_encryption_padding = ( | |
b"\x28\xbf\x4e\x5e\x4e\x75\x8a\x41\x64\x00\x4e\x56" | |
b"\xff\xfa\x01\x08\x2e\x2e\x00\xb6\xd0\x68\x3e\x80\x2f\x0c" | |
b"\xa9\xfe\x64\x53\x69\x7a" | |
) | |
def _alg32(
    password: str,
    rev: Literal[2, 3, 4],
    keylen: int,
    owner_entry: ByteStringObject,
    p_entry: int,
    id1_entry: ByteStringObject,
    metadata_encrypt: bool = True,
) -> bytes:
    """
    Implementation of algorithm 3.2 of the PDF standard security handler.

    See section 3.5.2 of the PDF 1.6 reference.

    :param password: the password to derive the key from; it is padded or
        truncated to 32 bytes with ``_encryption_padding``.
    :param rev: revision of the security handler; revisions >= 3 enable
        steps 6 and 8 below.
    :param keylen: number of bytes of the encryption key.
    :param owner_entry: the /O entry; its ``original_bytes`` are hashed.
    :param p_entry: the /P entry. Both the signed 32-bit form and the
        equivalent unsigned 32-bit form are accepted.
    :param id1_entry: first element of the file identifier array; its
        ``original_bytes`` are hashed.
    :param metadata_encrypt: when false (and ``rev >= 3``) four 0xFF bytes
        are added to the hash.
    :return: the first ``keylen`` bytes of the final MD5 digest.
    :raises struct.error: if ``p_entry`` does not fit into 32 bits.
    """
    # 1. Pad or truncate the password string to exactly 32 bytes. If the
    # password string is more than 32 bytes long, use only its first 32 bytes;
    # if it is less than 32 bytes long, pad it by appending the required number
    # of additional bytes from the beginning of the padding string
    # (_encryption_padding).
    password_bytes = b_((str_(password) + str_(_encryption_padding))[:32])
    # 2. Initialize the MD5 hash function and pass the result of step 1 as
    # input to this function.
    m = md5(password_bytes)
    # 3. Pass the value of the encryption dictionary's /O entry to the MD5 hash
    # function.
    m.update(owner_entry.original_bytes)
    # 4. Treat the value of the /P entry as an unsigned 4-byte integer and pass
    # these bytes to the MD5 hash function, low-order byte first.
    # A negative value is the signed reading of the 32-bit pattern, a
    # non-negative one may be the unsigned reading; for every value that fits
    # the signed range both formats produce identical bytes, so choosing the
    # format by sign only widens the accepted input range.
    p_format = "<i" if p_entry < 0 else "<I"
    m.update(struct.pack(p_format, p_entry))
    # 5. Pass the first element of the file's file identifier array to the MD5
    # hash function.
    m.update(id1_entry.original_bytes)
    # 6. (Revision 3 or greater) If document metadata is not being encrypted,
    # pass 4 bytes with the value 0xFFFFFFFF to the MD5 hash function.
    if rev >= 3 and not metadata_encrypt:
        m.update(b"\xff\xff\xff\xff")
    # 7. Finish the hash.
    md5_hash = m.digest()
    # 8. (Revision 3 or greater) Do the following 50 times: Take the output
    # from the previous MD5 hash and pass the first n bytes of the output as
    # input into a new MD5 hash, where n is the number of bytes of the
    # encryption key as defined by the value of the encryption dictionary's
    # /Length entry.
    if rev >= 3:
        for _ in range(50):
            md5_hash = md5(md5_hash[:keylen]).digest()
    # 9. Set the encryption key to the first n bytes of the output from the
    # final MD5 hash, where n is always 5 for revision 2 but, for revision 3 or
    # greater, depends on the value of the encryption dictionary's /Length
    # entry.
    return md5_hash[:keylen]
def _alg33(
    owner_password: str, user_password: str, rev: Literal[2, 3, 4], keylen: int
) -> bytes:
    """
    Compute the value stored as the /O entry of the encryption dictionary.

    Implementation of algorithm 3.3 of the PDF standard security handler,
    section 3.5.2 of the PDF 1.6 reference.

    :param owner_password: password the RC4 key is derived from (steps 1-4).
    :param user_password: password that gets padded and encrypted.
    :param rev: revision of the security handler; revisions >= 3 add the
        19 extra RC4 rounds of step 7.
    :param keylen: number of bytes of the RC4 key.
    :return: output of the last RC4 invocation.
    """
    # Steps 1 - 4: derive the RC4 key from the owner password.
    rc4_key = _alg33_1(owner_password, rev, keylen)

    # Step 5: pad or truncate the user password as in step 1 of
    # algorithm 3.2.
    padded_user_password = b_((user_password + str_(_encryption_padding))[:32])

    # Step 6: first RC4 pass with the unmodified key.
    owner_value = RC4_encrypt(rc4_key, padded_user_password)

    # Step 7 (revision 3 or greater): 19 further passes, each feeding the
    # previous output back in, keyed with every key byte XORed with the
    # iteration counter (1 to 19).
    if rev >= 3:
        for counter in range(1, 20):
            round_key = "".join(
                chr(ord_(key_char) ^ counter) for key_char in rc4_key
            )
            owner_value = RC4_encrypt(round_key, owner_value)

    # Step 8: the final RC4 output is the /O entry.
    return owner_value
def _alg33_1(password: str, rev: Literal[2, 3, 4], keylen: int) -> bytes:
    """
    Steps 1-4 of algorithm 3.3: derive the RC4 key from a password.

    :param password: password to pad/truncate to 32 bytes and hash.
    :param rev: revision of the security handler; revisions >= 3 re-hash
        the digest 50 more times.
    :param keylen: number of leading digest bytes to return.
    :return: the first ``keylen`` bytes of the final MD5 digest.
    """
    # Step 1: pad or truncate the password as described in step 1 of
    # algorithm 3.2.
    padded = b_((password + str_(_encryption_padding))[:32])

    # Step 2: hash the padded password.
    digest = md5(padded).digest()

    # Step 3 (revision 3 or greater): feed the digest back through MD5
    # 50 times; revision 2 performs no extra rounds.
    extra_rounds = 50 if rev >= 3 else 0
    for _ in range(extra_rounds):
        digest = md5(digest).digest()

    # Step 4: the RC4 key is the first n bytes of the final digest.
    return digest[:keylen]
def _alg34(
    password: str,
    owner_entry: ByteStringObject,
    p_entry: int,
    id1_entry: ByteStringObject,
) -> Tuple[bytes, bytes]:
    """
    Compute the /U entry and the encryption key for a revision 2 handler.

    Implementation of algorithm 3.4 of the PDF standard security handler.
    See section 3.5.2 of the PDF 1.6 reference.

    :param password: the user password.
    :param owner_entry: the /O entry of the encryption dictionary.
    :param p_entry: the /P entry of the encryption dictionary.
    :param id1_entry: first element of the file identifier array.
    :return: a tuple ``(U, key)`` of the RC4-encrypted padding string and
        the key that was used to produce it.
    """
    # Step 1: derive the key via algorithm 3.2, always with revision 2 and
    # a 5-byte key.
    revision: Literal[2] = 2
    key_length = 5
    encryption_key = _alg32(
        password, revision, key_length, owner_entry, p_entry, id1_entry
    )

    # Step 2: RC4-encrypt the 32-byte padding string with that key.
    user_entry = RC4_encrypt(encryption_key, _encryption_padding)

    # Step 3: the result is the /U entry; the key is handed back alongside.
    return user_entry, encryption_key
def _alg35(
    password: str,
    rev: Literal[2, 3, 4],
    keylen: int,
    owner_entry: ByteStringObject,
    p_entry: int,
    id1_entry: ByteStringObject,
    metadata_encrypt: bool,
) -> Tuple[bytes, bytes]:
    """
    Compute the /U entry and the encryption key for revision 3 or greater.

    Implementation of algorithm 3.5 of the PDF standard security handler.
    See section 3.5.2 of the PDF 1.6 reference.

    :param password: the user password.
    :param rev: revision of the security handler, forwarded to ``_alg32``.
    :param keylen: number of bytes of the encryption key.
    :param owner_entry: the /O entry of the encryption dictionary.
    :param p_entry: the /P entry of the encryption dictionary.
    :param id1_entry: first element of the file identifier array.
    :param metadata_encrypt: accepted but not used by this function.
    :return: a tuple ``(U, key)``; ``U`` is the 16-byte RC4 result followed
        by 16 null bytes.
    """
    # NOTE(review): metadata_encrypt is not forwarded to _alg32, so the key
    # is always derived with _alg32's default (True). Confirm against the
    # callers before changing this - forwarding it alters the derived key.

    # Step 1: create the encryption key from the user password
    # (algorithm 3.2).
    encryption_key = _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry)

    # Steps 2 - 3: MD5 over the 32-byte padding string followed by the first
    # element of the file identifier array.
    hasher = md5(_encryption_padding)
    hasher.update(id1_entry.original_bytes)
    digest = hasher.digest()

    # Step 4: RC4-encrypt the 16-byte digest with the key from step 1.
    encrypted = RC4_encrypt(encryption_key, digest)

    # Step 5: 19 further RC4 passes, each feeding the previous output back
    # in, keyed with every byte of the step 1 key XORed with the iteration
    # counter (1 to 19).
    for counter in range(1, 20):
        round_key = b"".join(
            b_(chr(ord_(key_byte) ^ counter)) for key_byte in encryption_key
        )
        encrypted = RC4_encrypt(round_key, encrypted)

    # Step 6: append 16 bytes of arbitrary padding to form the 32-byte /U
    # value. Null bytes are used, as in the original implementation, whose
    # author noted that this matches a few other implementations.
    arbitrary_padding = b"\x00" * 16
    return encrypted + arbitrary_padding, encryption_key
def RC4_encrypt(key: Union[str, bytes], plaintext: bytes) -> bytes:
    """
    Apply the RC4 stream cipher to ``plaintext``.

    RC4 is symmetric: running the output through this function again with
    the same key yields the original input.

    :param key: the cipher key. Elements of a ``bytes`` key are used as-is;
        characters of a ``str`` key are used by their code point.
    :param plaintext: the data to encrypt (or decrypt).
    :return: the processed data, the same length as ``plaintext``.
    :raises ZeroDivisionError: if ``key`` is empty.
    """
    # Normalise the key to integers once instead of converting inside the
    # 256-step scheduling loop.
    key_codes = [ord(c) if isinstance(c, str) else c for c in key]
    key_length = len(key_codes)

    # Key-scheduling: permute the state table under control of the key.
    state = list(range(256))
    j = 0
    for i in range(256):
        j = (j + state[i] + key_codes[i % key_length]) % 256
        state[i], state[j] = state[j], state[i]

    # Keystream generation: one keystream byte per input byte, XORed in.
    # A bytearray avoids building a one-byte object for every output byte.
    i = j = 0
    output = bytearray()
    for item in plaintext:
        i = (i + 1) % 256
        j = (j + state[i]) % 256
        state[i], state[j] = state[j], state[i]
        keystream_byte = state[(state[i] + state[j]) % 256]
        value = ord(item) if isinstance(item, str) else item
        output.append(value ^ keystream_byte)
    return bytes(output)