Move hash functions into Crypto module
fviard committed Oct 18, 2022
1 parent 6f96ba5 commit bab45a7
Showing 6 changed files with 68 additions and 61 deletions.
51 changes: 50 additions & 1 deletion S3/Crypto.py
@@ -18,7 +18,7 @@

from . import Config
from logging import debug
from .BaseUtils import encode_to_s3, decode_from_s3, s3_quote
from .BaseUtils import encode_to_s3, decode_from_s3, s3_quote, md5
from .Utils import time_to_epoch, deunicodise, check_bucket_name_dns_support
from .SortedDict import SortedDict

@@ -29,6 +29,7 @@

__all__ = []


def format_param_str(params, always_have_equal=False, limited_keys=None):
"""
Format URL parameters from a params dict and returns
@@ -56,6 +57,7 @@ def format_param_str(params, always_have_equal=False, limited_keys=None):
return param_str and "?" + param_str[1:]
__all__.append("format_param_str")


### AWS Version 2 signing
def sign_string_v2(string_to_sign):
"""Sign a string with the secret key, returning base64 encoded results.
@@ -71,6 +73,7 @@ def sign_string_v2(string_to_sign):
return signature
__all__.append("sign_string_v2")


def sign_request_v2(method='GET', canonical_uri='/', params=None, cur_headers=None):
"""Sign a string with the secret key, returning base64 encoded results.
By default the configured secret key is used, but may be overridden as
@@ -122,6 +125,7 @@ def sign_request_v2(method='GET', canonical_uri='/', params=None, cur_headers=None):
return new_headers
__all__.append("sign_request_v2")


def sign_url_v2(url_to_sign, expiry):
"""Sign a URL in s3://bucket/object form with the given expiry
time. The object will be accessible via the signed URL until the
@@ -137,6 +141,7 @@ def sign_url_v2(url_to_sign, expiry):
)
__all__.append("sign_url_v2")


def sign_url_base_v2(**parms):
"""Shared implementation of sign_url methods. Takes a hash of 'bucket', 'object' and 'expiry' as args."""
content_disposition=Config.Config().content_disposition
@@ -171,10 +176,13 @@ def sign_url_base_v2(**parms):
if content_type:
url += "&response-content-type=" + s3_quote(content_type, unicode_output=True)
return url
__all__.append("sign_url_base_v2")


def sign(key, msg):
return hmac.new(key, encode_to_s3(msg), sha256).digest()


def getSignatureKey(key, dateStamp, regionName, serviceName):
"""
Input: unicode params
@@ -186,6 +194,7 @@ def getSignatureKey(key, dateStamp, regionName, serviceName):
kSigning = sign(kService, 'aws4_request')
return kSigning


def sign_request_v4(method='GET', host='', canonical_uri='/', params=None,
region='us-east-1', cur_headers=None, body=b''):
service = 's3'
@@ -281,3 +290,43 @@ def checksum_sha256_buffer(buffer, offset=0, size=None):
return hash
__all__.append("checksum_sha256_buffer")


def generate_content_md5(body):
    m = md5(encode_to_s3(body))
    base64md5 = encodestring(m.digest())
    base64md5 = decode_from_s3(base64md5)
    if base64md5[-1] == '\n':
        base64md5 = base64md5[0:-1]
    return decode_from_s3(base64md5)
__all__.append("generate_content_md5")


def hash_file_md5(filename):
    h = md5()
    with open(deunicodise(filename), "rb") as fp:
        while True:
            # Hash 32kB chunks
            data = fp.read(32*1024)
            if not data:
                break
            h.update(data)
    return h.hexdigest()
__all__.append("hash_file_md5")


def calculateChecksum(buffer, mfile, offset, chunk_size, send_chunk):
    md5_hash = md5()
    size_left = chunk_size
    if buffer == '':
        mfile.seek(offset)
        while size_left > 0:
            data = mfile.read(min(send_chunk, size_left))
            if not data:
                break
            md5_hash.update(data)
            size_left -= len(data)
    else:
        md5_hash.update(buffer)

    return md5_hash.hexdigest()
__all__.append("calculateChecksum")
3 changes: 2 additions & 1 deletion S3/FileDict.py
@@ -10,6 +10,7 @@

import logging
from .SortedDict import SortedDict
from .Crypto import hash_file_md5
from . import Utils
from . import Config

@@ -45,7 +46,7 @@ def get_md5(self, relative_file):
md5 = self.get_hardlink_md5(relative_file)
if md5 is None and 'md5' in cfg.sync_checks:
logging.debug(u"doing file I/O to read md5 of %s" % relative_file)
md5 = Utils.hash_file_md5(self[relative_file]['full_name'])
md5 = hash_file_md5(self[relative_file]['full_name'])
self.record_md5(relative_file, md5)
self[relative_file]['md5'] = md5
return md5
3 changes: 2 additions & 1 deletion S3/MultiPart.py
@@ -8,10 +8,11 @@

import sys
from logging import debug, info, warning, error
from .Crypto import calculateChecksum
from .Exceptions import ParameterError
from .S3Uri import S3UriS3
from .BaseUtils import getTextFromXml, getTreeFromXml, s3_quote, parseNodes
from .Utils import formatSize, calculateChecksum
from .Utils import formatSize

SIZE_1MB = 1024 * 1024

29 changes: 8 additions & 21 deletions S3/S3.py
@@ -24,21 +24,14 @@
from urlparse import urlparse
except ImportError:
from urllib.parse import urlparse
try:
# Python 2 support
from base64 import encodestring
except ImportError:
# Python 3.9.0+ support
from base64 import encodebytes as encodestring

import select

from .BaseUtils import (getListFromXml, getTextFromXml, getRootTagName,
decode_from_s3, encode_to_s3, md5, s3_quote)
from .Utils import (convertHeaderTupleListToDict, hash_file_md5, unicodise,
from .Utils import (convertHeaderTupleListToDict, unicodise,
deunicodise, check_bucket_name,
check_bucket_name_dns_support, getHostnameFromBucket,
calculateChecksum)
check_bucket_name_dns_support, getHostnameFromBucket)
from .SortedDict import SortedDict
from .AccessLog import AccessLog
from .ACL import ACL, GranteeLogDelivery
@@ -49,7 +42,8 @@
from .S3Uri import S3Uri
from .ConnMan import ConnMan
from .Crypto import (sign_request_v2, sign_request_v4, checksum_sha256_file,
checksum_sha256_buffer, format_param_str)
checksum_sha256_buffer, generate_content_md5,
hash_file_md5, calculateChecksum, format_param_str)

try:
from ctypes import ArgumentError
@@ -599,7 +593,7 @@ def _expiration_set(self, uri):
body += '</LifecycleConfiguration>'

headers = SortedDict(ignore_case = True)
headers['content-md5'] = compute_content_md5(body)
headers['content-md5'] = generate_content_md5(body)
bucket = uri.bucket()
request = self.create_request("BUCKET_CREATE", bucket = bucket,
headers = headers, body = body,
@@ -788,7 +782,7 @@ def compose_batch_del_xml(bucket, key_list):
raise ValueError("Key list is empty")
bucket = S3Uri(batch[0]).bucket()
request_body = compose_batch_del_xml(bucket, batch)
headers = SortedDict({'content-md5': compute_content_md5(request_body),
headers = SortedDict({'content-md5': generate_content_md5(request_body),
'content-type': 'application/xml'}, ignore_case=True)
request = self.create_request("BATCH_DELETE", bucket = bucket,
headers = headers, body = request_body,
@@ -1097,7 +1091,7 @@ def set_cors(self, uri, cors):
headers = SortedDict(ignore_case = True)
# TODO check cors is proper json string
headers['content-type'] = 'application/xml'
headers['content-md5'] = compute_content_md5(cors)
headers['content-md5'] = generate_content_md5(cors)
request = self.create_request("BUCKET_CREATE", uri = uri,
headers=headers, body = cors,
uri_params = {'cors': None})
@@ -1113,7 +1107,7 @@ def delete_cors(self, uri):

def set_lifecycle_policy(self, uri, policy):
headers = SortedDict(ignore_case = True)
headers['content-md5'] = compute_content_md5(policy)
headers['content-md5'] = generate_content_md5(policy)
request = self.create_request("BUCKET_CREATE", uri = uri,
headers=headers, body = policy,
uri_params = {'lifecycle': None})
@@ -2132,11 +2126,4 @@ def parse_attrs_header(attrs_header):
attrs[key] = val
return attrs

def compute_content_md5(body):
    m = md5(encode_to_s3(body))
    base64md5 = encodestring(m.digest())
    base64md5 = decode_from_s3(base64md5)
    if base64md5[-1] == '\n':
        base64md5 = base64md5[0:-1]
    return decode_from_s3(base64md5)
# vim:et:ts=4:sts=4:ai
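
A quick sanity check (again, not part of the commit): the renamed generate_content_md5 should keep producing the standard Content-MD5 value, i.e. the base64 encoding of the raw 16-byte MD5 digest of the body. A minimal sketch, assuming an s3cmd checkout on sys.path:

    import base64
    import hashlib

    from S3.Crypto import generate_content_md5

    body = '<LifecycleConfiguration/>'
    # encodestring/encodebytes appends a trailing newline, which the helper strips,
    # so the result matches a plain b64encode of the digest.
    expected = base64.b64encode(hashlib.md5(body.encode('utf-8')).digest()).decode('ascii')
    assert generate_content_md5(body) == expected
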
31 changes: 0 additions & 31 deletions S3/Utils.py
@@ -100,19 +100,6 @@ def mktmpfile(prefix = os.getenv('TMP','/tmp') + "/tmpfile-", randchars = 20):
__all__.append("mktmpfile")


def hash_file_md5(filename):
    h = md5()
    with open(deunicodise(filename), "rb") as fp:
        while True:
            # Hash 32kB chunks
            data = fp.read(32*1024)
            if not data:
                break
            h.update(data)
    return h.hexdigest()
__all__.append("hash_file_md5")


def mkdir_with_parents(dir_name):
"""
mkdir_with_parents(dst_dir)
@@ -308,24 +295,6 @@ def getHostnameFromBucket(bucket):
__all__.append("getHostnameFromBucket")


def calculateChecksum(buffer, mfile, offset, chunk_size, send_chunk):
    md5_hash = md5()
    size_left = chunk_size
    if buffer == '':
        mfile.seek(offset)
        while size_left > 0:
            data = mfile.read(min(send_chunk, size_left))
            if not data:
                break
            md5_hash.update(data)
            size_left -= len(data)
    else:
        md5_hash.update(buffer)

    return md5_hash.hexdigest()
__all__.append("calculateChecksum")


# Deal with the fact that pwd and grp modules don't exist for Windows
try:
import pwd
12 changes: 6 additions & 6 deletions s3cmd
@@ -2281,7 +2281,7 @@ def cmd_accesslog(args):
def cmd_sign(args):
string_to_sign = args.pop()
debug(u"string-to-sign: %r" % string_to_sign)
signature = Crypto.sign_string_v2(encode_to_s3(string_to_sign))
signature = sign_string_v2(encode_to_s3(string_to_sign))
output(u"Signature: %s" % decode_from_s3(signature))
return EX_OK

@@ -2291,7 +2291,7 @@ def cmd_signurl(args):
if url_to_sign.type != 's3':
raise ParameterError("Must be S3Uri. Got: %s" % url_to_sign)
debug("url to sign: %r" % url_to_sign)
signed_url = Crypto.sign_url_v2(url_to_sign, expiry)
signed_url = sign_url_v2(url_to_sign, expiry)
output(signed_url)
return EX_OK

@@ -2527,9 +2527,9 @@ def run_configure(config_file, args):
ret_enc = gpg_encrypt(filename)
ret_dec = gpg_decrypt(ret_enc[1], ret_enc[2], False)
hash = [
Utils.hash_file_md5(filename),
Utils.hash_file_md5(ret_enc[1]),
Utils.hash_file_md5(ret_dec[1]),
hash_file_md5(filename),
hash_file_md5(ret_enc[1]),
hash_file_md5(ret_dec[1]),
]
os.unlink(deunicodise(filename))
os.unlink(deunicodise(ret_enc[1]))
@@ -3270,9 +3270,9 @@ if __name__ == '__main__':
from S3.FileDict import FileDict
from S3.S3Uri import S3Uri
from S3 import Utils
from S3 import Crypto
from S3.BaseUtils import (formatDateTime, getPrettyFromXml,
encode_to_s3, decode_from_s3)
from S3.Crypto import hash_file_md5, sign_string_v2, sign_url_v2
from S3.Utils import (formatSize, unicodise_safe, unicodise_s,
unicodise, deunicodise, replace_nonprintables)
from S3.Progress import Progress, StatsInfo
