From 2ccecb6b08348a4b6b7bdc20b1102e1b120c1b91 Mon Sep 17 00:00:00 2001 From: Adrian Moennich Date: Fri, 2 Nov 2018 15:51:07 +0100 Subject: [PATCH] Storage/S3: Make dynamic bucket names non-predictable --- storage_s3/indico_storage_s3/plugin.py | 30 +++++++++---- storage_s3/indico_storage_s3/task.py | 6 +-- storage_s3/tests/plugin_test.py | 62 ++++++++++++++------------ 3 files changed, 59 insertions(+), 39 deletions(-) diff --git a/storage_s3/indico_storage_s3/plugin.py b/storage_s3/indico_storage_s3/plugin.py index cd269b1..03497c1 100644 --- a/storage_s3/indico_storage_s3/plugin.py +++ b/storage_s3/indico_storage_s3/plugin.py @@ -16,6 +16,8 @@ from __future__ import unicode_literals +import hashlib +import hmac import sys from contextlib import contextmanager from datetime import date @@ -65,7 +67,7 @@ class S3StoragePlugin(IndicoPlugin): click.echo('Storage {} does not exist'.format(key)) sys.exit(1) if isinstance(storage_instance, S3Storage): - bucket_name = storage_instance._get_current_bucket() + bucket_name = storage_instance._get_current_bucket_name() if storage_instance._bucket_exists(bucket_name): click.echo('Storage {}: bucket {} already exists'.format(key, bucket_name)) continue @@ -93,15 +95,19 @@ class S3Storage(Storage): session_kwargs['aws_access_key_id'] = data['access_key'] if 'secret_key' in data: session_kwargs['aws_secret_access_key'] = data['secret_key'] - self.bucket = data['bucket'] + self.original_bucket_name = data['bucket'] self.bucket_policy_file = data.get('bucket_policy_file') self.bucket_versioning = data.get('bucket_versioning') in ('1', 'true', 'yes') session = boto3.session.Session(**session_kwargs) self.client = session.client('s3', endpoint_url=endpoint_url) + self.bucket_secret = (data.get('bucket_secret', '') or + session._session.get_scoped_config().get('indico_bucket_secret', '')).encode('utf-8') + if not self.bucket_secret and self._has_dynamic_bucket_name: + raise StorageError('A bucket secret is required when using dynamic bucket names') def open(self, file_id): try: - s3_object = self.client.get_object(Bucket=self._get_current_bucket(), Key=file_id)['Body'] + s3_object = self.client.get_object(Bucket=self._get_current_bucket_name(), Key=file_id)['Body'] return BytesIO(s3_object.read()) except Exception as e: raise StorageError('Could not open "{}": {}'.format(file_id, e)), None, sys.exc_info()[2] @@ -116,7 +122,7 @@ class S3Storage(Storage): def save(self, name, content_type, filename, fileobj): try: fileobj = self._ensure_fileobj(fileobj) - bucket = self._get_current_bucket() + bucket = self._get_current_bucket_name() checksum = get_file_checksum(fileobj) fileobj.seek(0) content_md5 = checksum.decode('hex').encode('base64').strip() @@ -157,11 +163,19 @@ class S3Storage(Storage): except Exception as e: raise StorageError('Could not send file "{}": {}'.format(file_id, e)), None, sys.exc_info()[2] - def _get_current_bucket(self): - return self._replace_bucket_placeholders(self.bucket, date.today()) + def _get_current_bucket_name(self): + return self._get_bucket_name(date.today()) - def _get_original_bucket_name(self): - return self.bucket + def _get_bucket_name(self, date): + name = self._replace_bucket_placeholders(self.original_bucket_name, date) + if name == self.original_bucket_name or not self.bucket_secret: + return name + token = hmac.new(self.bucket_secret, name, hashlib.md5).hexdigest() + return '{}-{}'.format(name, token) + + @property + def _has_dynamic_bucket_name(self): + return self._get_current_bucket_name() != self.original_bucket_name def _replace_bucket_placeholders(self, name, date): name = name.replace('', date.strftime('%Y')) diff --git a/storage_s3/indico_storage_s3/task.py b/storage_s3/indico_storage_s3/task.py index 50b9e3a..d13e8eb 100644 --- a/storage_s3/indico_storage_s3/task.py +++ b/storage_s3/indico_storage_s3/task.py @@ -36,15 +36,15 @@ def create_bucket(): if not isinstance(storage, S3Storage): continue today = date.today() - bucket_name = storage._get_original_bucket_name() + bucket_name = storage.original_bucket_name placeholders = set(re.findall('<.*?>', bucket_name)) if placeholders == {'', ''}: bucket_date = today + relativedelta(weeks=1) - bucket = storage._replace_bucket_placeholders(bucket_name, bucket_date) + bucket = storage._get_bucket_name(bucket_date) storage._create_bucket(bucket) elif placeholders == {'', ''} or (placeholders == {''} and today.month == 12): bucket_date = today + relativedelta(months=1) - bucket = storage._replace_bucket_placeholders(bucket_name, bucket_date) + bucket = storage._get_bucket_name(bucket_date) storage._create_bucket(bucket) elif placeholders == {''} or placeholders == {''} or placeholders == {'', ''}: raise RuntimeError('Placeholders combination in bucket name is not correct: %s', bucket_name) diff --git a/storage_s3/tests/plugin_test.py b/storage_s3/tests/plugin_test.py index 6772704..c18efec 100644 --- a/storage_s3/tests/plugin_test.py +++ b/storage_s3/tests/plugin_test.py @@ -16,16 +16,26 @@ from __future__ import unicode_literals -import mock -import pytest -from freezegun import freeze_time +import hashlib +import hmac -import indico_storage_s3.plugin as plugin +import pytest + +from indico_storage_s3 import plugin from indico_storage_s3.task import create_bucket +@pytest.fixture(autouse=True) +def mock_boto3(mocker): + mocker.patch('indico_storage_s3.plugin.boto3') + + +def test_resolve_bucket_name_static(): + storage = plugin.S3Storage('bucket=test,bucket_secret=secret') + assert storage._get_current_bucket_name() == 'test' + + @pytest.mark.parametrize(('date', 'name_template', 'expected_name'), ( - ('2018-04-11', 'name', 'name'), ('2018-04-11', 'name-', 'name-2018'), ('2018-04-11', 'name--', 'name-2018-04'), ('2018-04-11', 'name--', 'name-2018-15'), @@ -33,17 +43,12 @@ from indico_storage_s3.task import create_bucket ('2019-01-01', 'name--', 'name-2019-00'), )) -@mock.patch.object(plugin.S3Storage, '__init__', lambda self: None) -def test_resolve_bucket_name(date, name_template, expected_name): - with freeze_time(date): - storage = plugin.S3Storage() - storage.bucket = name_template - assert storage._get_current_bucket() == expected_name - - -@mock.create_autospec -def mock_bucket_created(self, name): - pass +def test_resolve_bucket_name_dynamic(freeze_time, date, name_template, expected_name): + freeze_time(date) + storage = plugin.S3Storage('bucket={},bucket_secret=secret'.format(name_template)) + name, token = storage._get_current_bucket_name().rsplit('-', 1) + assert name == expected_name + assert token == hmac.new(b'secret', expected_name, hashlib.md5).hexdigest() class MockConfig(object): @@ -65,19 +70,20 @@ class MockConfig(object): ('2018-01-01', 'name--', True, 'name-2018-02', None), ('2018-12-03', 'name--', True, 'name-2018-50', None), )) -def test_dynamic_bucket_creation_task(date, name_template, bucket_created, expected_name, expected_error): - with freeze_time(date), \ - mock.patch.object(plugin.S3Storage, '__init__', lambda self: None), \ - mock.patch.object(plugin.S3Storage, '_get_original_bucket_name', return_value=name_template), \ - mock.patch('indico_storage_s3.task.config', MockConfig()), \ - mock.patch('indico_storage_s3.task.get_storage', return_value=plugin.S3Storage()), \ - mock.patch.object(plugin.S3Storage, '_create_bucket', mock_bucket_created) as create_bucket_call: - if expected_error: - with pytest.raises(expected_error): - create_bucket() - else: +def test_dynamic_bucket_creation_task(freeze_time, mocker, date, name_template, bucket_created, expected_name, + expected_error): + freeze_time(date) + storage = plugin.S3Storage('bucket={},bucket_secret=secret'.format(name_template)) + mocker.patch('indico_storage_s3.task.config', MockConfig()) + mocker.patch('indico_storage_s3.task.get_storage', return_value=storage) + create_bucket_call = mocker.patch.object(plugin.S3Storage, '_create_bucket') + if expected_error: + with pytest.raises(expected_error): create_bucket() + else: + create_bucket() if bucket_created: - create_bucket_call.assert_called_with(mock.ANY, expected_name) + token = hmac.new(b'secret', expected_name, hashlib.md5).hexdigest() + create_bucket_call.assert_called_with('{}-{}'.format(expected_name, token)) else: assert not create_bucket_call.called