From eaf32f115310f11fb756c874eed3eae92ef43183 Mon Sep 17 00:00:00 2001 From: Natalia Juszka Date: Thu, 8 Mar 2018 16:37:21 +0100 Subject: [PATCH 01/54] Storage/S3: create plugin --- storage_s3/indico_storage_s3/__init__.py | 0 storage_s3/indico_storage_s3/plugin.py | 62 ++++++++++++++++++++++++ storage_s3/setup.py | 43 ++++++++++++++++ 3 files changed, 105 insertions(+) create mode 100644 storage_s3/indico_storage_s3/__init__.py create mode 100644 storage_s3/indico_storage_s3/plugin.py create mode 100644 storage_s3/setup.py diff --git a/storage_s3/indico_storage_s3/__init__.py b/storage_s3/indico_storage_s3/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/storage_s3/indico_storage_s3/plugin.py b/storage_s3/indico_storage_s3/plugin.py new file mode 100644 index 0000000..a552247 --- /dev/null +++ b/storage_s3/indico_storage_s3/plugin.py @@ -0,0 +1,62 @@ +# This file is part of Indico. +# Copyright (C) 2002 - 2018 European Organization for Nuclear Research (CERN). +# +# Indico is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 3 of the +# License, or (at your option) any later version. +# +# Indico is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Indico; if not, see . + +from __future__ import unicode_literals + +import ast + +from indico.core.plugins import IndicoPlugin +from indico.core.storage import Storage +from indico.core import signals + + +class S3StoragePlugin(IndicoPlugin): + """S3 Storage + + Provides S3 storage backends. 
+ """ + + def init(self): + super(S3StoragePlugin, self).init() + self.connect(signals.get_storage_backends, self._get_storage_backends) + + def _get_storage_backends(self, sender, **kwargs): + return S3Storage + + +class S3Storage(Storage): + name = 's3' + simple_data = False + + def __init__(self, data): + data = self._parse_data(data) + self.could_host = data['host'] + self.datestamp = bool(ast.literal_eval(data.get('datestamp', 'False').title())) + + def open(self, file_id): + pass + + def save(self, file_id): + pass + + def delete(self, file_id): + pass + + def getsize(self, file_id): + pass + + def send_file(self, file_id): + pass diff --git a/storage_s3/setup.py b/storage_s3/setup.py new file mode 100644 index 0000000..f4dc3fa --- /dev/null +++ b/storage_s3/setup.py @@ -0,0 +1,43 @@ +# This file is part of Indico. +# Copyright (C) 2002 - 2018 European Organization for Nuclear Research (CERN). +# +# Indico is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 3 of the +# License, or (at your option) any later version. +# +# Indico is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Indico; if not, see . 
+ +from __future__ import unicode_literals + +from setuptools import setup + + +setup( + name='indico-plugin-storage-s3', + version='1.0', + description='S3 storage backend for Indico', + url='https://github.com/indico/indico-plugins', + license='https://www.gnu.org/licenses/gpl-3.0.txt', + author='Indico Team', + author_email='indico-team@cern.ch', + py_modules=('indico_storage_s3',), + zip_safe=False, + platforms='any', + install_requires=[ + 'indico>=2.0', + ], + classifiers=[ + 'Environment :: Plugins', + 'Environment :: Web Environment', + 'License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)', + 'Programming Language :: Python :: 2.7' + ], + entry_points={'indico.plugins': {'storage_s3 = indico_storage_cloud:S3StoragePlugin'}} +) From 15f9f4326e8637e019c1fc75042d2be397bca181 Mon Sep 17 00:00:00 2001 From: Natalia Juszka Date: Thu, 22 Mar 2018 11:10:35 +0100 Subject: [PATCH 02/54] Storage/S3: Initialize storage --- storage_s3/indico_storage_s3/plugin.py | 19 +++++++++++++------ storage_s3/setup.py | 3 ++- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/storage_s3/indico_storage_s3/plugin.py b/storage_s3/indico_storage_s3/plugin.py index a552247..1545d98 100644 --- a/storage_s3/indico_storage_s3/plugin.py +++ b/storage_s3/indico_storage_s3/plugin.py @@ -16,8 +16,7 @@ from __future__ import unicode_literals -import ast - +import boto3 from indico.core.plugins import IndicoPlugin from indico.core.storage import Storage from indico.core import signals @@ -43,13 +42,21 @@ class S3Storage(Storage): def __init__(self, data): data = self._parse_data(data) - self.could_host = data['host'] - self.datestamp = bool(ast.literal_eval(data.get('datestamp', 'False').title())) + self.host = data['host'] + self.bucket = data['bucket'] + self.secret_key = data['secret_key'] + self.access_key = data['access_key'] + self.client = boto3.client( + 's3', + endpoint_url=self.host, + aws_access_key_id=self.access_key, + 
aws_secret_access_key=self.secret_key, + ) def open(self, file_id): pass - def save(self, file_id): + def save(self, name, content_type, filename, fileobj): pass def delete(self, file_id): @@ -58,5 +65,5 @@ class S3Storage(Storage): def getsize(self, file_id): pass - def send_file(self, file_id): + def send_file(self, file_id, content_type, filename, inline=True): pass diff --git a/storage_s3/setup.py b/storage_s3/setup.py index f4dc3fa..7f4347d 100644 --- a/storage_s3/setup.py +++ b/storage_s3/setup.py @@ -32,6 +32,7 @@ setup( platforms='any', install_requires=[ 'indico>=2.0', + 'boto3>=1.6.5' ], classifiers=[ 'Environment :: Plugins', @@ -39,5 +40,5 @@ setup( 'License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)', 'Programming Language :: Python :: 2.7' ], - entry_points={'indico.plugins': {'storage_s3 = indico_storage_cloud:S3StoragePlugin'}} + entry_points={'indico.plugins': {'storage_s3 = indico_storage_s3.plugin:S3StoragePlugin'}} ) From 65e99933a720d4a512a7c6a16b59bbfffa71e5cc Mon Sep 17 00:00:00 2001 From: Natalia Juszka Date: Thu, 22 Mar 2018 17:45:25 +0100 Subject: [PATCH 03/54] Storage/S3: Add basic functionality --- storage_s3/indico_storage_s3/plugin.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/storage_s3/indico_storage_s3/plugin.py b/storage_s3/indico_storage_s3/plugin.py index 1545d98..dcc9d47 100644 --- a/storage_s3/indico_storage_s3/plugin.py +++ b/storage_s3/indico_storage_s3/plugin.py @@ -20,6 +20,7 @@ import boto3 from indico.core.plugins import IndicoPlugin from indico.core.storage import Storage from indico.core import signals +from indico.web.flask.util import send_file class S3StoragePlugin(IndicoPlugin): @@ -54,16 +55,19 @@ class S3Storage(Storage): ) def open(self, file_id): - pass + return self.client.get_object(Bucket=self.bucket, Key=file_id)['Body'] def save(self, name, content_type, filename, fileobj): - pass + self.client.upload_fileobj(fileobj, self.bucket, name) + return 
name, '' def delete(self, file_id): - pass + self.client.delete_object(self.bucket, file_id) def getsize(self, file_id): - pass + response = self.client.head_object(Bucket=self.bucket, Key=file_id) + return response['ContentLength'] def send_file(self, file_id, content_type, filename, inline=True): - pass + fileobj = self.client.get_object(Bucket=self.bucket, Key=file_id)['Body'] + return send_file(filename, fileobj, content_type, inline=inline) From 09bc16ecef3edb4852d211f3536cc2f6d2599734 Mon Sep 17 00:00:00 2001 From: Natalia Juszka Date: Fri, 23 Mar 2018 17:11:40 +0100 Subject: [PATCH 04/54] Storage/S3: Override get_local_path method --- storage_s3/indico_storage_s3/plugin.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/storage_s3/indico_storage_s3/plugin.py b/storage_s3/indico_storage_s3/plugin.py index dcc9d47..9356e79 100644 --- a/storage_s3/indico_storage_s3/plugin.py +++ b/storage_s3/indico_storage_s3/plugin.py @@ -16,10 +16,15 @@ from __future__ import unicode_literals +from contextlib import contextmanager +from tempfile import NamedTemporaryFile + import boto3 + +from indico.core import signals +from indico.core.config import config from indico.core.plugins import IndicoPlugin from indico.core.storage import Storage -from indico.core import signals from indico.web.flask.util import send_file @@ -57,6 +62,13 @@ class S3Storage(Storage): def open(self, file_id): return self.client.get_object(Bucket=self.bucket, Key=file_id)['Body'] + @contextmanager + def get_local_path(self, file_id): + with NamedTemporaryFile(suffix='indico.tmp', dir=config.TEMP_DIR) as tmpfile: + self._copy_file(self.open(file_id), tmpfile) + tmpfile.flush() + yield tmpfile.name + def save(self, name, content_type, filename, fileobj): self.client.upload_fileobj(fileobj, self.bucket, name) return name, '' From 4699f85234ecabc28e6e92ab65979e0db1ce3761 Mon Sep 17 00:00:00 2001 From: Natalia Juszka Date: Mon, 26 Mar 2018 10:35:32 +0200 Subject: [PATCH 
05/54] Storage/S3: Add error handling --- storage_s3/indico_storage_s3/plugin.py | 33 +++++++++++++++++++------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/storage_s3/indico_storage_s3/plugin.py b/storage_s3/indico_storage_s3/plugin.py index 9356e79..ed4b873 100644 --- a/storage_s3/indico_storage_s3/plugin.py +++ b/storage_s3/indico_storage_s3/plugin.py @@ -16,6 +16,7 @@ from __future__ import unicode_literals +import sys from contextlib import contextmanager from tempfile import NamedTemporaryFile @@ -24,7 +25,7 @@ import boto3 from indico.core import signals from indico.core.config import config from indico.core.plugins import IndicoPlugin -from indico.core.storage import Storage +from indico.core.storage import Storage, StorageError from indico.web.flask.util import send_file @@ -60,7 +61,10 @@ class S3Storage(Storage): ) def open(self, file_id): - return self.client.get_object(Bucket=self.bucket, Key=file_id)['Body'] + try: + return self.client.get_object(Bucket=self.bucket, Key=file_id)['Body'] + except Exception as e: + raise StorageError('Could not open "{}": {}'.format(file_id, e)), None, sys.exc_info()[2] @contextmanager def get_local_path(self, file_id): @@ -70,16 +74,27 @@ class S3Storage(Storage): yield tmpfile.name def save(self, name, content_type, filename, fileobj): - self.client.upload_fileobj(fileobj, self.bucket, name) - return name, '' + try: + self.client.upload_fileobj(fileobj, self.bucket, name) + return name, '' + except Exception as e: + raise StorageError('Could not save "{}": {}'.format(name, e)), None, sys.exc_info()[2] def delete(self, file_id): - self.client.delete_object(self.bucket, file_id) + try: + self.client.delete_object(self.bucket, file_id) + except Exception as e: + raise StorageError('Could not delete "{}": {}'.format(file_id, e)), None, sys.exc_info()[2] def getsize(self, file_id): - response = self.client.head_object(Bucket=self.bucket, Key=file_id) - return response['ContentLength'] + try: + return 
self.client.head_object(Bucket=self.bucket, Key=file_id)['ContentLength'] + except Exception as e: + raise StorageError('Could not get size of "{}": {}'.format(file_id, e)), None, sys.exc_info()[2] def send_file(self, file_id, content_type, filename, inline=True): - fileobj = self.client.get_object(Bucket=self.bucket, Key=file_id)['Body'] - return send_file(filename, fileobj, content_type, inline=inline) + try: + fileobj = self.client.get_object(Bucket=self.bucket, Key=file_id)['Body'] + return send_file(filename, fileobj, content_type, inline=inline) + except Exception as e: + raise StorageError('Could not send file "{}": {}'.format(file_id, e)), None, sys.exc_info()[2] From 36f8ef06d2a7fb9caf3980308033a472b2f7447a Mon Sep 17 00:00:00 2001 From: Natalia Juszka Date: Mon, 26 Mar 2018 11:55:15 +0200 Subject: [PATCH 06/54] Storage/S3: Add md5 checksum --- storage_s3/indico_storage_s3/plugin.py | 6 ++++-- storage_s3/setup.py | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/storage_s3/indico_storage_s3/plugin.py b/storage_s3/indico_storage_s3/plugin.py index ed4b873..f35b4ee 100644 --- a/storage_s3/indico_storage_s3/plugin.py +++ b/storage_s3/indico_storage_s3/plugin.py @@ -75,8 +75,10 @@ class S3Storage(Storage): def save(self, name, content_type, filename, fileobj): try: - self.client.upload_fileobj(fileobj, self.bucket, name) - return name, '' + fileobject = self._ensure_fileobj(fileobj) + self.client.upload_fileobj(fileobject, self.bucket, name) + checksum = self.client.head_object(Bucket=self.bucket, Key=name)['ETag'][1:-1] + return name, checksum except Exception as e: raise StorageError('Could not save "{}": {}'.format(name, e)), None, sys.exc_info()[2] diff --git a/storage_s3/setup.py b/storage_s3/setup.py index 7f4347d..ab18d1b 100644 --- a/storage_s3/setup.py +++ b/storage_s3/setup.py @@ -21,7 +21,7 @@ from setuptools import setup setup( name='indico-plugin-storage-s3', - version='1.0', + version='1.0-dev', description='S3 storage 
backend for Indico', url='https://github.com/indico/indico-plugins', license='https://www.gnu.org/licenses/gpl-3.0.txt', @@ -32,7 +32,7 @@ setup( platforms='any', install_requires=[ 'indico>=2.0', - 'boto3>=1.6.5' + 'boto3>=1.6.16,<2.0' ], classifiers=[ 'Environment :: Plugins', From 6b3570ce2fb779c25c39df945b528a6be2a9d09a Mon Sep 17 00:00:00 2001 From: Natalia Juszka Date: Mon, 26 Mar 2018 15:37:31 +0200 Subject: [PATCH 07/54] Storage/S3: generate url to download file --- storage_s3/indico_storage_s3/plugin.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/storage_s3/indico_storage_s3/plugin.py b/storage_s3/indico_storage_s3/plugin.py index f35b4ee..7ed5b39 100644 --- a/storage_s3/indico_storage_s3/plugin.py +++ b/storage_s3/indico_storage_s3/plugin.py @@ -21,12 +21,12 @@ from contextlib import contextmanager from tempfile import NamedTemporaryFile import boto3 +from werkzeug.utils import redirect from indico.core import signals from indico.core.config import config from indico.core.plugins import IndicoPlugin from indico.core.storage import Storage, StorageError -from indico.web.flask.util import send_file class S3StoragePlugin(IndicoPlugin): @@ -96,7 +96,9 @@ class S3Storage(Storage): def send_file(self, file_id, content_type, filename, inline=True): try: - fileobj = self.client.get_object(Bucket=self.bucket, Key=file_id)['Body'] - return send_file(filename, fileobj, content_type, inline=inline) + url = self.client.generate_presigned_url('get_object', + Params={'Bucket': self.bucket, 'Key': file_id}, + ExpiresIn=100) + return redirect(url) except Exception as e: raise StorageError('Could not send file "{}": {}'.format(file_id, e)), None, sys.exc_info()[2] From 0b703f155ae1c019d74e333507f13fd3bf94310e Mon Sep 17 00:00:00 2001 From: Natalia Juszka Date: Mon, 26 Mar 2018 16:15:37 +0200 Subject: [PATCH 08/54] Storage/S3: add response headers to generate url --- storage_s3/indico_storage_s3/plugin.py | 10 ++++++++-- 1 file changed, 8 
insertions(+), 2 deletions(-) diff --git a/storage_s3/indico_storage_s3/plugin.py b/storage_s3/indico_storage_s3/plugin.py index 7ed5b39..258962c 100644 --- a/storage_s3/indico_storage_s3/plugin.py +++ b/storage_s3/indico_storage_s3/plugin.py @@ -96,9 +96,15 @@ class S3Storage(Storage): def send_file(self, file_id, content_type, filename, inline=True): try: + + content_disp = ('inline; filename="%s"' % filename if inline + else 'attachment; filename="%s"' % filename) url = self.client.generate_presigned_url('get_object', - Params={'Bucket': self.bucket, 'Key': file_id}, - ExpiresIn=100) + Params={'Bucket': self.bucket, + 'Key': file_id, + 'ResponseContentDisposition': content_disp, + 'ResponseContentType': content_type}, + ExpiresIn=120) return redirect(url) except Exception as e: raise StorageError('Could not send file "{}": {}'.format(file_id, e)), None, sys.exc_info()[2] From 9c6770992c38448ad6ee0da2fe24a345be254ea4 Mon Sep 17 00:00:00 2001 From: Natalia Juszka Date: Wed, 4 Apr 2018 16:42:34 +0200 Subject: [PATCH 09/54] Storage/S3: Add dynamic bucket task --- storage_s3/indico_storage_s3/__init__.py | 8 +++++++ storage_s3/indico_storage_s3/plugin.py | 28 +++++++++++++++++++++++- storage_s3/indico_storage_s3/task.py | 23 +++++++++++++++++++ 3 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 storage_s3/indico_storage_s3/task.py diff --git a/storage_s3/indico_storage_s3/__init__.py b/storage_s3/indico_storage_s3/__init__.py index e69de29..4ab33c4 100644 --- a/storage_s3/indico_storage_s3/__init__.py +++ b/storage_s3/indico_storage_s3/__init__.py @@ -0,0 +1,8 @@ +from __future__ import unicode_literals + +from indico.core import signals + + +@signals.import_tasks.connect +def _import_tasks(sender, **kwargs): + import indico_storage_s3.task diff --git a/storage_s3/indico_storage_s3/plugin.py b/storage_s3/indico_storage_s3/plugin.py index 258962c..bfdf569 100644 --- a/storage_s3/indico_storage_s3/plugin.py +++ b/storage_s3/indico_storage_s3/plugin.py 
@@ -16,8 +16,11 @@ from __future__ import unicode_literals +import datetime +import re import sys from contextlib import contextmanager +from math import ceil from tempfile import NamedTemporaryFile import boto3 @@ -50,9 +53,10 @@ class S3Storage(Storage): def __init__(self, data): data = self._parse_data(data) self.host = data['host'] - self.bucket = data['bucket'] + self.bucket = self.get_bucket_name(data['bucket']) self.secret_key = data['secret_key'] self.access_key = data['access_key'] + self.acl_template_bucket = data['acl_template_bucket'] self.client = boto3.client( 's3', endpoint_url=self.host, @@ -108,3 +112,25 @@ class S3Storage(Storage): return redirect(url) except Exception as e: raise StorageError('Could not send file "{}": {}'.format(file_id, e)), None, sys.exc_info()[2] + + def get_bucket_name(self, name, replace_placeholders=True): + if replace_placeholders: + return self.replace_bucket_placeholders(name, datetime.datetime.now()) + else: + return self.bucket + + def create_bucket(self, name): + acl = self.client.get_bucket_acl(Bucket=self.acl_template_bucket) + self.client.create_bucket(name, ACL=acl) + + def get_week_of_the_month(self, date): + first_day = date.replace(day=1) + current_day = date.day + return int(ceil((current_day + first_day.weekday())/7)) + + def replace_bucket_placeholders(self, name, date): + dates = {'year': str(date.year), + 'month': str(date.month), + 'week': str(self.get_week_of_the_month(date))} + pattern = re.compile("|".join(dates.keys())) + return pattern.sub(lambda m: dates[re.escape(m.group(0))], name) diff --git a/storage_s3/indico_storage_s3/task.py b/storage_s3/indico_storage_s3/task.py new file mode 100644 index 0000000..21cb656 --- /dev/null +++ b/storage_s3/indico_storage_s3/task.py @@ -0,0 +1,23 @@ +from __future__ import unicode_literals + +import datetime + +from celery.schedules import crontab +from dateutil.relativedelta import relativedelta + +from indico.core.celery import celery +from 
indico.core.storage.backend import get_storage + + +@celery.periodic_task(run_every=crontab(minute=0, hour=1)) +def create_bucket(): + storage = get_storage('s3') + bucket_name = storage.get_bucket_name(replace_placeholders=False) + now = datetime.datetime.now() + if all(x in bucket_name for x in ['year', 'month', 'week']) and now.weekday() == 1: + date = now + relativedelta(weeks=1) + storage.create_bucket(storage.replace_bucket_placeholders(bucket_name, date)) + elif (all(x in bucket_name for x in ['year', 'month']) and now.day == 1 + or bucket_name.find('year') and now.day == 1 and now.month == 12): + date = now + relativedelta(months=1) + storage.create_bucket(storage.replace_bucket_placeholders(bucket_name, date)) From 24572e6f7e8310cc6f3cf4c871a0d36726333d55 Mon Sep 17 00:00:00 2001 From: Natalia Juszka Date: Wed, 11 Apr 2018 11:48:12 +0200 Subject: [PATCH 10/54] Storage/S3: Set new bucket acl from template --- storage_s3/indico_storage_s3/plugin.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/storage_s3/indico_storage_s3/plugin.py b/storage_s3/indico_storage_s3/plugin.py index bfdf569..7b47388 100644 --- a/storage_s3/indico_storage_s3/plugin.py +++ b/storage_s3/indico_storage_s3/plugin.py @@ -100,7 +100,6 @@ class S3Storage(Storage): def send_file(self, file_id, content_type, filename, inline=True): try: - content_disp = ('inline; filename="%s"' % filename if inline else 'attachment; filename="%s"' % filename) url = self.client.generate_presigned_url('get_object', @@ -120,8 +119,14 @@ class S3Storage(Storage): return self.bucket def create_bucket(self, name): - acl = self.client.get_bucket_acl(Bucket=self.acl_template_bucket) - self.client.create_bucket(name, ACL=acl) + try: + response = self.client.get_bucket_acl(Bucket=self.acl_template_bucket) + acl_keys = ['Owner', 'Grants'] + acl = {key: response[key] for key in response if key in acl_keys} + self.client.create_bucket(Bucket=name) + 
self.client.put_bucket_acl(AccessControlPolicy=acl, Bucket=name) + except Exception as e: + raise StorageError('Could not create bucket "{}": {}'.format(name, e)), None, sys.exc_info()[2] def get_week_of_the_month(self, date): first_day = date.replace(day=1) From 068ee646b8eddc779e78735f2b2b4a5ab45dd4ef Mon Sep 17 00:00:00 2001 From: Natalia Juszka Date: Wed, 11 Apr 2018 15:16:12 +0200 Subject: [PATCH 11/54] Storage/S3: Escape filename in content-disp header --- storage_s3/indico_storage_s3/plugin.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/storage_s3/indico_storage_s3/plugin.py b/storage_s3/indico_storage_s3/plugin.py index 7b47388..d28f34e 100644 --- a/storage_s3/indico_storage_s3/plugin.py +++ b/storage_s3/indico_storage_s3/plugin.py @@ -24,6 +24,7 @@ from math import ceil from tempfile import NamedTemporaryFile import boto3 +from werkzeug.datastructures import Headers from werkzeug.utils import redirect from indico.core import signals @@ -100,12 +101,13 @@ class S3Storage(Storage): def send_file(self, file_id, content_type, filename, inline=True): try: - content_disp = ('inline; filename="%s"' % filename if inline - else 'attachment; filename="%s"' % filename) + content_disp = ('inline' if inline else 'attachment') + h = Headers() + h.add('Content-Disposition', content_disp, filename=filename) url = self.client.generate_presigned_url('get_object', Params={'Bucket': self.bucket, 'Key': file_id, - 'ResponseContentDisposition': content_disp, + 'ResponseContentDisposition': h.get('Content-Disposition'), 'ResponseContentType': content_type}, ExpiresIn=120) return redirect(url) From dd82b0553313c51ec5000a57b30c2ac0006d7f31 Mon Sep 17 00:00:00 2001 From: Natalia Juszka Date: Wed, 11 Apr 2018 16:21:32 +0200 Subject: [PATCH 12/54] Storage/S3: Refactor code --- storage_s3/indico_storage_s3/plugin.py | 28 +++++++++----------------- storage_s3/indico_storage_s3/task.py | 26 +++++++++++++++++++----- storage_s3/indico_storage_s3/test.py | 0 3 
files changed, 30 insertions(+), 24 deletions(-) create mode 100644 storage_s3/indico_storage_s3/test.py diff --git a/storage_s3/indico_storage_s3/plugin.py b/storage_s3/indico_storage_s3/plugin.py index d28f34e..0758ebe 100644 --- a/storage_s3/indico_storage_s3/plugin.py +++ b/storage_s3/indico_storage_s3/plugin.py @@ -16,11 +16,9 @@ from __future__ import unicode_literals -import datetime -import re import sys from contextlib import contextmanager -from math import ceil +from datetime import datetime from tempfile import NamedTemporaryFile import boto3 @@ -57,7 +55,8 @@ class S3Storage(Storage): self.bucket = self.get_bucket_name(data['bucket']) self.secret_key = data['secret_key'] self.access_key = data['access_key'] - self.acl_template_bucket = data['acl_template_bucket'] + if any(x in data['bucket'] for x in ['', '', '']): + self.acl_template_bucket = data['acl_template_bucket'] self.client = boto3.client( 's3', endpoint_url=self.host, @@ -116,28 +115,19 @@ class S3Storage(Storage): def get_bucket_name(self, name, replace_placeholders=True): if replace_placeholders: - return self.replace_bucket_placeholders(name, datetime.datetime.now()) + return self.replace_bucket_placeholders(name, datetime.now()) else: return self.bucket def create_bucket(self, name): - try: response = self.client.get_bucket_acl(Bucket=self.acl_template_bucket) - acl_keys = ['Owner', 'Grants'] + acl_keys = {'Owner', 'Grants'} acl = {key: response[key] for key in response if key in acl_keys} self.client.create_bucket(Bucket=name) self.client.put_bucket_acl(AccessControlPolicy=acl, Bucket=name) - except Exception as e: - raise StorageError('Could not create bucket "{}": {}'.format(name, e)), None, sys.exc_info()[2] - - def get_week_of_the_month(self, date): - first_day = date.replace(day=1) - current_day = date.day - return int(ceil((current_day + first_day.weekday())/7)) def replace_bucket_placeholders(self, name, date): - dates = {'year': str(date.year), - 'month': str(date.month), - 
'week': str(self.get_week_of_the_month(date))} - pattern = re.compile("|".join(dates.keys())) - return pattern.sub(lambda m: dates[re.escape(m.group(0))], name) + name = name.replace('', date.strftime('%Y')) + name = name.replace('', date.strftime('%m')) + name = name.replace('', date.strftime('%W')) + return name diff --git a/storage_s3/indico_storage_s3/task.py b/storage_s3/indico_storage_s3/task.py index 21cb656..e1fd25e 100644 --- a/storage_s3/indico_storage_s3/task.py +++ b/storage_s3/indico_storage_s3/task.py @@ -1,6 +1,22 @@ +# This file is part of Indico. +# Copyright (C) 2002 - 2018 European Organization for Nuclear Research (CERN). +# +# Indico is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 3 of the +# License, or (at your option) any later version. +# +# Indico is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Indico; if not, see . 
+ from __future__ import unicode_literals -import datetime +from datetime import datetime from celery.schedules import crontab from dateutil.relativedelta import relativedelta @@ -13,11 +29,11 @@ from indico.core.storage.backend import get_storage def create_bucket(): storage = get_storage('s3') bucket_name = storage.get_bucket_name(replace_placeholders=False) - now = datetime.datetime.now() - if all(x in bucket_name for x in ['year', 'month', 'week']) and now.weekday() == 1: + now = datetime.now() + if all(x in bucket_name for x in ['', '']) and now.weekday() == 1: date = now + relativedelta(weeks=1) storage.create_bucket(storage.replace_bucket_placeholders(bucket_name, date)) - elif (all(x in bucket_name for x in ['year', 'month']) and now.day == 1 - or bucket_name.find('year') and now.day == 1 and now.month == 12): + elif (all(x in bucket_name for x in ['', '']) and now.day == 1 + or bucket_name.find('') and now.day == 1 and now.month == 12): date = now + relativedelta(months=1) storage.create_bucket(storage.replace_bucket_placeholders(bucket_name, date)) diff --git a/storage_s3/indico_storage_s3/test.py b/storage_s3/indico_storage_s3/test.py new file mode 100644 index 0000000..e69de29 From 6a1300e712617e1b12ecce7fbf147815defa396f Mon Sep 17 00:00:00 2001 From: Natalia Juszka Date: Fri, 13 Apr 2018 11:31:46 +0200 Subject: [PATCH 13/54] Storage/S3: Add tests --- storage_s3/indico_storage_s3/plugin.py | 2 +- storage_s3/indico_storage_s3/task.py | 7 +-- storage_s3/indico_storage_s3/test.py | 72 ++++++++++++++++++++++++++ 3 files changed, 77 insertions(+), 4 deletions(-) diff --git a/storage_s3/indico_storage_s3/plugin.py b/storage_s3/indico_storage_s3/plugin.py index 0758ebe..d23a856 100644 --- a/storage_s3/indico_storage_s3/plugin.py +++ b/storage_s3/indico_storage_s3/plugin.py @@ -117,7 +117,7 @@ class S3Storage(Storage): if replace_placeholders: return self.replace_bucket_placeholders(name, datetime.now()) else: - return self.bucket + return name def 
create_bucket(self, name): response = self.client.get_bucket_acl(Bucket=self.acl_template_bucket) diff --git a/storage_s3/indico_storage_s3/task.py b/storage_s3/indico_storage_s3/task.py index e1fd25e..dec35f8 100644 --- a/storage_s3/indico_storage_s3/task.py +++ b/storage_s3/indico_storage_s3/task.py @@ -30,10 +30,11 @@ def create_bucket(): storage = get_storage('s3') bucket_name = storage.get_bucket_name(replace_placeholders=False) now = datetime.now() - if all(x in bucket_name for x in ['', '']) and now.weekday() == 1: + if all(x in bucket_name for x in ['', '']) and now.weekday() == 0: date = now + relativedelta(weeks=1) storage.create_bucket(storage.replace_bucket_placeholders(bucket_name, date)) - elif (all(x in bucket_name for x in ['', '']) and now.day == 1 - or bucket_name.find('') and now.day == 1 and now.month == 12): + elif ((all(x in bucket_name for x in ['', '']) and now.day == 1) + or ('' in bucket_name and not any(x in bucket_name for x in ['', '']) + and now.day == 1 and now.month == 12)): date = now + relativedelta(months=1) storage.create_bucket(storage.replace_bucket_placeholders(bucket_name, date)) diff --git a/storage_s3/indico_storage_s3/test.py b/storage_s3/indico_storage_s3/test.py index e69de29..38c3b9d 100644 --- a/storage_s3/indico_storage_s3/test.py +++ b/storage_s3/indico_storage_s3/test.py @@ -0,0 +1,72 @@ +# This file is part of Indico. +# Copyright (C) 2002 - 2018 European Organization for Nuclear Research (CERN). +# +# Indico is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 3 of the +# License, or (at your option) any later version. +# +# Indico is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with Indico; if not, see . + +from __future__ import unicode_literals + +import mock +import pytest +from freezegun import freeze_time + +import indico_storage_s3.plugin as plugin +from indico_storage_s3.task import create_bucket + + +@pytest.mark.parametrize(('date', 'name_template', 'expected_name'), ( + ('2018-04-11', 'name', 'name'), + ('2018-04-11', 'name-', 'name-2018'), + ('2018-04-11', 'name--', 'name-2018-04'), + ('2018-04-11', 'name--', 'name-2018-15'), + ('2018-01-01', 'name--', 'name-2018-01'), + ('2019-01-01', 'name--', 'name-2019-00'), + +)) +@mock.patch.object(plugin.S3Storage, '__init__', lambda self: None) +def test_resolve_bucket_name(date, name_template, expected_name): + with freeze_time(date): + storage = plugin.S3Storage() + assert storage.get_bucket_name(name_template) == expected_name + + +@mock.create_autospec +def fake_bucket_created(name, replace_placeholders=True): + pass + + +@pytest.mark.usefixtures('app_context') +@pytest.mark.parametrize(('date', 'name_template', 'bucket_created', 'expected_name'), ( + ('2018-04-11', 'name', False, None), + ('2018-12-01', 'name', False, None), + ('2018-04-11', 'name-', False, None), + ('2018-01-01', 'name-', False, None), + ('2018-12-01', 'name--', False, None), + ('2018-12-02', 'name--', False, None), + ('2018-02-10', 'name--', False, None), + ('2018-12-01', 'name-', True, 'name-2019'), + ('2018-12-01', 'name--', True, 'name-2019-01'), + ('2018-01-01', 'name--', True, 'name-2018-02'), + ('2018-12-03', 'name--', True, 'name-2018-50'), +)) +def test_dynamic_bucket_creation_task(date, name_template, bucket_created, expected_name): + with freeze_time(date), \ + mock.patch.object(plugin.S3Storage, '__init__', lambda self: None),\ + mock.patch.object(plugin.S3Storage, 'get_bucket_name', return_value=name_template),\ + mock.patch('indico_storage_s3.task.get_storage', return_value=plugin.S3Storage()),\ + 
mock.patch.object(plugin.S3Storage, 'create_bucket', fake_bucket_created) as create_bucket_call: + create_bucket() + if bucket_created: + create_bucket_call.assert_called_with(mock.ANY, expected_name) + else: + assert not create_bucket_call.called From f0d178731fc1a23d21be794d82d6445b438c047c Mon Sep 17 00:00:00 2001 From: Natalia Juszka Date: Wed, 18 Apr 2018 15:41:04 +0200 Subject: [PATCH 14/54] Storage/S3: improve create bucket task --- storage_s3/indico_storage_s3/__init__.py | 16 +++++++ storage_s3/indico_storage_s3/plugin.py | 57 +++++++++++++++--------- storage_s3/indico_storage_s3/task.py | 33 +++++++++----- storage_s3/indico_storage_s3/test.py | 52 ++++++++++++--------- 4 files changed, 104 insertions(+), 54 deletions(-) diff --git a/storage_s3/indico_storage_s3/__init__.py b/storage_s3/indico_storage_s3/__init__.py index 4ab33c4..9eaa627 100644 --- a/storage_s3/indico_storage_s3/__init__.py +++ b/storage_s3/indico_storage_s3/__init__.py @@ -1,3 +1,19 @@ +# This file is part of Indico. +# Copyright (C) 2002 - 2018 European Organization for Nuclear Research (CERN). +# +# Indico is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 3 of the +# License, or (at your option) any later version. +# +# Indico is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Indico; if not, see . 
+ from __future__ import unicode_literals from indico.core import signals diff --git a/storage_s3/indico_storage_s3/plugin.py b/storage_s3/indico_storage_s3/plugin.py index d23a856..2d7defd 100644 --- a/storage_s3/indico_storage_s3/plugin.py +++ b/storage_s3/indico_storage_s3/plugin.py @@ -18,10 +18,11 @@ from __future__ import unicode_literals import sys from contextlib import contextmanager -from datetime import datetime +from datetime import date from tempfile import NamedTemporaryFile import boto3 +import botocore from werkzeug.datastructures import Headers from werkzeug.utils import redirect @@ -52,11 +53,10 @@ class S3Storage(Storage): def __init__(self, data): data = self._parse_data(data) self.host = data['host'] - self.bucket = self.get_bucket_name(data['bucket']) + self.bucket = data['bucket'] self.secret_key = data['secret_key'] self.access_key = data['access_key'] - if any(x in data['bucket'] for x in ['', '', '']): - self.acl_template_bucket = data['acl_template_bucket'] + self.acl_template_bucket = data.get('acl_template_bucket') self.client = boto3.client( 's3', endpoint_url=self.host, @@ -66,7 +66,7 @@ class S3Storage(Storage): def open(self, file_id): try: - return self.client.get_object(Bucket=self.bucket, Key=file_id)['Body'] + return self.client.get_object(Bucket=self._get_current_bucket(), Key=file_id)['Body'] except Exception as e: raise StorageError('Could not open "{}": {}'.format(file_id, e)), None, sys.exc_info()[2] @@ -80,21 +80,22 @@ class S3Storage(Storage): def save(self, name, content_type, filename, fileobj): try: fileobject = self._ensure_fileobj(fileobj) - self.client.upload_fileobj(fileobject, self.bucket, name) - checksum = self.client.head_object(Bucket=self.bucket, Key=name)['ETag'][1:-1] + bucket = self._get_current_bucket() + self.client.upload_fileobj(fileobject, bucket, name) + checksum = self.client.head_object(Bucket=bucket, Key=name)['ETag'][1:-1] return name, checksum except Exception as e: raise StorageError('Could not 
save "{}": {}'.format(name, e)), None, sys.exc_info()[2] def delete(self, file_id): try: - self.client.delete_object(self.bucket, file_id) + self.client.delete_object(self._get_current_bucket(), file_id) except Exception as e: raise StorageError('Could not delete "{}": {}'.format(file_id, e)), None, sys.exc_info()[2] def getsize(self, file_id): try: - return self.client.head_object(Bucket=self.bucket, Key=file_id)['ContentLength'] + return self.client.head_object(Bucket=self._get_current_bucket(), Key=file_id)['ContentLength'] except Exception as e: raise StorageError('Could not get size of "{}": {}'.format(file_id, e)), None, sys.exc_info()[2] @@ -104,7 +105,7 @@ class S3Storage(Storage): h = Headers() h.add('Content-Disposition', content_disp, filename=filename) url = self.client.generate_presigned_url('get_object', - Params={'Bucket': self.bucket, + Params={'Bucket': self._get_current_bucket(), 'Key': file_id, 'ResponseContentDisposition': h.get('Content-Disposition'), 'ResponseContentType': content_type}, @@ -113,21 +114,33 @@ class S3Storage(Storage): except Exception as e: raise StorageError('Could not send file "{}": {}'.format(file_id, e)), None, sys.exc_info()[2] - def get_bucket_name(self, name, replace_placeholders=True): - if replace_placeholders: - return self.replace_bucket_placeholders(name, datetime.now()) - else: - return name + def _get_current_bucket(self): + return self._replace_bucket_placeholders(self.bucket, date.today()) - def create_bucket(self, name): - response = self.client.get_bucket_acl(Bucket=self.acl_template_bucket) - acl_keys = {'Owner', 'Grants'} - acl = {key: response[key] for key in response if key in acl_keys} - self.client.create_bucket(Bucket=name) - self.client.put_bucket_acl(AccessControlPolicy=acl, Bucket=name) + def _get_bucket_name(self, current=False): + return self._get_current_bucket() if current else self.bucket - def replace_bucket_placeholders(self, name, date): + def _replace_bucket_placeholders(self, name, date): 
name = name.replace('', date.strftime('%Y')) name = name.replace('', date.strftime('%m')) name = name.replace('', date.strftime('%W')) return name + + def _create_bucket(self, name): + if self._bucket_exists(name): + S3StoragePlugin.logger.info('Bucket %s already exists', name) + return + self.client.create_bucket(Bucket=name) + S3StoragePlugin.logger.info('New bucket created: %s', name) + if self.acl_template_bucket: + response = self.client.get_bucket_acl(Bucket=self.acl_template_bucket) + acl_keys = {'Owner', 'Grants'} + acl = {key: response[key] for key in response if key in acl_keys} + self.client.put_bucket_acl(AccessControlPolicy=acl, Bucket=name) + + def _bucket_exists(self, name): + try: + self.client.head_bucket(Bucket=name) + return True + except botocore.exceptions.ClientError: + return False diff --git a/storage_s3/indico_storage_s3/task.py b/storage_s3/indico_storage_s3/task.py index dec35f8..48ac9b4 100644 --- a/storage_s3/indico_storage_s3/task.py +++ b/storage_s3/indico_storage_s3/task.py @@ -16,25 +16,34 @@ from __future__ import unicode_literals -from datetime import datetime +import re +from datetime import date from celery.schedules import crontab from dateutil.relativedelta import relativedelta from indico.core.celery import celery +from indico.core.config import config from indico.core.storage.backend import get_storage +from plugin import S3Storage + @celery.periodic_task(run_every=crontab(minute=0, hour=1)) def create_bucket(): - storage = get_storage('s3') - bucket_name = storage.get_bucket_name(replace_placeholders=False) - now = datetime.now() - if all(x in bucket_name for x in ['', '']) and now.weekday() == 0: - date = now + relativedelta(weeks=1) - storage.create_bucket(storage.replace_bucket_placeholders(bucket_name, date)) - elif ((all(x in bucket_name for x in ['', '']) and now.day == 1) - or ('' in bucket_name and not any(x in bucket_name for x in ['', '']) - and now.day == 1 and now.month == 12)): - date = now + 
relativedelta(months=1) - storage.create_bucket(storage.replace_bucket_placeholders(bucket_name, date)) + for key in config.STORAGE_BACKENDS: + storage = get_storage(key) + if isinstance(storage, S3Storage): + today = date.today() + bucket_name = storage._get_bucket_name() + placeholders = set(re.findall('<.*?>', bucket_name)) + if placeholders == {'', ''}: + bucket_date = today + relativedelta(weeks=1) + bucket = storage._replace_bucket_placeholders(bucket_name, bucket_date) + storage._create_bucket(bucket) + elif placeholders == {'', ''} or (placeholders == {''} and today.month == 12): + bucket_date = today + relativedelta(months=1) + bucket = storage._replace_bucket_placeholders(bucket_name, bucket_date) + storage._create_bucket(bucket) + elif placeholders == {''} or placeholders == {''} or placeholders == {'', ''}: + raise RuntimeError('Placeholders combination in bucket name is not correct: %s', bucket_name) diff --git a/storage_s3/indico_storage_s3/test.py b/storage_s3/indico_storage_s3/test.py index 38c3b9d..85176e5 100644 --- a/storage_s3/indico_storage_s3/test.py +++ b/storage_s3/indico_storage_s3/test.py @@ -37,35 +37,47 @@ from indico_storage_s3.task import create_bucket def test_resolve_bucket_name(date, name_template, expected_name): with freeze_time(date): storage = plugin.S3Storage() - assert storage.get_bucket_name(name_template) == expected_name + storage.bucket = name_template + assert storage._get_current_bucket() == expected_name @mock.create_autospec -def fake_bucket_created(name, replace_placeholders=True): +def mock_bucket_created(self, name): pass +class MockConfig: + + def __init__(self): + self.STORAGE_BACKENDS = {'s3': None} + + @pytest.mark.usefixtures('app_context') -@pytest.mark.parametrize(('date', 'name_template', 'bucket_created', 'expected_name'), ( - ('2018-04-11', 'name', False, None), - ('2018-12-01', 'name', False, None), - ('2018-04-11', 'name-', False, None), - ('2018-01-01', 'name-', False, None), - ('2018-12-01', 'name--', 
False, None), - ('2018-12-02', 'name--', False, None), - ('2018-02-10', 'name--', False, None), - ('2018-12-01', 'name-', True, 'name-2019'), - ('2018-12-01', 'name--', True, 'name-2019-01'), - ('2018-01-01', 'name--', True, 'name-2018-02'), - ('2018-12-03', 'name--', True, 'name-2018-50'), +@pytest.mark.parametrize(('date', 'name_template', 'bucket_created', 'expected_name', 'expected_error'), ( + ('2018-04-11', 'name', False, None, None), + ('2018-12-01', 'name', False, None, None), + ('2018-04-11', 'name-', False, None, None), + ('2018-01-01', 'name-', False, None, None), + ('2018-12-01', 'name-', False, None, RuntimeError), + ('2018-12-01', 'name-', False, None, RuntimeError), + ('2018-12-01', 'name--', False, None, RuntimeError), + ('2018-12-01', 'name-', True, 'name-2019', None), + ('2018-12-01', 'name--', True, 'name-2019-01', None), + ('2018-01-01', 'name--', True, 'name-2018-02', None), + ('2018-12-03', 'name--', True, 'name-2018-50', None), )) -def test_dynamic_bucket_creation_task(date, name_template, bucket_created, expected_name): +def test_dynamic_bucket_creation_task(date, name_template, bucket_created, expected_name, expected_error): with freeze_time(date), \ - mock.patch.object(plugin.S3Storage, '__init__', lambda self: None),\ - mock.patch.object(plugin.S3Storage, 'get_bucket_name', return_value=name_template),\ - mock.patch('indico_storage_s3.task.get_storage', return_value=plugin.S3Storage()),\ - mock.patch.object(plugin.S3Storage, 'create_bucket', fake_bucket_created) as create_bucket_call: - create_bucket() + mock.patch.object(plugin.S3Storage, '__init__', lambda self: None), \ + mock.patch.object(plugin.S3Storage, '_get_bucket_name', return_value=name_template), \ + mock.patch('indico_storage_s3.task.config', MockConfig()), \ + mock.patch('indico_storage_s3.task.get_storage', return_value=plugin.S3Storage()), \ + mock.patch.object(plugin.S3Storage, '_create_bucket', mock_bucket_created) as create_bucket_call: + if expected_error: + with 
pytest.raises(expected_error): + create_bucket() + else: + create_bucket() if bucket_created: create_bucket_call.assert_called_with(mock.ANY, expected_name) else: From 0346a101783f21cef8bac9cfc627d812291133af Mon Sep 17 00:00:00 2001 From: Natalia Juszka Date: Fri, 20 Apr 2018 19:03:12 +0200 Subject: [PATCH 15/54] Storage/S3: Fix checksum --- storage_s3/indico_storage_s3/plugin.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/storage_s3/indico_storage_s3/plugin.py b/storage_s3/indico_storage_s3/plugin.py index 2d7defd..e715836 100644 --- a/storage_s3/indico_storage_s3/plugin.py +++ b/storage_s3/indico_storage_s3/plugin.py @@ -30,6 +30,7 @@ from indico.core import signals from indico.core.config import config from indico.core.plugins import IndicoPlugin from indico.core.storage import Storage, StorageError +from indico.util.fs import get_file_checksum class S3StoragePlugin(IndicoPlugin): @@ -81,8 +82,10 @@ class S3Storage(Storage): try: fileobject = self._ensure_fileobj(fileobj) bucket = self._get_current_bucket() - self.client.upload_fileobj(fileobject, bucket, name) - checksum = self.client.head_object(Bucket=bucket, Key=name)['ETag'][1:-1] + checksum = get_file_checksum(fileobj) + fileobject.seek(0) + contentmd5 = checksum.decode('hex').encode('base64').strip() + self.client.put_object(Body=fileobject, Bucket=bucket, ContentMD5=contentmd5, Key=name) return name, checksum except Exception as e: raise StorageError('Could not save "{}": {}'.format(name, e)), None, sys.exc_info()[2] From 0dc50f4a1b67532c926fed0d5e138ba1d98eb253 Mon Sep 17 00:00:00 2001 From: Natalia Juszka Date: Mon, 23 Apr 2018 16:59:07 +0200 Subject: [PATCH 16/54] Storage/S3: Add packages in setup --- storage_s3/setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/storage_s3/setup.py b/storage_s3/setup.py index ab18d1b..53e648b 100644 --- a/storage_s3/setup.py +++ b/storage_s3/setup.py @@ -16,7 +16,7 @@ from __future__ import unicode_literals 
-from setuptools import setup +from setuptools import find_packages, setup setup( @@ -27,7 +27,7 @@ setup( license='https://www.gnu.org/licenses/gpl-3.0.txt', author='Indico Team', author_email='indico-team@cern.ch', - py_modules=('indico_storage_s3',), + packages=find_packages(), zip_safe=False, platforms='any', install_requires=[ From f1bf6fc7c73b7eb5ecebce85021284cf408e1dcb Mon Sep 17 00:00:00 2001 From: Natalia Juszka Date: Wed, 25 Apr 2018 12:25:27 +0200 Subject: [PATCH 17/54] Storage/S3: Improve open file method --- storage_s3/indico_storage_s3/plugin.py | 10 ++++++---- storage_s3/setup.py | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/storage_s3/indico_storage_s3/plugin.py b/storage_s3/indico_storage_s3/plugin.py index e715836..5148c03 100644 --- a/storage_s3/indico_storage_s3/plugin.py +++ b/storage_s3/indico_storage_s3/plugin.py @@ -19,6 +19,7 @@ from __future__ import unicode_literals import sys from contextlib import contextmanager from datetime import date +from io import BytesIO from tempfile import NamedTemporaryFile import boto3 @@ -67,7 +68,8 @@ class S3Storage(Storage): def open(self, file_id): try: - return self.client.get_object(Bucket=self._get_current_bucket(), Key=file_id)['Body'] + s3_object = self.client.get_object(Bucket=self._get_current_bucket(), Key=file_id)['Body'] + return BytesIO(s3_object.read()) except Exception as e: raise StorageError('Could not open "{}": {}'.format(file_id, e)), None, sys.exc_info()[2] @@ -80,12 +82,12 @@ class S3Storage(Storage): def save(self, name, content_type, filename, fileobj): try: - fileobject = self._ensure_fileobj(fileobj) + fileobj = self._ensure_fileobj(fileobj) bucket = self._get_current_bucket() checksum = get_file_checksum(fileobj) - fileobject.seek(0) + fileobj.seek(0) contentmd5 = checksum.decode('hex').encode('base64').strip() - self.client.put_object(Body=fileobject, Bucket=bucket, ContentMD5=contentmd5, Key=name) + self.client.put_object(Body=fileobj, Bucket=bucket, 
ContentMD5=contentmd5, Key=name) return name, checksum except Exception as e: raise StorageError('Could not save "{}": {}'.format(name, e)), None, sys.exc_info()[2] diff --git a/storage_s3/setup.py b/storage_s3/setup.py index 53e648b..5132c30 100644 --- a/storage_s3/setup.py +++ b/storage_s3/setup.py @@ -32,7 +32,7 @@ setup( platforms='any', install_requires=[ 'indico>=2.0', - 'boto3>=1.6.16,<2.0' + 'boto3>=1.7.8,<2.0', ], classifiers=[ 'Environment :: Plugins', From 076a315dfe25eeb22415abb779b0b90158452d7e Mon Sep 17 00:00:00 2001 From: Natalia Juszka Date: Wed, 25 Apr 2018 14:44:13 +0200 Subject: [PATCH 18/54] Storage/S3: Require Indico 2.1 --- storage_s3/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage_s3/setup.py b/storage_s3/setup.py index 5132c30..97506ec 100644 --- a/storage_s3/setup.py +++ b/storage_s3/setup.py @@ -31,7 +31,7 @@ setup( zip_safe=False, platforms='any', install_requires=[ - 'indico>=2.0', + 'indico>=2.1rc3', 'boto3>=1.7.8,<2.0', ], classifiers=[ From 72f4ddec05297072e3a22b1fc1dbf62dfc8a4563 Mon Sep 17 00:00:00 2001 From: Natalia Juszka Date: Wed, 25 Apr 2018 15:19:59 +0200 Subject: [PATCH 19/54] Storage/S3: Add create bucket command --- storage_s3/indico_storage_s3/plugin.py | 31 +++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/storage_s3/indico_storage_s3/plugin.py b/storage_s3/indico_storage_s3/plugin.py index 5148c03..61ebfd4 100644 --- a/storage_s3/indico_storage_s3/plugin.py +++ b/storage_s3/indico_storage_s3/plugin.py @@ -24,13 +24,16 @@ from tempfile import NamedTemporaryFile import boto3 import botocore +import click from werkzeug.datastructures import Headers from werkzeug.utils import redirect +from indico.cli.core import cli_command from indico.core import signals from indico.core.config import config from indico.core.plugins import IndicoPlugin from indico.core.storage import Storage, StorageError +from indico.core.storage.backend import get_storage from 
indico.util.fs import get_file_checksum @@ -43,10 +46,36 @@ class S3StoragePlugin(IndicoPlugin): def init(self): super(S3StoragePlugin, self).init() self.connect(signals.get_storage_backends, self._get_storage_backends) + self.connect(signals.plugin.cli, self._extend_indico_cli) def _get_storage_backends(self, sender, **kwargs): return S3Storage + def _extend_indico_cli(self, sender, **kwargs): + @cli_command() + @click.option('--storage', default=None, help='Storage to create bucket for') + def create_s3_bucket(storage): + """Create s3 bucket""" + storages = [storage] if storage else config.STORAGE_BACKENDS + for key in storages: + try: + storage_instance = get_storage(key) + except RuntimeError: + if storage: + click.echo('Storage {} does not exist'.format(key)) + sys.exit(1) + if isinstance(storage_instance, S3Storage): + bucket_name = storage_instance._get_current_bucket() + if storage_instance._bucket_exists(bucket_name): + click.echo('Storage {}: bucket {} already exists'.format(key, bucket_name)) + continue + storage_instance._create_bucket(bucket_name) + click.echo('Storage {}: bucket {} created'.format(key, bucket_name)) + elif storage: + click.echo('Storage {} is not a s3 storage'.format(key)) + sys.exit(1) + return create_s3_bucket + class S3Storage(Storage): name = 's3' @@ -123,7 +152,7 @@ class S3Storage(Storage): return self._replace_bucket_placeholders(self.bucket, date.today()) def _get_bucket_name(self, current=False): - return self._get_current_bucket() if current else self.bucket + return self._get_current_bucket() if current else self.bucket def _replace_bucket_placeholders(self, name, date): name = name.replace('', date.strftime('%Y')) From e7790f8ddefb275fce6bb958c2e615b23ec957c5 Mon Sep 17 00:00:00 2001 From: Natalia Juszka Date: Fri, 27 Apr 2018 15:13:11 +0200 Subject: [PATCH 20/54] Storage/S3: Change tmp file name --- storage_s3/indico_storage_s3/plugin.py | 4 ++-- storage_s3/indico_storage_s3/test.py | 3 +-- 2 files changed, 3 
insertions(+), 4 deletions(-) diff --git a/storage_s3/indico_storage_s3/plugin.py b/storage_s3/indico_storage_s3/plugin.py index 61ebfd4..77d7c36 100644 --- a/storage_s3/indico_storage_s3/plugin.py +++ b/storage_s3/indico_storage_s3/plugin.py @@ -104,7 +104,7 @@ class S3Storage(Storage): @contextmanager def get_local_path(self, file_id): - with NamedTemporaryFile(suffix='indico.tmp', dir=config.TEMP_DIR) as tmpfile: + with NamedTemporaryFile(suffix='indico.s3', dir=config.TEMP_DIR) as tmpfile: self._copy_file(self.open(file_id), tmpfile) tmpfile.flush() yield tmpfile.name @@ -135,7 +135,7 @@ class S3Storage(Storage): def send_file(self, file_id, content_type, filename, inline=True): try: - content_disp = ('inline' if inline else 'attachment') + content_disp = 'inline' if inline else 'attachment' h = Headers() h.add('Content-Disposition', content_disp, filename=filename) url = self.client.generate_presigned_url('get_object', diff --git a/storage_s3/indico_storage_s3/test.py b/storage_s3/indico_storage_s3/test.py index 85176e5..e8dc122 100644 --- a/storage_s3/indico_storage_s3/test.py +++ b/storage_s3/indico_storage_s3/test.py @@ -46,8 +46,7 @@ def mock_bucket_created(self, name): pass -class MockConfig: - +class MockConfig(object): def __init__(self): self.STORAGE_BACKENDS = {'s3': None} From fe0b7501e6f1c157fd7e4e9f139ed532917d8e53 Mon Sep 17 00:00:00 2001 From: Natalia Juszka Date: Wed, 9 May 2018 14:47:51 +0200 Subject: [PATCH 21/54] Storage/S3: Store bucket name --- storage_s3/indico_storage_s3/plugin.py | 18 +++++++++++------- storage_s3/indico_storage_s3/task.py | 2 +- storage_s3/indico_storage_s3/test.py | 2 +- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/storage_s3/indico_storage_s3/plugin.py b/storage_s3/indico_storage_s3/plugin.py index 77d7c36..e36aa13 100644 --- a/storage_s3/indico_storage_s3/plugin.py +++ b/storage_s3/indico_storage_s3/plugin.py @@ -117,30 +117,34 @@ class S3Storage(Storage): fileobj.seek(0) contentmd5 = 
checksum.decode('hex').encode('base64').strip() self.client.put_object(Body=fileobj, Bucket=bucket, ContentMD5=contentmd5, Key=name) - return name, checksum + file_id = '{}//{}'.format(bucket, name) + return file_id, checksum except Exception as e: raise StorageError('Could not save "{}": {}'.format(name, e)), None, sys.exc_info()[2] def delete(self, file_id): + bucket, id_ = file_id.split('//', 1) try: - self.client.delete_object(self._get_current_bucket(), file_id) + self.client.delete_object(bucket, id_) except Exception as e: raise StorageError('Could not delete "{}": {}'.format(file_id, e)), None, sys.exc_info()[2] def getsize(self, file_id): + bucket, id_ = file_id.split('//', 1) try: - return self.client.head_object(Bucket=self._get_current_bucket(), Key=file_id)['ContentLength'] + return self.client.head_object(Bucket=bucket, Key=id_)['ContentLength'] except Exception as e: raise StorageError('Could not get size of "{}": {}'.format(file_id, e)), None, sys.exc_info()[2] def send_file(self, file_id, content_type, filename, inline=True): try: + bucket, id_ = file_id.split('//', 1) content_disp = 'inline' if inline else 'attachment' h = Headers() h.add('Content-Disposition', content_disp, filename=filename) url = self.client.generate_presigned_url('get_object', - Params={'Bucket': self._get_current_bucket(), - 'Key': file_id, + Params={'Bucket': bucket, + 'Key': id_, 'ResponseContentDisposition': h.get('Content-Disposition'), 'ResponseContentType': content_type}, ExpiresIn=120) @@ -151,8 +155,8 @@ class S3Storage(Storage): def _get_current_bucket(self): return self._replace_bucket_placeholders(self.bucket, date.today()) - def _get_bucket_name(self, current=False): - return self._get_current_bucket() if current else self.bucket + def _get_original_bucket_name(self): + return self.bucket def _replace_bucket_placeholders(self, name, date): name = name.replace('', date.strftime('%Y')) diff --git a/storage_s3/indico_storage_s3/task.py 
b/storage_s3/indico_storage_s3/task.py index 48ac9b4..b905fa6 100644 --- a/storage_s3/indico_storage_s3/task.py +++ b/storage_s3/indico_storage_s3/task.py @@ -35,7 +35,7 @@ def create_bucket(): storage = get_storage(key) if isinstance(storage, S3Storage): today = date.today() - bucket_name = storage._get_bucket_name() + bucket_name = storage._get_original_bucket_name() placeholders = set(re.findall('<.*?>', bucket_name)) if placeholders == {'', ''}: bucket_date = today + relativedelta(weeks=1) diff --git a/storage_s3/indico_storage_s3/test.py b/storage_s3/indico_storage_s3/test.py index e8dc122..6772704 100644 --- a/storage_s3/indico_storage_s3/test.py +++ b/storage_s3/indico_storage_s3/test.py @@ -68,7 +68,7 @@ class MockConfig(object): def test_dynamic_bucket_creation_task(date, name_template, bucket_created, expected_name, expected_error): with freeze_time(date), \ mock.patch.object(plugin.S3Storage, '__init__', lambda self: None), \ - mock.patch.object(plugin.S3Storage, '_get_bucket_name', return_value=name_template), \ + mock.patch.object(plugin.S3Storage, '_get_original_bucket_name', return_value=name_template), \ mock.patch('indico_storage_s3.task.config', MockConfig()), \ mock.patch('indico_storage_s3.task.get_storage', return_value=plugin.S3Storage()), \ mock.patch.object(plugin.S3Storage, '_create_bucket', mock_bucket_created) as create_bucket_call: From 81693911853589547d24945b4c7b565c415e635b Mon Sep 17 00:00:00 2001 From: Adrian Moennich Date: Thu, 1 Nov 2018 16:36:21 +0100 Subject: [PATCH 22/54] Storage/S3: Require indico 2.1+ & adjust version --- storage_s3/setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/storage_s3/setup.py b/storage_s3/setup.py index 97506ec..158971c 100644 --- a/storage_s3/setup.py +++ b/storage_s3/setup.py @@ -21,7 +21,7 @@ from setuptools import find_packages, setup setup( name='indico-plugin-storage-s3', - version='1.0-dev', + version='2.0-dev', description='S3 storage backend for Indico', 
url='https://github.com/indico/indico-plugins', license='https://www.gnu.org/licenses/gpl-3.0.txt', @@ -31,7 +31,7 @@ setup( zip_safe=False, platforms='any', install_requires=[ - 'indico>=2.1rc3', + 'indico>=2.1', 'boto3>=1.7.8,<2.0', ], classifiers=[ From e552cd9500ea978bdddcac331bf4a30d6638a3f3 Mon Sep 17 00:00:00 2001 From: Adrian Moennich Date: Thu, 1 Nov 2018 16:36:41 +0100 Subject: [PATCH 23/54] Storage/S3: Update to latest boto3 --- storage_s3/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage_s3/setup.py b/storage_s3/setup.py index 158971c..d730b8b 100644 --- a/storage_s3/setup.py +++ b/storage_s3/setup.py @@ -32,7 +32,7 @@ setup( platforms='any', install_requires=[ 'indico>=2.1', - 'boto3>=1.7.8,<2.0', + 'boto3>=1.9.35,<2.0', ], classifiers=[ 'Environment :: Plugins', From 8d60c3b6c93fa6ba544919ad85ef0a00c03dd153 Mon Sep 17 00:00:00 2001 From: Adrian Moennich Date: Thu, 1 Nov 2018 16:52:35 +0100 Subject: [PATCH 24/54] Storage/S3: Move tests so pytest can find them --- storage_s3/{indico_storage_s3/test.py => tests/plugin_test.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename storage_s3/{indico_storage_s3/test.py => tests/plugin_test.py} (100%) diff --git a/storage_s3/indico_storage_s3/test.py b/storage_s3/tests/plugin_test.py similarity index 100% rename from storage_s3/indico_storage_s3/test.py rename to storage_s3/tests/plugin_test.py From 011893804a43a298d74aa1c8b21e3c8070a21292 Mon Sep 17 00:00:00 2001 From: Adrian Moennich Date: Thu, 1 Nov 2018 16:58:36 +0100 Subject: [PATCH 25/54] Storage/S3: Improve cli help --- storage_s3/indico_storage_s3/plugin.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/storage_s3/indico_storage_s3/plugin.py b/storage_s3/indico_storage_s3/plugin.py index e36aa13..ff26a2d 100644 --- a/storage_s3/indico_storage_s3/plugin.py +++ b/storage_s3/indico_storage_s3/plugin.py @@ -53,9 +53,9 @@ class S3StoragePlugin(IndicoPlugin): def _extend_indico_cli(self, sender, 
**kwargs): @cli_command() - @click.option('--storage', default=None, help='Storage to create bucket for') + @click.option('--storage', default=None, metavar='NAME', help='Storage backend to create bucket for') def create_s3_bucket(storage): - """Create s3 bucket""" + """Create s3 storage bucket.""" storages = [storage] if storage else config.STORAGE_BACKENDS for key in storages: try: From 90050ae8ae28e1733240280174729fa3f10c59d5 Mon Sep 17 00:00:00 2001 From: Adrian Moennich Date: Thu, 1 Nov 2018 17:15:49 +0100 Subject: [PATCH 26/54] Storage/S3: Avoid unnecessary indentation --- storage_s3/indico_storage_s3/task.py | 29 ++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/storage_s3/indico_storage_s3/task.py b/storage_s3/indico_storage_s3/task.py index b905fa6..50b9e3a 100644 --- a/storage_s3/indico_storage_s3/task.py +++ b/storage_s3/indico_storage_s3/task.py @@ -33,17 +33,18 @@ from plugin import S3Storage def create_bucket(): for key in config.STORAGE_BACKENDS: storage = get_storage(key) - if isinstance(storage, S3Storage): - today = date.today() - bucket_name = storage._get_original_bucket_name() - placeholders = set(re.findall('<.*?>', bucket_name)) - if placeholders == {'', ''}: - bucket_date = today + relativedelta(weeks=1) - bucket = storage._replace_bucket_placeholders(bucket_name, bucket_date) - storage._create_bucket(bucket) - elif placeholders == {'', ''} or (placeholders == {''} and today.month == 12): - bucket_date = today + relativedelta(months=1) - bucket = storage._replace_bucket_placeholders(bucket_name, bucket_date) - storage._create_bucket(bucket) - elif placeholders == {''} or placeholders == {''} or placeholders == {'', ''}: - raise RuntimeError('Placeholders combination in bucket name is not correct: %s', bucket_name) + if not isinstance(storage, S3Storage): + continue + today = date.today() + bucket_name = storage._get_original_bucket_name() + placeholders = set(re.findall('<.*?>', bucket_name)) + if 
placeholders == {'', ''}: + bucket_date = today + relativedelta(weeks=1) + bucket = storage._replace_bucket_placeholders(bucket_name, bucket_date) + storage._create_bucket(bucket) + elif placeholders == {'', ''} or (placeholders == {''} and today.month == 12): + bucket_date = today + relativedelta(months=1) + bucket = storage._replace_bucket_placeholders(bucket_name, bucket_date) + storage._create_bucket(bucket) + elif placeholders == {''} or placeholders == {''} or placeholders == {'', ''}: + raise RuntimeError('Placeholders combination in bucket name is not correct: %s', bucket_name) From 2e3a91753524a4b99297d4deddee030f3718c501 Mon Sep 17 00:00:00 2001 From: Adrian Moennich Date: Fri, 2 Nov 2018 13:29:04 +0100 Subject: [PATCH 27/54] Storage/S3: Make config more flexible - make all the connection args optional (and thus support storing them in places like ~/.aws/credentials) - add option to set profile (to use e.g. a readonly keypair for some buckets) --- storage_s3/indico_storage_s3/plugin.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/storage_s3/indico_storage_s3/plugin.py b/storage_s3/indico_storage_s3/plugin.py index ff26a2d..319f1af 100644 --- a/storage_s3/indico_storage_s3/plugin.py +++ b/storage_s3/indico_storage_s3/plugin.py @@ -83,17 +83,20 @@ class S3Storage(Storage): def __init__(self, data): data = self._parse_data(data) - self.host = data['host'] + endpoint_url = data.get('host') + if endpoint_url and '://' not in endpoint_url: + endpoint_url = 'https://' + endpoint_url + session_kwargs = {} + if 'profile' in data: + session_kwargs['profile_name'] = data['profile'] + if 'access_key' in data: + session_kwargs['aws_access_key_id'] = data['access_key'] + if 'secret_key' in data: + session_kwargs['aws_secret_access_key'] = data['secret_key'] self.bucket = data['bucket'] - self.secret_key = data['secret_key'] - self.access_key = data['access_key'] self.acl_template_bucket = data.get('acl_template_bucket') - 
self.client = boto3.client( - 's3', - endpoint_url=self.host, - aws_access_key_id=self.access_key, - aws_secret_access_key=self.secret_key, - ) + session = boto3.session.Session(**session_kwargs) + self.client = session.client('s3', endpoint_url=endpoint_url) def open(self, file_id): try: From 6e6183f880852c0e543900232d54a20b59b72397 Mon Sep 17 00:00:00 2001 From: Adrian Moennich Date: Fri, 2 Nov 2018 13:44:37 +0100 Subject: [PATCH 28/54] Storage/S3: Use policy instead of ACL --- storage_s3/indico_storage_s3/plugin.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/storage_s3/indico_storage_s3/plugin.py b/storage_s3/indico_storage_s3/plugin.py index 319f1af..13fb798 100644 --- a/storage_s3/indico_storage_s3/plugin.py +++ b/storage_s3/indico_storage_s3/plugin.py @@ -94,7 +94,7 @@ class S3Storage(Storage): if 'secret_key' in data: session_kwargs['aws_secret_access_key'] = data['secret_key'] self.bucket = data['bucket'] - self.acl_template_bucket = data.get('acl_template_bucket') + self.bucket_policy_file = data.get('bucket_policy_file') session = boto3.session.Session(**session_kwargs) self.client = session.client('s3', endpoint_url=endpoint_url) @@ -173,11 +173,10 @@ class S3Storage(Storage): return self.client.create_bucket(Bucket=name) S3StoragePlugin.logger.info('New bucket created: %s', name) - if self.acl_template_bucket: - response = self.client.get_bucket_acl(Bucket=self.acl_template_bucket) - acl_keys = {'Owner', 'Grants'} - acl = {key: response[key] for key in response if key in acl_keys} - self.client.put_bucket_acl(AccessControlPolicy=acl, Bucket=name) + if self.bucket_policy_file: + with open(self.bucket_policy_file) as f: + policy = f.read() + self.client.put_bucket_policy(Bucket=name, Policy=policy) def _bucket_exists(self, name): try: From 9d4f8aa154d1e33be076c1a0f617577d4a2673cf Mon Sep 17 00:00:00 2001 From: Adrian Moennich Date: Fri, 2 Nov 2018 13:54:04 +0100 Subject: [PATCH 29/54] Storage/S3: Don't assume unaccessible 
buckets don't exist Now we fail loudly if a bucket is not accessible during the existence check; that way we don't risk getting errors later when writes to the bucket would fail (or worse, succeed, because it's someone else's world-writable bucket) --- storage_s3/indico_storage_s3/plugin.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/storage_s3/indico_storage_s3/plugin.py b/storage_s3/indico_storage_s3/plugin.py index 13fb798..10bdf9e 100644 --- a/storage_s3/indico_storage_s3/plugin.py +++ b/storage_s3/indico_storage_s3/plugin.py @@ -182,5 +182,7 @@ class S3Storage(Storage): try: self.client.head_bucket(Bucket=name) return True - except botocore.exceptions.ClientError: - return False + except botocore.exceptions.ClientError as exc: + if int(exc.response['Error']['Code']) == 404: + return False + raise From 14b7ce18d4eeb2821f18ed6be534897708eeafa0 Mon Sep 17 00:00:00 2001 From: Adrian Moennich Date: Fri, 2 Nov 2018 14:19:48 +0100 Subject: [PATCH 30/54] Storage/S3: Save content type during upload --- storage_s3/indico_storage_s3/plugin.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/storage_s3/indico_storage_s3/plugin.py b/storage_s3/indico_storage_s3/plugin.py index 10bdf9e..97cbb97 100644 --- a/storage_s3/indico_storage_s3/plugin.py +++ b/storage_s3/indico_storage_s3/plugin.py @@ -118,8 +118,9 @@ class S3Storage(Storage): bucket = self._get_current_bucket() checksum = get_file_checksum(fileobj) fileobj.seek(0) - contentmd5 = checksum.decode('hex').encode('base64').strip() - self.client.put_object(Body=fileobj, Bucket=bucket, ContentMD5=contentmd5, Key=name) + content_md5 = checksum.decode('hex').encode('base64').strip() + self.client.put_object(Body=fileobj, Bucket=bucket, Key=name, + ContentType=content_type, ContentMD5=content_md5) file_id = '{}//{}'.format(bucket, name) return file_id, checksum except Exception as e: From 58b96018ede157fdd8de3608c5b76e386bdd82ff Mon Sep 17 00:00:00 2001 From: Adrian Moennich 
Date: Fri, 2 Nov 2018 14:46:29 +0100 Subject: [PATCH 31/54] Storage/S3: Support bucket versioning --- storage_s3/indico_storage_s3/plugin.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/storage_s3/indico_storage_s3/plugin.py b/storage_s3/indico_storage_s3/plugin.py index 97cbb97..cd269b1 100644 --- a/storage_s3/indico_storage_s3/plugin.py +++ b/storage_s3/indico_storage_s3/plugin.py @@ -95,6 +95,7 @@ class S3Storage(Storage): session_kwargs['aws_secret_access_key'] = data['secret_key'] self.bucket = data['bucket'] self.bucket_policy_file = data.get('bucket_policy_file') + self.bucket_versioning = data.get('bucket_versioning') in ('1', 'true', 'yes') session = boto3.session.Session(**session_kwargs) self.client = session.client('s3', endpoint_url=endpoint_url) @@ -174,6 +175,8 @@ class S3Storage(Storage): return self.client.create_bucket(Bucket=name) S3StoragePlugin.logger.info('New bucket created: %s', name) + if self.bucket_versioning: + self.client.put_bucket_versioning(Bucket=name, VersioningConfiguration={'Status': 'Enabled'}) if self.bucket_policy_file: with open(self.bucket_policy_file) as f: policy = f.read() From 2ccecb6b08348a4b6b7bdc20b1102e1b120c1b91 Mon Sep 17 00:00:00 2001 From: Adrian Moennich Date: Fri, 2 Nov 2018 15:51:07 +0100 Subject: [PATCH 32/54] Storage/S3: Make dynamic bucket names non-predictable --- storage_s3/indico_storage_s3/plugin.py | 30 +++++++++---- storage_s3/indico_storage_s3/task.py | 6 +-- storage_s3/tests/plugin_test.py | 62 ++++++++++++++------------ 3 files changed, 59 insertions(+), 39 deletions(-) diff --git a/storage_s3/indico_storage_s3/plugin.py b/storage_s3/indico_storage_s3/plugin.py index cd269b1..03497c1 100644 --- a/storage_s3/indico_storage_s3/plugin.py +++ b/storage_s3/indico_storage_s3/plugin.py @@ -16,6 +16,8 @@ from __future__ import unicode_literals +import hashlib +import hmac import sys from contextlib import contextmanager from datetime import date @@ -65,7 +67,7 @@ class 
S3StoragePlugin(IndicoPlugin): click.echo('Storage {} does not exist'.format(key)) sys.exit(1) if isinstance(storage_instance, S3Storage): - bucket_name = storage_instance._get_current_bucket() + bucket_name = storage_instance._get_current_bucket_name() if storage_instance._bucket_exists(bucket_name): click.echo('Storage {}: bucket {} already exists'.format(key, bucket_name)) continue @@ -93,15 +95,19 @@ class S3Storage(Storage): session_kwargs['aws_access_key_id'] = data['access_key'] if 'secret_key' in data: session_kwargs['aws_secret_access_key'] = data['secret_key'] - self.bucket = data['bucket'] + self.original_bucket_name = data['bucket'] self.bucket_policy_file = data.get('bucket_policy_file') self.bucket_versioning = data.get('bucket_versioning') in ('1', 'true', 'yes') session = boto3.session.Session(**session_kwargs) self.client = session.client('s3', endpoint_url=endpoint_url) + self.bucket_secret = (data.get('bucket_secret', '') or + session._session.get_scoped_config().get('indico_bucket_secret', '')).encode('utf-8') + if not self.bucket_secret and self._has_dynamic_bucket_name: + raise StorageError('A bucket secret is required when using dynamic bucket names') def open(self, file_id): try: - s3_object = self.client.get_object(Bucket=self._get_current_bucket(), Key=file_id)['Body'] + s3_object = self.client.get_object(Bucket=self._get_current_bucket_name(), Key=file_id)['Body'] return BytesIO(s3_object.read()) except Exception as e: raise StorageError('Could not open "{}": {}'.format(file_id, e)), None, sys.exc_info()[2] @@ -116,7 +122,7 @@ class S3Storage(Storage): def save(self, name, content_type, filename, fileobj): try: fileobj = self._ensure_fileobj(fileobj) - bucket = self._get_current_bucket() + bucket = self._get_current_bucket_name() checksum = get_file_checksum(fileobj) fileobj.seek(0) content_md5 = checksum.decode('hex').encode('base64').strip() @@ -157,11 +163,19 @@ class S3Storage(Storage): except Exception as e: raise StorageError('Could 
not send file "{}": {}'.format(file_id, e)), None, sys.exc_info()[2] - def _get_current_bucket(self): - return self._replace_bucket_placeholders(self.bucket, date.today()) + def _get_current_bucket_name(self): + return self._get_bucket_name(date.today()) - def _get_original_bucket_name(self): - return self.bucket + def _get_bucket_name(self, date): + name = self._replace_bucket_placeholders(self.original_bucket_name, date) + if name == self.original_bucket_name or not self.bucket_secret: + return name + token = hmac.new(self.bucket_secret, name, hashlib.md5).hexdigest() + return '{}-{}'.format(name, token) + + @property + def _has_dynamic_bucket_name(self): + return self._get_current_bucket_name() != self.original_bucket_name def _replace_bucket_placeholders(self, name, date): name = name.replace('', date.strftime('%Y')) diff --git a/storage_s3/indico_storage_s3/task.py b/storage_s3/indico_storage_s3/task.py index 50b9e3a..d13e8eb 100644 --- a/storage_s3/indico_storage_s3/task.py +++ b/storage_s3/indico_storage_s3/task.py @@ -36,15 +36,15 @@ def create_bucket(): if not isinstance(storage, S3Storage): continue today = date.today() - bucket_name = storage._get_original_bucket_name() + bucket_name = storage.original_bucket_name placeholders = set(re.findall('<.*?>', bucket_name)) if placeholders == {'', ''}: bucket_date = today + relativedelta(weeks=1) - bucket = storage._replace_bucket_placeholders(bucket_name, bucket_date) + bucket = storage._get_bucket_name(bucket_date) storage._create_bucket(bucket) elif placeholders == {'', ''} or (placeholders == {''} and today.month == 12): bucket_date = today + relativedelta(months=1) - bucket = storage._replace_bucket_placeholders(bucket_name, bucket_date) + bucket = storage._get_bucket_name(bucket_date) storage._create_bucket(bucket) elif placeholders == {''} or placeholders == {''} or placeholders == {'', ''}: raise RuntimeError('Placeholders combination in bucket name is not correct: %s', bucket_name) diff --git 
a/storage_s3/tests/plugin_test.py b/storage_s3/tests/plugin_test.py index 6772704..c18efec 100644 --- a/storage_s3/tests/plugin_test.py +++ b/storage_s3/tests/plugin_test.py @@ -16,16 +16,26 @@ from __future__ import unicode_literals -import mock -import pytest -from freezegun import freeze_time +import hashlib +import hmac -import indico_storage_s3.plugin as plugin +import pytest + +from indico_storage_s3 import plugin from indico_storage_s3.task import create_bucket +@pytest.fixture(autouse=True) +def mock_boto3(mocker): + mocker.patch('indico_storage_s3.plugin.boto3') + + +def test_resolve_bucket_name_static(): + storage = plugin.S3Storage('bucket=test,bucket_secret=secret') + assert storage._get_current_bucket_name() == 'test' + + @pytest.mark.parametrize(('date', 'name_template', 'expected_name'), ( - ('2018-04-11', 'name', 'name'), ('2018-04-11', 'name-', 'name-2018'), ('2018-04-11', 'name--', 'name-2018-04'), ('2018-04-11', 'name--', 'name-2018-15'), @@ -33,17 +43,12 @@ from indico_storage_s3.task import create_bucket ('2019-01-01', 'name--', 'name-2019-00'), )) -@mock.patch.object(plugin.S3Storage, '__init__', lambda self: None) -def test_resolve_bucket_name(date, name_template, expected_name): - with freeze_time(date): - storage = plugin.S3Storage() - storage.bucket = name_template - assert storage._get_current_bucket() == expected_name - - -@mock.create_autospec -def mock_bucket_created(self, name): - pass +def test_resolve_bucket_name_dynamic(freeze_time, date, name_template, expected_name): + freeze_time(date) + storage = plugin.S3Storage('bucket={},bucket_secret=secret'.format(name_template)) + name, token = storage._get_current_bucket_name().rsplit('-', 1) + assert name == expected_name + assert token == hmac.new(b'secret', expected_name, hashlib.md5).hexdigest() class MockConfig(object): @@ -65,19 +70,20 @@ class MockConfig(object): ('2018-01-01', 'name--', True, 'name-2018-02', None), ('2018-12-03', 'name--', True, 'name-2018-50', None), )) -def 
test_dynamic_bucket_creation_task(date, name_template, bucket_created, expected_name, expected_error): - with freeze_time(date), \ - mock.patch.object(plugin.S3Storage, '__init__', lambda self: None), \ - mock.patch.object(plugin.S3Storage, '_get_original_bucket_name', return_value=name_template), \ - mock.patch('indico_storage_s3.task.config', MockConfig()), \ - mock.patch('indico_storage_s3.task.get_storage', return_value=plugin.S3Storage()), \ - mock.patch.object(plugin.S3Storage, '_create_bucket', mock_bucket_created) as create_bucket_call: - if expected_error: - with pytest.raises(expected_error): - create_bucket() - else: +def test_dynamic_bucket_creation_task(freeze_time, mocker, date, name_template, bucket_created, expected_name, + expected_error): + freeze_time(date) + storage = plugin.S3Storage('bucket={},bucket_secret=secret'.format(name_template)) + mocker.patch('indico_storage_s3.task.config', MockConfig()) + mocker.patch('indico_storage_s3.task.get_storage', return_value=storage) + create_bucket_call = mocker.patch.object(plugin.S3Storage, '_create_bucket') + if expected_error: + with pytest.raises(expected_error): create_bucket() + else: + create_bucket() if bucket_created: - create_bucket_call.assert_called_with(mock.ANY, expected_name) + token = hmac.new(b'secret', expected_name, hashlib.md5).hexdigest() + create_bucket_call.assert_called_with('{}-{}'.format(expected_name, token)) else: assert not create_bucket_call.called From 9f9fb6a85d7a61cfe2c6ff0fae9574b93ba5c2e4 Mon Sep 17 00:00:00 2001 From: Adrian Moennich Date: Fri, 2 Nov 2018 16:26:45 +0100 Subject: [PATCH 33/54] Storage/S3: Simplify invalid-placeholders logic --- storage_s3/indico_storage_s3/task.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/storage_s3/indico_storage_s3/task.py b/storage_s3/indico_storage_s3/task.py index d13e8eb..e239884 100644 --- a/storage_s3/indico_storage_s3/task.py +++ b/storage_s3/indico_storage_s3/task.py @@ -38,7 +38,9 @@ def 
create_bucket(): today = date.today() bucket_name = storage.original_bucket_name placeholders = set(re.findall('<.*?>', bucket_name)) - if placeholders == {'', ''}: + if not placeholders: + continue + elif placeholders == {'', ''}: bucket_date = today + relativedelta(weeks=1) bucket = storage._get_bucket_name(bucket_date) storage._create_bucket(bucket) @@ -46,5 +48,5 @@ def create_bucket(): bucket_date = today + relativedelta(months=1) bucket = storage._get_bucket_name(bucket_date) storage._create_bucket(bucket) - elif placeholders == {''} or placeholders == {''} or placeholders == {'', ''}: + else: raise RuntimeError('Placeholders combination in bucket name is not correct: %s', bucket_name) From ef06b214b2c1c82700a4f8bb6be66e8a1cd1de1a Mon Sep 17 00:00:00 2001 From: Adrian Moennich Date: Mon, 5 Nov 2018 11:15:29 +0100 Subject: [PATCH 34/54] Storage/S3: Fix open() method --- storage_s3/indico_storage_s3/plugin.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/storage_s3/indico_storage_s3/plugin.py b/storage_s3/indico_storage_s3/plugin.py index 03497c1..e8cd046 100644 --- a/storage_s3/indico_storage_s3/plugin.py +++ b/storage_s3/indico_storage_s3/plugin.py @@ -106,8 +106,9 @@ class S3Storage(Storage): raise StorageError('A bucket secret is required when using dynamic bucket names') def open(self, file_id): + bucket, id_ = file_id.split('//', 1) try: - s3_object = self.client.get_object(Bucket=self._get_current_bucket_name(), Key=file_id)['Body'] + s3_object = self.client.get_object(Bucket=bucket, Key=id_)['Body'] return BytesIO(s3_object.read()) except Exception as e: raise StorageError('Could not open "{}": {}'.format(file_id, e)), None, sys.exc_info()[2] From 4c2767c1c67563ac798a6450ec48bb101cb40605 Mon Sep 17 00:00:00 2001 From: Adrian Moennich Date: Tue, 6 Nov 2018 10:49:03 +0100 Subject: [PATCH 35/54] Storage/S3: Add readonly version --- storage_s3/indico_storage_s3/plugin.py | 30 ++++++++++++++++++++++++-- 
storage_s3/indico_storage_s3/task.py | 4 ++-- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/storage_s3/indico_storage_s3/plugin.py b/storage_s3/indico_storage_s3/plugin.py index e8cd046..02ab5b9 100644 --- a/storage_s3/indico_storage_s3/plugin.py +++ b/storage_s3/indico_storage_s3/plugin.py @@ -35,8 +35,9 @@ from indico.core import signals from indico.core.config import config from indico.core.plugins import IndicoPlugin from indico.core.storage import Storage, StorageError -from indico.core.storage.backend import get_storage +from indico.core.storage.backend import ReadOnlyStorageMixin, StorageReadOnlyError, get_storage from indico.util.fs import get_file_checksum +from indico.util.string import return_ascii class S3StoragePlugin(IndicoPlugin): @@ -51,7 +52,8 @@ class S3StoragePlugin(IndicoPlugin): self.connect(signals.plugin.cli, self._extend_indico_cli) def _get_storage_backends(self, sender, **kwargs): - return S3Storage + yield S3Storage + yield ReadOnlyS3Storage def _extend_indico_cli(self, sender, **kwargs): @cli_command() @@ -66,6 +68,14 @@ class S3StoragePlugin(IndicoPlugin): if storage: click.echo('Storage {} does not exist'.format(key)) sys.exit(1) + continue + + if isinstance(storage_instance, ReadOnlyS3Storage): + if storage: + click.echo('Storage {} is read-only'.format(key)) + sys.exit(1) + continue + if isinstance(storage_instance, S3Storage): bucket_name = storage_instance._get_current_bucket_name() if storage_instance._bucket_exists(bucket_name): @@ -76,6 +86,7 @@ class S3StoragePlugin(IndicoPlugin): elif storage: click.echo('Storage {} is not a s3 storage'.format(key)) sys.exit(1) + return create_s3_bucket @@ -205,3 +216,18 @@ class S3Storage(Storage): if int(exc.response['Error']['Code']) == 404: return False raise + + @return_ascii + def __repr__(self): + return ''.format(self.original_bucket_name) + + +class ReadOnlyS3Storage(ReadOnlyStorageMixin, S3Storage): + name = 's3-readonly' + + def _create_bucket(self, name): + raise 
StorageReadOnlyError('Cannot write to read-only storage') + + @return_ascii + def __repr__(self): + return ''.format(self.original_bucket_name) diff --git a/storage_s3/indico_storage_s3/task.py b/storage_s3/indico_storage_s3/task.py index e239884..8aad2ae 100644 --- a/storage_s3/indico_storage_s3/task.py +++ b/storage_s3/indico_storage_s3/task.py @@ -26,14 +26,14 @@ from indico.core.celery import celery from indico.core.config import config from indico.core.storage.backend import get_storage -from plugin import S3Storage +from indico_storage_s3.plugin import ReadOnlyS3Storage, S3Storage @celery.periodic_task(run_every=crontab(minute=0, hour=1)) def create_bucket(): for key in config.STORAGE_BACKENDS: storage = get_storage(key) - if not isinstance(storage, S3Storage): + if not isinstance(storage, S3Storage) or isinstance(storage, ReadOnlyS3Storage): continue today = date.today() bucket_name = storage.original_bucket_name From 09d577ca88871c0d42d1fad388ab7633bd1833e2 Mon Sep 17 00:00:00 2001 From: Adrian Moennich Date: Tue, 6 Nov 2018 13:35:28 +0100 Subject: [PATCH 36/54] Storage/S3: Expose session/client on the instance This makes it much more convenient when using the backend in scripts, e.g. to import data from another storage to S3. 
--- storage_s3/indico_storage_s3/plugin.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/storage_s3/indico_storage_s3/plugin.py b/storage_s3/indico_storage_s3/plugin.py index 02ab5b9..d0826d4 100644 --- a/storage_s3/indico_storage_s3/plugin.py +++ b/storage_s3/indico_storage_s3/plugin.py @@ -96,9 +96,9 @@ class S3Storage(Storage): def __init__(self, data): data = self._parse_data(data) - endpoint_url = data.get('host') - if endpoint_url and '://' not in endpoint_url: - endpoint_url = 'https://' + endpoint_url + self.endpoint_url = data.get('host') + if self.endpoint_url and '://' not in self.endpoint_url: + self.endpoint_url = 'https://' + self.endpoint_url session_kwargs = {} if 'profile' in data: session_kwargs['profile_name'] = data['profile'] @@ -109,10 +109,10 @@ class S3Storage(Storage): self.original_bucket_name = data['bucket'] self.bucket_policy_file = data.get('bucket_policy_file') self.bucket_versioning = data.get('bucket_versioning') in ('1', 'true', 'yes') - session = boto3.session.Session(**session_kwargs) - self.client = session.client('s3', endpoint_url=endpoint_url) + self.session = boto3.session.Session(**session_kwargs) + self.client = self.session.client('s3', endpoint_url=self.endpoint_url) self.bucket_secret = (data.get('bucket_secret', '') or - session._session.get_scoped_config().get('indico_bucket_secret', '')).encode('utf-8') + self.session._session.get_scoped_config().get('indico_bucket_secret', '')).encode('utf-8') if not self.bucket_secret and self._has_dynamic_bucket_name: raise StorageError('A bucket secret is required when using dynamic bucket names') From 60b692295a43e172407e7897da4213bfb0cf79f2 Mon Sep 17 00:00:00 2001 From: Adrian Moennich Date: Tue, 6 Nov 2018 14:13:34 +0100 Subject: [PATCH 37/54] Storage/S3: Separate dynamic/static bucket backends Like this the bucket name does not need to be part of the file_id when using static bucket names, avoiding updating every single row containing one 
when moving data from another storage backend to S3 --- storage_s3/indico_storage_s3/plugin.py | 150 ++++++++++++++++--------- storage_s3/indico_storage_s3/task.py | 21 ++-- storage_s3/tests/plugin_test.py | 11 +- 3 files changed, 117 insertions(+), 65 deletions(-) diff --git a/storage_s3/indico_storage_s3/plugin.py b/storage_s3/indico_storage_s3/plugin.py index d0826d4..882c8fc 100644 --- a/storage_s3/indico_storage_s3/plugin.py +++ b/storage_s3/indico_storage_s3/plugin.py @@ -53,7 +53,9 @@ class S3StoragePlugin(IndicoPlugin): def _get_storage_backends(self, sender, **kwargs): yield S3Storage + yield DynamicS3Storage yield ReadOnlyS3Storage + yield ReadOnlyDynamicS3Storage def _extend_indico_cli(self, sender, **kwargs): @cli_command() @@ -70,13 +72,13 @@ class S3StoragePlugin(IndicoPlugin): sys.exit(1) continue - if isinstance(storage_instance, ReadOnlyS3Storage): + if isinstance(storage_instance, ReadOnlyStorageMixin): if storage: click.echo('Storage {} is read-only'.format(key)) sys.exit(1) continue - if isinstance(storage_instance, S3Storage): + if isinstance(storage_instance, S3StorageBase): bucket_name = storage_instance._get_current_bucket_name() if storage_instance._bucket_exists(bucket_name): click.echo('Storage {}: bucket {} already exists'.format(key, bucket_name)) @@ -90,12 +92,11 @@ class S3StoragePlugin(IndicoPlugin): return create_s3_bucket -class S3Storage(Storage): - name = 's3' +class S3StorageBase(Storage): simple_data = False def __init__(self, data): - data = self._parse_data(data) + self.parsed_data = data = self._parse_data(data) self.endpoint_url = data.get('host') if self.endpoint_url and '://' not in self.endpoint_url: self.endpoint_url = 'https://' + self.endpoint_url @@ -106,18 +107,19 @@ class S3Storage(Storage): session_kwargs['aws_access_key_id'] = data['access_key'] if 'secret_key' in data: session_kwargs['aws_secret_access_key'] = data['secret_key'] - self.original_bucket_name = data['bucket'] self.bucket_policy_file = 
data.get('bucket_policy_file') self.bucket_versioning = data.get('bucket_versioning') in ('1', 'true', 'yes') self.session = boto3.session.Session(**session_kwargs) self.client = self.session.client('s3', endpoint_url=self.endpoint_url) - self.bucket_secret = (data.get('bucket_secret', '') or - self.session._session.get_scoped_config().get('indico_bucket_secret', '')).encode('utf-8') - if not self.bucket_secret and self._has_dynamic_bucket_name: - raise StorageError('A bucket secret is required when using dynamic bucket names') + + def _get_current_bucket_name(self): + raise NotImplementedError + + def _parse_file_id(self, file_id): + raise NotImplementedError def open(self, file_id): - bucket, id_ = file_id.split('//', 1) + bucket, id_ = self._parse_file_id(file_id) try: s3_object = self.client.get_object(Bucket=bucket, Key=id_)['Body'] return BytesIO(s3_object.read()) @@ -131,29 +133,24 @@ class S3Storage(Storage): tmpfile.flush() yield tmpfile.name - def save(self, name, content_type, filename, fileobj): - try: - fileobj = self._ensure_fileobj(fileobj) - bucket = self._get_current_bucket_name() - checksum = get_file_checksum(fileobj) - fileobj.seek(0) - content_md5 = checksum.decode('hex').encode('base64').strip() - self.client.put_object(Body=fileobj, Bucket=bucket, Key=name, - ContentType=content_type, ContentMD5=content_md5) - file_id = '{}//{}'.format(bucket, name) - return file_id, checksum - except Exception as e: - raise StorageError('Could not save "{}": {}'.format(name, e)), None, sys.exc_info()[2] + def _save(self, bucket, name, content_type, fileobj): + fileobj = self._ensure_fileobj(fileobj) + checksum = get_file_checksum(fileobj) + fileobj.seek(0) + content_md5 = checksum.decode('hex').encode('base64').strip() + self.client.put_object(Body=fileobj, Bucket=bucket, Key=name, + ContentType=content_type, ContentMD5=content_md5) + return checksum def delete(self, file_id): - bucket, id_ = file_id.split('//', 1) + bucket, id_ = 
self._parse_file_id(file_id) try: self.client.delete_object(bucket, id_) except Exception as e: raise StorageError('Could not delete "{}": {}'.format(file_id, e)), None, sys.exc_info()[2] def getsize(self, file_id): - bucket, id_ = file_id.split('//', 1) + bucket, id_ = self._parse_file_id(file_id) try: return self.client.head_object(Bucket=bucket, Key=id_)['ContentLength'] except Exception as e: @@ -161,7 +158,7 @@ class S3Storage(Storage): def send_file(self, file_id, content_type, filename, inline=True): try: - bucket, id_ = file_id.split('//', 1) + bucket, id_ = self._parse_file_id(file_id) content_disp = 'inline' if inline else 'attachment' h = Headers() h.add('Content-Disposition', content_disp, filename=filename) @@ -175,26 +172,6 @@ class S3Storage(Storage): except Exception as e: raise StorageError('Could not send file "{}": {}'.format(file_id, e)), None, sys.exc_info()[2] - def _get_current_bucket_name(self): - return self._get_bucket_name(date.today()) - - def _get_bucket_name(self, date): - name = self._replace_bucket_placeholders(self.original_bucket_name, date) - if name == self.original_bucket_name or not self.bucket_secret: - return name - token = hmac.new(self.bucket_secret, name, hashlib.md5).hexdigest() - return '{}-{}'.format(name, token) - - @property - def _has_dynamic_bucket_name(self): - return self._get_current_bucket_name() != self.original_bucket_name - - def _replace_bucket_placeholders(self, name, date): - name = name.replace('', date.strftime('%Y')) - name = name.replace('', date.strftime('%m')) - name = name.replace('', date.strftime('%W')) - return name - def _create_bucket(self, name): if self._bucket_exists(name): S3StoragePlugin.logger.info('Bucket %s already exists', name) @@ -217,9 +194,77 @@ class S3Storage(Storage): return False raise + +class S3Storage(S3StorageBase): + name = 's3' + + def __init__(self, data): + super(S3Storage, self).__init__(data) + self.bucket_name = self.parsed_data['bucket'] + @return_ascii def 
__repr__(self): - return ''.format(self.original_bucket_name) + return '<{}: {}>'.format(type(self).__name__, self.bucket_name) + + def _get_current_bucket_name(self): + return self.bucket_name + + def _parse_file_id(self, file_id): + return self.bucket_name, file_id + + def save(self, name, content_type, filename, fileobj): + try: + bucket = self._get_current_bucket_name() + checksum = self._save(bucket, name, content_type, fileobj) + return name, checksum + except Exception as e: + raise StorageError('Could not save "{}": {}'.format(name, e)), None, sys.exc_info()[2] + + +class DynamicS3Storage(S3StorageBase): + name = 's3-dynamic' + + def __init__(self, data): + super(DynamicS3Storage, self).__init__(data) + self.bucket_name_template = self.parsed_data['bucket_template'] + self.bucket_secret = (self.parsed_data.get('bucket_secret', '') or + self.session._session.get_scoped_config().get('indico_bucket_secret', '')) + if not any(x in self.bucket_name_template for x in ('', '', '')): + raise StorageError('At least one date placeholder is required when using dynamic bucket names') + if not self.bucket_secret: + raise StorageError('A bucket secret is required when using dynamic bucket names') + + @return_ascii + def __repr__(self): + return '<{}: {}>'.format(type(self).__name__, self.bucket_name_template) + + def _parse_file_id(self, file_id): + return file_id.split('//', 1) + + def _get_current_bucket_name(self): + return self._get_bucket_name(date.today()) + + def _get_bucket_name(self, date): + name = self._replace_bucket_placeholders(self.bucket_name_template, date) + if name == self.bucket_name_template or not self.bucket_secret: + return name + token = hmac.new(self.bucket_secret.encode('utf-8'), name, hashlib.md5).hexdigest() + return '{}-{}'.format(name, token) + + def _replace_bucket_placeholders(self, name, date): + name = name.replace('', date.strftime('%Y')) + name = name.replace('', date.strftime('%m')) + name = name.replace('', date.strftime('%W')) + 
return name + + def save(self, name, content_type, filename, fileobj): + try: + bucket = self._get_current_bucket_name() + checksum = self._save(bucket, name, content_type, fileobj) + file_id = '{}//{}'.format(bucket, name) + return file_id, checksum + except Exception as e: + raise StorageError('Could not save "{}": {}'.format(name, e)), None, sys.exc_info()[2] class ReadOnlyS3Storage(ReadOnlyStorageMixin, S3Storage): @@ -228,6 +273,9 @@ class ReadOnlyS3Storage(ReadOnlyStorageMixin, S3Storage): def _create_bucket(self, name): raise StorageReadOnlyError('Cannot write to read-only storage') - @return_ascii - def __repr__(self): - return ''.format(self.original_bucket_name) + +class ReadOnlyDynamicS3Storage(ReadOnlyStorageMixin, DynamicS3Storage): + name = 's3-dynamic-readonly' + + def _create_bucket(self, name): + raise StorageReadOnlyError('Cannot write to read-only storage') diff --git a/storage_s3/indico_storage_s3/task.py b/storage_s3/indico_storage_s3/task.py index 8aad2ae..b4fa416 100644 --- a/storage_s3/indico_storage_s3/task.py +++ b/storage_s3/indico_storage_s3/task.py @@ -24,29 +24,30 @@ from dateutil.relativedelta import relativedelta from indico.core.celery import celery from indico.core.config import config -from indico.core.storage.backend import get_storage +from indico.core.storage.backend import ReadOnlyStorageMixin, get_storage -from indico_storage_s3.plugin import ReadOnlyS3Storage, S3Storage +from indico_storage_s3.plugin import DynamicS3Storage @celery.periodic_task(run_every=crontab(minute=0, hour=1)) def create_bucket(): for key in config.STORAGE_BACKENDS: storage = get_storage(key) - if not isinstance(storage, S3Storage) or isinstance(storage, ReadOnlyS3Storage): + if not isinstance(storage, DynamicS3Storage) or isinstance(storage, ReadOnlyStorageMixin): continue today = date.today() - bucket_name = storage.original_bucket_name - placeholders = set(re.findall('<.*?>', bucket_name)) + placeholders = set(re.findall('<.*?>', 
storage.bucket_name_template)) if not placeholders: continue elif placeholders == {'', ''}: bucket_date = today + relativedelta(weeks=1) bucket = storage._get_bucket_name(bucket_date) storage._create_bucket(bucket) - elif placeholders == {'', ''} or (placeholders == {''} and today.month == 12): - bucket_date = today + relativedelta(months=1) - bucket = storage._get_bucket_name(bucket_date) - storage._create_bucket(bucket) + elif placeholders == {'', ''} or placeholders == {''}: + if '' in placeholders or today.month == 12: + bucket_date = today + relativedelta(months=1) + bucket = storage._get_bucket_name(bucket_date) + storage._create_bucket(bucket) else: - raise RuntimeError('Placeholders combination in bucket name is not correct: %s', bucket_name) + raise RuntimeError('Invalid placeholder combination in bucket name template: {}' + .format(storage.bucket_name_template)) diff --git a/storage_s3/tests/plugin_test.py b/storage_s3/tests/plugin_test.py index c18efec..0083be7 100644 --- a/storage_s3/tests/plugin_test.py +++ b/storage_s3/tests/plugin_test.py @@ -31,7 +31,7 @@ def mock_boto3(mocker): def test_resolve_bucket_name_static(): - storage = plugin.S3Storage('bucket=test,bucket_secret=secret') + storage = plugin.S3Storage('bucket=test ') assert storage._get_current_bucket_name() == 'test' @@ -45,7 +45,7 @@ def test_resolve_bucket_name_static(): )) def test_resolve_bucket_name_dynamic(freeze_time, date, name_template, expected_name): freeze_time(date) - storage = plugin.S3Storage('bucket={},bucket_secret=secret'.format(name_template)) + storage = plugin.DynamicS3Storage('bucket_template={},bucket_secret=secret'.format(name_template)) name, token = storage._get_current_bucket_name().rsplit('-', 1) assert name == expected_name assert token == hmac.new(b'secret', expected_name, hashlib.md5).hexdigest() @@ -73,10 +73,13 @@ class MockConfig(object): def test_dynamic_bucket_creation_task(freeze_time, mocker, date, name_template, bucket_created, expected_name, 
expected_error): freeze_time(date) - storage = plugin.S3Storage('bucket={},bucket_secret=secret'.format(name_template)) + if '<' in name_template: + storage = plugin.DynamicS3Storage('bucket_template={},bucket_secret=secret'.format(name_template)) + else: + storage = plugin.S3Storage('bucket={}'.format(name_template)) mocker.patch('indico_storage_s3.task.config', MockConfig()) mocker.patch('indico_storage_s3.task.get_storage', return_value=storage) - create_bucket_call = mocker.patch.object(plugin.S3Storage, '_create_bucket') + create_bucket_call = mocker.patch.object(plugin.DynamicS3Storage, '_create_bucket') if expected_error: with pytest.raises(expected_error): create_bucket() From 16069e4ac7690201d8c6268ef958d902485861e1 Mon Sep 17 00:00:00 2001 From: Adrian Moennich Date: Wed, 7 Nov 2018 18:04:27 +0100 Subject: [PATCH 38/54] Storage/S3: Split plugin and storage --- storage_s3/indico_storage_s3/plugin.py | 207 +-------------------- storage_s3/indico_storage_s3/storage.py | 227 ++++++++++++++++++++++++ 2 files changed, 231 insertions(+), 203 deletions(-) create mode 100644 storage_s3/indico_storage_s3/storage.py diff --git a/storage_s3/indico_storage_s3/plugin.py b/storage_s3/indico_storage_s3/plugin.py index 882c8fc..ece4766 100644 --- a/storage_s3/indico_storage_s3/plugin.py +++ b/storage_s3/indico_storage_s3/plugin.py @@ -16,28 +16,18 @@ from __future__ import unicode_literals -import hashlib -import hmac import sys -from contextlib import contextmanager -from datetime import date -from io import BytesIO -from tempfile import NamedTemporaryFile -import boto3 -import botocore import click -from werkzeug.datastructures import Headers -from werkzeug.utils import redirect from indico.cli.core import cli_command from indico.core import signals from indico.core.config import config from indico.core.plugins import IndicoPlugin -from indico.core.storage import Storage, StorageError -from indico.core.storage.backend import ReadOnlyStorageMixin, StorageReadOnlyError, 
get_storage -from indico.util.fs import get_file_checksum -from indico.util.string import return_ascii +from indico.core.storage.backend import ReadOnlyStorageMixin, get_storage + +from indico_storage_s3.storage import (DynamicS3Storage, ReadOnlyDynamicS3Storage, ReadOnlyS3Storage, S3Storage, + S3StorageBase) class S3StoragePlugin(IndicoPlugin): @@ -90,192 +80,3 @@ class S3StoragePlugin(IndicoPlugin): sys.exit(1) return create_s3_bucket - - -class S3StorageBase(Storage): - simple_data = False - - def __init__(self, data): - self.parsed_data = data = self._parse_data(data) - self.endpoint_url = data.get('host') - if self.endpoint_url and '://' not in self.endpoint_url: - self.endpoint_url = 'https://' + self.endpoint_url - session_kwargs = {} - if 'profile' in data: - session_kwargs['profile_name'] = data['profile'] - if 'access_key' in data: - session_kwargs['aws_access_key_id'] = data['access_key'] - if 'secret_key' in data: - session_kwargs['aws_secret_access_key'] = data['secret_key'] - self.bucket_policy_file = data.get('bucket_policy_file') - self.bucket_versioning = data.get('bucket_versioning') in ('1', 'true', 'yes') - self.session = boto3.session.Session(**session_kwargs) - self.client = self.session.client('s3', endpoint_url=self.endpoint_url) - - def _get_current_bucket_name(self): - raise NotImplementedError - - def _parse_file_id(self, file_id): - raise NotImplementedError - - def open(self, file_id): - bucket, id_ = self._parse_file_id(file_id) - try: - s3_object = self.client.get_object(Bucket=bucket, Key=id_)['Body'] - return BytesIO(s3_object.read()) - except Exception as e: - raise StorageError('Could not open "{}": {}'.format(file_id, e)), None, sys.exc_info()[2] - - @contextmanager - def get_local_path(self, file_id): - with NamedTemporaryFile(suffix='indico.s3', dir=config.TEMP_DIR) as tmpfile: - self._copy_file(self.open(file_id), tmpfile) - tmpfile.flush() - yield tmpfile.name - - def _save(self, bucket, name, content_type, fileobj): - 
fileobj = self._ensure_fileobj(fileobj) - checksum = get_file_checksum(fileobj) - fileobj.seek(0) - content_md5 = checksum.decode('hex').encode('base64').strip() - self.client.put_object(Body=fileobj, Bucket=bucket, Key=name, - ContentType=content_type, ContentMD5=content_md5) - return checksum - - def delete(self, file_id): - bucket, id_ = self._parse_file_id(file_id) - try: - self.client.delete_object(bucket, id_) - except Exception as e: - raise StorageError('Could not delete "{}": {}'.format(file_id, e)), None, sys.exc_info()[2] - - def getsize(self, file_id): - bucket, id_ = self._parse_file_id(file_id) - try: - return self.client.head_object(Bucket=bucket, Key=id_)['ContentLength'] - except Exception as e: - raise StorageError('Could not get size of "{}": {}'.format(file_id, e)), None, sys.exc_info()[2] - - def send_file(self, file_id, content_type, filename, inline=True): - try: - bucket, id_ = self._parse_file_id(file_id) - content_disp = 'inline' if inline else 'attachment' - h = Headers() - h.add('Content-Disposition', content_disp, filename=filename) - url = self.client.generate_presigned_url('get_object', - Params={'Bucket': bucket, - 'Key': id_, - 'ResponseContentDisposition': h.get('Content-Disposition'), - 'ResponseContentType': content_type}, - ExpiresIn=120) - return redirect(url) - except Exception as e: - raise StorageError('Could not send file "{}": {}'.format(file_id, e)), None, sys.exc_info()[2] - - def _create_bucket(self, name): - if self._bucket_exists(name): - S3StoragePlugin.logger.info('Bucket %s already exists', name) - return - self.client.create_bucket(Bucket=name) - S3StoragePlugin.logger.info('New bucket created: %s', name) - if self.bucket_versioning: - self.client.put_bucket_versioning(Bucket=name, VersioningConfiguration={'Status': 'Enabled'}) - if self.bucket_policy_file: - with open(self.bucket_policy_file) as f: - policy = f.read() - self.client.put_bucket_policy(Bucket=name, Policy=policy) - - def _bucket_exists(self, name): 
- try: - self.client.head_bucket(Bucket=name) - return True - except botocore.exceptions.ClientError as exc: - if int(exc.response['Error']['Code']) == 404: - return False - raise - - -class S3Storage(S3StorageBase): - name = 's3' - - def __init__(self, data): - super(S3Storage, self).__init__(data) - self.bucket_name = self.parsed_data['bucket'] - - @return_ascii - def __repr__(self): - return '<{}: {}>'.format(type(self).__name__, self.bucket_name) - - def _get_current_bucket_name(self): - return self.bucket_name - - def _parse_file_id(self, file_id): - return self.bucket_name, file_id - - def save(self, name, content_type, filename, fileobj): - try: - bucket = self._get_current_bucket_name() - checksum = self._save(bucket, name, content_type, fileobj) - return name, checksum - except Exception as e: - raise StorageError('Could not save "{}": {}'.format(name, e)), None, sys.exc_info()[2] - - -class DynamicS3Storage(S3StorageBase): - name = 's3-dynamic' - - def __init__(self, data): - super(DynamicS3Storage, self).__init__(data) - self.bucket_name_template = self.parsed_data['bucket_template'] - self.bucket_secret = (self.parsed_data.get('bucket_secret', '') or - self.session._session.get_scoped_config().get('indico_bucket_secret', '')) - if not any(x in self.bucket_name_template for x in ('', '', '')): - raise StorageError('At least one date placeholder is required when using dynamic bucket names') - if not self.bucket_secret: - raise StorageError('A bucket secret is required when using dynamic bucket names') - - @return_ascii - def __repr__(self): - return '<{}: {}>'.format(type(self).__name__, self.bucket_name_template) - - def _parse_file_id(self, file_id): - return file_id.split('//', 1) - - def _get_current_bucket_name(self): - return self._get_bucket_name(date.today()) - - def _get_bucket_name(self, date): - name = self._replace_bucket_placeholders(self.bucket_name_template, date) - if name == self.bucket_name_template or not self.bucket_secret: - return 
name - token = hmac.new(self.bucket_secret.encode('utf-8'), name, hashlib.md5).hexdigest() - return '{}-{}'.format(name, token) - - def _replace_bucket_placeholders(self, name, date): - name = name.replace('', date.strftime('%Y')) - name = name.replace('', date.strftime('%m')) - name = name.replace('', date.strftime('%W')) - return name - - def save(self, name, content_type, filename, fileobj): - try: - bucket = self._get_current_bucket_name() - checksum = self._save(bucket, name, content_type, fileobj) - file_id = '{}//{}'.format(bucket, name) - return file_id, checksum - except Exception as e: - raise StorageError('Could not save "{}": {}'.format(name, e)), None, sys.exc_info()[2] - - -class ReadOnlyS3Storage(ReadOnlyStorageMixin, S3Storage): - name = 's3-readonly' - - def _create_bucket(self, name): - raise StorageReadOnlyError('Cannot write to read-only storage') - - -class ReadOnlyDynamicS3Storage(ReadOnlyStorageMixin, DynamicS3Storage): - name = 's3-dynamic-readonly' - - def _create_bucket(self, name): - raise StorageReadOnlyError('Cannot write to read-only storage') diff --git a/storage_s3/indico_storage_s3/storage.py b/storage_s3/indico_storage_s3/storage.py new file mode 100644 index 0000000..124c461 --- /dev/null +++ b/storage_s3/indico_storage_s3/storage.py @@ -0,0 +1,227 @@ +# This file is part of Indico. +# Copyright (C) 2002 - 2018 European Organization for Nuclear Research (CERN). +# +# Indico is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 3 of the +# License, or (at your option) any later version. +# +# Indico is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with Indico; if not, see . + +from __future__ import unicode_literals + +import hashlib +import hmac +import sys +from contextlib import contextmanager +from datetime import date +from io import BytesIO +from tempfile import NamedTemporaryFile + +import boto3 +import botocore +from werkzeug.datastructures import Headers +from werkzeug.utils import redirect + +from indico.core.config import config +from indico.core.storage import Storage, StorageError +from indico.core.storage.backend import ReadOnlyStorageMixin, StorageReadOnlyError +from indico.util.fs import get_file_checksum +from indico.util.string import return_ascii + + +class S3StorageBase(Storage): + simple_data = False + + def __init__(self, data): + self.parsed_data = data = self._parse_data(data) + self.endpoint_url = data.get('host') + if self.endpoint_url and '://' not in self.endpoint_url: + self.endpoint_url = 'https://' + self.endpoint_url + session_kwargs = {} + if 'profile' in data: + session_kwargs['profile_name'] = data['profile'] + if 'access_key' in data: + session_kwargs['aws_access_key_id'] = data['access_key'] + if 'secret_key' in data: + session_kwargs['aws_secret_access_key'] = data['secret_key'] + self.bucket_policy_file = data.get('bucket_policy_file') + self.bucket_versioning = data.get('bucket_versioning') in ('1', 'true', 'yes') + self.session = boto3.session.Session(**session_kwargs) + self.client = self.session.client('s3', endpoint_url=self.endpoint_url) + + def _get_current_bucket_name(self): + raise NotImplementedError + + def _parse_file_id(self, file_id): + raise NotImplementedError + + def open(self, file_id): + bucket, id_ = self._parse_file_id(file_id) + try: + s3_object = self.client.get_object(Bucket=bucket, Key=id_)['Body'] + return BytesIO(s3_object.read()) + except Exception as e: + raise StorageError('Could not open "{}": {}'.format(file_id, e)), None, sys.exc_info()[2] + + 
@contextmanager + def get_local_path(self, file_id): + with NamedTemporaryFile(suffix='indico.s3', dir=config.TEMP_DIR) as tmpfile: + self._copy_file(self.open(file_id), tmpfile) + tmpfile.flush() + yield tmpfile.name + + def _save(self, bucket, name, content_type, fileobj): + fileobj = self._ensure_fileobj(fileobj) + checksum = get_file_checksum(fileobj) + fileobj.seek(0) + content_md5 = checksum.decode('hex').encode('base64').strip() + self.client.put_object(Body=fileobj, Bucket=bucket, Key=name, + ContentType=content_type, ContentMD5=content_md5) + return checksum + + def delete(self, file_id): + bucket, id_ = self._parse_file_id(file_id) + try: + self.client.delete_object(bucket, id_) + except Exception as e: + raise StorageError('Could not delete "{}": {}'.format(file_id, e)), None, sys.exc_info()[2] + + def getsize(self, file_id): + bucket, id_ = self._parse_file_id(file_id) + try: + return self.client.head_object(Bucket=bucket, Key=id_)['ContentLength'] + except Exception as e: + raise StorageError('Could not get size of "{}": {}'.format(file_id, e)), None, sys.exc_info()[2] + + def send_file(self, file_id, content_type, filename, inline=True): + try: + bucket, id_ = self._parse_file_id(file_id) + content_disp = 'inline' if inline else 'attachment' + h = Headers() + h.add('Content-Disposition', content_disp, filename=filename) + url = self.client.generate_presigned_url('get_object', + Params={'Bucket': bucket, + 'Key': id_, + 'ResponseContentDisposition': h.get('Content-Disposition'), + 'ResponseContentType': content_type}, + ExpiresIn=120) + return redirect(url) + except Exception as e: + raise StorageError('Could not send file "{}": {}'.format(file_id, e)), None, sys.exc_info()[2] + + def _create_bucket(self, name): + from indico_storage_s3.plugin import S3StoragePlugin + + if self._bucket_exists(name): + S3StoragePlugin.logger.info('Bucket %s already exists', name) + return + self.client.create_bucket(Bucket=name) + S3StoragePlugin.logger.info('New bucket 
created: %s', name) + if self.bucket_versioning: + self.client.put_bucket_versioning(Bucket=name, VersioningConfiguration={'Status': 'Enabled'}) + if self.bucket_policy_file: + with open(self.bucket_policy_file) as f: + policy = f.read() + self.client.put_bucket_policy(Bucket=name, Policy=policy) + + def _bucket_exists(self, name): + try: + self.client.head_bucket(Bucket=name) + return True + except botocore.exceptions.ClientError as exc: + if int(exc.response['Error']['Code']) == 404: + return False + raise + + +class S3Storage(S3StorageBase): + name = 's3' + + def __init__(self, data): + super(S3Storage, self).__init__(data) + self.bucket_name = self.parsed_data['bucket'] + + @return_ascii + def __repr__(self): + return '<{}: {}>'.format(type(self).__name__, self.bucket_name) + + def _get_current_bucket_name(self): + return self.bucket_name + + def _parse_file_id(self, file_id): + return self.bucket_name, file_id + + def save(self, name, content_type, filename, fileobj): + try: + bucket = self._get_current_bucket_name() + checksum = self._save(bucket, name, content_type, fileobj) + return name, checksum + except Exception as e: + raise StorageError('Could not save "{}": {}'.format(name, e)), None, sys.exc_info()[2] + + +class DynamicS3Storage(S3StorageBase): + name = 's3-dynamic' + + def __init__(self, data): + super(DynamicS3Storage, self).__init__(data) + self.bucket_name_template = self.parsed_data['bucket_template'] + self.bucket_secret = (self.parsed_data.get('bucket_secret', '') or + self.session._session.get_scoped_config().get('indico_bucket_secret', '')) + if not any(x in self.bucket_name_template for x in ('', '', '')): + raise StorageError('At least one date placeholder is required when using dynamic bucket names') + if not self.bucket_secret: + raise StorageError('A bucket secret is required when using dynamic bucket names') + + @return_ascii + def __repr__(self): + return '<{}: {}>'.format(type(self).__name__, self.bucket_name_template) + + def 
_parse_file_id(self, file_id): + return file_id.split('//', 1) + + def _get_current_bucket_name(self): + return self._get_bucket_name(date.today()) + + def _get_bucket_name(self, date): + name = self._replace_bucket_placeholders(self.bucket_name_template, date) + if name == self.bucket_name_template or not self.bucket_secret: + return name + token = hmac.new(self.bucket_secret.encode('utf-8'), name, hashlib.md5).hexdigest() + return '{}-{}'.format(name, token) + + def _replace_bucket_placeholders(self, name, date): + name = name.replace('', date.strftime('%Y')) + name = name.replace('', date.strftime('%m')) + name = name.replace('', date.strftime('%W')) + return name + + def save(self, name, content_type, filename, fileobj): + try: + bucket = self._get_current_bucket_name() + checksum = self._save(bucket, name, content_type, fileobj) + file_id = '{}//{}'.format(bucket, name) + return file_id, checksum + except Exception as e: + raise StorageError('Could not save "{}": {}'.format(name, e)), None, sys.exc_info()[2] + + +class ReadOnlyS3Storage(ReadOnlyStorageMixin, S3Storage): + name = 's3-readonly' + + def _create_bucket(self, name): + raise StorageReadOnlyError('Cannot write to read-only storage') + + +class ReadOnlyDynamicS3Storage(ReadOnlyStorageMixin, DynamicS3Storage): + name = 's3-dynamic-readonly' + + def _create_bucket(self, name): + raise StorageReadOnlyError('Cannot write to read-only storage') From d19afe71575b08119034a470fffdcd377494a4da Mon Sep 17 00:00:00 2001 From: Adrian Moennich Date: Thu, 8 Nov 2018 11:13:39 +0100 Subject: [PATCH 39/54] Storage/S3: Add API to get list of buckets --- storage_s3/indico_storage_s3/__init__.py | 4 ++ storage_s3/indico_storage_s3/blueprint.py | 25 +++++++ storage_s3/indico_storage_s3/controllers.py | 77 +++++++++++++++++++++ storage_s3/indico_storage_s3/plugin.py | 38 +++++++++- 4 files changed, 143 insertions(+), 1 deletion(-) create mode 100644 storage_s3/indico_storage_s3/blueprint.py create mode 100644 
storage_s3/indico_storage_s3/controllers.py diff --git a/storage_s3/indico_storage_s3/__init__.py b/storage_s3/indico_storage_s3/__init__.py index 9eaa627..7d4e5ea 100644 --- a/storage_s3/indico_storage_s3/__init__.py +++ b/storage_s3/indico_storage_s3/__init__.py @@ -17,6 +17,10 @@ from __future__ import unicode_literals from indico.core import signals +from indico.util.i18n import make_bound_gettext + + +_ = make_bound_gettext('search_cern') @signals.import_tasks.connect diff --git a/storage_s3/indico_storage_s3/blueprint.py b/storage_s3/indico_storage_s3/blueprint.py new file mode 100644 index 0000000..5d3b7ac --- /dev/null +++ b/storage_s3/indico_storage_s3/blueprint.py @@ -0,0 +1,25 @@ +# This file is part of Indico. +# Copyright (C) 2002 - 2018 European Organization for Nuclear Research (CERN). +# +# Indico is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 3 of the +# License, or (at your option) any later version. +# +# Indico is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Indico; if not, see . + +from __future__ import unicode_literals + +from indico.core.plugins import IndicoPluginBlueprint + +from indico_storage_s3.controllers import RHBuckets + + +blueprint = IndicoPluginBlueprint('storage_s3', __name__, url_prefix='/api/plugin/s3') +blueprint.add_url_rule('/buckets', 'buckets', RHBuckets) diff --git a/storage_s3/indico_storage_s3/controllers.py b/storage_s3/indico_storage_s3/controllers.py new file mode 100644 index 0000000..1c1ae27 --- /dev/null +++ b/storage_s3/indico_storage_s3/controllers.py @@ -0,0 +1,77 @@ +# This file is part of Indico. 
+# Copyright (C) 2002 - 2018 European Organization for Nuclear Research (CERN). +# +# Indico is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 3 of the +# License, or (at your option) any later version. +# +# Indico is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Indico; if not, see . + +from __future__ import unicode_literals + +from flask import current_app, jsonify, request +from werkzeug.exceptions import NotFound, Unauthorized + +from indico.core.config import config +from indico.core.db import db +from indico.core.storage import StoredFileMixin +from indico.core.storage.backend import ReadOnlyStorageMixin, get_storage +from indico.web.rh import RH + +from indico_storage_s3.storage import DynamicS3Storage, S3Storage, S3StorageBase + + +class RHBuckets(RH): + """Provide information on used S3 buckets""" + + def _check_access(self): + from indico_storage_s3.plugin import S3StoragePlugin + auth = request.authorization + if not S3StoragePlugin.settings.get('bucket_info_enabled'): + raise NotFound + username = S3StoragePlugin.settings.get('username') + password = S3StoragePlugin.settings.get('password') + if not auth or not auth.password or auth.username != username or auth.password != password: + response = current_app.response_class('Authorization required', 401, + {'WWW-Authenticate': 'Basic realm="Indico - S3 Buckets"'}) + raise Unauthorized(response=response) + + def _get_static_info(self, storage): + return { + 'bucket': storage.bucket_name, + 'dynamic': False, + } + + def _get_dynamic_info(self, backend_name, storage): + buckets = set() + for model in 
StoredFileMixin.__subclasses__(): + query = (db.session.query(db.func.split_part(model.storage_file_id, '//', 1)) + .filter(model.storage_file_id.isnot(None), model.storage_backend == backend_name)) + buckets.update(bucket for bucket, in query) + + return { + 'buckets': sorted(buckets), + 'dynamic': True, + 'template': storage.bucket_name_template, + } + + def _process(self): + data = {} + for key in config.STORAGE_BACKENDS: + storage = get_storage(key) + if not isinstance(storage, S3StorageBase): + continue + readonly = isinstance(storage, ReadOnlyStorageMixin) + data[key] = {'readonly': readonly} + if isinstance(storage, S3Storage): + data[key].update(self._get_static_info(storage)) + elif isinstance(storage, DynamicS3Storage): + data[key].update(self._get_dynamic_info(key, storage)) + return jsonify(data) diff --git a/storage_s3/indico_storage_s3/plugin.py b/storage_s3/indico_storage_s3/plugin.py index ece4766..f7b0526 100644 --- a/storage_s3/indico_storage_s3/plugin.py +++ b/storage_s3/indico_storage_s3/plugin.py @@ -19,23 +19,56 @@ from __future__ import unicode_literals import sys import click +from markupsafe import Markup +from wtforms.fields import BooleanField, StringField +from wtforms.validators import DataRequired from indico.cli.core import cli_command from indico.core import signals from indico.core.config import config -from indico.core.plugins import IndicoPlugin +from indico.core.plugins import IndicoPlugin, url_for_plugin from indico.core.storage.backend import ReadOnlyStorageMixin, get_storage +from indico.web.forms.base import IndicoForm +from indico.web.forms.fields import IndicoPasswordField +from indico.web.forms.validators import HiddenUnless +from indico.web.forms.widgets import SwitchWidget +from indico_storage_s3 import _ +from indico_storage_s3.blueprint import blueprint from indico_storage_s3.storage import (DynamicS3Storage, ReadOnlyDynamicS3Storage, ReadOnlyS3Storage, S3Storage, S3StorageBase) +class SettingsForm(IndicoForm): + 
bucket_info_enabled = BooleanField(_("Bucket info API"), widget=SwitchWidget()) + username = StringField(_("Username"), [HiddenUnless('bucket_info_enabled', preserve_data=True), DataRequired()], + description=_("The username to access the S3 bucket info endpoint")) + password = IndicoPasswordField(_('Password'), + [HiddenUnless('bucket_info_enabled', preserve_data=True), DataRequired()], + toggle=True, + description=_("The password to access the S3 bucket info endpoint")) + + def __init__(self, *args, **kwargs): + super(SettingsForm, self).__init__(*args, **kwargs) + url = Markup('{}').format(url_for_plugin('storage_s3.buckets')) + self.bucket_info_enabled.description = _("Enables an API on {url} that returns information on all S3 buckets " + "currently in use, including dynamically-named ones.").format(url=url) + + class S3StoragePlugin(IndicoPlugin): """S3 Storage Provides S3 storage backends. """ + configurable = True + settings_form = SettingsForm + default_settings = { + 'bucket_info_enabled': False, + 'username': '', + 'password': '' + } + def init(self): super(S3StoragePlugin, self).init() self.connect(signals.get_storage_backends, self._get_storage_backends) @@ -47,6 +80,9 @@ class S3StoragePlugin(IndicoPlugin): yield ReadOnlyS3Storage yield ReadOnlyDynamicS3Storage + def get_blueprints(self): + return blueprint + def _extend_indico_cli(self, sender, **kwargs): @cli_command() @click.option('--storage', default=None, metavar='NAME', help='Storage backend to create bucket for') From 1a49a6d7a6770bdb8b3946f251d421c01ae4eaab Mon Sep 17 00:00:00 2001 From: Adrian Moennich Date: Thu, 8 Nov 2018 11:35:02 +0100 Subject: [PATCH 40/54] Storage/S3: Add custom metadata exposed via API --- storage_s3/indico_storage_s3/controllers.py | 2 +- storage_s3/indico_storage_s3/storage.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/storage_s3/indico_storage_s3/controllers.py b/storage_s3/indico_storage_s3/controllers.py index 1c1ae27..5995b79 100644 --- 
a/storage_s3/indico_storage_s3/controllers.py +++ b/storage_s3/indico_storage_s3/controllers.py @@ -69,7 +69,7 @@ class RHBuckets(RH): if not isinstance(storage, S3StorageBase): continue readonly = isinstance(storage, ReadOnlyStorageMixin) - data[key] = {'readonly': readonly} + data[key] = {'readonly': readonly, 'meta': storage.meta} if isinstance(storage, S3Storage): data[key].update(self._get_static_info(storage)) elif isinstance(storage, DynamicS3Storage): diff --git a/storage_s3/indico_storage_s3/storage.py b/storage_s3/indico_storage_s3/storage.py index 124c461..f370f07 100644 --- a/storage_s3/indico_storage_s3/storage.py +++ b/storage_s3/indico_storage_s3/storage.py @@ -53,6 +53,7 @@ class S3StorageBase(Storage): session_kwargs['aws_secret_access_key'] = data['secret_key'] self.bucket_policy_file = data.get('bucket_policy_file') self.bucket_versioning = data.get('bucket_versioning') in ('1', 'true', 'yes') + self.meta = data.get('meta') self.session = boto3.session.Session(**session_kwargs) self.client = self.session.client('s3', endpoint_url=self.endpoint_url) From d2a5aadd9bfa1b3c521ea3cbebca7561164ac627 Mon Sep 17 00:00:00 2001 From: Adrian Moennich Date: Thu, 8 Nov 2018 17:54:26 +0100 Subject: [PATCH 41/54] Storage/S3: Remove -dev from version --- storage_s3/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage_s3/setup.py b/storage_s3/setup.py index d730b8b..3f6b507 100644 --- a/storage_s3/setup.py +++ b/storage_s3/setup.py @@ -21,7 +21,7 @@ from setuptools import find_packages, setup setup( name='indico-plugin-storage-s3', - version='2.0-dev', + version='2.0', description='S3 storage backend for Indico', url='https://github.com/indico/indico-plugins', license='https://www.gnu.org/licenses/gpl-3.0.txt', From f222458a70f5dce365f0145edbd83707206858cd Mon Sep 17 00:00:00 2001 From: Adrian Moennich Date: Fri, 9 Nov 2018 11:49:38 +0100 Subject: [PATCH 42/54] Storage/S3: Add option to download files through indico By default we 
redirect to a presigned S3 download URL, with this setting one can instead have Indico download the file and serve it to the client. --- storage_s3/indico_storage_s3/storage.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/storage_s3/indico_storage_s3/storage.py b/storage_s3/indico_storage_s3/storage.py index f370f07..c880141 100644 --- a/storage_s3/indico_storage_s3/storage.py +++ b/storage_s3/indico_storage_s3/storage.py @@ -34,6 +34,7 @@ from indico.core.storage import Storage, StorageError from indico.core.storage.backend import ReadOnlyStorageMixin, StorageReadOnlyError from indico.util.fs import get_file_checksum from indico.util.string import return_ascii +from indico.web.flask.util import send_file class S3StorageBase(Storage): @@ -53,6 +54,7 @@ class S3StorageBase(Storage): session_kwargs['aws_secret_access_key'] = data['secret_key'] self.bucket_policy_file = data.get('bucket_policy_file') self.bucket_versioning = data.get('bucket_versioning') in ('1', 'true', 'yes') + self.proxy_downloads = data.get('proxy') in ('1', 'true', 'yes') self.meta = data.get('meta') self.session = boto3.session.Session(**session_kwargs) self.client = self.session.client('s3', endpoint_url=self.endpoint_url) @@ -102,6 +104,9 @@ class S3StorageBase(Storage): raise StorageError('Could not get size of "{}": {}'.format(file_id, e)), None, sys.exc_info()[2] def send_file(self, file_id, content_type, filename, inline=True): + if self.proxy_downloads: + return send_file(filename, self.open(file_id), content_type, inline=inline) + try: bucket, id_ = self._parse_file_id(file_id) content_disp = 'inline' if inline else 'attachment' From 55d4de2efb14cb5e2ee4ae1721bcdec6d6a054fb Mon Sep 17 00:00:00 2001 From: Adrian Moennich Date: Fri, 9 Nov 2018 12:04:29 +0100 Subject: [PATCH 43/54] Storage/S3: Fix grammar --- storage_s3/indico_storage_s3/plugin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage_s3/indico_storage_s3/plugin.py 
b/storage_s3/indico_storage_s3/plugin.py index f7b0526..84c85cc 100644 --- a/storage_s3/indico_storage_s3/plugin.py +++ b/storage_s3/indico_storage_s3/plugin.py @@ -112,7 +112,7 @@ class S3StoragePlugin(IndicoPlugin): storage_instance._create_bucket(bucket_name) click.echo('Storage {}: bucket {} created'.format(key, bucket_name)) elif storage: - click.echo('Storage {} is not a s3 storage'.format(key)) + click.echo('Storage {} is not an s3 storage'.format(key)) sys.exit(1) return create_s3_bucket From f75fd081ffd95d62de6637f9afb76e44ca56279a Mon Sep 17 00:00:00 2001 From: Adrian Moennich Date: Fri, 9 Nov 2018 17:40:42 +0100 Subject: [PATCH 44/54] Storage/S3: Add missing DISTINCT to query --- storage_s3/indico_storage_s3/controllers.py | 2 +- storage_s3/setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/storage_s3/indico_storage_s3/controllers.py b/storage_s3/indico_storage_s3/controllers.py index 5995b79..eaea835 100644 --- a/storage_s3/indico_storage_s3/controllers.py +++ b/storage_s3/indico_storage_s3/controllers.py @@ -52,7 +52,7 @@ class RHBuckets(RH): def _get_dynamic_info(self, backend_name, storage): buckets = set() for model in StoredFileMixin.__subclasses__(): - query = (db.session.query(db.func.split_part(model.storage_file_id, '//', 1)) + query = (db.session.query(db.func.split_part(model.storage_file_id, '//', 1).distinct()) .filter(model.storage_file_id.isnot(None), model.storage_backend == backend_name)) buckets.update(bucket for bucket, in query) diff --git a/storage_s3/setup.py b/storage_s3/setup.py index 3f6b507..6db2b2c 100644 --- a/storage_s3/setup.py +++ b/storage_s3/setup.py @@ -21,7 +21,7 @@ from setuptools import find_packages, setup setup( name='indico-plugin-storage-s3', - version='2.0', + version='2.0.1', description='S3 storage backend for Indico', url='https://github.com/indico/indico-plugins', license='https://www.gnu.org/licenses/gpl-3.0.txt', From c6b629283cb388c6d16dc262f5fefc60ae324278 Mon Sep 17 00:00:00 
2001 From: Adrian Moennich Date: Fri, 9 Nov 2018 17:41:36 +0100 Subject: [PATCH 45/54] Storage/S3: Fix i18n name --- storage_s3/indico_storage_s3/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage_s3/indico_storage_s3/__init__.py b/storage_s3/indico_storage_s3/__init__.py index 7d4e5ea..d3785c5 100644 --- a/storage_s3/indico_storage_s3/__init__.py +++ b/storage_s3/indico_storage_s3/__init__.py @@ -20,7 +20,7 @@ from indico.core import signals from indico.util.i18n import make_bound_gettext -_ = make_bound_gettext('search_cern') +_ = make_bound_gettext('storage_s3') @signals.import_tasks.connect From 566503a0ac946c6010c24c1c4b0b67aa4ae8002a Mon Sep 17 00:00:00 2001 From: Adrian Moennich Date: Mon, 12 Nov 2018 11:19:14 +0100 Subject: [PATCH 46/54] Storage/S3: Remove obsolete checks --- storage_s3/indico_storage_s3/storage.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/storage_s3/indico_storage_s3/storage.py b/storage_s3/indico_storage_s3/storage.py index c880141..03fab29 100644 --- a/storage_s3/indico_storage_s3/storage.py +++ b/storage_s3/indico_storage_s3/storage.py @@ -198,8 +198,6 @@ class DynamicS3Storage(S3StorageBase): def _get_bucket_name(self, date): name = self._replace_bucket_placeholders(self.bucket_name_template, date) - if name == self.bucket_name_template or not self.bucket_secret: - return name token = hmac.new(self.bucket_secret.encode('utf-8'), name, hashlib.md5).hexdigest() return '{}-{}'.format(name, token) From 13145470fd7874afdbf24ee9f5efddf89673559d Mon Sep 17 00:00:00 2001 From: Adrian Moennich Date: Mon, 12 Nov 2018 11:37:00 +0100 Subject: [PATCH 47/54] Storage/S3: Limit max bucket name length --- storage_s3/indico_storage_s3/storage.py | 4 ++++ storage_s3/setup.py | 2 +- storage_s3/tests/plugin_test.py | 22 ++++++++++++++++++++-- 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/storage_s3/indico_storage_s3/storage.py b/storage_s3/indico_storage_s3/storage.py index 03fab29..bd77b30 
100644 --- a/storage_s3/indico_storage_s3/storage.py +++ b/storage_s3/indico_storage_s3/storage.py @@ -153,6 +153,8 @@ class S3Storage(S3StorageBase): def __init__(self, data): super(S3Storage, self).__init__(data) self.bucket_name = self.parsed_data['bucket'] + if len(self.bucket_name) > 63: + raise StorageError('Bucket name cannot be longer than 63 chars') @return_ascii def __repr__(self): @@ -185,6 +187,8 @@ class DynamicS3Storage(S3StorageBase): raise StorageError('At least one date placeholder is required when using dynamic bucket names') if not self.bucket_secret: raise StorageError('A bucket secret is required when using dynamic bucket names') + if len(self._replace_bucket_placeholders(self.bucket_name_template, date.today())) > 46: + raise StorageError('Bucket name cannot be longer than 46 chars (to keep at least 16 hash chars)') @return_ascii def __repr__(self): diff --git a/storage_s3/setup.py b/storage_s3/setup.py index 6db2b2c..4eff19b 100644 --- a/storage_s3/setup.py +++ b/storage_s3/setup.py @@ -21,7 +21,7 @@ from setuptools import find_packages, setup setup( name='indico-plugin-storage-s3', - version='2.0.1', + version='2.0.2', description='S3 storage backend for Indico', url='https://github.com/indico/indico-plugins', license='https://www.gnu.org/licenses/gpl-3.0.txt', diff --git a/storage_s3/tests/plugin_test.py b/storage_s3/tests/plugin_test.py index 0083be7..9928db5 100644 --- a/storage_s3/tests/plugin_test.py +++ b/storage_s3/tests/plugin_test.py @@ -21,17 +21,20 @@ import hmac import pytest +from indico.core.storage import StorageError + from indico_storage_s3 import plugin +from indico_storage_s3.storage import DynamicS3Storage, S3Storage from indico_storage_s3.task import create_bucket @pytest.fixture(autouse=True) def mock_boto3(mocker): - mocker.patch('indico_storage_s3.plugin.boto3') + mocker.patch('indico_storage_s3.storage.boto3') def test_resolve_bucket_name_static(): - storage = plugin.S3Storage('bucket=test ') + storage = 
plugin.S3Storage('bucket=test') assert storage._get_current_bucket_name() == 'test' @@ -90,3 +93,18 @@ def test_dynamic_bucket_creation_task(freeze_time, mocker, date, name_template, create_bucket_call.assert_called_with('{}-{}'.format(expected_name, token)) else: assert not create_bucket_call.called + + +def test_static_bucket_name_too_long(): + S3Storage('bucket=test' + 'x'*59) + with pytest.raises(StorageError): + S3Storage('bucket=test' + 'x'*60) + + +def test_dynamic_bucket_name_too_long(): + DynamicS3Storage('bucket_secret=secret,bucket_template=test-' + 'x'*37) + DynamicS3Storage('bucket_secret=secret,bucket_template=test--' + 'x'*34) + with pytest.raises(StorageError): + DynamicS3Storage('bucket_secret=secret,bucket_template=test-' + 'x' * 38) + with pytest.raises(StorageError): + DynamicS3Storage('bucket_secret=secret,bucket_template=test--' + 'x' * 35) From c87e24cee6c3d8926fc12a5e3677d23ca8136ea4 Mon Sep 17 00:00:00 2001 From: Adrian Moennich Date: Mon, 12 Nov 2018 11:57:20 +0100 Subject: [PATCH 48/54] Storage/S3: Add addressing_style option --- storage_s3/indico_storage_s3/storage.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/storage_s3/indico_storage_s3/storage.py b/storage_s3/indico_storage_s3/storage.py index bd77b30..256fca8 100644 --- a/storage_s3/indico_storage_s3/storage.py +++ b/storage_s3/indico_storage_s3/storage.py @@ -25,7 +25,8 @@ from io import BytesIO from tempfile import NamedTemporaryFile import boto3 -import botocore +from botocore.config import Config +from botocore.exceptions import ClientError from werkzeug.datastructures import Headers from werkzeug.utils import redirect @@ -46,18 +47,21 @@ class S3StorageBase(Storage): if self.endpoint_url and '://' not in self.endpoint_url: self.endpoint_url = 'https://' + self.endpoint_url session_kwargs = {} + client_kwargs = {} if 'profile' in data: session_kwargs['profile_name'] = data['profile'] if 'access_key' in data: session_kwargs['aws_access_key_id'] = 
data['access_key'] if 'secret_key' in data: session_kwargs['aws_secret_access_key'] = data['secret_key'] + if 'addressing_style' in data: + client_kwargs['config'] = Config(s3={'addressing_style': data['addressing_style']}) self.bucket_policy_file = data.get('bucket_policy_file') self.bucket_versioning = data.get('bucket_versioning') in ('1', 'true', 'yes') self.proxy_downloads = data.get('proxy') in ('1', 'true', 'yes') self.meta = data.get('meta') self.session = boto3.session.Session(**session_kwargs) - self.client = self.session.client('s3', endpoint_url=self.endpoint_url) + self.client = self.session.client('s3', endpoint_url=self.endpoint_url, **client_kwargs) def _get_current_bucket_name(self): raise NotImplementedError @@ -141,7 +145,7 @@ class S3StorageBase(Storage): try: self.client.head_bucket(Bucket=name) return True - except botocore.exceptions.ClientError as exc: + except ClientError as exc: if int(exc.response['Error']['Code']) == 404: return False raise From e3fdc0b4ddf6e3a38e25f2365b0fb4716d7bc7d4 Mon Sep 17 00:00:00 2001 From: Adrian Moennich Date: Fri, 23 Nov 2018 16:14:22 +0100 Subject: [PATCH 49/54] Storage/S3: Change CLI command to `indico s3 ...` --- storage_s3/indico_storage_s3/plugin.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/storage_s3/indico_storage_s3/plugin.py b/storage_s3/indico_storage_s3/plugin.py index 84c85cc..414a8cf 100644 --- a/storage_s3/indico_storage_s3/plugin.py +++ b/storage_s3/indico_storage_s3/plugin.py @@ -23,7 +23,7 @@ from markupsafe import Markup from wtforms.fields import BooleanField, StringField from wtforms.validators import DataRequired -from indico.cli.core import cli_command +from indico.cli.core import cli_group from indico.core import signals from indico.core.config import config from indico.core.plugins import IndicoPlugin, url_for_plugin @@ -84,9 +84,13 @@ class S3StoragePlugin(IndicoPlugin): return blueprint def _extend_indico_cli(self, sender, **kwargs): - 
@cli_command() + @cli_group() + def s3(): + """Manage S3 storage""" + + @s3.command() @click.option('--storage', default=None, metavar='NAME', help='Storage backend to create bucket for') - def create_s3_bucket(storage): + def create_bucket(storage): """Create s3 storage bucket.""" storages = [storage] if storage else config.STORAGE_BACKENDS for key in storages: @@ -115,4 +119,4 @@ class S3StoragePlugin(IndicoPlugin): click.echo('Storage {} is not an s3 storage'.format(key)) sys.exit(1) - return create_s3_bucket + return s3 From 92f3676235c83b6e70b754598cf30a26f49846e0 Mon Sep 17 00:00:00 2001 From: Adrian Moennich Date: Fri, 23 Nov 2018 17:21:46 +0100 Subject: [PATCH 50/54] Storage/S3: Add filesystem-to-s3 migration tool --- storage_s3/indico_storage_s3/migrate.py | 507 ++++++++++++++++++++++++ storage_s3/indico_storage_s3/plugin.py | 4 +- 2 files changed, 510 insertions(+), 1 deletion(-) create mode 100644 storage_s3/indico_storage_s3/migrate.py diff --git a/storage_s3/indico_storage_s3/migrate.py b/storage_s3/indico_storage_s3/migrate.py new file mode 100644 index 0000000..02ed09b --- /dev/null +++ b/storage_s3/indico_storage_s3/migrate.py @@ -0,0 +1,507 @@ +# This file is part of Indico. +# Copyright (C) 2002 - 2018 European Organization for Nuclear Research (CERN). +# +# Indico is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 3 of the +# License, or (at your option) any later version. +# +# Indico is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Indico; if not, see . 
+ +from __future__ import unicode_literals + +import errno +import json +import os +import re +import stat +import subprocess +import tempfile +from collections import defaultdict +from contextlib import contextmanager +from datetime import datetime +from operator import itemgetter + +import boto3 +import click +from botocore.exceptions import ClientError +from flask.cli import with_appcontext +from jinja2.filters import do_filesizeformat +from sqlalchemy import inspect +from sqlalchemy.orm import joinedload, lazyload, load_only +from sqlalchemy.sql.elements import Tuple + +from indico.cli.core import cli_group +from indico.core.config import config +from indico.core.db import db +from indico.core.storage import StoredFileMixin +from indico.modules.designer.models.images import DesignerImageFile +from indico.modules.events.abstracts.models.files import AbstractFile +from indico.modules.events.layout.models.images import ImageFile +from indico.modules.events.papers.models.files import PaperFile +from indico.modules.events.papers.models.templates import PaperTemplate +from indico.modules.events.registration.models.registrations import RegistrationData +from indico.modules.events.static.models.static import StaticSite, StaticSiteState +from indico.util.console import cformat +from indico.util.date_time import format_human_timedelta +from indico.util.fs import secure_filename +from indico.util.string import crc32 +from indico.util.struct.iterables import committing_iterator + +from indico_storage_s3.storage import S3StorageBase + + +click.disable_unicode_literals_warning = True + +SPECIAL_FILTERS = { + StaticSite: {'state': StaticSiteState.success} +} + +FILE_DT_MAP = { + AbstractFile: lambda obj: obj.abstract.submitted_dt, + DesignerImageFile: lambda obj: obj.template.event.created_dt if obj.template.event else None, + ImageFile: lambda obj: obj.event.created_dt, + PaperFile: lambda obj: obj.paper_revision.submitted_dt, + PaperTemplate: lambda obj: 
obj.event.created_dt, + RegistrationData: lambda obj: obj.registration.submitted_dt, + StaticSite: lambda obj: obj.event.created_dt +} + +QUERY_OPTIONS = { + AbstractFile: joinedload('abstract').load_only('submitted_dt'), + DesignerImageFile: joinedload('template').joinedload('event').load_only('created_dt'), + ImageFile: joinedload('event').load_only('created_dt'), + PaperFile: joinedload('paper_revision').load_only('submitted_dt'), + PaperTemplate: joinedload('event').load_only('created_dt'), + RegistrationData: joinedload('registration').load_only('submitted_dt'), + StaticSite: joinedload('event').load_only('created_dt'), +} + + +def rmlinktree(path): + # based on shutil.rmtree but only delete symlinks and directories + names = os.listdir(path) + for name in names: + fullname = os.path.join(path, name) + mode = os.lstat(fullname).st_mode + if stat.S_ISDIR(mode): + rmlinktree(fullname) + elif stat.S_ISLNK(mode): + os.remove(fullname) + else: + raise Exception('Tried to delete {} (not a directory/symlink)'.format(fullname)) + os.rmdir(path) + + +class S3Importer(object): + def __init__(self, get_bucket_name, static_bucket_name, output_file, source_backend_names, rclone_remote, + s3_endpoint, s3_profile, s3_bucket_policy): + self.get_bucket_name = get_bucket_name + self.static_bucket_name = static_bucket_name + self.source_backend_names = source_backend_names + self.output_file = output_file + self.s3_endpoint = s3_endpoint + self.s3_bucket_policy = s3_bucket_policy + self.buckets = {} + self.s3 = boto3.session.Session(profile_name=s3_profile).resource('s3', endpoint_url=s3_endpoint) + self.s3_client = boto3.session.Session(profile_name=s3_profile).client('s3', endpoint_url=s3_endpoint) + self.rclone_remote = rclone_remote + self.rclone_queue = {} + self.used_storage_paths = set() + + def query_chunked(self, model, chunk_size): + pks = inspect(model).primary_key + query = base_query = self.make_query(model).order_by(*pks) + while True: + row = None + for row in 
query.limit(chunk_size): + yield row + if row is None: + # no rows in the query + break + query = base_query.filter(Tuple(*pks) > inspect(row).identity) + + def make_query(self, model): + cols = ['storage_backend', 'storage_file_id', 'md5'] + if model.add_file_date_column: + cols.append('created_dt') + opts = QUERY_OPTIONS.get(model) + return (model.query + .filter(model.storage_file_id.isnot(None), model.storage_backend.in_(self.source_backend_names)) + .filter_by(**SPECIAL_FILTERS.get(model, {})) + .options(*((opts,) if opts else ())) + .options(lazyload('*'), load_only(*cols))) + + def run(self): + models = {model: self.make_query(model).count() for model in StoredFileMixin.__subclasses__()} + models = {model: total for model, total in models.iteritems() if total} + labels = {model: cformat('Processing %{blue!}{}%{reset} (%{cyan}{}%{reset} rows)').format(model.__name__, total) + for model, total in models.iteritems()} + max_length = max(len(x) for x in labels.itervalues()) + labels = {model: label.ljust(max_length) for model, label in labels.iteritems()} + for model, total in sorted(models.items(), key=itemgetter(1)): + with click.progressbar(self.query_chunked(model, 1000), length=total, label=labels[model], + show_percent=True, show_pos=True) as objects: + for obj in self.flush_rclone_iterator(objects, 1000): + try: + self.process_obj(obj) + except Exception as exc: + click.echo(cformat('\n%{red!}Error processing %{reset}%{yellow}{}%{red!}: %{reset}%{yellow!}{}') + .format(obj, exc)) + click.secho('All done!', fg='green') + click.echo('Add the following entries to your STORAGE_BACKENDS:') + for bucket, data in sorted(self.buckets.viewitems(), key=itemgetter(0)): + click.echo("'{}': 's3-readonly:host={},bucket={}',".format( + data['backend'], self.s3_endpoint.replace('https://', ''), bucket)) + + def process_obj(self, obj): + new_storage_path, new_filename = self.build_storage_path(obj) + if new_storage_path in self.used_storage_paths: + raise 
Exception('Non-unique storage path: {}'.format(new_storage_path)) + self.used_storage_paths.add(new_storage_path) + bucket_name, backend = self.get_bucket_name(self.get_object_dt(obj)) + bucket_info = self.get_bucket_info(bucket_name, backend, create=(not self.rclone_remote)) + assert backend == bucket_info['backend'] + if new_storage_path not in bucket_info['initial_keys']: + if self.rclone_remote: + self.queue_for_rclone(obj, bucket_name, new_storage_path) + else: + with obj.open() as f: + content_md5 = obj.md5.decode('hex').encode('base64').strip() + self.s3_client.put_object(Body=f, Bucket=bucket_name, Key=new_storage_path, + ContentType=obj.content_type, ContentMD5=content_md5) + self.emit_update(obj, backend, new_storage_path, new_filename) + + def build_storage_path(self, obj, _new_path_re=re.compile(r'^(event/\d+/registrations/\d+/\d+/\d+-)(\d+)(-.*)$')): + old_filename = obj.filename + new_filename = secure_filename(obj.filename, None) + assert new_filename + obj.filename = new_filename + new_storage_path = obj._build_storage_path()[1] + obj.filename = old_filename + assert new_storage_path + if not isinstance(obj, RegistrationData): + return new_storage_path, new_filename + match = _new_path_re.match(obj.storage_file_id) + if match: + # already in the current format + assert obj.storage_file_id == new_storage_path.replace('-0-', '-{}-'.format(match.group(2))) + return obj.storage_file_id, new_filename + else: + match = _new_path_re.match(new_storage_path) + return '{}{}{}'.format(match.group(1), crc32(obj.storage_file_id), match.group(3)), new_filename + + def queue_for_rclone(self, obj, bucket, key): + # XXX: we assume the file is local so the context manager doesn't create a + # temporary file which becomes invalid afterwards + with obj.get_local_path() as file_path: + pass + while not os.path.exists(file_path): + raw_input(cformat('\n%{red}File not found on disk: %{yellow}{}').format(file_path)) + try: + queue_entry = self.rclone_queue[bucket] + except 
KeyError: + tmpdir = tempfile.mkdtemp(prefix='indico-s3-import-') + self.rclone_queue[bucket] = queue_entry = { + 'path': tmpdir, + 'files': 0, + 'bytes': 0, + } + fskey = os.path.join(queue_entry['path'], key) + fsdir = os.path.dirname(fskey) + try: + os.makedirs(fsdir) + except OSError as exc: + if exc.errno != errno.EEXIST: + raise + os.symlink(file_path, fskey) + queue_entry['files'] += 1 + queue_entry['bytes'] += obj.size + + def flush_rclone_iterator(self, iterable, n=100): + for i, data in enumerate(iterable, 1): + yield data + if i % n == 0: + self.flush_rclone() + db.session.rollback() # reduce memory usage + self.flush_rclone() + + def flush_rclone(self): + if not self.rclone_remote or not self.rclone_queue: + return + click.echo() + for name, data in self.buckets.viewitems(): + if not data['exists']: + self.create_bucket(name) + data['exists'] = True + for bucket, data in self.rclone_queue.viewitems(): + click.echo(cformat('Copying %{cyan}{}%{reset} files (%{cyan}{}%{reset}) to %{cyan}{}%{reset} via rclone') + .format(data['files'], do_filesizeformat(data['bytes']), bucket)) + start = datetime.now() + try: + subprocess.check_call([ + 'rclone', 'copy', '--copy-links', + data['path'], '{}:{}'.format(self.rclone_remote, bucket) + ]) + except subprocess.CalledProcessError: + click.secho('\nError while running rclone', fg='red') + raise + duration = (datetime.now() - start) + click.echo('...finished after {}'.format(format_human_timedelta(duration, 'minutes', narrow=True))) + rmlinktree(data['path']) + self.rclone_queue.clear() + + def bucket_exists(self, name): + try: + self.s3_client.head_bucket(Bucket=name) + return True + except ClientError as exc: + if int(exc.response['Error']['Code']) == 404: + return False + raise + + def create_bucket(self, name): + click.echo(cformat('Creating bucket %{green}{}%{reset}').format(name)) + self.s3_client.create_bucket(Bucket=name) + if self.s3_bucket_policy: + self.s3_client.put_bucket_policy(Bucket=name, 
Policy=self.s3_bucket_policy) + + def get_bucket_info(self, bucket_name, backend, create=False): + try: + return self.buckets[bucket_name] + except KeyError: + click.echo(cformat('\nChecking bucket %{yellow}{}%{reset}').format(bucket_name)) + exists = True + if not self.bucket_exists(bucket_name): + if create: + click.echo() + self.create_bucket(bucket_name) + else: + exists = False + + data = {'backend': backend, 'exists': exists} + if exists: + bucket = self.s3.Bucket(bucket_name) + data['initial_keys'] = {f.key for f in bucket.objects.all()} + else: + data['initial_keys'] = set() + self.buckets[bucket_name] = data + return data + + def get_object_dt(self, obj): + cls = type(obj) + if cls.add_file_date_column: + return obj.created_dt + fn = FILE_DT_MAP.get(cls) or (lambda _: datetime.now()) + return fn(obj) + + def emit_update(self, obj, backend, file_id, filename): + data = {'m': type(obj).__name__, 'pk': inspect(obj).identity} + if obj.storage_backend != backend: + data.update({ + 'ob': obj.storage_backend, + 'nb': backend, + }) + if obj.storage_file_id != file_id: + data.update({ + 'of': obj.storage_file_id, + 'nf': file_id, + }) + if obj.filename != filename: + data.update({ + 'on': obj.filename, + 'nn': filename, + }) + json.dump(data, self.output_file) + self.output_file.write('\n') + + +def apply_changes(data_file, revert=False): + mapping = { + 'pk': 'pk', + 'ob': 'old_storage_backend', + 'nb': 'new_storage_backend', + 'of': 'old_storage_file_id', + 'nf': 'new_storage_file_id', + 'on': 'old_filename', + 'nn': 'new_filename', + } + click.echo('Parsing data...') + data = defaultdict(list) + for line in data_file: + line_data = json.loads(line) + converted = {mapping[k]: v for k, v in line_data.viewitems() if k in mapping} + data[line_data['m']].append(converted) + + models = {model: len(data[model.__name__]) + for model in StoredFileMixin.__subclasses__() + if model.__name__ in data and len(data[model.__name__])} + labels = {model: cformat('Processing 
%{blue!}{}%{reset} (%{cyan}{}%{reset} rows)').format(model.__name__, total)
+              for model, total in models.iteritems()}
+    max_length = max(len(x) for x in labels.itervalues())
+    labels = {model: label.ljust(max_length) for model, label in labels.iteritems()}
+    for model, total in sorted(models.items(), key=itemgetter(1)):
+        pks = inspect(model).primary_key
+        with click.progressbar(data[model.__name__], length=total, label=labels[model],
+                               show_percent=True, show_pos=True) as entries:
+            for entry in committing_iterator(entries, 1000):
+                updates = {}
+                key = 'old' if revert else 'new'
+                if key + '_storage_backend' in entry:
+                    updates[model.storage_backend] = entry[key + '_storage_backend']
+                if key + '_storage_file_id' in entry:
+                    updates[model.storage_file_id] = entry[key + '_storage_file_id']
+                if key + '_filename' in entry:
+                    updates[model.filename] = entry[key + '_filename']
+                model.query.filter(Tuple(*pks) == entry['pk']).update(updates, synchronize_session=False)
+
+
+@contextmanager
+def monkeypatch_registration_file_time():
+    # RegistrationData objects have the current timestamp in their
+    # storage_file_id to ensure uniqueness in case of re-upload, but
+    # here we want reliable filenames
+    from indico.modules.events.registration.models import registrations
+
+    class FakeTime(object):
+        def time(self):
+            return 0
+
+    orig_time = registrations.time
+    registrations.time = FakeTime()
+    yield
+    registrations.time = orig_time
+
+
+@cli_group()
+@with_appcontext
+def cli():
+    """Migrate data to S3.
+
+    Use the `copy` subcommand to copy data to S3. This can be done
+    safely while Indico is running. At the end it will show you what
+    you need to add to your `indico.conf`.
+
+    Once you updated your config with the new storage backends, you
+    can use the `apply` subcommand to update your database so files
+    will actually be loaded using the new S3 storage backends.
+
+    In case you ever need to switch back to your previous storage,
+    you can use `revert` to undo the database changes.
+    """
+    if config.DB_LOG:
+        click.secho('Warning: The database logger is currently enabled (DB_LOG = True).\n'
+                    'This will slow down the migration. Unless your database is very small, please disable it.',
+                    fg='yellow')
+        click.confirm('Continue anyway?', abort=True)
+
+
+@cli.command()
+@click.option('-s', '--source-backend', 'source_backend_names', metavar='NAME', multiple=True,
+              help='Storage backend names from which to copy files. If omitted, all non-S3 backends are used.')
+@click.option('-B', '--bucket-name', 'bucket_names', metavar='NAME', required=True, multiple=True,
+              help='Bucket(s) to copy files to')
+@click.option('-S', '--static-bucket-name', metavar='NAME', required=True,
+              help='Bucket to copy static site packages to')
+@click.option('-e', '--s3-endpoint', metavar='URL', help='Custom S3 endpoint, e.g. https://s3.example.com')
+@click.option('-p', '--s3-profile', metavar='NAME', help='S3 profile to use when loading credentials')
+@click.option('-P', '--s3-bucket-policy', 's3_bucket_policy_file', metavar='PATH', type=click.File(),
+              help='Bucket policy file to use when creating buckets')
+@click.option('-r', '--rclone', metavar='NAME',
+              help='If set, use rclone to copy files which is usually faster. The value of this option is the name of '
+                   'the rclone remote used to copy files to S3.')
+@click.argument('output', metavar='PATH', type=click.File('wb'))
+def copy(source_backend_names, bucket_names, static_bucket_name, s3_endpoint, s3_profile, s3_bucket_policy_file,
+         rclone, output):
+    """Copy files to S3.
+
+    This command copies files to S3 and records the necessary database changes
+    in a JSONL file.
+
+    Multiple bucket names can be specified; in that case the bucket name can change
+    based on the year a file was created in.
The last bucket name will be the default,
+    while any other bucket name must include a conditional indicating when to use it:
+
+    \b
+    -B '<2001:indico-pre-2001'
+    -B '<2009:indico-<year>'
+    -B 'indico-<year>-<month>'
+
+    The static bucket name cannot contain any placeholders.
+
+    The indico storage backend will get the same name as the bucket by default,
+    but this can be overridden, e.g. `-B 'indico-<year>/s3-<year>'` would name
+    the bucket 'indico-2018' but use a backend named 's3-2018'. It is your
+    responsibility to ensure that placeholders match between the two names.
+
+    S3 credentials should be specified in the usual places, i.e.
+    `~/.aws/credentials` for regular S3 access and `~/.config/rclone/rclone.conf`
+    when using rclone.
+    """
+    bucket_names = [tuple(x.split('/', 1)) if '/' in x else (x, x.split(':', 1)[-1]) for x in bucket_names]
+    if ':' in bucket_names[-1][0]:
+        raise click.UsageError('Last bucket name cannot contain criteria')
+    if not all(':' in x[0] for x in bucket_names[:-1]):
+        raise click.UsageError('All but the last bucket name need to contain criteria')
+    matches = [(re.match(r'^(<|>|==|<=|>=)\s*(\d{4}):(.+)$', name), backend) for name, backend in bucket_names[:-1]]
+    if not all(x[0] for x in matches):
+        raise click.UsageError("Could not parse '{}'".format(bucket_names[matches.index(None)]))
+    criteria = [(match.groups(), backend) for match, backend in matches]
+    # Build and compile a function to get the bucket/backend name to avoid
+    # processing the criteria for every single file (can be millions for large
+    # instances)
+    code = ['def get_bucket_name(dt):']
+    if criteria:
+        for i, ((op, value, bucket), backend) in enumerate(criteria):
+            code.append('    {}if dt.year {} {}:'.format('el' if i else '', op, value))
+            code.append('        bucket, backend = {!r}'.format((bucket, backend)))
+        code.append('    else:')
+        code.append('        bucket, backend = {!r}'.format(bucket_names[-1]))
+    else:
+        code.append('    bucket, backend = {!r}'.format(bucket_names[-1]))
+    code.append('    bucket = 
bucket.replace("<year>", dt.strftime("%Y"))')
+    code.append('    bucket = bucket.replace("<month>", dt.strftime("%m"))')
+    code.append('    bucket = bucket.replace("<week>", dt.strftime("%W"))')
+    code.append('    backend = backend.replace("<year>", dt.strftime("%Y"))')
+    code.append('    backend = backend.replace("<month>", dt.strftime("%m"))')
+    code.append('    backend = backend.replace("<week>", dt.strftime("%W"))')
+    code.append('    return bucket, backend')
+    d = {}
+    exec '\n'.join(code) in d
+    if not source_backend_names:
+        source_backend_names = [k for k, v in config.STORAGE_BACKENDS.viewitems()
+                                if not isinstance(v, S3StorageBase)]
+    s3_bucket_policy = s3_bucket_policy_file.read() if s3_bucket_policy_file else None
+    imp = S3Importer(d['get_bucket_name'], static_bucket_name,
+                     output, source_backend_names, rclone,
+                     s3_endpoint, s3_profile, s3_bucket_policy)
+    with monkeypatch_registration_file_time():
+        imp.run()
+
+
+@cli.command()
+@click.argument('data', metavar='PATH', type=click.File('rb'))
+def apply(data):
+    """Apply DB updates.
+
+    This command updates the DB with the changes recorded while
+    running the `copy` command.
+    """
+    apply_changes(data)
+
+
+@cli.command()
+@click.argument('data', metavar='PATH', type=click.File('rb'))
+def revert(data):
+    """Revert DB updates.
+
+    This command reverts the changes made to the database by the
+    `apply` command.
+ """ + apply_changes(data, revert=True) diff --git a/storage_s3/indico_storage_s3/plugin.py b/storage_s3/indico_storage_s3/plugin.py index 414a8cf..b0077e9 100644 --- a/storage_s3/indico_storage_s3/plugin.py +++ b/storage_s3/indico_storage_s3/plugin.py @@ -35,6 +35,7 @@ from indico.web.forms.widgets import SwitchWidget from indico_storage_s3 import _ from indico_storage_s3.blueprint import blueprint +from indico_storage_s3.migrate import cli as migrate_cli from indico_storage_s3.storage import (DynamicS3Storage, ReadOnlyDynamicS3Storage, ReadOnlyS3Storage, S3Storage, S3StorageBase) @@ -86,7 +87,7 @@ class S3StoragePlugin(IndicoPlugin): def _extend_indico_cli(self, sender, **kwargs): @cli_group() def s3(): - """Manage S3 storage""" + """Manage S3 storage.""" @s3.command() @click.option('--storage', default=None, metavar='NAME', help='Storage backend to create bucket for') @@ -119,4 +120,5 @@ class S3StoragePlugin(IndicoPlugin): click.echo('Storage {} is not an s3 storage'.format(key)) sys.exit(1) + s3.add_command(migrate_cli, name='migrate') return s3 From b59949e100be4861e82cd6ae1396ddc7466fda06 Mon Sep 17 00:00:00 2001 From: Adrian Moennich Date: Fri, 23 Nov 2018 17:43:45 +0100 Subject: [PATCH 51/54] Storage/S3: Fix source backend check Also warn if non-fs backends are used with --rclone, as this only works if they produce filesystem paths that are still valid after leaving the context manager returning them. 
--- storage_s3/indico_storage_s3/migrate.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/storage_s3/indico_storage_s3/migrate.py b/storage_s3/indico_storage_s3/migrate.py index 02ed09b..b7a0d07 100644 --- a/storage_s3/indico_storage_s3/migrate.py +++ b/storage_s3/indico_storage_s3/migrate.py @@ -41,6 +41,7 @@ from indico.cli.core import cli_group from indico.core.config import config from indico.core.db import db from indico.core.storage import StoredFileMixin +from indico.core.storage.backend import FileSystemStorage, get_storage from indico.modules.designer.models.images import DesignerImageFile from indico.modules.events.abstracts.models.files import AbstractFile from indico.modules.events.layout.models.images import ImageFile @@ -475,8 +476,13 @@ def copy(source_backend_names, bucket_names, static_bucket_name, s3_endpoint, s3 d = {} exec '\n'.join(code) in d if not source_backend_names: - source_backend_names = [k for k, v in config.STORAGE_BACKENDS.viewitems() - if not isinstance(v, S3StorageBase)] + source_backend_names = [x for x in config.STORAGE_BACKENDS if not isinstance(get_storage(x), S3StorageBase)] + if rclone: + invalid = [x for x in source_backend_names if not isinstance(get_storage(x), FileSystemStorage)] + if invalid: + click.secho('Found unsupported storage backends: {}'.format(', '.join(sorted(invalid))), fg='yellow') + click.secho('The backends might not work together with `--rclone`', fg='yellow') + click.confirm('Continue anyway?', abort=True) s3_bucket_policy = s3_bucket_policy_file.read() if s3_bucket_policy_file else None imp = S3Importer(d['get_bucket_name'], static_bucket_name, output, source_backend_names, rclone, From 71a679267c99e8dd00b58945275ab0af81ab23d6 Mon Sep 17 00:00:00 2001 From: Adrian Moennich Date: Fri, 23 Nov 2018 18:26:27 +0100 Subject: [PATCH 52/54] Storage/S3: Cache and lazy-create S3 clients Creating a client in new S3 sessions is somewhat expensive; don't do that more often than needed 
--- storage_s3/indico_storage_s3/storage.py | 34 ++++++++++++++++++------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/storage_s3/indico_storage_s3/storage.py b/storage_s3/indico_storage_s3/storage.py index 256fca8..181d771 100644 --- a/storage_s3/indico_storage_s3/storage.py +++ b/storage_s3/indico_storage_s3/storage.py @@ -19,6 +19,7 @@ from __future__ import unicode_literals import hashlib import hmac import sys +import threading from contextlib import contextmanager from datetime import date from io import BytesIO @@ -28,7 +29,7 @@ import boto3 from botocore.config import Config from botocore.exceptions import ClientError from werkzeug.datastructures import Headers -from werkzeug.utils import redirect +from werkzeug.utils import cached_property, redirect from indico.core.config import config from indico.core.storage import Storage, StorageError @@ -38,6 +39,9 @@ from indico.util.string import return_ascii from indico.web.flask.util import send_file +s3_session_cache = threading.local() + + class S3StorageBase(Storage): simple_data = False @@ -46,22 +50,34 @@ class S3StorageBase(Storage): self.endpoint_url = data.get('host') if self.endpoint_url and '://' not in self.endpoint_url: self.endpoint_url = 'https://' + self.endpoint_url - session_kwargs = {} - client_kwargs = {} + self.session_kwargs = {} + self.client_kwargs = {} if 'profile' in data: - session_kwargs['profile_name'] = data['profile'] + self.session_kwargs['profile_name'] = data['profile'] if 'access_key' in data: - session_kwargs['aws_access_key_id'] = data['access_key'] + self.session_kwargs['aws_access_key_id'] = data['access_key'] if 'secret_key' in data: - session_kwargs['aws_secret_access_key'] = data['secret_key'] + self.session_kwargs['aws_secret_access_key'] = data['secret_key'] if 'addressing_style' in data: - client_kwargs['config'] = Config(s3={'addressing_style': data['addressing_style']}) + self.client_kwargs['config'] = Config(s3={'addressing_style': 
data['addressing_style']})
         self.bucket_policy_file = data.get('bucket_policy_file')
         self.bucket_versioning = data.get('bucket_versioning') in ('1', 'true', 'yes')
         self.proxy_downloads = data.get('proxy') in ('1', 'true', 'yes')
         self.meta = data.get('meta')
-        self.session = boto3.session.Session(**session_kwargs)
-        self.client = self.session.client('s3', endpoint_url=self.endpoint_url, **client_kwargs)
+
+    @cached_property
+    def session(self):
+        key = '__'.join('{}_{}'.format(k, v) for k, v in sorted(self.session_kwargs.viewitems()))
+        try:
+            return getattr(s3_session_cache, key)
+        except AttributeError:
+            session = boto3.session.Session(**self.session_kwargs)
+            setattr(s3_session_cache, key, session)
+            return session
+
+    @cached_property
+    def client(self):
+        return self.session.client('s3', endpoint_url=self.endpoint_url, **self.client_kwargs)
 
     def _get_current_bucket_name(self):
         raise NotImplementedError

From 3a32e524f8cef87367ad07773c0966dbfce1f707 Mon Sep 17 00:00:00 2001
From: Adrian Moennich
Date: Mon, 26 Nov 2018 14:50:14 +0100
Subject: [PATCH 53/54] Storage/S3: Truncate bucket hash if necessary

---
 storage_s3/indico_storage_s3/storage.py | 2 +-
 storage_s3/tests/plugin_test.py         | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/storage_s3/indico_storage_s3/storage.py b/storage_s3/indico_storage_s3/storage.py
index 181d771..bc7ad5e 100644
--- a/storage_s3/indico_storage_s3/storage.py
+++ b/storage_s3/indico_storage_s3/storage.py
@@ -223,7 +223,7 @@ class DynamicS3Storage(S3StorageBase):
     def _get_bucket_name(self, date):
         name = self._replace_bucket_placeholders(self.bucket_name_template, date)
         token = hmac.new(self.bucket_secret.encode('utf-8'), name, hashlib.md5).hexdigest()
-        return '{}-{}'.format(name, token)
+        return '{}-{}'.format(name, token)[:63]
 
     def _replace_bucket_placeholders(self, name, date):
         name = name.replace('<year>', date.strftime('%Y'))
diff --git a/storage_s3/tests/plugin_test.py b/storage_s3/tests/plugin_test.py
index 
9928db5..994e147 100644 --- a/storage_s3/tests/plugin_test.py +++ b/storage_s3/tests/plugin_test.py @@ -102,8 +102,10 @@ def test_static_bucket_name_too_long(): def test_dynamic_bucket_name_too_long(): - DynamicS3Storage('bucket_secret=secret,bucket_template=test-' + 'x'*37) - DynamicS3Storage('bucket_secret=secret,bucket_template=test--' + 'x'*34) + s = DynamicS3Storage('bucket_secret=secret,bucket_template=test-' + 'x'*37) + assert len(s._get_current_bucket_name()) == 63 + s = DynamicS3Storage('bucket_secret=secret,bucket_template=test--' + 'x'*34) + assert len(s._get_current_bucket_name()) == 63 with pytest.raises(StorageError): DynamicS3Storage('bucket_secret=secret,bucket_template=test-' + 'x' * 38) with pytest.raises(StorageError): From 4a9d59340eb9e6756e69a66651ff6fd18ec37e07 Mon Sep 17 00:00:00 2001 From: Adrian Moennich Date: Mon, 26 Nov 2018 14:41:01 +0100 Subject: [PATCH 54/54] Storage/S3: Bump version --- storage_s3/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage_s3/setup.py b/storage_s3/setup.py index 4eff19b..fb92718 100644 --- a/storage_s3/setup.py +++ b/storage_s3/setup.py @@ -21,7 +21,7 @@ from setuptools import find_packages, setup setup( name='indico-plugin-storage-s3', - version='2.0.2', + version='2.0.3', description='S3 storage backend for Indico', url='https://github.com/indico/indico-plugins', license='https://www.gnu.org/licenses/gpl-3.0.txt',