From 3c37b3a9dec58d791de2c3c215f788fea62c7f31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20Louren=C3=A7o?= Date: Mon, 12 Apr 2021 18:13:00 +0100 Subject: [PATCH] LiveSync: Include subcontribs in initial export (#110) --- livesync/indico_livesync/base.py | 28 +++++++++++++++---- livesync/indico_livesync/cli.py | 19 +------------ livesync/indico_livesync/initial.py | 42 ++++++++++++++++++++++++++++ livesync/indico_livesync/uploader.py | 17 +++++------ livesync/tests/agent_test.py | 10 ------- livesync/tests/uploader_test.py | 6 ++-- 6 files changed, 78 insertions(+), 44 deletions(-) diff --git a/livesync/indico_livesync/base.py b/livesync/indico_livesync/base.py index f16314f..89b8639 100644 --- a/livesync/indico_livesync/base.py +++ b/livesync/indico_livesync/base.py @@ -6,12 +6,17 @@ # see the LICENSE file for more details. from flask_pluginengine import depends, trim_docstring +from sqlalchemy.orm import subqueryload from indico.core.plugins import IndicoPlugin, PluginCategory +from indico.modules.categories import Category +from indico.modules.categories.models.principals import CategoryPrincipal from indico.util.date_time import now_utc from indico.util.decorators import classproperty from indico_livesync.forms import AgentForm +from indico_livesync.initial import (apply_acl_entry_strategy, query_attachments, query_contributions, query_events, + query_notes, query_subcontributions) from indico_livesync.models.queue import LiveSyncQueueEntry from indico_livesync.plugin import LiveSyncPlugin @@ -90,16 +95,29 @@ class LiveSyncBackendBase: uploader.run(records) self.update_last_run() - def run_initial_export(self, events, total=None): + def run_initial_export(self): """Runs the initial export. This process is expected to take a very long time. - - :param events: iterable of all records in this indico instance - :param total: (optional) the total of records to be exported """ if self.uploader is None: # pragma: no cover raise NotImplementedError uploader = self.uploader(self) - uploader.run_initial(events, total) + + Category.allow_relationship_preloading = True + Category.preload_relationships(Category.query, 'acl_entries', + strategy=lambda rel: apply_acl_entry_strategy(subqueryload(rel), + CategoryPrincipal)) + _category_cache = Category.query.all() # noqa: F841 + + events = query_events() + uploader.run_initial(events.yield_per(5000), events.count()) + contributions = query_contributions() + uploader.run_initial(contributions.yield_per(5000), contributions.count()) + subcontributions = query_subcontributions() + uploader.run_initial(subcontributions.yield_per(5000), subcontributions.count()) + attachments = query_attachments() + uploader.run_initial(attachments.yield_per(5000), attachments.count()) + notes = query_notes() + uploader.run_initial(notes.yield_per(5000), notes.count()) diff --git a/livesync/indico_livesync/cli.py b/livesync/indico_livesync/cli.py index b2d0a53..f2dff1c 100644 --- a/livesync/indico_livesync/cli.py +++ b/livesync/indico_livesync/cli.py @@ -7,17 +7,12 @@ import click from flask_pluginengine import current_plugin -from sqlalchemy.orm import subqueryload from terminaltables import AsciiTable from indico.cli.core import cli_group from indico.core.db import db -from indico.modules.categories import Category -from indico.modules.categories.models.principals import CategoryPrincipal from indico.util.console import cformat -from indico_livesync.initial import (apply_acl_entry_strategy, query_attachments, query_contributions, query_events, - query_notes) from indico_livesync.models.agents import LiveSyncAgent @@ -77,20 +72,8 @@ def initial_export(agent_id, force): print(cformat('To re-run it, use %{yellow!}--force%{reset}')) return - Category.allow_relationship_preloading = True - Category.preload_relationships(Category.query, 'acl_entries', - strategy=lambda rel: apply_acl_entry_strategy(subqueryload(rel), CategoryPrincipal)) - _category_cache = Category.query.all() # noqa: F841 - backend = agent.create_backend() - events = query_events() - backend.run_initial_export(events.yield_per(5000), events.count()) - contributions = query_contributions() - backend.run_initial_export(contributions.yield_per(5000), contributions.count()) - attachments = query_attachments() - backend.run_initial_export(attachments.yield_per(5000), attachments.count()) - notes = query_notes() - backend.run_initial_export(notes.yield_per(5000), notes.count()) + backend.run_initial_export() agent.initial_data_exported = True db.session.commit() diff --git a/livesync/indico_livesync/initial.py b/livesync/indico_livesync/initial.py index e602068..d540822 100644 --- a/livesync/indico_livesync/initial.py +++ b/livesync/indico_livesync/initial.py @@ -89,6 +89,48 @@ def query_contributions(): ) +def query_subcontributions(): + contrib_event = db.aliased(Event) + contrib_session = db.aliased(Session) + contrib_block = db.aliased(SessionBlock) + + contrib_strategy = contains_eager(SubContribution.contribution) + contrib_strategy.joinedload(Contribution.own_venue) + contrib_strategy.joinedload(Contribution.own_room).options(raiseload('*'), joinedload('location')) + apply_acl_entry_strategy(contrib_strategy.selectinload(Contribution.acl_entries), ContributionPrincipal) + + event_strategy = contrib_strategy.contains_eager(Contribution.event.of_type(contrib_event)) + event_strategy.joinedload(contrib_event.own_venue) + event_strategy.joinedload(contrib_event.own_room).options(raiseload('*'), joinedload('location')) + apply_acl_entry_strategy(event_strategy.selectinload(contrib_event.acl_entries), EventPrincipal) + + session_strategy = contrib_strategy.contains_eager(Contribution.session.of_type(contrib_session)) + apply_acl_entry_strategy(session_strategy.selectinload(contrib_session.acl_entries), SessionPrincipal) + session_strategy.joinedload(contrib_session.own_venue) + session_strategy.joinedload(contrib_session.own_room).options(raiseload('*'), joinedload('location')) + + session_block_strategy = contrib_strategy.contains_eager(Contribution.session_block.of_type(contrib_block)) + session_block_strategy.joinedload(contrib_block.own_venue) + session_block_strategy.joinedload(contrib_block.own_room).options(raiseload('*'), joinedload('location')) + + return ( + SubContribution.query + .join(Contribution) + .join(Contribution.event.of_type(contrib_event)) + .outerjoin(Contribution.session.of_type(contrib_session)) + .outerjoin(Contribution.session_block.of_type(contrib_block)) + .filter(~SubContribution.is_deleted, ~Contribution.is_deleted, ~contrib_event.is_deleted) + .options( + selectinload(SubContribution.person_links), + contrib_strategy, + event_strategy, + session_strategy, + session_block_strategy + ) + .order_by(SubContribution.id) + ) + + def query_attachments(): contrib_event = db.aliased(Event) contrib_session = db.aliased(Session) diff --git a/livesync/indico_livesync/uploader.py b/livesync/indico_livesync/uploader.py index 7096b2c..d1047bb 100644 --- a/livesync/indico_livesync/uploader.py +++ b/livesync/indico_livesync/uploader.py @@ -48,17 +48,18 @@ class Uploader: self.processed_records(batch) self.logger.info('%s finished', self_name) - def run_initial(self, events, total=None): + def run_initial(self, records, total, progress=True): """Runs the initial batch upload - :param events: an iterable containing events - :param total: (optional) the total of records to be exported + :param records: an iterable containing records + :param total: the total of records to be exported + :param progress: enable verbose progress mode """ - if total is not None: - events = verbose_iterator(events, total, attrgetter('id'), - lambda obj: str_to_ascii(getattr(obj, 'title', '')), - print_total_time=True) - for batch in grouper(events, self.INITIAL_BATCH_SIZE, skip_missing=True): + if progress: + records = verbose_iterator(records, total, attrgetter('id'), + lambda obj: str_to_ascii(getattr(obj, 'title', '')), + print_total_time=True) + for batch in grouper(records, self.INITIAL_BATCH_SIZE, skip_missing=True): self.upload_records(batch, from_queue=False) def upload_records(self, records, from_queue): diff --git a/livesync/tests/agent_test.py b/livesync/tests/agent_test.py index 68b0cf8..aa05a85 100644 --- a/livesync/tests/agent_test.py +++ b/livesync/tests/agent_test.py @@ -30,16 +30,6 @@ def test_title_description(): assert NonDescriptiveAgent.description == 'no description available' -def test_run_initial(): - """Test if run_initial_export calls the uploader properly""" - backend = DummyBackend(MagicMock()) - mock_uploader = MagicMock() - backend.uploader = lambda x: mock_uploader - events = object() - backend.run_initial_export(events, 1) - mock_uploader.run_initial.assert_called_with(events, 1) - - def test_run(mocker): """Test if run calls the fetcher/uploader properly""" mocker.patch.object(DummyBackend, 'fetch_records') diff --git a/livesync/tests/uploader_test.py b/livesync/tests/uploader_test.py index 98618a3..7a996d3 100644 --- a/livesync/tests/uploader_test.py +++ b/livesync/tests/uploader_test.py @@ -52,7 +52,7 @@ def test_run_initial(mocker): uploader = RecordingUploader(MagicMock()) uploader.INITIAL_BATCH_SIZE = 3 records = tuple(Mock(spec=Event, id=evt_id) for evt_id in range(4)) - uploader.run_initial(records) + uploader.run_initial(records, 4, False) # We expect two batches, with the second one being smaller (i.e. no None padding, just the events) batches = set(records[:3]), set(records[3:]) assert uploader.all_uploaded == [(batches[0], False), (batches[1], False)] @@ -131,7 +131,7 @@ def test_marcxml_run(mocker, db, dummy_event, dummy_agent): assert not mxg.objects_to_xml.called assert uploader.upload_xml.called mxg.reset_mock() - uploader.run_initial([1]) + uploader.run_initial([1], 1, False) assert not mxg.records_to_xml.called assert mxg.objects_to_xml.called assert uploader.upload_xml.called @@ -142,5 +142,5 @@ def test_marcxml_empty_result(mocker): mocker.patch('indico_livesync.uploader.MARCXMLGenerator.objects_to_xml', return_value=None) mocker.patch.object(MARCXMLUploader, 'upload_xml', autospec=True) uploader = MARCXMLUploader(MagicMock()) - uploader.run_initial([1]) + uploader.run_initial([1], 1, False) assert not uploader.upload_xml.called