diff --git a/.coveragerc b/.coveragerc index a9e5ed4..166c774 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,4 +1,6 @@ [run] omit = + project/celery.py + project/celery_tasks.py project/cli/test.py project/templates/email/* \ No newline at end of file diff --git a/.dockerignore b/.dockerignore index b6db6ae..ddeebac 100644 --- a/.dockerignore +++ b/.dockerignore @@ -24,3 +24,4 @@ **/values.dev.yaml README.md tmp +celerybeat-schedule \ No newline at end of file diff --git a/.gitignore b/.gitignore index 5826740..72211c8 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,7 @@ deployment.yaml node_modules cypress/videos cypress/screenshots +celerybeat-schedule # C extensions *.so diff --git a/.vscode/launch.json b/.vscode/launch.json index 326532f..9e6aebc 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -1,58 +1,114 @@ { - // Use IntelliSense to learn about possible attributes. - // Hover to view descriptions of existing attributes. - // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 - "version": "0.2.0", - "configurations": [ - { - "name": "Python: Flask", - "type": "python", - "request": "launch", - "module": "flask", - "env": { - "FLASK_APP": "project", - "FLASK_ENV": "development", - "FLASK_DEBUG": "1" - }, - "args": [ - "run", - "--no-debugger" - ], - "justMyCode": false, - "jinja": true - },{ - "name": "Python: Flask HTTPS", - "type": "python", - "request": "launch", - "module": "flask", - "env": { - "FLASK_APP": "project", - "FLASK_ENV": "development", - "FLASK_DEBUG": "1" - }, - "args": [ - "run", - "--port=443", - "--no-debugger", - "--cert=127.0.0.1.crt", - "--key=127.0.0.1.key" - ], - "sudo": true, - "justMyCode": false, - "jinja": true - }, - { - "name": "Python: Aktuelle Datei", - "type": "python", - "request": "launch", - "program": "${file}", - "console": "integratedTerminal" - }, - { - "name": "Debug Unit Test", - "type": "python", - "request": "test", - "justMyCode": false, - } - ] -} \ No newline at end 
of file + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Flask", + "type": "python", + "request": "launch", + "module": "flask", + "env": { + "FLASK_APP": "project", + "FLASK_ENV": "development", + "FLASK_DEBUG": "1" + }, + "args": ["run", "--no-debugger"], + "justMyCode": false, + "jinja": true + }, + { + "name": "Flask HTTPS", + "type": "python", + "request": "launch", + "module": "flask", + "env": { + "FLASK_APP": "project", + "FLASK_ENV": "development", + "FLASK_DEBUG": "1" + }, + "args": [ + "run", + "--port=443", + "--no-debugger", + "--cert=127.0.0.1.crt", + "--key=127.0.0.1.key" + ], + "sudo": true, + "justMyCode": false, + "jinja": true + }, + { + "name": "Flask CLI", + "type": "python", + "request": "launch", + "module": "flask", + "env": { + "FLASK_APP": "project", + "FLASK_ENV": "development", + "FLASK_DEBUG": "1" + }, + "args": ["cache", "clear-images"], + "justMyCode": false + }, + { + "name": "Python: Aktuelle Datei", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal" + }, + { + "name": "Debug Unit Test", + "type": "python", + "request": "test", + "justMyCode": false + }, + { + "name": "Celery worker", + "type": "python", + "request": "launch", + "module": "celery", + "args": [ + "-A", + "project.celery", + "worker", + "--loglevel=debug", + "--concurrency=1" + ], + "justMyCode": false, + "console": "integratedTerminal" + }, + { + "name": "Celery beat", + "type": "python", + "request": "launch", + "module": "celery", + "args": ["-A", "project.celery", "beat", "--loglevel=debug"], + "justMyCode": false, + "console": "integratedTerminal" + }, + { + "name": "Gunicorn", + "type": "python", + "request": "launch", + "module": "gunicorn", + "args": ["-c", "gunicorn.conf.py", "-w", "1", "project:app"], + 
"justMyCode": false, + "console": "integratedTerminal" + } + ], + "compounds": [ + { + "name": "Flask/Celery", + "configurations": ["Flask", "Celery worker"], + "stopAll": true + }, + { + "name": "Flask/Celery/Beat", + "configurations": ["Flask", "Celery worker", "Celery beat"], + "stopAll": true + } + ] +} diff --git a/Dockerfile b/Dockerfile index 48d8ad7..8849bff 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,7 @@ FROM python:3.7 # Add rsync RUN apt update -qq && apt upgrade -y && apt autoremove -y -RUN apt install -y rsync curl && apt autoremove -y +RUN apt install -y rsync redis-tools curl && apt autoremove -y EXPOSE 5000 @@ -21,6 +21,7 @@ ENV SECRET_KEY="" ENV SECURITY_PASSWORD_HASH="" ENV SERVER_NAME="" ENV STATIC_FILES_MIRROR="" +ENV REDIS_URL="" # Install pip requirements COPY requirements.txt . diff --git a/deployment/docker-compose/.env.example b/deployment/docker-compose/.env.example index bd16dbe..b6924ac 100644 --- a/deployment/docker-compose/.env.example +++ b/deployment/docker-compose/.env.example @@ -1,5 +1,6 @@ POSTGRES_DATA_PATH=./tmp/data/postgres/data POSTGRES_BACKUP_PATH=./tmp/data/postgres/backups +REDIS_DATA_PATH=./tmp/data/redis/data CACHE_PATH=./tmp/cache STATIC_PATH=./tmp/static FLUENTD_LOG_PATH=./tmp/logs/fluentd @@ -9,6 +10,7 @@ FLUENTD_DOCKER_CONTAINERS_PATH=/var/lib/docker/containers POSTGRES_USER=oveda POSTGRES_PASSWORD= POSTGRES_DB=oveda +REDIS_PASSWORD= WEB_TAG=latest SERVER_NAME= @@ -22,5 +24,6 @@ MAIL_PASSWORD= MAIL_DEFAULT_SENDER= MAIL_USE_TLS=True GOOGLE_MAPS_API_KEY=AIzaDummy +SEO_SITEMAP_PING_GOOGLE=False JWT_PRIVATE_KEY="" JWT_PUBLIC_JWKS='' \ No newline at end of file diff --git a/deployment/docker-compose/README.md b/deployment/docker-compose/README.md index 5e4797c..6eb50e2 100644 --- a/deployment/docker-compose/README.md +++ b/deployment/docker-compose/README.md @@ -29,3 +29,9 @@ Adjust `WEB_TAG` in .env if necessary. 
```sh docker compose exec -it web /bin/sh ``` + +## Worker active tasks + +```sh +docker compose exec -it worker celery -A project.celery inspect active +``` diff --git a/deployment/docker-compose/docker-compose.yml b/deployment/docker-compose/docker-compose.yml index 9dba2e9..f2f54d0 100644 --- a/deployment/docker-compose/docker-compose.yml +++ b/deployment/docker-compose/docker-compose.yml @@ -1,6 +1,46 @@ version: "3.9" name: "oveda" +x-web-env: + &default-web-env + FLASK_APP: main.py + DATABASE_URL: postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@db/${POSTGRES_DB} + REDIS_URL: redis://default:${REDIS_PASSWORD}@redis + SECRET_KEY: ${SECRET_KEY} + SECURITY_PASSWORD_HASH: ${SECURITY_PASSWORD_HASH} + MAIL_DEFAULT_SENDER: ${MAIL_DEFAULT_SENDER} + MAIL_PASSWORD: ${MAIL_PASSWORD} + MAIL_PORT: ${MAIL_PORT} + MAIL_SERVER: ${MAIL_SERVER} + MAIL_USE_TLS: ${MAIL_USE_TLS} + MAIL_USERNAME: ${MAIL_USERNAME} + GOOGLE_MAPS_API_KEY: ${GOOGLE_MAPS_API_KEY} + SEO_SITEMAP_PING_GOOGLE: ${SEO_SITEMAP_PING_GOOGLE} + SERVER_NAME: ${SERVER_NAME} + PREFERRED_URL_SCHEME: ${PREFERRED_URL_SCHEME} + GUNICORN_ACCESS_LOG: "-" + STATIC_FILES_MIRROR: /static + CACHE_PATH: tmp + JWT_PRIVATE_KEY: ${JWT_PRIVATE_KEY} + JWT_PUBLIC_JWKS: ${JWT_PUBLIC_JWKS} + +x-web: + &default-web + image: danielgrams/gsevpt:${WEB_TAG} + restart: always + extra_hosts: + - "host.docker.internal:host-gateway" + environment: + <<: *default-web-env + volumes: + - ${CACHE_PATH}:/app/project/tmp + - ${STATIC_PATH}:/static + depends_on: + db: + condition: service_healthy + redis: + condition: service_healthy + services: db: image: postgis/postgis:12-3.1 @@ -33,9 +73,19 @@ services: db: condition: service_healthy - web: - image: danielgrams/gsevpt:${WEB_TAG} + redis: + image: bitnami/redis:6.2 restart: always + healthcheck: + test: "redis-cli -a '${REDIS_PASSWORD}' ping | grep PONG" + start_period: "5s" + volumes: + - ${REDIS_DATA_PATH}:/bitnami/redis/data + environment: + REDIS_PASSWORD: ${REDIS_PASSWORD} + + web: + <<: 
*default-web healthcheck: test: "curl -f ${SERVER_NAME}/up" interval: "60s" @@ -45,31 +95,16 @@ services: - "5000:5000" extra_hosts: - "host.docker.internal:host-gateway" - environment: - FLASK_APP: main.py - DATABASE_URL: postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@db/${POSTGRES_DB} - SECRET_KEY: ${SECRET_KEY} - SECURITY_PASSWORD_HASH: ${SECURITY_PASSWORD_HASH} - MAIL_DEFAULT_SENDER: ${MAIL_DEFAULT_SENDER} - MAIL_PASSWORD: ${MAIL_PASSWORD} - MAIL_PORT: ${MAIL_PORT} - MAIL_SERVER: ${MAIL_SERVER} - MAIL_USE_TLS: ${MAIL_USE_TLS} - MAIL_USERNAME: ${MAIL_USERNAME} - GOOGLE_MAPS_API_KEY: ${GOOGLE_MAPS_API_KEY} - SERVER_NAME: ${SERVER_NAME} - PREFERRED_URL_SCHEME: ${PREFERRED_URL_SCHEME} - GUNICORN_ACCESS_LOG: "-" - STATIC_FILES_MIRROR: /static - CACHE_PATH: tmp - JWT_PRIVATE_KEY: ${JWT_PRIVATE_KEY} - JWT_PUBLIC_JWKS: ${JWT_PUBLIC_JWKS} - volumes: - - ${CACHE_PATH}:/app/project/tmp - - ${STATIC_PATH}:/static - depends_on: - db: - condition: service_healthy + + worker: + <<: *default-web + command: celery -A project.celery worker --loglevel=error + entrypoint: [] + + scheduler: + <<: *default-web + command: celery -A project.celery beat --loglevel=error + entrypoint: [] fluentd: image: danielgrams/fluentd diff --git a/doc/deployment.md b/doc/deployment.md index 1ad297f..9828e46 100644 --- a/doc/deployment.md +++ b/doc/deployment.md @@ -42,6 +42,7 @@ Jobs that should run on a regular basis. ### Daily ```sh +flask cache clear-images flask event update-recurring-dates flask dump all flask seo generate-sitemap --pinggoogle @@ -50,10 +51,18 @@ flask seo generate-robots-txt ## Administration +### Users + ```sh flask user add-admin-roles super@hero.com ``` +### Worker active tasks + +```sh +celery -A project.celery inspect active +``` + ## Configuration Create `.env` file in the root directory or pass as environment variables. 
diff --git a/doc/development.md b/doc/development.md index 5a2eac4..cf89e06 100644 --- a/doc/development.md +++ b/doc/development.md @@ -82,3 +82,9 @@ docker run -p 5000:5000 -e "DATABASE_URL=postgresql://postgres@localhost/gsevpt" ```sh docker-compose build && docker-compose up ``` + +## Celery + +```sh +dotenv run celery -A project.celery purge +``` diff --git a/docker-compose.yml b/docker-compose.yml index 136606c..1992db5 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,20 +1,83 @@ version: "3.9" +name: "oveda-dev" + +x-web-env: + &default-web-env + FLASK_APP: main.py + DATABASE_URL: postgresql://user:pass@db/gsevpt + REDIS_URL: redis://default:pass@redis + MAIL_DEFAULT_SENDER: noresponse@gsevpt.de + MAIL_SERVER: mailhog + MAIL_PORT: 1025 + MAIL_USE_TLS: False + GUNICORN_ACCESS_LOG: "-" + GUNICORN_LOG_LEVEL: debug + FLASK_DEBUG: 1 + SERVER_NAME: "127.0.0.1:5000" + +x-web: + &default-web + build: . + environment: + <<: *default-web-env + depends_on: + db: + condition: service_healthy + redis: + condition: service_healthy + mailhog: + condition: service_started services: - db: - image: mdillon/postgis + image: postgis/postgis:12-3.1 + healthcheck: + test: "pg_isready --username=user && psql --username=user --list" + start_period: "5s" environment: - POSTGRES_DB=gsevpt - POSTGRES_USER=user - POSTGRES_PASSWORD=pass + redis: + image: bitnami/redis:6.2 + healthcheck: + test: "redis-cli -a 'pass' ping | grep PONG" + start_period: "5s" + environment: + REDIS_PASSWORD: pass + + mailhog: + image: mailhog/mailhog + healthcheck: + test: "curl -f localhost:8025" + interval: "60s" + timeout: "5s" + start_period: "5s" + ports: + - "8026:8025" + web: - build: . 
+ <<: *default-web ports: - "5000:5000" + + worker: + <<: *default-web + command: celery -A project.celery worker --loglevel=error + entrypoint: [] + + scheduler: + <<: *default-web + command: celery -A project.celery beat --loglevel=error + entrypoint: [] + + flower: + image: mher/flower:1.2 + ports: + - "5555:5555" environment: - FLASK_APP: main.py - DATABASE_URL: postgresql://user:pass@db/gsevpt + CELERY_BROKER_URL: redis://default:pass@redis depends_on: - - db + redis: + condition: service_healthy diff --git a/entrypoint.sh b/entrypoint.sh index 846f2ce..4417401 100755 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -5,6 +5,17 @@ if [[ ! -z "${STATIC_FILES_MIRROR}" ]]; then rsync -a --delete project/static/ "${STATIC_FILES_MIRROR}" fi +echo "Using redis ${REDIS_URL}" + +PONG=`redis-cli -u ${REDIS_URL} ping | grep PONG` +while [ -z "$PONG" ]; do + sleep 2 + echo "Waiting for redis server ${REDIS_URL} to become available..." + PONG=`redis-cli -u ${REDIS_URL} ping | grep PONG` +done + +echo "Using database server ${DATABASE_URL}" + until flask db upgrade do echo "Waiting for postgres server to become available..." 
diff --git a/project/__init__.py b/project/__init__.py index 44135c1..373bd0d 100644 --- a/project/__init__.py +++ b/project/__init__.py @@ -1,5 +1,6 @@ import logging import os +from datetime import timedelta from flask import Flask from flask_babelex import Babel @@ -22,6 +23,7 @@ def getenv_bool(name: str, default: str = "False"): # pragma: no cover # Create app app = Flask(__name__) app.config["SQLALCHEMY_DATABASE_URI"] = os.environ["DATABASE_URL"] +app.config["REDIS_URL"] = os.getenv("REDIS_URL") app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = False app.config["SECURITY_CONFIRMABLE"] = True app.config["SECURITY_POST_LOGIN_VIEW"] = "manage_after_login" @@ -36,6 +38,7 @@ app.config["SERVER_NAME"] = os.getenv("SERVER_NAME") app.config["ADMIN_UNIT_CREATE_REQUIRES_ADMIN"] = os.getenv( "ADMIN_UNIT_CREATE_REQUIRES_ADMIN", False ) +app.config["SEO_SITEMAP_PING_GOOGLE"] = getenv_bool("SEO_SITEMAP_PING_GOOGLE", "False") # Proxy handling if os.getenv("PREFERRED_URL_SCHEME"): # pragma: no cover @@ -45,6 +48,32 @@ from project.reverse_proxied import ReverseProxied app.wsgi_app = ReverseProxied(app.wsgi_app) +# Celery +task_always_eager = "REDIS_URL" not in app.config or not app.config["REDIS_URL"] +app.config.update( + CELERY_CONFIG={ + "broker_url": app.config["REDIS_URL"], + "result_backend": app.config["REDIS_URL"], + "result_expires": timedelta(hours=1), + "broker_pool_limit": None, + "redis_max_connections": 2, + "timezone": "Europe/Berlin", + "broker_transport_options": { + "max_connections": 2, + "queue_order_strategy": "priority", + "priority_steps": list(range(3)), + "sep": ":", + }, + "task_default_priority": 1, # 0=high, 1=normal, 2=low priority + "task_always_eager": task_always_eager, + } +) + + +from project.celery import create_celery + +celery = create_celery(app) + # Generate a nice key using secrets.token_urlsafe() app.config["SECRET_KEY"] = os.environ.get( "SECRET_KEY", "pf9Wkove4IKEAXvy-cQkeDPhv9Cb3Ag-wyJILbq_dFw" ) @@
-67,6 +97,12 @@ if __name__ != "__main__": app.logger.handlers = gunicorn_logger.handlers app.logger.setLevel(gunicorn_logger.level) +# One line logging +from project.one_line_formatter import init_logger_with_one_line_formatter + +init_logger_with_one_line_formatter(logging.getLogger()) +init_logger_with_one_line_formatter(app.logger) + # Gzip gzip = Gzip(app) @@ -127,6 +163,9 @@ if app.config["MAIL_SUPPRESS_SEND"]: db = SQLAlchemy(app) migrate = Migrate(app, db) +# Celery tasks +from project import celery_tasks + # API from project.api import RestApi diff --git a/project/celery.py b/project/celery.py new file mode 100644 index 0000000..980fa30 --- /dev/null +++ b/project/celery.py @@ -0,0 +1,91 @@ +from smtplib import SMTPException +from urllib.error import URLError + +from celery import Celery +from celery.signals import ( + after_setup_logger, + after_setup_task_logger, + task_postrun, + worker_ready, +) +from celery_singleton import Singleton, clear_locks +from requests.exceptions import RequestException + + +class HttpTaskException(Exception): + pass + + +def create_celery(app): + celery = Celery(app.import_name) + celery.conf.update(app.config["CELERY_CONFIG"]) + TaskBase = Singleton + + class ContextTask(TaskBase): + abstract = True + + def __call__(self, *args, **kwargs): + with app.app_context(): + return TaskBase.__call__(self, *args, **kwargs) + + celery.Task = ContextTask + + class HttpTask(ContextTask): + abstract = True + autoretry_for = (HttpTaskException,) + retry_backoff = 5 + max_retries = 3 + retry_jitter = True + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._real_run = self.run + self.run = self._wrapped_run + + def _wrapped_run(self, *args, **kwargs): + try: + self._real_run(*args, **kwargs) + except ( + URLError, + RequestException, + SMTPException, + ) as e: + raise HttpTaskException(repr(e)) + + setattr(app, "celery_http_task_cls", HttpTask) + + return celery + + +@after_setup_logger.connect +def 
setup_logger(logger, *args, **kwargs): + from project.one_line_formatter import init_logger_with_one_line_formatter + + init_logger_with_one_line_formatter(logger) + + +@after_setup_task_logger.connect +def setup_task_logger(logger, *args, **kwargs): + from project.one_line_formatter import init_logger_with_one_line_formatter + + init_logger_with_one_line_formatter(logger) + + +@worker_ready.connect +def unlock_all(**kwargs): + from project import celery + + clear_locks(celery) + + +@task_postrun.connect +def close_session(*args, **kwargs): + from project import app + from project import db as sqlalchemydb + + # Flask SQLAlchemy will automatically create new sessions for you from + # a scoped session factory, given that we are maintaining the same app + # context, this ensures tasks have a fresh session (e.g. session errors + # won't propagate across tasks) + with app.app_context(): + sqlalchemydb.session.remove() diff --git a/project/celery_tasks.py b/project/celery_tasks.py new file mode 100644 index 0000000..01d0b7c --- /dev/null +++ b/project/celery_tasks.py @@ -0,0 +1,63 @@ +from celery.schedules import crontab + +from project import celery + + +@celery.on_after_configure.connect +def setup_periodic_tasks(sender, **kwargs): + sender.add_periodic_task(crontab(hour=0, minute=0), clear_images_task) + sender.add_periodic_task(crontab(hour=1, minute=0), update_recurring_dates_task) + sender.add_periodic_task(crontab(hour=2, minute=0), dump_all_task) + sender.add_periodic_task(crontab(hour=3, minute=0), seo_generate_sitemap_task) + sender.add_periodic_task(crontab(hour=4, minute=0), generate_robots_txt_task) + + +@celery.task( + acks_late=True, + reject_on_worker_lost=True, +) +def clear_images_task(): + from project.services.cache import clear_images + + clear_images() + + +@celery.task( + acks_late=True, + reject_on_worker_lost=True, +) +def update_recurring_dates_task(): + from project.services.event import update_recurring_dates + + update_recurring_dates() + + 
+@celery.task( + acks_late=True, + reject_on_worker_lost=True, +) +def dump_all_task(): + from project.services.dump import dump_all + + dump_all() + + +@celery.task( + acks_late=True, + reject_on_worker_lost=True, +) +def seo_generate_sitemap_task(): + from project import app + from project.services.seo import generate_sitemap + + generate_sitemap(app.config["SEO_SITEMAP_PING_GOOGLE"]) + + +@celery.task( + acks_late=True, + reject_on_worker_lost=True, +) +def generate_robots_txt_task(): + from project.services.seo import generate_robots_txt + + generate_robots_txt() diff --git a/project/cli/cache.py b/project/cli/cache.py index f29781b..f176788 100644 --- a/project/cli/cache.py +++ b/project/cli/cache.py @@ -1,17 +1,14 @@ -import click from flask.cli import AppGroup -from project import app, img_path -from project.utils import clear_files_in_dir +from project import app +from project.services import cache cache_cli = AppGroup("cache") @cache_cli.command("clear-images") def clear_images(): - click.echo("Clearing images..") - clear_files_in_dir(img_path) - click.echo("Done.") + cache.clear_images() app.cli.add_command(cache_cli) diff --git a/project/cli/dump.py b/project/cli/dump.py index 5fd2184..647b1fa 100644 --- a/project/cli/dump.py +++ b/project/cli/dump.py @@ -1,102 +1,14 @@ -import json -import os -import shutil - -import click from flask.cli import AppGroup -from sqlalchemy import and_ -from sqlalchemy.orm import joinedload -from project import app, dump_path -from project.api.event.schemas import EventDumpSchema -from project.api.event_category.schemas import EventCategoryDumpSchema -from project.api.event_reference.schemas import EventReferenceDumpSchema -from project.api.organization.schemas import OrganizationDumpSchema -from project.api.organizer.schemas import OrganizerDumpSchema -from project.api.place.schemas import PlaceDumpSchema -from project.models import ( - AdminUnit, - Event, - EventCategory, - EventOrganizer, - EventPlace, - EventReference, 
- PublicStatus, -) -from project.utils import make_dir +from project import app +from project.services import dump dump_cli = AppGroup("dump") -def dump_items(items, schema, file_base_name, dump_path): - result = schema.dump(items) - path = os.path.join(dump_path, file_base_name + ".json") - - with open(path, "w") as outfile: - json.dump(result, outfile, ensure_ascii=False) - - click.echo(f"{len(items)} item(s) dumped to {path}.") - - @dump_cli.command("all") def dump_all(): - # Setup temp dir - tmp_path = os.path.join(dump_path, "tmp") - make_dir(tmp_path) - - # Events - events = ( - Event.query.join(Event.admin_unit) - .options(joinedload(Event.categories)) - .filter( - and_( - Event.public_status == PublicStatus.published, - AdminUnit.is_verified, - ) - ) - .all() - ) - dump_items(events, EventDumpSchema(many=True), "events", tmp_path) - - # Places - places = EventPlace.query.all() - dump_items(places, PlaceDumpSchema(many=True), "places", tmp_path) - - # Event categories - event_categories = EventCategory.query.all() - dump_items( - event_categories, - EventCategoryDumpSchema(many=True), - "event_categories", - tmp_path, - ) - - # Organizers - organizers = EventOrganizer.query.all() - dump_items(organizers, OrganizerDumpSchema(many=True), "organizers", tmp_path) - - # Organizations - organizations = AdminUnit.query.all() - dump_items( - organizations, OrganizationDumpSchema(many=True), "organizations", tmp_path - ) - - # Event references - event_references = EventReference.query.all() - dump_items( - event_references, - EventReferenceDumpSchema(many=True), - "event_references", - tmp_path, - ) - - # Zip - zip_base_name = os.path.join(dump_path, "all") - zip_path = shutil.make_archive(zip_base_name, "zip", tmp_path) - click.echo(f"Zipped all up to {zip_path}.") - - # Clean up temp dir - shutil.rmtree(tmp_path, ignore_errors=True) + dump.dump_all() app.cli.add_command(dump_cli) diff --git a/project/cli/event.py b/project/cli/event.py index 98d7520..76c7c98 100644 
--- a/project/cli/event.py +++ b/project/cli/event.py @@ -1,28 +1,14 @@ -import click from flask.cli import AppGroup -from project import app, db -from project.dateutils import berlin_tz -from project.services.event import ( - get_recurring_events, - update_event_dates_with_recurrence_rule, -) +from project import app +from project.services import event event_cli = AppGroup("event") @event_cli.command("update-recurring-dates") def update_recurring_dates(): - # Setting the timezone is neccessary for cli command - db.session.execute("SET timezone TO :val;", {"val": berlin_tz.zone}) - - events = get_recurring_events() - - for event in events: - update_event_dates_with_recurrence_rule(event) - db.session.commit() - - click.echo(f"{len(events)} event(s) were updated.") + event.update_recurring_dates() app.cli.add_command(event_cli) diff --git a/project/cli/seo.py b/project/cli/seo.py index 14e899a..7f20d99 100644 --- a/project/cli/seo.py +++ b/project/cli/seo.py @@ -1,18 +1,8 @@ -import os -import shutil -from io import StringIO - import click -import requests -from flask import url_for from flask.cli import AppGroup, with_appcontext -from sqlalchemy import and_ -from sqlalchemy.orm import load_only -from project import app, cache_path, robots_txt_path, sitemap_path -from project.dateutils import get_today -from project.models import AdminUnit, Event, EventDate, PublicStatus -from project.utils import make_dir +from project import app +from project.services import seo seo_cli = AppGroup("seo") @@ -21,82 +11,13 @@ seo_cli = AppGroup("seo") @click.option("--pinggoogle/--no-pinggoogle", default=False) @with_appcontext def generate_sitemap(pinggoogle): - click.echo("Generating sitemap..") - make_dir(cache_path) - - buf = StringIO() - buf.write('') - buf.write('') - - today = get_today() - events = ( - Event.query.join(Event.admin_unit) - .options(load_only(Event.id, Event.updated_at)) - .filter(Event.dates.any(EventDate.start >= today)) - .filter( - and_( - 
Event.public_status == PublicStatus.published, - AdminUnit.is_verified, - ) - ) - .all() - ) - click.echo(f"Found {len(events)} events") - - for event in events: - loc = url_for("event", event_id=event.id) - lastmod = event.updated_at.strftime("%Y-%m-%d") if event.updated_at else None - lastmod_tag = f"{lastmod}" if lastmod else "" - buf.write(f"{loc}{lastmod_tag}") - - buf.write("") - - with open(sitemap_path, "w") as fd: - buf.seek(0) - shutil.copyfileobj(buf, fd) - - size = os.path.getsize(sitemap_path) - click.echo(f"Generated sitemap at {sitemap_path} ({size} Bytes)") - - if size > 52428800: # pragma: no cover - app.logger.error(f"Size of sitemap ({size} Bytes) is larger than 50MB.") - - if pinggoogle: # pragma: no cover - sitemap_url = requests.utils.quote(url_for("sitemap_xml")) - google_url = f"http://www.google.com/ping?sitemap={sitemap_url}" - click.echo(f"Pinging {google_url} ..") - - response = requests.get(google_url) - click.echo(f"Response {response.status_code}") - - if response.status_code != 200: - app.logger.error( - f"Google ping returned unexpected status code {response.status_code}." 
- ) + seo.generate_sitemap(pinggoogle) @seo_cli.command("generate-robots-txt") @with_appcontext def generate_robots_txt(): - click.echo("Generating robots.txt..") - make_dir(cache_path) - - buf = StringIO() - buf.write(f"user-agent: *{os.linesep}") - buf.write(f"Disallow: /{os.linesep}") - buf.write(f"Allow: /eventdates{os.linesep}") - buf.write(f"Allow: /eventdate/{os.linesep}") - buf.write(f"Allow: /event/{os.linesep}") - - if os.path.exists(sitemap_path): - sitemap_url = url_for("sitemap_xml") - buf.write(f"Sitemap: {sitemap_url}{os.linesep}") - - with open(robots_txt_path, "w") as fd: - buf.seek(0) - shutil.copyfileobj(buf, fd) - - click.echo(f"Generated robots.txt at {robots_txt_path}") + seo.generate_robots_txt() app.cli.add_command(seo_cli) diff --git a/project/one_line_formatter.py b/project/one_line_formatter.py new file mode 100644 index 0000000..1630a9b --- /dev/null +++ b/project/one_line_formatter.py @@ -0,0 +1,23 @@ +import logging + + +class OneLineFormatter(logging.Formatter): + def format(self, record): # pragma: no cover + result = super(OneLineFormatter, self).format(record) + return result.replace("\n", "\\n") + + +def init_logger_with_one_line_formatter(logger): + if not logger: # pragma: no cover + return + + for handler in logger.handlers: + if handler.formatter: + fmt = handler.formatter._fmt + + if fmt: + fmt = fmt.replace(" %(levelname)s", " [%(levelname)s]") + + handler.formatter = OneLineFormatter(fmt, handler.formatter.datefmt) + else: # pragma: no cover + handler.formatter = OneLineFormatter() diff --git a/project/services/cache.py b/project/services/cache.py new file mode 100644 index 0000000..436669b --- /dev/null +++ b/project/services/cache.py @@ -0,0 +1,8 @@ +from project import app, img_path +from project.utils import clear_files_in_dir + + +def clear_images(): + app.logger.info("Clearing images..") + clear_files_in_dir(img_path) + app.logger.info("Done.") diff --git a/project/services/dump.py b/project/services/dump.py new file 
mode 100644 index 0000000..2e197f7 --- /dev/null +++ b/project/services/dump.py @@ -0,0 +1,94 @@ +import json +import os +import shutil + +from sqlalchemy import and_ +from sqlalchemy.orm import joinedload + +from project import app, dump_path +from project.api.event.schemas import EventDumpSchema +from project.api.event_category.schemas import EventCategoryDumpSchema +from project.api.event_reference.schemas import EventReferenceDumpSchema +from project.api.organization.schemas import OrganizationDumpSchema +from project.api.organizer.schemas import OrganizerDumpSchema +from project.api.place.schemas import PlaceDumpSchema +from project.models import ( + AdminUnit, + Event, + EventCategory, + EventOrganizer, + EventPlace, + EventReference, + PublicStatus, +) +from project.utils import make_dir + + +def dump_items(items, schema, file_base_name, dump_path): + result = schema.dump(items) + path = os.path.join(dump_path, file_base_name + ".json") + + with open(path, "w") as outfile: + json.dump(result, outfile, ensure_ascii=False) + + app.logger.info(f"{len(items)} item(s) dumped to {path}.") + + +def dump_all(): + # Setup temp dir + tmp_path = os.path.join(dump_path, "tmp") + make_dir(tmp_path) + + # Events + events = ( + Event.query.join(Event.admin_unit) + .options(joinedload(Event.categories)) + .filter( + and_( + Event.public_status == PublicStatus.published, + AdminUnit.is_verified, + ) + ) + .all() + ) + dump_items(events, EventDumpSchema(many=True), "events", tmp_path) + + # Places + places = EventPlace.query.all() + dump_items(places, PlaceDumpSchema(many=True), "places", tmp_path) + + # Event categories + event_categories = EventCategory.query.all() + dump_items( + event_categories, + EventCategoryDumpSchema(many=True), + "event_categories", + tmp_path, + ) + + # Organizers + organizers = EventOrganizer.query.all() + dump_items(organizers, OrganizerDumpSchema(many=True), "organizers", tmp_path) + + # Organizations + organizations = AdminUnit.query.all() + 
dump_items( + organizations, OrganizationDumpSchema(many=True), "organizations", tmp_path + ) + + # Event references + event_references = EventReference.query.all() + dump_items( + event_references, + EventReferenceDumpSchema(many=True), + "event_references", + tmp_path, + ) + + # Zip + zip_base_name = os.path.join(dump_path, "all") + zip_path = shutil.make_archive(zip_base_name, "zip", tmp_path) + app.logger.info(f"Zipped all up to {zip_path}.") + + # Clean up temp dir + shutil.rmtree(tmp_path, ignore_errors=True) diff --git a/project/services/event.py b/project/services/event.py index 66a5c54..96add7b 100644 --- a/project/services/event.py +++ b/project/services/event.py @@ -8,7 +8,7 @@ from sqlalchemy import and_, case, func, or_ from sqlalchemy.orm import aliased, contains_eager, defaultload, joinedload, lazyload from sqlalchemy.sql import extract -from project import db +from project import app, db from project.dateutils import ( berlin_tz, date_add_time, @@ -500,3 +500,16 @@ def create_ical_event_for_date(event_date: EventDate) -> icalendar.Event: event.add("location", get_place_str(event_date.event.event_place)) return event + + +def update_recurring_dates(): + # Setting the timezone is necessary for cli command + db.session.execute("SET timezone TO :val;", {"val": berlin_tz.zone}) + + events = get_recurring_events() + + for event in events: + update_event_dates_with_recurrence_rule(event) + db.session.commit() + + app.logger.info(f"{len(events)} event(s) were updated.") diff --git a/project/services/seo.py b/project/services/seo.py new file mode 100644 index 0000000..155faca --- /dev/null +++ b/project/services/seo.py @@ -0,0 +1,90 @@ +import os +import shutil +from io import StringIO + +import requests +from flask import url_for +from sqlalchemy import and_ +from sqlalchemy.orm import load_only + +from project import app, cache_path, robots_txt_path, sitemap_path +from project.dateutils import get_today +from project.models import AdminUnit, Event,
EventDate, PublicStatus +from project.utils import make_dir + + +def generate_sitemap(pinggoogle: bool): + app.logger.info("Generating sitemap..") + make_dir(cache_path) + + buf = StringIO() + buf.write('') + buf.write('') + + today = get_today() + events = ( + Event.query.join(Event.admin_unit) + .options(load_only(Event.id, Event.updated_at)) + .filter(Event.dates.any(EventDate.start >= today)) + .filter( + and_( + Event.public_status == PublicStatus.published, + AdminUnit.is_verified, + ) + ) + .all() + ) + app.logger.info(f"Found {len(events)} events") + + for event in events: + loc = url_for("event", event_id=event.id) + lastmod = event.updated_at.strftime("%Y-%m-%d") if event.updated_at else None + lastmod_tag = f"{lastmod}" if lastmod else "" + buf.write(f"{loc}{lastmod_tag}") + + buf.write("") + + with open(sitemap_path, "w") as fd: + buf.seek(0) + shutil.copyfileobj(buf, fd) + + size = os.path.getsize(sitemap_path) + app.logger.info(f"Generated sitemap at {sitemap_path} ({size} Bytes)") + + if size > 52428800: # pragma: no cover + app.logger.error(f"Size of sitemap ({size} Bytes) is larger than 50MB.") + + if pinggoogle: # pragma: no cover + sitemap_url = requests.utils.quote(url_for("sitemap_xml")) + google_url = f"http://www.google.com/ping?sitemap={sitemap_url}" + app.logger.info(f"Pinging {google_url} ..") + + response = requests.get(google_url) + app.logger.info(f"Response {response.status_code}") + + if response.status_code != 200: + app.logger.error( + f"Google ping returned unexpected status code {response.status_code}." 
+ ) + + +def generate_robots_txt(): + app.logger.info("Generating robots.txt..") + make_dir(cache_path) + + buf = StringIO() + buf.write(f"user-agent: *{os.linesep}") + buf.write(f"Disallow: /{os.linesep}") + buf.write(f"Allow: /eventdates{os.linesep}") + buf.write(f"Allow: /eventdate/{os.linesep}") + buf.write(f"Allow: /event/{os.linesep}") + + if os.path.exists(sitemap_path): + sitemap_url = url_for("sitemap_xml") + buf.write(f"Sitemap: {sitemap_url}{os.linesep}") + + with open(robots_txt_path, "w") as fd: + buf.seek(0) + shutil.copyfileobj(buf, fd) + + app.logger.info(f"Generated robots.txt at {robots_txt_path}") diff --git a/project/views/root.py b/project/views/root.py index 5289e3a..a77e8d3 100644 --- a/project/views/root.py +++ b/project/views/root.py @@ -5,7 +5,15 @@ from flask import redirect, render_template, request, send_from_directory, url_f from flask_babelex import gettext from markupsafe import Markup -from project import app, cache_path, db, dump_path, robots_txt_file, sitemap_file +from project import ( + app, + cache_path, + celery, + db, + dump_path, + robots_txt_file, + sitemap_file, +) from project.services.admin import upsert_settings from project.views.utils import track_analytics @@ -35,6 +43,10 @@ def home(): @app.route("/up") def up(): db.engine.execute("SELECT 1") + + if "REDIS_URL" in app.config and app.config["REDIS_URL"]: # pragma: no cover + celery.control.ping() + return "OK" diff --git a/requirements.txt b/requirements.txt index f5ffbfc..c21be42 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,22 +1,31 @@ alembic==1.4.3 +amqp==5.1.1 aniso8601==8.1.0 apispec==4.0.0 apispec-webframeworks==0.5.2 appdirs==1.4.4 argh==0.26.2 arrow==0.14.7 +async-timeout==4.0.2 attrs==20.3.0 Authlib==0.15.3 Babel==2.9.1 bcrypt==3.2.0 beautifulsoup4==4.9.3 +billiard==3.6.4.0 black==23.1.0 blinker==1.4 +cached-property==1.5.2 +celery==5.2.7 +celery-singleton==0.3.1 certifi==2020.12.5 cffi==1.14.4 cfgv==3.2.0 chardet==3.0.4 click==8.1.3 
+click-didyoumean==0.3.0 +click-plugins==1.1.1 +click-repl==0.2.0 colour==0.1.5 coverage==5.5 coveralls==2.2.0 @@ -58,6 +67,7 @@ isort==5.7.0 itsdangerous==1.1.0 Jinja2==2.11.3 jsonschema==3.2.0 +kombu==5.2.4 Mako==1.1.3 MarkupSafe==1.1.1 marshmallow==3.10.0 @@ -76,6 +86,7 @@ Pillow==9.0.0 platformdirs==3.1.0 pluggy==0.13.1 pre-commit==2.9.3 +prompt-toolkit==3.0.38 psycopg2-binary==2.8.6 py==1.10.0 pycodestyle==2.6.0 @@ -91,9 +102,10 @@ pytest-split==0.6.0 python-dateutil==2.8.1 python-dotenv==0.15.0 python-editor==1.0.4 -pytz==2020.4 +pytz==2022.7.1 PyYAML==5.4.1 qrcode==6.1 +redis==4.5.1 regex==2020.11.13 requests==2.25.0 requests-mock==1.9.3 @@ -113,8 +125,10 @@ typing_extensions==4.5.0 urllib3==1.26.5 URLObject==2.4.3 validators==0.18.2 +vine==5.0.0 virtualenv==20.2.2 visitor==0.1.3 +wcwidth==0.2.6 webargs==7.0.1 Werkzeug==1.0.1 WTForms==2.3.3 diff --git a/tests/cli/test_cache.py b/tests/cli/test_cache.py index d8944f5..cf7ba20 100644 --- a/tests/cli/test_cache.py +++ b/tests/cli/test_cache.py @@ -1,4 +1,4 @@ -def test_clear_images(client, seeder, app, utils): +def test_clear_images(client, seeder, app, utils, caplog): user_id, admin_unit_id = seeder.setup_base() image_id = seeder.upsert_default_image() @@ -7,4 +7,4 @@ def test_clear_images(client, seeder, app, utils): runner = app.test_cli_runner() result = runner.invoke(args=["cache", "clear-images"]) - assert "Done." 
in result.output + assert result.exit_code == 0 diff --git a/tests/cli/test_dump.py b/tests/cli/test_dump.py index 22c48fc..16f00ed 100644 --- a/tests/cli/test_dump.py +++ b/tests/cli/test_dump.py @@ -5,7 +5,7 @@ def test_all(client, seeder, app, utils): runner = app.test_cli_runner() result = runner.invoke(args=["dump", "all"]) - assert "Zipped all up" in result.output + assert result.exit_code == 0 utils.get_endpoint_ok("developer") utils.get_endpoint_ok("dump_files", path="all.zip") diff --git a/tests/cli/test_event.py b/tests/cli/test_event.py index 59e9d30..c3c207b 100644 --- a/tests/cli/test_event.py +++ b/tests/cli/test_event.py @@ -4,4 +4,4 @@ def test_update_recurring_dates(client, seeder, app): runner = app.test_cli_runner() result = runner.invoke(args=["event", "update-recurring-dates"]) - assert "1 event(s) were updated." in result.output + assert result.exit_code == 0 diff --git a/tests/views/test_root.py b/tests/views/test_root.py index cd3a72c..1e5b6d9 100644 --- a/tests/views/test_root.py +++ b/tests/views/test_root.py @@ -93,7 +93,7 @@ def test_robots_txt(app, utils): runner = app.test_cli_runner() runner.invoke(args=["seo", "generate-sitemap"]) result = runner.invoke(args=["seo", "generate-robots-txt"]) - assert "Generated robots.txt" in result.output + assert result.exit_code == 0 utils.get_endpoint_ok("robots_txt") @@ -104,5 +104,5 @@ def test_sitemap_xml(seeder, app, utils): app.config["SERVER_NAME"] = "localhost" runner = app.test_cli_runner() result = runner.invoke(args=["seo", "generate-sitemap"]) - assert "Generated sitemap" in result.output + assert result.exit_code == 0 utils.get_endpoint_ok("sitemap_xml")