Introduce celery #378

This commit is contained in:
Daniel Grams 2023-03-14 23:52:30 +01:00
parent 567abb3afb
commit 58ed3b3a66
30 changed files with 754 additions and 297 deletions

View File

@@ -1,4 +1,6 @@
 [run]
 omit =
+    project/celery.py
+    project/celery_tasks.py
     project/cli/test.py
     project/templates/email/*

View File

@@ -24,3 +24,4 @@
 **/values.dev.yaml
 README.md
 tmp
+celerybeat-schedule

.gitignore vendored
View File

@@ -13,6 +13,7 @@ deployment.yaml
 node_modules
 cypress/videos
 cypress/screenshots
+celerybeat-schedule
 
 # C extensions
 *.so

.vscode/launch.json vendored
View File

@@ -1,58 +1,114 @@
 {
   // Use IntelliSense to learn about possible attributes.
   // Hover to view descriptions of existing attributes.
   // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
   "version": "0.2.0",
   "configurations": [
     {
-      "name": "Python: Flask",
+      "name": "Flask",
       "type": "python",
       "request": "launch",
       "module": "flask",
       "env": {
         "FLASK_APP": "project",
         "FLASK_ENV": "development",
         "FLASK_DEBUG": "1"
       },
-      "args": [
-        "run",
-        "--no-debugger"
-      ],
+      "args": ["run", "--no-debugger"],
       "justMyCode": false,
       "jinja": true
-    },{
-      "name": "Python: Flask HTTPS",
+    },
+    {
+      "name": "Flask HTTPS",
       "type": "python",
       "request": "launch",
       "module": "flask",
       "env": {
         "FLASK_APP": "project",
         "FLASK_ENV": "development",
         "FLASK_DEBUG": "1"
       },
       "args": [
         "run",
         "--port=443",
         "--no-debugger",
         "--cert=127.0.0.1.crt",
         "--key=127.0.0.1.key"
       ],
       "sudo": true,
       "justMyCode": false,
       "jinja": true
     },
+    {
+      "name": "Flask CLI",
+      "type": "python",
+      "request": "launch",
+      "module": "flask",
+      "env": {
+        "FLASK_APP": "project",
+        "FLASK_ENV": "development",
+        "FLASK_DEBUG": "1"
+      },
+      "args": ["cache", "clear-images"],
+      "justMyCode": false
+    },
     {
       "name": "Python: Aktuelle Datei",
       "type": "python",
       "request": "launch",
       "program": "${file}",
       "console": "integratedTerminal"
     },
     {
       "name": "Debug Unit Test",
       "type": "python",
       "request": "test",
-      "justMyCode": false,
-    }
-  ]
-}
+      "justMyCode": false
+    },
+    {
+      "name": "Celery worker",
+      "type": "python",
+      "request": "launch",
+      "module": "celery",
+      "args": [
+        "-A",
+        "project.celery",
+        "worker",
+        "--loglevel=debug",
+        "--concurrency=1"
+      ],
+      "justMyCode": false,
+      "console": "integratedTerminal"
+    },
+    {
+      "name": "Celery beat",
+      "type": "python",
+      "request": "launch",
+      "module": "celery",
+      "args": ["-A", "project.celery", "beat", "--loglevel=debug"],
+      "justMyCode": false,
+      "console": "integratedTerminal"
+    },
+    {
+      "name": "Gunicorn",
+      "type": "python",
+      "request": "launch",
+      "module": "gunicorn",
+      "args": ["-c", "gunicorn.conf.py", "-w", "1", "project:app"],
+      "justMyCode": false,
+      "console": "integratedTerminal"
+    }
+  ],
+  "compounds": [
+    {
+      "name": "Flask/Celery",
+      "configurations": ["Flask", "Celery worker"],
+      "stopAll": true
+    },
+    {
+      "name": "Flask/Celery/Beat",
+      "configurations": ["Flask", "Celery worker", "Celery beat"],
+      "stopAll": true
+    }
+  ]
+}

View File

@@ -2,7 +2,7 @@ FROM python:3.7
 # Add rsync
 RUN apt update -qq && apt upgrade -y && apt autoremove -y
-RUN apt install -y rsync curl && apt autoremove -y
+RUN apt install -y rsync redis-tools curl && apt autoremove -y
 
 EXPOSE 5000
@@ -21,6 +21,7 @@ ENV SECRET_KEY=""
 ENV SECURITY_PASSWORD_HASH=""
 ENV SERVER_NAME=""
 ENV STATIC_FILES_MIRROR=""
+ENV REDIS_URL=""
 
 # Install pip requirements
 COPY requirements.txt .

View File

@@ -1,5 +1,6 @@
 POSTGRES_DATA_PATH=./tmp/data/postgres/data
 POSTGRES_BACKUP_PATH=./tmp/data/postgres/backups
+REDIS_DATA_PATH=./tmp/data/redis/data
 CACHE_PATH=./tmp/cache
 STATIC_PATH=./tmp/static
 FLUENTD_LOG_PATH=./tmp/logs/fluentd
@@ -9,6 +10,7 @@ FLUENTD_DOCKER_CONTAINERS_PATH=/var/lib/docker/containers
 POSTGRES_USER=oveda
 POSTGRES_PASSWORD=
 POSTGRES_DB=oveda
+REDIS_PASSWORD=
 
 WEB_TAG=latest
 SERVER_NAME=
@@ -22,5 +24,6 @@ MAIL_PASSWORD=
 MAIL_DEFAULT_SENDER=
 MAIL_USE_TLS=True
 GOOGLE_MAPS_API_KEY=AIzaDummy
+SEO_SITEMAP_PING_GOOGLE=False
 JWT_PRIVATE_KEY=""
 JWT_PUBLIC_JWKS=''

View File

@@ -29,3 +29,9 @@ Adjust `WEB_TAG` in .env if necessary.
 ```sh
 docker compose exec -it web /bin/sh
 ```
+
+## Worker active tasks
+
+```sh
+docker compose exec -it worker celery -A project.celery inspect active
+```
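The `inspect active` command above also has a Python-side equivalent through the app's control API; a small sketch (not part of this commit):

```python
# Sketch: query worker state via Celery's inspection API instead of the CLI.
from project import celery

inspector = celery.control.inspect()
print(inspector.active())      # tasks currently executing, keyed by worker
print(inspector.registered())  # task names each worker knows about
```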

View File

@@ -1,6 +1,46 @@
 version: "3.9"
 name: "oveda"
 
+x-web-env:
+  &default-web-env
+  FLASK_APP: main.py
+  DATABASE_URL: postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@db/${POSTGRES_DB}
+  REDIS_URL: redis://default:${REDIS_PASSWORD}@redis
+  SECRET_KEY: ${SECRET_KEY}
+  SECURITY_PASSWORD_HASH: ${SECURITY_PASSWORD_HASH}
+  MAIL_DEFAULT_SENDER: ${MAIL_DEFAULT_SENDER}
+  MAIL_PASSWORD: ${MAIL_PASSWORD}
+  MAIL_PORT: ${MAIL_PORT}
+  MAIL_SERVER: ${MAIL_SERVER}
+  MAIL_USE_TLS: ${MAIL_USE_TLS}
+  MAIL_USERNAME: ${MAIL_USERNAME}
+  GOOGLE_MAPS_API_KEY: ${GOOGLE_MAPS_API_KEY}
+  SEO_SITEMAP_PING_GOOGLE: ${SEO_SITEMAP_PING_GOOGLE}
+  SERVER_NAME: ${SERVER_NAME}
+  PREFERRED_URL_SCHEME: ${PREFERRED_URL_SCHEME}
+  GUNICORN_ACCESS_LOG: "-"
+  STATIC_FILES_MIRROR: /static
+  CACHE_PATH: tmp
+  JWT_PRIVATE_KEY: ${JWT_PRIVATE_KEY}
+  JWT_PUBLIC_JWKS: ${JWT_PUBLIC_JWKS}
+
+x-web:
+  &default-web
+  image: danielgrams/gsevpt:${WEB_TAG}
+  restart: always
+  extra_hosts:
+    - "host.docker.internal:host-gateway"
+  environment:
+    <<: *default-web-env
+  volumes:
+    - ${CACHE_PATH}:/app/project/tmp
+    - ${STATIC_PATH}:/static
+  depends_on:
+    db:
+      condition: service_healthy
+    redis:
+      condition: service_healthy
+
 services:
   db:
     image: postgis/postgis:12-3.1
@@ -33,9 +73,19 @@ services:
       db:
         condition: service_healthy
 
-  web:
-    image: danielgrams/gsevpt:${WEB_TAG}
+  redis:
+    image: bitnami/redis:6.2
     restart: always
+    healthcheck:
+      test: "redis-cli -a '${REDIS_PASSWORD}' ping | grep PONG"
+      start_period: "5s"
+    volumes:
+      - ${REDIS_DATA_PATH}:/bitnami/redis/data
+    environment:
+      REDIS_PASSWORD: ${REDIS_PASSWORD}
+
+  web:
+    <<: *default-web
     healthcheck:
       test: "curl -f ${SERVER_NAME}/up"
       interval: "60s"
@@ -45,31 +95,16 @@ services:
       - "5000:5000"
     extra_hosts:
       - "host.docker.internal:host-gateway"
-    environment:
-      FLASK_APP: main.py
-      DATABASE_URL: postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@db/${POSTGRES_DB}
-      SECRET_KEY: ${SECRET_KEY}
-      SECURITY_PASSWORD_HASH: ${SECURITY_PASSWORD_HASH}
-      MAIL_DEFAULT_SENDER: ${MAIL_DEFAULT_SENDER}
-      MAIL_PASSWORD: ${MAIL_PASSWORD}
-      MAIL_PORT: ${MAIL_PORT}
-      MAIL_SERVER: ${MAIL_SERVER}
-      MAIL_USE_TLS: ${MAIL_USE_TLS}
-      MAIL_USERNAME: ${MAIL_USERNAME}
-      GOOGLE_MAPS_API_KEY: ${GOOGLE_MAPS_API_KEY}
-      SERVER_NAME: ${SERVER_NAME}
-      PREFERRED_URL_SCHEME: ${PREFERRED_URL_SCHEME}
-      GUNICORN_ACCESS_LOG: "-"
-      STATIC_FILES_MIRROR: /static
-      CACHE_PATH: tmp
-      JWT_PRIVATE_KEY: ${JWT_PRIVATE_KEY}
-      JWT_PUBLIC_JWKS: ${JWT_PUBLIC_JWKS}
-    volumes:
-      - ${CACHE_PATH}:/app/project/tmp
-      - ${STATIC_PATH}:/static
-    depends_on:
-      db:
-        condition: service_healthy
+
+  worker:
+    <<: *default-web
+    command: celery -A project.celery worker --loglevel=error
+    entrypoint: []
+
+  scheduler:
+    <<: *default-web
+    command: celery -A project.celery beat --loglevel=error
+    entrypoint: []
 
   fluentd:
     image: danielgrams/fluentd

View File

@@ -42,6 +42,7 @@ Jobs that should run on a regular basis.
 ### Daily
 
 ```sh
+flask cache clear-images
 flask event update-recurring-dates
 flask dump all
 flask seo generate-sitemap --pinggoogle
@@ -50,10 +51,18 @@ flask seo generate-robots-txt
 ## Administration
 
+### Users
+
 ```sh
 flask user add-admin-roles super@hero.com
 ```
 
+### Worker active tasks
+
+```sh
+celery -A project.celery inspect active
+```
+
 ## Configuration
 
 Create `.env` file in the root directory or pass as environment variables.

View File

@@ -82,3 +82,9 @@ docker run -p 5000:5000 -e "DATABASE_URL=postgresql://postgres@localhost/gsevpt"
 ```sh
 docker-compose build && docker-compose up
 ```
+
+## Celery
+
+```sh
+dotenv run celery -A project.celery purge
+```

View File

@@ -1,20 +1,83 @@
 version: "3.9"
+name: "oveda-dev"
+
+x-web-env:
+  &default-web-env
+  FLASK_APP: main.py
+  DATABASE_URL: postgresql://user:pass@db/gsevpt
+  REDIS_URL: redis://default:pass@redis
+  MAIL_DEFAULT_SENDER: noresponse@gsevpt.de
+  MAIL_SERVER: mailhog
+  MAIL_PORT: 1025
+  MAIL_USE_TLS: False
+  GUNICORN_ACCESS_LOG: "-"
+  GUNICORN_LOG_LEVEL: debug
+  FLASK_DEBUG: 1
+  SERVER_NAME: "127.0.0.1:5000"
+
+x-web:
+  &default-web
+  build: .
+  environment:
+    <<: *default-web-env
+  depends_on:
+    db:
+      condition: service_healthy
+    redis:
+      condition: service_healthy
+    mailhog:
+      condition: service_started
+
 services:
   db:
-    image: mdillon/postgis
+    image: postgis/postgis:12-3.1
+    healthcheck:
+      test: "pg_isready --username=user && psql --username=user --list"
+      start_period: "5s"
     environment:
       - POSTGRES_DB=gsevpt
       - POSTGRES_USER=user
       - POSTGRES_PASSWORD=pass
+
+  redis:
+    image: bitnami/redis:6.2
+    healthcheck:
+      test: "redis-cli -a 'pass' ping | grep PONG"
+      start_period: "5s"
+    environment:
+      REDIS_PASSWORD: pass
+
+  mailhog:
+    image: mailhog/mailhog
+    healthcheck:
+      test: "curl -f localhost:8025"
+      interval: "60s"
+      timeout: "5s"
+      start_period: "5s"
+    ports:
+      - "8026:8025"
+
   web:
-    build: .
+    <<: *default-web
     ports:
       - "5000:5000"
-    environment:
-      FLASK_APP: main.py
-      DATABASE_URL: postgresql://user:pass@db/gsevpt
-    depends_on:
-      - db
+
+  worker:
+    <<: *default-web
+    command: celery -A project.celery worker --loglevel=error
+    entrypoint: []
+
+  scheduler:
+    <<: *default-web
+    command: celery -A project.celery beat --loglevel=error
+    entrypoint: []
+
+  flower:
+    image: mher/flower:1.2
+    ports:
+      - "5555:5555"
+    environment:
+      CELERY_BROKER_URL: redis://default:pass@redis
+    depends_on:
+      redis:
+        condition: service_healthy

View File

@@ -5,6 +5,17 @@ if [[ ! -z "${STATIC_FILES_MIRROR}" ]]; then
     rsync -a --delete project/static/ "${STATIC_FILES_MIRROR}"
 fi
 
+echo "Using redis ${REDIS_URL}"
+PONG=`redis-cli -u ${REDIS_URL} ping | grep PONG`
+while [ -z "$PONG" ]; do
+    sleep 2
+    echo "Waiting for redis server ${REDIS_URL} to become available..."
+    PONG=`redis-cli -u ${REDIS_URL} ping | grep PONG`
+done
+
+echo "Using database server ${DATABASE_URL}"
 until flask db upgrade
 do
     echo "Waiting for postgres server to become available..."

View File

@@ -1,5 +1,6 @@
 import logging
 import os
+from datetime import timedelta
 
 from flask import Flask
 from flask_babelex import Babel
@@ -22,6 +23,7 @@ def getenv_bool(name: str, default: str = "False"):  # pragma: no cover
 # Create app
 app = Flask(__name__)
 app.config["SQLALCHEMY_DATABASE_URI"] = os.environ["DATABASE_URL"]
+app.config["REDIS_URL"] = os.getenv("REDIS_URL")
 app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = False
 app.config["SECURITY_CONFIRMABLE"] = True
 app.config["SECURITY_POST_LOGIN_VIEW"] = "manage_after_login"
@@ -36,6 +38,7 @@ app.config["SERVER_NAME"] = os.getenv("SERVER_NAME")
 app.config["ADMIN_UNIT_CREATE_REQUIRES_ADMIN"] = os.getenv(
     "ADMIN_UNIT_CREATE_REQUIRES_ADMIN", False
 )
+app.config["SEO_SITEMAP_PING_GOOGLE"] = getenv_bool("SEO_SITEMAP_PING_GOOGLE", "False")
 
 # Proxy handling
 if os.getenv("PREFERRED_URL_SCHEME"):  # pragma: no cover
@@ -45,6 +48,33 @@ from project.reverse_proxied import ReverseProxied
 
 app.wsgi_app = ReverseProxied(app.wsgi_app)
 
+# Celery
+task_always_eager = "REDIS_URL" not in app.config or not app.config["REDIS_URL"]
+app.config.update(
+    CELERY_CONFIG={
+        "broker_url": app.config["REDIS_URL"],
+        "result_backend": app.config["REDIS_URL"],
+        "result_expires": timedelta(hours=1),
+        "broker_pool_limit": None,
+        "redis_max_connections": 2,
+        "timezone": "Europe/Berlin",
+        "broker_transport_options": {
+            "max_connections": 2,
+            "queue_order_strategy": "priority",
+            "priority_steps": list(range(3)),
+            "sep": ":",
+        },
+        "task_default_priority": 1,  # 0=high, 1=normal, 2=low priority
+        "task_always_eager": task_always_eager,
+    }
+)
+
+from project.celery import create_celery
+
+celery = create_celery(app)
+
 # Generate a nice key using secrets.token_urlsafe()
 app.config["SECRET_KEY"] = os.environ.get(
     "SECRET_KEY", "pf9Wkove4IKEAXvy-cQkeDPhv9Cb3Ag-wyJILbq_dFw"
@@ -67,6 +97,12 @@ if __name__ != "__main__":
     app.logger.handlers = gunicorn_logger.handlers
     app.logger.setLevel(gunicorn_logger.level)
 
+# One line logging
+from project.one_line_formatter import init_logger_with_one_line_formatter
+
+init_logger_with_one_line_formatter(logging.getLogger())
+init_logger_with_one_line_formatter(app.logger)
+
 # Gzip
 gzip = Gzip(app)
@@ -127,6 +163,9 @@ if app.config["MAIL_SUPPRESS_SEND"]:
 db = SQLAlchemy(app)
 migrate = Migrate(app, db)
 
+# Celery tasks
+from project import celery_tasks
+
 # API
 from project.api import RestApi
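The `CELERY_CONFIG` block introduced above enables Redis priority queues (`priority_steps` 0-2) with a default task priority of 1. A minimal sketch of overriding that default when enqueuing one of the tasks from this commit, assuming a broker is configured and `task_always_eager` is therefore off:

```python
# Sketch: enqueue a task from this commit with an explicit priority.
# Priorities follow the configured steps: 0 = high, 1 = normal (default), 2 = low.
from project.celery_tasks import dump_all_task

result = dump_all_task.apply_async(priority=0)  # jump ahead of normal-priority work
print(result.id)      # AsyncResult id, stored in the Redis result backend
print(result.status)  # e.g. "PENDING" until a worker picks the task up
```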

project/celery.py Normal file
View File

@@ -0,0 +1,91 @@
from smtplib import SMTPException
from urllib.error import URLError

from celery import Celery
from celery.signals import (
    after_setup_logger,
    after_setup_task_logger,
    task_postrun,
    worker_ready,
)
from celery_singleton import Singleton, clear_locks
from requests.exceptions import RequestException


class HttpTaskException(Exception):
    pass


def create_celery(app):
    celery = Celery(app.import_name)
    celery.conf.update(app.config["CELERY_CONFIG"])
    TaskBase = Singleton

    class ContextTask(TaskBase):
        abstract = True

        def __call__(self, *args, **kwargs):
            with app.app_context():
                return TaskBase.__call__(self, *args, **kwargs)

    celery.Task = ContextTask

    class HttpTask(ContextTask):
        abstract = True
        autoretry_for = (HttpTaskException,)
        retry_backoff = 5
        max_retries = 3
        retry_jitter = True

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self._real_run = self.run
            self.run = self._wrapped_run

        def _wrapped_run(self, *args, **kwargs):
            try:
                self._real_run(*args, **kwargs)
            except (
                URLError,
                RequestException,
                SMTPException,
            ) as e:
                raise HttpTaskException(repr(e))

    setattr(app, "celery_http_task_cls", HttpTask)
    return celery


@after_setup_logger.connect
def setup_logger(logger, *args, **kwargs):
    from project.one_line_formatter import init_logger_with_one_line_formatter

    init_logger_with_one_line_formatter(logger)


@after_setup_task_logger.connect
def setup_task_logger(logger, *args, **kwargs):
    from project.one_line_formatter import init_logger_with_one_line_formatter

    init_logger_with_one_line_formatter(logger)


@worker_ready.connect
def unlock_all(**kwargs):
    from project import celery

    clear_locks(celery)


@task_postrun.connect
def close_session(*args, **kwargs):
    from project import app
    from project import db as sqlalchemydb

    # Flask SQLAlchemy will automatically create new sessions for you from
    # a scoped session factory, given that we are maintaining the same app
    # context, this ensures tasks have a fresh session (e.g. session errors
    # won't propagate across tasks)
    with app.app_context():
        sqlalchemydb.session.remove()
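`HttpTask` converts network and mail failures into `HttpTaskException`, so Celery's autoretry (backoff, at most 3 retries) kicks in. A hedged sketch of a task opting into it through the `celery_http_task_cls` attribute set in `create_celery`; the task name and URL are illustrative only, not part of this commit:

```python
# Sketch: a task that uses the retrying HttpTask base class.
import requests

from project import app, celery


@celery.task(base=app.celery_http_task_cls)
def ping_website_task(url="https://example.org"):
    # A requests.RequestException raised here surfaces as HttpTaskException,
    # which is listed in autoretry_for and is therefore retried with backoff.
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    return response.status_code
```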

project/celery_tasks.py Normal file
View File

@@ -0,0 +1,63 @@
from celery.schedules import crontab

from project import celery


@celery.on_after_configure.connect
def setup_periodic_tasks(sender, **kwargs):
    sender.add_periodic_task(crontab(hour=0, minute=0), clear_images_task)
    sender.add_periodic_task(crontab(hour=1, minute=0), update_recurring_dates_task)
    sender.add_periodic_task(crontab(hour=2, minute=0), dump_all_task)
    sender.add_periodic_task(crontab(hour=3, minute=0), seo_generate_sitemap_task)
    sender.add_periodic_task(crontab(hour=4, minute=0), generate_robots_txt_task)


@celery.task(
    acks_late=True,
    reject_on_worker_lost=True,
)
def clear_images_task():
    from project.services.cache import clear_images

    clear_images()


@celery.task(
    acks_late=True,
    reject_on_worker_lost=True,
)
def update_recurring_dates_task():
    from project.services.event import update_recurring_dates

    update_recurring_dates()


@celery.task(
    acks_late=True,
    reject_on_worker_lost=True,
)
def dump_all_task():
    from project.services.dump import dump_all

    dump_all()


@celery.task(
    acks_late=True,
    reject_on_worker_lost=True,
)
def seo_generate_sitemap_task():
    from project import app
    from project.services.seo import generate_sitemap

    generate_sitemap(app.config["SEO_SITEMAP_PING_GOOGLE"])


@celery.task(
    acks_late=True,
    reject_on_worker_lost=True,
)
def generate_robots_txt_task():
    from project.services.seo import generate_robots_txt

    generate_robots_txt()
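Because `task_always_eager` is derived from `REDIS_URL` in `project/__init__.py`, these tasks run inline when no Redis broker is configured (tests, plain local runs) and go through the worker otherwise. A small sketch of triggering one outside the beat schedule:

```python
# Sketch: invoke a periodic task ad hoc.
from project.celery_tasks import clear_images_task

# Without REDIS_URL the call executes inline (eager mode).
# With a broker it returns an AsyncResult handled by `celery -A project.celery worker`.
async_result = clear_images_task.delay()
print(async_result.id)
```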

View File

@@ -1,17 +1,14 @@
-import click
 from flask.cli import AppGroup
 
-from project import app, img_path
-from project.utils import clear_files_in_dir
+from project import app
+from project.services import cache
 
 cache_cli = AppGroup("cache")
 
 
 @cache_cli.command("clear-images")
 def clear_images():
-    click.echo("Clearing images..")
-    clear_files_in_dir(img_path)
-    click.echo("Done.")
+    cache.clear_images()
 
 
 app.cli.add_command(cache_cli)

View File

@@ -1,102 +1,14 @@
-import json
-import os
-import shutil
-
-import click
 from flask.cli import AppGroup
-from sqlalchemy import and_
-from sqlalchemy.orm import joinedload
 
-from project import app, dump_path
-from project.api.event.schemas import EventDumpSchema
-from project.api.event_category.schemas import EventCategoryDumpSchema
-from project.api.event_reference.schemas import EventReferenceDumpSchema
-from project.api.organization.schemas import OrganizationDumpSchema
-from project.api.organizer.schemas import OrganizerDumpSchema
-from project.api.place.schemas import PlaceDumpSchema
-from project.models import (
-    AdminUnit,
-    Event,
-    EventCategory,
-    EventOrganizer,
-    EventPlace,
-    EventReference,
-    PublicStatus,
-)
-from project.utils import make_dir
+from project import app
+from project.services import dump
 
 dump_cli = AppGroup("dump")
 
 
-def dump_items(items, schema, file_base_name, dump_path):
-    result = schema.dump(items)
-    path = os.path.join(dump_path, file_base_name + ".json")
-
-    with open(path, "w") as outfile:
-        json.dump(result, outfile, ensure_ascii=False)
-
-    click.echo(f"{len(items)} item(s) dumped to {path}.")
-
-
 @dump_cli.command("all")
 def dump_all():
-    # Setup temp dir
-    tmp_path = os.path.join(dump_path, "tmp")
-    make_dir(tmp_path)
-
-    # Events
-    events = (
-        Event.query.join(Event.admin_unit)
-        .options(joinedload(Event.categories))
-        .filter(
-            and_(
-                Event.public_status == PublicStatus.published,
-                AdminUnit.is_verified,
-            )
-        )
-        .all()
-    )
-    dump_items(events, EventDumpSchema(many=True), "events", tmp_path)
-
-    # Places
-    places = EventPlace.query.all()
-    dump_items(places, PlaceDumpSchema(many=True), "places", tmp_path)
-
-    # Event categories
-    event_categories = EventCategory.query.all()
-    dump_items(
-        event_categories,
-        EventCategoryDumpSchema(many=True),
-        "event_categories",
-        tmp_path,
-    )
-
-    # Organizers
-    organizers = EventOrganizer.query.all()
-    dump_items(organizers, OrganizerDumpSchema(many=True), "organizers", tmp_path)
-
-    # Organizations
-    organizations = AdminUnit.query.all()
-    dump_items(
-        organizations, OrganizationDumpSchema(many=True), "organizations", tmp_path
-    )
-
-    # Event references
-    event_references = EventReference.query.all()
-    dump_items(
-        event_references,
-        EventReferenceDumpSchema(many=True),
-        "event_references",
-        tmp_path,
-    )
-
-    # Zip
-    zip_base_name = os.path.join(dump_path, "all")
-    zip_path = shutil.make_archive(zip_base_name, "zip", tmp_path)
-    click.echo(f"Zipped all up to {zip_path}.")
-
-    # Clean up temp dir
-    shutil.rmtree(tmp_path, ignore_errors=True)
+    dump.dump_all()
 
 
 app.cli.add_command(dump_cli)

View File

@@ -1,28 +1,14 @@
-import click
 from flask.cli import AppGroup
 
-from project import app, db
-from project.dateutils import berlin_tz
-from project.services.event import (
-    get_recurring_events,
-    update_event_dates_with_recurrence_rule,
-)
+from project import app
+from project.services import event
 
 event_cli = AppGroup("event")
 
 
 @event_cli.command("update-recurring-dates")
 def update_recurring_dates():
-    # Setting the timezone is neccessary for cli command
-    db.session.execute("SET timezone TO :val;", {"val": berlin_tz.zone})
-
-    events = get_recurring_events()
-
-    for event in events:
-        update_event_dates_with_recurrence_rule(event)
-
-    db.session.commit()
-    click.echo(f"{len(events)} event(s) were updated.")
+    event.update_recurring_dates()
 
 
 app.cli.add_command(event_cli)

View File

@@ -1,18 +1,8 @@
-import os
-import shutil
-from io import StringIO
-
 import click
-import requests
-from flask import url_for
 from flask.cli import AppGroup, with_appcontext
-from sqlalchemy import and_
-from sqlalchemy.orm import load_only
 
-from project import app, cache_path, robots_txt_path, sitemap_path
-from project.dateutils import get_today
-from project.models import AdminUnit, Event, EventDate, PublicStatus
-from project.utils import make_dir
+from project import app
+from project.services import seo
 
 seo_cli = AppGroup("seo")
 
@@ -21,82 +11,13 @@ seo_cli = AppGroup("seo")
 @click.option("--pinggoogle/--no-pinggoogle", default=False)
 @with_appcontext
 def generate_sitemap(pinggoogle):
-    click.echo("Generating sitemap..")
-    make_dir(cache_path)
-
-    buf = StringIO()
-    buf.write('<?xml version="1.0" encoding="UTF-8"?>')
-    buf.write('<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">')
-
-    today = get_today()
-    events = (
-        Event.query.join(Event.admin_unit)
-        .options(load_only(Event.id, Event.updated_at))
-        .filter(Event.dates.any(EventDate.start >= today))
-        .filter(
-            and_(
-                Event.public_status == PublicStatus.published,
-                AdminUnit.is_verified,
-            )
-        )
-        .all()
-    )
-    click.echo(f"Found {len(events)} events")
-
-    for event in events:
-        loc = url_for("event", event_id=event.id)
-        lastmod = event.updated_at.strftime("%Y-%m-%d") if event.updated_at else None
-        lastmod_tag = f"<lastmod>{lastmod}</lastmod>" if lastmod else ""
-        buf.write(f"<url><loc>{loc}</loc>{lastmod_tag}</url>")
-
-    buf.write("</urlset>")
-
-    with open(sitemap_path, "w") as fd:
-        buf.seek(0)
-        shutil.copyfileobj(buf, fd)
-
-    size = os.path.getsize(sitemap_path)
-    click.echo(f"Generated sitemap at {sitemap_path} ({size} Bytes)")
-
-    if size > 52428800:  # pragma: no cover
-        app.logger.error(f"Size of sitemap ({size} Bytes) is larger than 50MB.")
-
-    if pinggoogle:  # pragma: no cover
-        sitemap_url = requests.utils.quote(url_for("sitemap_xml"))
-        google_url = f"http://www.google.com/ping?sitemap={sitemap_url}"
-        click.echo(f"Pinging {google_url} ..")
-
-        response = requests.get(google_url)
-        click.echo(f"Response {response.status_code}")
-
-        if response.status_code != 200:
-            app.logger.error(
-                f"Google ping returned unexpected status code {response.status_code}."
-            )
+    seo.generate_sitemap(pinggoogle)
 
 
 @seo_cli.command("generate-robots-txt")
 @with_appcontext
 def generate_robots_txt():
-    click.echo("Generating robots.txt..")
-    make_dir(cache_path)
-
-    buf = StringIO()
-    buf.write(f"user-agent: *{os.linesep}")
-    buf.write(f"Disallow: /{os.linesep}")
-    buf.write(f"Allow: /eventdates{os.linesep}")
-    buf.write(f"Allow: /eventdate/{os.linesep}")
-    buf.write(f"Allow: /event/{os.linesep}")
-
-    if os.path.exists(sitemap_path):
-        sitemap_url = url_for("sitemap_xml")
-        buf.write(f"Sitemap: {sitemap_url}{os.linesep}")
-
-    with open(robots_txt_path, "w") as fd:
-        buf.seek(0)
-        shutil.copyfileobj(buf, fd)
-
-    click.echo(f"Generated robots.txt at {robots_txt_path}")
+    seo.generate_robots_txt()
 
 
 app.cli.add_command(seo_cli)

View File

@@ -0,0 +1,23 @@
import logging


class OneLineFormatter(logging.Formatter):
    def format(self, record):  # pragma: no cover
        result = super(OneLineFormatter, self).format(record)
        return result.replace("\n", "\\n")


def init_logger_with_one_line_formatter(logger):
    if not logger:  # pragma: no cover
        return

    for handler in logger.handlers:
        if handler.formatter:
            fmt = handler.formatter._fmt

            if fmt:
                fmt = fmt.replace(" %(levelname)s", " [%(levelname)s]")

            handler.formatter = OneLineFormatter(fmt, handler.formatter.datefmt)
        else:  # pragma: no cover
            handler.formatter = OneLineFormatter()
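A quick illustration of what the formatter does; the handler setup is a sketch, not part of the commit:

```python
# Sketch: OneLineFormatter collapses multi-line records (e.g. tracebacks)
# into a single line, which keeps one log event per line for fluentd.
import logging

from project.one_line_formatter import OneLineFormatter

handler = logging.StreamHandler()
handler.setFormatter(OneLineFormatter("%(asctime)s [%(levelname)s] %(message)s"))

logger = logging.getLogger("demo")
logger.addHandler(handler)
logger.error("first line\nsecond line")
# The emitted record contains "first line\\nsecond line" on one line.
```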

View File

@@ -0,0 +1,8 @@
from project import app, img_path
from project.utils import clear_files_in_dir


def clear_images():
    app.logger.info("Clearing images..")
    clear_files_in_dir(img_path)
    app.logger.info("Done.")

project/services/dump.py Normal file
View File

@@ -0,0 +1,94 @@
import json
import os
import shutil

from sqlalchemy import and_
from sqlalchemy.orm import joinedload

from project import app, dump_path
from project.api.event.schemas import EventDumpSchema
from project.api.event_category.schemas import EventCategoryDumpSchema
from project.api.event_reference.schemas import EventReferenceDumpSchema
from project.api.organization.schemas import OrganizationDumpSchema
from project.api.organizer.schemas import OrganizerDumpSchema
from project.api.place.schemas import PlaceDumpSchema
from project.models import (
    AdminUnit,
    Event,
    EventCategory,
    EventOrganizer,
    EventPlace,
    EventReference,
    PublicStatus,
)
from project.utils import make_dir


def dump_items(items, schema, file_base_name, dump_path):
    result = schema.dump(items)
    path = os.path.join(dump_path, file_base_name + ".json")

    with open(path, "w") as outfile:
        json.dump(result, outfile, ensure_ascii=False)

    app.logger.info(f"{len(items)} item(s) dumped to {path}.")


def dump_all():
    # Setup temp dir
    tmp_path = os.path.join(dump_path, "tmp")
    make_dir(tmp_path)

    # Events
    events = (
        Event.query.join(Event.admin_unit)
        .options(joinedload(Event.categories))
        .filter(
            and_(
                Event.public_status == PublicStatus.published,
                AdminUnit.is_verified,
            )
        )
        .all()
    )
    dump_items(events, EventDumpSchema(many=True), "events", tmp_path)

    # Places
    places = EventPlace.query.all()
    dump_items(places, PlaceDumpSchema(many=True), "places", tmp_path)

    # Event categories
    event_categories = EventCategory.query.all()
    dump_items(
        event_categories,
        EventCategoryDumpSchema(many=True),
        "event_categories",
        tmp_path,
    )

    # Organizers
    organizers = EventOrganizer.query.all()
    dump_items(organizers, OrganizerDumpSchema(many=True), "organizers", tmp_path)

    # Organizations
    organizations = AdminUnit.query.all()
    dump_items(
        organizations, OrganizationDumpSchema(many=True), "organizations", tmp_path
    )

    # Event references
    event_references = EventReference.query.all()
    dump_items(
        event_references,
        EventReferenceDumpSchema(many=True),
        "event_references",
        tmp_path,
    )

    # Zip
    zip_base_name = os.path.join(dump_path, "all")
    zip_path = shutil.make_archive(zip_base_name, "zip", tmp_path)
    app.logger.info(f"Zipped all up to {zip_path}.")

    # Clean up temp dir
    shutil.rmtree(tmp_path, ignore_errors=True)

View File

@@ -8,7 +8,7 @@ from sqlalchemy import and_, case, func, or_
 from sqlalchemy.orm import aliased, contains_eager, defaultload, joinedload, lazyload
 from sqlalchemy.sql import extract
 
-from project import db
+from project import app, db
 from project.dateutils import (
     berlin_tz,
     date_add_time,
@@ -500,3 +500,16 @@ def create_ical_event_for_date(event_date: EventDate) -> icalendar.Event:
     event.add("location", get_place_str(event_date.event.event_place))
 
     return event
+
+
+def update_recurring_dates():
+    # Setting the timezone is neccessary for cli command
+    db.session.execute("SET timezone TO :val;", {"val": berlin_tz.zone})
+
+    events = get_recurring_events()
+
+    for event in events:
+        update_event_dates_with_recurrence_rule(event)
+
+    db.session.commit()
+    app.logger.info(f"{len(events)} event(s) were updated.")

project/services/seo.py Normal file
View File

@@ -0,0 +1,90 @@
import os
import shutil
from io import StringIO

import requests
from flask import url_for
from sqlalchemy import and_
from sqlalchemy.orm import load_only

from project import app, cache_path, robots_txt_path, sitemap_path
from project.dateutils import get_today
from project.models import AdminUnit, Event, EventDate, PublicStatus
from project.utils import make_dir


def generate_sitemap(pinggoogle: bool):
    app.logger.info("Generating sitemap..")
    make_dir(cache_path)

    buf = StringIO()
    buf.write('<?xml version="1.0" encoding="UTF-8"?>')
    buf.write('<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">')

    today = get_today()
    events = (
        Event.query.join(Event.admin_unit)
        .options(load_only(Event.id, Event.updated_at))
        .filter(Event.dates.any(EventDate.start >= today))
        .filter(
            and_(
                Event.public_status == PublicStatus.published,
                AdminUnit.is_verified,
            )
        )
        .all()
    )
    app.logger.info(f"Found {len(events)} events")

    for event in events:
        loc = url_for("event", event_id=event.id)
        lastmod = event.updated_at.strftime("%Y-%m-%d") if event.updated_at else None
        lastmod_tag = f"<lastmod>{lastmod}</lastmod>" if lastmod else ""
        buf.write(f"<url><loc>{loc}</loc>{lastmod_tag}</url>")

    buf.write("</urlset>")

    with open(sitemap_path, "w") as fd:
        buf.seek(0)
        shutil.copyfileobj(buf, fd)

    size = os.path.getsize(sitemap_path)
    app.logger.info(f"Generated sitemap at {sitemap_path} ({size} Bytes)")

    if size > 52428800:  # pragma: no cover
        app.logger.error(f"Size of sitemap ({size} Bytes) is larger than 50MB.")

    if pinggoogle:  # pragma: no cover
        sitemap_url = requests.utils.quote(url_for("sitemap_xml"))
        google_url = f"http://www.google.com/ping?sitemap={sitemap_url}"
        app.logger.info(f"Pinging {google_url} ..")

        response = requests.get(google_url)
        app.logger.info(f"Response {response.status_code}")

        if response.status_code != 200:
            app.logger.error(
                f"Google ping returned unexpected status code {response.status_code}."
            )


def generate_robots_txt():
    app.logger.info("Generating robots.txt..")
    make_dir(cache_path)

    buf = StringIO()
    buf.write(f"user-agent: *{os.linesep}")
    buf.write(f"Disallow: /{os.linesep}")
    buf.write(f"Allow: /eventdates{os.linesep}")
    buf.write(f"Allow: /eventdate/{os.linesep}")
    buf.write(f"Allow: /event/{os.linesep}")

    if os.path.exists(sitemap_path):
        sitemap_url = url_for("sitemap_xml")
        buf.write(f"Sitemap: {sitemap_url}{os.linesep}")

    with open(robots_txt_path, "w") as fd:
        buf.seek(0)
        shutil.copyfileobj(buf, fd)

    app.logger.info(f"Generated robots.txt at {robots_txt_path}")

View File

@@ -5,7 +5,15 @@ from flask import redirect, render_template, request, send_from_directory, url_f
 from flask_babelex import gettext
 from markupsafe import Markup
 
-from project import app, cache_path, db, dump_path, robots_txt_file, sitemap_file
+from project import (
+    app,
+    cache_path,
+    celery,
+    db,
+    dump_path,
+    robots_txt_file,
+    sitemap_file,
+)
 from project.services.admin import upsert_settings
 from project.views.utils import track_analytics
@@ -35,6 +43,10 @@ def home():
 @app.route("/up")
 def up():
     db.engine.execute("SELECT 1")
+
+    if "REDIS_URL" in app.config and app.config["REDIS_URL"]:  # pragma: no cover
+        celery.control.ping()
+
     return "OK"

View File

@@ -1,22 +1,31 @@
 alembic==1.4.3
+amqp==5.1.1
 aniso8601==8.1.0
 apispec==4.0.0
 apispec-webframeworks==0.5.2
 appdirs==1.4.4
 argh==0.26.2
 arrow==0.14.7
+async-timeout==4.0.2
 attrs==20.3.0
 Authlib==0.15.3
 Babel==2.9.1
 bcrypt==3.2.0
 beautifulsoup4==4.9.3
+billiard==3.6.4.0
 black==23.1.0
 blinker==1.4
+cached-property==1.5.2
+celery==5.2.7
+celery-singleton==0.3.1
 certifi==2020.12.5
 cffi==1.14.4
 cfgv==3.2.0
 chardet==3.0.4
 click==8.1.3
+click-didyoumean==0.3.0
+click-plugins==1.1.1
+click-repl==0.2.0
 colour==0.1.5
 coverage==5.5
 coveralls==2.2.0
@@ -58,6 +67,7 @@ isort==5.7.0
 itsdangerous==1.1.0
 Jinja2==2.11.3
 jsonschema==3.2.0
+kombu==5.2.4
 Mako==1.1.3
 MarkupSafe==1.1.1
 marshmallow==3.10.0
@@ -76,6 +86,7 @@ Pillow==9.0.0
 platformdirs==3.1.0
 pluggy==0.13.1
 pre-commit==2.9.3
+prompt-toolkit==3.0.38
 psycopg2-binary==2.8.6
 py==1.10.0
 pycodestyle==2.6.0
@@ -91,9 +102,10 @@ pytest-split==0.6.0
 python-dateutil==2.8.1
 python-dotenv==0.15.0
 python-editor==1.0.4
-pytz==2020.4
+pytz==2022.7.1
 PyYAML==5.4.1
 qrcode==6.1
+redis==4.5.1
 regex==2020.11.13
 requests==2.25.0
 requests-mock==1.9.3
@@ -113,8 +125,10 @@ typing_extensions==4.5.0
 urllib3==1.26.5
 URLObject==2.4.3
 validators==0.18.2
+vine==5.0.0
 virtualenv==20.2.2
 visitor==0.1.3
+wcwidth==0.2.6
 webargs==7.0.1
 Werkzeug==1.0.1
 WTForms==2.3.3

View File

@@ -1,4 +1,4 @@
-def test_clear_images(client, seeder, app, utils):
+def test_clear_images(client, seeder, app, utils, caplog):
     user_id, admin_unit_id = seeder.setup_base()
     image_id = seeder.upsert_default_image()
@@ -7,4 +7,4 @@ def test_clear_images(client, seeder, app, utils):
     runner = app.test_cli_runner()
     result = runner.invoke(args=["cache", "clear-images"])
-    assert "Done." in result.output
+    assert result.exit_code == 0

View File

@@ -5,7 +5,7 @@ def test_all(client, seeder, app, utils):
     runner = app.test_cli_runner()
     result = runner.invoke(args=["dump", "all"])
-    assert "Zipped all up" in result.output
+    assert result.exit_code == 0
 
     utils.get_endpoint_ok("developer")
     utils.get_endpoint_ok("dump_files", path="all.zip")

View File

@@ -4,4 +4,4 @@ def test_update_recurring_dates(client, seeder, app):
     runner = app.test_cli_runner()
     result = runner.invoke(args=["event", "update-recurring-dates"])
-    assert "1 event(s) were updated." in result.output
+    assert result.exit_code == 0

View File

@@ -93,7 +93,7 @@ def test_robots_txt(app, utils):
     runner = app.test_cli_runner()
     runner.invoke(args=["seo", "generate-sitemap"])
     result = runner.invoke(args=["seo", "generate-robots-txt"])
-    assert "Generated robots.txt" in result.output
+    assert result.exit_code == 0
 
     utils.get_endpoint_ok("robots_txt")
@@ -104,5 +104,5 @@ def test_sitemap_xml(seeder, app, utils):
     app.config["SERVER_NAME"] = "localhost"
     runner = app.test_cli_runner()
     result = runner.invoke(args=["seo", "generate-sitemap"])
-    assert "Generated sitemap" in result.output
+    assert result.exit_code == 0
     utils.get_endpoint_ok("sitemap_xml")