API Write Access with OAuth2 #104

This commit is contained in:
Daniel Grams 2021-02-11 09:27:39 +01:00
parent e0cb02b37f
commit 6a49bc40dd
6 changed files with 1 additions and 804 deletions

View File

@ -1,237 +0,0 @@
from project import (
db,
get_admin_unit_by_name,
update_event_dates_with_recurrence_rule,
upsert_event_category,
)
from pprint import pprint
import datetime
import pytz
from urllib.request import urlopen
from bs4 import BeautifulSoup
from os import path
import json
import re
import decimal
from project.models import (
EventReviewStatus,
EventTargetGroupOrigin,
Location,
Event,
EventStatus,
EventPlace,
EventOrganizer,
)
from sqlalchemy import and_
# Time zone object used by parse_date_time_str() to localize parsed timestamps.
berlin_tz = pytz.timezone("Europe/Berlin")
def _load_document(url, debug):
    """Return the parsed HTML document for *url*.

    In debug mode the page is downloaded once to ``tmp/fp.html`` and re-read
    from that cache on later runs; otherwise it is fetched on every call.
    """
    if not debug:
        with urlopen(url) as response:
            return BeautifulSoup(response.read(), "html.parser")

    filename = "tmp/fp.html"
    if not path.exists(filename):
        with urlopen(url) as response:
            with open(filename, "wb") as cache_file:
                cache_file.write(response.read())

    # The cache holds the raw response bytes, so hand bytes to the parser
    # exactly as the live path does instead of decoding with the locale
    # default encoding.
    with open(filename, "rb") as cache_file:
        return BeautifulSoup(cache_file.read(), "html.parser")


def _extract_js_assigns(doc):
    """Collect the ``events: JSON.parse('...')`` payload from the page scripts.

    Returns a dict that contains the key ``"events"`` when a matching
    assignment was found in the script that mentions ``window.fp_initial``.
    """
    js_assigns_regex = r"(\w*)\s*:\s*JSON\.parse\('(.*)'\)"
    js_assigns = dict()

    for javascript in doc.find_all("script"):
        javascript_contents = javascript.contents[0] if javascript.contents else ""
        if "window.fp_initial" not in javascript_contents:
            continue

        matches = re.findall(js_assigns_regex, javascript_contents, re.MULTILINE)
        for key, json_str in matches:
            if key not in ["events"]:
                continue

            # The payload is a JavaScript string literal: resolve its escape
            # sequences first, then re-interpret the result as UTF-8.
            decoded_json_str = (
                json_str.encode("utf-8")
                .decode("unicode_escape")
                .encode("latin-1")
                .decode("utf-8")
            )
            js_assigns[key] = json.loads(decoded_json_str, strict=False)

        # NOTE(review): the original nesting of this break could not be
        # recovered from the source view; it is placed so that scanning stops
        # after the first script mentioning window.fp_initial -- confirm.
        break

    return js_assigns


def scrape(debug):
    """Import the events published on the feripro programme page.

    Every entry of the embedded ``events`` JSON is matched to an Event by its
    external link (page URL + ``#`` + event id), created when missing, updated
    with organizer, place and additional data, and committed one by one.

    Args:
        debug: When true, the page is cached in ``tmp/fp.html`` and re-used.

    Raises:
        KeyError: If the page contains no ``events`` assignment.
    """
    url = "https://goslar.feripro.de/programm/42/anmeldung/veranstaltungen"

    doc = _load_document(url, debug)
    js_assigns = _extract_js_assigns(doc)

    admin_unit = get_admin_unit_by_name("Ferienpass Goslar")
    category = upsert_event_category("Other")

    for js_event in js_assigns["events"]:
        if "event_id" not in js_event:
            continue

        event_id = js_event["event_id"]
        if not event_id:
            continue

        try:
            external_link = url + "#" + str(event_id)
            event = Event.query.filter(Event.external_link == external_link).first()
            did_create = False

            # Event
            if event is None:
                # Placeholder entries for dropped events are not imported.
                if js_event["name"] in [
                    "Entfällt",
                    "Diese Veranstaltung muss leider ausfallen ...",
                ]:
                    continue

                event = Event()
                event.admin_unit = admin_unit
                did_create = True

            event.category = category
            event.external_link = external_link
            event.review_status = EventReviewStatus.verified
            event.rating = 5
            event.target_group_origin = EventTargetGroupOrigin.resident
            event.name = js_event["name"]
            event.description = js_event["description"]
            start = parse_date_time_str(js_event["start"])
            end = parse_date_time_str(js_event["end"])
            update_event_dates_with_recurrence_rule(event, start, end)

            # Organizer: public contact data of the event takes precedence
            # over the organizer's own data.
            js_organizer = js_event["organizer"]
            organizer_name = (
                js_event["name_public"]
                if js_event["name_public"]
                else js_organizer["name"]
            )
            organizer_phone = (
                js_event["phone_public"]
                if js_event["phone_public"]
                else js_organizer["phone"]
            )
            organizer_email = (
                js_event["email_public"]
                if js_event["email_public"]
                else js_organizer["email"]
            )
            organizer_url = (
                js_organizer["website"]
                if js_organizer["website"]
                else js_organizer["facebook"]
            )

            organizer = EventOrganizer.query.filter(
                and_(
                    EventOrganizer.admin_unit_id == admin_unit.id,
                    EventOrganizer.name == organizer_name,
                )
            ).first()

            if organizer is None:
                organizer = EventOrganizer(
                    admin_unit_id=admin_unit.id, name=organizer_name
                )

            organizer.phone = organizer_phone
            organizer.email = organizer_email
            organizer.url = organizer_url
            event.organizer = organizer

            # Place
            place_name = ""
            place_description = ""
            place_location = None

            meeting_point = js_event["meeting_point"].replace("\r\n", ", ")
            if len(meeting_point) > 80:
                # Long meeting points are shortened for the name and kept in
                # full as the description.
                place_name = meeting_point[:80] + "..."
                place_description = meeting_point
            else:
                place_name = meeting_point

            if (
                "meeting_point_latitude" in js_event
                and "meeting_point_longitude" in js_event
            ):
                meeting_point_latitude = js_event["meeting_point_latitude"]
                meeting_point_longitude = js_event["meeting_point_longitude"]
                if meeting_point_latitude and meeting_point_longitude:
                    latitude = decimal.Decimal(meeting_point_latitude)
                    longitude = decimal.Decimal(meeting_point_longitude)
                    if latitude != 0 and longitude != 0:
                        place_location = Location()
                        place_location.latitude = latitude
                        place_location.longitude = longitude

            # NOTE(review): a freshly created organizer presumably has no id
            # until it is flushed, so this lookup may compare against NULL --
            # confirm against the model.
            place = EventPlace.query.filter(
                and_(
                    EventPlace.admin_unit_id == admin_unit.id,
                    EventPlace.organizer_id == organizer.id,
                    EventPlace.name == place_name,
                )
            ).first()

            if place is None:
                place = EventPlace(
                    admin_unit_id=admin_unit.id,
                    organizer_id=organizer.id,
                    name=place_name,
                )

            place.description = place_description
            place.location = place_location
            event.event_place = place

            # Additional data
            event.status = (
                EventStatus.cancelled if js_event["canceled"] else EventStatus.scheduled
            )
            event.kid_friendly = True
            event.accessible_for_free = js_event["price"] == "0.00"

            # Copy the tags so the scraped record itself is left untouched.
            tag_list = list(js_event["tags"])
            tag_list.append("Ferienpass")
            event.tags = ",".join(tag_list)

            if js_event["min_age"]:
                event.age_from = int(js_event["min_age"])

            if js_event["max_age"]:
                event.age_to = int(js_event["max_age"])

            print(
                "%s %s %s %s"
                % (event.dates[0].start, event.name, organizer.id, organizer.name)
            )

            if did_create:
                db.session.add(event)

            db.session.commit()
        except Exception:
            # Discard the failed unit of work; without the rollback the
            # session stays unusable for all following events.
            db.session.rollback()
            print("Exception")
            pprint(js_event)
def parse_date_time_str(date_time_str):
    """Parse an ISO formatted timestamp and localize it with ``berlin_tz``.

    Returns None when *date_time_str* is empty or None.
    """
    if date_time_str:
        naive = datetime.datetime.fromisoformat(date_time_str)
        return berlin_tz.localize(naive)
    return None
if __name__ == "__main__":
    # Script entry point: run one import without the debug cache.
    scrape(debug=False)

View File

@ -1,218 +0,0 @@
from project import db
from pprint import pprint
import datetime
from urllib import request
from os import path
import json
import decimal
from project.models import (
EventReviewStatus,
Location,
Event,
EventStatus,
EventPlace,
EventOrganizer,
)
from sqlalchemy import and_, not_
from project.services.admin_unit import get_admin_unit_by_name
from project.services.event import (
upsert_event_category,
update_event_dates_with_recurrence_rule,
)
# Admin unit that owns all imported events and the category used by default.
admin_unit = get_admin_unit_by_name("Harzinfo")
category = upsert_event_category("Other")

# Search endpoint that receives the JSON request body.
base_url = "https://www.harzinfo.de"
url = base_url + "/?ndssearch=fullsearch&no_cache=1&L=0"

# Request template and city list. Decode explicitly as UTF-8 so that German
# names with umlauts do not depend on the platform default encoding.
with open("scrape_hi_req.json", encoding="utf-8") as json_file:
    request_object = json.load(json_file)

with open("scrape_hi_cities.json", encoding="utf-8") as json_file:
    cities = json.load(json_file)
def response_from_url(city):
    """Send the search request for one city and return the open HTTP response.

    Args:
        city: Mapping with the keys ``id``, ``short_name`` and ``title``.

    Returns:
        The response object returned by ``urllib.request.urlopen``; the caller
        is responsible for closing it.
    """
    import copy

    # Build the body from a private copy so the module-level request template
    # is not modified by each call.
    body = copy.deepcopy(request_object)
    body["searchFilter"]["ndsdestinationdataevent"]["city"] = {
        str(city["id"]): city["short_name"] or city["title"]
    }

    payload = json.dumps(body).encode("utf-8")
    req = request.Request(url, data=payload)
    req.add_header("Content-Type", "application/json")
    return request.urlopen(req)
def load_json(debug, city):
    """Return the decoded JSON search result for *city*.

    Args:
        debug: When true, the raw response is cached in ``tmp/hi_<id>.html``
            and later calls read the cache instead of querying the server.
        city: Mapping with at least the key ``id`` (plus whatever
            ``response_from_url`` needs when a request is made).

    Returns:
        The parsed JSON document.
    """
    if not debug:
        # Close the HTTP response as soon as the document is parsed.
        with response_from_url(city) as response:
            return json.load(response)

    filename = "tmp/hi_%d.html" % (city["id"])

    if not path.exists(filename):
        with response_from_url(city) as response:
            with open(filename, "wb") as cache_file:
                cache_file.write(response.read())

    # Read the cache as bytes so decoding works the same way as for the live
    # response and does not depend on the locale encoding.
    with open(filename, "rb") as cache_file:
        return json.load(cache_file)
def parse_date_time_str(date_time_str):
    """Parse a timestamp that lacks its seconds field into a datetime.

    ``":00"`` is appended before the string is handed to
    ``datetime.fromisoformat``. Returns None for empty or None input.
    """
    return (
        datetime.datetime.fromisoformat(date_time_str + ":00")
        if date_time_str
        else None
    )
# Source category titles mapped to local category names, in priority order:
# the first title found among an item's categories decides the category.
_CATEGORY_BY_TAG = (
    ("Ausstellung/Kunst", "Art"),
    ("Comedy", "Comedy"),
    ("Konzert/Musik", "Music"),
    ("Theater", "Theater"),
    ("Genuss/Gourmet", "Dining"),
    ("Gesundheit/Wellness", "Fitness"),
    ("Kinder/Jugend", "Family"),
    ("Markt/Flohmarkt", "Shopping"),
    ("Sport", "Sports"),
    ("Vortrag/Lesung", "Book"),
    ("Kabarett", "Art"),
    ("Musical", "Theater"),
    ("Weihnachtsmärkte", "Festival"),
    ("Stadt- und Volksfeste", "Festival"),
)


def _category_name_for_tags(tag_list):
    """Return the local category name for the first known tag, else None."""
    for tag, category_name in _CATEGORY_BY_TAG:
        if tag in tag_list:
            return category_name
    return None


def scrape(debug, city):
    """Import the events of one city and remove events that disappeared.

    The organizer named after the city is looked up or created, every item of
    the search result is matched to an Event by organizer and external link
    (created when missing) and committed individually. Afterwards all events
    of this admin unit and organizer whose ids were not recorded during this
    run are deleted.

    Args:
        debug: Passed on to ``load_json`` to enable its response cache.
        city: Mapping with the keys ``id``, ``short_name`` and ``title``.
    """
    # Organizer
    organizer_name = city["short_name"] or city["title"]
    organizer = EventOrganizer.query.filter(
        and_(
            EventOrganizer.admin_unit_id == admin_unit.id,
            EventOrganizer.name == organizer_name,
        )
    ).first()

    if organizer is None:
        organizer = EventOrganizer(admin_unit_id=admin_unit.id, name=organizer_name)
        db.session.add(organizer)
        db.session.commit()

    print(organizer_name)
    response = load_json(debug, city)
    result = response["result"]
    event_ids = list()

    for item in result:
        try:
            uid = str(item["uid"])
            external_link = base_url + item["link"] + "#" + uid
            event = Event.query.filter(
                and_(
                    Event.organizer_id == organizer.id,
                    Event.external_link == external_link,
                )
            ).first()
            did_create = False

            if event is None:
                event = Event()
                event.admin_unit = admin_unit
                event.organizer = organizer
                did_create = True
            else:
                # Known events stay protected from the cleanup below even if
                # updating them fails.
                event_ids.append(event.id)

            event.category = category
            event.external_link = external_link
            event.review_status = EventReviewStatus.verified
            event.rating = int(item["rating"])
            event.name = item["title"]
            event.description = item["title"]
            start = parse_date_time_str(item["date"])
            update_event_dates_with_recurrence_rule(event, start, None)

            # Place
            place_name = item["location"]
            place_description = ""
            place_location = None

            if "latitude" in item and "longitude" in item:
                meeting_point_latitude = item["latitude"]
                meeting_point_longitude = item["longitude"]
                if meeting_point_latitude and meeting_point_longitude:
                    latitude = decimal.Decimal(meeting_point_latitude)
                    longitude = decimal.Decimal(meeting_point_longitude)
                    if latitude != 0 and longitude != 0:
                        place_location = Location()
                        place_location.latitude = latitude
                        place_location.longitude = longitude

            place = EventPlace.query.filter(
                and_(
                    EventPlace.admin_unit_id == admin_unit.id,
                    EventPlace.organizer_id == organizer.id,
                    EventPlace.name == place_name,
                )
            ).first()

            if place is None:
                place = EventPlace(
                    admin_unit_id=admin_unit.id,
                    organizer_id=organizer.id,
                    name=place_name,
                )

            place.description = place_description
            place.location = place_location
            event.event_place = place

            # Additional data
            event.status = (
                EventStatus.cancelled if item["canceled"] else EventStatus.scheduled
            )

            if "categories" in item:
                tag_list = list(item["categories"].values())

                category_name = _category_name_for_tags(tag_list)
                if category_name is not None:
                    event.category = upsert_event_category(category_name)

                if "Kinder/Jugend" in tag_list:
                    event.kid_friendly = True

                tag_list.append("Harzinfo")
                event.tags = ",".join(tag_list)

            print("%s %s %d" % (event.dates[0].start, event.name, event.rating))

            if did_create:
                db.session.add(event)

            db.session.commit()

            if did_create:
                # Record the id only now: before the event has been written
                # it has no id, and a missing id in the list would break the
                # NOT IN filter of the cleanup query.
                event_ids.append(event.id)
        except Exception:
            # Discard the failed unit of work; without the rollback the
            # session stays unusable for all following items.
            db.session.rollback()
            print("Exception")
            pprint(item)

    # Remove events of this organizer that were not part of this run.
    Event.query.filter(
        and_(
            Event.admin_unit_id == admin_unit.id,
            Event.organizer_id == organizer.id,
            not_(Event.id.in_(event_ids)),
        )
    ).delete(synchronize_session="fetch")
    db.session.commit()
if __name__ == "__main__":
    # Script entry point: import every configured city without the debug cache.
    for city in cities.values():
        scrape(debug=False, city=city)

View File

@ -1,206 +0,0 @@
{
"10": {
"district": null,
"id": 10,
"short_name": "",
"title": "Blankenburg"
},
"105": {
"district": null,
"id": 105,
"short_name": "Tilleda",
"title": "Kelbra OT Tilleda"
},
"111": {
"district": null,
"id": 111,
"short_name": "",
"title": "Lutherstadt Eisleben"
},
"12": {
"district": null,
"id": 12,
"short_name": "",
"title": "Braunlage"
},
"14": {
"district": null,
"id": 14,
"short_name": "",
"title": "Clausthal-Zellerfeld"
},
"148": {
"district": null,
"id": 148,
"short_name": "",
"title": "Leinefelde-Worbis"
},
"150": {
"district": null,
"id": 150,
"short_name": "",
"title": "Abbenrode"
},
"17": {
"district": null,
"id": 17,
"short_name": "",
"title": "Duderstadt"
},
"18": {
"district": null,
"id": 18,
"short_name": "",
"title": "Goslar"
},
"2": {
"district": null,
"id": 2,
"short_name": "Torfhaus",
"title": "Altenau OT Torfhaus"
},
"20": {
"district": null,
"id": 20,
"short_name": "Vienenburg",
"title": "Goslar OT Vienenburg"
},
"21": {
"district": null,
"id": 21,
"short_name": "",
"title": "Goslar- Hahnenklee"
},
"22": {
"district": null,
"id": 22,
"short_name": "",
"title": "Halberstadt"
},
"23": {
"district": null,
"id": 23,
"short_name": "",
"title": "Harzgerode"
},
"27": {
"district": null,
"id": 27,
"short_name": "",
"title": "Herzberg am Harz"
},
"28": {
"district": null,
"id": 28,
"short_name": "",
"title": "Ilsenburg"
},
"29": {
"district": null,
"id": 29,
"short_name": "Drübeck",
"title": "Ilsenburg OT Drübeck"
},
"35": {
"district": null,
"id": 35,
"short_name": "Wolfshagen",
"title": "Langelsheim OT Wolfshagen"
},
"37": {
"district": null,
"id": 37,
"short_name": "",
"title": "Nordhausen"
},
"4": {
"district": null,
"id": 4,
"short_name": "",
"title": "Aschersleben"
},
"42": {
"district": null,
"id": 42,
"short_name": "Höhlenort Rübeland",
"title": "Oberharz am Brocken OT Höhlenort Rübeland"
},
"44": {
"district": null,
"id": 44,
"short_name": "Stiege",
"title": "Oberharz am Brocken OT Stiege"
},
"47": {
"district": null,
"id": 47,
"short_name": "",
"title": "Osterode am Harz"
},
"5": {
"district": null,
"id": 5,
"short_name": "Bad Gandersheim",
"title": "Bad Gandersheim"
},
"50": {
"district": null,
"id": 50,
"short_name": "",
"title": "Quedlinburg"
},
"6": {
"district": null,
"id": 6,
"short_name": "",
"title": "Bad Grund (Harz)"
},
"60": {
"district": null,
"id": 60,
"short_name": "",
"title": "Thale"
},
"63": {
"district": null,
"id": 63,
"short_name": "",
"title": "Walkenried"
},
"65": {
"district": null,
"id": 65,
"short_name": "",
"title": "Wernigerode"
},
"67": {
"district": null,
"id": 67,
"short_name": "Schierke",
"title": "Wernigerode OT Schierke"
},
"7": {
"district": null,
"id": 7,
"short_name": "Bad Harzburg",
"title": "Bad Harzburg"
},
"8": {
"district": null,
"id": 8,
"short_name": "",
"title": "Bad Lauterberg im Harz"
},
"82": {
"district": null,
"id": 82,
"short_name": "",
"title": "Ballenstedt"
},
"9": {
"district": null,
"id": 9,
"short_name": "",
"title": "Bad Sachsa"
}
}

View File

@ -1,142 +0,0 @@
{
"contentId": 5801,
"languageId": 0,
"languageIsocode": "de",
"pageId": 637,
"searchFilter": {
"additionalSearchOptions": {
"keepFiltersForNewSearch": "0",
"showMainCategoriesInFilter": "0"
},
"ndsdestinationdataevent": {
"bookableFilter": false,
"category": {},
"city": {
"7": "Bad Harzburg"
},
"cityMode": "fix",
"district": [],
"endDate": "2021-07-22",
"formOptions": {
"categories": {
"24": {
"id": 24,
"title": "Ausstellung/Kunst"
},
"26": {
"id": 26,
"title": "Bergbau/Industriekultur"
},
"27": {
"id": 27,
"title": "Brauchtum"
},
"28": {
"id": 28,
"title": "Führung/Besichtigung"
},
"29": {
"id": 29,
"title": "Genuss/Gourmet"
},
"30": {
"id": 30,
"title": "Gesundheit/Wellness"
},
"31": {
"id": 31,
"title": "Harzer KlosterSommer"
},
"33": {
"id": 33,
"title": "Kinder/Jugend"
},
"34": {
"id": 34,
"title": "Konzert/Musik"
},
"35": {
"id": 35,
"title": "Luther"
},
"36": {
"id": 36,
"title": "Markt/Flohmarkt"
},
"40": {
"id": 40,
"title": "Silvester"
},
"41": {
"id": 41,
"title": "Sonstiges"
},
"42": {
"id": 42,
"title": "Sport"
},
"43": {
"id": 43,
"title": "Stadt- und Volksfeste"
},
"45": {
"id": 45,
"title": "Theater"
},
"46": {
"id": 46,
"title": "TheaterNatur-Festival"
},
"47": {
"id": 47,
"title": "Vortrag/Lesung"
},
"49": {
"id": 49,
"title": "Wanderung"
},
"50": {
"id": 50,
"title": "Weihnachtsmärkte"
},
"51": {
"id": 51,
"title": "Comedy"
},
"52": {
"id": 52,
"title": "Kabarett"
},
"53": {
"id": 53,
"title": "Musical"
},
"54": {
"id": 54,
"title": "Jubiläum"
},
"55": {
"id": 55,
"title": "Weihnachten"
},
"56": {
"id": 56,
"title": "Advent"
}
}
},
"geolocation": {
"distance": "20000",
"latitude": 0,
"longitude": 0
},
"searchWithoutDate": true,
"searchWithoutDateBackupEnd": "2020-09-20",
"searchWithoutDateBackupStart": "2020-07-22",
"startDate": "2020-07-22"
}
},
"sources": [
"ndsdestinationdataevent"
]
}

View File

@ -76,7 +76,7 @@ class UtilActions(object):
)
def create_form_data(self, response, values: dict) -> dict:
from project.scrape.form import Form
from tests.form import Form
soup = BeautifulSoup(response.data, "html.parser")
form = Form(soup.find("form"))