mirror of
https://github.com/lucaspalomodevelop/eventcally.git
synced 2026-03-13 00:07:22 +00:00
219 lines
7.3 KiB
Python
219 lines
7.3 KiB
Python
from project import db
|
|
from pprint import pprint
|
|
import datetime
|
|
from urllib import request
|
|
from os import path
|
|
import json
|
|
import decimal
|
|
from project.models import (
|
|
EventReviewStatus,
|
|
Location,
|
|
Event,
|
|
EventStatus,
|
|
EventPlace,
|
|
EventOrganizer,
|
|
)
|
|
from sqlalchemy import and_, not_
|
|
from project.services.admin_unit import get_admin_unit
|
|
from project.services.event import (
|
|
upsert_event_category,
|
|
update_event_dates_with_recurrence_rule,
|
|
)
|
|
|
|
# Admin unit that every organizer, place and event handled by this script
# is attached to.
admin_unit = get_admin_unit("Harzinfo")

# Default category; scrape() assigns it first and replaces it when one of
# the item's tags maps to a more specific category.
category = upsert_event_category("Other")

base_url = "https://www.harzinfo.de"

# Search endpoint that response_from_url() sends the JSON request body to.
url = base_url + "/?ndssearch=fullsearch&no_cache=1&L=0"

# Decode the JSON files explicitly as UTF-8 instead of relying on the
# platform's default encoding, which would garble non-ASCII names on
# systems whose locale is not UTF-8.
with open("scrape_hi_req.json", encoding="utf-8") as json_file:
    # Request body template; the city filter is filled in per request.
    request_object = json.load(json_file)

with open("scrape_hi_cities.json", encoding="utf-8") as json_file:
    # Mapping whose values are the city records iterated by the entry point.
    cities = json.load(json_file)
|
|
|
|
|
|
def response_from_url(city):
    """Send the event search request for one city and return the open response.

    The request body is the module-level ``request_object`` template with
    its ``searchFilter.ndsdestinationdataevent.city`` entry replaced by the
    given city.

    Args:
        city: Mapping with the keys ``"id"``, ``"short_name"`` and
            ``"title"``. ``"title"`` is used when ``"short_name"`` is falsy.

    Returns:
        The response object returned by ``urllib.request.urlopen``. The
        caller is responsible for reading and closing it.
    """
    city_filter = {str(city["id"]): city["short_name"] or city["title"]}

    # Copy each dictionary on the path to the city filter so the shared
    # template is never modified; untouched parts are reused as-is. Key
    # order is preserved, so the serialized payload is the same as before.
    search_filter = dict(request_object["searchFilter"])
    event_filter = dict(search_filter["ndsdestinationdataevent"])
    event_filter["city"] = city_filter
    search_filter["ndsdestinationdataevent"] = event_filter
    body = dict(request_object)
    body["searchFilter"] = search_filter

    # Supplying ``data`` makes urllib issue a POST request.
    req = request.Request(url, data=json.dumps(body).encode("utf-8"))
    req.add_header("Content-Type", "application/json")
    return request.urlopen(req)
|
|
|
|
|
|
def load_json(debug, city):
    """Return the parsed JSON search result for one city.

    Args:
        debug: When truthy, the raw response is cached in
            ``tmp/hi_<id>.html`` and later calls read that file instead of
            contacting the server. When falsy, the server is always queried.
        city: City mapping; ``city["id"]`` names the cache file and the
            whole mapping is passed on to ``response_from_url``.

    Returns:
        The decoded JSON document.
    """
    if not debug:
        # Close the HTTP response as soon as the payload has been parsed.
        with response_from_url(city) as response:
            return json.load(response)

    filename = "tmp/hi_%d.html" % (city["id"])

    if not path.exists(filename):
        # Request first, then create the file, so a failed request does not
        # leave an empty cache file behind.
        with response_from_url(city) as response, open(filename, "wb") as cache_file:
            cache_file.write(response.read())

    # Read the cache as bytes so it is decoded exactly like the live
    # response, independent of the platform's default text encoding.
    with open(filename, "rb") as json_file:
        return json.load(json_file)
|
|
|
|
|
|
def parse_date_time_str(date_time_str):
    """Convert a date-time string without seconds into a ``datetime``.

    ``":00"`` is appended to the input before it is handed to
    ``datetime.fromisoformat``. Falsy input (``None`` or an empty string)
    yields ``None``.
    """
    if date_time_str:
        with_seconds = date_time_str + ":00"
        return datetime.datetime.fromisoformat(with_seconds)

    return None
|
|
|
|
|
|
# Source tags mapped to event category names, in priority order: the first
# tag found on an item decides its category.
_CATEGORY_NAME_BY_TAG = (
    ("Ausstellung/Kunst", "Art"),
    ("Comedy", "Comedy"),
    ("Konzert/Musik", "Music"),
    ("Theater", "Theater"),
    ("Genuss/Gourmet", "Dining"),
    ("Gesundheit/Wellness", "Fitness"),
    ("Kinder/Jugend", "Family"),
    ("Markt/Flohmarkt", "Shopping"),
    ("Sport", "Sports"),
    ("Vortrag/Lesung", "Book"),
    ("Kabarett", "Art"),
    ("Musical", "Theater"),
    ("Weihnachtsmärkte", "Festival"),
    ("Stadt- und Volksfeste", "Festival"),
)


def _get_or_create_organizer(organizer_name):
    """Return the admin unit's organizer with this name, creating and committing it if missing."""
    organizer = EventOrganizer.query.filter(
        and_(
            EventOrganizer.admin_unit_id == admin_unit.id,
            EventOrganizer.name == organizer_name,
        )
    ).first()

    if organizer is None:
        organizer = EventOrganizer(admin_unit_id=admin_unit.id, name=organizer_name)
        db.session.add(organizer)
        db.session.commit()

    return organizer


def _get_or_create_place(organizer, place_name):
    """Return the organizer's place with this name, or a new unsaved EventPlace."""
    place = EventPlace.query.filter(
        and_(
            EventPlace.admin_unit_id == admin_unit.id,
            EventPlace.organizer_id == organizer.id,
            EventPlace.name == place_name,
        )
    ).first()

    if place is None:
        place = EventPlace(
            admin_unit_id=admin_unit.id,
            organizer_id=organizer.id,
            name=place_name,
        )

    return place


def _location_from_item(item):
    """Return a Location built from the item's coordinates, or None if they are missing, empty or zero."""
    if "latitude" not in item or "longitude" not in item:
        return None

    raw_latitude = item["latitude"]
    raw_longitude = item["longitude"]
    if not (raw_latitude and raw_longitude):
        return None

    latitude = decimal.Decimal(raw_latitude)
    longitude = decimal.Decimal(raw_longitude)
    if latitude == 0 or longitude == 0:
        return None

    location = Location()
    location.latitude = latitude
    location.longitude = longitude
    return location


def _apply_categories(event, item):
    """Set the event's category, kid_friendly flag and tags from the item's "categories" entry, if present."""
    if "categories" not in item:
        return

    tag_list = list(item["categories"].values())

    for tag, category_name in _CATEGORY_NAME_BY_TAG:
        if tag in tag_list:
            event.category = upsert_event_category(category_name)
            break

    if "Kinder/Jugend" in tag_list:
        event.kid_friendly = True

    tag_list.append("Harzinfo")
    event.tags = ",".join(tag_list)


def scrape(debug, city):
    """Import the events of one city and remove events that are no longer listed.

    For every entry in the search result an event is looked up by organizer
    and external link, created if missing, filled from the entry and
    committed. Afterwards all events of the city's organizer whose id was
    not recorded during this run are deleted.

    Args:
        debug: Passed through to ``load_json`` (enables response caching).
        city: City mapping with ``"short_name"`` and ``"title"`` (used as
            organizer name) plus whatever ``load_json`` needs.
    """
    organizer_name = city["short_name"] or city["title"]
    organizer = _get_or_create_organizer(organizer_name)

    print(organizer_name)
    response = load_json(debug, city)
    result = response["result"]
    event_ids = []

    for item in result:
        try:
            uid = str(item["uid"])
            external_link = base_url + item["link"] + "#" + uid
            event = Event.query.filter(
                and_(
                    Event.organizer_id == organizer.id,
                    Event.external_link == external_link,
                )
            ).first()
            did_create = event is None

            if did_create:
                event = Event()
                event.admin_unit = admin_unit
                event.organizer = organizer
            else:
                # Keep an already stored event even if updating it fails
                # further down.
                event_ids.append(event.id)

            event.category = category
            event.external_link = external_link
            event.review_status = EventReviewStatus.verified
            event.rating = int(item["rating"])
            event.name = item["title"]
            event.description = item["title"]

            start = parse_date_time_str(item["date"])
            update_event_dates_with_recurrence_rule(event, start, None)

            # Place
            place = _get_or_create_place(organizer, item["location"])
            place.description = ""
            place.location = _location_from_item(item)
            event.event_place = place

            # Additional data
            event.status = (
                EventStatus.cancelled if item["canceled"] else EventStatus.scheduled
            )
            _apply_categories(event, item)

            print("%s %s %d" % (event.dates[0].start, event.name, event.rating))
            if did_create:
                db.session.add(event)

            db.session.commit()

            if did_create:
                # A new event has no id until it is persisted; recording it
                # earlier would store None instead of the real id and break
                # the clean-up filter below.
                event_ids.append(event.id)
        except Exception as exc:
            # Discard the failed unit of work so the session stays usable
            # for the remaining items.
            db.session.rollback()
            print("Exception")
            print(repr(exc))
            pprint(item)

    # Remove this organizer's events that did not show up in the result.
    Event.query.filter(
        and_(
            Event.admin_unit_id == admin_unit.id,
            Event.organizer_id == organizer.id,
            not_(Event.id.in_(event_ids)),
        )
    ).delete(synchronize_session="fetch")
    db.session.commit()
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: scrape every configured city with debug caching
    # switched off.
    for city in cities.values():
        scrape(debug=False, city=city)
|