core/migration/tables/content_items.py

378 lines
14 KiB
Python
Raw Normal View History

2022-11-23 14:09:35 +00:00
from datetime import datetime, timezone
2022-11-13 17:58:47 +00:00
import json
2022-08-11 09:14:12 +00:00
from dateutil.parser import parse as date_parse
from sqlalchemy.exc import IntegrityError
2022-08-11 09:14:12 +00:00
from transliterate import translit
2022-08-11 09:59:35 +00:00
from base.orm import local_session
2022-11-26 15:19:45 +00:00
from migration.extract import extract_html, extract_media
2022-08-11 09:14:12 +00:00
from orm.reaction import Reaction, ReactionKind
2022-09-19 13:50:43 +00:00
from orm.shout import Shout, ShoutTopic, ShoutReactionsFollower
from orm.user import User
2022-11-29 12:36:46 +00:00
from orm.topic import TopicFollower, Topic
2022-11-18 18:22:10 +00:00
from services.stat.viewed import ViewedStorage
2022-11-30 19:47:34 +00:00
import re
2022-08-11 09:14:12 +00:00
2022-09-03 10:50:14 +00:00
# Creation timestamp substituted for entries that carry no "createdAt" value.
OLD_DATE = "2016-03-05 22:22:00.350000"

# Timezone-aware (UTC) moment captured once at import time; used as the
# default timestamp wherever this module needs "now".
ts = datetime.now(timezone.utc)

# Legacy content-item "type" -> shout layout name.
type2layout = dict(
    Article="article",
    Literature="literature",
    Music="audio",
    Video="video",
    Image="image",
)

# Placeholder user records; `anondict` is what get_user() falls back to when
# an entry has no resolvable author.
anondict = dict(slug="anonymous", id=1, name="Аноним")
discours = dict(slug="discours", id=2, name="Дискурс")
2022-11-30 19:47:34 +00:00
2022-09-03 10:50:14 +00:00
2022-08-11 09:14:12 +00:00
def get_shout_slug(entry):
    """Return a URL-safe slug for a legacy content item.

    The entry's own ``slug`` is preferred; when it is missing or empty, the
    first non-empty slug found in ``friendlySlugs`` is used instead. Every run
    of characters outside ``[0-9a-zA-Z]`` is then collapsed into one ``-``.

    A ``None`` slug, a ``None`` ``friendlySlugs`` value, or the absence of any
    usable slug yields ``""`` rather than a ``TypeError``.
    """
    slug = entry.get("slug") or ""
    if not slug:
        for friend in entry.get("friendlySlugs") or []:
            slug = friend.get("slug") or ""
            if slug:
                break
    return re.sub(r"[^0-9a-zA-Z]+", "-", slug)
2022-08-11 09:14:12 +00:00
2022-09-18 22:11:26 +00:00
def create_author_from_app(app):
    """Find or create the user described by an author application.

    ``app`` is read for the keys ``email`` (required), ``name`` and ``bio``.
    A user that already has this e-mail is reused. Otherwise slug candidates
    are tried in order: the transliterated name, the e-mail local part, and
    the e-mail local part with an ``-author`` suffix. A new user is created
    only when the last candidate tried is free; if even that one is taken,
    the user owning it is returned.

    Returns the user's ``dict()`` representation, or ``None`` when ``app`` is
    empty. Any error is re-raised after ``app`` is printed for diagnostics.
    """
    if not app:
        return None
    try:
        with local_session() as session:
            email = app["email"]
            # check if email is used
            user = session.query(User).where(User.email == email).first()
            if not user:
                print('[migration] creating user...')
                email_slug = email.split('@')[0]
                name = app.get('name')
                # A missing name would otherwise be handed to translit();
                # start from the e-mail local part in that case.
                base = translit(name, "ru", reversed=True) if name else email_slug
                slug = re.sub(r'[^0-9a-zA-Z]+', '-', base.lower())
                # check if the name-based slug is used
                user = session.query(User).where(User.slug == slug).first()
                if user:
                    # get slug from email
                    slug = email_slug
                    user = session.query(User).where(User.slug == slug).first()
                    if user:
                        # one more try
                        slug += '-author'
                        user = session.query(User).where(User.slug == slug).first()
                if not user:
                    # create user with application data
                    user = User.create(
                        username=email,
                        email=email,
                        name=app.get("name", ""),
                        bio=app.get("bio", ""),
                        emailConfirmed=False,
                        slug=slug,
                        createdAt=ts,
                        lastSeen=ts,
                    )
                    session.add(user)
                    session.commit()
            return user.dict()
    except Exception:
        print(app)
        # bare raise keeps the original traceback intact
        raise
2022-09-18 22:11:26 +00:00
2022-12-13 12:01:42 +00:00
async def create_shout(shout_dict):
    """Build a shout from ``shout_dict`` and auto-follow it for its first author.

    The shout is produced by ``Shout.create(**shout_dict)``. If no
    ``ShoutReactionsFollower`` row links the shout to ``shout_dict['authors'][0]``
    yet, one is created with ``auto=True``. Returns the created shout.
    """
    shout = Shout.create(**shout_dict)
    first_author = shout_dict['authors'][0]
    with local_session() as session:
        lookup = session.query(ShoutReactionsFollower)
        lookup = lookup.where(ShoutReactionsFollower.shout == shout.id)
        lookup = lookup.filter(ShoutReactionsFollower.follower == first_author.id)
        if not lookup.first():
            # no subscription yet: add the automatic one
            session.add(
                ShoutReactionsFollower.create(
                    shout=shout.id,
                    follower=first_author.id,
                    auto=True,
                )
            )
        session.commit()
    return shout
2022-12-13 12:01:42 +00:00
async def get_user(entry, storage):
    """Resolve the author of a legacy content item.

    Lookup order: the record stored under the entry's ``createdBy`` oid in
    ``storage["users"]["by_oid"]``, then a user built from the entry's
    ``application`` data, and finally the anonymous placeholder. The record's
    slug is normalised before it is handed to ``process_user``.

    Returns a ``(user, user_oid)`` tuple.
    """
    user_oid = entry.get("createdBy")
    userdata = storage["users"]["by_oid"].get(user_oid)
    if not userdata:
        userdata = create_author_from_app(entry.get("application"))
        print("[migration] from user_oid")
    if not userdata:
        print("[migration] no app no user_oid")
        # Work on a copy: the record is mutated below and again by
        # process_user, which also stores it per oid. Using the module-level
        # dict directly would let every anonymous entry share and modify it.
        userdata = dict(anondict)
    # cleanup slug (a missing or None slug is treated as empty)
    slug = userdata.get("slug") or ""
    userdata["slug"] = re.sub(r'[^0-9a-zA-Z]+', '-', slug)
    user = await process_user(userdata, storage, user_oid)
    return user, user_oid
2022-09-18 18:10:57 +00:00
2022-08-18 06:12:46 +00:00
async def migrate(entry, storage):
    """Migrate one legacy content item into a shout.

    Steps, in order: resolve the author, assemble the shout fields from
    ``entry``, confirm the author's e-mail if the entry is published, create
    the shout (resolving a slug conflict on ``IntegrityError``), attach
    topics, convert ratings to reactions, record views, and index the result
    in ``storage["shouts"]`` by oid and by slug.

    Returns the stored shout as a dict with ``authors`` and ``topics`` filled.

    Raises ``KeyError`` when ``entry`` lacks ``type``, ``title`` or ``_id``,
    and ``IndexError`` when no topic slug can be resolved for the entry
    (the main topic is taken from the first resolved topic).
    """
    author, _ = await get_user(entry, storage)

    thumbor_id = entry.get("thumborId")
    if thumbor_id:
        cover = "https://assets.discours.io/unsafe/1600x/" + thumbor_id
    else:
        # "image" may be absent or explicitly null
        cover = (entry.get("image") or {}).get("url")

    published = entry.get("published")
    published_at = entry.get("publishedAt")
    deleted_at = entry.get("deletedAt")
    updated_at = entry.get("updatedAt")

    r = {
        "layout": type2layout[entry["type"]],
        "title": entry["title"],
        "authors": [author, ],
        "slug": get_shout_slug(entry),
        "cover": cover,
        "visibility": "public" if published else "authors",
        # a published entry without a publication date must not reach date_parse(None)
        "publishedAt": date_parse(published_at) if published and published_at else None,
        "deletedAt": date_parse(deleted_at) if deleted_at else None,
        "createdAt": date_parse(entry.get("createdAt") or OLD_DATE),
        "updatedAt": date_parse(updated_at) if updated_at else ts,
        "topics": await add_topics_follower(entry, storage, author),
        "body": extract_html(entry)
    }

    # main topic patch
    r['mainTopic'] = r['topics'][0]

    # published author auto-confirm
    if published:
        with local_session() as session:
            # update user.emailConfirmed if published
            author.emailConfirmed = True
            session.add(author)
            session.commit()

    # media
    media = extract_media(entry)
    r["media"] = json.dumps(media, ensure_ascii=True) if media else None

    # ----------------------------------- copy
    shout_dict = r.copy()
    del shout_dict["topics"]
    shout_dict["oid"] = entry.get("_id", "")

    try:
        # save shout to db
        shout = await create_shout(shout_dict)
    except IntegrityError as e:
        print('[migration] create_shout integrity error', e)
        shout = await resolve_create_shout(shout_dict)
    # any other error propagates unchanged, keeping its type and traceback

    # update data
    shout_dict = shout.dict()
    shout_dict["authors"] = [author.dict(), ]

    # shout topics aftermath: look the shout up by the slug it was actually
    # stored under, which may differ from r["slug"] after conflict resolution
    shout_dict["topics"] = await topics_aftermath({**r, "slug": shout_dict["slug"]}, storage)

    # content_item ratings to reactions
    await content_ratings_to_reactions(entry, shout_dict["slug"])

    # shout views
    await ViewedStorage.increment(shout_dict["slug"], amount=entry.get("views", 1))

    storage["shouts"]["by_oid"][entry["_id"]] = shout_dict
    storage["shouts"]["by_slug"][shout_dict["slug"]] = shout_dict
    return shout_dict
2022-11-30 19:47:34 +00:00
async def add_topics_follower(entry, storage, user):
    """Collect the entry's topic slugs and make ``user`` follow each topic.

    Slugs are resolved from the entry's ``category`` and ``tags`` oids through
    ``storage["topics"]["by_oid"]``. Duplicates are dropped while keeping
    first-seen order, so the category's slug (when it resolves) comes first.
    For every slug that matches a ``Topic`` row, a ``TopicFollower`` row with
    ``auto=True`` is created unless one already exists.

    Returns the list of slugs. If the category's slug, after applying
    ``storage["replacements"]``, is in the list, it is moved to index 0.
    """
    topics_by_oid = storage["topics"]["by_oid"]
    category = entry.get("category")
    category_slug = topics_by_oid.get(category, {}).get("slug")

    # Ordered de-duplication: a set here would make the returned order (and
    # therefore the caller's choice of main topic) vary between runs.
    slugs = []
    for toid in [category, *(entry.get("tags") or [])]:
        tslug = topics_by_oid.get(toid, {}).get("slug")
        if tslug and tslug not in slugs:
            slugs.append(tslug)
    if not slugs:
        return slugs

    # add author as TopicFollower
    with local_session() as session:
        for tpcslug in slugs:
            try:
                tpc = session.query(Topic).where(Topic.slug == tpcslug).first()
                if not tpc:
                    continue
                tf = session.query(
                    TopicFollower
                ).where(
                    TopicFollower.follower == user.id
                ).filter(
                    TopicFollower.topic == tpc.id
                ).first()
                if not tf:
                    tf = TopicFollower.create(
                        topic=tpc.id,
                        follower=user.id,
                        auto=True
                    )
                    session.add(tf)
                    session.commit()
            except IntegrityError:
                # leave the session usable for the remaining topics
                session.rollback()
                # tpcslug is always bound here, unlike the queried row
                print('[migration.shout] hidden by topic ' + tpcslug)

    # main topic: fall back to the category's own slug when no replacement
    # is registered for it
    maintopic = storage["replacements"].get(category_slug, category_slug)
    if maintopic in slugs:
        slugs.remove(maintopic)
        slugs.insert(0, maintopic)
    return slugs
2022-09-19 15:24:43 +00:00
2022-09-03 10:50:14 +00:00
2022-12-13 12:01:42 +00:00
async def process_user(userdata, storage, oid):
    """Fetch or create the ``User`` for ``userdata`` and index the record.

    The user is looked up by ``userdata["id"]`` (defaulting to 1). When no
    such row exists, the slug is lower-cased, stripped and sanitised, and a
    new user is created from ``userdata``. ``userdata`` is then updated with
    the user's ``id`` and ``createdAt`` and stored in ``storage["users"]``
    under both its slug and ``oid``.

    Returns the ``User`` object. Raises ``Exception`` when the user cannot be
    created because of an integrity error, or when no user could be obtained.
    """
    with local_session() as session:
        uid = userdata.get("id", 1)  # records without an id resolve to id 1, the anonymous placeholder's id
        user = session.query(User).filter(User.id == uid).first()
        if not user:
            try:
                slug = userdata["slug"].lower().strip()
                slug = re.sub(r'[^0-9a-zA-Z]+', '-', slug)
                userdata["slug"] = slug
                user = User.create(**userdata)
                session.add(user)
                session.commit()
            except IntegrityError as err:
                print(f"[migration] user creating with slug {userdata['slug']}")
                print("[migration] from userdata")
                print(userdata)
                raise Exception(
                    "[migration] cannot create user in content_items.get_user()"
                ) from err
        # check before the first attribute access on `user`
        if not user:
            raise Exception("could not get a user")
        # debug markers for two specific user ids
        if user.id == 946:
            print("[migration] ***************** ALPINA")
        if user.id == 2:
            print("[migration] +++++++++++++++++ DISCOURS")
        userdata["id"] = user.id
        userdata["createdAt"] = user.createdAt
        storage["users"]["by_slug"][userdata["slug"]] = userdata
        storage["users"]["by_oid"][oid] = userdata
    return user
2022-11-27 08:19:38 +00:00
2022-12-13 12:01:42 +00:00
async def resolve_create_shout(shout_dict):
    """Resolve a slug conflict hit while creating a shout.

    The existing shout with ``shout_dict["slug"]`` is loaded, then:

    * if its ``createdAt`` differs from the incoming one, the incoming data is
      treated as a different shout: ``-<layout>`` is appended to the slug
      (``shout_dict`` is modified in place) and a new shout is created;
    * otherwise the existing shout is compared key by key with ``shout_dict``.

    When creation fails with ``IntegrityError``, or the comparison finds a
    differing or missing key, the existing shout is updated from
    ``shout_dict``.

    Returns the newly created shout, or the existing (possibly updated) one.
    Raises ``Exception`` when no shout with the given slug exists.
    """
    with local_session() as session:
        s = session.query(Shout).filter(Shout.slug == shout_dict["slug"]).first()
        if not s:
            print("[migration] something went wrong with shout: \n%r" % shout_dict)
            raise Exception(
                "[migration] no existing shout with slug %r to resolve against"
                % shout_dict["slug"]
            )
        bump = False
        if s.createdAt != shout_dict['createdAt']:
            # create new with different slug
            shout_dict["slug"] += '-' + shout_dict["layout"]
            try:
                # keep the result, otherwise the caller receives the old shout
                # although a new one was stored
                s = await create_shout(shout_dict)
            except IntegrityError as e:
                print(e)
                bump = True
        else:
            # update old
            for key in shout_dict:
                if key in s.__dict__:
                    if s.__dict__[key] != shout_dict[key]:
                        print(
                            "[migration] shout already exists, but differs in %s"
                            % key
                        )
                        bump = True
                else:
                    print("[migration] shout already exists, but lacks %s" % key)
                    bump = True
        if bump:
            s.update(shout_dict)
        session.commit()
    return s
2022-09-03 10:50:14 +00:00
2022-11-27 08:19:38 +00:00
async def topics_aftermath(entry, storage):
    """Link a shout to its topics, applying slug replacements.

    ``entry["topics"]`` holds topic slugs (falsy items are skipped) and
    ``entry["slug"]`` identifies the shout. Each slug is mapped through
    ``storage["replacements"]``, defaulting to itself. An existing
    ``ShoutTopic`` link for the old slug is re-pointed to the new topic;
    otherwise a link to the new topic is created unless it already exists.

    Returns the resulting slugs in first-seen order, without duplicates.
    A slug whose replacement is empty is reported and left out.
    """
    r = []
    replacements = storage["replacements"]
    for oldslug in filter(None, entry["topics"]):
        newslug = replacements.get(oldslug, oldslug)
        if not newslug:
            # oldslug is a plain string, so it is printed as-is
            print("[migration] ignored topic slug: \n%r" % (oldslug,))
            continue

        with local_session() as session:
            shout = session.query(Shout).where(Shout.slug == entry["slug"]).first()
            new_topic = session.query(Topic).where(Topic.slug == newslug).first()

            if not shout or not new_topic:
                # nothing to link against; report it the same way a failed
                # link creation is reported instead of failing on `.id`
                print("[migration] shout topic error: " + newslug)
            else:
                shout_topic_old = (
                    session.query(ShoutTopic)
                    .join(Shout)
                    .join(Topic)
                    .filter(Shout.slug == entry["slug"])
                    .filter(Topic.slug == oldslug)
                    .first()
                )
                if shout_topic_old:
                    shout_topic_old.update({"topic": new_topic.id})
                else:
                    shout_topic_new = (
                        session.query(ShoutTopic)
                        .join(Shout)
                        .join(Topic)
                        .filter(Shout.slug == entry["slug"])
                        .filter(Topic.slug == newslug)
                        .first()
                    )
                    if not shout_topic_new:
                        try:
                            ShoutTopic.create(
                                **{"shout": shout.id, "topic": new_topic.id}
                            )
                        except Exception:
                            print("[migration] shout topic error: " + newslug)
            session.commit()

        if newslug not in r:
            r.append(newslug)
    return r
2022-09-03 10:50:14 +00:00
2022-11-27 08:19:38 +00:00
async def content_ratings_to_reactions(entry, slug):
    """Convert the entry's legacy ratings into reactions on a shout.

    Each item of ``entry["ratings"]`` becomes a ``LIKE`` (value > 0) or
    ``DISLIKE`` reaction on the shout with the given ``slug``, authored by the
    user whose ``oid`` equals the rating's ``createdBy`` or by
    ``User.default_user`` when no such user exists. If the same user already
    has a reaction of that kind on the shout, that reaction is updated to
    ``AGREE`` / ``DISAGREE`` instead of a new one being created.

    Best effort: any error is printed together with the rating being
    processed and then suppressed. Returns ``None``.
    """
    ratings = entry.get("ratings") or []
    if not ratings:
        return
    # bound up front so the error handler can always print it
    content_rating = None
    try:
        with local_session() as session:
            # the target shout is the same for every rating: look it up once
            shout = session.query(Shout).where(Shout.slug == slug).first()
            for content_rating in ratings:
                rater = (
                    session.query(User)
                    .filter(User.oid == content_rating["createdBy"])
                    .first()
                ) or User.default_user
                positive = content_rating["value"] > 0
                cts = content_rating.get("createdAt")
                reaction_dict = {
                    "createdAt": date_parse(cts) if cts else None,
                    "kind": ReactionKind.LIKE if positive else ReactionKind.DISLIKE,
                    "createdBy": rater.id,
                    "shout": shout.id
                }
                reaction = (
                    session.query(Reaction)
                    .filter(Reaction.shout == reaction_dict["shout"])
                    .filter(Reaction.createdBy == reaction_dict["createdBy"])
                    .filter(Reaction.kind == reaction_dict["kind"])
                    .first()
                )
                if reaction:
                    reaction_dict["kind"] = (
                        ReactionKind.AGREE if positive else ReactionKind.DISAGREE
                    )
                    reaction.update(reaction_dict)
                    session.add(reaction)
                else:
                    session.add(Reaction.create(**reaction_dict))
            session.commit()
    except Exception as e:
        print("[migration] content_item.ratings error: \n%r" % (content_rating,))
        # show the cause as well; the failure is otherwise invisible
        print(e)