2022-11-23 14:09:35 +00:00
|
|
|
from datetime import datetime, timezone
|
2022-11-13 17:58:47 +00:00
|
|
|
import json
|
2022-08-11 09:14:12 +00:00
|
|
|
from dateutil.parser import parse as date_parse
|
2022-09-17 18:12:14 +00:00
|
|
|
from sqlalchemy.exc import IntegrityError
|
2022-08-11 09:14:12 +00:00
|
|
|
from transliterate import translit
|
2022-08-11 09:59:35 +00:00
|
|
|
from base.orm import local_session
|
2022-11-26 15:19:45 +00:00
|
|
|
from migration.extract import extract_html, extract_media
|
2022-08-11 09:14:12 +00:00
|
|
|
from orm.reaction import Reaction, ReactionKind
|
2022-09-19 13:50:43 +00:00
|
|
|
from orm.shout import Shout, ShoutTopic, ShoutReactionsFollower
|
|
|
|
from orm.user import User
|
2022-11-29 12:36:46 +00:00
|
|
|
from orm.topic import TopicFollower, Topic
|
2022-11-18 18:22:10 +00:00
|
|
|
from services.stat.viewed import ViewedStorage
|
2022-11-30 19:47:34 +00:00
|
|
|
import re
|
2022-08-11 09:14:12 +00:00
|
|
|
|
2022-09-03 10:50:14 +00:00
|
|
|
# Fallback "createdAt" value for entries that carry no creation date.
OLD_DATE = "2016-03-05 22:22:00.350000"

# Timezone-aware (UTC) moment this module was loaded; used as the default
# timestamp for records created or touched by the migration.
ts = datetime.now(tz=timezone.utc)

# Legacy content "type" -> shout layout name.
type2layout = {
    "Article": "article",
    "Literature": "literature",
    "Music": "audio",
    "Video": "video",
    "Image": "image",
}

# Author record used when an entry's real author cannot be resolved.
anondict = {"slug": "anonymous", "id": 1, "name": "Аноним"}
|
|
|
|
|
2022-09-03 10:50:14 +00:00
|
|
|
|
2022-08-11 09:14:12 +00:00
|
|
|
def get_shout_slug(entry):
    """Return a sanitized slug for a legacy content entry.

    The entry's own ``slug`` is preferred. When it is missing or empty, the
    first non-empty slug found in ``friendlySlugs`` is used instead. Every run
    of characters outside ``[0-9a-zA-Z]`` is then replaced by a single hyphen.

    :param entry: legacy content item (dict).
    :return: the sanitized slug, or an empty string when none is available.
    """
    # ``or`` (rather than a .get() default) also covers keys that are present
    # but explicitly set to None, which would otherwise break re.sub below.
    slug = entry.get("slug") or ""
    if not slug:
        for friend in entry.get("friendlySlugs") or []:
            slug = friend.get("slug") or ""
            if slug:
                break
    return re.sub(r"[^0-9a-zA-Z]+", "-", slug)
|
|
|
|
|
2022-08-11 09:14:12 +00:00
|
|
|
|
2022-09-18 22:11:26 +00:00
|
|
|
def create_author_from_app(app):
    """Find or create the user described by an authorship application.

    An existing user with the application's email is reused. Otherwise a slug
    is derived from the transliterated name, falling back to the local part of
    the email and then to that value with an ``-author`` suffix when the
    previous candidate is already taken. A new user is created only when the
    final candidate slug is free.

    :param app: application dict; ``email`` is required, ``name`` and ``bio``
        are optional.
    :return: dict form of the resolved user.
    :raises Exception: any failure is re-raised after printing ``app``.
    """
    try:
        with local_session() as session:
            # check if email is used
            user = session.query(User).where(User.email == app["email"]).first()
            if not user:
                # the name is optional, so do not feed None to translit()
                name = app.get("name") or ""
                slug = translit(name, "ru", reversed=True).lower()
                slug = re.sub(r"[^0-9a-zA-Z]+", "-", slug)
                # check if nameslug is used (an empty slug is never usable)
                user = session.query(User).where(User.slug == slug).first() if slug else None
                if user or not slug:
                    # get slug from email; it is sanitized the same way as
                    # every other slug in this module so that later lookups
                    # by the sanitized slug find this user
                    slug = re.sub(r"[^0-9a-zA-Z]+", "-", app["email"].split("@")[0])
                    user = session.query(User).where(User.slug == slug).first()
                    # one more try
                    if user:
                        slug += "-author"
                        user = session.query(User).where(User.slug == slug).first()
                        # NOTE(review): if this slug is taken as well, the
                        # user holding it is returned below — confirm that
                        # this is intended.

            # create user with application data
            if not user:
                userdata = {
                    "username": app["email"],
                    "email": app["email"],
                    "name": app.get("name", ""),
                    "bio": app.get("bio", ""),
                    "emailConfirmed": False,
                    "slug": slug,
                    "createdAt": ts,
                    "lastSeen": ts,
                }
                user = User.create(**userdata)
                session.add(user)
                session.commit()
            userdata = user.dict()
        if not userdata:
            userdata = User.default_user.dict()  # anonymous
    except Exception:
        print(app)
        # bare raise keeps the original traceback intact
        raise
    return userdata
|
|
|
|
|
|
|
|
|
2022-11-30 19:47:34 +00:00
|
|
|
async def create_shout(shout_dict, user):
    """Create a shout and make sure ``user`` follows its reactions.

    :param shout_dict: keyword arguments passed to ``Shout.create``.
    :param user: object whose ``id`` identifies the follower.
    :return: the created shout.
    """
    shout = Shout.create(**shout_dict)
    with local_session() as session:
        existing = (
            session.query(ShoutReactionsFollower)
            .where(ShoutReactionsFollower.shout == shout.id)
            .filter(ShoutReactionsFollower.follower == user.id)
            .first()
        )
        if not existing:
            # auto-subscribe the author to reactions on the new shout
            session.add(
                ShoutReactionsFollower.create(shout=shout.id, follower=user.id, auto=True)
            )
        session.commit()
    return shout
|
|
|
|
|
|
|
|
|
|
|
|
def get_userdata(entry, storage):
    """Resolve the author record for a legacy entry.

    An attached ``application`` takes precedence and is turned into a user via
    ``create_author_from_app``; otherwise the author is looked up in
    ``storage["users"]["by_oid"]`` by the entry's ``createdBy``. When neither
    yields a record, ``anondict`` is used. The record's slug is sanitized in
    place before it is returned.

    :param entry: legacy content item (dict).
    :param storage: migration state holding ``users.by_oid``.
    :return: ``(userdata, user_oid)`` tuple.
    """
    user_oid = entry.get("createdBy", "")
    application = entry.get("application")
    if application:
        found = create_author_from_app(application)
    else:
        found = storage["users"]["by_oid"].get(user_oid)
    userdata = found or anondict
    # the record itself is updated, so the caller and storage see the new slug
    userdata["slug"] = re.sub('[^0-9a-zA-Z]+', '-', userdata.get("slug"))
    return userdata, user_oid
|
2022-09-18 18:10:57 +00:00
|
|
|
|
|
|
|
|
2022-08-18 06:12:46 +00:00
|
|
|
async def migrate(entry, storage):
    """Migrate one legacy content item into a shout.

    Resolves the author, builds the shout fields, confirms the author's email
    when the entry is published, creates the shout (or reconciles it with an
    existing one on an integrity conflict), relinks its topics, converts the
    ratings into reactions, records the views and finally registers the
    result in ``storage["shouts"]``.

    :param entry: legacy content item (dict); ``type``, ``title`` and ``_id``
        are required.
    :param storage: migration state (``users``, ``topics``, ``replacements``,
        ``shouts``).
    :return: dict form of the shout with ``authors`` and ``topics`` filled in.
    """
    userdata, user_oid = get_userdata(entry, storage)
    user = await get_user(userdata, storage, user_oid)
    r = {
        "layout": type2layout[entry["type"]],
        "title": entry["title"],
        "authors": [userdata["slug"], ],
        "slug": get_shout_slug(entry),
        "cover": (
            "https://assets.discours.io/unsafe/1600x/" + entry["thumborId"]
            if entry.get("thumborId")
            # ``or {}`` also covers an explicit ``"image": None``
            else (entry.get("image") or {}).get("url")
        ),
        "visibility": "public" if entry.get("published") else "authors",
        "publishedAt": date_parse(entry.get("publishedAt")) if entry.get("published") else None,
        "deletedAt": date_parse(entry.get("deletedAt")) if entry.get("deletedAt") else None,
        "createdAt": date_parse(entry.get("createdAt", OLD_DATE)),
        "updatedAt": date_parse(entry["updatedAt"]) if "updatedAt" in entry else ts,
        "topics": await add_topics_follower(entry, storage, user),
        "body": extract_html(entry)
    }

    # main topic patch: add_topics_follower() always puts the main topic first
    r['mainTopic'] = r['topics'][0]

    # published author auto-confirm
    if entry.get("published"):
        with local_session() as session:
            # update user.emailConfirmed if published
            author = session.query(User).where(User.slug == userdata["slug"]).first()
            if author:
                author.emailConfirmed = True
                session.add(author)
                session.commit()
            else:
                # a missing author must not abort the whole item
                print("[migration] author not found by slug: %s" % userdata["slug"])

    # media
    media = extract_media(entry)
    r["media"] = json.dumps(media, ensure_ascii=True) if media else None

    # ----------------------------------- copy
    shout_dict = r.copy()

    # user
    shout_dict["authors"] = [user, ]
    del shout_dict["topics"]
    shout_dict["oid"] = entry.get("_id", "")
    try:
        # save shout to db
        shout = await create_shout(shout_dict, user)
    except IntegrityError as e:
        print('[migration] create_shout integrity error', e)
        shout = await resolve_create_shout(shout_dict, userdata["slug"])
    # any other exception propagates unchanged, keeping its type and traceback

    # update data
    shout_dict = shout.dict()
    shout_dict["authors"] = [user.dict(), ]

    # shout topics aftermath
    shout_dict["topics"] = await topics_aftermath(r, storage)

    # content_item ratings to reactions
    await content_ratings_to_reactions(entry, shout_dict["slug"])

    # shout views
    await ViewedStorage.increment(shout_dict["slug"], amount=entry.get("views", 1))
    # del shout_dict['ratings']

    storage["shouts"]["by_oid"][entry["_id"]] = shout_dict
    storage["shouts"]["by_slug"][shout_dict["slug"]] = shout_dict
    return shout_dict
|
|
|
|
|
|
|
|
|
2022-11-30 19:47:34 +00:00
|
|
|
async def add_topics_follower(entry, storage, user):
    """Collect an entry's topic slugs and subscribe its author to each topic.

    Topic slugs are resolved from the entry's ``category`` and ``tags`` through
    ``storage["topics"]["by_oid"]``. For every slug that matches a stored
    ``Topic``, a ``TopicFollower`` row is created for ``user`` unless one
    already exists.

    :param entry: legacy content item (dict).
    :param storage: migration state holding ``topics.by_oid`` and
        ``replacements``.
    :param user: object whose ``id`` identifies the follower.
    :return: list of topic slugs with the main topic first. The main topic is
        the replacement registered for the category slug and may be ``None``
        when no replacement is registered.
    """
    category = entry.get("category")
    topics_by_oid = storage["topics"]["by_oid"]
    # ``or []`` tolerates an explicit ``"tags": None``
    oids = [category, ] + (entry.get("tags") or [])
    # de-duplicate while keeping first-seen order, so the result is stable
    ttt = []
    for toid in oids:
        tslug = topics_by_oid.get(toid, {}).get("slug")
        if tslug and tslug not in ttt:
            ttt.append(tslug)

    # add author as TopicFollower
    with local_session() as session:
        for tpcslug in list(ttt):
            try:
                tpc = session.query(Topic).where(Topic.slug == tpcslug).first()
                if tpc:
                    tf = (
                        session.query(TopicFollower)
                        .where(TopicFollower.follower == user.id)
                        .filter(TopicFollower.topic == tpc.id)
                        .first()
                    )
                    if not tf:
                        tf = TopicFollower.create(
                            topic=tpc.id,
                            follower=user.id,
                            auto=True
                        )
                        session.add(tf)
                        session.commit()
            except IntegrityError:
                # the session stays unusable for the next topic until the
                # failed transaction is rolled back
                session.rollback()
                print('[migration.shout] hidden by topic ' + tpcslug)

    # main topic
    maintopic = storage["replacements"].get(topics_by_oid.get(category, {}).get("slug"))
    if maintopic in ttt:
        ttt.remove(maintopic)
    ttt.insert(0, maintopic)
    return ttt
|
2022-09-19 15:24:43 +00:00
|
|
|
|
2022-09-03 10:50:14 +00:00
|
|
|
|
2022-11-30 19:47:34 +00:00
|
|
|
async def get_user(userdata, storage, oid):
    """Return the ``User`` row for ``userdata``, creating it when needed.

    When ``userdata`` carries an ``id`` the user is loaded by that id;
    otherwise a new user is created from ``userdata`` after normalizing its
    slug. On success ``userdata`` is updated with the user's ``id`` and
    ``createdAt`` and registered in ``storage["users"]`` under both its slug
    and ``oid``.

    :param userdata: author record (dict); mutated in place.
    :param storage: migration state holding ``users.by_slug`` / ``users.by_oid``.
    :param oid: legacy object id of the author.
    :return: the resolved ``User``.
    :raises Exception: when the user cannot be created or found.
    """
    user = None
    with local_session() as session:
        uid = userdata.get("id")
        if uid:
            user = session.query(User).filter(User.id == uid).first()
        elif userdata:
            try:
                slug = userdata["slug"].lower().strip()
                slug = re.sub(r"[^0-9a-zA-Z]+", "-", slug)
                userdata["slug"] = slug
                user = User.create(**userdata)
                session.add(user)
                session.commit()
            except IntegrityError as e:
                print("[migration] user creating with slug %s" % userdata["slug"])
                print("[migration] from userdata: %r" % userdata)
                raise Exception(
                    "[migration] cannot create user in content_items.get_user()"
                ) from e
        # check before touching user.id, otherwise a failed lookup surfaces
        # as an AttributeError instead of this error
        if not user:
            raise Exception("could not get a user")
        userdata["id"] = user.id
        userdata["createdAt"] = user.createdAt
        storage["users"]["by_slug"][userdata["slug"]] = userdata
        storage["users"]["by_oid"][oid] = userdata
    return user
|
|
|
|
|
|
|
|
|
|
|
|
async def resolve_create_shout(shout_dict, userslug):
    """Reconcile ``shout_dict`` with an existing shout that has the same slug.

    If the existing shout belongs to another author, a new shout is created
    under a slug suffixed with the layout name. If it belongs to the same
    author, the existing shout is updated when any field differs or is
    missing.

    :param shout_dict: fields of the shout being migrated; its ``slug`` is
        modified in place when a new slug has to be used.
    :param userslug: slug of the author of the shout being migrated.
    :return: the newly created shout, or the existing (possibly updated) one.
    :raises Exception: when no shout with that slug exists, or the author
        cannot be found.
    """
    with local_session() as session:
        s = session.query(Shout).filter(Shout.slug == shout_dict["slug"]).first()
        if not s:
            print("[migration] something went wrong with shout: \n%r" % shout_dict)
            raise Exception("[migration] conflicting shout not found by slug")

        bump = False
        first_author = s.authors[0] if s.authors else None
        # authors may hold user objects or plain slugs; compare by slug
        # NOTE(review): assumes author objects expose ``slug`` — confirm
        # against the Shout model.
        author_slug = getattr(first_author, "slug", first_author)
        if author_slug != userslug:
            # create new with different slug
            shout_dict["slug"] += '-' + shout_dict["layout"]
            # create_shout() reads ``user.id``, so it needs the user object
            # rather than the slug
            author = session.query(User).where(User.slug == userslug).first()
            if not author:
                raise Exception("[migration] author not found by slug %s" % userslug)
            try:
                s = await create_shout(shout_dict, author)
            except IntegrityError as e:
                print(e)
                # NOTE(review): this falls back to updating the shout that
                # already holds the slug — confirm that this is intended.
                bump = True
        else:
            # update old
            for key in shout_dict:
                if key in s.__dict__:
                    if s.__dict__[key] != shout_dict[key]:
                        print(
                            "[migration] shout already exists, but differs in %s"
                            % key
                        )
                        bump = True
                else:
                    print("[migration] shout already exists, but lacks %s" % key)
                    bump = True
        if bump:
            s.update(shout_dict)
        session.commit()
    return s
|
2022-09-03 10:50:14 +00:00
|
|
|
|
2022-11-27 08:19:38 +00:00
|
|
|
|
|
|
|
async def topics_aftermath(entry, storage):
    """Relink a shout's topics to their replacement slugs.

    For each non-empty slug in ``entry["topics"]`` the replacement registered
    in ``storage["replacements"]`` is looked up (defaulting to the slug
    itself). An existing ``ShoutTopic`` link to the old slug is pointed at the
    new topic; otherwise a link to the new topic is created unless one already
    exists. Slugs whose replacement is empty are reported and skipped.

    :param entry: dict with the shout's ``slug`` and ``topics`` (slug list).
    :param storage: migration state holding ``replacements``.
    :return: list of the resulting topic slugs without duplicates.
    :raises sqlalchemy.exc.NoResultFound: from ``.one()`` when the shout or
        the new topic does not exist.
    """
    r = []
    for tpc in filter(None, entry["topics"]):
        oldslug = tpc
        newslug = storage["replacements"].get(oldslug, oldslug)
        if not newslug:
            # ``tpc`` is the slug string itself, not a dict
            print("[migration] ignored topic slug: \n%r" % (tpc,))
            # raise Exception
            continue

        with local_session() as session:
            shout = session.query(Shout).where(Shout.slug == entry["slug"]).one()
            new_topic = session.query(Topic).where(Topic.slug == newslug).one()

            shout_topic_old = (
                session.query(ShoutTopic)
                .join(Shout)
                .join(Topic)
                .filter(Shout.slug == entry["slug"])
                .filter(Topic.slug == oldslug)
                .first()
            )
            if shout_topic_old:
                shout_topic_old.update({"topic": new_topic.id})
            else:
                shout_topic_new = (
                    session.query(ShoutTopic)
                    .join(Shout)
                    .join(Topic)
                    .filter(Shout.slug == entry["slug"])
                    .filter(Topic.slug == newslug)
                    .first()
                )
                if not shout_topic_new:
                    try:
                        ShoutTopic.create(
                            **{"shout": shout.id, "topic": new_topic.id}
                        )
                    except Exception:
                        # best effort: a failed link must not stop the run
                        print("[migration] shout topic error: " + newslug)
            session.commit()
        if newslug not in r:
            r.append(newslug)
    return r
|
2022-09-03 10:50:14 +00:00
|
|
|
|
2022-11-27 08:19:38 +00:00
|
|
|
|
|
|
|
async def content_ratings_to_reactions(entry, slug):
    """Convert the legacy ratings of an entry into reactions on its shout.

    Each rating becomes a LIKE (positive value) or DISLIKE reaction by the
    rating's author, or by ``User.default_user`` when that author is unknown.
    If the same user already has that reaction on the shout, the existing
    reaction is updated to AGREE / DISAGREE instead of adding a duplicate.

    The conversion is best effort: any failure is printed and swallowed, and
    nothing is committed in that case.

    :param entry: legacy content item (dict) with an optional ``ratings`` list.
    :param slug: slug of the shout the reactions belong to.
    :return: None.
    """
    ratings = entry.get("ratings") or []
    if not ratings:
        return

    # bound up front so the error report works even when the failure happens
    # before the first rating is processed
    content_rating = None
    try:
        with local_session() as session:
            # the shout is the same for every rating; look it up once
            shout = session.query(Shout).where(Shout.slug == slug).first()
            for content_rating in ratings:
                rater = (
                    session.query(User)
                    .filter(User.oid == content_rating["createdBy"])
                    .first()
                ) or User.default_user
                cts = content_rating.get("createdAt")
                positive = content_rating["value"] > 0
                reaction_dict = {
                    "createdAt": date_parse(cts) if cts else None,
                    "kind": ReactionKind.LIKE if positive else ReactionKind.DISLIKE,
                    "createdBy": rater.id,
                    "shout": shout.id
                }
                reaction = (
                    session.query(Reaction)
                    .filter(Reaction.shout == reaction_dict["shout"])
                    .filter(Reaction.createdBy == reaction_dict["createdBy"])
                    .filter(Reaction.kind == reaction_dict["kind"])
                    .first()
                )
                if reaction:
                    k = ReactionKind.AGREE if positive else ReactionKind.DISAGREE
                    reaction_dict["kind"] = k
                    reaction.update(reaction_dict)
                    session.add(reaction)
                else:
                    rea = Reaction.create(**reaction_dict)
                    session.add(rea)
                # shout_dict['ratings'].append(reaction_dict)

            session.commit()
    except Exception as e:
        print("[migration] content_item.ratings error: \n%r" % (content_rating,))
        print(e)
|