This commit is contained in:
2022-11-30 23:20:08 +03:00
35 changed files with 617 additions and 782 deletions

View File

@@ -96,16 +96,16 @@ async def shouts_handle(storage, args):
continue
# migrate
shout = await migrateShout(entry, storage)
if shout:
storage["shouts"]["by_oid"][entry["_id"]] = shout
storage["shouts"]["by_slug"][shout["slug"]] = shout
shout_dict = await migrateShout(entry, storage)
if shout_dict:
storage["shouts"]["by_oid"][entry["_id"]] = shout_dict
storage["shouts"]["by_slug"][shout_dict["slug"]] = shout_dict
# shouts.topics
if not shout["topics"]:
if not shout_dict["topics"]:
print("[migration] no topics!")
# with author
author: str = shout["authors"][0].dict()
author = shout_dict["authors"][0]
if author["slug"] == "discours":
discours_author += 1
if author["slug"] == "anonymous":
@@ -114,19 +114,20 @@ async def shouts_handle(storage, args):
if entry.get("published"):
if "mdx" in args:
export_mdx(shout)
export_mdx(shout_dict)
pub_counter += 1
# print main counter
counter += 1
line = str(counter + 1) + ": " + shout["slug"] + " @" + author["slug"]
print(line)
print('[migration] shouts_handle %d: %s @%s' % (
(counter + 1), shout_dict["slug"], author["slug"]
))
b = bs4.BeautifulSoup(shout["body"], "html.parser")
texts = [shout["title"].lower().replace(r"[^а-яА-Яa-zA-Z]", "")]
b = bs4.BeautifulSoup(shout_dict["body"], "html.parser")
texts = [shout_dict["title"].lower().replace(r"[^а-яА-Яa-zA-Z]", "")]
texts = texts + b.findAll(text=True)
topics_dataset_bodies.append(" ".join([x.strip().lower() for x in texts]))
topics_dataset_tlist.append(shout["topics"])
topics_dataset_tlist.append(shout_dict["topics"])
else:
ignored += 1
@@ -134,9 +135,7 @@ async def shouts_handle(storage, args):
# ', fmt='%s')
print("[migration] " + str(counter) + " content items were migrated")
print("[migration] " + str(ignored) + " content items were ignored")
print("[migration] " + str(pub_counter) + " have been published")
print("[migration] " + str(discours_author) + " authored by @discours")
print("[migration] " + str(anonymous_author) + " authored by @anonymous")

View File

@@ -8,12 +8,13 @@ from orm.reaction import Reaction, ReactionKind
from orm.shout import ShoutReactionsFollower
from orm.topic import TopicFollower
from orm.user import User
from orm.shout import Shout
# from services.stat.reacted import ReactedStorage
ts = datetime.now(tz=timezone.utc)
def auto_followers(session, shout_dict, reaction_dict):
def auto_followers(session, topics, reaction_dict):
# creating shout's reactions following for reaction author
following1 = session.query(
ShoutReactionsFollower
@@ -30,18 +31,18 @@ def auto_followers(session, shout_dict, reaction_dict):
)
session.add(following1)
# creating topics followings for reaction author
for t in shout_dict["topics"]:
for t in topics:
tf = session.query(
TopicFollower
).where(
TopicFollower.follower == reaction_dict["createdBy"]
).filter(
TopicFollower.topic == t
TopicFollower.topic == t['id']
).first()
if not tf:
topic_following = TopicFollower.create(
follower=reaction_dict["createdBy"],
topic=t,
topic=t['id'],
auto=True
)
session.add(topic_following)
@@ -60,7 +61,7 @@ def migrate_ratings(session, entry, reaction_dict):
"kind": ReactionKind.LIKE
if comment_rating_old["value"] > 0
else ReactionKind.DISLIKE,
"createdBy": rater.slug if rater else "anonymous",
"createdBy": rater.id if rater else 1,
}
cts = comment_rating_old.get("createdAt")
if cts:
@@ -108,9 +109,7 @@ async def migrate(entry, storage):
"updatedAt": "2020-05-27 19:22:57.091000+00:00",
"updatedBy": "0"
}
->
type Reaction {
id: Int!
shout: Shout!
@@ -143,30 +142,41 @@ async def migrate(entry, storage):
raise Exception
return
else:
stage = "started"
reaction = None
with local_session() as session:
author = session.query(User).filter(User.oid == entry["createdBy"]).first()
shout_dict = storage["shouts"]["by_oid"][shout_oid]
if shout_dict:
reaction_dict["shout"] = shout_dict["slug"]
reaction_dict["createdBy"] = author.slug if author else "discours"
reaction_dict["kind"] = ReactionKind.COMMENT
# creating reaction from old comment
reaction = Reaction.create(**reaction_dict)
session.add(reaction)
# await ReactedStorage.react(reaction)
reaction_dict = reaction.dict()
auto_followers(session, shout_dict, reaction_dict)
migrate_ratings(session, shout_dict, reaction_dict)
old_shout = storage["shouts"]["by_oid"].get(shout_oid)
if not old_shout:
raise Exception("no old shout in storage")
else:
print(
"[migration] error: cannot find shout for comment %r"
% reaction_dict
)
return reaction
stage = "author and old id found"
try:
shout = session.query(
Shout
).where(Shout.slug == old_shout["slug"]).one()
if shout:
reaction_dict["shout"] = shout.id
reaction_dict["createdBy"] = author.id if author else 1
reaction_dict["kind"] = ReactionKind.COMMENT
# creating reaction from old comment
reaction = Reaction.create(**reaction_dict)
session.add(reaction)
# session.commit()
stage = "new reaction commited"
reaction_dict = reaction.dict()
topics = [t.dict() for t in shout.topics]
auto_followers(session, topics, reaction_dict)
migrate_ratings(session, entry, reaction_dict)
return reaction
except Exception as e:
print(e)
print(reaction)
raise Exception(stage)
return
def migrate_2stage(old_comment, idmap):

View File

@@ -8,9 +8,10 @@ from migration.extract import extract_html, extract_media
from orm.reaction import Reaction, ReactionKind
from orm.shout import Shout, ShoutTopic, ShoutReactionsFollower
from orm.user import User
from orm.topic import TopicFollower
from orm.topic import TopicFollower, Topic
# from services.stat.reacted import ReactedStorage
from services.stat.viewed import ViewedStorage
import re
OLD_DATE = "2016-03-05 22:22:00.350000"
ts = datetime.now(tz=timezone.utc)
@@ -22,6 +23,8 @@ type2layout = {
"Image": "image",
}
anondict = {"slug": "anonymous", "id": 1, "name": "Аноним"}
def get_shout_slug(entry):
slug = entry.get("slug", "")
@@ -30,6 +33,7 @@ def get_shout_slug(entry):
slug = friend.get("slug", "")
if slug:
break
slug = re.sub('[^0-9a-zA-Z]+', '-', slug)
return slug
@@ -40,13 +44,8 @@ def create_author_from_app(app):
user = session.query(User).where(User.email == app['email']).first()
if not user:
name = app.get('name')
slug = (
translit(name, "ru", reversed=True)
.replace(" ", "-")
.replace("'", "")
.replace(".", "-")
.lower()
)
slug = translit(name, "ru", reversed=True).lower()
slug = re.sub('[^0-9a-zA-Z]+', '-', slug)
# check if nameslug is used
user = session.query(User).where(User.slug == slug).first()
# get slug from email
@@ -82,18 +81,33 @@ def create_author_from_app(app):
return userdata
async def create_shout(shout_dict, userslug):
async def create_shout(shout_dict, user):
s = Shout.create(**shout_dict)
with local_session() as session:
srf = session.query(ShoutReactionsFollower).where(
ShoutReactionsFollower.shout == s.slug
ShoutReactionsFollower.shout == s.id
).filter(
ShoutReactionsFollower.follower == userslug
ShoutReactionsFollower.follower == user.id
).first()
if not srf:
srf = ShoutReactionsFollower.create(shout=s.slug, follower=userslug, auto=True)
srf = ShoutReactionsFollower.create(shout=s.id, follower=user.id, auto=True)
session.add(srf)
session.commit()
return s
def get_userdata(entry, storage):
    """Resolve the author record for a migrated content entry.

    The author comes from ``entry["application"]`` when that key holds a
    truthy value (via ``create_author_from_app``); otherwise it is looked up
    in ``storage["users"]["by_oid"]`` by the entry's ``createdBy`` oid.
    If either source yields nothing, the module-level ``anondict`` is used.

    The record's ``slug`` is normalized so that every run of characters
    outside ``0-9a-zA-Z`` becomes a single ``-``; the normalized value is
    written back into the record itself (the dict is modified in place,
    not copied).

    Returns:
        A ``(userdata, user_oid)`` tuple, where ``user_oid`` is
        ``entry["createdBy"]`` or ``""`` when that key is absent.

    Raises:
        TypeError: from ``re.sub`` if the resolved record has no ``slug``.
    """
    user_oid = entry.get("createdBy", "")
    application = entry.get("application")
    if application:
        record = create_author_from_app(application)
    else:
        record = storage["users"]["by_oid"].get(user_oid)
    # an empty/missing record degrades to the anonymous author
    userdata = record or anondict
    userdata["slug"] = re.sub('[^0-9a-zA-Z]+', '-', userdata.get("slug"))
    return userdata, user_oid
def get_userdata(entry, storage):
@@ -109,12 +123,12 @@ def get_userdata(entry, storage):
async def migrate(entry, storage):
userslug, userdata, user_oid = get_userdata(entry, storage)
user = await get_user(userslug, userdata, storage, user_oid)
userdata, user_oid = get_userdata(entry, storage)
user = await get_user(userdata, storage, user_oid)
r = {
"layout": type2layout[entry["type"]],
"title": entry["title"],
"authors": [userslug, ],
"authors": [userdata["slug"], ],
"slug": get_shout_slug(entry),
"cover": (
"https://assets.discours.io/unsafe/1600x/" +
@@ -125,7 +139,7 @@ async def migrate(entry, storage):
"deletedAt": date_parse(entry.get("deletedAt")) if entry.get("deletedAt") else None,
"createdAt": date_parse(entry.get("createdAt", OLD_DATE)),
"updatedAt": date_parse(entry["updatedAt"]) if "updatedAt" in entry else ts,
"topics": await add_topics_follower(entry, storage, userslug),
"topics": await add_topics_follower(entry, storage, user),
"body": extract_html(entry)
}
@@ -136,7 +150,7 @@ async def migrate(entry, storage):
if entry.get("published"):
with local_session() as session:
# update user.emailConfirmed if published
author = session.query(User).where(User.slug == userslug).first()
author = session.query(User).where(User.slug == userdata["slug"]).first()
author.emailConfirmed = True
session.add(author)
session.commit()
@@ -153,13 +167,18 @@ async def migrate(entry, storage):
del shout_dict["topics"]
try:
# save shout to db
await create_shout(shout_dict, userslug)
shout_dict["oid"] = entry.get("_id", "")
shout = await create_shout(shout_dict, user)
except IntegrityError as e:
print(e)
await resolve_create_shout(shout_dict, userslug)
print('[migration] create_shout integrity error', e)
shout = await resolve_create_shout(shout_dict, userdata["slug"])
except Exception as e:
raise Exception(e)
# update data
shout_dict = shout.dict()
shout_dict["authors"] = [user.dict(), ]
# shout topics aftermath
shout_dict["topics"] = await topics_aftermath(r, storage)
@@ -170,13 +189,12 @@ async def migrate(entry, storage):
await ViewedStorage.increment(shout_dict["slug"], amount=entry.get("views", 1))
# del shout_dict['ratings']
shout_dict["oid"] = entry.get("_id", "")
storage["shouts"]["by_oid"][entry["_id"]] = shout_dict
storage["shouts"]["by_slug"][shout_dict["slug"]] = shout_dict
return shout_dict
async def add_topics_follower(entry, storage, userslug):
async def add_topics_follower(entry, storage, user):
topics = set([])
category = entry.get("category")
topics_by_oid = storage["topics"]["by_oid"]
@@ -188,25 +206,26 @@ async def add_topics_follower(entry, storage, userslug):
ttt = list(topics)
# add author as TopicFollower
with local_session() as session:
for tpc in topics:
for tpcslug in topics:
try:
tpc = session.query(Topic).where(Topic.slug == tpcslug).first()
tf = session.query(
TopicFollower
).where(
TopicFollower.follower == userslug
TopicFollower.follower == user.id
).filter(
TopicFollower.topic == tpc
TopicFollower.topic == tpc.id
).first()
if not tf:
tf = TopicFollower.create(
topic=tpc,
follower=userslug,
topic=tpc.id,
follower=user.id,
auto=True
)
session.add(tf)
session.commit()
except IntegrityError:
print('[migration.shout] hidden by topic ' + tpc)
print('[migration.shout] hidden by topic ' + tpc.slug)
# main topic
maintopic = storage["replacements"].get(topics_by_oid.get(category, {}).get("slug"))
if maintopic in ttt:
@@ -215,23 +234,28 @@ async def add_topics_follower(entry, storage, userslug):
return ttt
async def get_user(userslug, userdata, storage, oid):
async def get_user(userdata, storage, oid):
user = None
with local_session() as session:
if not user and userslug:
user = session.query(User).filter(User.slug == userslug).first()
if not user and userdata:
uid = userdata.get("id")
if uid:
user = session.query(User).filter(User.id == uid).first()
elif userdata:
try:
userdata["slug"] = userdata["slug"].lower().strip().replace(" ", "-")
slug = userdata["slug"].lower().strip()
slug = re.sub('[^0-9a-zA-Z]+', '-', slug)
userdata["slug"] = slug
user = User.create(**userdata)
session.add(user)
session.commit()
except IntegrityError:
print("[migration] user error: " + userdata)
userdata["id"] = user.id
userdata["createdAt"] = user.createdAt
storage["users"]["by_slug"][userdata["slug"]] = userdata
storage["users"]["by_oid"][oid] = userdata
print("[migration] user creating with slug %s" % userdata["slug"])
print("[migration] from userdata: %r" % userdata)
raise Exception("[migration] cannot create user in content_items.get_user()")
userdata["id"] = user.id
userdata["createdAt"] = user.createdAt
storage["users"]["by_slug"][userdata["slug"]] = userdata
storage["users"]["by_oid"][oid] = userdata
if not user:
raise Exception("could not get a user")
return user
@@ -269,6 +293,7 @@ async def resolve_create_shout(shout_dict, userslug):
print("[migration] something went wrong with shout: \n%r" % shout_dict)
raise Exception("")
session.commit()
return s
async def topics_aftermath(entry, storage):
@@ -276,27 +301,35 @@ async def topics_aftermath(entry, storage):
for tpc in filter(lambda x: bool(x), entry["topics"]):
oldslug = tpc
newslug = storage["replacements"].get(oldslug, oldslug)
if newslug:
with local_session() as session:
shout = session.query(Shout).where(Shout.slug == entry["slug"]).one()
new_topic = session.query(Topic).where(Topic.slug == newslug).one()
shout_topic_old = (
session.query(ShoutTopic)
.filter(ShoutTopic.shout == entry["slug"])
.filter(ShoutTopic.topic == oldslug)
.join(Shout)
.join(Topic)
.filter(Shout.slug == entry["slug"])
.filter(Topic.slug == oldslug)
.first()
)
if shout_topic_old:
shout_topic_old.update({"slug": newslug})
shout_topic_old.update({"topic": new_topic.id})
else:
shout_topic_new = (
session.query(ShoutTopic)
.filter(ShoutTopic.shout == entry["slug"])
.filter(ShoutTopic.topic == newslug)
.join(Shout)
.join(Topic)
.filter(Shout.slug == entry["slug"])
.filter(Topic.slug == newslug)
.first()
)
if not shout_topic_new:
try:
ShoutTopic.create(
**{"shout": entry["slug"], "topic": newslug}
**{"shout": shout.id, "topic": new_topic.id}
)
except Exception:
print("[migration] shout topic error: " + newslug)
@@ -318,14 +351,15 @@ async def content_ratings_to_reactions(entry, slug):
.filter(User.oid == content_rating["createdBy"])
.first()
) or User.default_user
shout = session.query(Shout).where(Shout.slug == slug).first()
cts = content_rating.get("createdAt")
reaction_dict = {
"createdAt": date_parse(cts) if cts else None,
"kind": ReactionKind.LIKE
if content_rating["value"] > 0
else ReactionKind.DISLIKE,
"createdBy": rater.slug,
"shout": slug
"createdBy": rater.id,
"shout": shout.id
}
reaction = (
session.query(Reaction)

View File

@@ -9,9 +9,10 @@ def migrate(entry):
topic_dict = {
"slug": entry["slug"],
"oid": entry["_id"],
"title": entry["title"].replace(" ", " ")
"title": entry["title"].replace(" ", " "),
"body": extract_md(html2text(body_orig), entry["_id"])
}
topic_dict["body"] = extract_md(html2text(body_orig), entry["_id"])
with local_session() as session:
slug = topic_dict["slug"]
topic = session.query(Topic).filter(Topic.slug == slug).first() or Topic.create(

View File

@@ -1,7 +1,7 @@
from dateutil.parser import parse
from sqlalchemy.exc import IntegrityError
from bs4 import BeautifulSoup
import re
from base.orm import local_session
from orm.user import AuthorFollower, User, UserRating
@@ -23,17 +23,18 @@ def migrate(entry):
"notifications": [],
"links": [],
"name": "anonymous",
"password": entry["services"]["password"].get("bcrypt")
}
user_dict["password"] = entry["services"]["password"].get("bcrypt")
if "updatedAt" in entry:
user_dict["updatedAt"] = parse(entry["updatedAt"])
if "wasOnineAt" in entry:
user_dict["lastSeen"] = parse(entry["wasOnlineAt"])
if entry.get("profile"):
# slug
user_dict["slug"] = (
entry["profile"].get("path").lower().replace(" ", "-").strip()
)
slug = entry["profile"].get("path").lower()
slug = re.sub('[^0-9a-zA-Z]+', '-', slug).strip()
user_dict["slug"] = slug
bio = BeautifulSoup(entry.get("profile").get("bio") or "", features="lxml").text
if bio.startswith('<'):
print('[migration] bio! ' + bio)
@@ -114,18 +115,23 @@ def migrate_2stage(entry, id_map):
continue
oid = entry["_id"]
author_slug = id_map.get(oid)
user_rating_dict = {
"value": rating_entry["value"],
"rater": rater_slug,
"user": author_slug,
}
with local_session() as session:
try:
rater = session.query(User).where(User.slug == rater_slug).one()
user = session.query(User).where(User.slug == author_slug).one()
user_rating_dict = {
"value": rating_entry["value"],
"raterId": rater.id,
"user": user.id,
}
user_rating = UserRating.create(**user_rating_dict)
if user_rating_dict['value'] > 0:
af = AuthorFollower.create(
author=user_rating_dict['user'],
follower=user_rating_dict['rater'],
author=user.id,
follower=rater.id,
auto=True
)
session.add(af)