migration-upgrade

This commit is contained in:
tonyrewin 2022-11-27 11:19:38 +03:00
parent 9a4cd6ba06
commit 2adf442224
5 changed files with 143 additions and 110 deletions

View File

@ -24,10 +24,8 @@ apt install redis nginx
First, install Postgres. Then you'll need some data First, install Postgres. Then you'll need some data
``` ```
brew install postgres
psql -U postgres createdb discoursio
> create database discoursio;
> \q
python server.py migrate python server.py migrate
``` ```
@ -42,3 +40,7 @@ python3 server.py dev
Put the header 'Authorization' with token from signIn query or registerUser mutation. Put the header 'Authorization' with token from signIn query or registerUser mutation.
# How to debug Ackee
Set the `ACKEE_TOKEN` variable.

View File

@ -262,9 +262,12 @@ def extract_md(body, oid=""):
def extract_media(entry): def extract_media(entry):
''' normalized media extraction method ''' ''' normalized media extraction method '''
# media [ { title pic url body } ]} # media [ { title pic url body } ]}
kind = entry.get("layout") kind = entry.get("type")
if not kind:
print(entry)
raise Exception("shout no layout")
media = [] media = []
for m in entry.get("media", []): for m in entry.get("media") or []:
# title # title
title = m.get("title", "").replace("\n", " ").replace(" ", " ") title = m.get("title", "").replace("\n", " ").replace(" ", " ")
artist = m.get("performer") or m.get("artist") artist = m.get("performer") or m.get("artist")
@ -274,7 +277,7 @@ def extract_media(entry):
# pic # pic
url = m.get("fileUrl") or m.get("url", "") url = m.get("fileUrl") or m.get("url", "")
pic = "" pic = ""
if "thumborId" in m: if m.get("thumborId"):
pic = cdn + "/unsafe/1600x/" + m["thumborId"] pic = cdn + "/unsafe/1600x/" + m["thumborId"]
# url # url
@ -285,11 +288,8 @@ def extract_media(entry):
url = "https://youtube.com/?watch=" + m["youtubeId"] url = "https://youtube.com/?watch=" + m["youtubeId"]
elif "vimeoId" in m: elif "vimeoId" in m:
url = "https://vimeo.com/" + m["vimeoId"] url = "https://vimeo.com/" + m["vimeoId"]
else:
print("[extract] media is not supported")
# body # body
body = m.get("body") or m.get("literatureBody") body = m.get("body") or m.get("literatureBody") or ""
media.append({ media.append({
"url": url, "url": url,
"pic": pic, "pic": pic,
@ -306,7 +306,7 @@ def prepare_html_body(entry):
addon = "" addon = ""
if kind == "Video": if kind == "Video":
addon = "" addon = ""
for m in entry.get("media", []): for m in entry.get("media") or []:
if "youtubeId" in m: if "youtubeId" in m:
addon += '<iframe width="420" height="345" src="http://www.youtube.com/embed/' addon += '<iframe width="420" height="345" src="http://www.youtube.com/embed/'
addon += m["youtubeId"] addon += m["youtubeId"]
@ -323,7 +323,7 @@ def prepare_html_body(entry):
elif kind == "Music": elif kind == "Music":
addon = "" addon = ""
for m in entry.get("media", []): for m in entry.get("media") or []:
artist = m.get("performer") artist = m.get("performer")
trackname = "" trackname = ""
if artist: if artist:
@ -339,14 +339,10 @@ def prepare_html_body(entry):
body = extract_html(entry) body = extract_html(entry)
# if body_orig: body += extract_md(html2text(body_orig), entry['_id']) # if body_orig: body += extract_md(html2text(body_orig), entry['_id'])
if not body:
print("[extract] empty HTML body")
return body return body
def extract_html(entry): def extract_html(entry):
body_orig = (entry.get("body") or "").replace('\(', '(').replace('\)', ')') body_orig = (entry.get("body") or "").replace('\(', '(').replace('\)', ')')
if not body_orig:
print("[extract] empty HTML body")
body_html = str(BeautifulSoup(body_orig, features="html.parser")) body_html = str(BeautifulSoup(body_orig, features="html.parser"))
return body_html return body_html

View File

@ -104,11 +104,11 @@ async def migrate(entry, storage):
"authors": [], "authors": [],
"topics": set([]) "topics": set([])
} }
topics_by_oid = storage["topics"]["by_oid"]
users_by_oid = storage["users"]["by_oid"]
# author # author
oid = entry.get("createdBy", entry.get("_id", entry.get("oid"))) users_by_oid = storage["users"]["by_oid"]
userdata = users_by_oid.get(oid) user_oid = entry.get("createdBy", "")
userdata = users_by_oid.get(user_oid)
user = None user = None
if not userdata: if not userdata:
app = entry.get("application") app = entry.get("application")
@ -140,6 +140,8 @@ async def migrate(entry, storage):
# timestamps # timestamps
r["createdAt"] = date_parse(entry.get("createdAt", OLD_DATE)) r["createdAt"] = date_parse(entry.get("createdAt", OLD_DATE))
r["updatedAt"] = date_parse(entry["updatedAt"]) if "updatedAt" in entry else ts r["updatedAt"] = date_parse(entry["updatedAt"]) if "updatedAt" in entry else ts
# visibility
if entry.get("published"): if entry.get("published"):
r["publishedAt"] = date_parse(entry.get("publishedAt", OLD_DATE)) r["publishedAt"] = date_parse(entry.get("publishedAt", OLD_DATE))
r["visibility"] = "public" r["visibility"] = "public"
@ -151,25 +153,67 @@ async def migrate(entry, storage):
session.commit() session.commit()
else: else:
r["visibility"] = "authors" r["visibility"] = "authors"
if "deletedAt" in entry: if "deletedAt" in entry:
r["deletedAt"] = date_parse(entry["deletedAt"]) r["deletedAt"] = date_parse(entry["deletedAt"])
# topics # topics
category = entry.get("category") r['topics'] = await add_topics_follower(entry, storage, userslug)
for oid in [category, ] + entry.get("tags", []): r['mainTopic'] = r['topics'][0]
t = storage["topics"]["by_oid"].get(oid)
if t:
tslug = storage["topics"]["by_oid"][oid]["slug"]
r["topics"].add(tslug)
r["topics"] = list(r["topics"])
# main topic
mt = topics_by_oid.get(category)
if mt and mt.get("slug"):
r["mainTopic"] = storage["replacements"].get(mt["slug"]) or r["topics"][0]
entry["topics"] = r["topics"]
entry["cover"] = r["cover"]
# body
r["body"] = extract_html(entry)
media = extract_media(entry)
if media:
r["media"] = json.dumps(media, ensure_ascii=True)
shout_dict = r.copy()
# user
user = await get_user(userslug, userdata, storage, user_oid)
shout_dict["authors"] = [user, ]
del shout_dict["topics"]
try:
# save shout to db
await create_shout(shout_dict, userslug)
except IntegrityError as e:
print(e)
await resolve_create_shout(shout_dict, userslug)
except Exception as e:
raise Exception(e)
# shout topics aftermath
shout_dict["topics"] = await topics_aftermath(r, storage)
# content_item ratings to reactions
await content_ratings_to_reactions(entry, shout_dict["slug"])
# shout views
await ViewedStorage.increment(shout_dict["slug"], amount=entry.get("views", 1))
# del shout_dict['ratings']
shout_dict["oid"] = entry.get("_id", "")
storage["shouts"]["by_oid"][entry["_id"]] = shout_dict
storage["shouts"]["by_slug"][slug] = shout_dict
return shout_dict
async def add_topics_follower(entry, storage, userslug):
topics = set([])
category = entry.get("category")
topics_by_oid = storage["topics"]["by_oid"]
oids = [category, ] + entry.get("tags", [])
for toid in oids:
tslug = topics_by_oid.get(toid, {}).get("slug")
if tslug:
topics.add(tslug)
ttt = list(topics)
# add author as TopicFollower # add author as TopicFollower
with local_session() as session: with local_session() as session:
for tpc in r['topics']: for tpc in topics:
try: try:
tf = session.query( tf = session.query(
TopicFollower TopicFollower
@ -185,25 +229,19 @@ async def migrate(entry, storage):
auto=True auto=True
) )
session.add(tf) session.add(tf)
session.commit()
except IntegrityError: except IntegrityError:
print('[migration.shout] hidden by topic ' + tpc) print('[migration.shout] hidden by topic ' + tpc)
r["visibility"] = "authors" # main topic
r["publishedAt"] = None maintopic = storage["replacements"].get(topics_by_oid.get(category, {}).get("slug"))
r["topics"].remove(tpc) if maintopic in ttt:
ttt.remove(maintopic)
ttt.insert(0, maintopic)
return ttt
entry["topics"] = r["topics"]
entry["cover"] = r["cover"]
# body async def get_user(userslug, userdata, storage, oid):
r["body"] = extract_html(entry)
media = extract_media(entry)
if media:
r["media"] = json.dumps(media, ensure_ascii=True)
# save shout to db
s = object()
shout_dict = r.copy()
user = None user = None
del shout_dict["topics"]
with local_session() as session: with local_session() as session:
if not user and userslug: if not user and userslug:
user = session.query(User).filter(User.slug == userslug).first() user = session.query(User).filter(User.slug == userslug).first()
@ -218,60 +256,56 @@ async def migrate(entry, storage):
userdata["id"] = user.id userdata["id"] = user.id
userdata["createdAt"] = user.createdAt userdata["createdAt"] = user.createdAt
storage["users"]["by_slug"][userdata["slug"]] = userdata storage["users"]["by_slug"][userdata["slug"]] = userdata
storage["users"]["by_oid"][entry["_id"]] = userdata storage["users"]["by_oid"][oid] = userdata
if not user: if not user:
raise Exception("could not get a user") raise Exception("could not get a user")
shout_dict["authors"] = [user, ] return user
try:
await create_shout(shout_dict, userslug)
except IntegrityError as e:
with local_session() as session:
s = session.query(Shout).filter(Shout.slug == shout_dict["slug"]).first()
bump = False
if s:
if s.authors[0] != userslug:
# create new with different slug
shout_dict["slug"] += '-' + shout_dict["layout"]
try:
await create_shout(shout_dict, userslug)
except IntegrityError as e:
print(e)
bump = True
else:
# update old
for key in shout_dict:
if key in s.__dict__:
if s.__dict__[key] != shout_dict[key]:
print(
"[migration] shout already exists, but differs in %s"
% key
)
bump = True
else:
print("[migration] shout already exists, but lacks %s" % key)
bump = True
if bump:
s.update(shout_dict)
else:
print("[migration] something went wrong with shout: \n%r" % shout_dict)
raise e
session.commit()
except Exception as e:
print(e)
print(s)
raise Exception
# shout topics aftermath
shout_dict["topics"] = [] async def resolve_create_shout(shout_dict, userslug):
for tpc in r["topics"]: with local_session() as session:
s = session.query(Shout).filter(Shout.slug == shout_dict["slug"]).first()
bump = False
if s:
if s.authors[0] != userslug:
# create new with different slug
shout_dict["slug"] += '-' + shout_dict["layout"]
try:
await create_shout(shout_dict, userslug)
except IntegrityError as e:
print(e)
bump = True
else:
# update old
for key in shout_dict:
if key in s.__dict__:
if s.__dict__[key] != shout_dict[key]:
print(
"[migration] shout already exists, but differs in %s"
% key
)
bump = True
else:
print("[migration] shout already exists, but lacks %s" % key)
bump = True
if bump:
s.update(shout_dict)
else:
print("[migration] something went wrong with shout: \n%r" % shout_dict)
raise Exception("")
session.commit()
async def topics_aftermath(entry, storage):
r = []
for tpc in filter(lambda x: bool(x), entry["topics"]):
oldslug = tpc oldslug = tpc
newslug = storage["replacements"].get(oldslug, oldslug) newslug = storage["replacements"].get(oldslug, oldslug)
if newslug: if newslug:
with local_session() as session: with local_session() as session:
shout_topic_old = ( shout_topic_old = (
session.query(ShoutTopic) session.query(ShoutTopic)
.filter(ShoutTopic.shout == shout_dict["slug"]) .filter(ShoutTopic.shout == entry["slug"])
.filter(ShoutTopic.topic == oldslug) .filter(ShoutTopic.topic == oldslug)
.first() .first()
) )
@ -280,25 +314,27 @@ async def migrate(entry, storage):
else: else:
shout_topic_new = ( shout_topic_new = (
session.query(ShoutTopic) session.query(ShoutTopic)
.filter(ShoutTopic.shout == shout_dict["slug"]) .filter(ShoutTopic.shout == entry["slug"])
.filter(ShoutTopic.topic == newslug) .filter(ShoutTopic.topic == newslug)
.first() .first()
) )
if not shout_topic_new: if not shout_topic_new:
try: try:
ShoutTopic.create( ShoutTopic.create(
**{"shout": shout_dict["slug"], "topic": newslug} **{"shout": entry["slug"], "topic": newslug}
) )
except Exception: except Exception:
print("[migration] shout topic error: " + newslug) print("[migration] shout topic error: " + newslug)
session.commit() session.commit()
if newslug not in shout_dict["topics"]: if newslug not in r:
shout_dict["topics"].append(newslug) r.append(newslug)
else: else:
print("[migration] ignored topic slug: \n%r" % tpc["slug"]) print("[migration] ignored topic slug: \n%r" % tpc["slug"])
# raise Exception # raise Exception
return r
# content_item ratings to reactions
async def content_ratings_to_reactions(entry, slug):
try: try:
with local_session() as session: with local_session() as session:
for content_rating in entry.get("ratings", []): for content_rating in entry.get("ratings", []):
@ -318,7 +354,7 @@ async def migrate(entry, storage):
if content_rating["value"] > 0 if content_rating["value"] > 0
else ReactionKind.DISLIKE, else ReactionKind.DISLIKE,
"createdBy": reactedBy.slug, "createdBy": reactedBy.slug,
"shout": shout_dict["slug"], "shout": slug,
} }
cts = content_rating.get("createdAt") cts = content_rating.get("createdAt")
if cts: if cts:
@ -343,11 +379,3 @@ async def migrate(entry, storage):
session.commit() session.commit()
except Exception: except Exception:
raise Exception("[migration] content_item.ratings error: \n%r" % content_rating) raise Exception("[migration] content_item.ratings error: \n%r" % content_rating)
# shout views
await ViewedStorage.increment(shout_dict["slug"], amount=entry.get("views", 1))
# del shout_dict['ratings']
shout_dict["oid"] = entry.get("_id")
storage["shouts"]["by_oid"][entry["_id"]] = shout_dict
storage["shouts"]["by_slug"][slug] = shout_dict
return shout_dict

View File

@ -547,6 +547,7 @@
"poetry-slam": "poetry-slam", "poetry-slam": "poetry-slam",
"pokoy": "peace", "pokoy": "peace",
"police": "police", "police": "police",
"politicheskoe-fentezi": "political-fantasy",
"politics": "politics", "politics": "politics",
"politzaklyuchennye": "political-prisoners", "politzaklyuchennye": "political-prisoners",
"polsha": "poland", "polsha": "poland",

View File

@ -199,7 +199,9 @@ async def delete_reaction(_, info, rid):
def map_result_item(result_item): def map_result_item(result_item):
reaction = result_item[0] reaction = result_item[0]
user = result_item[1] user = result_item[1]
shout = result_item[2]
reaction.createdBy = user reaction.createdBy = user
reaction.shout = shout
return reaction return reaction
@ -222,10 +224,14 @@ async def load_reactions_by(_, _info, by, limit=50, offset=0):
""" """
CreatedByUser = aliased(User) CreatedByUser = aliased(User)
ReactedShout = aliased(Shout)
q = select( q = select(
Reaction, CreatedByUser Reaction, CreatedByUser, ReactedShout
).join(CreatedByUser, Reaction.createdBy == CreatedByUser.slug) ).join(
CreatedByUser, Reaction.createdBy == CreatedByUser.slug
).join(
ReactedShout, Reaction.shout == ReactedShout.slug
)
if by.get("shout"): if by.get("shout"):
q = q.filter(Reaction.shout == by["shout"]) q = q.filter(Reaction.shout == by["shout"])
@ -245,7 +251,7 @@ async def load_reactions_by(_, _info, by, limit=50, offset=0):
order_way = asc if by.get("sort", "").startswith("-") else desc order_way = asc if by.get("sort", "").startswith("-") else desc
order_field = by.get("sort") or Reaction.createdAt order_field = by.get("sort") or Reaction.createdAt
q = q.group_by( q = q.group_by(
Reaction.id, CreatedByUser.id Reaction.id, CreatedByUser.id, ReactedShout.id
).order_by( ).order_by(
order_way(order_field) order_way(order_field)
) )