Merge remote-tracking branch 'origin/main' into storages-to-qeuries
@@ -314,9 +314,6 @@ async def handle_auto():

 async def main():
     if len(sys.argv) > 1:
-        cmd = sys.argv[1]
-        if type(cmd) == str:
-            print("[migration] command: " + cmd)
         init_tables()
         await handle_auto()
     else:
@@ -4,7 +4,7 @@ from datetime import datetime, timezone

 import frontmatter

-from .extract import extract_html, prepare_html_body
+from .extract import extract_html, extract_media
 from .utils import DateTimeEncoder

 OLD_DATE = "2016-03-05 22:22:00.350000"
@@ -50,11 +50,12 @@ def export_mdx(r):

 def export_body(shout, storage):
     entry = storage["content_items"]["by_oid"][shout["oid"]]
     if entry:
-        shout["body"], media = prepare_html_body(entry)  # prepare_md_body(entry)
+        body = extract_html(entry)
+        media = extract_media(entry)
+        shout["body"] = body  # prepare_html_body(entry) # prepare_md_body(entry)
+        shout["media"] = media
         export_mdx(shout)
-        print("[export] html for %s" % shout["slug"])
-        body, _media = extract_html(entry)
         open(contentDir + shout["slug"] + ".html", "w").write(body)
     else:
         raise Exception("no content_items entry found")
@@ -3,7 +3,8 @@ import os
 import re
 import uuid

+from .html2text import html2text
 from bs4 import BeautifulSoup


 TOOLTIP_REGEX = r"(\/\/\/(.+)\/\/\/)"
 contentDir = os.path.join(
@@ -258,47 +259,44 @@ def extract_md(body, oid=""):
     return newbody


-def prepare_md_body(entry):
-    # body modifications
-    body = ""
-    kind = entry.get("type")
-    addon = ""
-    if kind == "Video":
-        addon = ""
-        for m in entry.get("media", []):
-            if "youtubeId" in m:
-                addon += "<VideoPlayer youtubeId='" + m["youtubeId"] + "' />\n"
-            elif "vimeoId" in m:
-                addon += "<VideoPlayer vimeoId='" + m["vimeoId"] + "' />\n"
-            else:
-                print("[extract] media is not supported")
-                print(m)
-        body = "import VideoPlayer from '$/components/Article/VideoPlayer'\n\n" + addon
-
-    elif kind == "Music":
-        addon = ""
-        for m in entry.get("media", []):
-            artist = m.get("performer")
-            trackname = ""
-            if artist:
-                trackname += artist + " - "
-            if "title" in m:
-                trackname += m.get("title", "")
-            addon += (
-                '<AudioPlayer src="'
-                + m.get("fileUrl", "")
-                + '" title="'
-                + trackname
-                + '" />\n'
-            )
-        body = "import AudioPlayer from '$/components/Article/AudioPlayer'\n\n" + addon
-
-    body_orig, media = extract_html(entry)
-    if body_orig:
-        body += extract_md(html2text(body_orig), entry["_id"])
-    if not body:
-        print("[extract] empty MDX body")
-    return body, media
+def extract_media(entry):
+    ''' normalized media extraction method '''
+    # media [ { title pic url body } ]}
+    kind = entry.get("type")
+    if not kind:
+        print(entry)
+        raise Exception("shout no layout")
+    media = []
+    for m in entry.get("media") or []:
+        # title
+        title = m.get("title", "").replace("\n", " ").replace("&nbsp;", " ")
+        artist = m.get("performer") or m.get("artist")
+        if artist:
+            title = artist + " - " + title
+
+        # pic
+        url = m.get("fileUrl") or m.get("url", "")
+        pic = ""
+        if m.get("thumborId"):
+            pic = cdn + "/unsafe/1600x/" + m["thumborId"]
+
+        # url
+        if not url:
+            if kind == "Image":
+                url = pic
+            elif "youtubeId" in m:
+                url = "https://youtube.com/?watch=" + m["youtubeId"]
+            elif "vimeoId" in m:
+                url = "https://vimeo.com/" + m["vimeoId"]
+            else:
+                print("[extract] media is not supported")
+                print(m)
+        # body
+        body = m.get("body") or m.get("literatureBody") or ""
+        media.append({
+            "url": url,
+            "pic": pic,
+            "title": title,
+            "body": body
+        })
+    return media


 def prepare_html_body(entry):
@@ -308,7 +306,7 @@ def prepare_html_body(entry):
     addon = ""
     if kind == "Video":
         addon = ""
-        for m in entry.get("media", []):
+        for m in entry.get("media") or []:
             if "youtubeId" in m:
                 addon += '<iframe width="420" height="345" src="http://www.youtube.com/embed/'
                 addon += m["youtubeId"]
@@ -325,7 +323,7 @@ def prepare_html_body(entry):

     elif kind == "Music":
         addon = ""
-        for m in entry.get("media", []):
+        for m in entry.get("media") or []:
             artist = m.get("performer")
             trackname = ""
             if artist:
@@ -339,68 +337,12 @@ def prepare_html_body(entry):
         addon += '"></audio></figure>'
         body += addon

-    body, media = extract_html(entry)
+    body = extract_html(entry)
     # if body_orig: body += extract_md(html2text(body_orig), entry['_id'])
-    if not body:
-        print("[extract] empty HTML body")
-    return body, media
+    return body


 def extract_html(entry):
     body_orig = (entry.get("body") or "").replace('\(', '(').replace('\)', ')')
-    media = entry.get("media", [])
-    kind = entry.get("type") or ""
-    print("[extract] kind: " + kind)
-    mbodies = set([])
-    if media:
-        # print('[extract] media is found')
-        for m in media:
-            mbody = m.get("body", "")
-            addon = ""
-            if kind == "Literature":
-                mbody = m.get("literatureBody") or m.get("body", "")
-            elif kind == "Image":
-                cover = ""
-                if "thumborId" in entry:
-                    cover = cdn + "/unsafe/1600x/" + entry["thumborId"]
-                if not cover:
-                    if "image" in entry:
-                        cover = entry["image"].get("url", "")
-                    if "cloudinary" in cover:
-                        cover = ""
-                # else: print('[extract] cover: ' + cover)
-                title = m.get("title", "").replace("\n", " ").replace("&nbsp;", " ")
-                u = m.get("thumborId") or cover or ""
-                if title:
-                    addon += "<h4>" + title + "</h4>\n"
-                if not u.startswith("http"):
-                    u = s3 + u
-                if not u:
-                    print("[extract] no image url for " + str(m))
-                if "cloudinary" in u:
-                    u = "img/lost.svg"
-                if u != cover or (u == cover and media.index(m) == 0):
-                    addon += '<img src="' + u + '" alt="' + title + '" />\n'
-            if addon:
-                body_orig += addon
-                # print('[extract] item addon: ' + addon)
-            # if addon: print('[extract] addon: %s' % addon)
-            if mbody and mbody not in mbodies:
-                mbodies.add(mbody)
-                body_orig += mbody
-        if len(list(mbodies)) != len(media):
-            print(
-                "[extract] %d/%d media item bodies appended"
-                % (len(list(mbodies)), len(media))
-            )
-        # print('[extract] media items body: \n' + body_orig)
-    if not body_orig:
-        for up in entry.get("bodyHistory", []) or []:
-            body_orig = up.get("text", "") or ""
-            if body_orig:
-                print("[extract] got html body from history")
-                break
-    if not body_orig:
-        print("[extract] empty HTML body")
-    # body_html = str(BeautifulSoup(body_orig, features="html.parser"))
-    return body_orig, media
+    body_html = str(BeautifulSoup(body_orig, features="html.parser"))
+    return body_html
@@ -4,7 +4,7 @@ from dateutil.parser import parse as date_parse
 from sqlalchemy.exc import IntegrityError
 from transliterate import translit
 from base.orm import local_session
-from migration.extract import prepare_html_body
+from migration.extract import extract_html, extract_media
 from orm.reaction import Reaction, ReactionKind
 from orm.shout import Shout, ShoutTopic, ShoutReactionsFollower
 from orm.user import User
@@ -103,11 +103,11 @@ async def migrate(entry, storage):
         "authors": [],
         "topics": set([])
     }
     topics_by_oid = storage["topics"]["by_oid"]
-    users_by_oid = storage["users"]["by_oid"]
-
-    # author
-    oid = entry.get("createdBy", entry.get("_id", entry.get("oid")))
-    userdata = users_by_oid.get(oid)
+    users_by_oid = storage["users"]["by_oid"]
+    user_oid = entry.get("createdBy", "")
+    userdata = users_by_oid.get(user_oid)
     user = None
     if not userdata:
         app = entry.get("application")
@@ -139,6 +139,8 @@ async def migrate(entry, storage):
     # timestamps
     r["createdAt"] = date_parse(entry.get("createdAt", OLD_DATE))
     r["updatedAt"] = date_parse(entry["updatedAt"]) if "updatedAt" in entry else ts

+    # visibility
     if entry.get("published"):
         r["publishedAt"] = date_parse(entry.get("publishedAt", OLD_DATE))
+        r["visibility"] = "public"
@@ -150,25 +152,67 @@ async def migrate(entry, storage):
             session.commit()
+    else:
+        r["visibility"] = "authors"

     if "deletedAt" in entry:
         r["deletedAt"] = date_parse(entry["deletedAt"])

-    # topics
-    category = entry.get("category")
-    for oid in [category, ] + entry.get("tags", []):
-        t = storage["topics"]["by_oid"].get(oid)
-        if t:
-            tslug = storage["topics"]["by_oid"][oid]["slug"]
-            r["topics"].add(tslug)
-    r["topics"] = list(r["topics"])
-    # main topic
-    mt = topics_by_oid.get(category)
-    if mt and mt.get("slug"):
-        r["mainTopic"] = storage["replacements"].get(mt["slug"]) or r["topics"][0]
+    r['topics'] = await add_topics_follower(entry, storage, userslug)
+    r['mainTopic'] = r['topics'][0]
+
+    entry["topics"] = r["topics"]
+    entry["cover"] = r["cover"]
+
+    # body
+    r["body"] = extract_html(entry)
+    media = extract_media(entry)
+    if media:
+        r["media"] = json.dumps(media, ensure_ascii=True)
+
+    shout_dict = r.copy()
+
+    # user
+    user = await get_user(userslug, userdata, storage, user_oid)
+    shout_dict["authors"] = [user, ]
+    del shout_dict["topics"]
+    try:
+        # save shout to db
+        await create_shout(shout_dict, userslug)
+    except IntegrityError as e:
+        print(e)
+        await resolve_create_shout(shout_dict, userslug)
+    except Exception as e:
+        raise Exception(e)
+
+    # shout topics aftermath
+    shout_dict["topics"] = await topics_aftermath(r, storage)
+
+    # content_item ratings to reactions
+    await content_ratings_to_reactions(entry, shout_dict["slug"])
+
+    # shout views
+    await ViewedStorage.increment(shout_dict["slug"], amount=entry.get("views", 1))
+    # del shout_dict['ratings']
+
+    shout_dict["oid"] = entry.get("_id", "")
+    storage["shouts"]["by_oid"][entry["_id"]] = shout_dict
+    storage["shouts"]["by_slug"][slug] = shout_dict
+    return shout_dict
+
+
+async def add_topics_follower(entry, storage, userslug):
+    topics = set([])
+    category = entry.get("category")
+    topics_by_oid = storage["topics"]["by_oid"]
+    oids = [category, ] + entry.get("tags", [])
+    for toid in oids:
+        tslug = topics_by_oid.get(toid, {}).get("slug")
+        if tslug:
+            topics.add(tslug)
+    ttt = list(topics)
     # add author as TopicFollower
     with local_session() as session:
-        for tpc in r['topics']:
+        for tpc in topics:
             try:
                 tf = session.query(
                     TopicFollower
@@ -184,24 +228,19 @@ async def migrate(entry, storage):
                     auto=True
                 )
                 session.add(tf)
                 session.commit()
             except IntegrityError:
                 print('[migration.shout] hidden by topic ' + tpc)
-                r["visibility"] = "authors"
-                r["publishedAt"] = None
-                r["topics"].remove(tpc)
+    # main topic
+    maintopic = storage["replacements"].get(topics_by_oid.get(category, {}).get("slug"))
+    if maintopic in ttt:
+        ttt.remove(maintopic)
+    ttt.insert(0, maintopic)
+    return ttt

-    entry["topics"] = r["topics"]
-    entry["cover"] = r["cover"]
-
-    # body
-    r["body"], media = prepare_html_body(entry)
-    if media:
-        r["media"] = json.dumps(media, ensure_ascii=True)
-    # save shout to db
-    s = object()
-    shout_dict = r.copy()
+
+async def get_user(userslug, userdata, storage, oid):
     user = None
-    del shout_dict["topics"]
     with local_session() as session:
         if not user and userslug:
             user = session.query(User).filter(User.slug == userslug).first()
@@ -216,60 +255,56 @@ async def migrate(entry, storage):
             userdata["id"] = user.id
             userdata["createdAt"] = user.createdAt
             storage["users"]["by_slug"][userdata["slug"]] = userdata
-            storage["users"]["by_oid"][entry["_id"]] = userdata
+            storage["users"]["by_oid"][oid] = userdata
     if not user:
         raise Exception("could not get a user")
-    shout_dict["authors"] = [user, ]
-    try:
-        await create_shout(shout_dict, userslug)
-    except IntegrityError as e:
-        with local_session() as session:
-            s = session.query(Shout).filter(Shout.slug == shout_dict["slug"]).first()
-            bump = False
-            if s:
-                if s.authors[0] != userslug:
-                    # create new with different slug
-                    shout_dict["slug"] += '-' + shout_dict["layout"]
-                    try:
-                        await create_shout(shout_dict, userslug)
-                    except IntegrityError as e:
-                        print(e)
-                        bump = True
-                else:
-                    # update old
-                    for key in shout_dict:
-                        if key in s.__dict__:
-                            if s.__dict__[key] != shout_dict[key]:
-                                print(
-                                    "[migration] shout already exists, but differs in %s"
-                                    % key
-                                )
-                                bump = True
-                        else:
-                            print("[migration] shout already exists, but lacks %s" % key)
-                            bump = True
-                    if bump:
-                        s.update(shout_dict)
-            else:
-                print("[migration] something went wrong with shout: \n%r" % shout_dict)
-                raise e
-            session.commit()
-    except Exception as e:
-        print(e)
-        print(s)
-        raise Exception
+    return user

-    # shout topics aftermath
-    shout_dict["topics"] = []
-    for tpc in r["topics"]:
+
+async def resolve_create_shout(shout_dict, userslug):
+    with local_session() as session:
+        s = session.query(Shout).filter(Shout.slug == shout_dict["slug"]).first()
+        bump = False
+        if s:
+            if s.authors[0] != userslug:
+                # create new with different slug
+                shout_dict["slug"] += '-' + shout_dict["layout"]
+                try:
+                    await create_shout(shout_dict, userslug)
+                except IntegrityError as e:
+                    print(e)
+                    bump = True
+            else:
+                # update old
+                for key in shout_dict:
+                    if key in s.__dict__:
+                        if s.__dict__[key] != shout_dict[key]:
+                            print(
+                                "[migration] shout already exists, but differs in %s"
+                                % key
+                            )
+                            bump = True
+                    else:
+                        print("[migration] shout already exists, but lacks %s" % key)
+                        bump = True
+                if bump:
+                    s.update(shout_dict)
+        else:
+            print("[migration] something went wrong with shout: \n%r" % shout_dict)
+            raise Exception("")
+        session.commit()
+
+
+async def topics_aftermath(entry, storage):
+    r = []
+    for tpc in filter(lambda x: bool(x), entry["topics"]):
         oldslug = tpc
         newslug = storage["replacements"].get(oldslug, oldslug)
         if newslug:
             with local_session() as session:
                 shout_topic_old = (
                     session.query(ShoutTopic)
-                    .filter(ShoutTopic.shout == shout_dict["slug"])
+                    .filter(ShoutTopic.shout == entry["slug"])
                     .filter(ShoutTopic.topic == oldslug)
                     .first()
                 )
@@ -278,25 +313,27 @@ async def migrate(entry, storage):
                 else:
                     shout_topic_new = (
                         session.query(ShoutTopic)
-                        .filter(ShoutTopic.shout == shout_dict["slug"])
+                        .filter(ShoutTopic.shout == entry["slug"])
                         .filter(ShoutTopic.topic == newslug)
                         .first()
                     )
                     if not shout_topic_new:
                         try:
                             ShoutTopic.create(
-                                **{"shout": shout_dict["slug"], "topic": newslug}
+                                **{"shout": entry["slug"], "topic": newslug}
                             )
                         except Exception:
                             print("[migration] shout topic error: " + newslug)
                 session.commit()
-            if newslug not in shout_dict["topics"]:
-                shout_dict["topics"].append(newslug)
+            if newslug not in r:
+                r.append(newslug)
         else:
             print("[migration] ignored topic slug: \n%r" % tpc["slug"])
             # raise Exception
+    return r

 # content_item ratings to reactions
+async def content_ratings_to_reactions(entry, slug):
     try:
         with local_session() as session:
             for content_rating in entry.get("ratings", []):
@@ -316,7 +353,7 @@ async def migrate(entry, storage):
                     if content_rating["value"] > 0
                     else ReactionKind.DISLIKE,
                     "createdBy": reactedBy.slug,
-                    "shout": shout_dict["slug"],
+                    "shout": slug,
                 }
                 cts = content_rating.get("createdAt")
                 if cts:
@@ -340,11 +377,3 @@ async def migrate(entry, storage):
             session.commit()
     except Exception:
         raise Exception("[migration] content_item.ratings error: \n%r" % content_rating)
-
-    # shout views
-    await ViewedStorage.increment(shout_dict["slug"], amount=entry.get("views", 1))
-    # del shout_dict['ratings']
-    shout_dict["oid"] = entry.get("_id")
-    storage["shouts"]["by_oid"][entry["_id"]] = shout_dict
-    storage["shouts"]["by_slug"][slug] = shout_dict
-    return shout_dict
@@ -547,6 +547,7 @@
   "poetry-slam": "poetry-slam",
   "pokoy": "peace",
+  "police": "police",
   "politicheskoe-fentezi": "political-fantasy",
   "politics": "politics",
   "politzaklyuchennye": "political-prisoners",
   "polsha": "poland",
@@ -1,5 +1,6 @@
 from base.orm import local_session
-from migration.extract import extract_md, html2text
+from migration.extract import extract_md
+from migration.html2text import html2text
 from orm import Topic

@@ -17,7 +17,7 @@ def migrate(entry):
         "username": email,
         "email": email,
         "createdAt": parse(entry["createdAt"]),
-        "emailConfirmed": bool(entry["emails"][0]["verified"]),
+        "emailConfirmed": ("@discours.io" in email) or bool(entry["emails"][0]["verified"]),
         "muted": False, # amnesty
         "bio": entry["profile"].get("bio", ""),
         "notifications": [],