configured isort, black, flake8
This commit is contained in:
@@ -12,10 +12,12 @@ from migration.tables.comments import migrate as migrateComment
|
||||
from migration.tables.comments import migrate_2stage as migrateComment_2stage
|
||||
from migration.tables.content_items import get_shout_slug
|
||||
from migration.tables.content_items import migrate as migrateShout
|
||||
from migration.tables.remarks import migrate as migrateRemark
|
||||
|
||||
# from migration.tables.remarks import migrate as migrateRemark
|
||||
from migration.tables.topics import migrate as migrateTopic
|
||||
from migration.tables.users import migrate as migrateUser, post_migrate as users_post_migrate
|
||||
from migration.tables.users import migrate as migrateUser
|
||||
from migration.tables.users import migrate_2stage as migrateUser_2stage
|
||||
from migration.tables.users import post_migrate as users_post_migrate
|
||||
from orm import init_tables
|
||||
from orm.reaction import Reaction
|
||||
|
||||
@@ -63,16 +65,8 @@ async def topics_handle(storage):
|
||||
del storage["topics"]["by_slug"][oldslug]
|
||||
storage["topics"]["by_oid"][oid] = storage["topics"]["by_slug"][newslug]
|
||||
print("[migration] " + str(counter) + " topics migrated")
|
||||
print(
|
||||
"[migration] "
|
||||
+ str(len(storage["topics"]["by_oid"].values()))
|
||||
+ " topics by oid"
|
||||
)
|
||||
print(
|
||||
"[migration] "
|
||||
+ str(len(storage["topics"]["by_slug"].values()))
|
||||
+ " topics by slug"
|
||||
)
|
||||
print("[migration] " + str(len(storage["topics"]["by_oid"].values())) + " topics by oid")
|
||||
print("[migration] " + str(len(storage["topics"]["by_slug"].values())) + " topics by slug")
|
||||
|
||||
|
||||
async def shouts_handle(storage, args):
|
||||
@@ -117,9 +111,10 @@ async def shouts_handle(storage, args):
|
||||
|
||||
# print main counter
|
||||
counter += 1
|
||||
print('[migration] shouts_handle %d: %s @%s' % (
|
||||
(counter + 1), shout_dict["slug"], author["slug"]
|
||||
))
|
||||
print(
|
||||
"[migration] shouts_handle %d: %s @%s"
|
||||
% ((counter + 1), shout_dict["slug"], author["slug"])
|
||||
)
|
||||
|
||||
b = bs4.BeautifulSoup(shout_dict["body"], "html.parser")
|
||||
texts = [shout_dict["title"].lower().replace(r"[^а-яА-Яa-zA-Z]", "")]
|
||||
@@ -138,13 +133,13 @@ async def shouts_handle(storage, args):
|
||||
print("[migration] " + str(anonymous_author) + " authored by @anonymous")
|
||||
|
||||
|
||||
async def remarks_handle(storage):
    """Migrate every entry of ``storage["remarks"]["data"]`` and report the count.

    Each entry is awaited through ``migrateRemark(entry, storage)``; the value
    it returns is not used here.

    :param storage: migration storage holding ``storage["remarks"]["data"]``
    """
    print("[migration] comments")
    migrated = 0
    for raw_remark in storage["remarks"]["data"]:
        # only the call itself matters; its result is intentionally ignored
        await migrateRemark(raw_remark, storage)
        migrated += 1
    print("[migration] " + str(migrated) + " remarks migrated")
|
||||
# async def remarks_handle(storage):
|
||||
# print("[migration] comments")
|
||||
# c = 0
|
||||
# for entry_remark in storage["remarks"]["data"]:
|
||||
# remark = await migrateRemark(entry_remark, storage)
|
||||
# c += 1
|
||||
# print("[migration] " + str(c) + " remarks migrated")
|
||||
|
||||
|
||||
async def comments_handle(storage):
|
||||
@@ -155,9 +150,9 @@ async def comments_handle(storage):
|
||||
for oldcomment in storage["reactions"]["data"]:
|
||||
if not oldcomment.get("deleted"):
|
||||
reaction = await migrateComment(oldcomment, storage)
|
||||
if type(reaction) == str:
|
||||
if isinstance(reaction, str):
|
||||
missed_shouts[reaction] = oldcomment
|
||||
elif type(reaction) == Reaction:
|
||||
elif isinstance(reaction, Reaction):
|
||||
reaction = reaction.dict()
|
||||
rid = reaction["id"]
|
||||
oid = reaction["oid"]
|
||||
@@ -214,9 +209,7 @@ def data_load():
|
||||
tags_data = json.loads(open("migration/data/tags.json").read())
|
||||
storage["topics"]["tags"] = tags_data
|
||||
print("[migration.load] " + str(len(tags_data)) + " tags ")
|
||||
cats_data = json.loads(
|
||||
open("migration/data/content_item_categories.json").read()
|
||||
)
|
||||
cats_data = json.loads(open("migration/data/content_item_categories.json").read())
|
||||
storage["topics"]["cats"] = cats_data
|
||||
print("[migration.load] " + str(len(cats_data)) + " cats ")
|
||||
comments_data = json.loads(open("migration/data/comments.json").read())
|
||||
@@ -235,11 +228,7 @@ def data_load():
|
||||
storage["users"]["by_oid"][x["_id"]] = x
|
||||
# storage['users']['by_slug'][x['slug']] = x
|
||||
# no user.slug yet
|
||||
print(
|
||||
"[migration.load] "
|
||||
+ str(len(storage["users"]["by_oid"].keys()))
|
||||
+ " users by oid"
|
||||
)
|
||||
print("[migration.load] " + str(len(storage["users"]["by_oid"].keys())) + " users by oid")
|
||||
for x in tags_data:
|
||||
storage["topics"]["by_oid"][x["_id"]] = x
|
||||
storage["topics"]["by_slug"][x["slug"]] = x
|
||||
@@ -247,9 +236,7 @@ def data_load():
|
||||
storage["topics"]["by_oid"][x["_id"]] = x
|
||||
storage["topics"]["by_slug"][x["slug"]] = x
|
||||
print(
|
||||
"[migration.load] "
|
||||
+ str(len(storage["topics"]["by_slug"].keys()))
|
||||
+ " topics by slug"
|
||||
"[migration.load] " + str(len(storage["topics"]["by_slug"].keys())) + " topics by slug"
|
||||
)
|
||||
for item in content_data:
|
||||
slug = get_shout_slug(item)
|
||||
|
@@ -1,8 +1,9 @@
|
||||
import gc
|
||||
import json
|
||||
import os
|
||||
|
||||
import bson
|
||||
import gc
|
||||
|
||||
from .utils import DateTimeEncoder
|
||||
|
||||
|
||||
@@ -15,10 +16,10 @@ def json_tables():
|
||||
"email_subscriptions": [],
|
||||
"users": [],
|
||||
"comments": [],
|
||||
"remarks": []
|
||||
"remarks": [],
|
||||
}
|
||||
for table in data.keys():
|
||||
print('[migration] bson2json for ' + table)
|
||||
print("[migration] bson2json for " + table)
|
||||
gc.collect()
|
||||
lc = []
|
||||
bs = open("dump/discours/" + table + ".bson", "rb").read()
|
||||
|
@@ -71,47 +71,29 @@ def export_slug(slug, storage):
|
||||
|
||||
|
||||
def export_email_subscriptions():
    """Load the dumped e-mail subscriptions and report how many there are.

    Reads ``migration/data/email_subscriptions.json`` (relative to the current
    working directory).  Nothing is exported yet: the per-item migration is
    still a TODO, so the function only prints the number of loaded records.
    """
    # the context manager closes the file even if JSON parsing fails
    with open("migration/data/email_subscriptions.json") as subscriptions_file:
        email_subscriptions_data = json.load(subscriptions_file)
    # TODO: migrate to mailgun list manually
    # for data in email_subscriptions_data:
    #     migrate_email_subscription(data)
    print("[migration] " + str(len(email_subscriptions_data)) + " email subscriptions exported")
|
||||
|
||||
|
||||
def export_shouts(storage):
    """Export every shout found in ``storage["shouts"]["by_slugs"]``.

    When the authors or shouts maps in *storage* are empty they are reloaded
    from ``authors.json`` / ``articles.json`` under ``EXPORT_DEST`` first.
    Each slug is then handed to ``export_slug(slug, storage)``.

    :param storage: migration storage; its ``["users"]["by_slugs"]`` and
        ``["shouts"]["by_slugs"]`` entries may be replaced in place
    """
    # update what was just migrated or load json again
    if not storage["users"]["by_slugs"]:
        with open(EXPORT_DEST + "authors.json") as authors_file:
            storage["users"]["by_slugs"] = json.load(authors_file)
        print("[migration] " + str(len(storage["users"]["by_slugs"].keys())) + " exported authors ")
    if not storage["shouts"]["by_slugs"]:
        with open(EXPORT_DEST + "articles.json") as articles_file:
            storage["shouts"]["by_slugs"] = json.load(articles_file)
        print(
            "[migration] " + str(len(storage["shouts"]["by_slugs"].keys())) + " exported articles "
        )
    for slug in storage["shouts"]["by_slugs"].keys():
        export_slug(slug, storage)
|
||||
|
||||
|
||||
def export_json(
|
||||
export_articles={}, export_authors={}, export_topics={}, export_comments={}
|
||||
):
|
||||
def export_json(export_articles={}, export_authors={}, export_topics={}, export_comments={}):
|
||||
open(EXPORT_DEST + "authors.json", "w").write(
|
||||
json.dumps(
|
||||
export_authors,
|
||||
@@ -152,8 +134,4 @@ def export_json(
|
||||
ensure_ascii=False,
|
||||
)
|
||||
)
|
||||
print(
|
||||
"[migration] "
|
||||
+ str(len(export_comments.items()))
|
||||
+ " exported articles with comments"
|
||||
)
|
||||
print("[migration] " + str(len(export_comments.items())) + " exported articles with comments")
|
||||
|
@@ -1,11 +1,8 @@
|
||||
import base64
|
||||
import os
|
||||
import re
|
||||
import uuid
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
TOOLTIP_REGEX = r"(\/\/\/(.+)\/\/\/)"
|
||||
contentDir = os.path.join(
|
||||
os.path.dirname(os.path.realpath(__file__)), "..", "..", "discoursio-web", "content"
|
||||
@@ -27,76 +24,79 @@ def replace_tooltips(body):
|
||||
return newbody
|
||||
|
||||
|
||||
|
||||
def extract_footnotes(body, shout_dict):
    """Turn "&&&"-delimited footnote spans of *body* into FOOTNOTE reactions.

    The body is split on ``"&&&"``; the odd-numbered parts are the spans that
    sat between a pair of delimiters.  A span that contains an
    ``a class="footnote-url" href="..."`` anchor is stored through
    ``Reaction.create`` (kind ``ReactionKind.FOOTNOTE``) and replaced in the
    text by an info-icon link; any other span is kept unchanged.  Nothing is
    processed unless the split produces an odd number of parts greater than 1.

    ``local_session``, ``Reaction`` and ``ReactionKind`` are not defined in
    this block and must be in scope in the module.

    :param body: text to scan
    :param shout_dict: mapping whose ``"id"`` is attached to each footnote
    :return: ``(new_body, placed)`` - the parts joined back *without* the
        ``"&&&"`` delimiters, and a flag that is True as soon as one delimited
        span has been visited (whether or not it held a footnote anchor)
    """
    # The opening quote is part of the marker so that the split below can
    # never miss: the previous check omitted it and crashed on href='...'.
    fn = 'a class="footnote-url" href="'
    parts = body.split("&&&")
    total = len(parts)
    newparts = list(parts)
    placed = False
    if total & 1 and total > 1:
        print("[extract] found %d footnotes in body" % (total - 1))
        for i in range(1, total, 2):
            part = parts[i]
            placed = True
            if fn not in part:
                continue
            print("[extract] footnote: " + part)
            anchor_tail = part.split(fn, 1)[1]
            # was assigned to a misspelt name and then read back as
            # `extracted_link` / `link`, which raised NameError
            extracted_link = anchor_tail.split('"', 1)[0]
            extracted_body = anchor_tail.split(">", 1)[1].split("</a>", 1)[0]
            print("[extract] footnote link: " + extracted_link)
            # NOTE(review): offsets are computed exactly as before; the end
            # offset does not add len(extracted_body) - confirm that is intended.
            footnote_range = (
                str(body.index(fn + extracted_link) - len("<"))
                + ":"
                + str(body.index(extracted_body) + len("</a>"))
            )
            with local_session():
                Reaction.create(
                    {
                        "shout": shout_dict["id"],
                        "kind": ReactionKind.FOOTNOTE,
                        "body": extracted_body,
                        "range": footnote_range,
                    }
                )
            newparts[i] = "<a href='#'>ℹ️</a>"
    return ("".join(newparts), placed)
|
||||
# def extract_footnotes(body, shout_dict):
|
||||
# parts = body.split("&&&")
|
||||
# lll = len(parts)
|
||||
# newparts = list(parts)
|
||||
# placed = False
|
||||
# if lll & 1:
|
||||
# if lll > 1:
|
||||
# i = 1
|
||||
# print("[extract] found %d footnotes in body" % (lll - 1))
|
||||
# for part in parts[1:]:
|
||||
# if i & 1:
|
||||
# placed = True
|
||||
# if 'a class="footnote-url" href=' in part:
|
||||
# print("[extract] footnote: " + part)
|
||||
# fn = 'a class="footnote-url" href="'
|
||||
# exxtracted_link = part.split(fn, 1)[1].split('"', 1)[0]
|
||||
# extracted_body = part.split(fn, 1)[1].split(">", 1)[1].split("</a>", 1)[0]
|
||||
# print("[extract] footnote link: " + extracted_link)
|
||||
# with local_session() as session:
|
||||
# Reaction.create(
|
||||
# {
|
||||
# "shout": shout_dict["id"],
|
||||
# "kind": ReactionKind.FOOTNOTE,
|
||||
# "body": extracted_body,
|
||||
# "range": str(body.index(fn + link) - len("<"))
|
||||
# + ":"
|
||||
# + str(body.index(extracted_body) + len("</a>")),
|
||||
# }
|
||||
# )
|
||||
# newparts[i] = "<a href='#'>ℹ️</a>"
|
||||
# else:
|
||||
# newparts[i] = part
|
||||
# i += 1
|
||||
# return ("".join(newparts), placed)
|
||||
|
||||
|
||||
def place_tooltips(body):
    """Wrap "&&&"-delimited spans of *body* in ``<Tooltip>`` tags.

    The body is split on ``"&&&"``; the odd-numbered parts are the spans that
    sat between a pair of delimiters.  A span containing an
    ``a class="footnote-url" href="..."`` anchor becomes
    ``<Tooltip link="HREF" >...</Tooltip>``; every other span becomes
    ``<Tooltip>span</Tooltip>``.  Nothing is changed unless the split
    produces an odd number of parts greater than 1.

    :param body: text to scan
    :return: ``(new_body, placed)`` - the parts joined back *without* the
        ``"&&&"`` delimiters, and a flag that is True when at least one span
        was wrapped
    """
    # The opening quote belongs to the marker: the previous check left it out,
    # so an anchor written as href='...' passed the check and then crashed
    # with IndexError on the split.  Such spans now get the plain wrapper.
    fn = 'a class="footnote-url" href="'
    parts = body.split("&&&")
    total = len(parts)
    newparts = list(parts)
    placed = False
    if total & 1 and total > 1:
        print("[extract] found %d tooltips" % (total - 1))
        for i in range(1, total, 2):
            part = parts[i]
            placed = True
            if fn in part:
                print("[extract] footnote: " + part)
                before, after = part.split(fn, 1)
                link = after.split('"', 1)[0]
                # text before the anchor plus whatever follows the first "/"
                extracted_part = before + " " + part.split("/", 1)[-1]
                newparts[i] = (
                    "<Tooltip"
                    + (' link="' + link + '" ' if link else "")
                    + ">"
                    + extracted_part
                    + "</Tooltip>"
                )
            else:
                newparts[i] = "<Tooltip>%s</Tooltip>" % part
    return ("".join(newparts), placed)
|
||||
# def place_tooltips(body):
|
||||
# parts = body.split("&&&")
|
||||
# lll = len(parts)
|
||||
# newparts = list(parts)
|
||||
# placed = False
|
||||
# if lll & 1:
|
||||
# if lll > 1:
|
||||
# i = 1
|
||||
# print("[extract] found %d tooltips" % (lll - 1))
|
||||
# for part in parts[1:]:
|
||||
# if i & 1:
|
||||
# placed = True
|
||||
# if 'a class="footnote-url" href=' in part:
|
||||
# print("[extract] footnote: " + part)
|
||||
# fn = 'a class="footnote-url" href="'
|
||||
# link = part.split(fn, 1)[1].split('"', 1)[0]
|
||||
# extracted_part = part.split(fn, 1)[0] + " " + part.split("/", 1)[-1]
|
||||
# newparts[i] = (
|
||||
# "<Tooltip"
|
||||
# + (' link="' + link + '" ' if link else "")
|
||||
# + ">"
|
||||
# + extracted_part
|
||||
# + "</Tooltip>"
|
||||
# )
|
||||
# else:
|
||||
# newparts[i] = "<Tooltip>%s</Tooltip>" % part
|
||||
# # print('[extract] ' + newparts[i])
|
||||
# else:
|
||||
# # print('[extract] ' + part[:10] + '..')
|
||||
# newparts[i] = part
|
||||
# i += 1
|
||||
# return ("".join(newparts), placed)
|
||||
|
||||
|
||||
IMG_REGEX = r"\!\[(.*?)\]\((data\:image\/(png|jpeg|jpg);base64\,((?:[A-Za-z\d+\/]{4})*(?:[A-Za-z\d+\/]{3}="
|
||||
IMG_REGEX = (
|
||||
r"\!\[(.*?)\]\((data\:image\/(png|jpeg|jpg);base64\,((?:[A-Za-z\d+\/]{4})*(?:[A-Za-z\d+\/]{3}="
|
||||
)
|
||||
IMG_REGEX += r"|[A-Za-z\d+\/]{2}==)))\)"
|
||||
|
||||
parentDir = "/".join(os.getcwd().split("/")[:-1])
|
||||
@@ -104,29 +104,29 @@ public = parentDir + "/discoursio-web/public"
|
||||
cache = {}
|
||||
|
||||
|
||||
def reextract_images(body, oid):
    """Write inline base64 images found in *body* to disk, skipping the first.

    Every ``IMG_REGEX`` match except the first one is decoded and written to
    ``"../" + public + "/upload/image-<oid><n>.<ext>"``.  Payloads already
    present in the module-level ``cache`` are not written again.

    :param body: markdown text to scan
    :param oid: string prefix used to build the image file names
    :return: *body*, unchanged
    """
    # change if you prefer regexp
    matches = list(re.finditer(IMG_REGEX, body, re.IGNORECASE | re.MULTILINE))[1:]
    i = 0
    for match in matches:
        print("[extract] image " + match.group(1))
        ext = match.group(3)
        name = oid + str(i)
        link = public + "/upload/image-" + name + "." + ext
        img = match.group(4)
        if img not in cache:
            content = base64.b64decode(img + "==")
            print(str(len(img)) + " image bytes been written")
            # context manager so the handle is closed right after the write
            with open("../" + link, "wb") as image_file:
                image_file.write(content)
            cache[img] = name
            i += 1
        else:
            print("[extract] image cached " + cache[img])
        # NOTE(review): the former `body.replace(str(match), "")` was dropped.
        # Its result was discarded and `str(match)` is the Match repr, so it
        # never altered the text.  Decide whether matched images should be
        # removed from or re-linked in the returned body.
    return body
|
||||
# def reextract_images(body, oid):
|
||||
# # change if you prefer regexp
|
||||
# matches = list(re.finditer(IMG_REGEX, body, re.IGNORECASE | re.MULTILINE))[1:]
|
||||
# i = 0
|
||||
# for match in matches:
|
||||
# print("[extract] image " + match.group(1))
|
||||
# ext = match.group(3)
|
||||
# name = oid + str(i)
|
||||
# link = public + "/upload/image-" + name + "." + ext
|
||||
# img = match.group(4)
|
||||
# title = match.group(1) # NOTE: this is not the title
|
||||
# if img not in cache:
|
||||
# content = base64.b64decode(img + "==")
|
||||
# print(str(len(img)) + " image bytes been written")
|
||||
# open("../" + link, "wb").write(content)
|
||||
# cache[img] = name
|
||||
# i += 1
|
||||
# else:
|
||||
# print("[extract] image cached " + cache[img])
|
||||
# body.replace(
|
||||
# str(match), ""
|
||||
# ) # WARNING: this does not work
|
||||
# return body
|
||||
|
||||
|
||||
IMAGES = {
|
||||
@@ -137,163 +137,11 @@ IMAGES = {
|
||||
|
||||
b64 = ";base64,"
|
||||
|
||||
|
||||
def extract_imageparts(bodyparts, prefix):
|
||||
# recursive loop
|
||||
newparts = list(bodyparts)
|
||||
for current in bodyparts:
|
||||
i = bodyparts.index(current)
|
||||
for mime in IMAGES.keys():
|
||||
if mime == current[-len(mime) :] and (i + 1 < len(bodyparts)):
|
||||
print("[extract] " + mime)
|
||||
next = bodyparts[i + 1]
|
||||
ext = IMAGES[mime]
|
||||
b64end = next.index(")")
|
||||
b64encoded = next[:b64end]
|
||||
name = prefix + "-" + str(len(cache))
|
||||
link = "/upload/image-" + name + "." + ext
|
||||
print("[extract] name: " + name)
|
||||
print("[extract] link: " + link)
|
||||
print("[extract] %d bytes" % len(b64encoded))
|
||||
if b64encoded not in cache:
|
||||
try:
|
||||
content = base64.b64decode(b64encoded + "==")
|
||||
open(public + link, "wb").write(content)
|
||||
print(
|
||||
"[extract] "
|
||||
+ str(len(content))
|
||||
+ " image bytes been written"
|
||||
)
|
||||
cache[b64encoded] = name
|
||||
except Exception:
|
||||
raise Exception
|
||||
# raise Exception('[extract] error decoding image %r' %b64encoded)
|
||||
else:
|
||||
print("[extract] cached link " + cache[b64encoded])
|
||||
name = cache[b64encoded]
|
||||
link = cdn + "/upload/image-" + name + "." + ext
|
||||
newparts[i] = (
|
||||
current[: -len(mime)]
|
||||
+ current[-len(mime) :]
|
||||
+ link
|
||||
+ next[-b64end:]
|
||||
)
|
||||
newparts[i + 1] = next[:-b64end]
|
||||
break
|
||||
return (
|
||||
extract_imageparts(
|
||||
newparts[i] + newparts[i + 1] + b64.join(bodyparts[(i + 2) :]), prefix
|
||||
)
|
||||
if len(bodyparts) > (i + 1)
|
||||
else "".join(newparts)
|
||||
)
|
||||
|
||||
|
||||
def extract_dataimages(parts, prefix):
    """Replace inline base64 image payloads with uploaded-file links.

    For every part that ends with ``"]("`` the following part is read as
    ``<ext>;base64,<payload>)<rest>``.  The payload is decoded and written
    under ``public`` (unless it is already in the module-level ``cache``) and
    the following part is rewritten to ``cdn`` link + ``)<rest>``.

    :param parts: list of text pieces; presumably the body split on
        ``"data:image"`` - verify against the caller
    :param prefix: string used to build new image file names
    :return: all (patched) parts concatenated
    :raises Exception: when the closing ``")"`` of a payload is missing or the
        image cannot be decoded/written
    :raises ValueError: when the part after ``"]("`` has no ``";base64,"``
    """
    newparts = list(parts)
    # enumerate instead of parts.index(part): identical prefixes such as
    # "\n![](" repeat, and index() always pointed at the first of them
    for i, part in enumerate(parts):
        if not part.endswith("]("):
            print("[extract] dataimage skipping part " + str(i))
            continue
        ext, rest = parts[i + 1].split(b64, 1)
        name = prefix + "-" + str(len(cache))
        ext = "jpg" if ext == "/jpeg" else ext.replace("/", "")
        link = "/upload/image-" + name + "." + ext
        print("[extract] filename: " + link)
        b64end = rest.find(")")
        if b64end == -1:
            raise Exception("cannot find the end of base64 encoded string")
        b64encoded = rest[:b64end]
        print("[extract] %d text bytes" % len(b64encoded))
        # write if not cached
        if b64encoded not in cache:
            try:
                content = base64.b64decode(b64encoded + "==")
                with open(public + link, "wb") as image_file:
                    image_file.write(content)
            except Exception as err:
                # keep the original cause instead of a bare, message-less Exception
                raise Exception(
                    "[extract] error decoding or writing image %r..." % b64encoded[:32]
                ) from err
            print("[extract] " + str(len(content)) + " image bytes")
            cache[b64encoded] = name
        else:
            print("[extract] 0 image bytes, cached for " + cache[b64encoded])
            name = cache[b64encoded]

        # update link with CDN
        link = cdn + "/upload/image-" + name + "." + ext

        # patch newparts
        newparts[i + 1] = link + rest[b64end:]
    return "".join(newparts)
|
||||
|
||||
|
||||
di = "data:image"
|
||||
|
||||
|
||||
def extract_md_images(body, prefix):
    """Extract inline ``data:image`` payloads from a markdown *body*.

    Image prefixes are first normalised to ``![](`` and the body is then split
    on ``di`` (``"data:image"``).  If any payload is present the pieces are
    handed to ``extract_dataimages``; otherwise the body is returned as is.

    NOTE(review): the three replacement targets below were truncated in the
    text this block was recovered from and have been restored as the ``![](``
    form implied by the surviving prefixes - confirm against the repository.

    :param body: markdown text
    :param prefix: string passed through to ``extract_dataimages``
    :return: body with inline images replaced by links
    """
    body = (
        body.replace("\n! [](" + di, "\n ![](" + di)
        .replace("\n[](" + di, "\n![](" + di)
        .replace(" [](" + di, " ![](" + di)
    )
    parts = body.split(di)
    if len(parts) > 1:
        return extract_dataimages(parts, prefix)
    return body
|
||||
|
||||
|
||||
def cleanup_md(body):
    """Strip markup leftovers and normalise odd characters in markdown text.

    The substitutions are applied one after another in the listed order, so a
    later rule sees the output of the earlier ones.

    :param body: markdown text
    :return: cleaned text
    """
    # (old, new) pairs, order-sensitive.  The non-breaking-space rule used to
    # be listed twice; the second pass could never match and was removed.
    replacements = (
        ("<", ""),
        (">", ""),
        ("{", "("),
        ("}", ")"),
        ("…", "..."),
        (" __ ", " "),
        ("_ _", " "),
        ("****", ""),
        ("\u00a0", " "),
        ("\u02c6", "^"),
        ("\ufeff", ""),
        ("\u200b", ""),
        ("\u200c", ""),
    )  # .replace('\u2212', '-')
    newbody = body
    for old, new in replacements:
        newbody = newbody.replace(old, new)
    return newbody
|
||||
|
||||
|
||||
def extract_md(body, shout_dict=None):
    """Clean a markdown *body* and, when a shout is given, extract its media.

    An empty/falsy body is returned untouched.  Otherwise the body goes
    through ``cleanup_md``; when *shout_dict* is truthy, inline images are
    extracted with ``extract_md_images`` and footnotes with
    ``extract_footnotes``.

    :param body: markdown text (may be empty or None)
    :param shout_dict: optional mapping with an ``"id"`` key
    :return: the processed body
    :raises Exception: when one of the steps yields an empty result
    """
    newbody = body
    if newbody:
        newbody = cleanup_md(newbody)
        if not newbody:
            raise Exception("cleanup error")

        if shout_dict:
            # the prefix is concatenated with "-" further down, so it has to be
            # a string: a UUID object (or a numeric id) raised TypeError there
            uid = str(shout_dict["id"] or uuid.uuid4())
            newbody = extract_md_images(newbody, uid)
            if not newbody:
                raise Exception("extract_images error")

            # NOTE(review): footnotes are extracted from the raw `body`, which
            # discards the cleanup and image extraction done above.  Kept as
            # is because cleanup_md strips "<" and ">" that the footnote
            # parser splits on - confirm the intended order of these steps.
            newbody, _placed = extract_footnotes(body, shout_dict)
            if not newbody:
                raise Exception("extract_footnotes error")

    return newbody
|
||||
|
||||
|
||||
def extract_media(entry):
|
||||
''' normalized media extraction method '''
|
||||
"""normalized media extraction method"""
|
||||
# media [ { title pic url body } ]}
|
||||
kind = entry.get("type")
|
||||
if not kind:
|
||||
@@ -323,12 +171,7 @@ def extract_media(entry):
|
||||
url = "https://vimeo.com/" + m["vimeoId"]
|
||||
# body
|
||||
body = m.get("body") or m.get("literatureBody") or ""
|
||||
media.append({
|
||||
"url": url,
|
||||
"pic": pic,
|
||||
"title": title,
|
||||
"body": body
|
||||
})
|
||||
media.append({"url": url, "pic": pic, "title": title, "body": body})
|
||||
return media
|
||||
|
||||
|
||||
@@ -398,9 +241,7 @@ def cleanup_html(body: str) -> str:
|
||||
r"<h4>\s*</h4>",
|
||||
r"<div>\s*</div>",
|
||||
]
|
||||
regex_replace = {
|
||||
r"<br>\s*</p>": "</p>"
|
||||
}
|
||||
regex_replace = {r"<br>\s*</p>": "</p>"}
|
||||
changed = True
|
||||
while changed:
|
||||
# we need several iterations to clean nested tags this way
|
||||
@@ -414,16 +255,17 @@ def cleanup_html(body: str) -> str:
|
||||
changed = True
|
||||
return new_body
|
||||
|
||||
def extract_html(entry, shout_id = None, cleanup=False):
|
||||
body_orig = (entry.get("body") or "").replace('\(', '(').replace('\)', ')')
|
||||
|
||||
def extract_html(entry, shout_id=None, cleanup=False):
|
||||
body_orig = (entry.get("body") or "").replace(r"\(", "(").replace(r"\)", ")")
|
||||
if cleanup:
|
||||
# we do that before bs parsing to catch the invalid html
|
||||
body_clean = cleanup_html(body_orig)
|
||||
if body_clean != body_orig:
|
||||
print(f"[migration] html cleaned for slug {entry.get('slug', None)}")
|
||||
body_orig = body_clean
|
||||
if shout_id:
|
||||
extract_footnotes(body_orig, shout_id)
|
||||
# if shout_id:
|
||||
# extract_footnotes(body_orig, shout_id)
|
||||
body_html = str(BeautifulSoup(body_orig, features="html.parser"))
|
||||
if cleanup:
|
||||
# we do that after bs parsing because it can add dummy tags
|
||||
|
@@ -33,7 +33,7 @@ __version__ = (2020, 1, 16)
|
||||
# TODO: Support decoded entities with UNIFIABLE.
|
||||
|
||||
|
||||
class HTML2Text(html.parser.HTMLParser):
|
||||
class HTML2Text(html.parser.HTMLParser): # noqa: C901
|
||||
def __init__(
|
||||
self,
|
||||
out: Optional[OutCallback] = None,
|
||||
@@ -85,7 +85,7 @@ class HTML2Text(html.parser.HTMLParser):
|
||||
self.tag_callback = None
|
||||
self.open_quote = config.OPEN_QUOTE # covered in cli
|
||||
self.close_quote = config.CLOSE_QUOTE # covered in cli
|
||||
self.header_id = None
|
||||
self.header_id: str | None = None
|
||||
self.span_highlight = False
|
||||
self.span_lead = False
|
||||
|
||||
@@ -119,9 +119,7 @@ class HTML2Text(html.parser.HTMLParser):
|
||||
self.lastWasList = False
|
||||
self.style = 0
|
||||
self.style_def = {} # type: Dict[str, Dict[str, str]]
|
||||
self.tag_stack = (
|
||||
[]
|
||||
) # type: List[Tuple[str, Dict[str, Optional[str]], Dict[str, str]]]
|
||||
self.tag_stack = [] # type: List[Tuple[str, Dict[str, Optional[str]], Dict[str, str]]]
|
||||
self.emphasis = 0
|
||||
self.drop_white_space = 0
|
||||
self.inheader = False
|
||||
@@ -227,7 +225,7 @@ class HTML2Text(html.parser.HTMLParser):
|
||||
return i
|
||||
return None
|
||||
|
||||
def handle_emphasis(
|
||||
def handle_emphasis( # noqa: C901
|
||||
self, start: bool, tag_style: Dict[str, str], parent_style: Dict[str, str]
|
||||
) -> None:
|
||||
"""
|
||||
@@ -300,7 +298,7 @@ class HTML2Text(html.parser.HTMLParser):
|
||||
if strikethrough:
|
||||
self.quiet -= 1
|
||||
|
||||
def handle_tag(
|
||||
def handle_tag( # noqa: C901
|
||||
self, tag: str, attrs: Dict[str, Optional[str]], start: bool
|
||||
) -> None:
|
||||
self.current_tag = tag
|
||||
@@ -333,9 +331,7 @@ class HTML2Text(html.parser.HTMLParser):
|
||||
tag_style = element_style(attrs, self.style_def, parent_style)
|
||||
self.tag_stack.append((tag, attrs, tag_style))
|
||||
else:
|
||||
dummy, attrs, tag_style = (
|
||||
self.tag_stack.pop() if self.tag_stack else (None, {}, {})
|
||||
)
|
||||
dummy, attrs, tag_style = self.tag_stack.pop() if self.tag_stack else (None, {}, {})
|
||||
if self.tag_stack:
|
||||
parent_style = self.tag_stack[-1][2]
|
||||
|
||||
@@ -385,11 +381,7 @@ class HTML2Text(html.parser.HTMLParser):
|
||||
):
|
||||
self.o("`") # NOTE: same as <code>
|
||||
self.span_highlight = True
|
||||
elif (
|
||||
self.current_class == "lead"
|
||||
and not self.inheader
|
||||
and not self.span_highlight
|
||||
):
|
||||
elif self.current_class == "lead" and not self.inheader and not self.span_highlight:
|
||||
# self.o("==") # NOTE: CriticMarkup {==
|
||||
self.span_lead = True
|
||||
else:
|
||||
@@ -479,11 +471,7 @@ class HTML2Text(html.parser.HTMLParser):
|
||||
and not self.span_lead
|
||||
and not self.span_highlight
|
||||
):
|
||||
if (
|
||||
start
|
||||
and self.preceding_data
|
||||
and self.preceding_data[-1] == self.strong_mark[0]
|
||||
):
|
||||
if start and self.preceding_data and self.preceding_data[-1] == self.strong_mark[0]:
|
||||
strong = " " + self.strong_mark
|
||||
self.preceding_data += " "
|
||||
else:
|
||||
@@ -548,13 +536,8 @@ class HTML2Text(html.parser.HTMLParser):
|
||||
"href" in attrs
|
||||
and not attrs["href"].startswith("#_ftn")
|
||||
and attrs["href"] is not None
|
||||
and not (
|
||||
self.skip_internal_links and attrs["href"].startswith("#")
|
||||
)
|
||||
and not (
|
||||
self.ignore_mailto_links
|
||||
and attrs["href"].startswith("mailto:")
|
||||
)
|
||||
and not (self.skip_internal_links and attrs["href"].startswith("#"))
|
||||
and not (self.ignore_mailto_links and attrs["href"].startswith("mailto:"))
|
||||
):
|
||||
self.astack.append(attrs)
|
||||
self.maybe_automatic_link = attrs["href"]
|
||||
@@ -591,7 +574,7 @@ class HTML2Text(html.parser.HTMLParser):
|
||||
|
||||
if tag == "img" and start and not self.ignore_images:
|
||||
# skip cloudinary images
|
||||
if "src" in attrs and "cloudinary" not in attrs["src"]:
|
||||
if "src" in attrs and ("cloudinary" not in attrs["src"]):
|
||||
assert attrs["src"] is not None
|
||||
if not self.images_to_alt:
|
||||
attrs["href"] = attrs["src"]
|
||||
@@ -638,9 +621,7 @@ class HTML2Text(html.parser.HTMLParser):
|
||||
self.o("![" + escape_md(alt) + "]")
|
||||
if self.inline_links:
|
||||
href = attrs.get("href") or ""
|
||||
self.o(
|
||||
"(" + escape_md(urlparse.urljoin(self.baseurl, href)) + ")"
|
||||
)
|
||||
self.o("(" + escape_md(urlparse.urljoin(self.baseurl, href)) + ")")
|
||||
else:
|
||||
i = self.previousIndex(attrs)
|
||||
if i is not None:
|
||||
@@ -696,9 +677,7 @@ class HTML2Text(html.parser.HTMLParser):
|
||||
# WARNING: does not line up <ol><li>s > 9 correctly.
|
||||
parent_list = None
|
||||
for list in self.list:
|
||||
self.o(
|
||||
" " if parent_list == "ol" and list.name == "ul" else " "
|
||||
)
|
||||
self.o(" " if parent_list == "ol" and list.name == "ul" else " ")
|
||||
parent_list = list.name
|
||||
|
||||
if li.name == "ul":
|
||||
@@ -787,7 +766,7 @@ class HTML2Text(html.parser.HTMLParser):
|
||||
self.pbr()
|
||||
self.br_toggle = " "
|
||||
|
||||
def o(
|
||||
def o( # noqa: C901
|
||||
self, data: str, puredata: bool = False, force: Union[bool, str] = False
|
||||
) -> None:
|
||||
"""
|
||||
@@ -864,9 +843,7 @@ class HTML2Text(html.parser.HTMLParser):
|
||||
self.out(" ")
|
||||
self.space = False
|
||||
|
||||
if self.a and (
|
||||
(self.p_p == 2 and self.links_each_paragraph) or force == "end"
|
||||
):
|
||||
if self.a and ((self.p_p == 2 and self.links_each_paragraph) or force == "end"):
|
||||
if force == "end":
|
||||
self.out("\n")
|
||||
|
||||
@@ -925,11 +902,7 @@ class HTML2Text(html.parser.HTMLParser):
|
||||
|
||||
if self.maybe_automatic_link is not None:
|
||||
href = self.maybe_automatic_link
|
||||
if (
|
||||
href == data
|
||||
and self.absolute_url_matcher.match(href)
|
||||
and self.use_automatic_links
|
||||
):
|
||||
if href == data and self.absolute_url_matcher.match(href) and self.use_automatic_links:
|
||||
self.o("<" + data + ">")
|
||||
self.empty_link = False
|
||||
return
|
||||
@@ -980,7 +953,7 @@ class HTML2Text(html.parser.HTMLParser):
|
||||
|
||||
return nest_count
|
||||
|
||||
def optwrap(self, text: str) -> str:
|
||||
def optwrap(self, text: str) -> str: # noqa: C901
|
||||
"""
|
||||
Wrap all paragraphs in the provided text.
|
||||
|
||||
@@ -1000,9 +973,7 @@ class HTML2Text(html.parser.HTMLParser):
|
||||
self.inline_links = False
|
||||
for para in text.split("\n"):
|
||||
if len(para) > 0:
|
||||
if not skipwrap(
|
||||
para, self.wrap_links, self.wrap_list_items, self.wrap_tables
|
||||
):
|
||||
if not skipwrap(para, self.wrap_links, self.wrap_list_items, self.wrap_tables):
|
||||
indent = ""
|
||||
if para.startswith(" " + self.ul_item_mark):
|
||||
# list item continuation: add a double indent to the
|
||||
@@ -1043,12 +1014,10 @@ class HTML2Text(html.parser.HTMLParser):
|
||||
return result
|
||||
|
||||
|
||||
def html2text(html: str, baseurl: str = "", bodywidth: int = config.BODY_WIDTH) -> str:
    """Convert an HTML string to text with ``HTML2Text``.

    :param html: HTML source; surrounding whitespace is ignored
    :param baseurl: passed to ``HTML2Text`` as ``baseurl``
    :param bodywidth: passed to ``HTML2Text`` as ``bodywidth``
    :return: the converted text, or ``""`` when *html* is blank
    """
    stripped = html.strip()
    if not stripped:
        return ""
    h2t = HTML2Text(baseurl=baseurl, bodywidth=bodywidth)
    # print('[html2text] %d bytes' % len(html))
    return h2t.handle(stripped)
|
||||
|
@@ -117,10 +117,7 @@ def main() -> None:
|
||||
dest="images_with_size",
|
||||
action="store_true",
|
||||
default=config.IMAGES_WITH_SIZE,
|
||||
help=(
|
||||
"Write image tags with height and width attrs as raw html to retain "
|
||||
"dimensions"
|
||||
),
|
||||
help=("Write image tags with height and width attrs as raw html to retain " "dimensions"),
|
||||
)
|
||||
p.add_argument(
|
||||
"-g",
|
||||
@@ -260,9 +257,7 @@ def main() -> None:
|
||||
default=config.CLOSE_QUOTE,
|
||||
help="The character used to close quotes",
|
||||
)
|
||||
p.add_argument(
|
||||
"--version", action="version", version=".".join(map(str, __version__))
|
||||
)
|
||||
p.add_argument("--version", action="version", version=".".join(map(str, __version__)))
|
||||
p.add_argument("filename", nargs="?")
|
||||
p.add_argument("encoding", nargs="?", default="utf-8")
|
||||
args = p.parse_args()
|
||||
|
@@ -4,9 +4,7 @@ from typing import Dict, List, Optional
|
||||
from . import config
|
||||
|
||||
unifiable_n = {
|
||||
html.entities.name2codepoint[k]: v
|
||||
for k, v in config.UNIFIABLE.items()
|
||||
if k != "nbsp"
|
||||
html.entities.name2codepoint[k]: v for k, v in config.UNIFIABLE.items() if k != "nbsp"
|
||||
}
|
||||
|
||||
|
||||
@@ -68,12 +66,14 @@ def element_style(
|
||||
:rtype: dict
|
||||
"""
|
||||
style = parent_style.copy()
|
||||
if attrs.get("class"):
|
||||
for css_class in attrs["class"].split():
|
||||
attrs_class = attrs.get("class")
|
||||
if attrs_class:
|
||||
for css_class in attrs_class.split():
|
||||
css_style = style_def.get("." + css_class, {})
|
||||
style.update(css_style)
|
||||
if attrs.get("style"):
|
||||
immediate_style = dumb_property_dict(attrs["style"])
|
||||
attrs_style = attrs.get("style")
|
||||
if attrs_style:
|
||||
immediate_style = dumb_property_dict(attrs_style)
|
||||
style.update(immediate_style)
|
||||
|
||||
return style
|
||||
@@ -147,18 +147,17 @@ def list_numbering_start(attrs: Dict[str, Optional[str]]) -> int:
|
||||
|
||||
:rtype: int or None
|
||||
"""
|
||||
if attrs.get("start"):
|
||||
attrs_start = attrs.get("start")
|
||||
if attrs_start:
|
||||
try:
|
||||
return int(attrs["start"]) - 1
|
||||
return int(attrs_start) - 1
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
def skipwrap(
|
||||
para: str, wrap_links: bool, wrap_list_items: bool, wrap_tables: bool
|
||||
) -> bool:
|
||||
def skipwrap(para: str, wrap_links: bool, wrap_list_items: bool, wrap_tables: bool) -> bool:
|
||||
# If it appears to contain a link
|
||||
# don't wrap
|
||||
if not wrap_links and config.RE_LINK.search(para):
|
||||
@@ -236,9 +235,7 @@ def reformat_table(lines: List[str], right_margin: int) -> List[str]:
|
||||
max_width += [len(x) + right_margin for x in cols[-(num_cols - max_cols) :]]
|
||||
max_cols = num_cols
|
||||
|
||||
max_width = [
|
||||
max(len(x) + right_margin, old_len) for x, old_len in zip(cols, max_width)
|
||||
]
|
||||
max_width = [max(len(x) + right_margin, old_len) for x, old_len in zip(cols, max_width)]
|
||||
|
||||
# reformat
|
||||
new_lines = []
|
||||
@@ -247,15 +244,13 @@ def reformat_table(lines: List[str], right_margin: int) -> List[str]:
|
||||
if set(line.strip()) == set("-|"):
|
||||
filler = "-"
|
||||
new_cols = [
|
||||
x.rstrip() + (filler * (M - len(x.rstrip())))
|
||||
for x, M in zip(cols, max_width)
|
||||
x.rstrip() + (filler * (M - len(x.rstrip()))) for x, M in zip(cols, max_width)
|
||||
]
|
||||
new_lines.append("|-" + "|".join(new_cols) + "|")
|
||||
else:
|
||||
filler = " "
|
||||
new_cols = [
|
||||
x.rstrip() + (filler * (M - len(x.rstrip())))
|
||||
for x, M in zip(cols, max_width)
|
||||
x.rstrip() + (filler * (M - len(x.rstrip()))) for x, M in zip(cols, max_width)
|
||||
]
|
||||
new_lines.append("| " + "|".join(new_cols) + "|")
|
||||
return new_lines
|
||||
|
@@ -1 +0,0 @@
|
||||
__all__ = (["users", "topics", "content_items", "comments"],)
|
@@ -5,61 +5,48 @@ from dateutil.parser import parse as date_parse
|
||||
from base.orm import local_session
|
||||
from migration.html2text import html2text
|
||||
from orm.reaction import Reaction, ReactionKind
|
||||
from orm.shout import ShoutReactionsFollower
|
||||
from orm.shout import Shout, ShoutReactionsFollower
|
||||
from orm.topic import TopicFollower
|
||||
from orm.user import User
|
||||
from orm.shout import Shout
|
||||
|
||||
ts = datetime.now(tz=timezone.utc)
|
||||
|
||||
|
||||
def auto_followers(session, topics, reaction_dict):
|
||||
# creating shout's reactions following for reaction author
|
||||
following1 = session.query(
|
||||
ShoutReactionsFollower
|
||||
).where(
|
||||
ShoutReactionsFollower.follower == reaction_dict["createdBy"]
|
||||
).filter(
|
||||
ShoutReactionsFollower.shout == reaction_dict["shout"]
|
||||
).first()
|
||||
following1 = (
|
||||
session.query(ShoutReactionsFollower)
|
||||
.where(ShoutReactionsFollower.follower == reaction_dict["createdBy"])
|
||||
.filter(ShoutReactionsFollower.shout == reaction_dict["shout"])
|
||||
.first()
|
||||
)
|
||||
if not following1:
|
||||
following1 = ShoutReactionsFollower.create(
|
||||
follower=reaction_dict["createdBy"],
|
||||
shout=reaction_dict["shout"],
|
||||
auto=True
|
||||
follower=reaction_dict["createdBy"], shout=reaction_dict["shout"], auto=True
|
||||
)
|
||||
session.add(following1)
|
||||
# creating topics followings for reaction author
|
||||
for t in topics:
|
||||
tf = session.query(
|
||||
TopicFollower
|
||||
).where(
|
||||
TopicFollower.follower == reaction_dict["createdBy"]
|
||||
).filter(
|
||||
TopicFollower.topic == t['id']
|
||||
).first()
|
||||
tf = (
|
||||
session.query(TopicFollower)
|
||||
.where(TopicFollower.follower == reaction_dict["createdBy"])
|
||||
.filter(TopicFollower.topic == t["id"])
|
||||
.first()
|
||||
)
|
||||
if not tf:
|
||||
topic_following = TopicFollower.create(
|
||||
follower=reaction_dict["createdBy"],
|
||||
topic=t['id'],
|
||||
auto=True
|
||||
follower=reaction_dict["createdBy"], topic=t["id"], auto=True
|
||||
)
|
||||
session.add(topic_following)
|
||||
|
||||
|
||||
def migrate_ratings(session, entry, reaction_dict):
|
||||
for comment_rating_old in entry.get("ratings", []):
|
||||
rater = (
|
||||
session.query(User)
|
||||
.filter(User.oid == comment_rating_old["createdBy"])
|
||||
.first()
|
||||
)
|
||||
rater = session.query(User).filter(User.oid == comment_rating_old["createdBy"]).first()
|
||||
re_reaction_dict = {
|
||||
"shout": reaction_dict["shout"],
|
||||
"replyTo": reaction_dict["id"],
|
||||
"kind": ReactionKind.LIKE
|
||||
if comment_rating_old["value"] > 0
|
||||
else ReactionKind.DISLIKE,
|
||||
"kind": ReactionKind.LIKE if comment_rating_old["value"] > 0 else ReactionKind.DISLIKE,
|
||||
"createdBy": rater.id if rater else 1,
|
||||
}
|
||||
cts = comment_rating_old.get("createdAt")
|
||||
@@ -68,18 +55,15 @@ def migrate_ratings(session, entry, reaction_dict):
|
||||
try:
|
||||
# creating reaction from old rating
|
||||
rr = Reaction.create(**re_reaction_dict)
|
||||
following2 = session.query(
|
||||
ShoutReactionsFollower
|
||||
).where(
|
||||
ShoutReactionsFollower.follower == re_reaction_dict['createdBy']
|
||||
).filter(
|
||||
ShoutReactionsFollower.shout == rr.shout
|
||||
).first()
|
||||
following2 = (
|
||||
session.query(ShoutReactionsFollower)
|
||||
.where(ShoutReactionsFollower.follower == re_reaction_dict["createdBy"])
|
||||
.filter(ShoutReactionsFollower.shout == rr.shout)
|
||||
.first()
|
||||
)
|
||||
if not following2:
|
||||
following2 = ShoutReactionsFollower.create(
|
||||
follower=re_reaction_dict['createdBy'],
|
||||
shout=rr.shout,
|
||||
auto=True
|
||||
follower=re_reaction_dict["createdBy"], shout=rr.shout, auto=True
|
||||
)
|
||||
session.add(following2)
|
||||
session.add(rr)
|
||||
@@ -150,9 +134,7 @@ async def migrate(entry, storage):
|
||||
else:
|
||||
stage = "author and old id found"
|
||||
try:
|
||||
shout = session.query(
|
||||
Shout
|
||||
).where(Shout.slug == old_shout["slug"]).one()
|
||||
shout = session.query(Shout).where(Shout.slug == old_shout["slug"]).one()
|
||||
if shout:
|
||||
reaction_dict["shout"] = shout.id
|
||||
reaction_dict["createdBy"] = author.id if author else 1
|
||||
@@ -178,9 +160,9 @@ async def migrate(entry, storage):
|
||||
|
||||
|
||||
def migrate_2stage(old_comment, idmap):
|
||||
if old_comment.get('body'):
|
||||
new_id = idmap.get(old_comment.get('oid'))
|
||||
new_id = idmap.get(old_comment.get('_id'))
|
||||
if old_comment.get("body"):
|
||||
new_id = idmap.get(old_comment.get("oid"))
|
||||
new_id = idmap.get(old_comment.get("_id"))
|
||||
if new_id:
|
||||
new_replyto_id = None
|
||||
old_replyto_id = old_comment.get("replyTo")
|
||||
@@ -190,17 +172,20 @@ def migrate_2stage(old_comment, idmap):
|
||||
comment = session.query(Reaction).where(Reaction.id == new_id).first()
|
||||
try:
|
||||
if new_replyto_id:
|
||||
new_reply = session.query(Reaction).where(Reaction.id == new_replyto_id).first()
|
||||
new_reply = (
|
||||
session.query(Reaction).where(Reaction.id == new_replyto_id).first()
|
||||
)
|
||||
if not new_reply:
|
||||
print(new_replyto_id)
|
||||
raise Exception("cannot find reply by id!")
|
||||
comment.replyTo = new_reply.id
|
||||
session.add(comment)
|
||||
srf = session.query(ShoutReactionsFollower).where(
|
||||
ShoutReactionsFollower.shout == comment.shout
|
||||
).filter(
|
||||
ShoutReactionsFollower.follower == comment.createdBy
|
||||
).first()
|
||||
srf = (
|
||||
session.query(ShoutReactionsFollower)
|
||||
.where(ShoutReactionsFollower.shout == comment.shout)
|
||||
.filter(ShoutReactionsFollower.follower == comment.createdBy)
|
||||
.first()
|
||||
)
|
||||
if not srf:
|
||||
srf = ShoutReactionsFollower.create(
|
||||
shout=comment.shout, follower=comment.createdBy, auto=True
|
||||
|
@@ -1,16 +1,18 @@
|
||||
from datetime import datetime, timezone
|
||||
import json
|
||||
import re
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from dateutil.parser import parse as date_parse
|
||||
from sqlalchemy.exc import IntegrityError
|
||||
from transliterate import translit
|
||||
|
||||
from base.orm import local_session
|
||||
from migration.extract import extract_html, extract_media
|
||||
from orm.reaction import Reaction, ReactionKind
|
||||
from orm.shout import Shout, ShoutTopic, ShoutReactionsFollower
|
||||
from orm.shout import Shout, ShoutReactionsFollower, ShoutTopic
|
||||
from orm.topic import Topic, TopicFollower
|
||||
from orm.user import User
|
||||
from orm.topic import TopicFollower, Topic
|
||||
from services.stat.viewed import ViewedStorage
|
||||
import re
|
||||
|
||||
OLD_DATE = "2016-03-05 22:22:00.350000"
|
||||
ts = datetime.now(tz=timezone.utc)
|
||||
@@ -33,7 +35,7 @@ def get_shout_slug(entry):
|
||||
slug = friend.get("slug", "")
|
||||
if slug:
|
||||
break
|
||||
slug = re.sub('[^0-9a-zA-Z]+', '-', slug)
|
||||
slug = re.sub("[^0-9a-zA-Z]+", "-", slug)
|
||||
return slug
|
||||
|
||||
|
||||
@@ -41,27 +43,27 @@ def create_author_from_app(app):
|
||||
user = None
|
||||
userdata = None
|
||||
# check if email is used
|
||||
if app['email']:
|
||||
if app["email"]:
|
||||
with local_session() as session:
|
||||
user = session.query(User).where(User.email == app['email']).first()
|
||||
user = session.query(User).where(User.email == app["email"]).first()
|
||||
if not user:
|
||||
# print('[migration] app %r' % app)
|
||||
name = app.get('name')
|
||||
name = app.get("name")
|
||||
if name:
|
||||
slug = translit(name, "ru", reversed=True).lower()
|
||||
slug = re.sub('[^0-9a-zA-Z]+', '-', slug)
|
||||
print('[migration] created slug %s' % slug)
|
||||
slug = re.sub("[^0-9a-zA-Z]+", "-", slug)
|
||||
print("[migration] created slug %s" % slug)
|
||||
# check if slug is used
|
||||
if slug:
|
||||
user = session.query(User).where(User.slug == slug).first()
|
||||
|
||||
# get slug from email
|
||||
if user:
|
||||
slug = app['email'].split('@')[0]
|
||||
slug = app["email"].split("@")[0]
|
||||
user = session.query(User).where(User.slug == slug).first()
|
||||
# one more try
|
||||
if user:
|
||||
slug += '-author'
|
||||
slug += "-author"
|
||||
user = session.query(User).where(User.slug == slug).first()
|
||||
|
||||
# create user with application data
|
||||
@@ -79,7 +81,7 @@ def create_author_from_app(app):
|
||||
user = User.create(**userdata)
|
||||
session.add(user)
|
||||
session.commit()
|
||||
userdata['id'] = user.id
|
||||
userdata["id"] = user.id
|
||||
|
||||
userdata = user.dict()
|
||||
return userdata
|
||||
@@ -91,11 +93,12 @@ async def create_shout(shout_dict):
|
||||
s = Shout.create(**shout_dict)
|
||||
author = s.authors[0]
|
||||
with local_session() as session:
|
||||
srf = session.query(ShoutReactionsFollower).where(
|
||||
ShoutReactionsFollower.shout == s.id
|
||||
).filter(
|
||||
ShoutReactionsFollower.follower == author.id
|
||||
).first()
|
||||
srf = (
|
||||
session.query(ShoutReactionsFollower)
|
||||
.where(ShoutReactionsFollower.shout == s.id)
|
||||
.filter(ShoutReactionsFollower.follower == author.id)
|
||||
.first()
|
||||
)
|
||||
if not srf:
|
||||
srf = ShoutReactionsFollower.create(shout=s.id, follower=author.id, auto=True)
|
||||
session.add(srf)
|
||||
@@ -116,14 +119,14 @@ async def get_user(entry, storage):
|
||||
elif user_oid:
|
||||
userdata = storage["users"]["by_oid"].get(user_oid)
|
||||
if not userdata:
|
||||
print('no userdata by oid, anonymous')
|
||||
print("no userdata by oid, anonymous")
|
||||
userdata = anondict
|
||||
print(app)
|
||||
# cleanup slug
|
||||
if userdata:
|
||||
slug = userdata.get("slug", "")
|
||||
if slug:
|
||||
slug = re.sub('[^0-9a-zA-Z]+', '-', slug)
|
||||
slug = re.sub("[^0-9a-zA-Z]+", "-", slug)
|
||||
userdata["slug"] = slug
|
||||
else:
|
||||
userdata = anondict
|
||||
@@ -137,11 +140,14 @@ async def migrate(entry, storage):
|
||||
r = {
|
||||
"layout": type2layout[entry["type"]],
|
||||
"title": entry["title"],
|
||||
"authors": [author, ],
|
||||
"authors": [
|
||||
author,
|
||||
],
|
||||
"slug": get_shout_slug(entry),
|
||||
"cover": (
|
||||
"https://images.discours.io/unsafe/" +
|
||||
entry["thumborId"] if entry.get("thumborId") else entry.get("image", {}).get("url")
|
||||
"https://images.discours.io/unsafe/" + entry["thumborId"]
|
||||
if entry.get("thumborId")
|
||||
else entry.get("image", {}).get("url")
|
||||
),
|
||||
"visibility": "public" if entry.get("published") else "community",
|
||||
"publishedAt": date_parse(entry.get("publishedAt")) if entry.get("published") else None,
|
||||
@@ -150,11 +156,11 @@ async def migrate(entry, storage):
|
||||
"updatedAt": date_parse(entry["updatedAt"]) if "updatedAt" in entry else ts,
|
||||
"createdBy": author.id,
|
||||
"topics": await add_topics_follower(entry, storage, author),
|
||||
"body": extract_html(entry, cleanup=True)
|
||||
"body": extract_html(entry, cleanup=True),
|
||||
}
|
||||
|
||||
# main topic patch
|
||||
r['mainTopic'] = r['topics'][0]
|
||||
r["mainTopic"] = r["topics"][0]
|
||||
|
||||
# published author auto-confirm
|
||||
if entry.get("published"):
|
||||
@@ -177,14 +183,16 @@ async def migrate(entry, storage):
|
||||
shout_dict["oid"] = entry.get("_id", "")
|
||||
shout = await create_shout(shout_dict)
|
||||
except IntegrityError as e:
|
||||
print('[migration] create_shout integrity error', e)
|
||||
print("[migration] create_shout integrity error", e)
|
||||
shout = await resolve_create_shout(shout_dict)
|
||||
except Exception as e:
|
||||
raise Exception(e)
|
||||
|
||||
# update data
|
||||
shout_dict = shout.dict()
|
||||
shout_dict["authors"] = [author.dict(), ]
|
||||
shout_dict["authors"] = [
|
||||
author.dict(),
|
||||
]
|
||||
|
||||
# shout topics aftermath
|
||||
shout_dict["topics"] = await topics_aftermath(r, storage)
|
||||
@@ -193,7 +201,9 @@ async def migrate(entry, storage):
|
||||
await content_ratings_to_reactions(entry, shout_dict["slug"])
|
||||
|
||||
# shout views
|
||||
await ViewedStorage.increment(shout_dict["slug"], amount=entry.get("views", 1), viewer='old-discours')
|
||||
await ViewedStorage.increment(
|
||||
shout_dict["slug"], amount=entry.get("views", 1), viewer="old-discours"
|
||||
)
|
||||
# del shout_dict['ratings']
|
||||
|
||||
storage["shouts"]["by_oid"][entry["_id"]] = shout_dict
|
||||
@@ -205,7 +215,9 @@ async def add_topics_follower(entry, storage, user):
|
||||
topics = set([])
|
||||
category = entry.get("category")
|
||||
topics_by_oid = storage["topics"]["by_oid"]
|
||||
oids = [category, ] + entry.get("tags", [])
|
||||
oids = [
|
||||
category,
|
||||
] + entry.get("tags", [])
|
||||
for toid in oids:
|
||||
tslug = topics_by_oid.get(toid, {}).get("slug")
|
||||
if tslug:
|
||||
@@ -217,23 +229,18 @@ async def add_topics_follower(entry, storage, user):
|
||||
try:
|
||||
tpc = session.query(Topic).where(Topic.slug == tpcslug).first()
|
||||
if tpc:
|
||||
tf = session.query(
|
||||
TopicFollower
|
||||
).where(
|
||||
TopicFollower.follower == user.id
|
||||
).filter(
|
||||
TopicFollower.topic == tpc.id
|
||||
).first()
|
||||
tf = (
|
||||
session.query(TopicFollower)
|
||||
.where(TopicFollower.follower == user.id)
|
||||
.filter(TopicFollower.topic == tpc.id)
|
||||
.first()
|
||||
)
|
||||
if not tf:
|
||||
tf = TopicFollower.create(
|
||||
topic=tpc.id,
|
||||
follower=user.id,
|
||||
auto=True
|
||||
)
|
||||
tf = TopicFollower.create(topic=tpc.id, follower=user.id, auto=True)
|
||||
session.add(tf)
|
||||
session.commit()
|
||||
except IntegrityError:
|
||||
print('[migration.shout] hidden by topic ' + tpc.slug)
|
||||
print("[migration.shout] hidden by topic " + tpc.slug)
|
||||
# main topic
|
||||
maintopic = storage["replacements"].get(topics_by_oid.get(category, {}).get("slug"))
|
||||
if maintopic in ttt:
|
||||
@@ -254,7 +261,7 @@ async def process_user(userdata, storage, oid):
|
||||
if not user:
|
||||
try:
|
||||
slug = userdata["slug"].lower().strip()
|
||||
slug = re.sub('[^0-9a-zA-Z]+', '-', slug)
|
||||
slug = re.sub("[^0-9a-zA-Z]+", "-", slug)
|
||||
userdata["slug"] = slug
|
||||
user = User.create(**userdata)
|
||||
session.add(user)
|
||||
@@ -282,9 +289,9 @@ async def resolve_create_shout(shout_dict):
|
||||
s = session.query(Shout).filter(Shout.slug == shout_dict["slug"]).first()
|
||||
bump = False
|
||||
if s:
|
||||
if s.createdAt != shout_dict['createdAt']:
|
||||
if s.createdAt != shout_dict["createdAt"]:
|
||||
# create new with different slug
|
||||
shout_dict["slug"] += '-' + shout_dict["layout"]
|
||||
shout_dict["slug"] += "-" + shout_dict["layout"]
|
||||
try:
|
||||
await create_shout(shout_dict)
|
||||
except IntegrityError as e:
|
||||
@@ -295,10 +302,7 @@ async def resolve_create_shout(shout_dict):
|
||||
for key in shout_dict:
|
||||
if key in s.__dict__:
|
||||
if s.__dict__[key] != shout_dict[key]:
|
||||
print(
|
||||
"[migration] shout already exists, but differs in %s"
|
||||
% key
|
||||
)
|
||||
print("[migration] shout already exists, but differs in %s" % key)
|
||||
bump = True
|
||||
else:
|
||||
print("[migration] shout already exists, but lacks %s" % key)
|
||||
@@ -344,9 +348,7 @@ async def topics_aftermath(entry, storage):
|
||||
)
|
||||
if not shout_topic_new:
|
||||
try:
|
||||
ShoutTopic.create(
|
||||
**{"shout": shout.id, "topic": new_topic.id}
|
||||
)
|
||||
ShoutTopic.create(**{"shout": shout.id, "topic": new_topic.id})
|
||||
except Exception:
|
||||
print("[migration] shout topic error: " + newslug)
|
||||
session.commit()
|
||||
@@ -363,9 +365,7 @@ async def content_ratings_to_reactions(entry, slug):
|
||||
with local_session() as session:
|
||||
for content_rating in entry.get("ratings", []):
|
||||
rater = (
|
||||
session.query(User)
|
||||
.filter(User.oid == content_rating["createdBy"])
|
||||
.first()
|
||||
session.query(User).filter(User.oid == content_rating["createdBy"]).first()
|
||||
) or User.default_user
|
||||
shout = session.query(Shout).where(Shout.slug == slug).first()
|
||||
cts = content_rating.get("createdAt")
|
||||
@@ -375,7 +375,7 @@ async def content_ratings_to_reactions(entry, slug):
|
||||
if content_rating["value"] > 0
|
||||
else ReactionKind.DISLIKE,
|
||||
"createdBy": rater.id,
|
||||
"shout": shout.id
|
||||
"shout": shout.id,
|
||||
}
|
||||
reaction = (
|
||||
session.query(Reaction)
|
||||
|
@@ -1,42 +1,35 @@
|
||||
from base.orm import local_session
|
||||
from migration.extract import extract_md
|
||||
from migration.html2text import html2text
|
||||
from orm.reaction import Reaction, ReactionKind
|
||||
# from base.orm import local_session
|
||||
|
||||
# from migration.extract import extract_md
|
||||
# from migration.html2text import html2text
|
||||
# from orm.reaction import Reaction, ReactionKind
|
||||
|
||||
|
||||
def migrate(entry, storage):
|
||||
post_oid = entry['contentItem']
|
||||
print(post_oid)
|
||||
shout_dict = storage['shouts']['by_oid'].get(post_oid)
|
||||
if shout_dict:
|
||||
print(shout_dict['body'])
|
||||
remark = {
|
||||
"shout": shout_dict['id'],
|
||||
"body": extract_md(
|
||||
html2text(entry['body']),
|
||||
shout_dict
|
||||
),
|
||||
"kind": ReactionKind.REMARK
|
||||
}
|
||||
|
||||
if entry.get('textBefore'):
|
||||
remark['range'] = str(
|
||||
shout_dict['body']
|
||||
.index(
|
||||
entry['textBefore'] or ''
|
||||
)
|
||||
) + ':' + str(
|
||||
shout_dict['body']
|
||||
.index(
|
||||
entry['textAfter'] or ''
|
||||
) + len(
|
||||
entry['textAfter'] or ''
|
||||
)
|
||||
)
|
||||
|
||||
with local_session() as session:
|
||||
rmrk = Reaction.create(**remark)
|
||||
session.commit()
|
||||
del rmrk["_sa_instance_state"]
|
||||
return rmrk
|
||||
return
|
||||
# def migrate(entry, storage):
|
||||
# post_oid = entry["contentItem"]
|
||||
# print(post_oid)
|
||||
# shout_dict = storage["shouts"]["by_oid"].get(post_oid)
|
||||
# if shout_dict:
|
||||
# print(shout_dict["body"])
|
||||
# remark = {
|
||||
# "shout": shout_dict["id"],
|
||||
# "body": extract_md(html2text(entry["body"]), shout_dict),
|
||||
# "kind": ReactionKind.REMARK,
|
||||
# }
|
||||
#
|
||||
# if entry.get("textBefore"):
|
||||
# remark["range"] = (
|
||||
# str(shout_dict["body"].index(entry["textBefore"] or ""))
|
||||
# + ":"
|
||||
# + str(
|
||||
# shout_dict["body"].index(entry["textAfter"] or "")
|
||||
# + len(entry["textAfter"] or "")
|
||||
# )
|
||||
# )
|
||||
#
|
||||
# with local_session() as session:
|
||||
# rmrk = Reaction.create(**remark)
|
||||
# session.commit()
|
||||
# del rmrk["_sa_instance_state"]
|
||||
# return rmrk
|
||||
# return
|
||||
|
@@ -1,5 +1,4 @@
|
||||
from base.orm import local_session
|
||||
from migration.extract import extract_md
|
||||
from migration.html2text import html2text
|
||||
from orm import Topic
|
||||
|
||||
@@ -10,7 +9,7 @@ def migrate(entry):
|
||||
"slug": entry["slug"],
|
||||
"oid": entry["_id"],
|
||||
"title": entry["title"].replace(" ", " "),
|
||||
"body": extract_md(html2text(body_orig))
|
||||
"body": html2text(body_orig),
|
||||
}
|
||||
|
||||
with local_session() as session:
|
||||
|
@@ -8,7 +8,7 @@ from base.orm import local_session
|
||||
from orm.user import AuthorFollower, User, UserRating
|
||||
|
||||
|
||||
def migrate(entry):
|
||||
def migrate(entry): # noqa: C901
|
||||
if "subscribedTo" in entry:
|
||||
del entry["subscribedTo"]
|
||||
email = entry["emails"][0]["address"]
|
||||
@@ -23,7 +23,7 @@ def migrate(entry):
|
||||
"muted": False, # amnesty
|
||||
"links": [],
|
||||
"name": "anonymous",
|
||||
"password": entry["services"]["password"].get("bcrypt")
|
||||
"password": entry["services"]["password"].get("bcrypt"),
|
||||
}
|
||||
|
||||
if "updatedAt" in entry:
|
||||
@@ -33,9 +33,13 @@ def migrate(entry):
|
||||
if entry.get("profile"):
|
||||
# slug
|
||||
slug = entry["profile"].get("path").lower()
|
||||
slug = re.sub('[^0-9a-zA-Z]+', '-', slug).strip()
|
||||
slug = re.sub("[^0-9a-zA-Z]+", "-", slug).strip()
|
||||
user_dict["slug"] = slug
|
||||
bio = (entry.get("profile", {"bio": ""}).get("bio") or "").replace('\(', '(').replace('\)', ')')
|
||||
bio = (
|
||||
(entry.get("profile", {"bio": ""}).get("bio") or "")
|
||||
.replace(r"\(", "(")
|
||||
.replace(r"\)", ")")
|
||||
)
|
||||
bio_text = BeautifulSoup(bio, features="lxml").text
|
||||
|
||||
if len(bio_text) > 120:
|
||||
@@ -46,8 +50,7 @@ def migrate(entry):
|
||||
# userpic
|
||||
try:
|
||||
user_dict["userpic"] = (
|
||||
"https://images.discours.io/unsafe/"
|
||||
+ entry["profile"]["thumborId"]
|
||||
"https://images.discours.io/unsafe/" + entry["profile"]["thumborId"]
|
||||
)
|
||||
except KeyError:
|
||||
try:
|
||||
@@ -62,11 +65,7 @@ def migrate(entry):
|
||||
name = (name + " " + ln) if ln else name
|
||||
if not name:
|
||||
name = slug if slug else "anonymous"
|
||||
name = (
|
||||
entry["profile"]["path"].lower().strip().replace(" ", "-")
|
||||
if len(name) < 2
|
||||
else name
|
||||
)
|
||||
name = entry["profile"]["path"].lower().strip().replace(" ", "-") if len(name) < 2 else name
|
||||
user_dict["name"] = name
|
||||
|
||||
# links
|
||||
@@ -95,9 +94,7 @@ def migrate(entry):
|
||||
except IntegrityError:
|
||||
print("[migration] cannot create user " + user_dict["slug"])
|
||||
with local_session() as session:
|
||||
old_user = (
|
||||
session.query(User).filter(User.slug == user_dict["slug"]).first()
|
||||
)
|
||||
old_user = session.query(User).filter(User.slug == user_dict["slug"]).first()
|
||||
old_user.oid = oid
|
||||
old_user.password = user_dict["password"]
|
||||
session.commit()
|
||||
@@ -114,7 +111,7 @@ def post_migrate():
|
||||
"slug": "old-discours",
|
||||
"username": "old-discours",
|
||||
"email": "old@discours.io",
|
||||
"name": "Просмотры на старой версии сайта"
|
||||
"name": "Просмотры на старой версии сайта",
|
||||
}
|
||||
|
||||
with local_session() as session:
|
||||
@@ -147,12 +144,8 @@ def migrate_2stage(entry, id_map):
|
||||
}
|
||||
|
||||
user_rating = UserRating.create(**user_rating_dict)
|
||||
if user_rating_dict['value'] > 0:
|
||||
af = AuthorFollower.create(
|
||||
author=user.id,
|
||||
follower=rater.id,
|
||||
auto=True
|
||||
)
|
||||
if user_rating_dict["value"] > 0:
|
||||
af = AuthorFollower.create(author=user.id, follower=rater.id, auto=True)
|
||||
session.add(af)
|
||||
session.add(user_rating)
|
||||
session.commit()
|
||||
|
Reference in New Issue
Block a user