Revert "Feature/lint"
@@ -1,25 +1,24 @@
 """ cmd managed migration """
-import asyncio
-import gc
-import json
-import sys
 from datetime import datetime, timezone
 
-import bs4
-
 from migration.export import export_mdx
 from migration.tables.comments import migrate as migrateComment
 from migration.tables.comments import migrate_2stage as migrateComment_2stage
 from migration.tables.content_items import get_shout_slug
 from migration.tables.content_items import migrate as migrateShout
-
-# from migration.tables.remarks import migrate as migrateRemark
+from migration.tables.remarks import migrate as migrateRemark
 from migration.tables.topics import migrate as migrateTopic
-from migration.tables.users import migrate as migrateUser
+from migration.tables.users import migrate as migrateUser, post_migrate as users_post_migrate
 from migration.tables.users import migrate_2stage as migrateUser_2stage
-from migration.tables.users import post_migrate as users_post_migrate
 from orm import init_tables
 from orm.reaction import Reaction
 
+import asyncio
+import bs4
+import gc
+import json
+import sys
+
 TODAY = datetime.strftime(datetime.now(tz=timezone.utc), "%Y%m%d")
 OLD_DATE = "2016-03-05 22:22:00.350000"
+
@@ -64,8 +63,16 @@ async def topics_handle(storage):
             del storage["topics"]["by_slug"][oldslug]
             storage["topics"]["by_oid"][oid] = storage["topics"]["by_slug"][newslug]
     print("[migration] " + str(counter) + " topics migrated")
-    print("[migration] " + str(len(storage["topics"]["by_oid"].values())) + " topics by oid")
-    print("[migration] " + str(len(storage["topics"]["by_slug"].values())) + " topics by slug")
+    print(
+        "[migration] "
+        + str(len(storage["topics"]["by_oid"].values()))
+        + " topics by oid"
+    )
+    print(
+        "[migration] "
+        + str(len(storage["topics"]["by_slug"].values()))
+        + " topics by slug"
+    )
 
 
 async def shouts_handle(storage, args):
@@ -110,10 +117,9 @@ async def shouts_handle(storage, args):
 
             # print main counter
             counter += 1
-            print(
-                "[migration] shouts_handle %d: %s @%s"
-                % ((counter + 1), shout_dict["slug"], author["slug"])
-            )
+            print('[migration] shouts_handle %d: %s @%s' % (
+                (counter + 1), shout_dict["slug"], author["slug"]
+            ))
 
             b = bs4.BeautifulSoup(shout_dict["body"], "html.parser")
             texts = [shout_dict["title"].lower().replace(r"[^а-яА-Яa-zA-Z]", "")]
@@ -132,13 +138,13 @@ async def shouts_handle(storage, args):
     print("[migration] " + str(anonymous_author) + " authored by @anonymous")
 
 
-# async def remarks_handle(storage):
-#     print("[migration] comments")
-#     c = 0
-#     for entry_remark in storage["remarks"]["data"]:
-#         remark = await migrateRemark(entry_remark, storage)
-#         c += 1
-#     print("[migration] " + str(c) + " remarks migrated")
+async def remarks_handle(storage):
+    print("[migration] comments")
+    c = 0
+    for entry_remark in storage["remarks"]["data"]:
+        remark = await migrateRemark(entry_remark, storage)
+        c += 1
+    print("[migration] " + str(c) + " remarks migrated")
 
 
 async def comments_handle(storage):
@@ -149,9 +155,9 @@ async def comments_handle(storage):
     for oldcomment in storage["reactions"]["data"]:
         if not oldcomment.get("deleted"):
            reaction = await migrateComment(oldcomment, storage)
-            if isinstance(reaction, str):
+            if type(reaction) == str:
                 missed_shouts[reaction] = oldcomment
-            elif isinstance(reaction, Reaction):
+            elif type(reaction) == Reaction:
                 reaction = reaction.dict()
                 rid = reaction["id"]
                 oid = reaction["oid"]
@@ -208,7 +214,9 @@ def data_load():
     tags_data = json.loads(open("migration/data/tags.json").read())
     storage["topics"]["tags"] = tags_data
     print("[migration.load] " + str(len(tags_data)) + " tags ")
-    cats_data = json.loads(open("migration/data/content_item_categories.json").read())
+    cats_data = json.loads(
+        open("migration/data/content_item_categories.json").read()
+    )
     storage["topics"]["cats"] = cats_data
     print("[migration.load] " + str(len(cats_data)) + " cats ")
     comments_data = json.loads(open("migration/data/comments.json").read())
@@ -227,7 +235,11 @@ def data_load():
         storage["users"]["by_oid"][x["_id"]] = x
         # storage['users']['by_slug'][x['slug']] = x
         # no user.slug yet
-    print("[migration.load] " + str(len(storage["users"]["by_oid"].keys())) + " users by oid")
+    print(
+        "[migration.load] "
+        + str(len(storage["users"]["by_oid"].keys()))
+        + " users by oid"
+    )
     for x in tags_data:
         storage["topics"]["by_oid"][x["_id"]] = x
         storage["topics"]["by_slug"][x["slug"]] = x
@@ -235,7 +247,9 @@ def data_load():
         storage["topics"]["by_oid"][x["_id"]] = x
         storage["topics"]["by_slug"][x["slug"]] = x
     print(
-        "[migration.load] " + str(len(storage["topics"]["by_slug"].keys())) + " topics by slug"
+        "[migration.load] "
+        + str(len(storage["topics"]["by_slug"].keys()))
+        + " topics by slug"
     )
     for item in content_data:
         slug = get_shout_slug(item)
@@ -1,9 +1,9 @@
-from .utils import DateTimeEncoder
-import json
-import os
 
 import bson
 import gc
+import json
+import os
+from .utils import DateTimeEncoder
 
 
 def json_tables():
@@ -15,10 +15,10 @@ def json_tables():
         "email_subscriptions": [],
         "users": [],
         "comments": [],
-        "remarks": [],
+        "remarks": []
     }
     for table in data.keys():
-        print("[migration] bson2json for " + table)
+        print('[migration] bson2json for ' + table)
         gc.collect()
         lc = []
         bs = open("dump/discours/" + table + ".bson", "rb").read()
@@ -1,10 +1,11 @@
-from .extract import extract_html, extract_media
-from .utils import DateTimeEncoder
-import json
-import os
 from datetime import datetime, timezone
 
 import frontmatter
+import json
+import os
+
+from .extract import extract_html, extract_media
+from .utils import DateTimeEncoder
 
 OLD_DATE = "2016-03-05 22:22:00.350000"
 EXPORT_DEST = "../discoursio-web/data/"
@@ -70,29 +71,47 @@ def export_slug(slug, storage):
 
 
 def export_email_subscriptions():
-    email_subscriptions_data = json.loads(open("migration/data/email_subscriptions.json").read())
+    email_subscriptions_data = json.loads(
+        open("migration/data/email_subscriptions.json").read()
+    )
     for data in email_subscriptions_data:
         # TODO: migrate to mailgun list manually
         # migrate_email_subscription(data)
         pass
-    print("[migration] " + str(len(email_subscriptions_data)) + " email subscriptions exported")
+    print(
+        "[migration] "
+        + str(len(email_subscriptions_data))
+        + " email subscriptions exported"
+    )
 
 
 def export_shouts(storage):
     # update what was just migrated or load json again
     if len(storage["users"]["by_slugs"].keys()) == 0:
-        storage["users"]["by_slugs"] = json.loads(open(EXPORT_DEST + "authors.json").read())
-    print("[migration] " + str(len(storage["users"]["by_slugs"].keys())) + " exported authors ")
-    if len(storage["shouts"]["by_slugs"].keys()) == 0:
-        storage["shouts"]["by_slugs"] = json.loads(open(EXPORT_DEST + "articles.json").read())
+        storage["users"]["by_slugs"] = json.loads(
+            open(EXPORT_DEST + "authors.json").read()
+        )
     print(
-        "[migration] " + str(len(storage["shouts"]["by_slugs"].keys())) + " exported articles "
+        "[migration] "
+        + str(len(storage["users"]["by_slugs"].keys()))
+        + " exported authors "
     )
+    if len(storage["shouts"]["by_slugs"].keys()) == 0:
+        storage["shouts"]["by_slugs"] = json.loads(
+            open(EXPORT_DEST + "articles.json").read()
+        )
+    print(
+        "[migration] "
+        + str(len(storage["shouts"]["by_slugs"].keys()))
+        + " exported articles "
+    )
     for slug in storage["shouts"]["by_slugs"].keys():
         export_slug(slug, storage)
 
 
-def export_json(export_articles={}, export_authors={}, export_topics={}, export_comments={}):
+def export_json(
+    export_articles={}, export_authors={}, export_topics={}, export_comments={}
+):
     open(EXPORT_DEST + "authors.json", "w").write(
         json.dumps(
             export_authors,
@@ -133,4 +152,8 @@ def export_json(export_articles={}, export_authors={}, export_topics={}, export_
             ensure_ascii=False,
         )
     )
-    print("[migration] " + str(len(export_comments.items())) + " exported articles with comments")
+    print(
+        "[migration] "
+        + str(len(export_comments.items()))
+        + " exported articles with comments"
+    )
@@ -1,10 +1,9 @@
-from bs4 import BeautifulSoup
-
 import base64
 import os
 import re
-import uuid
 
+# import uuid
+from bs4 import BeautifulSoup
 
 
 TOOLTIP_REGEX = r"(\/\/\/(.+)\/\/\/)"
@@ -28,40 +27,37 @@ def replace_tooltips(body):
     return newbody
 
 
-# def extract_footnotes(body, shout_dict):
-#     parts = body.split("&&&")
-#     lll = len(parts)
-#     newparts = list(parts)
-#     placed = False
-#     if lll & 1:
-#         if lll > 1:
-#             i = 1
-#             print("[extract] found %d footnotes in body" % (lll - 1))
-#             for part in parts[1:]:
-#                 if i & 1:
-#                     placed = True
-#                     if 'a class="footnote-url" href=' in part:
-#                         print("[extract] footnote: " + part)
-#                         fn = 'a class="footnote-url" href="'
-#                         # exxtracted_link = part.split(fn, 1)[1].split('"', 1)[0]
-#                         extracted_body = part.split(fn, 1)[1].split(">", 1)[1].split("</a>", 1)[0]
-#                         print("[extract] footnote link: " + extracted_link)
-#                         with local_session() as session:
-#                             Reaction.create(
-#                                 {
-#                                     "shout": shout_dict["id"],
-#                                     "kind": ReactionKind.FOOTNOTE,
-#                                     "body": extracted_body,
-#                                     "range": str(body.index(fn + link) - len("<"))
-#                                     + ":"
-#                                     + str(body.index(extracted_body) + len("</a>")),
-#                                 }
-#                             )
-#                             newparts[i] = "<a href='#'>ℹ️</a>"
-#                     else:
-#                         newparts[i] = part
-#                     i += 1
-#     return ("".join(newparts), placed)
+
+def extract_footnotes(body, shout_dict):
+    parts = body.split("&&&")
+    lll = len(parts)
+    newparts = list(parts)
+    placed = False
+    if lll & 1:
+        if lll > 1:
+            i = 1
+            print("[extract] found %d footnotes in body" % (lll - 1))
+            for part in parts[1:]:
+                if i & 1:
+                    placed = True
+                    if 'a class="footnote-url" href=' in part:
+                        print("[extract] footnote: " + part)
+                        fn = 'a class="footnote-url" href="'
+                        exxtracted_link = part.split(fn, 1)[1].split('"', 1)[0]
+                        extracted_body = part.split(fn, 1)[1].split('>', 1)[1].split('</a>', 1)[0]
+                        print("[extract] footnote link: " + extracted_link)
+                        with local_session() as session:
+                            Reaction.create({
+                                "shout": shout_dict['id'],
+                                "kind": ReactionKind.FOOTNOTE,
+                                "body": extracted_body,
+                                "range": str(body.index(fn + link) - len('<')) + ':' + str(body.index(extracted_body) + len('</a>'))
+                            })
+                            newparts[i] = "<a href='#'>ℹ️</a>"
+                    else:
+                        newparts[i] = part
+                    i += 1
+    return ("".join(newparts), placed)
 
 
 def place_tooltips(body):
@@ -80,7 +76,9 @@ def place_tooltips(body):
                         print("[extract] footnote: " + part)
                         fn = 'a class="footnote-url" href="'
                         link = part.split(fn, 1)[1].split('"', 1)[0]
-                        extracted_part = part.split(fn, 1)[0] + " " + part.split("/", 1)[-1]
+                        extracted_part = (
+                            part.split(fn, 1)[0] + " " + part.split("/", 1)[-1]
+                        )
                         newparts[i] = (
                             "<Tooltip"
                             + (' link="' + link + '" ' if link else "")
@@ -98,9 +96,7 @@ def place_tooltips(body):
     return ("".join(newparts), placed)
 
 
-IMG_REGEX = (
-    r"\!\[(.*?)\]\((data\:image\/(png|jpeg|jpg);base64\,((?:[A-Za-z\d+\/]{4})*(?:[A-Za-z\d+\/]{3}="
-)
+IMG_REGEX = r"\!\[(.*?)\]\((data\:image\/(png|jpeg|jpg);base64\,((?:[A-Za-z\d+\/]{4})*(?:[A-Za-z\d+\/]{3}="
 IMG_REGEX += r"|[A-Za-z\d+\/]{2}==)))\)"
 
 parentDir = "/".join(os.getcwd().split("/")[:-1])
@@ -163,7 +159,11 @@ def extract_imageparts(bodyparts, prefix):
                 try:
                     content = base64.b64decode(b64encoded + "==")
                     open(public + link, "wb").write(content)
-                    print("[extract] " + str(len(content)) + " image bytes been written")
+                    print(
+                        "[extract] "
+                        + str(len(content))
+                        + " image bytes been written"
+                    )
                     cache[b64encoded] = name
                 except Exception:
                     raise Exception
@@ -172,11 +172,18 @@ def extract_imageparts(bodyparts, prefix):
                 print("[extract] cached link " + cache[b64encoded])
                 name = cache[b64encoded]
                 link = cdn + "/upload/image-" + name + "." + ext
-            newparts[i] = current[: -len(mime)] + current[-len(mime) :] + link + next[-b64end:]
+            newparts[i] = (
+                current[: -len(mime)]
+                + current[-len(mime) :]
+                + link
+                + next[-b64end:]
+            )
             newparts[i + 1] = next[:-b64end]
             break
     return (
-        extract_imageparts(newparts[i] + newparts[i + 1] + b64.join(bodyparts[(i + 2) :]), prefix)
+        extract_imageparts(
+            newparts[i] + newparts[i + 1] + b64.join(bodyparts[(i + 2) :]), prefix
+        )
         if len(bodyparts) > (i + 1)
        else "".join(newparts)
    )
@@ -230,6 +237,7 @@ di = "data:image"
 
 
 def extract_md_images(body, prefix):
+    newbody = ""
     body = (
         body.replace("\n! [](" + di, "\n ![](" + di)
         .replace("\n[](" + di, "\n![](" + di)
@@ -237,10 +245,10 @@ def extract_md_images(body, prefix):
     )
     parts = body.split(di)
     if len(parts) > 1:
-        new_body = extract_dataimages(parts, prefix)
+        newbody = extract_dataimages(parts, prefix)
     else:
-        new_body = body
-    return new_body
+        newbody = body
+    return newbody
 
 
 def cleanup_md(body):
@@ -263,28 +271,29 @@ def cleanup_md(body):
     return newbody
 
 
-# def extract_md(body, shout_dict=None):
-#     newbody = body
-#     if newbody:
-#         newbody = cleanup_md(newbody)
-#         if not newbody:
-#             raise Exception("cleanup error")
-#
-#     if shout_dict:
-#         uid = shout_dict["id"] or uuid.uuid4()
-#         newbody = extract_md_images(newbody, uid)
-#         if not newbody:
-#             raise Exception("extract_images error")
-#
-#         newbody, placed = extract_footnotes(body, shout_dict)
-#         if not newbody:
-#             raise Exception("extract_footnotes error")
-#
-#     return newbody
+def extract_md(body, shout_dict = None):
+    newbody = body
+    if newbody:
+        newbody = cleanup_md(newbody)
+        if not newbody:
+            raise Exception("cleanup error")
+
+    if shout_dict:
+
+        uid = shout_dict['id'] or uuid.uuid4()
+        newbody = extract_md_images(newbody, uid)
+        if not newbody:
+            raise Exception("extract_images error")
+
+        newbody, placed = extract_footnotes(body, shout_dict)
+        if not newbody:
+            raise Exception("extract_footnotes error")
+
+    return newbody
 
 
 def extract_media(entry):
-    """normalized media extraction method"""
+    ''' normalized media extraction method '''
     # media [ { title pic url body } ]}
     kind = entry.get("type")
     if not kind:
@@ -314,7 +323,12 @@ def extract_media(entry):
             url = "https://vimeo.com/" + m["vimeoId"]
         # body
         body = m.get("body") or m.get("literatureBody") or ""
-        media.append({"url": url, "pic": pic, "title": title, "body": body})
+        media.append({
+            "url": url,
+            "pic": pic,
+            "title": title,
+            "body": body
+        })
     return media
 
 
@@ -384,7 +398,9 @@ def cleanup_html(body: str) -> str:
         r"<h4>\s*</h4>",
         r"<div>\s*</div>",
     ]
-    regex_replace = {r"<br>\s*</p>": "</p>"}
+    regex_replace = {
+        r"<br>\s*</p>": "</p>"
+    }
     changed = True
     while changed:
         # we need several iterations to clean nested tags this way
@@ -398,15 +414,16 @@ def cleanup_html(body: str) -> str:
             changed = True
     return new_body
 
-
-def extract_html(entry, cleanup=False):
-    body_orig = (entry.get("body") or "").replace(r"\(", "(").replace(r"\)", ")")
+def extract_html(entry, shout_id = None, cleanup=False):
+    body_orig = (entry.get("body") or "").replace('\(', '(').replace('\)', ')')
     if cleanup:
         # we do that before bs parsing to catch the invalid html
         body_clean = cleanup_html(body_orig)
         if body_clean != body_orig:
             print(f"[migration] html cleaned for slug {entry.get('slug', None)}")
             body_orig = body_clean
+    if shout_id:
+        extract_footnotes(body_orig, shout_id)
     body_html = str(BeautifulSoup(body_orig, features="html.parser"))
     if cleanup:
         # we do that after bs parsing because it can add dummy tags
@@ -1,5 +1,13 @@
 """html2text: Turn HTML into equivalent Markdown-structured text."""
 
+import html.entities
+import html.parser
+import re
+import string
+import urllib.parse as urlparse
+from textwrap import wrap
+from typing import Dict, List, Optional, Tuple, Union
+
 from . import config
 from .elements import AnchorElement, ListElement
 from .typing import OutCallback
@@ -18,14 +26,6 @@ from .utils import (
     skipwrap,
     unifiable_n,
 )
-from textwrap import wrap
-from typing import Dict, List, Optional, Tuple, Union
-
-import html.entities
-import html.parser
-import re
-import string
-import urllib.parse as urlparse
 
 __version__ = (2020, 1, 16)
 
@@ -119,7 +119,9 @@ class HTML2Text(html.parser.HTMLParser):
         self.lastWasList = False
         self.style = 0
         self.style_def = {}  # type: Dict[str, Dict[str, str]]
-        self.tag_stack = []  # type: List[Tuple[str, Dict[str, Optional[str]], Dict[str, str]]]
+        self.tag_stack = (
+            []
+        )  # type: List[Tuple[str, Dict[str, Optional[str]], Dict[str, str]]]
         self.emphasis = 0
         self.drop_white_space = 0
         self.inheader = False
@@ -298,7 +300,9 @@ class HTML2Text(html.parser.HTMLParser):
         if strikethrough:
             self.quiet -= 1
 
-    def handle_tag(self, tag: str, attrs: Dict[str, Optional[str]], start: bool) -> None:
+    def handle_tag(
+        self, tag: str, attrs: Dict[str, Optional[str]], start: bool
+    ) -> None:
         self.current_tag = tag
 
         if self.tag_callback is not None:
@@ -329,7 +333,9 @@ class HTML2Text(html.parser.HTMLParser):
                 tag_style = element_style(attrs, self.style_def, parent_style)
                 self.tag_stack.append((tag, attrs, tag_style))
             else:
-                dummy, attrs, tag_style = self.tag_stack.pop() if self.tag_stack else (None, {}, {})
+                dummy, attrs, tag_style = (
+                    self.tag_stack.pop() if self.tag_stack else (None, {}, {})
+                )
                 if self.tag_stack:
                     parent_style = self.tag_stack[-1][2]
 
@@ -379,7 +385,11 @@ class HTML2Text(html.parser.HTMLParser):
             ):
                 self.o("`")  # NOTE: same as <code>
                 self.span_highlight = True
-            elif self.current_class == "lead" and not self.inheader and not self.span_highlight:
+            elif (
+                self.current_class == "lead"
+                and not self.inheader
+                and not self.span_highlight
+            ):
                 # self.o("==")  # NOTE: CriticMarkup {==
                 self.span_lead = True
             else:
@@ -469,7 +479,11 @@ class HTML2Text(html.parser.HTMLParser):
                 and not self.span_lead
                 and not self.span_highlight
             ):
-                if start and self.preceding_data and self.preceding_data[-1] == self.strong_mark[0]:
+                if (
+                    start
+                    and self.preceding_data
+                    and self.preceding_data[-1] == self.strong_mark[0]
+                ):
                     strong = " " + self.strong_mark
                     self.preceding_data += " "
                 else:
@@ -534,8 +548,13 @@ class HTML2Text(html.parser.HTMLParser):
                 "href" in attrs
                 and not attrs["href"].startswith("#_ftn")
                 and attrs["href"] is not None
-                and not (self.skip_internal_links and attrs["href"].startswith("#"))
-                and not (self.ignore_mailto_links and attrs["href"].startswith("mailto:"))
+                and not (
+                    self.skip_internal_links and attrs["href"].startswith("#")
+                )
+                and not (
+                    self.ignore_mailto_links
+                    and attrs["href"].startswith("mailto:")
+                )
             ):
                 self.astack.append(attrs)
                 self.maybe_automatic_link = attrs["href"]
@@ -619,7 +638,9 @@ class HTML2Text(html.parser.HTMLParser):
                     self.o("![" + escape_md(alt) + "]")
                     if self.inline_links:
                         href = attrs.get("href") or ""
-                        self.o("(" + escape_md(urlparse.urljoin(self.baseurl, href)) + ")")
+                        self.o(
+                            "(" + escape_md(urlparse.urljoin(self.baseurl, href)) + ")"
+                        )
                     else:
                         i = self.previousIndex(attrs)
                         if i is not None:
@@ -675,7 +696,9 @@ class HTML2Text(html.parser.HTMLParser):
             # WARNING: does not line up <ol><li>s > 9 correctly.
             parent_list = None
             for list in self.list:
-                self.o("   " if parent_list == "ol" and list.name == "ul" else "  ")
+                self.o(
+                    "   " if parent_list == "ol" and list.name == "ul" else "  "
+                )
                 parent_list = list.name
 
             if li.name == "ul":
@@ -764,7 +787,9 @@ class HTML2Text(html.parser.HTMLParser):
             self.pbr()
             self.br_toggle = " "
 
-    def o(self, data: str, puredata: bool = False, force: Union[bool, str] = False) -> None:
+    def o(
+        self, data: str, puredata: bool = False, force: Union[bool, str] = False
+    ) -> None:
         """
         Deal with indentation and whitespace
        """
@@ -839,7 +864,9 @@ class HTML2Text(html.parser.HTMLParser):
                 self.out(" ")
             self.space = False
 
-        if self.a and ((self.p_p == 2 and self.links_each_paragraph) or force == "end"):
+        if self.a and (
+            (self.p_p == 2 and self.links_each_paragraph) or force == "end"
+        ):
             if force == "end":
                 self.out("\n")
 
@@ -898,7 +925,11 @@ class HTML2Text(html.parser.HTMLParser):
 
         if self.maybe_automatic_link is not None:
             href = self.maybe_automatic_link
-            if href == data and self.absolute_url_matcher.match(href) and self.use_automatic_links:
+            if (
+                href == data
+                and self.absolute_url_matcher.match(href)
+                and self.use_automatic_links
+            ):
                 self.o("<" + data + ">")
                 self.empty_link = False
                 return
@@ -969,7 +1000,9 @@ class HTML2Text(html.parser.HTMLParser):
        self.inline_links = False
        for para in text.split("\n"):
            if len(para) > 0:
-                if not skipwrap(para, self.wrap_links, self.wrap_list_items, self.wrap_tables):
+                if not skipwrap(
+                    para, self.wrap_links, self.wrap_list_items, self.wrap_tables
+                ):
                    indent = ""
                    if para.startswith("  " + self.ul_item_mark):
                        # list item continuation: add a double indent to the
@@ -1010,7 +1043,9 @@ class HTML2Text(html.parser.HTMLParser):
     return result
 
 
-def html2text(html: str, baseurl: str = "", bodywidth: Optional[int] = config.BODY_WIDTH) -> str:
+def html2text(
    html: str, baseurl: str = "", bodywidth: Optional[int] = config.BODY_WIDTH
+) -> str:
     h = html.strip() or ""
     if h:
         h = HTML2Text(baseurl=baseurl, bodywidth=bodywidth)
@@ -1,8 +1,8 @@
-from . import __version__, config, HTML2Text
-
 import argparse
 import sys
 
+from . import HTML2Text, __version__, config
+
 
 # noinspection DuplicatedCode
 def main() -> None:
@@ -117,7 +117,10 @@ def main() -> None:
         dest="images_with_size",
         action="store_true",
         default=config.IMAGES_WITH_SIZE,
-        help=("Write image tags with height and width attrs as raw html to retain " "dimensions"),
+        help=(
+            "Write image tags with height and width attrs as raw html to retain "
+            "dimensions"
+        ),
     )
     p.add_argument(
         "-g",
@@ -257,7 +260,9 @@ def main() -> None:
         default=config.CLOSE_QUOTE,
         help="The character used to close quotes",
     )
-    p.add_argument("--version", action="version", version=".".join(map(str, __version__)))
+    p.add_argument(
+        "--version", action="version", version=".".join(map(str, __version__))
+    )
     p.add_argument("filename", nargs="?")
     p.add_argument("encoding", nargs="?", default="utf-8")
     args = p.parse_args()
@@ -1,10 +1,12 @@
-from . import config
-import html.entities
 from typing import Dict, List, Optional
 
+import html.entities
+from . import config
+
 unifiable_n = {
-    html.entities.name2codepoint[k]: v for k, v in config.UNIFIABLE.items() if k != "nbsp"
+    html.entities.name2codepoint[k]: v
+    for k, v in config.UNIFIABLE.items()
+    if k != "nbsp"
 }
 
 
@@ -154,7 +156,9 @@ def list_numbering_start(attrs: Dict[str, Optional[str]]) -> int:
     return 0
 
 
-def skipwrap(para: str, wrap_links: bool, wrap_list_items: bool, wrap_tables: bool) -> bool:
+def skipwrap(
+    para: str, wrap_links: bool, wrap_list_items: bool, wrap_tables: bool
+) -> bool:
     # If it appears to contain a link
     # don't wrap
     if not wrap_links and config.RE_LINK.search(para):
@@ -232,7 +236,9 @@ def reformat_table(lines: List[str], right_margin: int) -> List[str]:
             max_width += [len(x) + right_margin for x in cols[-(num_cols - max_cols) :]]
             max_cols = num_cols
 
-        max_width = [max(len(x) + right_margin, old_len) for x, old_len in zip(cols, max_width)]
+        max_width = [
+            max(len(x) + right_margin, old_len) for x, old_len in zip(cols, max_width)
+        ]
 
     # reformat
     new_lines = []
@@ -241,13 +247,15 @@ def reformat_table(lines: List[str], right_margin: int) -> List[str]:
         if set(line.strip()) == set("-|"):
             filler = "-"
             new_cols = [
-                x.rstrip() + (filler * (M - len(x.rstrip()))) for x, M in zip(cols, max_width)
+                x.rstrip() + (filler * (M - len(x.rstrip())))
+                for x, M in zip(cols, max_width)
             ]
             new_lines.append("|-" + "|".join(new_cols) + "|")
         else:
             filler = " "
             new_cols = [
-                x.rstrip() + (filler * (M - len(x.rstrip()))) for x, M in zip(cols, max_width)
+                x.rstrip() + (filler * (M - len(x.rstrip())))
+                for x, M in zip(cols, max_width)
             ]
             new_lines.append("| " + "|".join(new_cols) + "|")
     return new_lines
@@ -1,50 +1,65 @@
-from base.orm import local_session
 from datetime import datetime, timezone
 
 from dateutil.parser import parse as date_parse
 
+from base.orm import local_session
 from migration.html2text import html2text
 from orm.reaction import Reaction, ReactionKind
-from orm.shout import Shout, ShoutReactionsFollower
+from orm.shout import ShoutReactionsFollower
 from orm.topic import TopicFollower
 from orm.user import User
+from orm.shout import Shout
 
 ts = datetime.now(tz=timezone.utc)
 
 
 def auto_followers(session, topics, reaction_dict):
     # creating shout's reactions following for reaction author
-    following1 = (
-        session.query(ShoutReactionsFollower)
-        .where(ShoutReactionsFollower.follower == reaction_dict["createdBy"])
-        .filter(ShoutReactionsFollower.shout == reaction_dict["shout"])
-        .first()
-    )
+    following1 = session.query(
+        ShoutReactionsFollower
+    ).where(
+        ShoutReactionsFollower.follower == reaction_dict["createdBy"]
+    ).filter(
+        ShoutReactionsFollower.shout == reaction_dict["shout"]
+    ).first()
     if not following1:
         following1 = ShoutReactionsFollower.create(
-            follower=reaction_dict["createdBy"], shout=reaction_dict["shout"], auto=True
+            follower=reaction_dict["createdBy"],
+            shout=reaction_dict["shout"],
+            auto=True
         )
         session.add(following1)
     # creating topics followings for reaction author
     for t in topics:
-        tf = (
-            session.query(TopicFollower)
-            .where(TopicFollower.follower == reaction_dict["createdBy"])
-            .filter(TopicFollower.topic == t["id"])
-            .first()
-        )
+        tf = session.query(
+            TopicFollower
+        ).where(
+            TopicFollower.follower == reaction_dict["createdBy"]
+        ).filter(
+            TopicFollower.topic == t['id']
+        ).first()
         if not tf:
             topic_following = TopicFollower.create(
-                follower=reaction_dict["createdBy"], topic=t["id"], auto=True
+                follower=reaction_dict["createdBy"],
+                topic=t['id'],
+                auto=True
             )
             session.add(topic_following)
 
 
 def migrate_ratings(session, entry, reaction_dict):
     for comment_rating_old in entry.get("ratings", []):
-        rater = session.query(User).filter(User.oid == comment_rating_old["createdBy"]).first()
+        rater = (
+            session.query(User)
+            .filter(User.oid == comment_rating_old["createdBy"])
+            .first()
+        )
         re_reaction_dict = {
             "shout": reaction_dict["shout"],
             "replyTo": reaction_dict["id"],
-            "kind": ReactionKind.LIKE if comment_rating_old["value"] > 0 else ReactionKind.DISLIKE,
+            "kind": ReactionKind.LIKE
+            if comment_rating_old["value"] > 0
+            else ReactionKind.DISLIKE,
             "createdBy": rater.id if rater else 1,
         }
         cts = comment_rating_old.get("createdAt")
@@ -53,15 +68,18 @@ def migrate_ratings(session, entry, reaction_dict):
         try:
             # creating reaction from old rating
             rr = Reaction.create(**re_reaction_dict)
-            following2 = (
-                session.query(ShoutReactionsFollower)
-                .where(ShoutReactionsFollower.follower == re_reaction_dict["createdBy"])
-                .filter(ShoutReactionsFollower.shout == rr.shout)
-                .first()
-            )
+            following2 = session.query(
+                ShoutReactionsFollower
+            ).where(
+                ShoutReactionsFollower.follower == re_reaction_dict['createdBy']
+            ).filter(
+                ShoutReactionsFollower.shout == rr.shout
+            ).first()
             if not following2:
                 following2 = ShoutReactionsFollower.create(
-                    follower=re_reaction_dict["createdBy"], shout=rr.shout, auto=True
+                    follower=re_reaction_dict['createdBy'],
+                    shout=rr.shout,
+                    auto=True
                 )
                 session.add(following2)
             session.add(rr)
@@ -132,7 +150,9 @@ async def migrate(entry, storage):
         else:
             stage = "author and old id found"
             try:
-                shout = session.query(Shout).where(Shout.slug == old_shout["slug"]).one()
+                shout = session.query(
+                    Shout
+                ).where(Shout.slug == old_shout["slug"]).one()
                 if shout:
                     reaction_dict["shout"] = shout.id
                     reaction_dict["createdBy"] = author.id if author else 1
@@ -158,9 +178,9 @@ async def migrate(entry, storage):
 
 
 def migrate_2stage(old_comment, idmap):
-    if old_comment.get("body"):
-        new_id = idmap.get(old_comment.get("oid"))
-        new_id = idmap.get(old_comment.get("_id"))
+    if old_comment.get('body'):
+        new_id = idmap.get(old_comment.get('oid'))
+        new_id = idmap.get(old_comment.get('_id'))
         if new_id:
             new_replyto_id = None
             old_replyto_id = old_comment.get("replyTo")
@@ -170,20 +190,17 @@ def migrate_2stage(old_comment, idmap):
             comment = session.query(Reaction).where(Reaction.id == new_id).first()
             try:
                 if new_replyto_id:
-                    new_reply = (
-                        session.query(Reaction).where(Reaction.id == new_replyto_id).first()
-                    )
+                    new_reply = session.query(Reaction).where(Reaction.id == new_replyto_id).first()
                     if not new_reply:
                         print(new_replyto_id)
                         raise Exception("cannot find reply by id!")
                    comment.replyTo = new_reply.id
                    session.add(comment)
-                srf = (
-                    session.query(ShoutReactionsFollower)
-                    .where(ShoutReactionsFollower.shout == comment.shout)
-                    .filter(ShoutReactionsFollower.follower == comment.createdBy)
-                    .first()
-                )
+                srf = session.query(ShoutReactionsFollower).where(
+                    ShoutReactionsFollower.shout == comment.shout
+                ).filter(
+                    ShoutReactionsFollower.follower == comment.createdBy
+                ).first()
                if not srf:
                    srf = ShoutReactionsFollower.create(
                        shout=comment.shout, follower=comment.createdBy, auto=True
@@ -1,16 +1,15 @@
-from base.orm import local_session
 from datetime import datetime, timezone
-import json
 from dateutil.parser import parse as date_parse
-from migration.extract import extract_html, extract_media
-from orm.reaction import Reaction, ReactionKind
-from orm.shout import Shout, ShoutReactionsFollower, ShoutTopic
-from orm.topic import Topic, TopicFollower
-from orm.user import User
-from services.stat.viewed import ViewedStorage
 from sqlalchemy.exc import IntegrityError
 from transliterate import translit
-
+import json
+from base.orm import local_session
+from migration.extract import extract_html, extract_media
+from orm.reaction import Reaction, ReactionKind
+from orm.shout import Shout, ShoutTopic, ShoutReactionsFollower
+from orm.user import User
+from orm.topic import TopicFollower, Topic
+from services.stat.viewed import ViewedStorage
 import re
 
 OLD_DATE = "2016-03-05 22:22:00.350000"
@@ -34,7 +33,7 @@ def get_shout_slug(entry):
             slug = friend.get("slug", "")
             if slug:
                 break
-    slug = re.sub("[^0-9a-zA-Z]+", "-", slug)
+    slug = re.sub('[^0-9a-zA-Z]+', '-', slug)
     return slug
 
 
@@ -42,27 +41,27 @@ def create_author_from_app(app):
     user = None
     userdata = None
     # check if email is used
-    if app["email"]:
+    if app['email']:
         with local_session() as session:
-            user = session.query(User).where(User.email == app["email"]).first()
+            user = session.query(User).where(User.email == app['email']).first()
             if not user:
                 # print('[migration] app %r' % app)
-                name = app.get("name")
+                name = app.get('name')
                 if name:
                     slug = translit(name, "ru", reversed=True).lower()
-                    slug = re.sub("[^0-9a-zA-Z]+", "-", slug)
-                    print("[migration] created slug %s" % slug)
+                    slug = re.sub('[^0-9a-zA-Z]+', '-', slug)
+                    print('[migration] created slug %s' % slug)
                     # check if slug is used
                     if slug:
                         user = session.query(User).where(User.slug == slug).first()
 
                         # get slug from email
                         if user:
-                            slug = app["email"].split("@")[0]
+                            slug = app['email'].split('@')[0]
                             user = session.query(User).where(User.slug == slug).first()
                             # one more try
                            if user:
-                                slug += "-author"
+                                slug += '-author'
                                user = session.query(User).where(User.slug == slug).first()
 
                # create user with application data
@@ -80,7 +79,7 @@ def create_author_from_app(app):
                 user = User.create(**userdata)
                 session.add(user)
                 session.commit()
-                userdata["id"] = user.id
+                userdata['id'] = user.id
 
         userdata = user.dict()
     return userdata
@@ -92,12 +91,11 @@ async def create_shout(shout_dict):
     s = Shout.create(**shout_dict)
     author = s.authors[0]
     with local_session() as session:
-        srf = (
-            session.query(ShoutReactionsFollower)
-            .where(ShoutReactionsFollower.shout == s.id)
-            .filter(ShoutReactionsFollower.follower == author.id)
-            .first()
-        )
+        srf = session.query(ShoutReactionsFollower).where(
+            ShoutReactionsFollower.shout == s.id
+        ).filter(
+            ShoutReactionsFollower.follower == author.id
+        ).first()
         if not srf:
             srf = ShoutReactionsFollower.create(shout=s.id, follower=author.id, auto=True)
             session.add(srf)
@@ -118,14 +116,14 @@ async def get_user(entry, storage):
     elif user_oid:
         userdata = storage["users"]["by_oid"].get(user_oid)
         if not userdata:
-            print("no userdata by oid, anonymous")
+            print('no userdata by oid, anonymous')
             userdata = anondict
             print(app)
     # cleanup slug
     if userdata:
         slug = userdata.get("slug", "")
         if slug:
-            slug = re.sub("[^0-9a-zA-Z]+", "-", slug)
+            slug = re.sub('[^0-9a-zA-Z]+', '-', slug)
             userdata["slug"] = slug
     else:
         userdata = anondict
@@ -139,14 +137,11 @@ async def migrate(entry, storage):
     r = {
         "layout": type2layout[entry["type"]],
         "title": entry["title"],
-        "authors": [
-            author,
-        ],
+        "authors": [author, ],
         "slug": get_shout_slug(entry),
         "cover": (
-            "https://images.discours.io/unsafe/" + entry["thumborId"]
-            if entry.get("thumborId")
-            else entry.get("image", {}).get("url")
+            "https://images.discours.io/unsafe/" +
+            entry["thumborId"] if entry.get("thumborId") else entry.get("image", {}).get("url")
         ),
         "visibility": "public" if entry.get("published") else "community",
         "publishedAt": date_parse(entry.get("publishedAt")) if entry.get("published") else None,
@@ -155,11 +150,11 @@ async def migrate(entry, storage):
         "updatedAt": date_parse(entry["updatedAt"]) if "updatedAt" in entry else ts,
         "createdBy": author.id,
         "topics": await add_topics_follower(entry, storage, author),
-        "body": extract_html(entry, cleanup=True),
+        "body": extract_html(entry, cleanup=True)
     }
 
     # main topic patch
-    r["mainTopic"] = r["topics"][0]
+    r['mainTopic'] = r['topics'][0]
 
     # published author auto-confirm
     if entry.get("published"):
@@ -182,16 +177,14 @@ async def migrate(entry, storage):
             shout_dict["oid"] = entry.get("_id", "")
             shout = await create_shout(shout_dict)
         except IntegrityError as e:
-            print("[migration] create_shout integrity error", e)
+            print('[migration] create_shout integrity error', e)
             shout = await resolve_create_shout(shout_dict)
         except Exception as e:
             raise Exception(e)
 
         # udpate data
         shout_dict = shout.dict()
-        shout_dict["authors"] = [
-            author.dict(),
-        ]
+        shout_dict["authors"] = [author.dict(), ]
 
         # shout topics aftermath
         shout_dict["topics"] = await topics_aftermath(r, storage)
@@ -200,9 +193,7 @@ async def migrate(entry, storage):
         await content_ratings_to_reactions(entry, shout_dict["slug"])
 
         # shout views
-        await ViewedStorage.increment(
-            shout_dict["slug"], amount=entry.get("views", 1), viewer="old-discours"
-        )
+        await ViewedStorage.increment(shout_dict["slug"], amount=entry.get("views", 1), viewer='old-discours')
         # del shout_dict['ratings']
 
     storage["shouts"]["by_oid"][entry["_id"]] = shout_dict
@@ -214,9 +205,7 @@ async def add_topics_follower(entry, storage, user):
     topics = set([])
     category = entry.get("category")
     topics_by_oid = storage["topics"]["by_oid"]
-    oids = [
-        category,
-    ] + entry.get("tags", [])
+    oids = [category, ] + entry.get("tags", [])
     for toid in oids:
         tslug = topics_by_oid.get(toid, {}).get("slug")
         if tslug:
@@ -228,18 +217,23 @@ async def add_topics_follower(entry, storage, user):
             try:
                 tpc = session.query(Topic).where(Topic.slug == tpcslug).first()
                 if tpc:
-                    tf = (
-                        session.query(TopicFollower)
-                        .where(TopicFollower.follower == user.id)
-                        .filter(TopicFollower.topic == tpc.id)
-                        .first()
-                    )
+                    tf = session.query(
+                        TopicFollower
+                    ).where(
+                        TopicFollower.follower == user.id
+                    ).filter(
+                        TopicFollower.topic == tpc.id
+                    ).first()
                     if not tf:
-                        tf = TopicFollower.create(topic=tpc.id, follower=user.id, auto=True)
+                        tf = TopicFollower.create(
+                            topic=tpc.id,
+                            follower=user.id,
+                            auto=True
+                        )
                         session.add(tf)
                         session.commit()
             except IntegrityError:
-                print("[migration.shout] hidden by topic " + tpc.slug)
+                print('[migration.shout] hidden by topic ' + tpc.slug)
     # main topic
     maintopic = storage["replacements"].get(topics_by_oid.get(category, {}).get("slug"))
     if maintopic in ttt:
@@ -260,7 +254,7 @@ async def process_user(userdata, storage, oid):
     if not user:
         try:
             slug = userdata["slug"].lower().strip()
-            slug = re.sub("[^0-9a-zA-Z]+", "-", slug)
+            slug = re.sub('[^0-9a-zA-Z]+', '-', slug)
             userdata["slug"] = slug
             user = User.create(**userdata)
             session.add(user)
@@ -288,9 +282,9 @@ async def resolve_create_shout(shout_dict):
         s = session.query(Shout).filter(Shout.slug == shout_dict["slug"]).first()
         bump = False
         if s:
-            if s.createdAt != shout_dict["createdAt"]:
+            if s.createdAt != shout_dict['createdAt']:
                 # create new with different slug
-                shout_dict["slug"] += "-" + shout_dict["layout"]
+                shout_dict["slug"] += '-' + shout_dict["layout"]
                 try:
                     await create_shout(shout_dict)
                 except IntegrityError as e:
@@ -301,7 +295,10 @@ async def resolve_create_shout(shout_dict):
             for key in shout_dict:
                 if key in s.__dict__:
                     if s.__dict__[key] != shout_dict[key]:
-                        print("[migration] shout already exists, but differs in %s" % key)
+                        print(
+                            "[migration] shout already exists, but differs in %s"
+                            % key
+                        )
                         bump = True
                 else:
                     print("[migration] shout already exists, but lacks %s" % key)
@@ -347,7 +344,9 @@ async def topics_aftermath(entry, storage):
             )
             if not shout_topic_new:
                 try:
-                    ShoutTopic.create(**{"shout": shout.id, "topic": new_topic.id})
+                    ShoutTopic.create(
+                        **{"shout": shout.id, "topic": new_topic.id}
+                    )
                 except Exception:
                     print("[migration] shout topic error: " + newslug)
         session.commit()
@@ -364,7 +363,9 @@ async def content_ratings_to_reactions(entry, slug):
     with local_session() as session:
         for content_rating in entry.get("ratings", []):
             rater = (
-                session.query(User).filter(User.oid == content_rating["createdBy"]).first()
+                session.query(User)
+                .filter(User.oid == content_rating["createdBy"])
+                .first()
             ) or User.default_user
             shout = session.query(Shout).where(Shout.slug == slug).first()
             cts = content_rating.get("createdAt")
@@ -374,7 +375,7 @@ async def content_ratings_to_reactions(entry, slug):
                 if content_rating["value"] > 0
                 else ReactionKind.DISLIKE,
                 "createdBy": rater.id,
-                "shout": shout.id,
+                "shout": shout.id
             }
             reaction = (
                 session.query(Reaction)
@@ -5,26 +5,34 @@ from orm.reaction import Reaction, ReactionKind
 
 
 def migrate(entry, storage):
-    post_oid = entry["contentItem"]
+    post_oid = entry['contentItem']
     print(post_oid)
-    shout_dict = storage["shouts"]["by_oid"].get(post_oid)
+    shout_dict = storage['shouts']['by_oid'].get(post_oid)
     if shout_dict:
-        print(shout_dict["body"])
+        print(shout_dict['body'])
         remark = {
-            "shout": shout_dict["id"],
-            "body": extract_md(html2text(entry["body"]), shout_dict),
-            "kind": ReactionKind.REMARK,
+            "shout": shout_dict['id'],
+            "body": extract_md(
+                html2text(entry['body']),
+                shout_dict
+            ),
+            "kind": ReactionKind.REMARK
         }
 
-        if entry.get("textBefore"):
-            remark["range"] = (
-                str(shout_dict["body"].index(entry["textBefore"] or ""))
-                + ":"
-                + str(
-                    shout_dict["body"].index(entry["textAfter"] or "")
-                    + len(entry["textAfter"] or "")
-                )
-            )
+        if entry.get('textBefore'):
+            remark['range'] = str(
+                shout_dict['body']
+                .index(
+                    entry['textBefore'] or ''
+                )
+            ) + ':' + str(
+                shout_dict['body']
+                .index(
+                    entry['textAfter'] or ''
+                ) + len(
+                    entry['textAfter'] or ''
+                )
+            )
 
         with local_session() as session:
             rmrk = Reaction.create(**remark)
@@ -10,7 +10,7 @@ def migrate(entry):
         "slug": entry["slug"],
         "oid": entry["_id"],
         "title": entry["title"].replace(" ", " "),
-        "body": extract_md(html2text(body_orig)),
+        "body": extract_md(html2text(body_orig))
     }
 
     with local_session() as session:
@@ -1,10 +1,11 @@
-from base.orm import local_session
-import re
 
 from bs4 import BeautifulSoup
 from dateutil.parser import parse
-from orm.user import AuthorFollower, User, UserRating
 from sqlalchemy.exc import IntegrityError
+
+import re
+from base.orm import local_session
+from orm.user import AuthorFollower, User, UserRating
 
 
 def migrate(entry):
@@ -22,7 +23,7 @@ def migrate(entry):
         "muted": False,  # amnesty
         "links": [],
         "name": "anonymous",
-        "password": entry["services"]["password"].get("bcrypt"),
+        "password": entry["services"]["password"].get("bcrypt")
     }
 
     if "updatedAt" in entry:
@@ -32,13 +33,9 @@ def migrate(entry):
     if entry.get("profile"):
         # slug
         slug = entry["profile"].get("path").lower()
-        slug = re.sub("[^0-9a-zA-Z]+", "-", slug).strip()
+        slug = re.sub('[^0-9a-zA-Z]+', '-', slug).strip()
         user_dict["slug"] = slug
-        bio = (
-            (entry.get("profile", {"bio": ""}).get("bio") or "")
-            .replace(r"\(", "(")
-            .replace(r"\)", ")")
-        )
+        bio = (entry.get("profile", {"bio": ""}).get("bio") or "").replace('\(', '(').replace('\)', ')')
         bio_text = BeautifulSoup(bio, features="lxml").text
 
         if len(bio_text) > 120:
@@ -49,7 +46,8 @@ def migrate(entry):
         # userpic
         try:
             user_dict["userpic"] = (
-                "https://images.discours.io/unsafe/" + entry["profile"]["thumborId"]
+                "https://images.discours.io/unsafe/"
+                + entry["profile"]["thumborId"]
             )
         except KeyError:
             try:
@@ -64,7 +62,11 @@ def migrate(entry):
         name = (name + " " + ln) if ln else name
     if not name:
         name = slug if slug else "anonymous"
-    name = entry["profile"]["path"].lower().strip().replace(" ", "-") if len(name) < 2 else name
+    name = (
+        entry["profile"]["path"].lower().strip().replace(" ", "-")
+        if len(name) < 2
+        else name
+    )
     user_dict["name"] = name
 
     # links
@@ -93,7 +95,9 @@ def migrate(entry):
     except IntegrityError:
         print("[migration] cannot create user " + user_dict["slug"])
         with local_session() as session:
-            old_user = session.query(User).filter(User.slug == user_dict["slug"]).first()
+            old_user = (
+                session.query(User).filter(User.slug == user_dict["slug"]).first()
+            )
             old_user.oid = oid
             old_user.password = user_dict["password"]
             session.commit()
@@ -110,7 +114,7 @@ def post_migrate():
         "slug": "old-discours",
         "username": "old-discours",
         "email": "old@discours.io",
-        "name": "Просмотры на старой версии сайта",
+        "name": "Просмотры на старой версии сайта"
     }
 
     with local_session() as session:
@@ -143,8 +147,12 @@ def migrate_2stage(entry, id_map):
         }
 
         user_rating = UserRating.create(**user_rating_dict)
-        if user_rating_dict["value"] > 0:
-            af = AuthorFollower.create(author=user.id, follower=rater.id, auto=True)
+        if user_rating_dict['value'] > 0:
+            af = AuthorFollower.create(
+                author=user.id,
+                follower=rater.id,
+                auto=True
+            )
             session.add(af)
         session.add(user_rating)
         session.commit()