Revert "Feature/lint"

Author: Kosta
Date: 2023-10-27 00:07:35 +03:00
Committed by: GitHub
Parent: 05136699ee
Commit: b142949805

70 changed files with 1465 additions and 1223 deletions

View File

@@ -1,25 +1,24 @@
""" cmd managed migration """
import asyncio
import gc
import json
import sys
from datetime import datetime, timezone
import bs4
from migration.export import export_mdx
from migration.tables.comments import migrate as migrateComment
from migration.tables.comments import migrate_2stage as migrateComment_2stage
from migration.tables.content_items import get_shout_slug
from migration.tables.content_items import migrate as migrateShout
# from migration.tables.remarks import migrate as migrateRemark
from migration.tables.remarks import migrate as migrateRemark
from migration.tables.topics import migrate as migrateTopic
from migration.tables.users import migrate as migrateUser
from migration.tables.users import migrate as migrateUser, post_migrate as users_post_migrate
from migration.tables.users import migrate_2stage as migrateUser_2stage
from migration.tables.users import post_migrate as users_post_migrate
from orm import init_tables
from orm.reaction import Reaction
import asyncio
import bs4
import gc
import json
import re
import sys
TODAY = datetime.strftime(datetime.now(tz=timezone.utc), "%Y%m%d")
OLD_DATE = "2016-03-05 22:22:00.350000"
@@ -64,8 +63,16 @@ async def topics_handle(storage):
del storage["topics"]["by_slug"][oldslug]
storage["topics"]["by_oid"][oid] = storage["topics"]["by_slug"][newslug]
print("[migration] " + str(counter) + " topics migrated")
print("[migration] " + str(len(storage["topics"]["by_oid"].values())) + " topics by oid")
print("[migration] " + str(len(storage["topics"]["by_slug"].values())) + " topics by slug")
print(
"[migration] "
+ str(len(storage["topics"]["by_oid"].values()))
+ " topics by oid"
)
print(
"[migration] "
+ str(len(storage["topics"]["by_slug"].values()))
+ " topics by slug"
)
async def shouts_handle(storage, args):
@@ -110,10 +117,9 @@ async def shouts_handle(storage, args):
# print main counter
counter += 1
print(
"[migration] shouts_handle %d: %s @%s"
% ((counter + 1), shout_dict["slug"], author["slug"])
)
print('[migration] shouts_handle %d: %s @%s' % (
(counter + 1), shout_dict["slug"], author["slug"]
))
b = bs4.BeautifulSoup(shout_dict["body"], "html.parser")
texts = [re.sub(r"[^а-яА-Яa-zA-Z]", "", shout_dict["title"].lower())]  # str.replace would treat this pattern as a literal string; re.sub applies it as the intended regex
@@ -132,13 +138,13 @@ async def shouts_handle(storage, args):
print("[migration] " + str(anonymous_author) + " authored by @anonymous")
# async def remarks_handle(storage):
# print("[migration] comments")
# c = 0
# for entry_remark in storage["remarks"]["data"]:
# remark = await migrateRemark(entry_remark, storage)
# c += 1
# print("[migration] " + str(c) + " remarks migrated")
async def remarks_handle(storage):
print("[migration] comments")
c = 0
for entry_remark in storage["remarks"]["data"]:
remark = await migrateRemark(entry_remark, storage)
c += 1
print("[migration] " + str(c) + " remarks migrated")
async def comments_handle(storage):
@@ -149,9 +155,9 @@ async def comments_handle(storage):
for oldcomment in storage["reactions"]["data"]:
if not oldcomment.get("deleted"):
reaction = await migrateComment(oldcomment, storage)
if isinstance(reaction, str):
if type(reaction) == str:
missed_shouts[reaction] = oldcomment
elif isinstance(reaction, Reaction):
elif type(reaction) == Reaction:
reaction = reaction.dict()
rid = reaction["id"]
oid = reaction["oid"]
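
A side note on the `type(reaction) == str` checks this revert restores: the linted version used `isinstance`, which also accepts subclasses, while `type(...) ==` requires an exact class match. A minimal standalone sketch of the difference (illustrative only, not part of the commit):

class Base:
    pass

class Child(Base):
    pass

obj = Child()
print(type(obj) == Base)        # False: exact type comparison ignores inheritance
print(isinstance(obj, Base))    # True: isinstance walks the class hierarchy
print(isinstance("slug", str))  # True: also covers the exact-match case

For the str/Reaction branches above, both forms behave the same as long as no subclasses are involved.
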
@@ -208,7 +214,9 @@ def data_load():
tags_data = json.loads(open("migration/data/tags.json").read())
storage["topics"]["tags"] = tags_data
print("[migration.load] " + str(len(tags_data)) + " tags ")
cats_data = json.loads(open("migration/data/content_item_categories.json").read())
cats_data = json.loads(
open("migration/data/content_item_categories.json").read()
)
storage["topics"]["cats"] = cats_data
print("[migration.load] " + str(len(cats_data)) + " cats ")
comments_data = json.loads(open("migration/data/comments.json").read())
@@ -227,7 +235,11 @@ def data_load():
storage["users"]["by_oid"][x["_id"]] = x
# storage['users']['by_slug'][x['slug']] = x
# no user.slug yet
print("[migration.load] " + str(len(storage["users"]["by_oid"].keys())) + " users by oid")
print(
"[migration.load] "
+ str(len(storage["users"]["by_oid"].keys()))
+ " users by oid"
)
for x in tags_data:
storage["topics"]["by_oid"][x["_id"]] = x
storage["topics"]["by_slug"][x["slug"]] = x
@@ -235,7 +247,9 @@ def data_load():
storage["topics"]["by_oid"][x["_id"]] = x
storage["topics"]["by_slug"][x["slug"]] = x
print(
"[migration.load] " + str(len(storage["topics"]["by_slug"].keys())) + " topics by slug"
"[migration.load] "
+ str(len(storage["topics"]["by_slug"].keys()))
+ " topics by slug"
)
for item in content_data:
slug = get_shout_slug(item)

View File

@@ -1,9 +1,9 @@
from .utils import DateTimeEncoder
import json
import os
import bson
import gc
import json
import os
from .utils import DateTimeEncoder
def json_tables():
@@ -15,10 +15,10 @@ def json_tables():
"email_subscriptions": [],
"users": [],
"comments": [],
"remarks": [],
"remarks": []
}
for table in data.keys():
print("[migration] bson2json for " + table)
print('[migration] bson2json for ' + table)
gc.collect()
lc = []
bs = open("dump/discours/" + table + ".bson", "rb").read()
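
For readers unfamiliar with the dump layout: each `dump/discours/<table>.bson` file is expected to hold back-to-back BSON documents, which is what the `bson` import above is for. A minimal sketch of decoding such a file with pymongo's `bson.decode_all` (a sketch under that assumption, not code from this commit):

import bson  # shipped with pymongo

def decode_bson_dump(path):
    # decode_all parses a byte string of concatenated BSON documents into a list of dicts
    with open(path, "rb") as f:
        return bson.decode_all(f.read())

# docs = decode_bson_dump("dump/discours/users.bson")
# print(len(docs), "documents")
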

View File

@@ -1,10 +1,11 @@
from .extract import extract_html, extract_media
from .utils import DateTimeEncoder
import json
import os
from datetime import datetime, timezone
import frontmatter
import json
import os
from .extract import extract_html, extract_media
from .utils import DateTimeEncoder
OLD_DATE = "2016-03-05 22:22:00.350000"
EXPORT_DEST = "../discoursio-web/data/"
@@ -70,29 +71,47 @@ def export_slug(slug, storage):
def export_email_subscriptions():
email_subscriptions_data = json.loads(open("migration/data/email_subscriptions.json").read())
email_subscriptions_data = json.loads(
open("migration/data/email_subscriptions.json").read()
)
for data in email_subscriptions_data:
# TODO: migrate to mailgun list manually
# migrate_email_subscription(data)
pass
print("[migration] " + str(len(email_subscriptions_data)) + " email subscriptions exported")
print(
"[migration] "
+ str(len(email_subscriptions_data))
+ " email subscriptions exported"
)
def export_shouts(storage):
# update what was just migrated or load json again
if len(storage["users"]["by_slugs"].keys()) == 0:
storage["users"]["by_slugs"] = json.loads(open(EXPORT_DEST + "authors.json").read())
print("[migration] " + str(len(storage["users"]["by_slugs"].keys())) + " exported authors ")
if len(storage["shouts"]["by_slugs"].keys()) == 0:
storage["shouts"]["by_slugs"] = json.loads(open(EXPORT_DEST + "articles.json").read())
storage["users"]["by_slugs"] = json.loads(
open(EXPORT_DEST + "authors.json").read()
)
print(
"[migration] " + str(len(storage["shouts"]["by_slugs"].keys())) + " exported articles "
"[migration] "
+ str(len(storage["users"]["by_slugs"].keys()))
+ " exported authors "
)
if len(storage["shouts"]["by_slugs"].keys()) == 0:
storage["shouts"]["by_slugs"] = json.loads(
open(EXPORT_DEST + "articles.json").read()
)
print(
"[migration] "
+ str(len(storage["shouts"]["by_slugs"].keys()))
+ " exported articles "
)
for slug in storage["shouts"]["by_slugs"].keys():
export_slug(slug, storage)
def export_json(export_articles={}, export_authors={}, export_topics={}, export_comments={}):
def export_json(
export_articles={}, export_authors={}, export_topics={}, export_comments={}
):
open(EXPORT_DEST + "authors.json", "w").write(
json.dumps(
export_authors,
@@ -133,4 +152,8 @@ def export_json(export_articles={}, export_authors={}, export_topics={}, export_
ensure_ascii=False,
)
)
print("[migration] " + str(len(export_comments.items())) + " exported articles with comments")
print(
"[migration] "
+ str(len(export_comments.items()))
+ " exported articles with comments"
)
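
An aside on the `export_json(export_articles={}, ...)` signature that both versions share: default dicts in Python are created once at definition time and reused across calls, so they leak state if the function ever mutates them (harmless here as long as they are only read). A minimal standalone sketch of the pitfall and the usual `None` idiom:

def leaky(bucket={}):
    bucket["n"] = bucket.get("n", 0) + 1
    return bucket

print(leaky())  # {'n': 1}
print(leaky())  # {'n': 2} (the same dict object is reused)

def safe(bucket=None):
    bucket = {} if bucket is None else bucket
    bucket["n"] = bucket.get("n", 0) + 1
    return bucket

print(safe())  # {'n': 1}
print(safe())  # {'n': 1} (a fresh dict on every call)
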

View File

@@ -1,10 +1,9 @@
from bs4 import BeautifulSoup
import base64
import os
import re
import uuid
# import uuid
from bs4 import BeautifulSoup
TOOLTIP_REGEX = r"(\/\/\/(.+)\/\/\/)"
@@ -28,40 +27,37 @@ def replace_tooltips(body):
return newbody
# def extract_footnotes(body, shout_dict):
# parts = body.split("&&&")
# lll = len(parts)
# newparts = list(parts)
# placed = False
# if lll & 1:
# if lll > 1:
# i = 1
# print("[extract] found %d footnotes in body" % (lll - 1))
# for part in parts[1:]:
# if i & 1:
# placed = True
# if 'a class="footnote-url" href=' in part:
# print("[extract] footnote: " + part)
# fn = 'a class="footnote-url" href="'
# # exxtracted_link = part.split(fn, 1)[1].split('"', 1)[0]
# extracted_body = part.split(fn, 1)[1].split(">", 1)[1].split("</a>", 1)[0]
# print("[extract] footnote link: " + extracted_link)
# with local_session() as session:
# Reaction.create(
# {
# "shout": shout_dict["id"],
# "kind": ReactionKind.FOOTNOTE,
# "body": extracted_body,
# "range": str(body.index(fn + link) - len("<"))
# + ":"
# + str(body.index(extracted_body) + len("</a>")),
# }
# )
# newparts[i] = "<a href='#'></a>"
# else:
# newparts[i] = part
# i += 1
# return ("".join(newparts), placed)
def extract_footnotes(body, shout_dict):
parts = body.split("&&&")
lll = len(parts)
newparts = list(parts)
placed = False
if lll & 1:
if lll > 1:
i = 1
print("[extract] found %d footnotes in body" % (lll - 1))
for part in parts[1:]:
if i & 1:
placed = True
if 'a class="footnote-url" href=' in part:
print("[extract] footnote: " + part)
fn = 'a class="footnote-url" href="'
extracted_link = part.split(fn, 1)[1].split('"', 1)[0]
extracted_body = part.split(fn, 1)[1].split('>', 1)[1].split('</a>', 1)[0]
print("[extract] footnote link: " + extracted_link)
with local_session() as session:
Reaction.create({
"shout": shout_dict['id'],
"kind": ReactionKind.FOOTNOTE,
"body": extracted_body,
"range": str(body.index(fn + extracted_link) - len('<')) + ':' + str(body.index(extracted_body) + len('</a>'))
})
newparts[i] = "<a href='#'></a>"
else:
newparts[i] = part
i += 1
return ("".join(newparts), placed)
def place_tooltips(body):
@@ -80,7 +76,9 @@ def place_tooltips(body):
print("[extract] footnote: " + part)
fn = 'a class="footnote-url" href="'
link = part.split(fn, 1)[1].split('"', 1)[0]
extracted_part = part.split(fn, 1)[0] + " " + part.split("/", 1)[-1]
extracted_part = (
part.split(fn, 1)[0] + " " + part.split("/", 1)[-1]
)
newparts[i] = (
"<Tooltip"
+ (' link="' + link + '" ' if link else "")
@@ -98,9 +96,7 @@ def place_tooltips(body):
return ("".join(newparts), placed)
IMG_REGEX = (
r"\!\[(.*?)\]\((data\:image\/(png|jpeg|jpg);base64\,((?:[A-Za-z\d+\/]{4})*(?:[A-Za-z\d+\/]{3}="
)
IMG_REGEX = r"\!\[(.*?)\]\((data\:image\/(png|jpeg|jpg);base64\,((?:[A-Za-z\d+\/]{4})*(?:[A-Za-z\d+\/]{3}="
IMG_REGEX += r"|[A-Za-z\d+\/]{2}==)))\)"
parentDir = "/".join(os.getcwd().split("/")[:-1])
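
Whichever way the two `IMG_REGEX` statements are laid out, the assembled pattern is the same: it matches a markdown image whose target is an inline base64 data URI, capturing the alt text, the extension, and the padded base64 payload. A quick standalone check:

import re

IMG_REGEX = r"\!\[(.*?)\]\((data\:image\/(png|jpeg|jpg);base64\,((?:[A-Za-z\d+\/]{4})*(?:[A-Za-z\d+\/]{3}="
IMG_REGEX += r"|[A-Za-z\d+\/]{2}==)))\)"

sample = "![logo](data:image/png;base64,iVBORw0KGgo=)"
m = re.search(IMG_REGEX, sample)
if m:
    # group 1: alt text, group 3: extension, group 4: base64 payload
    print(m.group(1), m.group(3), m.group(4))  # logo png iVBORw0KGgo=
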
@@ -163,7 +159,11 @@ def extract_imageparts(bodyparts, prefix):
try:
content = base64.b64decode(b64encoded + "==")
open(public + link, "wb").write(content)
print("[extract] " + str(len(content)) + " image bytes written")
print(
"[extract] "
+ str(len(content))
+ " image bytes written"
)
cache[b64encoded] = name
except Exception:
raise Exception
@@ -172,11 +172,18 @@ def extract_imageparts(bodyparts, prefix):
print("[extract] cached link " + cache[b64encoded])
name = cache[b64encoded]
link = cdn + "/upload/image-" + name + "." + ext
newparts[i] = current[: -len(mime)] + current[-len(mime) :] + link + next[-b64end:]
newparts[i] = (
current[: -len(mime)]
+ current[-len(mime) :]
+ link
+ next[-b64end:]
)
newparts[i + 1] = next[:-b64end]
break
return (
extract_imageparts(newparts[i] + newparts[i + 1] + b64.join(bodyparts[(i + 2) :]), prefix)
extract_imageparts(
newparts[i] + newparts[i + 1] + b64.join(bodyparts[(i + 2) :]), prefix
)
if len(bodyparts) > (i + 1)
else "".join(newparts)
)
@@ -230,6 +237,7 @@ di = "data:image"
def extract_md_images(body, prefix):
newbody = ""
body = (
body.replace("\n! [](" + di, "\n ![](" + di)
.replace("\n[](" + di, "\n![](" + di)
@@ -237,10 +245,10 @@ def extract_md_images(body, prefix):
)
parts = body.split(di)
if len(parts) > 1:
new_body = extract_dataimages(parts, prefix)
newbody = extract_dataimages(parts, prefix)
else:
new_body = body
return new_body
newbody = body
return newbody
def cleanup_md(body):
@@ -263,28 +271,29 @@ def cleanup_md(body):
return newbody
# def extract_md(body, shout_dict=None):
# newbody = body
# if newbody:
# newbody = cleanup_md(newbody)
# if not newbody:
# raise Exception("cleanup error")
#
# if shout_dict:
# uid = shout_dict["id"] or uuid.uuid4()
# newbody = extract_md_images(newbody, uid)
# if not newbody:
# raise Exception("extract_images error")
#
# newbody, placed = extract_footnotes(body, shout_dict)
# if not newbody:
# raise Exception("extract_footnotes error")
#
# return newbody
def extract_md(body, shout_dict = None):
newbody = body
if newbody:
newbody = cleanup_md(newbody)
if not newbody:
raise Exception("cleanup error")
if shout_dict:
uid = shout_dict['id'] or uuid.uuid4()
newbody = extract_md_images(newbody, uid)
if not newbody:
raise Exception("extract_images error")
newbody, placed = extract_footnotes(body, shout_dict)
if not newbody:
raise Exception("extract_footnotes error")
return newbody
def extract_media(entry):
"""normalized media extraction method"""
''' normalized media extraction method '''
# media [ { title pic url body } ]}
kind = entry.get("type")
if not kind:
@@ -314,7 +323,12 @@ def extract_media(entry):
url = "https://vimeo.com/" + m["vimeoId"]
# body
body = m.get("body") or m.get("literatureBody") or ""
media.append({"url": url, "pic": pic, "title": title, "body": body})
media.append({
"url": url,
"pic": pic,
"title": title,
"body": body
})
return media
@@ -384,7 +398,9 @@ def cleanup_html(body: str) -> str:
r"<h4>\s*</h4>",
r"<div>\s*</div>",
]
regex_replace = {r"<br>\s*</p>": "</p>"}
regex_replace = {
r"<br>\s*</p>": "</p>"
}
changed = True
while changed:
# we need several iterations to clean nested tags this way
@@ -398,15 +414,16 @@ def cleanup_html(body: str) -> str:
changed = True
return new_body
def extract_html(entry, cleanup=False):
body_orig = (entry.get("body") or "").replace(r"\(", "(").replace(r"\)", ")")
def extract_html(entry, shout_id = None, cleanup=False):
body_orig = (entry.get("body") or "").replace('\(', '(').replace('\)', ')')
if cleanup:
# we do that before bs parsing to catch the invalid html
body_clean = cleanup_html(body_orig)
if body_clean != body_orig:
print(f"[migration] html cleaned for slug {entry.get('slug', None)}")
body_orig = body_clean
if shout_id:
extract_footnotes(body_orig, shout_id)
body_html = str(BeautifulSoup(body_orig, features="html.parser"))
if cleanup:
# we do that after bs parsing because it can add dummy tags

View File

@@ -1,5 +1,13 @@
"""html2text: Turn HTML into equivalent Markdown-structured text."""
import html.entities
import html.parser
import re
import string
import urllib.parse as urlparse
from textwrap import wrap
from typing import Dict, List, Optional, Tuple, Union
from . import config
from .elements import AnchorElement, ListElement
from .typing import OutCallback
@@ -18,14 +26,6 @@ from .utils import (
skipwrap,
unifiable_n,
)
from textwrap import wrap
from typing import Dict, List, Optional, Tuple, Union
import html.entities
import html.parser
import re
import string
import urllib.parse as urlparse
__version__ = (2020, 1, 16)
@@ -119,7 +119,9 @@ class HTML2Text(html.parser.HTMLParser):
self.lastWasList = False
self.style = 0
self.style_def = {} # type: Dict[str, Dict[str, str]]
self.tag_stack = [] # type: List[Tuple[str, Dict[str, Optional[str]], Dict[str, str]]]
self.tag_stack = (
[]
) # type: List[Tuple[str, Dict[str, Optional[str]], Dict[str, str]]]
self.emphasis = 0
self.drop_white_space = 0
self.inheader = False
@@ -298,7 +300,9 @@ class HTML2Text(html.parser.HTMLParser):
if strikethrough:
self.quiet -= 1
def handle_tag(self, tag: str, attrs: Dict[str, Optional[str]], start: bool) -> None:
def handle_tag(
self, tag: str, attrs: Dict[str, Optional[str]], start: bool
) -> None:
self.current_tag = tag
if self.tag_callback is not None:
@@ -329,7 +333,9 @@ class HTML2Text(html.parser.HTMLParser):
tag_style = element_style(attrs, self.style_def, parent_style)
self.tag_stack.append((tag, attrs, tag_style))
else:
dummy, attrs, tag_style = self.tag_stack.pop() if self.tag_stack else (None, {}, {})
dummy, attrs, tag_style = (
self.tag_stack.pop() if self.tag_stack else (None, {}, {})
)
if self.tag_stack:
parent_style = self.tag_stack[-1][2]
@@ -379,7 +385,11 @@ class HTML2Text(html.parser.HTMLParser):
):
self.o("`") # NOTE: same as <code>
self.span_highlight = True
elif self.current_class == "lead" and not self.inheader and not self.span_highlight:
elif (
self.current_class == "lead"
and not self.inheader
and not self.span_highlight
):
# self.o("==") # NOTE: CriticMarkup {==
self.span_lead = True
else:
@@ -469,7 +479,11 @@ class HTML2Text(html.parser.HTMLParser):
and not self.span_lead
and not self.span_highlight
):
if start and self.preceding_data and self.preceding_data[-1] == self.strong_mark[0]:
if (
start
and self.preceding_data
and self.preceding_data[-1] == self.strong_mark[0]
):
strong = " " + self.strong_mark
self.preceding_data += " "
else:
@@ -534,8 +548,13 @@ class HTML2Text(html.parser.HTMLParser):
"href" in attrs
and not attrs["href"].startswith("#_ftn")
and attrs["href"] is not None
and not (self.skip_internal_links and attrs["href"].startswith("#"))
and not (self.ignore_mailto_links and attrs["href"].startswith("mailto:"))
and not (
self.skip_internal_links and attrs["href"].startswith("#")
)
and not (
self.ignore_mailto_links
and attrs["href"].startswith("mailto:")
)
):
self.astack.append(attrs)
self.maybe_automatic_link = attrs["href"]
@@ -619,7 +638,9 @@ class HTML2Text(html.parser.HTMLParser):
self.o("![" + escape_md(alt) + "]")
if self.inline_links:
href = attrs.get("href") or ""
self.o("(" + escape_md(urlparse.urljoin(self.baseurl, href)) + ")")
self.o(
"(" + escape_md(urlparse.urljoin(self.baseurl, href)) + ")"
)
else:
i = self.previousIndex(attrs)
if i is not None:
@@ -675,7 +696,9 @@ class HTML2Text(html.parser.HTMLParser):
# WARNING: does not line up <ol><li>s > 9 correctly.
parent_list = None
for list in self.list:
self.o(" " if parent_list == "ol" and list.name == "ul" else " ")
self.o(
" " if parent_list == "ol" and list.name == "ul" else " "
)
parent_list = list.name
if li.name == "ul":
@@ -764,7 +787,9 @@ class HTML2Text(html.parser.HTMLParser):
self.pbr()
self.br_toggle = " "
def o(self, data: str, puredata: bool = False, force: Union[bool, str] = False) -> None:
def o(
self, data: str, puredata: bool = False, force: Union[bool, str] = False
) -> None:
"""
Deal with indentation and whitespace
"""
@@ -839,7 +864,9 @@ class HTML2Text(html.parser.HTMLParser):
self.out(" ")
self.space = False
if self.a and ((self.p_p == 2 and self.links_each_paragraph) or force == "end"):
if self.a and (
(self.p_p == 2 and self.links_each_paragraph) or force == "end"
):
if force == "end":
self.out("\n")
@@ -898,7 +925,11 @@ class HTML2Text(html.parser.HTMLParser):
if self.maybe_automatic_link is not None:
href = self.maybe_automatic_link
if href == data and self.absolute_url_matcher.match(href) and self.use_automatic_links:
if (
href == data
and self.absolute_url_matcher.match(href)
and self.use_automatic_links
):
self.o("<" + data + ">")
self.empty_link = False
return
@@ -969,7 +1000,9 @@ class HTML2Text(html.parser.HTMLParser):
self.inline_links = False
for para in text.split("\n"):
if len(para) > 0:
if not skipwrap(para, self.wrap_links, self.wrap_list_items, self.wrap_tables):
if not skipwrap(
para, self.wrap_links, self.wrap_list_items, self.wrap_tables
):
indent = ""
if para.startswith(" " + self.ul_item_mark):
# list item continuation: add a double indent to the
@@ -1010,7 +1043,9 @@ class HTML2Text(html.parser.HTMLParser):
return result
def html2text(html: str, baseurl: str = "", bodywidth: Optional[int] = config.BODY_WIDTH) -> str:
def html2text(
html: str, baseurl: str = "", bodywidth: Optional[int] = config.BODY_WIDTH
) -> str:
h = html.strip() or ""
if h:
h = HTML2Text(baseurl=baseurl, bodywidth=bodywidth)

View File

@@ -1,8 +1,8 @@
from . import __version__, config, HTML2Text
import argparse
import sys
from . import HTML2Text, __version__, config
# noinspection DuplicatedCode
def main() -> None:
@@ -117,7 +117,10 @@ def main() -> None:
dest="images_with_size",
action="store_true",
default=config.IMAGES_WITH_SIZE,
help=("Write image tags with height and width attrs as raw html to retain " "dimensions"),
help=(
"Write image tags with height and width attrs as raw html to retain "
"dimensions"
),
)
p.add_argument(
"-g",
@@ -257,7 +260,9 @@ def main() -> None:
default=config.CLOSE_QUOTE,
help="The character used to close quotes",
)
p.add_argument("--version", action="version", version=".".join(map(str, __version__)))
p.add_argument(
"--version", action="version", version=".".join(map(str, __version__))
)
p.add_argument("filename", nargs="?")
p.add_argument("encoding", nargs="?", default="utf-8")
args = p.parse_args()

View File

@@ -1,10 +1,12 @@
from . import config
import html.entities
from typing import Dict, List, Optional
import html.entities
from . import config
unifiable_n = {
html.entities.name2codepoint[k]: v for k, v in config.UNIFIABLE.items() if k != "nbsp"
html.entities.name2codepoint[k]: v
for k, v in config.UNIFIABLE.items()
if k != "nbsp"
}
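
Both layouts of the `unifiable_n` comprehension build the same thing: a map from HTML entity codepoints to plain-text replacements, keyed by number so the parser can resolve numeric character references. A tiny standalone illustration with an inline stand-in for `config.UNIFIABLE` (assumed here to map entity names to replacement strings):

import html.entities

UNIFIABLE = {"rsquo": "'", "ndash": "-", "nbsp": " "}  # stand-in sample

unifiable_n = {
    html.entities.name2codepoint[k]: v
    for k, v in UNIFIABLE.items()
    if k != "nbsp"  # nbsp gets special handling elsewhere in html2text
}
print(unifiable_n)  # {8217: "'", 8211: '-'}
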
@@ -154,7 +156,9 @@ def list_numbering_start(attrs: Dict[str, Optional[str]]) -> int:
return 0
def skipwrap(para: str, wrap_links: bool, wrap_list_items: bool, wrap_tables: bool) -> bool:
def skipwrap(
para: str, wrap_links: bool, wrap_list_items: bool, wrap_tables: bool
) -> bool:
# If it appears to contain a link
# don't wrap
if not wrap_links and config.RE_LINK.search(para):
@@ -232,7 +236,9 @@ def reformat_table(lines: List[str], right_margin: int) -> List[str]:
max_width += [len(x) + right_margin for x in cols[-(num_cols - max_cols) :]]
max_cols = num_cols
max_width = [max(len(x) + right_margin, old_len) for x, old_len in zip(cols, max_width)]
max_width = [
max(len(x) + right_margin, old_len) for x, old_len in zip(cols, max_width)
]
# reformat
new_lines = []
@@ -241,13 +247,15 @@ def reformat_table(lines: List[str], right_margin: int) -> List[str]:
if set(line.strip()) == set("-|"):
filler = "-"
new_cols = [
x.rstrip() + (filler * (M - len(x.rstrip()))) for x, M in zip(cols, max_width)
x.rstrip() + (filler * (M - len(x.rstrip())))
for x, M in zip(cols, max_width)
]
new_lines.append("|-" + "|".join(new_cols) + "|")
else:
filler = " "
new_cols = [
x.rstrip() + (filler * (M - len(x.rstrip()))) for x, M in zip(cols, max_width)
x.rstrip() + (filler * (M - len(x.rstrip())))
for x, M in zip(cols, max_width)
]
new_lines.append("| " + "|".join(new_cols) + "|")
return new_lines
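
To see what `reformat_table` produces, here is a small usage sketch (assuming the function is importable as in the upstream html2text package; exact widths depend on `right_margin`):

from html2text.utils import reformat_table

lines = [
    "| name | value |",
    "|-|-|",
    "| alpha | 1 |",
    "| beta | 42 |",
]
# every cell is right-padded so all rows share the same column widths
print("\n".join(reformat_table(lines, right_margin=1)))
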

View File

@@ -1,50 +1,65 @@
from base.orm import local_session
from datetime import datetime, timezone
from dateutil.parser import parse as date_parse
from base.orm import local_session
from migration.html2text import html2text
from orm.reaction import Reaction, ReactionKind
from orm.shout import Shout, ShoutReactionsFollower
from orm.shout import ShoutReactionsFollower
from orm.topic import TopicFollower
from orm.user import User
from orm.shout import Shout
ts = datetime.now(tz=timezone.utc)
def auto_followers(session, topics, reaction_dict):
# creating shout's reactions following for reaction author
following1 = (
session.query(ShoutReactionsFollower)
.where(ShoutReactionsFollower.follower == reaction_dict["createdBy"])
.filter(ShoutReactionsFollower.shout == reaction_dict["shout"])
.first()
)
following1 = session.query(
ShoutReactionsFollower
).where(
ShoutReactionsFollower.follower == reaction_dict["createdBy"]
).filter(
ShoutReactionsFollower.shout == reaction_dict["shout"]
).first()
if not following1:
following1 = ShoutReactionsFollower.create(
follower=reaction_dict["createdBy"], shout=reaction_dict["shout"], auto=True
follower=reaction_dict["createdBy"],
shout=reaction_dict["shout"],
auto=True
)
session.add(following1)
# creating topics followings for reaction author
for t in topics:
tf = (
session.query(TopicFollower)
.where(TopicFollower.follower == reaction_dict["createdBy"])
.filter(TopicFollower.topic == t["id"])
.first()
)
tf = session.query(
TopicFollower
).where(
TopicFollower.follower == reaction_dict["createdBy"]
).filter(
TopicFollower.topic == t['id']
).first()
if not tf:
topic_following = TopicFollower.create(
follower=reaction_dict["createdBy"], topic=t["id"], auto=True
follower=reaction_dict["createdBy"],
topic=t['id'],
auto=True
)
session.add(topic_following)
def migrate_ratings(session, entry, reaction_dict):
for comment_rating_old in entry.get("ratings", []):
rater = session.query(User).filter(User.oid == comment_rating_old["createdBy"]).first()
rater = (
session.query(User)
.filter(User.oid == comment_rating_old["createdBy"])
.first()
)
re_reaction_dict = {
"shout": reaction_dict["shout"],
"replyTo": reaction_dict["id"],
"kind": ReactionKind.LIKE if comment_rating_old["value"] > 0 else ReactionKind.DISLIKE,
"kind": ReactionKind.LIKE
if comment_rating_old["value"] > 0
else ReactionKind.DISLIKE,
"createdBy": rater.id if rater else 1,
}
cts = comment_rating_old.get("createdAt")
@@ -53,15 +68,18 @@ def migrate_ratings(session, entry, reaction_dict):
try:
# creating reaction from old rating
rr = Reaction.create(**re_reaction_dict)
following2 = (
session.query(ShoutReactionsFollower)
.where(ShoutReactionsFollower.follower == re_reaction_dict["createdBy"])
.filter(ShoutReactionsFollower.shout == rr.shout)
.first()
)
following2 = session.query(
ShoutReactionsFollower
).where(
ShoutReactionsFollower.follower == re_reaction_dict['createdBy']
).filter(
ShoutReactionsFollower.shout == rr.shout
).first()
if not following2:
following2 = ShoutReactionsFollower.create(
follower=re_reaction_dict["createdBy"], shout=rr.shout, auto=True
follower=re_reaction_dict['createdBy'],
shout=rr.shout,
auto=True
)
session.add(following2)
session.add(rr)
@@ -132,7 +150,9 @@ async def migrate(entry, storage):
else:
stage = "author and old id found"
try:
shout = session.query(Shout).where(Shout.slug == old_shout["slug"]).one()
shout = session.query(
Shout
).where(Shout.slug == old_shout["slug"]).one()
if shout:
reaction_dict["shout"] = shout.id
reaction_dict["createdBy"] = author.id if author else 1
@@ -158,9 +178,9 @@ async def migrate(entry, storage):
def migrate_2stage(old_comment, idmap):
if old_comment.get("body"):
new_id = idmap.get(old_comment.get("oid"))
new_id = idmap.get(old_comment.get("_id"))
if old_comment.get('body'):
new_id = idmap.get(old_comment.get('oid'))
new_id = idmap.get(old_comment.get('_id'))
if new_id:
new_replyto_id = None
old_replyto_id = old_comment.get("replyTo")
@@ -170,20 +190,17 @@ def migrate_2stage(old_comment, idmap):
comment = session.query(Reaction).where(Reaction.id == new_id).first()
try:
if new_replyto_id:
new_reply = (
session.query(Reaction).where(Reaction.id == new_replyto_id).first()
)
new_reply = session.query(Reaction).where(Reaction.id == new_replyto_id).first()
if not new_reply:
print(new_replyto_id)
raise Exception("cannot find reply by id!")
comment.replyTo = new_reply.id
session.add(comment)
srf = (
session.query(ShoutReactionsFollower)
.where(ShoutReactionsFollower.shout == comment.shout)
.filter(ShoutReactionsFollower.follower == comment.createdBy)
.first()
)
srf = session.query(ShoutReactionsFollower).where(
ShoutReactionsFollower.shout == comment.shout
).filter(
ShoutReactionsFollower.follower == comment.createdBy
).first()
if not srf:
srf = ShoutReactionsFollower.create(
shout=comment.shout, follower=comment.createdBy, auto=True
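
Both sides of this file repeat the same lookup-then-create idiom for follower rows (`ShoutReactionsFollower`, `TopicFollower`). A generic sketch of that pattern in plain SQLAlchemy terms; the helper name and `auto` flag usage are illustrative, not from this repo:

from sqlalchemy.orm import Session

def get_or_create_follower(session: Session, model, **keys):
    # look up an existing row by its identifying columns
    row = session.query(model).filter_by(**keys).first()
    if row is None:
        # insert one only when the lookup came back empty
        row = model(**keys, auto=True)
        session.add(row)
    return row

# usage sketch:
# get_or_create_follower(session, ShoutReactionsFollower, shout=shout_id, follower=user_id)
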

View File

@@ -1,16 +1,15 @@
from base.orm import local_session
from datetime import datetime, timezone
import json
from dateutil.parser import parse as date_parse
from migration.extract import extract_html, extract_media
from orm.reaction import Reaction, ReactionKind
from orm.shout import Shout, ShoutReactionsFollower, ShoutTopic
from orm.topic import Topic, TopicFollower
from orm.user import User
from services.stat.viewed import ViewedStorage
from sqlalchemy.exc import IntegrityError
from transliterate import translit
import json
from base.orm import local_session
from migration.extract import extract_html, extract_media
from orm.reaction import Reaction, ReactionKind
from orm.shout import Shout, ShoutTopic, ShoutReactionsFollower
from orm.user import User
from orm.topic import TopicFollower, Topic
from services.stat.viewed import ViewedStorage
import re
OLD_DATE = "2016-03-05 22:22:00.350000"
@@ -34,7 +33,7 @@ def get_shout_slug(entry):
slug = friend.get("slug", "")
if slug:
break
slug = re.sub("[^0-9a-zA-Z]+", "-", slug)
slug = re.sub('[^0-9a-zA-Z]+', '-', slug)
return slug
@@ -42,27 +41,27 @@ def create_author_from_app(app):
user = None
userdata = None
# check if email is used
if app["email"]:
if app['email']:
with local_session() as session:
user = session.query(User).where(User.email == app["email"]).first()
user = session.query(User).where(User.email == app['email']).first()
if not user:
# print('[migration] app %r' % app)
name = app.get("name")
name = app.get('name')
if name:
slug = translit(name, "ru", reversed=True).lower()
slug = re.sub("[^0-9a-zA-Z]+", "-", slug)
print("[migration] created slug %s" % slug)
slug = re.sub('[^0-9a-zA-Z]+', '-', slug)
print('[migration] created slug %s' % slug)
# check if slug is used
if slug:
user = session.query(User).where(User.slug == slug).first()
# get slug from email
if user:
slug = app["email"].split("@")[0]
slug = app['email'].split('@')[0]
user = session.query(User).where(User.slug == slug).first()
# one more try
if user:
slug += "-author"
slug += '-author'
user = session.query(User).where(User.slug == slug).first()
# create user with application data
@@ -80,7 +79,7 @@ def create_author_from_app(app):
user = User.create(**userdata)
session.add(user)
session.commit()
userdata["id"] = user.id
userdata['id'] = user.id
userdata = user.dict()
return userdata
@@ -92,12 +91,11 @@ async def create_shout(shout_dict):
s = Shout.create(**shout_dict)
author = s.authors[0]
with local_session() as session:
srf = (
session.query(ShoutReactionsFollower)
.where(ShoutReactionsFollower.shout == s.id)
.filter(ShoutReactionsFollower.follower == author.id)
.first()
)
srf = session.query(ShoutReactionsFollower).where(
ShoutReactionsFollower.shout == s.id
).filter(
ShoutReactionsFollower.follower == author.id
).first()
if not srf:
srf = ShoutReactionsFollower.create(shout=s.id, follower=author.id, auto=True)
session.add(srf)
@@ -118,14 +116,14 @@ async def get_user(entry, storage):
elif user_oid:
userdata = storage["users"]["by_oid"].get(user_oid)
if not userdata:
print("no userdata by oid, anonymous")
print('no userdata by oid, anonymous')
userdata = anondict
print(app)
# cleanup slug
if userdata:
slug = userdata.get("slug", "")
if slug:
slug = re.sub("[^0-9a-zA-Z]+", "-", slug)
slug = re.sub('[^0-9a-zA-Z]+', '-', slug)
userdata["slug"] = slug
else:
userdata = anondict
@@ -139,14 +137,11 @@ async def migrate(entry, storage):
r = {
"layout": type2layout[entry["type"]],
"title": entry["title"],
"authors": [
author,
],
"authors": [author, ],
"slug": get_shout_slug(entry),
"cover": (
"https://images.discours.io/unsafe/" + entry["thumborId"]
if entry.get("thumborId")
else entry.get("image", {}).get("url")
"https://images.discours.io/unsafe/" +
entry["thumborId"] if entry.get("thumborId") else entry.get("image", {}).get("url")
),
"visibility": "public" if entry.get("published") else "community",
"publishedAt": date_parse(entry.get("publishedAt")) if entry.get("published") else None,
@@ -155,11 +150,11 @@ async def migrate(entry, storage):
"updatedAt": date_parse(entry["updatedAt"]) if "updatedAt" in entry else ts,
"createdBy": author.id,
"topics": await add_topics_follower(entry, storage, author),
"body": extract_html(entry, cleanup=True),
"body": extract_html(entry, cleanup=True)
}
# main topic patch
r["mainTopic"] = r["topics"][0]
r['mainTopic'] = r['topics'][0]
# published author auto-confirm
if entry.get("published"):
@@ -182,16 +177,14 @@ async def migrate(entry, storage):
shout_dict["oid"] = entry.get("_id", "")
shout = await create_shout(shout_dict)
except IntegrityError as e:
print("[migration] create_shout integrity error", e)
print('[migration] create_shout integrity error', e)
shout = await resolve_create_shout(shout_dict)
except Exception as e:
raise Exception(e)
# update data
shout_dict = shout.dict()
shout_dict["authors"] = [
author.dict(),
]
shout_dict["authors"] = [author.dict(), ]
# shout topics aftermath
shout_dict["topics"] = await topics_aftermath(r, storage)
@@ -200,9 +193,7 @@ async def migrate(entry, storage):
await content_ratings_to_reactions(entry, shout_dict["slug"])
# shout views
await ViewedStorage.increment(
shout_dict["slug"], amount=entry.get("views", 1), viewer="old-discours"
)
await ViewedStorage.increment(shout_dict["slug"], amount=entry.get("views", 1), viewer='old-discours')
# del shout_dict['ratings']
storage["shouts"]["by_oid"][entry["_id"]] = shout_dict
@@ -214,9 +205,7 @@ async def add_topics_follower(entry, storage, user):
topics = set([])
category = entry.get("category")
topics_by_oid = storage["topics"]["by_oid"]
oids = [
category,
] + entry.get("tags", [])
oids = [category, ] + entry.get("tags", [])
for toid in oids:
tslug = topics_by_oid.get(toid, {}).get("slug")
if tslug:
@@ -228,18 +217,23 @@ async def add_topics_follower(entry, storage, user):
try:
tpc = session.query(Topic).where(Topic.slug == tpcslug).first()
if tpc:
tf = (
session.query(TopicFollower)
.where(TopicFollower.follower == user.id)
.filter(TopicFollower.topic == tpc.id)
.first()
)
tf = session.query(
TopicFollower
).where(
TopicFollower.follower == user.id
).filter(
TopicFollower.topic == tpc.id
).first()
if not tf:
tf = TopicFollower.create(topic=tpc.id, follower=user.id, auto=True)
tf = TopicFollower.create(
topic=tpc.id,
follower=user.id,
auto=True
)
session.add(tf)
session.commit()
except IntegrityError:
print("[migration.shout] hidden by topic " + tpc.slug)
print('[migration.shout] hidden by topic ' + tpc.slug)
# main topic
maintopic = storage["replacements"].get(topics_by_oid.get(category, {}).get("slug"))
if maintopic in ttt:
@@ -260,7 +254,7 @@ async def process_user(userdata, storage, oid):
if not user:
try:
slug = userdata["slug"].lower().strip()
slug = re.sub("[^0-9a-zA-Z]+", "-", slug)
slug = re.sub('[^0-9a-zA-Z]+', '-', slug)
userdata["slug"] = slug
user = User.create(**userdata)
session.add(user)
@@ -288,9 +282,9 @@ async def resolve_create_shout(shout_dict):
s = session.query(Shout).filter(Shout.slug == shout_dict["slug"]).first()
bump = False
if s:
if s.createdAt != shout_dict["createdAt"]:
if s.createdAt != shout_dict['createdAt']:
# create new with different slug
shout_dict["slug"] += "-" + shout_dict["layout"]
shout_dict["slug"] += '-' + shout_dict["layout"]
try:
await create_shout(shout_dict)
except IntegrityError as e:
@@ -301,7 +295,10 @@ async def resolve_create_shout(shout_dict):
for key in shout_dict:
if key in s.__dict__:
if s.__dict__[key] != shout_dict[key]:
print("[migration] shout already exists, but differs in %s" % key)
print(
"[migration] shout already exists, but differs in %s"
% key
)
bump = True
else:
print("[migration] shout already exists, but lacks %s" % key)
@@ -347,7 +344,9 @@ async def topics_aftermath(entry, storage):
)
if not shout_topic_new:
try:
ShoutTopic.create(**{"shout": shout.id, "topic": new_topic.id})
ShoutTopic.create(
**{"shout": shout.id, "topic": new_topic.id}
)
except Exception:
print("[migration] shout topic error: " + newslug)
session.commit()
@@ -364,7 +363,9 @@ async def content_ratings_to_reactions(entry, slug):
with local_session() as session:
for content_rating in entry.get("ratings", []):
rater = (
session.query(User).filter(User.oid == content_rating["createdBy"]).first()
session.query(User)
.filter(User.oid == content_rating["createdBy"])
.first()
) or User.default_user
shout = session.query(Shout).where(Shout.slug == slug).first()
cts = content_rating.get("createdAt")
@@ -374,7 +375,7 @@ async def content_ratings_to_reactions(entry, slug):
if content_rating["value"] > 0
else ReactionKind.DISLIKE,
"createdBy": rater.id,
"shout": shout.id,
"shout": shout.id
}
reaction = (
session.query(Reaction)

View File

@@ -5,26 +5,34 @@ from orm.reaction import Reaction, ReactionKind
def migrate(entry, storage):
post_oid = entry["contentItem"]
post_oid = entry['contentItem']
print(post_oid)
shout_dict = storage["shouts"]["by_oid"].get(post_oid)
shout_dict = storage['shouts']['by_oid'].get(post_oid)
if shout_dict:
print(shout_dict["body"])
print(shout_dict['body'])
remark = {
"shout": shout_dict["id"],
"body": extract_md(html2text(entry["body"]), shout_dict),
"kind": ReactionKind.REMARK,
"shout": shout_dict['id'],
"body": extract_md(
html2text(entry['body']),
shout_dict
),
"kind": ReactionKind.REMARK
}
if entry.get("textBefore"):
remark["range"] = (
str(shout_dict["body"].index(entry["textBefore"] or ""))
+ ":"
+ str(
shout_dict["body"].index(entry["textAfter"] or "")
+ len(entry["textAfter"] or "")
if entry.get('textBefore'):
remark['range'] = str(
shout_dict['body']
.index(
entry['textBefore'] or ''
)
) + ':' + str(
shout_dict['body']
.index(
entry['textAfter'] or ''
) + len(
entry['textAfter'] or ''
)
)
)
with local_session() as session:
rmrk = Reaction.create(**remark)
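
The `range` field built above anchors a remark to its quoted span as "start:end" character offsets into the shout body: from the start of `textBefore` through the end of `textAfter`. A toy illustration of the same arithmetic with hypothetical values:

body = "The quick brown fox jumps over the lazy dog"
text_before = "quick"
text_after = "fox"

start = body.index(text_before)
end = body.index(text_after) + len(text_after)
remark_range = str(start) + ":" + str(end)
print(remark_range, body[start:end])  # 4:19 quick brown fox
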

View File

@@ -10,7 +10,7 @@ def migrate(entry):
"slug": entry["slug"],
"oid": entry["_id"],
"title": entry["title"].replace("&nbsp;", " "),
"body": extract_md(html2text(body_orig)),
"body": extract_md(html2text(body_orig))
}
with local_session() as session:

View File

@@ -1,10 +1,11 @@
from base.orm import local_session
import re
from bs4 import BeautifulSoup
from dateutil.parser import parse
from orm.user import AuthorFollower, User, UserRating
from sqlalchemy.exc import IntegrityError
import re
from base.orm import local_session
from orm.user import AuthorFollower, User, UserRating
def migrate(entry):
@@ -22,7 +23,7 @@ def migrate(entry):
"muted": False, # amnesty
"links": [],
"name": "anonymous",
"password": entry["services"]["password"].get("bcrypt"),
"password": entry["services"]["password"].get("bcrypt")
}
if "updatedAt" in entry:
@@ -32,13 +33,9 @@ def migrate(entry):
if entry.get("profile"):
# slug
slug = entry["profile"].get("path").lower()
slug = re.sub("[^0-9a-zA-Z]+", "-", slug).strip()
slug = re.sub('[^0-9a-zA-Z]+', '-', slug).strip()
user_dict["slug"] = slug
bio = (
(entry.get("profile", {"bio": ""}).get("bio") or "")
.replace(r"\(", "(")
.replace(r"\)", ")")
)
bio = (entry.get("profile", {"bio": ""}).get("bio") or "").replace('\(', '(').replace('\)', ')')
bio_text = BeautifulSoup(bio, features="lxml").text
if len(bio_text) > 120:
@@ -49,7 +46,8 @@ def migrate(entry):
# userpic
try:
user_dict["userpic"] = (
"https://images.discours.io/unsafe/" + entry["profile"]["thumborId"]
"https://images.discours.io/unsafe/"
+ entry["profile"]["thumborId"]
)
except KeyError:
try:
@@ -64,7 +62,11 @@ def migrate(entry):
name = (name + " " + ln) if ln else name
if not name:
name = slug if slug else "anonymous"
name = entry["profile"]["path"].lower().strip().replace(" ", "-") if len(name) < 2 else name
name = (
entry["profile"]["path"].lower().strip().replace(" ", "-")
if len(name) < 2
else name
)
user_dict["name"] = name
# links
@@ -93,7 +95,9 @@ def migrate(entry):
except IntegrityError:
print("[migration] cannot create user " + user_dict["slug"])
with local_session() as session:
old_user = session.query(User).filter(User.slug == user_dict["slug"]).first()
old_user = (
session.query(User).filter(User.slug == user_dict["slug"]).first()
)
old_user.oid = oid
old_user.password = user_dict["password"]
session.commit()
@@ -110,7 +114,7 @@ def post_migrate():
"slug": "old-discours",
"username": "old-discours",
"email": "old@discours.io",
"name": "Просмотры на старой версии сайта",
"name": "Просмотры на старой версии сайта"
}
with local_session() as session:
@@ -143,8 +147,12 @@ def migrate_2stage(entry, id_map):
}
user_rating = UserRating.create(**user_rating_dict)
if user_rating_dict["value"] > 0:
af = AuthorFollower.create(author=user.id, follower=rater.id, auto=True)
if user_rating_dict['value'] > 0:
af = AuthorFollower.create(
author=user.id,
follower=rater.id,
auto=True
)
session.add(af)
session.add(user_rating)
session.commit()