lint

2023-10-26 22:38:31 +02:00
parent 1c49780cd4
commit c2cc428abe
64 changed files with 631 additions and 626 deletions
--- a/migration/init.py
+++ b/migration/init.py
@@ -1,18 +1,12 @@
 """ cmd managed migration """
-import asyncio
-import gc
-import json
-import sys
 from datetime import datetime, timezone
-
-import bs4
-
 from migration.export import export_mdx
 from migration.tables.comments import migrate as migrateComment
 from migration.tables.comments import migrate_2stage as migrateComment_2stage
 from migration.tables.content_items import get_shout_slug
 from migration.tables.content_items import migrate as migrateShout
-from migration.tables.remarks import migrate as migrateRemark
+
+# from migration.tables.remarks import migrate as migrateRemark  # NOTE: disabled with remarks_handle
 from migration.tables.topics import migrate as migrateTopic
 from migration.tables.users import migrate as migrateUser
 from migration.tables.users import migrate_2stage as migrateUser_2stage
@@ -20,6 +14,12 @@ from migration.tables.users import post_migrate as users_post_migrate
 from orm import init_tables
 from orm.reaction import Reaction

+import asyncio
+import bs4
+import gc
+import json
+import sys
+
 TODAY = datetime.strftime(datetime.now(tz=timezone.utc), "%Y%m%d")
 OLD_DATE = "2016-03-05 22:22:00.350000"

@@ -111,7 +111,7 @@ async def shouts_handle(storage, args):
            # print main counter
            counter += 1
            print(
-                '[migration] shouts_handle %d: %s @%s'
+                "[migration] shouts_handle %d: %s @%s"
                % ((counter + 1), shout_dict["slug"], author["slug"])
            )

@@ -132,13 +132,13 @@ async def shouts_handle(storage, args):
    print("[migration] " + str(anonymous_author) + " authored by @anonymous")


-async def remarks_handle(storage):
-    print("[migration] comments")
-    c = 0
-    for entry_remark in storage["remarks"]["data"]:
-        remark = await migrateRemark(entry_remark, storage)
-        c += 1
-    print("[migration] " + str(c) + " remarks migrated")
+# async def remarks_handle(storage):
+#     print("[migration] comments")
+#     c = 0
+#     for entry_remark in storage["remarks"]["data"]:
+#         remark = await migrateRemark(entry_remark, storage)
+#         c += 1
+#     print("[migration] " + str(c) + " remarks migrated")


 async def comments_handle(storage):
@@ -149,9 +149,9 @@ async def comments_handle(storage):
    for oldcomment in storage["reactions"]["data"]:
        if not oldcomment.get("deleted"):
            reaction = await migrateComment(oldcomment, storage)
-            if type(reaction) == str:
+            if isinstance(reaction, str):
                missed_shouts[reaction] = oldcomment
-            elif type(reaction) == Reaction:
+            elif isinstance(reaction, Reaction):
                reaction = reaction.dict()
                rid = reaction["id"]
                oid = reaction["oid"]
--- a/migration/bson2json.py
+++ b/migration/bson2json.py
@@ -1,11 +1,10 @@
+from .utils import DateTimeEncoder
+
+import bson
 import gc
 import json
 import os

-import bson
-
-from .utils import DateTimeEncoder
-

 def json_tables():
    print("[migration] unpack dump/discours/*.bson to migration/data/*.json")
@@ -19,7 +18,7 @@ def json_tables():
        "remarks": [],
    }
    for table in data.keys():
-        print('[migration] bson2json for ' + table)
+        print("[migration] bson2json for " + table)
        gc.collect()
        lc = []
        bs = open("dump/discours/" + table + ".bson", "rb").read()
--- a/migration/export.py
+++ b/migration/export.py
@@ -1,11 +1,10 @@
-import json
-import os
+from .extract import extract_html, extract_media
+from .utils import DateTimeEncoder
 from datetime import datetime, timezone

 import frontmatter
-
-from .extract import extract_html, extract_media
-from .utils import DateTimeEncoder
+import json
+import os

 OLD_DATE = "2016-03-05 22:22:00.350000"
 EXPORT_DEST = "../discoursio-web/data/"
--- a/migration/extract.py
+++ b/migration/extract.py
@@ -1,9 +1,11 @@
+from bs4 import BeautifulSoup
+
 import base64
 import os
 import re
-import uuid

-from bs4 import BeautifulSoup
+# import uuid  # NOTE: was used by the commented-out extract_md
+

 TOOLTIP_REGEX = r"(\/\/\/(.+)\/\/\/)"
 contentDir = os.path.join(
@@ -26,40 +28,40 @@ def replace_tooltips(body):
    return newbody


-def extract_footnotes(body, shout_dict):
-    parts = body.split("&&&")
-    lll = len(parts)
-    newparts = list(parts)
-    placed = False
-    if lll & 1:
-        if lll > 1:
-            i = 1
-            print("[extract] found %d footnotes in body" % (lll - 1))
-            for part in parts[1:]:
-                if i & 1:
-                    placed = True
-                    if 'a class="footnote-url" href=' in part:
-                        print("[extract] footnote: " + part)
-                        fn = 'a class="footnote-url" href="'
-                        exxtracted_link = part.split(fn, 1)[1].split('"', 1)[0]
-                        extracted_body = part.split(fn, 1)[1].split('>', 1)[1].split('</a>', 1)[0]
-                        print("[extract] footnote link: " + extracted_link)
-                        with local_session() as session:
-                            Reaction.create(
-                                {
-                                    "shout": shout_dict['id'],
-                                    "kind": ReactionKind.FOOTNOTE,
-                                    "body": extracted_body,
-                                    "range": str(body.index(fn + link) - len('<'))
-                                    + ':'
-                                    + str(body.index(extracted_body) + len('</a>')),
-                                }
-                            )
-                        newparts[i] = "<a href='#'>ℹ️</a>"
-                else:
-                    newparts[i] = part
-                i += 1
-    return ("".join(newparts), placed)
+# def extract_footnotes(body, shout_dict):
+#     parts = body.split("&&&")
+#     lll = len(parts)
+#     newparts = list(parts)
+#     placed = False
+#     if lll & 1:
+#         if lll > 1:
+#             i = 1
+#             print("[extract] found %d footnotes in body" % (lll - 1))
+#             for part in parts[1:]:
+#                 if i & 1:
+#                     placed = True
+#                     if 'a class="footnote-url" href=' in part:
+#                         print("[extract] footnote: " + part)
+#                         fn = 'a class="footnote-url" href="'
+#                         extracted_link = part.split(fn, 1)[1].split('"', 1)[0]
+#                         extracted_body = part.split(fn, 1)[1].split(">", 1)[1].split("</a>", 1)[0]
+#                         print("[extract] footnote link: " + extracted_link)
+#                         with local_session() as session:
+#                             Reaction.create(
+#                                 {
+#                                     "shout": shout_dict["id"],
+#                                     "kind": ReactionKind.FOOTNOTE,
+#                                     "body": extracted_body,
+#                                     "range": str(body.index(fn + extracted_link) - len("<"))
+#                                     + ":"
+#                                     + str(body.index(extracted_body) + len("</a>")),
+#                                 }
+#                             )
+#                         newparts[i] = "<a href='#'>ℹ️</a>"
+#                 else:
+#                     newparts[i] = part
+#                 i += 1
+#     return ("".join(newparts), placed)


 def place_tooltips(body):
@@ -228,7 +230,6 @@ di = "data:image"


 def extract_md_images(body, prefix):
-    newbody = ""
    body = (
        body.replace("\n! [](" + di, "\n ![](" + di)
        .replace("\n[](" + di, "\n![](" + di)
@@ -236,10 +237,10 @@ def extract_md_images(body, prefix):
    )
    parts = body.split(di)
    if len(parts) > 1:
-        newbody = extract_dataimages(parts, prefix)
+        new_body = extract_dataimages(parts, prefix)
    else:
-        newbody = body
-    return newbody
+        new_body = body
+    return new_body


 def cleanup_md(body):
@@ -262,28 +263,28 @@ def cleanup_md(body):
    return newbody


-def extract_md(body, shout_dict=None):
-    newbody = body
-    if newbody:
-        newbody = cleanup_md(newbody)
-        if not newbody:
-            raise Exception("cleanup error")
-
-        if shout_dict:
-            uid = shout_dict['id'] or uuid.uuid4()
-            newbody = extract_md_images(newbody, uid)
-            if not newbody:
-                raise Exception("extract_images error")
-
-            newbody, placed = extract_footnotes(body, shout_dict)
-            if not newbody:
-                raise Exception("extract_footnotes error")
-
-    return newbody
+# def extract_md(body, shout_dict=None):
+#     newbody = body
+#     if newbody:
+#         newbody = cleanup_md(newbody)
+#         if not newbody:
+#             raise Exception("cleanup error")
+#
+#         if shout_dict:
+#             uid = shout_dict["id"] or uuid.uuid4()
+#             newbody = extract_md_images(newbody, uid)
+#             if not newbody:
+#                 raise Exception("extract_images error")
+#
+#             newbody, placed = extract_footnotes(body, shout_dict)
+#             if not newbody:
+#                 raise Exception("extract_footnotes error")
+#
+#     return newbody


 def extract_media(entry):
-    '''normalized media extraction method'''
+    """normalized media extraction method"""
    # media [ { title pic url body } ]}
    kind = entry.get("type")
    if not kind:
@@ -398,16 +399,14 @@ def cleanup_html(body: str) -> str:
    return new_body


-def extract_html(entry, shout_id=None, cleanup=False):
-    body_orig = (entry.get("body") or "").replace('\(', '(').replace('\)', ')')
+def extract_html(entry, cleanup=False):
+    body_orig = (entry.get("body") or "").replace(r"\(", "(").replace(r"\)", ")")
    if cleanup:
        # we do that before bs parsing to catch the invalid html
        body_clean = cleanup_html(body_orig)
        if body_clean != body_orig:
            print(f"[migration] html cleaned for slug {entry.get('slug', None)}")
        body_orig = body_clean
-    if shout_id:
-        extract_footnotes(body_orig, shout_id)
    body_html = str(BeautifulSoup(body_orig, features="html.parser"))
    if cleanup:
        # we do that after bs parsing because it can add dummy tags
--- a/migration/html2text/init.py
+++ b/migration/html2text/init.py
@@ -1,13 +1,5 @@
 """html2text: Turn HTML into equivalent Markdown-structured text."""

-import html.entities
-import html.parser
-import re
-import string
-import urllib.parse as urlparse
-from textwrap import wrap
-from typing import Dict, List, Optional, Tuple, Union
-
 from . import config
 from .elements import AnchorElement, ListElement
 from .typing import OutCallback
@@ -26,6 +18,14 @@ from .utils import (
    skipwrap,
    unifiable_n,
 )
+from textwrap import wrap
+from typing import Dict, List, Optional, Tuple, Union
+
+import html.entities
+import html.parser
+import re
+import string
+import urllib.parse as urlparse

 __version__ = (2020, 1, 16)

--- a/migration/html2text/cli.py
+++ b/migration/html2text/cli.py
@@ -1,8 +1,8 @@
+from . import __version__, config, HTML2Text
+
 import argparse
 import sys

-from . import HTML2Text, __version__, config
-

 # noinspection DuplicatedCode
 def main() -> None:
--- a/migration/html2text/utils.py
+++ b/migration/html2text/utils.py
@@ -1,7 +1,7 @@
-import html.entities
+from . import config
 from typing import Dict, List, Optional

-from . import config
+import html.entities

 unifiable_n = {
    html.entities.name2codepoint[k]: v for k, v in config.UNIFIABLE.items() if k != "nbsp"
--- a/migration/tables/comments.py
+++ b/migration/tables/comments.py
@@ -1,8 +1,6 @@
-from datetime import datetime, timezone
-
-from dateutil.parser import parse as date_parse
-
 from base.orm import local_session
+from datetime import datetime, timezone
+from dateutil.parser import parse as date_parse
 from migration.html2text import html2text
 from orm.reaction import Reaction, ReactionKind
 from orm.shout import Shout, ShoutReactionsFollower
@@ -30,12 +28,12 @@ def auto_followers(session, topics, reaction_dict):
        tf = (
            session.query(TopicFollower)
            .where(TopicFollower.follower == reaction_dict["createdBy"])
-            .filter(TopicFollower.topic == t['id'])
+            .filter(TopicFollower.topic == t["id"])
            .first()
        )
        if not tf:
            topic_following = TopicFollower.create(
-                follower=reaction_dict["createdBy"], topic=t['id'], auto=True
+                follower=reaction_dict["createdBy"], topic=t["id"], auto=True
            )
            session.add(topic_following)

@@ -57,13 +55,13 @@ def migrate_ratings(session, entry, reaction_dict):
            rr = Reaction.create(**re_reaction_dict)
            following2 = (
                session.query(ShoutReactionsFollower)
-                .where(ShoutReactionsFollower.follower == re_reaction_dict['createdBy'])
+                .where(ShoutReactionsFollower.follower == re_reaction_dict["createdBy"])
                .filter(ShoutReactionsFollower.shout == rr.shout)
                .first()
            )
            if not following2:
                following2 = ShoutReactionsFollower.create(
-                    follower=re_reaction_dict['createdBy'], shout=rr.shout, auto=True
+                    follower=re_reaction_dict["createdBy"], shout=rr.shout, auto=True
                )
                session.add(following2)
            session.add(rr)
@@ -160,9 +158,9 @@ async def migrate(entry, storage):


 def migrate_2stage(old_comment, idmap):
-    if old_comment.get('body'):
-        new_id = idmap.get(old_comment.get('oid'))
-        new_id = idmap.get(old_comment.get('_id'))
+    if old_comment.get("body"):
+        new_id = idmap.get(old_comment.get("oid"))
+        new_id = idmap.get(old_comment.get("_id"))
        if new_id:
            new_replyto_id = None
            old_replyto_id = old_comment.get("replyTo")
--- a/migration/tables/content_items.py
+++ b/migration/tables/content_items.py
@@ -1,18 +1,17 @@
-import json
-import re
-from datetime import datetime, timezone
-
-from dateutil.parser import parse as date_parse
-from sqlalchemy.exc import IntegrityError
-from transliterate import translit
-
 from base.orm import local_session
+from datetime import datetime, timezone
+from dateutil.parser import parse as date_parse
 from migration.extract import extract_html, extract_media
 from orm.reaction import Reaction, ReactionKind
 from orm.shout import Shout, ShoutReactionsFollower, ShoutTopic
 from orm.topic import Topic, TopicFollower
 from orm.user import User
 from services.stat.viewed import ViewedStorage
+from sqlalchemy.exc import IntegrityError
+from transliterate import translit
+
+import json
+import re

 OLD_DATE = "2016-03-05 22:22:00.350000"
 ts = datetime.now(tz=timezone.utc)
@@ -35,7 +34,7 @@ def get_shout_slug(entry):
            slug = friend.get("slug", "")
            if slug:
                break
-    slug = re.sub('[^0-9a-zA-Z]+', '-', slug)
+    slug = re.sub("[^0-9a-zA-Z]+", "-", slug)
    return slug


@@ -43,27 +42,27 @@ def create_author_from_app(app):
    user = None
    userdata = None
    # check if email is used
-    if app['email']:
+    if app["email"]:
        with local_session() as session:
-            user = session.query(User).where(User.email == app['email']).first()
+            user = session.query(User).where(User.email == app["email"]).first()
            if not user:
                # print('[migration] app %r' % app)
-                name = app.get('name')
+                name = app.get("name")
                if name:
                    slug = translit(name, "ru", reversed=True).lower()
-                    slug = re.sub('[^0-9a-zA-Z]+', '-', slug)
-                    print('[migration] created slug %s' % slug)
+                    slug = re.sub("[^0-9a-zA-Z]+", "-", slug)
+                    print("[migration] created slug %s" % slug)
                    # check if slug is used
                    if slug:
                        user = session.query(User).where(User.slug == slug).first()

                        # get slug from email
                        if user:
-                            slug = app['email'].split('@')[0]
+                            slug = app["email"].split("@")[0]
                            user = session.query(User).where(User.slug == slug).first()
                            # one more try
                            if user:
-                                slug += '-author'
+                                slug += "-author"
                                user = session.query(User).where(User.slug == slug).first()

                # create user with application data
@@ -81,7 +80,7 @@ def create_author_from_app(app):
                    user = User.create(**userdata)
                    session.add(user)
                    session.commit()
-                    userdata['id'] = user.id
+                    userdata["id"] = user.id

            userdata = user.dict()
        return userdata
@@ -119,14 +118,14 @@ async def get_user(entry, storage):
        elif user_oid:
            userdata = storage["users"]["by_oid"].get(user_oid)
        if not userdata:
-            print('no userdata by oid, anonymous')
+            print("no userdata by oid, anonymous")
            userdata = anondict
            print(app)
    # cleanup slug
    if userdata:
        slug = userdata.get("slug", "")
        if slug:
-            slug = re.sub('[^0-9a-zA-Z]+', '-', slug)
+            slug = re.sub("[^0-9a-zA-Z]+", "-", slug)
            userdata["slug"] = slug
    else:
        userdata = anondict
@@ -160,7 +159,7 @@ async def migrate(entry, storage):
    }

    # main topic patch
-    r['mainTopic'] = r['topics'][0]
+    r["mainTopic"] = r["topics"][0]

    # published author auto-confirm
    if entry.get("published"):
@@ -183,7 +182,7 @@ async def migrate(entry, storage):
        shout_dict["oid"] = entry.get("_id", "")
        shout = await create_shout(shout_dict)
    except IntegrityError as e:
-        print('[migration] create_shout integrity error', e)
+        print("[migration] create_shout integrity error", e)
        shout = await resolve_create_shout(shout_dict)
    except Exception as e:
        raise Exception(e)
@@ -202,7 +201,7 @@ async def migrate(entry, storage):

    # shout views
    await ViewedStorage.increment(
-        shout_dict["slug"], amount=entry.get("views", 1), viewer='old-discours'
+        shout_dict["slug"], amount=entry.get("views", 1), viewer="old-discours"
    )
    # del shout_dict['ratings']

@@ -240,7 +239,7 @@ async def add_topics_follower(entry, storage, user):
                        session.add(tf)
                        session.commit()
            except IntegrityError:
-                print('[migration.shout] hidden by topic ' + tpc.slug)
+                print("[migration.shout] hidden by topic " + tpc.slug)
    # main topic
    maintopic = storage["replacements"].get(topics_by_oid.get(category, {}).get("slug"))
    if maintopic in ttt:
@@ -261,7 +260,7 @@ async def process_user(userdata, storage, oid):
        if not user:
            try:
                slug = userdata["slug"].lower().strip()
-                slug = re.sub('[^0-9a-zA-Z]+', '-', slug)
+                slug = re.sub("[^0-9a-zA-Z]+", "-", slug)
                userdata["slug"] = slug
                user = User.create(**userdata)
                session.add(user)
@@ -289,9 +288,9 @@ async def resolve_create_shout(shout_dict):
        s = session.query(Shout).filter(Shout.slug == shout_dict["slug"]).first()
        bump = False
        if s:
-            if s.createdAt != shout_dict['createdAt']:
+            if s.createdAt != shout_dict["createdAt"]:
                # create new with different slug
-                shout_dict["slug"] += '-' + shout_dict["layout"]
+                shout_dict["slug"] += "-" + shout_dict["layout"]
                try:
                    await create_shout(shout_dict)
                except IntegrityError as e:
--- a/migration/tables/remarks.py
+++ b/migration/tables/remarks.py
@@ -5,24 +5,24 @@ from orm.reaction import Reaction, ReactionKind


 def migrate(entry, storage):
-    post_oid = entry['contentItem']
+    post_oid = entry["contentItem"]
    print(post_oid)
-    shout_dict = storage['shouts']['by_oid'].get(post_oid)
+    shout_dict = storage["shouts"]["by_oid"].get(post_oid)
    if shout_dict:
-        print(shout_dict['body'])
+        print(shout_dict["body"])
        remark = {
-            "shout": shout_dict['id'],
-            "body": extract_md(html2text(entry['body']), shout_dict),
+            "shout": shout_dict["id"],
+            "body": extract_md(html2text(entry["body"]), shout_dict),
            "kind": ReactionKind.REMARK,
        }

-        if entry.get('textBefore'):
-            remark['range'] = (
-                str(shout_dict['body'].index(entry['textBefore'] or ''))
-                + ':'
+        if entry.get("textBefore"):
+            remark["range"] = (
+                str(shout_dict["body"].index(entry["textBefore"] or ""))
+                + ":"
                + str(
-                    shout_dict['body'].index(entry['textAfter'] or '')
-                    + len(entry['textAfter'] or '')
+                    shout_dict["body"].index(entry["textAfter"] or "")
+                    + len(entry["textAfter"] or "")
                )
            )

--- a/migration/tables/users.py
+++ b/migration/tables/users.py
@@ -1,11 +1,10 @@
-import re
-
+from base.orm import local_session
 from bs4 import BeautifulSoup
 from dateutil.parser import parse
+from orm.user import AuthorFollower, User, UserRating
 from sqlalchemy.exc import IntegrityError

-from base.orm import local_session
-from orm.user import AuthorFollower, User, UserRating
+import re


 def migrate(entry):
@@ -33,12 +32,12 @@ def migrate(entry):
    if entry.get("profile"):
        # slug
        slug = entry["profile"].get("path").lower()
-        slug = re.sub('[^0-9a-zA-Z]+', '-', slug).strip()
+        slug = re.sub("[^0-9a-zA-Z]+", "-", slug).strip()
        user_dict["slug"] = slug
        bio = (
            (entry.get("profile", {"bio": ""}).get("bio") or "")
-            .replace('\(', '(')
-            .replace('\)', ')')
+            .replace(r"\(", "(")
+            .replace(r"\)", ")")
        )
        bio_text = BeautifulSoup(bio, features="lxml").text

@@ -144,7 +143,7 @@ def migrate_2stage(entry, id_map):
                }

                user_rating = UserRating.create(**user_rating_dict)
-                if user_rating_dict['value'] > 0:
+                if user_rating_dict["value"] > 0:
                    af = AuthorFollower.create(author=user.id, follower=rater.id, auto=True)
                    session.add(af)
                session.add(user_rating)