From 5fedd007c7ac2cbd1e64a5340503d9580233893b Mon Sep 17 00:00:00 2001
From: Tony Rewin \s*\s*
",
- r"\s*
",
- r"\s*
",
- r"\s*
",
- r"
\s*
- self.span_highlight = True
- elif (
- self.current_class == "lead"
- and not self.inheader
- and not self.span_highlight
- ):
- # self.o("==") # NOTE: CriticMarkup {==
- self.span_lead = True
- else:
- if self.span_highlight:
- self.o("`")
- self.span_highlight = False
- elif self.span_lead:
- # self.o('==')
- self.span_lead = False
-
- if tag in ["p", "div"]:
- if self.google_doc:
- if start and google_has_height(tag_style):
- self.p()
- else:
- self.soft_br()
- elif self.astack or self.inheader:
- pass
- else:
- self.p()
-
- if tag == "br" and start:
- if self.blockquote > 0:
- self.o(" \n> ")
- else:
- self.o(" \n")
-
- if tag == "hr" and start:
- self.p()
- self.o("* * *")
- self.p()
-
- if tag in ["head", "style", "script"]:
- if start:
- self.quiet += 1
- else:
- self.quiet -= 1
-
- if tag == "style":
- if start:
- self.style += 1
- else:
- self.style -= 1
-
- if tag in ["body"]:
- self.quiet = 0 # sites like 9rules.com never close
-
- if tag == "blockquote":
- if start:
- self.p()
- self.o("> ", force=True)
- self.start = True
- self.blockquote += 1
- else:
- self.blockquote -= 1
- self.p()
-
- if tag in ["em", "i", "u"] and not self.ignore_emphasis:
- # Separate with a space if we immediately follow an alphanumeric
- # character, since otherwise Markdown won't render the emphasis
- # marks, and we'll be left with eg 'foo_bar_' visible.
- # (Don't add a space otherwise, though, since there isn't one in the
- # original HTML.)
- if (
- start
- and self.preceding_data
- and self.preceding_data[-1] not in string.whitespace
- and self.preceding_data[-1] not in string.punctuation
- ):
- emphasis = " " + self.emphasis_mark
- self.preceding_data += " "
- else:
- emphasis = self.emphasis_mark
-
- self.o(emphasis)
- if start:
- self.stressed = True
-
- if tag in ["strong", "b"] and not self.ignore_emphasis:
- # Separate with space if we immediately follow an * character, since
- # without it, Markdown won't render the resulting *** correctly.
- # (Don't add a space otherwise, though, since there isn't one in the
- # original HTML.)
- if (
- not self.inheader
- and not self.astack
- and not self.span_lead
- and not self.span_highlight
- ):
- if (
- start
- and self.preceding_data
- and self.preceding_data[-1] == self.strong_mark[0]
- ):
- strong = " " + self.strong_mark
- self.preceding_data += " "
- else:
- strong = self.strong_mark
-
- self.o(strong)
- if start:
- self.stressed = True
-
- if tag in ["del", "strike", "s"]:
- if start and self.preceding_data and self.preceding_data[-1] == "~":
- strike = " ~~"
- self.preceding_data += " "
- else:
- strike = "~~"
-
- self.o(strike)
- if start:
- self.stressed = True
-
- if self.google_doc:
- if not self.inheader:
- # handle some font attributes, but leave headers clean
- self.handle_emphasis(start, tag_style, parent_style)
-
- if tag in ["kbd", "code", "tt"] and not self.pre:
- self.o("`") # `` `this` ``
- self.code = not self.code
-
- if tag == "abbr":
- if start:
- self.abbr_title = None
- self.abbr_data = ""
- if "title" in attrs:
- self.abbr_title = attrs["title"]
- else:
- if self.abbr_title is not None:
- assert self.abbr_data is not None
- self.abbr_list[self.abbr_data] = self.abbr_title
- self.abbr_title = None
- self.abbr_data = None
-
- if tag == "q":
- if not self.quote:
- self.o(self.open_quote)
- else:
- self.o(self.close_quote)
- self.quote = not self.quote
-
- def link_url(self: HTML2Text, link: str, title: str = "") -> None:
- url = urlparse.urljoin(self.baseurl, link)
- title = ' "{}"'.format(title) if title.strip() else ""
- self.o("]({url}{title})".format(url=escape_md(url), title=title))
-
- if tag == "a" and not self.ignore_links:
- if start:
- if "data-original-title" in attrs:
- # WARNING: old discours specific code
- self.o("&&&%s&&&" % attrs["data-original-title"])
- else:
- if (
- "href" in attrs
- and not attrs["href"].startswith("#_ftn")
- and attrs["href"] is not None
- and not (
- self.skip_internal_links and attrs["href"].startswith("#")
- )
- and not (
- self.ignore_mailto_links
- and attrs["href"].startswith("mailto:")
- )
- ):
- self.astack.append(attrs)
- self.maybe_automatic_link = attrs["href"]
- self.empty_link = True
- if self.protect_links:
- attrs["href"] = "<" + attrs["href"] + ">"
- else:
- self.astack.append(None)
- else:
- if self.astack:
- a = self.astack.pop()
- if self.maybe_automatic_link and not self.empty_link:
- self.maybe_automatic_link = None
- elif a:
- assert a["href"] is not None
- if self.empty_link:
- self.o("[")
- self.empty_link = False
- self.maybe_automatic_link = None
- if self.inline_links:
- self.p_p = 0
- title = a.get("title") or ""
- title = escape_md(title)
- link_url(self, a["href"], title)
- else:
- i = self.previousIndex(a)
- if i is not None:
- a_props = self.a[i]
- else:
- self.acount += 1
- a_props = AnchorElement(a, self.acount, self.outcount)
- self.a.append(a_props)
- self.o("][" + str(a_props.count) + "]")
-
- if tag == "img" and start and not self.ignore_images:
- # skip cloudinary images
- if "src" in attrs and "cloudinary" not in attrs["src"]:
- assert attrs["src"] is not None
- if not self.images_to_alt:
- attrs["href"] = attrs["src"]
- alt = attrs.get("alt") or self.default_image_alt
-
- # If we have images_with_size, write raw html including width,
- # height, and alt attributes
- if self.images_as_html or (
- self.images_with_size and ("width" in attrs or "height" in attrs)
- ):
- self.o("
")
- return
-
- # If we have a link to create, output the start
- if self.maybe_automatic_link is not None:
- href = self.maybe_automatic_link
- if (
- self.images_to_alt
- and escape_md(alt) == href
- and self.absolute_url_matcher.match(href)
- ):
- self.o("<" + escape_md(alt) + ">")
- self.empty_link = False
- return
- else:
- self.o("[")
- self.maybe_automatic_link = None
- self.empty_link = False
-
- # If we have images_to_alt, we discard the image itself,
- # considering only the alt text.
- if self.images_to_alt:
- self.o(escape_md(alt))
- else:
- self.o("![" + escape_md(alt) + "]")
- if self.inline_links:
- href = attrs.get("href") or ""
- self.o(
- "(" + escape_md(urlparse.urljoin(self.baseurl, href)) + ")"
- )
- else:
- i = self.previousIndex(attrs)
- if i is not None:
- a_props = self.a[i]
- else:
- self.acount += 1
- a_props = AnchorElement(attrs, self.acount, self.outcount)
- self.a.append(a_props)
- self.o("[" + str(a_props.count) + "]")
-
- if tag == "dl" and start:
- self.p()
- if tag == "dt" and not start:
- self.pbr()
- if tag == "dd" and start:
- self.o(" ")
- if tag == "dd" and not start:
- self.pbr()
-
- if tag in ["ol", "ul"]:
- # Google Docs create sub lists as top level lists
- if not self.list and not self.lastWasList:
- self.p()
- if start:
- if self.google_doc:
- list_style = google_list_style(tag_style)
- else:
- list_style = tag
- numbering_start = list_numbering_start(attrs)
- self.list.append(ListElement(list_style, numbering_start))
- else:
- if self.list:
- self.list.pop()
- if not self.google_doc and not self.list:
- self.o("\n")
- self.lastWasList = True
- else:
- self.lastWasList = False
-
- if tag == "li":
- self.pbr()
- if start:
- if self.list:
- li = self.list[-1]
- else:
- li = ListElement("ul", 0)
- if self.google_doc:
- self.o(" " * self.google_nest_count(tag_style))
- else:
- # Indent two spaces per list, except use three spaces for an
- # unordered list inside an ordered list.
- # https://spec.commonmark.org/0.28/#motivation
- # WARNING: does not line up - s > 9 correctly.
- parent_list = None
- for list in self.list:
- self.o(
- " " if parent_list == "ol" and list.name == "ul" else " "
- )
- parent_list = list.name
-
- if li.name == "ul":
- self.o(self.ul_item_mark + " ")
- elif li.name == "ol":
- li.num += 1
- self.o(str(li.num) + ". ")
- self.start = True
-
- if tag in ["table", "tr", "td", "th"]:
- if self.ignore_tables:
- if tag == "tr":
- if start:
- pass
- else:
- self.soft_br()
- else:
- pass
-
- elif self.bypass_tables:
- if start:
- self.soft_br()
- if tag in ["td", "th"]:
- if start:
- self.o("<{}>\n\n".format(tag))
- else:
- self.o("\n{}>".format(tag))
- else:
- if start:
- self.o("<{}>".format(tag))
- else:
- self.o("{}>".format(tag))
-
- else:
- if tag == "table":
- if start:
- self.table_start = True
- if self.pad_tables:
- self.o("<" + config.TABLE_MARKER_FOR_PAD + ">")
- self.o(" \n")
- else:
- if self.pad_tables:
- # add break in case the table is empty or its 1 row table
- self.soft_br()
- self.o("" + config.TABLE_MARKER_FOR_PAD + ">")
- self.o(" \n")
- if tag in ["td", "th"] and start:
- if self.split_next_td:
- self.o("| ")
- self.split_next_td = True
-
- if tag == "tr" and start:
- self.td_count = 0
- if tag == "tr" and not start:
- self.split_next_td = False
- self.soft_br()
- if tag == "tr" and not start and self.table_start:
- # Underline table header
- self.o("|".join(["---"] * self.td_count))
- self.soft_br()
- self.table_start = False
- if tag in ["td", "th"] and start:
- self.td_count += 1
-
- if tag == "pre":
- if start:
- self.startpre = True
- self.pre = True
- else:
- self.pre = False
- if self.mark_code:
- self.out("\n[/code]")
- self.p()
-
- def pbr(self) -> None:
- "Pretty print has a line break"
- if self.p_p == 0:
- self.p_p = 1
-
- def p(self) -> None:
- "Set pretty print to 1 or 2 lines"
- self.p_p = 1 if self.single_line_break else 2
-
- def soft_br(self) -> None:
- "Soft breaks"
- self.pbr()
- self.br_toggle = " "
-
- def o(
- self, data: str, puredata: bool = False, force: Union[bool, str] = False
- ) -> None:
- """
- Deal with indentation and whitespace
- """
- if self.abbr_data is not None:
- self.abbr_data += data
-
- if not self.quiet:
- if self.google_doc:
- # prevent white space immediately after 'begin emphasis'
- # marks ('**' and '_')
- lstripped_data = data.lstrip()
- if self.drop_white_space and not (self.pre or self.code):
- data = lstripped_data
- if lstripped_data != "":
- self.drop_white_space = 0
-
- if puredata and not self.pre:
- # This is a very dangerous call ... it could mess up
- # all handling of when not handled properly
- # (see entityref)
- data = re.sub(r"\s+", r" ", data)
- if data and data[0] == " ":
- self.space = True
- data = data[1:]
- if not data and not force:
- return
-
- if self.startpre:
- # self.out(" :") # not an output when already one there
- if not data.startswith("\n") and not data.startswith("\r\n"):
- #
stuff...
- data = "\n" + data
- if self.mark_code:
- self.out("\n[code]")
- self.p_p = 0
-
- bq = ">" * self.blockquote
- if not (force and data and data[0] == ">") and self.blockquote:
- bq += " "
-
- if self.pre:
- if not self.list:
- bq += " "
- # else: list content is already partially indented
- bq += " " * len(self.list)
- data = data.replace("\n", "\n" + bq)
-
- if self.startpre:
- self.startpre = False
- if self.list:
- # use existing initial indentation
- data = data.lstrip("\n")
-
- if self.start:
- self.space = False
- self.p_p = 0
- self.start = False
-
- if force == "end":
- # It's the end.
- self.p_p = 0
- self.out("\n")
- self.space = False
-
- if self.p_p:
- self.out((self.br_toggle + "\n" + bq) * self.p_p)
- self.space = False
- self.br_toggle = ""
-
- if self.space:
- if not self.lastWasNL:
- self.out(" ")
- self.space = False
-
- if self.a and (
- (self.p_p == 2 and self.links_each_paragraph) or force == "end"
- ):
- if force == "end":
- self.out("\n")
-
- newa = []
- for link in self.a:
- if self.outcount > link.outcount:
- self.out(
- " ["
- + str(link.count)
- + "]: "
- + urlparse.urljoin(self.baseurl, link.attrs["href"])
- )
- if "title" in link.attrs:
- assert link.attrs["title"] is not None
- self.out(" (" + link.attrs["title"] + ")")
- self.out("\n")
- else:
- newa.append(link)
-
- # Don't need an extra line when nothing was done.
- if self.a != newa:
- self.out("\n")
-
- self.a = newa
-
- if self.abbr_list and force == "end":
- for abbr, definition in self.abbr_list.items():
- self.out(" *[" + abbr + "]: " + definition + "\n")
-
- self.p_p = 0
- self.out(data)
- self.outcount += 1
-
- def handle_data(self, data: str, entity_char: bool = False) -> None:
- if not data:
- # Data may be empty for some HTML entities. For example,
- # LEFT-TO-RIGHT MARK.
- return
-
- if self.stressed:
- data = data.strip()
- self.stressed = False
- self.preceding_stressed = True
- elif self.preceding_stressed:
- if (
- re.match(r"[^][(){}\s.!?]", data[0])
- and not hn(self.current_tag)
- and self.current_tag not in ["a", "code", "pre"]
- ):
- # should match a letter or common punctuation
- data = " " + data
- self.preceding_stressed = False
-
- if self.style:
- self.style_def.update(dumb_css_parser(data))
-
- if self.maybe_automatic_link is not None:
- href = self.maybe_automatic_link
- if (
- href == data
- and self.absolute_url_matcher.match(href)
- and self.use_automatic_links
- ):
- self.o("<" + data + ">")
- self.empty_link = False
- return
- else:
- self.o("[")
- self.maybe_automatic_link = None
- self.empty_link = False
-
- if not self.code and not self.pre and not entity_char:
- data = escape_md_section(data, snob=self.escape_snob)
- self.preceding_data = data
- self.o(data, puredata=True)
-
- def charref(self, name: str) -> str:
- if name[0] in ["x", "X"]:
- c = int(name[1:], 16)
- else:
- c = int(name)
-
- if not self.unicode_snob and c in unifiable_n:
- return unifiable_n[c]
- else:
- try:
- return chr(c)
- except ValueError: # invalid unicode
- return ""
-
- def entityref(self, c: str) -> str:
- if not self.unicode_snob and c in config.UNIFIABLE:
- return config.UNIFIABLE[c]
- try:
- ch = html.entities.html5[c + ";"]
- except KeyError:
- return "&" + c + ";"
- return config.UNIFIABLE[c] if c == "nbsp" else ch
-
- def google_nest_count(self, style: Dict[str, str]) -> int:
- """
- Calculate the nesting count of google doc lists
-
- :type style: dict
-
- :rtype: int
- """
- nest_count = 0
- if "margin-left" in style:
- nest_count = int(style["margin-left"][:-2]) // self.google_list_indent
-
- return nest_count
-
- def optwrap(self, text: str) -> str:
- """
- Wrap all paragraphs in the provided text.
-
- :type text: str
-
- :rtype: str
- """
- if not self.body_width:
- return text
-
- result = ""
- newlines = 0
- # I cannot think of a better solution for now.
- # To avoid the non-wrap behaviour for entire paras
- # because of the presence of a link in it
- if not self.wrap_links:
- self.inline_links = False
- for para in text.split("\n"):
- if len(para) > 0:
- if not skipwrap(
- para, self.wrap_links, self.wrap_list_items, self.wrap_tables
- ):
- indent = ""
- if para.startswith(" " + self.ul_item_mark):
- # list item continuation: add a double indent to the
- # new lines
- indent = " "
- elif para.startswith("> "):
- # blockquote continuation: add the greater than symbol
- # to the new lines
- indent = "> "
- wrapped = wrap(
- para,
- self.body_width,
- break_long_words=False,
- subsequent_indent=indent,
- )
- result += "\n".join(wrapped)
- if para.endswith(" "):
- result += " \n"
- newlines = 1
- elif indent:
- result += "\n"
- newlines = 1
- else:
- result += "\n\n"
- newlines = 2
- else:
- # Warning for the tempted!!!
- # Be aware that obvious replacement of this with
- # line.isspace()
- # DOES NOT work! Explanations are welcome.
- if not config.RE_SPACE.match(para):
- result += para + "\n"
- newlines = 1
- else:
- if newlines < 2:
- result += "\n"
- newlines += 1
- return result
-
-
-def html2text(
- html: str, baseurl: str = "", bodywidth: Optional[int] = config.BODY_WIDTH
-) -> str:
- h = html.strip() or ""
- if h:
- h = HTML2Text(baseurl=baseurl, bodywidth=bodywidth)
- h = h.handle(html.strip())
- # print('[html2text] %d bytes' % len(html))
- return h
diff --git a/migration/html2text/__main__.py b/migration/html2text/__main__.py
deleted file mode 100644
index 4e28416e..00000000
--- a/migration/html2text/__main__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from .cli import main
-
-main()
diff --git a/migration/html2text/cli.py b/migration/html2text/cli.py
deleted file mode 100644
index dbaba28b..00000000
--- a/migration/html2text/cli.py
+++ /dev/null
@@ -1,323 +0,0 @@
-import argparse
-import sys
-
-from . import HTML2Text, __version__, config
-
-
-# noinspection DuplicatedCode
-def main() -> None:
- baseurl = ""
-
- class bcolors:
- HEADER = "\033[95m"
- OKBLUE = "\033[94m"
- OKGREEN = "\033[92m"
- WARNING = "\033[93m"
- FAIL = "\033[91m"
- ENDC = "\033[0m"
- BOLD = "\033[1m"
- UNDERLINE = "\033[4m"
-
- p = argparse.ArgumentParser()
- p.add_argument(
- "--default-image-alt",
- dest="default_image_alt",
- default=config.DEFAULT_IMAGE_ALT,
- help="The default alt string for images with missing ones",
- )
- p.add_argument(
- "--pad-tables",
- dest="pad_tables",
- action="store_true",
- default=config.PAD_TABLES,
- help="pad the cells to equal column width in tables",
- )
- p.add_argument(
- "--no-wrap-links",
- dest="wrap_links",
- action="store_false",
- default=config.WRAP_LINKS,
- help="don't wrap links during conversion",
- )
- p.add_argument(
- "--wrap-list-items",
- dest="wrap_list_items",
- action="store_true",
- default=config.WRAP_LIST_ITEMS,
- help="wrap list items during conversion",
- )
- p.add_argument(
- "--wrap-tables",
- dest="wrap_tables",
- action="store_true",
- default=config.WRAP_TABLES,
- help="wrap tables",
- )
- p.add_argument(
- "--ignore-emphasis",
- dest="ignore_emphasis",
- action="store_true",
- default=config.IGNORE_EMPHASIS,
- help="don't include any formatting for emphasis",
- )
- p.add_argument(
- "--reference-links",
- dest="inline_links",
- action="store_false",
- default=config.INLINE_LINKS,
- help="use reference style links instead of inline links",
- )
- p.add_argument(
- "--ignore-links",
- dest="ignore_links",
- action="store_true",
- default=config.IGNORE_ANCHORS,
- help="don't include any formatting for links",
- )
- p.add_argument(
- "--ignore-mailto-links",
- action="store_true",
- dest="ignore_mailto_links",
- default=config.IGNORE_MAILTO_LINKS,
- help="don't include mailto: links",
- )
- p.add_argument(
- "--protect-links",
- dest="protect_links",
- action="store_true",
- default=config.PROTECT_LINKS,
- help="protect links from line breaks surrounding them with angle brackets",
- )
- p.add_argument(
- "--ignore-images",
- dest="ignore_images",
- action="store_true",
- default=config.IGNORE_IMAGES,
- help="don't include any formatting for images",
- )
- p.add_argument(
- "--images-as-html",
- dest="images_as_html",
- action="store_true",
- default=config.IMAGES_AS_HTML,
- help=(
- "Always write image tags as raw html; preserves `height`, `width` and "
- "`alt` if possible."
- ),
- )
- p.add_argument(
- "--images-to-alt",
- dest="images_to_alt",
- action="store_true",
- default=config.IMAGES_TO_ALT,
- help="Discard image data, only keep alt text",
- )
- p.add_argument(
- "--images-with-size",
- dest="images_with_size",
- action="store_true",
- default=config.IMAGES_WITH_SIZE,
- help=(
- "Write image tags with height and width attrs as raw html to retain "
- "dimensions"
- ),
- )
- p.add_argument(
- "-g",
- "--google-doc",
- action="store_true",
- dest="google_doc",
- default=False,
- help="convert an html-exported Google Document",
- )
- p.add_argument(
- "-d",
- "--dash-unordered-list",
- action="store_true",
- dest="ul_style_dash",
- default=False,
- help="use a dash rather than a star for unordered list items",
- )
- p.add_argument(
- "-e",
- "--asterisk-emphasis",
- action="store_true",
- dest="em_style_asterisk",
- default=False,
- help="use an asterisk rather than an underscore for emphasized text",
- )
- p.add_argument(
- "-b",
- "--body-width",
- dest="body_width",
- type=int,
- default=config.BODY_WIDTH,
- help="number of characters per output line, 0 for no wrap",
- )
- p.add_argument(
- "-i",
- "--google-list-indent",
- dest="list_indent",
- type=int,
- default=config.GOOGLE_LIST_INDENT,
- help="number of pixels Google indents nested lists",
- )
- p.add_argument(
- "-s",
- "--hide-strikethrough",
- action="store_true",
- dest="hide_strikethrough",
- default=False,
- help="hide strike-through text. only relevant when -g is " "specified as well",
- )
- p.add_argument(
- "--escape-all",
- action="store_true",
- dest="escape_snob",
- default=False,
- help=(
- "Escape all special characters. Output is less readable, but avoids "
- "corner case formatting issues."
- ),
- )
- p.add_argument(
- "--bypass-tables",
- action="store_true",
- dest="bypass_tables",
- default=config.BYPASS_TABLES,
- help="Format tables in HTML rather than Markdown syntax.",
- )
- p.add_argument(
- "--ignore-tables",
- action="store_true",
- dest="ignore_tables",
- default=config.IGNORE_TABLES,
- help="Ignore table-related tags (table, th, td, tr) " "while keeping rows.",
- )
- p.add_argument(
- "--single-line-break",
- action="store_true",
- dest="single_line_break",
- default=config.SINGLE_LINE_BREAK,
- help=(
- "Use a single line break after a block element rather than two line "
- "breaks. NOTE: Requires --body-width=0"
- ),
- )
- p.add_argument(
- "--unicode-snob",
- action="store_true",
- dest="unicode_snob",
- default=config.UNICODE_SNOB,
- help="Use unicode throughout document",
- )
- p.add_argument(
- "--no-automatic-links",
- action="store_false",
- dest="use_automatic_links",
- default=config.USE_AUTOMATIC_LINKS,
- help="Do not use automatic links wherever applicable",
- )
- p.add_argument(
- "--no-skip-internal-links",
- action="store_false",
- dest="skip_internal_links",
- default=config.SKIP_INTERNAL_LINKS,
- help="Do not skip internal links",
- )
- p.add_argument(
- "--links-after-para",
- action="store_true",
- dest="links_each_paragraph",
- default=config.LINKS_EACH_PARAGRAPH,
- help="Put links after each paragraph instead of document",
- )
- p.add_argument(
- "--mark-code",
- action="store_true",
- dest="mark_code",
- default=config.MARK_CODE,
- help="Mark program code blocks with [code]...[/code]",
- )
- p.add_argument(
- "--decode-errors",
- dest="decode_errors",
- default=config.DECODE_ERRORS,
- help=(
- "What to do in case of decode errors.'ignore', 'strict' and 'replace' are "
- "acceptable values"
- ),
- )
- p.add_argument(
- "--open-quote",
- dest="open_quote",
- default=config.OPEN_QUOTE,
- help="The character used to open quotes",
- )
- p.add_argument(
- "--close-quote",
- dest="close_quote",
- default=config.CLOSE_QUOTE,
- help="The character used to close quotes",
- )
- p.add_argument(
- "--version", action="version", version=".".join(map(str, __version__))
- )
- p.add_argument("filename", nargs="?")
- p.add_argument("encoding", nargs="?", default="utf-8")
- args = p.parse_args()
-
- if args.filename and args.filename != "-":
- with open(args.filename, "rb") as fp:
- data = fp.read()
- else:
- data = sys.stdin.buffer.read()
-
- try:
- html = data.decode(args.encoding, args.decode_errors)
- except UnicodeDecodeError as err:
- warning = bcolors.WARNING + "Warning:" + bcolors.ENDC
- warning += " Use the " + bcolors.OKGREEN
- warning += "--decode-errors=ignore" + bcolors.ENDC + " flag."
- print(warning)
- raise err
-
- h = HTML2Text(baseurl=baseurl)
- # handle options
- if args.ul_style_dash:
- h.ul_item_mark = "-"
- if args.em_style_asterisk:
- h.emphasis_mark = "*"
- h.strong_mark = "__"
-
- h.body_width = args.body_width
- h.google_list_indent = args.list_indent
- h.ignore_emphasis = args.ignore_emphasis
- h.ignore_links = args.ignore_links
- h.ignore_mailto_links = args.ignore_mailto_links
- h.protect_links = args.protect_links
- h.ignore_images = args.ignore_images
- h.images_as_html = args.images_as_html
- h.images_to_alt = args.images_to_alt
- h.images_with_size = args.images_with_size
- h.google_doc = args.google_doc
- h.hide_strikethrough = args.hide_strikethrough
- h.escape_snob = args.escape_snob
- h.bypass_tables = args.bypass_tables
- h.ignore_tables = args.ignore_tables
- h.single_line_break = args.single_line_break
- h.inline_links = args.inline_links
- h.unicode_snob = args.unicode_snob
- h.use_automatic_links = args.use_automatic_links
- h.skip_internal_links = args.skip_internal_links
- h.links_each_paragraph = args.links_each_paragraph
- h.mark_code = args.mark_code
- h.wrap_links = args.wrap_links
- h.wrap_list_items = args.wrap_list_items
- h.wrap_tables = args.wrap_tables
- h.pad_tables = args.pad_tables
- h.default_image_alt = args.default_image_alt
- h.open_quote = args.open_quote
- h.close_quote = args.close_quote
-
- sys.stdout.write(h.handle(html))
diff --git a/migration/html2text/config.py b/migration/html2text/config.py
deleted file mode 100644
index 0f4d29bc..00000000
--- a/migration/html2text/config.py
+++ /dev/null
@@ -1,164 +0,0 @@
-import re
-
-# Use Unicode characters instead of their ascii pseudo-replacements
-UNICODE_SNOB = True
-
-# Marker to use for marking tables for padding post processing
-TABLE_MARKER_FOR_PAD = "special_marker_for_table_padding"
-# Escape all special characters. Output is less readable, but avoids
-# corner case formatting issues.
-ESCAPE_SNOB = True
-
-# Put the links after each paragraph instead of at the end.
-LINKS_EACH_PARAGRAPH = False
-
-# Wrap long lines at position. 0 for no wrapping.
-BODY_WIDTH = 0
-
-# Don't show internal links (href="#local-anchor") -- corresponding link
-# targets won't be visible in the plain text file anyway.
-SKIP_INTERNAL_LINKS = False
-
-# Use inline, rather than reference, formatting for images and links
-INLINE_LINKS = True
-
-# Protect links from line breaks surrounding them with angle brackets (in
-# addition to their square brackets)
-PROTECT_LINKS = True
-WRAP_LINKS = True
-
-# Wrap list items.
-WRAP_LIST_ITEMS = False
-
-# Wrap tables
-WRAP_TABLES = False
-
-# Number of pixels Google indents nested lists
-GOOGLE_LIST_INDENT = 36
-
-# Values Google and others may use to indicate bold text
-BOLD_TEXT_STYLE_VALUES = ("bold", "700", "800", "900")
-
-IGNORE_ANCHORS = False
-IGNORE_MAILTO_LINKS = False
-IGNORE_IMAGES = False
-IMAGES_AS_HTML = False
-IMAGES_TO_ALT = False
-IMAGES_WITH_SIZE = False
-IGNORE_EMPHASIS = False
-MARK_CODE = True
-DECODE_ERRORS = "strict"
-DEFAULT_IMAGE_ALT = ""
-PAD_TABLES = True
-
-# Convert links with same href and text to format
-# if they are absolute links
-USE_AUTOMATIC_LINKS = True
-
-# For checking space-only lines on line 771
-RE_SPACE = re.compile(r"\s\+")
-
-RE_ORDERED_LIST_MATCHER = re.compile(r"\d+\.\s")
-RE_UNORDERED_LIST_MATCHER = re.compile(r"[-\*\+]\s")
-RE_MD_CHARS_MATCHER = re.compile(r"([\\\[\]\(\)])")
-RE_MD_CHARS_MATCHER_ALL = re.compile(r"([`\*_{}\[\]\(\)#!])")
-
-# to find links in the text
-RE_LINK = re.compile(r"(\[.*?\] ?\(.*?\))|(\[.*?\]:.*?)")
-
-# to find table separators
-RE_TABLE = re.compile(r" \| ")
-
-RE_MD_DOT_MATCHER = re.compile(
- r"""
- ^ # start of line
- (\s*\d+) # optional whitespace and a number
- (\.) # dot
- (?=\s) # lookahead assert whitespace
- """,
- re.MULTILINE | re.VERBOSE,
-)
-RE_MD_PLUS_MATCHER = re.compile(
- r"""
- ^
- (\s*)
- (\+)
- (?=\s)
- """,
- flags=re.MULTILINE | re.VERBOSE,
-)
-RE_MD_DASH_MATCHER = re.compile(
- r"""
- ^
- (\s*)
- (-)
- (?=\s|\-) # followed by whitespace (bullet list, or spaced out hr)
- # or another dash (header or hr)
- """,
- flags=re.MULTILINE | re.VERBOSE,
-)
-RE_SLASH_CHARS = r"\`*_{}[]()#+-.!"
-RE_MD_BACKSLASH_MATCHER = re.compile(
- r"""
- (\\) # match one slash
- (?=[%s]) # followed by a char that requires escaping
- """
- % re.escape(RE_SLASH_CHARS),
- flags=re.VERBOSE,
-)
-
-UNIFIABLE = {
- "rsquo": "'",
- "lsquo": "'",
- "rdquo": '"',
- "ldquo": '"',
- "copy": "(C)",
- "mdash": "--",
- "nbsp": " ",
- "rarr": "->",
- "larr": "<-",
- "middot": "*",
- "ndash": "-",
- "oelig": "oe",
- "aelig": "ae",
- "agrave": "a",
- "aacute": "a",
- "acirc": "a",
- "atilde": "a",
- "auml": "a",
- "aring": "a",
- "egrave": "e",
- "eacute": "e",
- "ecirc": "e",
- "euml": "e",
- "igrave": "i",
- "iacute": "i",
- "icirc": "i",
- "iuml": "i",
- "ograve": "o",
- "oacute": "o",
- "ocirc": "o",
- "otilde": "o",
- "ouml": "o",
- "ugrave": "u",
- "uacute": "u",
- "ucirc": "u",
- "uuml": "u",
- "lrm": "",
- "rlm": "",
-}
-
-# Format tables in HTML rather than Markdown syntax
-BYPASS_TABLES = False
-# Ignore table-related tags (table, th, td, tr) while keeping rows
-IGNORE_TABLES = False
-
-
-# Use a single line break after a block element rather than two line breaks.
-# NOTE: Requires body width setting to be 0.
-SINGLE_LINE_BREAK = False
-
-
-# Use double quotation marks when converting the tag.
-OPEN_QUOTE = '"'
-CLOSE_QUOTE = '"'
diff --git a/migration/html2text/elements.py b/migration/html2text/elements.py
deleted file mode 100644
index 2533ec08..00000000
--- a/migration/html2text/elements.py
+++ /dev/null
@@ -1,18 +0,0 @@
-from typing import Dict, Optional
-
-
-class AnchorElement:
- __slots__ = ["attrs", "count", "outcount"]
-
- def __init__(self, attrs: Dict[str, Optional[str]], count: int, outcount: int):
- self.attrs = attrs
- self.count = count
- self.outcount = outcount
-
-
-class ListElement:
- __slots__ = ["name", "num"]
-
- def __init__(self, name: str, num: int):
- self.name = name
- self.num = num
diff --git a/migration/html2text/py.typed b/migration/html2text/py.typed
deleted file mode 100644
index e69de29b..00000000
diff --git a/migration/html2text/typing.py b/migration/html2text/typing.py
deleted file mode 100644
index 6e17fed2..00000000
--- a/migration/html2text/typing.py
+++ /dev/null
@@ -1,3 +0,0 @@
-class OutCallback:
- def __call__(self, s: str) -> None:
- ...
diff --git a/migration/html2text/utils.py b/migration/html2text/utils.py
deleted file mode 100644
index 1cf22b52..00000000
--- a/migration/html2text/utils.py
+++ /dev/null
@@ -1,287 +0,0 @@
-import html.entities
-from typing import Dict, List, Optional
-
-from . import config
-
-unifiable_n = {
- html.entities.name2codepoint[k]: v
- for k, v in config.UNIFIABLE.items()
- if k != "nbsp"
-}
-
-
-def hn(tag: str) -> int:
- if tag[0] == "h" and len(tag) == 2:
- n = tag[1]
- if "0" < n <= "9":
- return int(n)
- return 0
-
-
-def dumb_property_dict(style: str) -> Dict[str, str]:
- """
- :returns: A hash of css attributes
- """
- return {
- x.strip().lower(): y.strip().lower()
- for x, y in [z.split(":", 1) for z in style.split(";") if ":" in z]
- }
-
-
-def dumb_css_parser(data: str) -> Dict[str, Dict[str, str]]:
- """
- :type data: str
-
- :returns: A hash of css selectors, each of which contains a hash of
- css attributes.
- :rtype: dict
- """
- # remove @import sentences
- data += ";"
- importIndex = data.find("@import")
- while importIndex != -1:
- data = data[0:importIndex] + data[data.find(";", importIndex) + 1 :]
- importIndex = data.find("@import")
-
- # parse the css. reverted from dictionary comprehension in order to
- # support older pythons
- pairs = [x.split("{") for x in data.split("}") if "{" in x.strip()]
- try:
- elements = {a.strip(): dumb_property_dict(b) for a, b in pairs}
- except ValueError:
- elements = {} # not that important
-
- return elements
-
-
-def element_style(
- attrs: Dict[str, Optional[str]],
- style_def: Dict[str, Dict[str, str]],
- parent_style: Dict[str, str],
-) -> Dict[str, str]:
- """
- :type attrs: dict
- :type style_def: dict
- :type style_def: dict
-
- :returns: A hash of the 'final' style attributes of the element
- :rtype: dict
- """
- style = parent_style.copy()
- if attrs.get("class"):
- for css_class in attrs["class"].split():
- css_style = style_def.get("." + css_class, {})
- style.update(css_style)
- if attrs.get("style"):
- immediate_style = dumb_property_dict(attrs["style"])
- style.update(immediate_style)
-
- return style
-
-
-def google_list_style(style: Dict[str, str]) -> str:
- """
- Finds out whether this is an ordered or unordered list
-
- :type style: dict
-
- :rtype: str
- """
- if "list-style-type" in style:
- list_style = style["list-style-type"]
- if list_style in ["disc", "circle", "square", "none"]:
- return "ul"
-
- return "ol"
-
-
-def google_has_height(style: Dict[str, str]) -> bool:
- """
- Check if the style of the element has the 'height' attribute
- explicitly defined
-
- :type style: dict
-
- :rtype: bool
- """
- return "height" in style
-
-
-def google_text_emphasis(style: Dict[str, str]) -> List[str]:
- """
- :type style: dict
-
- :returns: A list of all emphasis modifiers of the element
- :rtype: list
- """
- emphasis = []
- if "text-decoration" in style:
- emphasis.append(style["text-decoration"])
- if "font-style" in style:
- emphasis.append(style["font-style"])
- if "font-weight" in style:
- emphasis.append(style["font-weight"])
-
- return emphasis
-
-
-def google_fixed_width_font(style: Dict[str, str]) -> bool:
- """
- Check if the css of the current element defines a fixed width font
-
- :type style: dict
-
- :rtype: bool
- """
- font_family = ""
- if "font-family" in style:
- font_family = style["font-family"]
- return "courier new" == font_family or "consolas" == font_family
-
-
-def list_numbering_start(attrs: Dict[str, Optional[str]]) -> int:
- """
- Extract numbering from list element attributes
-
- :type attrs: dict
-
- :rtype: int or None
- """
- if attrs.get("start"):
- try:
- return int(attrs["start"]) - 1
- except ValueError:
- pass
-
- return 0
-
-
-def skipwrap(
- para: str, wrap_links: bool, wrap_list_items: bool, wrap_tables: bool
-) -> bool:
- # If it appears to contain a link
- # don't wrap
- if not wrap_links and config.RE_LINK.search(para):
- return True
- # If the text begins with four spaces or one tab, it's a code block;
- # don't wrap
- if para[0:4] == " " or para[0] == "\t":
- return True
-
- # If the text begins with only two "--", possibly preceded by
- # whitespace, that's an emdash; so wrap.
- stripped = para.lstrip()
- if stripped[0:2] == "--" and len(stripped) > 2 and stripped[2] != "-":
- return False
-
- # I'm not sure what this is for; I thought it was to detect lists,
- # but there's a
-inside- case in one of the tests that
- # also depends upon it.
- if stripped[0:1] in ("-", "*") and not stripped[0:2] == "**":
- return not wrap_list_items
-
- # If text contains a pipe character it is likely a table
- if not wrap_tables and config.RE_TABLE.search(para):
- return True
-
- # If the text begins with a single -, *, or +, followed by a space,
- # or an integer, followed by a ., followed by a space (in either
- # case optionally proceeded by whitespace), it's a list; don't wrap.
- return bool(
- config.RE_ORDERED_LIST_MATCHER.match(stripped)
- or config.RE_UNORDERED_LIST_MATCHER.match(stripped)
- )
-
-
-def escape_md(text: str) -> str:
- """
- Escapes markdown-sensitive characters within other markdown
- constructs.
- """
- return config.RE_MD_CHARS_MATCHER.sub(r"\\\1", text)
-
-
-def escape_md_section(text: str, snob: bool = False) -> str:
- """
- Escapes markdown-sensitive characters across whole document sections.
- """
- text = config.RE_MD_BACKSLASH_MATCHER.sub(r"\\\1", text)
-
- if snob:
- text = config.RE_MD_CHARS_MATCHER_ALL.sub(r"\\\1", text)
-
- text = config.RE_MD_DOT_MATCHER.sub(r"\1\\\2", text)
- text = config.RE_MD_PLUS_MATCHER.sub(r"\1\\\2", text)
- text = config.RE_MD_DASH_MATCHER.sub(r"\1\\\2", text)
-
- return text
-
-
-def reformat_table(lines: List[str], right_margin: int) -> List[str]:
- """
- Given the lines of a table
- padds the cells and returns the new lines
- """
- # find the maximum width of the columns
- max_width = [len(x.rstrip()) + right_margin for x in lines[0].split("|")]
- max_cols = len(max_width)
- for line in lines:
- cols = [x.rstrip() for x in line.split("|")]
- num_cols = len(cols)
-
- # don't drop any data if colspan attributes result in unequal lengths
- if num_cols < max_cols:
- cols += [""] * (max_cols - num_cols)
- elif max_cols < num_cols:
- max_width += [len(x) + right_margin for x in cols[-(num_cols - max_cols) :]]
- max_cols = num_cols
-
- max_width = [
- max(len(x) + right_margin, old_len) for x, old_len in zip(cols, max_width)
- ]
-
- # reformat
- new_lines = []
- for line in lines:
- cols = [x.rstrip() for x in line.split("|")]
- if set(line.strip()) == set("-|"):
- filler = "-"
- new_cols = [
- x.rstrip() + (filler * (M - len(x.rstrip())))
- for x, M in zip(cols, max_width)
- ]
- new_lines.append("|-" + "|".join(new_cols) + "|")
- else:
- filler = " "
- new_cols = [
- x.rstrip() + (filler * (M - len(x.rstrip())))
- for x, M in zip(cols, max_width)
- ]
- new_lines.append("| " + "|".join(new_cols) + "|")
- return new_lines
-
-
-def pad_tables_in_text(text: str, right_margin: int = 1) -> str:
- """
- Provide padding for tables in the text
- """
- lines = text.split("\n")
- table_buffer = [] # type: List[str]
- table_started = False
- new_lines = []
- for line in lines:
- # Toggle table started
- if config.TABLE_MARKER_FOR_PAD in line:
- table_started = not table_started
- if not table_started:
- table = reformat_table(table_buffer, right_margin)
- new_lines.extend(table)
- table_buffer = []
- new_lines.append("")
- continue
- # Process lines
- if table_started:
- table_buffer.append(line)
- else:
- new_lines.append(line)
- return "\n".join(new_lines)
diff --git a/migration/tables/__init__.py b/migration/tables/__init__.py
deleted file mode 100644
index 8e7ee938..00000000
--- a/migration/tables/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-__all__ = (["users", "topics", "content_items", "comments"],)
diff --git a/migration/tables/comments.py b/migration/tables/comments.py
deleted file mode 100644
index d5f35c20..00000000
--- a/migration/tables/comments.py
+++ /dev/null
@@ -1,209 +0,0 @@
-from datetime import datetime, timezone
-
-from dateutil.parser import parse as date_parse
-
-from services.db import local_session
-from migration.html2text import html2text
-from orm.reaction import Reaction, ReactionKind
-from orm.shout import ShoutReactionsFollower
-from orm.topic import TopicFollower
-from orm.user import User
-from orm.shout import Shout
-
-ts = datetime.now(tz=timezone.utc)
-
-
-def auto_followers(session, topics, reaction_dict):
- # creating shout's reactions following for reaction author
- following1 = (
- session.query(ShoutReactionsFollower)
- .where(ShoutReactionsFollower.follower == reaction_dict["createdBy"])
- .filter(ShoutReactionsFollower.shout == reaction_dict["shout"])
- .first()
- )
- if not following1:
- following1 = ShoutReactionsFollower.create(
- follower=reaction_dict["createdBy"], shout=reaction_dict["shout"], auto=True
- )
- session.add(following1)
- # creating topics followings for reaction author
- for t in topics:
- tf = (
- session.query(TopicFollower)
- .where(TopicFollower.follower == reaction_dict["createdBy"])
- .filter(TopicFollower.topic == t["id"])
- .first()
- )
- if not tf:
- topic_following = TopicFollower.create(
- follower=reaction_dict["createdBy"], topic=t["id"], auto=True
- )
- session.add(topic_following)
-
-
-def migrate_ratings(session, entry, reaction_dict):
- for comment_rating_old in entry.get("ratings", []):
- rater = (
- session.query(User)
- .filter(User.oid == comment_rating_old["createdBy"])
- .first()
- )
- re_reaction_dict = {
- "shout": reaction_dict["shout"],
- "replyTo": reaction_dict["id"],
- "kind": ReactionKind.LIKE
- if comment_rating_old["value"] > 0
- else ReactionKind.DISLIKE,
- "createdBy": rater.id if rater else 1,
- }
- cts = comment_rating_old.get("createdAt")
- if cts:
- re_reaction_dict["createdAt"] = date_parse(cts)
- try:
- # creating reaction from old rating
- rr = Reaction.create(**re_reaction_dict)
- following2 = (
- session.query(ShoutReactionsFollower)
- .where(ShoutReactionsFollower.follower == re_reaction_dict["createdBy"])
- .filter(ShoutReactionsFollower.shout == rr.shout)
- .first()
- )
- if not following2:
- following2 = ShoutReactionsFollower.create(
- follower=re_reaction_dict["createdBy"], shout=rr.shout, auto=True
- )
- session.add(following2)
- session.add(rr)
-
- except Exception as e:
- print("[migration] comment rating error: %r" % re_reaction_dict)
- raise e
- session.commit()
-
-
-async def migrate(entry, storage):
- """
- {
- "_id": "hdtwS8fSyFLxXCgSC",
- "body": "",
- "contentItem": "mnK8KsJHPRi8DrybQ",
- "createdBy": "bMFPuyNg6qAD2mhXe",
- "thread": "01/",
- "createdAt": "2016-04-19 04:33:53+00:00",
- "ratings": [
- { "createdBy": "AqmRukvRiExNpAe8C", "value": 1 },
- { "createdBy": "YdE76Wth3yqymKEu5", "value": 1 }
- ],
- "rating": 2,
- "updatedAt": "2020-05-27 19:22:57.091000+00:00",
- "updatedBy": "0"
- }
- ->
- type Reaction {
- id: Int!
- shout: Shout!
- createdAt: DateTime!
- createdBy: User!
- updatedAt: DateTime
- deletedAt: DateTime
- deletedBy: User
- range: String # full / 0:2340
- kind: ReactionKind!
- body: String
- replyTo: Reaction
- stat: Stat
- old_id: String
- old_thread: String
- }
- """
- old_ts = entry.get("createdAt")
- reaction_dict = {
- "createdAt": (ts if not old_ts else date_parse(old_ts)),
- "body": html2text(entry.get("body", "")),
- "oid": entry["_id"],
- }
- shout_oid = entry.get("contentItem")
- if shout_oid not in storage["shouts"]["by_oid"]:
- if len(storage["shouts"]["by_oid"]) > 0:
- return shout_oid
- else:
- print("[migration] no shouts migrated yet")
- raise Exception
- return
- else:
- stage = "started"
- reaction = None
- with local_session() as session:
- author = session.query(User).filter(User.oid == entry["createdBy"]).first()
- old_shout = storage["shouts"]["by_oid"].get(shout_oid)
- if not old_shout:
- raise Exception("no old shout in storage")
- else:
- stage = "author and old id found"
- try:
- shout = (
- session.query(Shout)
- .where(Shout.slug == old_shout["slug"])
- .one()
- )
- if shout:
- reaction_dict["shout"] = shout.id
- reaction_dict["createdBy"] = author.id if author else 1
- reaction_dict["kind"] = ReactionKind.COMMENT
-
- # creating reaction from old comment
- reaction = Reaction.create(**reaction_dict)
- session.add(reaction)
- # session.commit()
- stage = "new reaction commited"
- reaction_dict = reaction.dict()
- topics = [t.dict() for t in shout.topics]
- auto_followers(session, topics, reaction_dict)
-
- migrate_ratings(session, entry, reaction_dict)
-
- return reaction
- except Exception as e:
- print(e)
- print(reaction)
- raise Exception(stage)
- return
-
-
-def migrate_2stage(old_comment, idmap):
- if old_comment.get("body"):
- new_id = idmap.get(old_comment.get("oid"))
- new_id = idmap.get(old_comment.get("_id"))
- if new_id:
- new_replyto_id = None
- old_replyto_id = old_comment.get("replyTo")
- if old_replyto_id:
- new_replyto_id = int(idmap.get(old_replyto_id, "0"))
- with local_session() as session:
- comment = session.query(Reaction).where(Reaction.id == new_id).first()
- try:
- if new_replyto_id:
- new_reply = (
- session.query(Reaction)
- .where(Reaction.id == new_replyto_id)
- .first()
- )
- if not new_reply:
- print(new_replyto_id)
- raise Exception("cannot find reply by id!")
- comment.replyTo = new_reply.id
- session.add(comment)
- srf = (
- session.query(ShoutReactionsFollower)
- .where(ShoutReactionsFollower.shout == comment.shout)
- .filter(ShoutReactionsFollower.follower == comment.createdBy)
- .first()
- )
- if not srf:
- srf = ShoutReactionsFollower.create(
- shout=comment.shout, follower=comment.createdBy, auto=True
- )
- session.add(srf)
- session.commit()
- except Exception:
- raise Exception("cannot find a comment by oldid")
diff --git a/migration/tables/content_items.py b/migration/tables/content_items.py
deleted file mode 100644
index ec263fc6..00000000
--- a/migration/tables/content_items.py
+++ /dev/null
@@ -1,420 +0,0 @@
-from datetime import datetime, timezone
-import json
-from dateutil.parser import parse as date_parse
-from sqlalchemy.exc import IntegrityError
-from transliterate import translit
-from services.db import local_session
-from migration.extract import extract_html, extract_media
-from orm.reaction import Reaction, ReactionKind
-from orm.shout import Shout, ShoutTopic, ShoutReactionsFollower
-from orm.user import User
-from orm.topic import TopicFollower, Topic
-from services.viewed import ViewedStorage
-import re
-
-OLD_DATE = "2016-03-05 22:22:00.350000"
-ts = datetime.now(tz=timezone.utc)
-type2layout = {
- "Article": "article",
- "Literature": "literature",
- "Music": "audio",
- "Video": "video",
- "Image": "image",
-}
-
-anondict = {"slug": "anonymous", "id": 1, "name": "Аноним"}
-discours = {"slug": "discours", "id": 2, "name": "Дискурс"}
-
-
-def get_shout_slug(entry):
- slug = entry.get("slug", "")
- if not slug:
- for friend in entry.get("friendlySlugs", []):
- slug = friend.get("slug", "")
- if slug:
- break
- slug = re.sub("[^0-9a-zA-Z]+", "-", slug)
- return slug
-
-
-def create_author_from_app(app):
- user = None
- userdata = None
- # check if email is used
- if app["email"]:
- with local_session() as session:
- user = session.query(User).where(User.email == app["email"]).first()
- if not user:
- # print('[migration] app %r' % app)
- name = app.get("name")
- if name:
- slug = translit(name, "ru", reversed=True).lower()
- slug = re.sub("[^0-9a-zA-Z]+", "-", slug)
- print("[migration] created slug %s" % slug)
- # check if slug is used
- if slug:
- user = session.query(User).where(User.slug == slug).first()
-
- # get slug from email
- if user:
- slug = app["email"].split("@")[0]
- user = session.query(User).where(User.slug == slug).first()
- # one more try
- if user:
- slug += "-author"
- user = (
- session.query(User).where(User.slug == slug).first()
- )
-
- # create user with application data
- if not user:
- userdata = {
- "username": app["email"],
- "email": app["email"],
- "name": app.get("name", ""),
- "bio": app.get("bio", ""),
- "emailConfirmed": False,
- "slug": slug,
- "createdAt": ts,
- "lastSeen": ts,
- }
- # print('[migration] userdata %r' % userdata)
- user = User.create(**userdata)
- session.add(user)
- session.commit()
- userdata["id"] = user.id
-
- userdata = user.dict()
- return userdata
- else:
- raise Exception("app is not ok", app)
-
-
-async def create_shout(shout_dict):
- s = Shout.create(**shout_dict)
- author = s.authors[0]
- with local_session() as session:
- srf = (
- session.query(ShoutReactionsFollower)
- .where(ShoutReactionsFollower.shout == s.id)
- .filter(ShoutReactionsFollower.follower == author.id)
- .first()
- )
- if not srf:
- srf = ShoutReactionsFollower.create(
- shout=s.id, follower=author.id, auto=True
- )
- session.add(srf)
- session.commit()
- return s
-
-
-async def get_user(entry, storage):
- app = entry.get("application")
- userdata = None
- user_oid = None
- if app:
- userdata = create_author_from_app(app)
- else:
- user_oid = entry.get("createdBy")
- if user_oid == "0":
- userdata = discours
- elif user_oid:
- userdata = storage["users"]["by_oid"].get(user_oid)
- if not userdata:
- print("no userdata by oid, anonymous")
- userdata = anondict
- print(app)
- # cleanup slug
- if userdata:
- slug = userdata.get("slug", "")
- if slug:
- slug = re.sub("[^0-9a-zA-Z]+", "-", slug)
- userdata["slug"] = slug
- else:
- userdata = anondict
-
- user = await process_user(userdata, storage, user_oid)
- return user, user_oid
-
-
-async def migrate(entry, storage):
- author, user_oid = await get_user(entry, storage)
- r = {
- "layout": type2layout[entry["type"]],
- "title": entry["title"],
- "authors": [
- author,
- ],
- "slug": get_shout_slug(entry),
- "cover": (
- "https://assets.discours.io/unsafe/1600x/" + entry["thumborId"]
- if entry.get("thumborId")
- else entry.get("image", {}).get("url")
- ),
- "visibility": "public" if entry.get("published") else "authors",
- "publishedAt": date_parse(entry.get("publishedAt"))
- if entry.get("published")
- else None,
- "deletedAt": date_parse(entry.get("deletedAt"))
- if entry.get("deletedAt")
- else None,
- "createdAt": date_parse(entry.get("createdAt", OLD_DATE)),
- "updatedAt": date_parse(entry["updatedAt"]) if "updatedAt" in entry else ts,
- "topics": await add_topics_follower(entry, storage, author),
- "body": extract_html(entry, cleanup=True),
- }
-
- # main topic patch
- r["mainTopic"] = r["topics"][0]
-
- # published author auto-confirm
- if entry.get("published"):
- with local_session() as session:
- # update user.emailConfirmed if published
- author.emailConfirmed = True
- session.add(author)
- session.commit()
-
- # media
- media = extract_media(entry)
- r["media"] = json.dumps(media, ensure_ascii=True) if media else None
-
- # ----------------------------------- copy
- shout_dict = r.copy()
- del shout_dict["topics"]
-
- try:
- # save shout to db
- shout_dict["oid"] = entry.get("_id", "")
- shout = await create_shout(shout_dict)
- except IntegrityError as e:
- print("[migration] create_shout integrity error", e)
- shout = await resolve_create_shout(shout_dict)
- except Exception as e:
- raise Exception(e)
-
- # udpate data
- shout_dict = shout.dict()
- shout_dict["authors"] = [
- author.dict(),
- ]
-
- # shout topics aftermath
- shout_dict["topics"] = await topics_aftermath(r, storage)
-
- # content_item ratings to reactions
- await content_ratings_to_reactions(entry, shout_dict["slug"])
-
- # shout views
- await ViewedStorage.increment(
- shout_dict["slug"], amount=entry.get("views", 1), viewer="old-discours"
- )
- # del shout_dict['ratings']
-
- storage["shouts"]["by_oid"][entry["_id"]] = shout_dict
- storage["shouts"]["by_slug"][shout_dict["slug"]] = shout_dict
- return shout_dict
-
-
-async def add_topics_follower(entry, storage, user):
- topics = set([])
- category = entry.get("category")
- topics_by_oid = storage["topics"]["by_oid"]
- oids = [
- category,
- ] + entry.get("tags", [])
- for toid in oids:
- tslug = topics_by_oid.get(toid, {}).get("slug")
- if tslug:
- topics.add(tslug)
- ttt = list(topics)
- # add author as TopicFollower
- with local_session() as session:
- for tpcslug in topics:
- try:
- tpc = session.query(Topic).where(Topic.slug == tpcslug).first()
- if tpc:
- tf = (
- session.query(TopicFollower)
- .where(TopicFollower.follower == user.id)
- .filter(TopicFollower.topic == tpc.id)
- .first()
- )
- if not tf:
- tf = TopicFollower.create(
- topic=tpc.id, follower=user.id, auto=True
- )
- session.add(tf)
- session.commit()
- except IntegrityError:
- print("[migration.shout] hidden by topic " + tpc.slug)
- # main topic
- maintopic = storage["replacements"].get(topics_by_oid.get(category, {}).get("slug"))
- if maintopic in ttt:
- ttt.remove(maintopic)
- ttt.insert(0, maintopic)
- return ttt
-
-
-async def process_user(userdata, storage, oid):
- with local_session() as session:
- uid = userdata.get("id") # anonymous as
- if not uid:
- print(userdata)
- print("has no id field, set it @anonymous")
- userdata = anondict
- uid = 1
- user = session.query(User).filter(User.id == uid).first()
- if not user:
- try:
- slug = userdata["slug"].lower().strip()
- slug = re.sub("[^0-9a-zA-Z]+", "-", slug)
- userdata["slug"] = slug
- user = User.create(**userdata)
- session.add(user)
- session.commit()
- except IntegrityError:
- print(f"[migration] user creating with slug {userdata['slug']}")
- print("[migration] from userdata")
- print(userdata)
- raise Exception(
- "[migration] cannot create user in content_items.get_user()"
- )
- if user.id == 946:
- print("[migration] ***************** ALPINA")
- if user.id == 2:
- print("[migration] +++++++++++++++++ DISCOURS")
- userdata["id"] = user.id
- userdata["createdAt"] = user.createdAt
- storage["users"]["by_slug"][userdata["slug"]] = userdata
- storage["users"]["by_oid"][oid] = userdata
- if not user:
- raise Exception("could not get a user")
- return user
-
-
-async def resolve_create_shout(shout_dict):
- with local_session() as session:
- s = session.query(Shout).filter(Shout.slug == shout_dict["slug"]).first()
- bump = False
- if s:
- if s.createdAt != shout_dict["createdAt"]:
- # create new with different slug
- shout_dict["slug"] += "-" + shout_dict["layout"]
- try:
- await create_shout(shout_dict)
- except IntegrityError as e:
- print(e)
- bump = True
- else:
- # update old
- for key in shout_dict:
- if key in s.__dict__:
- if s.__dict__[key] != shout_dict[key]:
- print(
- "[migration] shout already exists, but differs in %s"
- % key
- )
- bump = True
- else:
- print("[migration] shout already exists, but lacks %s" % key)
- bump = True
- if bump:
- s.update(shout_dict)
- else:
- print("[migration] something went wrong with shout: \n%r" % shout_dict)
- raise Exception("")
- session.commit()
- return s
-
-
-async def topics_aftermath(entry, storage):
- r = []
- for tpc in filter(lambda x: bool(x), entry["topics"]):
- oldslug = tpc
- newslug = storage["replacements"].get(oldslug, oldslug)
-
- if newslug:
- with local_session() as session:
- shout = session.query(Shout).where(Shout.slug == entry["slug"]).first()
- new_topic = session.query(Topic).where(Topic.slug == newslug).first()
-
- shout_topic_old = (
- session.query(ShoutTopic)
- .join(Shout)
- .join(Topic)
- .filter(Shout.slug == entry["slug"])
- .filter(Topic.slug == oldslug)
- .first()
- )
- if shout_topic_old:
- shout_topic_old.update({"topic": new_topic.id})
- else:
- shout_topic_new = (
- session.query(ShoutTopic)
- .join(Shout)
- .join(Topic)
- .filter(Shout.slug == entry["slug"])
- .filter(Topic.slug == newslug)
- .first()
- )
- if not shout_topic_new:
- try:
- ShoutTopic.create(
- **{"shout": shout.id, "topic": new_topic.id}
- )
- except Exception:
- print("[migration] shout topic error: " + newslug)
- session.commit()
- if newslug not in r:
- r.append(newslug)
- else:
- print("[migration] ignored topic slug: \n%r" % tpc["slug"])
- # raise Exception
- return r
-
-
-async def content_ratings_to_reactions(entry, slug):
- try:
- with local_session() as session:
- for content_rating in entry.get("ratings", []):
- rater = (
- session.query(User)
- .filter(User.oid == content_rating["createdBy"])
- .first()
- ) or User.default_user
- shout = session.query(Shout).where(Shout.slug == slug).first()
- cts = content_rating.get("createdAt")
- reaction_dict = {
- "createdAt": date_parse(cts) if cts else None,
- "kind": ReactionKind.LIKE
- if content_rating["value"] > 0
- else ReactionKind.DISLIKE,
- "createdBy": rater.id,
- "shout": shout.id,
- }
- reaction = (
- session.query(Reaction)
- .filter(Reaction.shout == reaction_dict["shout"])
- .filter(Reaction.createdBy == reaction_dict["createdBy"])
- .filter(Reaction.kind == reaction_dict["kind"])
- .first()
- )
- if reaction:
- k = (
- ReactionKind.AGREE
- if content_rating["value"] > 0
- else ReactionKind.DISAGREE
- )
- reaction_dict["kind"] = k
- reaction.update(reaction_dict)
- session.add(reaction)
- else:
- rea = Reaction.create(**reaction_dict)
- session.add(rea)
- # shout_dict['ratings'].append(reaction_dict)
-
- session.commit()
- except Exception:
- print("[migration] content_item.ratings error: \n%r" % content_rating)
diff --git a/migration/tables/remarks.py b/migration/tables/remarks.py
deleted file mode 100644
index 9a426346..00000000
--- a/migration/tables/remarks.py
+++ /dev/null
@@ -1,34 +0,0 @@
-from services.db import local_session
-from migration.extract import extract_md
-from migration.html2text import html2text
-from orm.reaction import Reaction, ReactionKind
-
-
-def migrate(entry, storage):
- post_oid = entry["contentItem"]
- print(post_oid)
- shout_dict = storage["shouts"]["by_oid"].get(post_oid)
- if shout_dict:
- print(shout_dict["body"])
- remark = {
- "shout": shout_dict["id"],
- "body": extract_md(html2text(entry["body"]), shout_dict),
- "kind": ReactionKind.REMARK,
- }
-
- if entry.get("textBefore"):
- remark["range"] = (
- str(shout_dict["body"].index(entry["textBefore"] or ""))
- + ":"
- + str(
- shout_dict["body"].index(entry["textAfter"] or "")
- + len(entry["textAfter"] or "")
- )
- )
-
- with local_session() as session:
- rmrk = Reaction.create(**remark)
- session.commit()
- del rmrk["_sa_instance_state"]
- return rmrk
- return
diff --git a/migration/tables/replacements.json b/migration/tables/replacements.json
deleted file mode 100644
index 5e042e66..00000000
--- a/migration/tables/replacements.json
+++ /dev/null
@@ -1,828 +0,0 @@
-{
- "207": "207",
- "1990-e": "90s",
- "2000-e": "2000s",
- "90-e": "90s",
- "Georgia": "georgia",
- "Japan": "japan",
- "Sweden": "sweden",
- "abstraktsiya": "abstract",
- "absurdism": "absurdism",
- "acclimatization": "acclimatisation",
- "activism": "activism",
- "adolf-gitler": "adolf-hitler",
- "afrika": "africa",
- "agata-kristi": "agatha-christie",
- "agressivnoe-povedenie": "agression",
- "agressiya": "agression",
- "aktsii": "actions",
- "aktsionizm": "actionism",
- "alber-kamyu": "albert-kamus",
- "albomy": "albums",
- "aleksandr-griboedov": "aleksander-griboedov",
- "aleksandr-pushkin": "aleksander-pushkin",
- "aleksandr-solzhenitsyn": "aleksander-solzhenitsyn",
- "aleksandr-vvedenskiy": "aleksander-vvedensky",
- "aleksey-navalnyy": "alexey-navalny",
- "alfavit": "alphabet",
- "alkogol": "alcohol",
- "alternativa": "alternative",
- "alternative": "alternative",
- "alternativnaya-istoriya": "alternative-history",
- "amerika": "america",
- "anarhizm": "anarchism",
- "anatoliy-mariengof": "anatoly-mariengof",
- "ancient-russia": "ancient-russia",
- "andegraund": "underground",
- "andrey-platonov": "andrey-platonov",
- "andrey-rodionov": "andrey-rodionov",
- "andrey-tarkovskiy": "andrey-tarkovsky",
- "angliyskie-istorii": "english-stories",
- "angliyskiy-yazyk": "english-langugae",
- "ango": "ango",
- "animation": "animation",
- "animatsiya": "animation",
- "anime": "anime",
- "anri-volohonskiy": "anri-volohonsky",
- "antifashizm": "anti-faschism",
- "antiquity": "antiquity",
- "antiutopiya": "dystopia",
- "anton-dolin": "anton-dolin",
- "antropology": "antropology",
- "antropotsen": "antropocenus",
- "architecture": "architecture",
- "arheologiya": "archeology",
- "arhetipy": "archetypes",
- "arhiv": "archive",
- "aristokraty": "aristocracy",
- "aristotel": "aristotle",
- "arktika": "arctic",
- "armiya": "army",
- "armiya-1": "army",
- "art": "art",
- "art-is": "art-is",
- "artists": "artists",
- "ateizm": "atheism",
- "audio-poetry": "audio-poetry",
- "audiopoeziya": "audio-poetry",
- "audiospektakl": "audio-spectacles",
- "auktsyon": "auktsyon",
- "avangard": "avantgarde",
- "avtofikshn": "autofiction",
- "avtorskaya-pesnya": "bardsongs",
- "azbuka-immigratsii": "immigration-basics",
- "aziatskiy-kinematograf": "asian-cinema",
- "b-movie": "b-movie",
- "bannye-chteniya": "sauna-reading",
- "bardsongs": "bardsongs",
- "bdsm": "bdsm",
- "beecake": "beecake",
- "belarus": "belarus",
- "belgiya": "belgium",
- "bertold-breht": "berttold-brecht",
- "bezumie": "madness",
- "biography": "biography",
- "biologiya": "biology",
- "bipolyarnoe-rasstroystvo": "bipolar-disorder",
- "bitniki": "beatnics",
- "biznes": "business",
- "blizhniy-vostok": "middle-east",
- "blizost": "closeness",
- "blocked-in-russia": "blocked-in-russia",
- "blokada": "blockade",
- "bob-dilan": "bob-dylan",
- "bog": "god",
- "bol": "pain",
- "bolotnoe-delo": "bolotnaya-case",
- "books": "books",
- "boris-eltsin": "boris-eltsin",
- "boris-godunov": "boris-godunov",
- "boris-grebenschikov": "boris-grebenschikov",
- "boris-nemtsov": "boris-nemtsov",
- "boris-pasternak": "boris-pasternak",
- "brak": "marriage",
- "bret-iston-ellis": "bret-iston-ellis",
- "buddizm": "buddhism",
- "bullying": "bullying",
- "bunt": "riot",
- "burning-man": "burning-man",
- "bytie": "being",
- "byurokratiya": "bureaucracy",
- "capitalism": "capitalism",
- "censored-in-russia": "censored-in-russia",
- "ch-rno-beloe": "black-and-white",
- "ch-rnyy-yumor": "black-humour",
- "chapters": "chapters",
- "charity": "charity",
- "chayldfri": "childfree",
- "chechenskaya-voyna": "chechen-war",
- "chechnya": "chechnya",
- "chelovek": "male",
- "chernobyl": "chernobyl",
- "chernyy-yumor": "black-humour",
- "children": "children",
- "china": "china",
- "chinovniki": "bureaucracy",
- "chukotka": "chukotka",
- "chuma": "plague",
- "church": "church",
- "cinema": "cinema",
- "city": "city",
- "civil-position": "civil-position",
- "clips": "clips",
- "collage": "collage",
- "comics": "comics",
- "conspiracy-theory": "conspiracy-theory",
- "contemporary-art": "contemporary-art",
- "contemporary-poetry": "poetry",
- "contemporary-prose": "prose",
- "coronavirus": "coronavirus",
- "corruption": "corruption",
- "creative-writing-school": "creative-writing-school",
- "crime": "crime",
- "criticism": "criticism",
- "critiques": "reviews",
- "culture": "culture",
- "dadaizm": "dadaism",
- "daniel-defo": "daniel-defoe",
- "daniil-harms": "daniil-kharms",
- "dante-aligeri": "dante-alighieri",
- "darkveyv": "darkwave",
- "death": "death",
- "debaty": "debats",
- "delo-seti": "seti-case",
- "democracy": "democracy",
- "demografiya": "demographics",
- "demonstrations": "demonstrations",
- "depression": "depression",
- "derevnya": "village",
- "derrida": "derrida",
- "design": "design",
- "detskie-doma": "orphanages",
- "detstvo": "childhood",
- "devid-linch": "david-linch",
- "devyanostye": "90s",
- "dialog": "dialogue",
- "digital": "digital",
- "digital-art": "digital-art",
- "dinozavry": "dinosaurs",
- "directing": "directing",
- "diskurs": "discours",
- "diskurs-1": "discourse",
- "diskurs-analiz": "discourse-analytics",
- "dissidenty": "dissidents",
- "diy": "diy",
- "dmitriy-donskoy": "dmitriy-donskoy",
- "dmitriy-prigov": "dmitriy-prigov",
- "dnevnik-1": "dairy",
- "dnevniki": "dairies",
- "documentary": "documentary",
- "dokumentalnaya-poema": "documentary-poem",
- "dokumentalnaya-poeziya": "documentary-poetry",
- "dokumenty": "doсuments",
- "domashnee-nasilie": "home-terror",
- "donald-tramp": "donald-trump",
- "donbass": "donbass",
- "donbass-diary": "donbass-diary",
- "donorstvo": "donation",
- "dozhd": "rain",
- "drama": "drama",
- "dramaturgy": "dramaturgy",
- "drawing": "drawing",
- "drevo-zhizni": "tree-of-life",
- "drugs": "drugs",
- "duh": "spirit",
- "dzhaz": "jazz",
- "dzhek-keruak": "jack-keruak",
- "dzhim-morrison": "jim-morrison",
- "dzhordzh-romero": "george-romero",
- "dzhordzho-agamben": "giorgio-agamben",
- "ecology": "ecology",
- "economics": "economics",
- "eda": "food",
- "editorial-statements": "editorial-statements",
- "eduard-limonov": "eduard-limonov",
- "education": "education",
- "egor-letov": "egor-letov",
- "ekspat": "expat",
- "eksperiment": "experiments",
- "eksperimentalnaya-muzyka": "experimental-music",
- "ekspressionizm": "expressionism",
- "ekstremizm": "extremism",
- "ekzistentsializm-1": "existentialism",
- "ekzistentsiya": "existence",
- "elections": "elections",
- "electronic": "electronics",
- "electronics": "electronics",
- "elena-glinskaya": "elena-glinskaya",
- "elena-guro": "elena-guro",
- "elizaveta-mnatsakanova": "elizaveta-mnatsakanova",
- "embient": "ambient",
- "emigration": "emigration",
- "emil-dyurkgeym": "emile-durkheim",
- "emotsii": "emotions",
- "empiric": "empiric",
- "epidemiya": "pandemic",
- "erich-von-neff": "erich-von-neff",
- "erotika": "erotics",
- "essay": "essay",
- "estetika": "aestetics",
- "etika": "ethics",
- "etno": "ethno",
- "etnos": "ethnics",
- "everyday-life": "everyday-life",
- "evgeniy-onegin": "eugene-onegin",
- "evolyutsiya": "evolution",
- "exhibitions": "exhibitions",
- "experience": "experiences",
- "experimental": "experimental",
- "experimental-music": "experimental-music",
- "explanation": "explanation",
- "faktcheking": "fact-checking",
- "falsifikatsii": "falsifications",
- "family": "family",
- "fanfiki": "fan-fiction",
- "fantastika": "sci-fi",
- "fatalizm": "fatalism",
- "fedor-dostoevskiy": "fedor-dostoevsky",
- "fedor-ioannovich": "fedor-ioannovich",
- "feleton": "feuilleton",
- "feminism": "feminism",
- "fenomenologiya": "phenomenology",
- "fentezi": "fantasy",
- "festival": "festival",
- "festival-territoriya": "festival-territory",
- "folk": "folk",
- "folklor": "folklore",
- "fotoreportazh": "photoreports",
- "france": "france",
- "frants-kafka": "franz-kafka",
- "frederik-begbeder": "frederick-begbeder",
- "freedom": "freedom",
- "friendship": "friendship",
- "fsb": "fsb",
- "futbol": "footbool",
- "future": "future",
- "futuristy": "futurists",
- "futurizm": "futurism",
- "galereya": "gallery",
- "galereya-anna-nova": "gallery-anna-nova",
- "gdr": "gdr",
- "gender": "gender",
- "gendernyy-diskurs": "gender",
- "gennadiy-aygi": "gennadiy-aygi",
- "gerhard-rihter": "gerhard-rihter",
- "germaniya": "germany",
- "germenevtika": "hermeneutics",
- "geroi": "heroes",
- "girls": "girls",
- "gkchp": "gkchp",
- "glitch": "glitch",
- "globalizatsiya": "globalisation",
- "gollivud": "hollywood",
- "gonzo": "gonzo",
- "gore-ot-uma": "woe-from-wit",
- "graffiti": "graffiti",
- "graficheskaya-novella": "graphic-novell",
- "graphics": "graphics",
- "gravyura": "engraving",
- "grazhdanskaya-oborona": "grazhdanskaya-oborona",
- "gretsiya": "greece",
- "griby": "mushrooms",
- "gruziya-2": "georgia",
- "gulag": "gulag",
- "han-batyy": "khan-batyy",
- "hayku": "haiku",
- "health": "health",
- "himiya": "chemistry",
- "hip-hop": "hip-hop",
- "history": "history",
- "history-of-russia": "history-of-russia",
- "holokost": "holocaust",
- "horeografiya": "choreography",
- "horror": "horror",
- "hospis": "hospice",
- "hristianstvo": "christianity",
- "humans": "humans",
- "humour": "humour",
- "ideologiya": "ideology",
- "idm": "idm",
- "igil": "isis",
- "igor-pomerantsev": "igor-pomerantsev",
- "igra": "game",
- "igra-prestolov": "game-of-throne",
- "igry": "games",
- "iisus-hristos": "jesus-christ",
- "illness": "illness",
- "illustration-history": "illustration-history",
- "illustrations": "illustrations",
- "imazhinizm": "imagism",
- "immanuil-kant": "immanuel-kant",
- "impressionizm": "impressionism",
- "improvizatsiya": "improvisation",
- "indi": "indie",
- "individualizm": "individualism",
- "infografika": "infographics",
- "informatsiya": "information",
- "ingmar-bergman": "ingmar-bergman",
- "inklyuziya": "inclusion",
- "installyatsiya": "installation",
- "internet": "internet",
- "interview": "interview",
- "invalidnost": "disability",
- "investigations": "investigations",
- "iosif-brodskiy": "joseph-brodsky",
- "iosif-stalin": "joseph-stalin",
- "iskusstvennyy-intellekt": "artificial-intelligence",
- "islam": "islam",
- "istoriya-moskvy": "moscow-history",
- "istoriya-nauki": "history-of-sceince",
- "istoriya-o-medsestre": "nurse-story",
- "istoriya-teatra": "theatre-history",
- "italiya": "italy",
- "italyanskiy-yazyk": "italian-language",
- "iudaika": "judaica",
- "ivan-groznyy": "ivan-grozny",
- "ivan-iii-gorbatyy": "ivan-iii-gorbaty",
- "ivan-kalita": "ivan-kalita",
- "ivan-krylov": "ivan-krylov",
- "izobreteniya": "inventions",
- "izrail-1": "israel",
- "jazz": "jazz",
- "john-lennon": "john-lennon",
- "journalism": "journalism",
- "justice": "justice",
- "k-pop": "k-pop",
- "kalligrafiya": "calligraphy",
- "karikatura": "caricatures",
- "kartochki-rubinshteyna": "rubinstein-cards",
- "katrin-nenasheva": "katrin-nenasheva",
- "kavarga": "kavarga",
- "kavkaz": "caucasus",
- "kazan": "kazan",
- "kiberbezopasnost": "cybersecurity",
- "kinoklub": "cinema-club",
- "kinokritika": "film-criticism",
- "kirill-serebrennikov": "kirill-serebrennikov",
- "kladbische": "cemetery",
- "klassika": "classic",
- "kollektivnoe-bessoznatelnoe": "сollective-unconscious",
- "komediya": "comedy",
- "kommunikatsii": "communications",
- "kommunizm": "communism",
- "kommuny": "communes",
- "kompyuternye-igry": "computer-games",
- "konets-vesny": "end-of-spring",
- "konservatizm": "conservatism",
- "kontrkultura": "counter-culture",
- "kontseptualizm": "conceptualism",
- "korotkometrazhka": "cinema-shorts",
- "kosmos": "cosmos",
- "kraudfanding": "crowdfunding",
- "kriptovalyuty": "cryptocurrencies",
- "krizis": "crisis",
- "krov": "blood",
- "krym": "crimea",
- "kulturologiya": "culturology",
- "kulty": "cults",
- "kurdistan": "kurdistan",
- "kurt-kobeyn": "kurt-cobain",
- "kurt-vonnegut": "kurt-vonnegut",
- "kvir": "queer",
- "laboratoriya": "lab",
- "language": "languages",
- "lars-fon-trier": "lars-fon-trier",
- "laws": "laws",
- "lectures": "lectures",
- "leto": "summer",
- "lev-tolstoy": "leo-tolstoy",
- "lgbt": "lgbt",
- "liberalizm": "liberalism",
- "libertarianstvo": "libertarianism",
- "life": "life",
- "likbez": "likbez",
- "lingvistika": "linguistics",
- "lirika": "lirics",
- "literary-studies": "literary-studies",
- "literature": "literature",
- "literaturnyykaver": "literature-cover",
- "lo-fi": "lo-fi",
- "lomonosov": "lomonosov",
- "love": "love",
- "luzha-goluboy-krovi": "luzha-goluboy-krovi",
- "lyudvig-vitgenshteyn": "ludwig-wittgenstein",
- "lzhedmitriy": "false-dmitry",
- "lzhenauka": "pseudoscience",
- "magiya": "magic",
- "maks-veber": "max-weber",
- "manifests": "manifests",
- "manipulyatsii-soznaniem": "mind-manipulation",
- "marina-abramovich": "marina-abramovich",
- "marketing": "marketing",
- "marksizm": "marxism",
- "marsel-dyushan": "marchel-duchamp",
- "marsel-prust": "marcel-proust",
- "martin-haydegger": "martin-hidegger",
- "matematika": "maths",
- "mayakovskiy": "vladimir-mayakovsky",
- "media": "media",
- "medicine": "medicine",
- "memuary": "memoirs",
- "menedzhment": "management",
- "menty": "police",
- "merab-mamardashvili": "merab-mamardashvili",
- "mest": "revenge",
- "metamodernizm": "metamodern",
- "metavselennaya": "metaverse",
- "metro": "metro",
- "mifologiya": "mythology",
- "mify": "myth",
- "mihael-haneke": "michael-haneke",
- "mihail-baryshnikov": "mihail-baryshnikov",
- "mihail-bulgakov": "mihail-bulgakov",
- "mikrotonalnaya-muzyka": "mikrotone-muzyka",
- "minimalizm": "minimalism",
- "minkult-privet": "minkult-privet",
- "mir": "world",
- "mirovozzrenie": "mindsets",
- "mishel-fuko": "michel-foucault",
- "mistika": "mystics",
- "mitropolit-makariy": "mitropolit-makariy",
- "mlm": "mlm",
- "mobilizatsiya": "mobilisation",
- "moda": "fashion",
- "modernizm": "modernism",
- "mokyumentari": "mockumentary",
- "molodezh": "youth",
- "moloko-plus": "moloko-plus",
- "money": "money",
- "monologs": "monologues",
- "monstratsiya": "monstration",
- "moralnaya-otvetstvennost": "moral-responsibility",
- "more": "sea",
- "moscow": "moscow",
- "moshennichestvo": "frauds",
- "moskovskiy-romanticheskiy-kontseptualizm": "moscow-romantic-conceptualism",
- "moskovskoe-delo": "moscow-case",
- "movies": "movies",
- "mozg": "brain",
- "multiplikatsiya": "animation",
- "music": "music",
- "musulmanstvo": "islam",
- "muzei": "museum",
- "muzey": "museum",
- "muzhchiny": "man",
- "myshlenie": "thinking",
- "nagornyy-karabah": "nagorno-karabakh",
- "nasilie-1": "violence",
- "natsionalizm": "nationalism",
- "natsionalnaya-ideya": "national-idea",
- "natsizm": "nazism",
- "natyurmort": "nature-morte",
- "nauchpop": "pop-science",
- "nbp": "nbp",
- "nenavist": "hate",
- "neofitsialnaya-literatura": "unofficial-literature",
- "neoklassika": "neoclassic",
- "neprozrachnye-smysly": "hidden-meanings",
- "neravenstvo": "inequality",
- "net-voyne": "no-war",
- "new-year": "new-year",
- "neyronauka": "neuro-science",
- "neyroseti": "neural-networks",
- "niu-vshe": "hse",
- "nizhniy-novgorod": "nizhny-novgorod",
- "nko": "nonprofits",
- "nlo": "ufo",
- "nobelevskaya-premiya": "nobel-prize",
- "noize-mc": "noize-mc",
- "nonkonformizm": "nonconformism",
- "notforall": "notforall",
- "novaya-drama": "new-drama",
- "novosti": "news",
- "noyz": "noise",
- "nuar": "noir",
- "oberiu": "oberiu",
- "ocherk": "etudes",
- "ochevidnyy-nuar": "ochevidnyy-nuar",
- "odinochestvo": "loneliness",
- "odna-kniga-odna-istoriya": "one-book-one-story",
- "okrainy": "outskirts",
- "omon": "swat",
- "opinions": "opinions",
- "oppozitsiya": "opposition",
- "orhan-pamuk": "orhan-pamuk",
- "ornitologiya": "ornitology",
- "osen": "autumn",
- "osip-mandelshtam": "osip-mandelshtam",
- "oskar-uayld": "oscar-wilde",
- "osoznanie": "awareness",
- "otnosheniya": "relationship",
- "pablo-pikasso": "pablo-picasso",
- "painting": "painting",
- "paintings": "painting",
- "pamyat": "memory",
- "pandemiya": "pandemic",
- "parizh": "paris",
- "patriotizm": "patriotism",
- "patsifizm": "pacifism",
- "paul-tselan": "paul-tselan",
- "per-burd": "pierre-bourdieu",
- "perezhivaniya": "worries",
- "performance": "performance",
- "peyzazh": "landscape",
- "philology": "philology",
- "philosophy": "philosophy",
- "photo": "photography",
- "photography": "photography",
- "photoprojects": "photoprojects",
- "plakaty": "posters",
- "plastilin": "plasticine",
- "plays": "plays",
- "podrostki": "teenagers",
- "poema": "poem",
- "poems": "poems",
- "poeticheskaya-proza": "poetic-prose",
- "poetry": "poetry",
- "poetry-of-squares": "poetry-of-squares",
- "poetry-slam": "poetry-slam",
- "pokoy": "peace",
- "police": "police",
- "politicheskoe-fentezi": "political-fantasy",
- "politics": "politics",
- "politzaklyuchennye": "political-prisoners",
- "polsha": "poland",
- "pomosch": "help",
- "pop-art": "pop-art",
- "pop-culture": "pop-culture",
- "populyarnaya-psihologiya": "popular-psychology",
- "pornografiya": "pornography",
- "portret": "portrait",
- "poslovitsy": "proverbs",
- "post-pank": "post-punk",
- "post-rok": "post-rock",
- "postmodernism": "postmodernism",
- "povest": "novells",
- "povsednevnost": "everyday-life",
- "power": "power",
- "pravo": "right",
- "pravoslavie": "orthodox",
- "pravozaschitniki": "human-rights-activism",
- "prazdnik": "holidays",
- "predatelstvo": "betrayal",
- "predprinimatelstvo": "entrepreneurship",
- "premera": "premier",
- "premiya-oskar": "oscar-prize",
- "pribaltika-1": "baltic",
- "priroda": "nature",
- "prison": "prison",
- "pritcha": "parable",
- "privatnost": "privacy",
- "progress": "progress",
- "projects": "projects",
- "prokrastinatsiya": "procrastination",
- "propaganda": "propaganda",
- "proschenie": "forgiveness",
- "prose": "prose",
- "proshloe": "past",
- "prostitutsiya": "prostitution",
- "prosveschenie": "enlightenment",
- "protests": "protests",
- "psalmy": "psalms",
- "psihoanaliz": "psychoanalysis",
- "psihodeliki": "psychodelics",
- "pskov": "pskov",
- "psychiatry": "psychiatry",
- "psychology": "psychology",
- "ptitsy": "birds",
- "punk": "punk",
- "r-b": "rnb",
- "rasizm": "racism",
- "realizm": "realism",
- "redaktura": "editing",
- "refleksiya": "reflection",
- "reggi": "reggae",
- "religion": "religion",
- "rene-zhirar": "rene-girard",
- "renesanss": "renessance",
- "renovatsiya": "renovation",
- "rep": "rap",
- "reportage": "reportage",
- "reportazh-1": "reportage",
- "repressions": "repressions",
- "research": "research",
- "retroveyv": "retrowave",
- "review": "review",
- "revolution": "revolution",
- "rezo-gabriadze": "rezo-gabriadze",
- "risunki": "painting",
- "roboty": "robots",
- "rock": "rock",
- "roditeli": "parents",
- "romantizm": "romantism",
- "romany": "novell",
- "ronald-reygan": "ronald-reygan",
- "roskomnadzor": "roskomnadzor",
- "rossiyskoe-kino": "russian-cinema",
- "rouling": "rowling",
- "rozhava": "rojava",
- "rpts": "rpts",
- "rus-na-grani-sryva": "rus-na-grani-sryva",
- "russia": "russia",
- "russian-language": "russian-language",
- "russian-literature": "russian-literature",
- "russkaya-toska": "russian-toska",
- "russkiy-mir": "russkiy-mir",
- "salo": "lard",
- "salvador-dali": "salvador-dali",
- "samoidentifikatsiya": "self-identity",
- "samoopredelenie": "self-definition",
- "sankt-peterburg": "saint-petersburg",
- "sasha-skochilenko": "sasha-skochilenko",
- "satira": "satiric",
- "saund-art": "sound-art",
- "schaste": "happiness",
- "school": "school",
- "science": "science",
- "sculpture": "sculpture",
- "second-world-war": "second-world-war",
- "sekond-hend": "second-hand",
- "seksprosvet": "sex-education",
- "seksualizirovannoe-nasilie": "sexualized-violence",
- "seksualnoe-nasilie": "sexualized-violence",
- "sekty": "sects",
- "semi": "semi",
- "semiotics": "semiotics",
- "serbiya": "serbia",
- "sergey-bodrov-mladshiy": "sergey-bodrov-junior",
- "sergey-solov-v": "sergey-solovyov",
- "serialy": "series",
- "sever": "north",
- "severnaya-koreya": "north-korea",
- "sex": "sex",
- "shotlandiya": "scotland",
- "shugeyz": "shoegaze",
- "siloviki": "siloviki",
- "simeon-bekbulatovich": "simeon-bekbulatovich",
- "simvolizm": "simbolism",
- "siriya": "siria",
- "skulptura": "sculpture",
- "slavoy-zhizhek": "slavoj-zizek",
- "smert-1": "death",
- "smysl": "meaning",
- "sny": "dreams",
- "sobytiya": "events",
- "social": "society",
- "society": "society",
- "sociology": "sociology",
- "sofya-paleolog": "sofya-paleolog",
- "sofya-vitovtovna": "sofya-vitovtovna",
- "soobschestva": "communities",
- "soprotivlenie": "resistence",
- "sotsializm": "socialism",
- "sotsialnaya-filosofiya": "social-philosophy",
- "sotsiologiya-1": "sociology",
- "sotsseti": "social-networks",
- "sotvorenie-tretego-rima": "third-rome",
- "sovremennost": "modernity",
- "spaces": "spaces",
- "spektakl": "spectacles",
- "spetseffekty": "special-fx",
- "spetsoperatsiya": "special-operation",
- "spetssluzhby": "special-services",
- "sport": "sport",
- "srednevekove": "middle-age",
- "state": "state",
- "statistika": "statistics",
- "stendap": "stand-up",
- "stihi": "poetry",
- "stoitsizm": "stoicism",
- "stories": "stories",
- "stoyanie-na-ugre": "stoyanie-na-ugre",
- "strah": "fear",
- "street-art": "street-art",
- "stsenarii": "scenarios",
- "sud": "court",
- "summary": "summary",
- "supergeroi": "superheroes",
- "svetlana-aleksievich": "svetlana-aleksievich",
- "svobodu-ivanu-golunovu": "free-ivan-golunov",
- "syurrealizm": "surrealism",
- "tales": "tales",
- "tanets": "dance",
- "tataro-mongolskoe-igo": "mongol-tatar-yoke",
- "tatuirovki": "tattoo",
- "technology": "technology",
- "televidenie": "television",
- "telo": "body",
- "telo-kak-iskusstvo": "body-as-art",
- "terrorizm": "terrorism",
- "tests": "tests",
- "text": "texts",
- "the-beatles": "the-beatles",
- "theater": "theater",
- "theory": "theory",
- "tokio": "tokio",
- "torture": "torture",
- "totalitarizm": "totalitarism",
- "traditions": "traditions",
- "tragicomedy": "tragicomedy",
- "transgendernost": "transgender",
- "translation": "translation",
- "transport": "transport",
- "travel": "travel",
- "travma": "trauma",
- "trendy": "trends",
- "tretiy-reyh": "third-reich",
- "triller": "thriller",
- "tsar": "central-african-republic",
- "tsar-edip": "oedipus",
- "tsarevich-dmitriy": "tsarevich-dmitry",
- "tsennosti": "values",
- "tsenzura": "censorship",
- "tseremonii": "ceremonies",
- "turizm": "tourism",
- "tvorchestvo": "creativity",
- "ugnetennyy-zhilischnyy-klass": "oppressed-housing-class",
- "uilyam-shekspir": "william-shakespeare",
- "ukraina-2": "ukraine",
- "ukraine": "ukraine",
- "university": "university",
- "urban-studies": "urban-studies",
- "uroki-literatury": "literature-lessons",
- "usa": "usa",
- "ussr": "ussr",
- "utopiya": "utopia",
- "utrata": "loss",
- "valter-benyamin": "valter-benyamin",
- "varlam-shalamov": "varlam-shalamov",
- "vasiliy-ii-temnyy": "basil-ii-temnyy",
- "vasiliy-iii": "basil-iii",
- "vdnh": "vdnh",
- "vechnost": "ethernety",
- "velikobritaniya": "great-britain",
- "velimir-hlebnikov": "velimir-hlebnikov",
- "velkom-tu-greyt-britn": "welcome-to-great-britain",
- "venedikt-erofeev": "venedikt-erofeev",
- "venetsiya": "veneece",
- "vengriya": "hungary",
- "verlibry": "free-verse",
- "veschi": "things",
- "vessels": "vessels",
- "veterany": "veterans",
- "video": "video",
- "videoart": "videoart",
- "videoklip": "clips",
- "videopoeziya": "video-poetry",
- "viktor-astafev": "viktor-astafev",
- "viktor-pelevin": "viktor-pelevin",
- "vilgelm-rayh": "wilhelm-reich",
- "vinzavod": "vinzavod",
- "violence": "violence",
- "visual-culture": "visual-culture",
- "vizualnaya-poeziya": "visual-poetry",
- "vladimir-lenin": "vladimir-lenin",
- "vladimir-mayakovskiy": "vladimir-mayakovsky",
- "vladimir-nabokov": "vladimir-nabokov",
- "vladimir-putin": "vladimir-putin",
- "vladimir-sorokin": "vladimir-sorokin",
- "vladimir-voynovich": "vladimir-voynovich",
- "vnutrenniy-opyt": "inner-expirience",
- "volga": "volga",
- "volontery": "volonteurs",
- "vong-karvay": "wong-karwai",
- "vospominaniya": "memories",
- "vostok": "east",
- "voyna-na-ukraine": "war-in-ukraine",
- "voyna-v-ukraine": "war-in-ukraine",
- "vremya": "time",
- "vudi-allen": "woody-allen",
- "vynuzhdennye-otnosheniya": "forced-relationship",
- "war": "war",
- "war-in-ukraine-images": "war-in-ukrahine-images",
- "women": "women",
- "work": "work",
- "writers": "writers",
- "xx-century": "xx-century",
- "yakob-yordans": "yakob-yordans",
- "yan-vermeer": "yan-vermeer",
- "yanka-dyagileva": "yanka-dyagileva",
- "yaponskaya-literatura": "japan-literature",
- "yazychestvo": "paganism",
- "youth": "youth",
- "yozef-rot": "yozef-rot",
- "yurgen-habermas": "jorgen-habermas",
- "za-liniey-mannergeyma": "behind-mannerheim-line",
- "zabota": "care",
- "zahar-prilepin": "zahar-prilepin",
- "zakonodatelstvo": "laws",
- "zakony-mira": "world-laws",
- "zametki": "notes",
- "zhelanie": "wish",
- "zhivotnye": "animals",
- "zhoze-saramago": "jose-saramago",
- "zigmund-freyd": "sigmund-freud",
- "zolotaya-orda": "golden-horde",
- "zombi": "zombie",
- "zombi-simpsony": "zombie-simpsons"
-}
diff --git a/migration/tables/topics.py b/migration/tables/topics.py
deleted file mode 100644
index 9fb5c45f..00000000
--- a/migration/tables/topics.py
+++ /dev/null
@@ -1,32 +0,0 @@
-from services.db import local_session
-from migration.extract import extract_md
-from migration.html2text import html2text
-from orm import Topic
-
-
def migrate(entry):
    """Migrate one legacy topic entry into the Topic table.

    Upserts by slug: creates the topic when missing; for an existing row,
    replaces the title when the stored one is longer (prefers the incoming,
    shorter title) and replaces the body when the stored one is shorter
    (prefers the longer body). Returns the topic's attribute dict without
    SQLAlchemy bookkeeping state.
    """
    # Normalize non-breaking spaces (U+00A0) from the legacy HTML export.
    # NOTE(review): the pre-migration source read `.replace(" ", " ")`, a
    # visual no-op that is almost certainly a garbled NBSP — confirm against
    # the legacy data before relying on this.
    body_orig = entry.get("description", "").replace("\xa0", " ")
    topic_dict = {
        "slug": entry["slug"],
        "oid": entry["_id"],
        "title": entry["title"].replace("\xa0", " "),
        "body": extract_md(html2text(body_orig)),
    }

    with local_session() as session:
        slug = topic_dict["slug"]
        topic = (
            session.query(Topic).filter(Topic.slug == slug).first()
            or Topic.create(**topic_dict)
        )
        if not topic:
            # Defensive only: the `or Topic.create(...)` above makes this
            # unreachable unless create() returns a falsy value.
            raise Exception("no topic!")
        if len(topic.title) > len(topic_dict["title"]):
            Topic.update(topic, {"title": topic_dict["title"]})
        if len(topic.body) < len(topic_dict["body"]):
            Topic.update(topic, {"body": topic_dict["body"]})
        session.commit()
    rt = topic.__dict__.copy()
    del rt["_sa_instance_state"]
    return rt
diff --git a/migration/tables/users.py b/migration/tables/users.py
deleted file mode 100644
index 9e85a3df..00000000
--- a/migration/tables/users.py
+++ /dev/null
@@ -1,167 +0,0 @@
-import re
-
-from bs4 import BeautifulSoup
-from dateutil.parser import parse
-from sqlalchemy.exc import IntegrityError
-
-from services.db import local_session
-from orm.user import AuthorFollower, User, UserRating
-
-
def migrate(entry):
    """Migrate one legacy user document into a User row.

    Builds a user dict from the Mongo-style export, derives slug, name, bio,
    userpic and links from the embedded profile, then creates the User. On a
    slug collision the existing row is reused and its oid and password are
    refreshed. Returns the user dict augmented with the row's id.

    Raises:
        Exception: when a colliding user cannot be found after an
            IntegrityError (should not happen in practice).
    """
    if "subscribedTo" in entry:
        del entry["subscribedTo"]
    email = entry["emails"][0]["address"]
    user_dict = {
        "oid": entry["_id"],
        "roles": [],
        "ratings": [],
        "username": email,
        "email": email,
        "createdAt": parse(entry["createdAt"]),
        # Staff addresses are treated as confirmed even without verification.
        "emailConfirmed": ("@discours.io" in email)
        or bool(entry["emails"][0]["verified"]),
        "muted": False,  # amnesty
        "bio": entry["profile"].get("bio", ""),
        "links": [],
        "name": "anonymous",
        "password": entry["services"]["password"].get("bcrypt"),
    }

    if "updatedAt" in entry:
        user_dict["updatedAt"] = parse(entry["updatedAt"])
    # BUG FIX: the original tested the misspelled key "wasOnineAt" but then
    # read entry["wasOnlineAt"], so lastSeen was either never migrated or the
    # lookup raised KeyError. Test and read the same (correct) key.
    if "wasOnlineAt" in entry:
        user_dict["lastSeen"] = parse(entry["wasOnlineAt"])
    if entry.get("profile"):
        # slug
        slug = entry["profile"].get("path").lower()
        slug = re.sub("[^0-9a-zA-Z]+", "-", slug).strip()
        user_dict["slug"] = slug
        # Un-escape markdown-escaped parentheses. ("\\(" is the same string
        # value as the original's "\(", without the invalid-escape warning.)
        bio = (
            (entry.get("profile", {"bio": ""}).get("bio") or "")
            .replace("\\(", "(")
            .replace("\\)", ")")
        )
        bio_text = BeautifulSoup(bio, features="lxml").text

        # Long bios become the "about" field; short ones stay as "bio".
        if len(bio_text) > 120:
            user_dict["about"] = bio_text
        else:
            user_dict["bio"] = bio_text

        # userpic: prefer the thumbor asset, fall back to the raw image URL.
        try:
            user_dict["userpic"] = (
                "https://assets.discours.io/unsafe/100x/"
                + entry["profile"]["thumborId"]
            )
        except KeyError:
            try:
                user_dict["userpic"] = entry["profile"]["image"]["url"]
            except KeyError:
                user_dict["userpic"] = ""

        # name: "first last", falling back to slug, then to the profile path.
        fn = entry["profile"].get("firstName", "")
        ln = entry["profile"].get("lastName", "")
        name = fn if fn else ""
        name = (name + " " + ln) if ln else name
        if not name:
            name = slug if slug else "anonymous"
        name = (
            entry["profile"]["path"].lower().strip().replace(" ", "-")
            if len(name) < 2
            else name
        )
        user_dict["name"] = name

        # links: collect whichever social/website fields are present.
        for field in ("facebook", "vkontakte", "twitter", "website"):
            link = entry["profile"].get(field, False)
            if link:
                user_dict["links"].append(link)

    # some checks
    # BUG FIX: the original indexed user_dict["slug"] directly, which raised
    # KeyError for entries without a profile; .get() lets such entries fall
    # through to the email-derived slug below.
    if not user_dict.get("slug") and len(user_dict["links"]) > 0:
        user_dict["slug"] = user_dict["links"][0].split("/")[-1]

    user_dict["slug"] = user_dict.get("slug", user_dict["email"].split("@")[0])
    oid = user_dict["oid"]
    user_dict["slug"] = user_dict["slug"].lower().strip().replace(" ", "-")
    try:
        user = User.create(**user_dict.copy())
    except IntegrityError:
        # Slug already taken: reuse the existing row, refreshing its legacy
        # oid and password so later stages can still resolve it.
        print("[migration] cannot create user " + user_dict["slug"])
        with local_session() as session:
            old_user = (
                session.query(User).filter(User.slug == user_dict["slug"]).first()
            )
            old_user.oid = oid
            old_user.password = user_dict["password"]
            session.commit()
            user = old_user
        if not user:
            print("[migration] ERROR: cannot find user " + user_dict["slug"])
            raise Exception
    user_dict["id"] = user.id
    return user_dict
-
-
def post_migrate():
    """Create the service account that aggregates legacy-site view counts."""
    legacy_account = {
        "slug": "old-discours",
        "username": "old-discours",
        "email": "old@discours.io",
        "name": "Просмотры на старой версии сайта",
    }

    with local_session() as session:
        session.add(User.create(**legacy_account))
        session.commit()
-
-
def migrate_2stage(entry, id_map):
    """Second pass: attach user-to-user ratings once all users exist.

    For each legacy rating on ``entry``, resolves the rater and the rated
    author through ``id_map`` (oid -> slug), creates a UserRating row, and —
    for positive ratings — an automatic AuthorFollower link. Returns the
    number of ratings skipped because the rater's oid was not in ``id_map``.
    """
    ce = 0  # count of ratings whose rater could not be resolved
    for rating_entry in entry.get("ratings", []):
        rater_oid = rating_entry["createdBy"]
        rater_slug = id_map.get(rater_oid)
        if not rater_slug:
            ce += 1
            # print(rating_entry)
            continue
        oid = entry["_id"]
        author_slug = id_map.get(oid)

        # One short-lived session per rating so a failure rolls back only
        # that rating's rows.
        with local_session() as session:
            try:
                rater = session.query(User).where(User.slug == rater_slug).one()
                user = session.query(User).where(User.slug == author_slug).one()

                user_rating_dict = {
                    "value": rating_entry["value"],
                    "rater": rater.id,
                    "user": user.id,
                }

                user_rating = UserRating.create(**user_rating_dict)
                # A positive rating implies an automatic follow of the author.
                if user_rating_dict["value"] > 0:
                    af = AuthorFollower.create(
                        author=user.id, follower=rater.id, auto=True
                    )
                    session.add(af)
                session.add(user_rating)
                session.commit()
            except IntegrityError:
                # Duplicate rating/follow — presumably already migrated; log
                # and keep going.
                print("[migration] cannot rate " + author_slug + "`s by " + rater_slug)
            except Exception as e:
                # Best-effort pass: log anything else (e.g. .one() finding no
                # row) without aborting the whole migration.
                print(e)
    return ce
diff --git a/migration/utils.py b/migration/utils.py
deleted file mode 100644
index 39b47583..00000000
--- a/migration/utils.py
+++ /dev/null
@@ -1,10 +0,0 @@
-from datetime import datetime
-from json import JSONEncoder
-
-
class DateTimeEncoder(JSONEncoder):
    """JSONEncoder that renders datetime values via ``str()``."""

    def default(self, z):
        # Datetimes serialize as "YYYY-MM-DD HH:MM:SS[.ffffff]"; any other
        # unsupported type falls through to the base class, which raises
        # TypeError as usual.
        return str(z) if isinstance(z, datetime) else super().default(z)
diff --git a/requirements.txt b/requirements.txt
index a653cb31..c891a256 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,7 +6,7 @@ pyjwt>=2.6.0
git+https://github.com/encode/starlette.git#main
sqlalchemy>=1.4.41
graphql-core>=3.0.3
-gql~=3.4.0
+gql[httpx]
uvicorn>=0.18.3
pydantic>=1.10.2
passlib~=1.7.4
@@ -14,14 +14,11 @@ itsdangerous
authlib>=1.1.0
httpx>=0.23.0
psycopg2-binary
-transliterate~=1.10.2
bcrypt>=4.0.0
websockets
-bson~=0.5.10
flake8
DateTime~=4.7
python-dateutil~=2.8.2
-beautifulsoup4~=4.11.1
lxml
sentry-sdk>=1.14.0
boto3~=1.28.2
diff --git a/services/viewed.py b/services/viewed.py
index 21d688b7..a7b17a3b 100644
--- a/services/viewed.py
+++ b/services/viewed.py
@@ -2,18 +2,16 @@ import asyncio
import time
from datetime import timedelta, timezone, datetime
from os import environ, path
-from ssl import create_default_context
from gql import Client, gql
-from gql.transport.aiohttp import AIOHTTPTransport
+from gql.transport.httpx import HTTPXAsyncTransport
from services.db import local_session
from orm import Topic
from orm.shout import ShoutTopic, Shout
load_facts = gql(
- """
-query getDomains {
+ """ query getDomains {
domains {
id
title
@@ -23,14 +21,11 @@ query getDomains {
viewsMonth
viewsYear
}
- }
-}
-"""
+ } } """
)
load_pages = gql(
- """
-query getDomains {
+ """ query getDomains {
domains {
title
statistics {
@@ -41,10 +36,9 @@ query getDomains {
value
}
}
- }
-}
-"""
+ } } """
)
+
schema_str = open(path.dirname(__file__) + "/ackee.graphql").read()
token = environ.get("ACKEE_TOKEN", "")
@@ -52,9 +46,8 @@ token = environ.get("ACKEE_TOKEN", "")
def create_client(headers=None, schema=None):
return Client(
schema=schema,
- transport=AIOHTTPTransport(
+ transport=HTTPXAsyncTransport(
url="https://ackee.discours.io/api",
- ssl=create_default_context(),
headers=headers,
),
)