git-dep3
This commit is contained in: parent 3d659caa6e, commit 5fedd007c7
@@ -1,292 +0,0 @@

""" cmd managed migration """
import asyncio
import gc
import json
import sys
from datetime import datetime, timezone

import bs4

from migration.export import export_mdx
from migration.tables.comments import migrate as migrateComment
from migration.tables.comments import migrate_2stage as migrateComment_2stage
from migration.tables.content_items import get_shout_slug
from migration.tables.content_items import migrate as migrateShout
from migration.tables.remarks import migrate as migrateRemark
from migration.tables.topics import migrate as migrateTopic
from migration.tables.users import migrate as migrateUser, post_migrate as users_post_migrate
from migration.tables.users import migrate_2stage as migrateUser_2stage
from orm import init_tables
from orm.reaction import Reaction

TODAY = datetime.strftime(datetime.now(tz=timezone.utc), "%Y%m%d")
OLD_DATE = "2016-03-05 22:22:00.350000"


async def users_handle(storage):
    """migrating users first"""
    counter = 0
    id_map = {}
    print("[migration] migrating %d users" % (len(storage["users"]["data"])))
    for entry in storage["users"]["data"]:
        oid = entry["_id"]
        user = migrateUser(entry)
        storage["users"]["by_oid"][oid] = user  # full
        del user["password"]
        del user["emailConfirmed"]
        del user["username"]
        del user["email"]
        storage["users"]["by_slug"][user["slug"]] = user  # public
        id_map[user["oid"]] = user["slug"]
        counter += 1
    ce = 0
    for entry in storage["users"]["data"]:
        ce += migrateUser_2stage(entry, id_map)
    users_post_migrate()


async def topics_handle(storage):
    """topics from categories and tags"""
    counter = 0
    for t in storage["topics"]["tags"] + storage["topics"]["cats"]:
        if t["slug"] in storage["replacements"]:
            t["slug"] = storage["replacements"][t["slug"]]
            topic = migrateTopic(t)
            storage["topics"]["by_oid"][t["_id"]] = topic
            storage["topics"]["by_slug"][t["slug"]] = topic
            counter += 1
        else:
            print("[migration] topic " + t["slug"] + " ignored")
    for oldslug, newslug in storage["replacements"].items():
        if oldslug != newslug and oldslug in storage["topics"]["by_slug"]:
            oid = storage["topics"]["by_slug"][oldslug]["_id"]
            del storage["topics"]["by_slug"][oldslug]
            storage["topics"]["by_oid"][oid] = storage["topics"]["by_slug"][newslug]
    print("[migration] " + str(counter) + " topics migrated")
    print(
        "[migration] "
        + str(len(storage["topics"]["by_oid"].values()))
        + " topics by oid"
    )
    print(
        "[migration] "
        + str(len(storage["topics"]["by_slug"].values()))
        + " topics by slug"
    )


async def shouts_handle(storage, args):
    """migrating content items one by one"""
    counter = 0
    discours_author = 0
    anonymous_author = 0
    pub_counter = 0
    ignored = 0
    topics_dataset_bodies = []
    topics_dataset_tlist = []
    for entry in storage["shouts"]["data"]:
        gc.collect()
        # slug
        slug = get_shout_slug(entry)

        # single slug mode
        if "-" in args and slug not in args:
            continue

        # migrate
        shout_dict = await migrateShout(entry, storage)
        if shout_dict:
            storage["shouts"]["by_oid"][entry["_id"]] = shout_dict
            storage["shouts"]["by_slug"][shout_dict["slug"]] = shout_dict
            # shouts.topics
            if not shout_dict["topics"]:
                print("[migration] no topics!")

            # with author
            author = shout_dict["authors"][0]
            if author["slug"] == "discours":
                discours_author += 1
            if author["slug"] == "anonymous":
                anonymous_author += 1
            # print('[migration] ' + shout['slug'] + ' with author ' + author)

            if entry.get("published"):
                if "mdx" in args:
                    export_mdx(shout_dict)
                pub_counter += 1

            # print main counter
            counter += 1
            print('[migration] shouts_handle %d: %s @%s' % (
                counter, shout_dict["slug"], author["slug"]
            ))

            b = bs4.BeautifulSoup(shout_dict["body"], "html.parser")
            texts = [shout_dict["title"].lower().replace(r"[^а-яА-Яa-zA-Z]", "")]
            texts = texts + b.findAll(text=True)
            topics_dataset_bodies.append(" ".join([x.strip().lower() for x in texts]))
            topics_dataset_tlist.append(shout_dict["topics"])
        else:
            ignored += 1

    # np.savetxt('topics_dataset.csv', (topics_dataset_bodies, topics_dataset_tlist), delimiter=', ', fmt='%s')

    print("[migration] " + str(counter) + " content items were migrated")
    print("[migration] " + str(pub_counter) + " have been published")
    print("[migration] " + str(discours_author) + " authored by @discours")
    print("[migration] " + str(anonymous_author) + " authored by @anonymous")


async def remarks_handle(storage):
    print("[migration] remarks")
    c = 0
    for entry_remark in storage["remarks"]["data"]:
        remark = await migrateRemark(entry_remark, storage)
        c += 1
    print("[migration] " + str(c) + " remarks migrated")


async def comments_handle(storage):
    print("[migration] comments")
    id_map = {}
    ignored_counter = 0
    missed_shouts = {}
    for oldcomment in storage["reactions"]["data"]:
        if not oldcomment.get("deleted"):
            reaction = await migrateComment(oldcomment, storage)
            if type(reaction) == str:
                missed_shouts[reaction] = oldcomment
            elif type(reaction) == Reaction:
                reaction = reaction.dict()
                rid = reaction["id"]
                oid = reaction["oid"]
                id_map[oid] = rid
            else:
                ignored_counter += 1

    for reaction in storage["reactions"]["data"]:
        migrateComment_2stage(reaction, id_map)
    print("[migration] " + str(len(id_map)) + " comments migrated")
    print("[migration] " + str(ignored_counter) + " comments ignored")
    print("[migration] " + str(len(missed_shouts.keys())) + " commented shouts missed")
    missed_counter = 0
    for missed in missed_shouts.values():
        missed_counter += len(missed)
    print("[migration] " + str(missed_counter) + " comments dropped")


async def all_handle(storage, args):
    print("[migration] handle everything")
    await users_handle(storage)
    await topics_handle(storage)
    print("[migration] users and topics are migrated")
    await shouts_handle(storage, args)
    # print("[migration] remarks...")
    # await remarks_handle(storage)
    print("[migration] migrating comments")
    await comments_handle(storage)
    # export_email_subscriptions()
    print("[migration] done!")


def data_load():
    storage = {
        "content_items": {
            "by_oid": {},
            "by_slug": {},
        },
        "shouts": {"by_oid": {}, "by_slug": {}, "data": []},
        "reactions": {"by_oid": {}, "by_slug": {}, "by_content": {}, "data": []},
        "topics": {
            "by_oid": {},
            "by_slug": {},
            "cats": [],
            "tags": [],
        },
        "remarks": {"data": []},
        "users": {"by_oid": {}, "by_slug": {}, "data": []},
        "replacements": json.loads(open("migration/tables/replacements.json").read()),
    }
    try:
        users_data = json.loads(open("migration/data/users.json").read())
        print("[migration.load] " + str(len(users_data)) + " users ")
        tags_data = json.loads(open("migration/data/tags.json").read())
        storage["topics"]["tags"] = tags_data
        print("[migration.load] " + str(len(tags_data)) + " tags ")
        cats_data = json.loads(
            open("migration/data/content_item_categories.json").read()
        )
        storage["topics"]["cats"] = cats_data
        print("[migration.load] " + str(len(cats_data)) + " cats ")
        comments_data = json.loads(open("migration/data/comments.json").read())
        storage["reactions"]["data"] = comments_data
        print("[migration.load] " + str(len(comments_data)) + " comments ")
        content_data = json.loads(open("migration/data/content_items.json").read())
        storage["shouts"]["data"] = content_data
        print("[migration.load] " + str(len(content_data)) + " content items ")

        remarks_data = json.loads(open("migration/data/remarks.json").read())
        storage["remarks"]["data"] = remarks_data
        print("[migration.load] " + str(len(remarks_data)) + " remarks data ")

        # fill out storage
        for x in users_data:
            storage["users"]["by_oid"][x["_id"]] = x
            # storage['users']['by_slug'][x['slug']] = x
            # no user.slug yet
        print(
            "[migration.load] "
            + str(len(storage["users"]["by_oid"].keys()))
            + " users by oid"
        )
        for x in tags_data:
            storage["topics"]["by_oid"][x["_id"]] = x
            storage["topics"]["by_slug"][x["slug"]] = x
        for x in cats_data:
            storage["topics"]["by_oid"][x["_id"]] = x
            storage["topics"]["by_slug"][x["slug"]] = x
        print(
            "[migration.load] "
            + str(len(storage["topics"]["by_slug"].keys()))
            + " topics by slug"
        )
        for item in content_data:
            slug = get_shout_slug(item)
            storage["content_items"]["by_slug"][slug] = item
            storage["content_items"]["by_oid"][item["_id"]] = item
        print("[migration.load] " + str(len(content_data)) + " content items")
        for x in comments_data:
            storage["reactions"]["by_oid"][x["_id"]] = x
            cid = x["contentItem"]
            storage["reactions"]["by_content"][cid] = x
            ci = storage["content_items"]["by_oid"].get(cid, {})
            if "slug" in ci:
                storage["reactions"]["by_slug"][ci["slug"]] = x
        print(
            "[migration.load] "
            + str(len(storage["reactions"]["by_content"].keys()))
            + " with comments"
        )
        storage["users"]["data"] = users_data
        storage["topics"]["tags"] = tags_data
        storage["topics"]["cats"] = cats_data
        storage["shouts"]["data"] = content_data
        storage["reactions"]["data"] = comments_data
    except Exception as e:
        raise e
    return storage


async def handling_migration():
    init_tables()
    await all_handle(data_load(), sys.argv)


def process():
    loop = asyncio.get_event_loop()
    loop.run_until_complete(handling_migration())


if __name__ == "__main__":
    process()
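The CLI wiring outside this file is not shown in the diff, so the invocation sketch below is an assumption based only on the tokens shouts_handle actually checks in args: sys.argv is passed through unchanged, a literal "-" switches on single-slug mode, and an "mdx" token enables MDX export for published shouts.

# Hypothetical invocation sketch (module name assumed, not shown in the diff):
#   python -m migration                   # full migration
#   python -m migration - some-slug mdx   # one shout by slug, with MDX export
import sys

sys.argv = ["migration", "-", "some-slug", "mdx"]  # "-" -> single-slug mode, "mdx" -> export_mdx
# process()  # would drive handling_migration() on the asyncio event loop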
@@ -1,32 +0,0 @@

import json
import os

import bson
import gc
from .utils import DateTimeEncoder


def json_tables():
    print("[migration] unpack dump/discours/*.bson to migration/data/*.json")
    data = {
        "content_items": [],
        "content_item_categories": [],
        "tags": [],
        "email_subscriptions": [],
        "users": [],
        "comments": [],
        "remarks": []
    }
    for table in data.keys():
        print('[migration] bson2json for ' + table)
        gc.collect()
        lc = []
        bs = open("dump/discours/" + table + ".bson", "rb").read()
        base = 0
        while base < len(bs):
            base, d = bson.decode_document(bs, base)
            lc.append(d)
        data[table] = lc
        open(os.getcwd() + "/migration/data/" + table + ".json", "w").write(
            json.dumps(lc, cls=DateTimeEncoder)
        )
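As a usage note, json_tables() assumes a fixed on-disk layout relative to the working directory; a minimal sketch, with the module's import path assumed since the diff does not show file names:

# expected layout, relative to the working directory:
#   dump/discours/<table>.bson   <- mongodump output, one file per table
#   migration/data/<table>.json  <- written by json_tables()
from migration.bson2json import json_tables  # import path assumed

json_tables()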
@@ -1,159 +0,0 @@

import json
import os
from datetime import datetime, timezone

import frontmatter

from .extract import extract_html, extract_media
from .utils import DateTimeEncoder

OLD_DATE = "2016-03-05 22:22:00.350000"
EXPORT_DEST = "../discoursio-web/data/"
parentDir = "/".join(os.getcwd().split("/")[:-1])
contentDir = parentDir + "/discoursio-web/content/"
ts = datetime.now(tz=timezone.utc)


def get_metadata(r):
    authors = []
    for a in r["authors"]:
        authors.append(
            {  # a short version for public listings
                "slug": a.slug or "discours",
                "name": a.name or "Дискурс",
                "userpic": a.userpic or "https://discours.io/static/img/discours.png",
            }
        )
    metadata = {}
    metadata["title"] = r.get("title", "").replace("{", "(").replace("}", ")")
    metadata["authors"] = authors
    metadata["createdAt"] = r.get("createdAt", ts)
    metadata["layout"] = r["layout"]
    metadata["topics"] = [topic for topic in r["topics"]]
    metadata["topics"].sort()
    if r.get("cover", False):
        metadata["cover"] = r.get("cover")
    return metadata


def export_mdx(r):
    # print('[export] mdx %s' % r['slug'])
    content = ""
    metadata = get_metadata(r)
    content = frontmatter.dumps(frontmatter.Post(r["body"], **metadata))
    ext = "mdx"
    filepath = contentDir + r["slug"]
    bc = bytes(content, "utf-8").decode("utf-8", "ignore")
    open(filepath + "." + ext, "w").write(bc)


def export_body(shout, storage):
    entry = storage["content_items"]["by_oid"][shout["oid"]]
    if entry:
        body = extract_html(entry)
        media = extract_media(entry)
        shout["body"] = body  # prepare_html_body(entry)  # prepare_md_body(entry)
        shout["media"] = media
        export_mdx(shout)
        print("[export] html for %s" % shout["slug"])
        open(contentDir + shout["slug"] + ".html", "w").write(body)
    else:
        raise Exception("no content_items entry found")


def export_slug(slug, storage):
    shout = storage["shouts"]["by_slug"].get(slug)
    assert shout, "[export] no shout found by slug: %s " % slug
    author = shout["authors"][0]
    assert author, "[export] no author error"
    export_body(shout, storage)


def export_email_subscriptions():
    email_subscriptions_data = json.loads(
        open("migration/data/email_subscriptions.json").read()
    )
    for data in email_subscriptions_data:
        # TODO: migrate to mailgun list manually
        # migrate_email_subscription(data)
        pass
    print(
        "[migration] "
        + str(len(email_subscriptions_data))
        + " email subscriptions exported"
    )


def export_shouts(storage):
    # update what was just migrated or load json again
    if len(storage["users"]["by_slugs"].keys()) == 0:
        storage["users"]["by_slugs"] = json.loads(
            open(EXPORT_DEST + "authors.json").read()
        )
        print(
            "[migration] "
            + str(len(storage["users"]["by_slugs"].keys()))
            + " exported authors "
        )
    if len(storage["shouts"]["by_slugs"].keys()) == 0:
        storage["shouts"]["by_slugs"] = json.loads(
            open(EXPORT_DEST + "articles.json").read()
        )
        print(
            "[migration] "
            + str(len(storage["shouts"]["by_slugs"].keys()))
            + " exported articles "
        )
    for slug in storage["shouts"]["by_slugs"].keys():
        export_slug(slug, storage)


def export_json(
    export_articles={}, export_authors={}, export_topics={}, export_comments={}
):
    open(EXPORT_DEST + "authors.json", "w").write(
        json.dumps(
            export_authors,
            cls=DateTimeEncoder,
            indent=4,
            sort_keys=True,
            ensure_ascii=False,
        )
    )
    print("[migration] " + str(len(export_authors.items())) + " authors exported")
    open(EXPORT_DEST + "topics.json", "w").write(
        json.dumps(
            export_topics,
            cls=DateTimeEncoder,
            indent=4,
            sort_keys=True,
            ensure_ascii=False,
        )
    )
    print("[migration] " + str(len(export_topics.keys())) + " topics exported")

    open(EXPORT_DEST + "articles.json", "w").write(
        json.dumps(
            export_articles,
            cls=DateTimeEncoder,
            indent=4,
            sort_keys=True,
            ensure_ascii=False,
        )
    )
    print("[migration] " + str(len(export_articles.items())) + " articles exported")
    open(EXPORT_DEST + "comments.json", "w").write(
        json.dumps(
            export_comments,
            cls=DateTimeEncoder,
            indent=4,
            sort_keys=True,
            ensure_ascii=False,
        )
    )
    print(
        "[migration] "
        + str(len(export_comments.items()))
        + " exported articles with comments"
    )
@@ -1,434 +0,0 @@

import base64
import os
import re
import uuid

from bs4 import BeautifulSoup

# NOTE: extract_footnotes() below relies on these; the import paths are assumed
# from the surrounding project layout, as the original file did not show them.
from base.orm import local_session
from orm.reaction import Reaction, ReactionKind


TOOLTIP_REGEX = r"(\/\/\/(.+)\/\/\/)"
contentDir = os.path.join(
    os.path.dirname(os.path.realpath(__file__)), "..", "..", "discoursio-web", "content"
)
s3 = "https://discours-io.s3.amazonaws.com/"
cdn = "https://assets.discours.io"


def replace_tooltips(body):
    # change if you prefer regexp
    newbody = body
    matches = list(re.finditer(TOOLTIP_REGEX, body, re.IGNORECASE | re.MULTILINE))[1:]
    for match in matches:
        newbody = body.replace(
            match.group(1), '<Tooltip text="' + match.group(2) + '" />'
        )  # NOTE: doesn't work
    if len(matches) > 0:
        print("[extract] found %d tooltips" % len(matches))
    return newbody


def extract_footnotes(body, shout_dict):
    parts = body.split("&&&")
    lll = len(parts)
    newparts = list(parts)
    placed = False
    if lll & 1:
        if lll > 1:
            i = 1
            print("[extract] found %d footnotes in body" % (lll - 1))
            for part in parts[1:]:
                if i & 1:
                    placed = True
                    if 'a class="footnote-url" href=' in part:
                        print("[extract] footnote: " + part)
                        fn = 'a class="footnote-url" href="'
                        extracted_link = part.split(fn, 1)[1].split('"', 1)[0]
                        extracted_body = part.split(fn, 1)[1].split('>', 1)[1].split('</a>', 1)[0]
                        print("[extract] footnote link: " + extracted_link)
                        with local_session() as session:
                            Reaction.create({
                                "shout": shout_dict['id'],
                                "kind": ReactionKind.FOOTNOTE,
                                "body": extracted_body,
                                "range": str(body.index(fn + extracted_link) - len('<')) + ':' + str(body.index(extracted_body) + len('</a>'))
                            })
                        newparts[i] = "<a href='#'>ℹ️</a>"
                else:
                    newparts[i] = part
                i += 1
    return ("".join(newparts), placed)


def place_tooltips(body):
    parts = body.split("&&&")
    lll = len(parts)
    newparts = list(parts)
    placed = False
    if lll & 1:
        if lll > 1:
            i = 1
            print("[extract] found %d tooltips" % (lll - 1))
            for part in parts[1:]:
                if i & 1:
                    placed = True
                    if 'a class="footnote-url" href=' in part:
                        print("[extract] footnote: " + part)
                        fn = 'a class="footnote-url" href="'
                        link = part.split(fn, 1)[1].split('"', 1)[0]
                        extracted_part = (
                            part.split(fn, 1)[0] + " " + part.split("/", 1)[-1]
                        )
                        newparts[i] = (
                            "<Tooltip"
                            + (' link="' + link + '" ' if link else "")
                            + ">"
                            + extracted_part
                            + "</Tooltip>"
                        )
                    else:
                        newparts[i] = "<Tooltip>%s</Tooltip>" % part
                        # print('[extract] ' + newparts[i])
                else:
                    # print('[extract] ' + part[:10] + '..')
                    newparts[i] = part
                i += 1
    return ("".join(newparts), placed)


IMG_REGEX = r"\!\[(.*?)\]\((data\:image\/(png|jpeg|jpg);base64\,((?:[A-Za-z\d+\/]{4})*(?:[A-Za-z\d+\/]{3}="
IMG_REGEX += r"|[A-Za-z\d+\/]{2}==)))\)"

parentDir = "/".join(os.getcwd().split("/")[:-1])
public = parentDir + "/discoursio-web/public"
cache = {}


def reextract_images(body, oid):
    # change if you prefer regexp
    matches = list(re.finditer(IMG_REGEX, body, re.IGNORECASE | re.MULTILINE))[1:]
    i = 0
    for match in matches:
        print("[extract] image " + match.group(1))
        ext = match.group(3)
        name = oid + str(i)
        link = public + "/upload/image-" + name + "." + ext
        img = match.group(4)
        title = match.group(1)  # NOTE: this is not the title
        if img not in cache:
            content = base64.b64decode(img + "==")
            print(str(len(img)) + " image bytes been written")
            open("../" + link, "wb").write(content)
            cache[img] = name
            i += 1
        else:
            print("[extract] image cached " + cache[img])
        body.replace(
            str(match), ""
        )  # WARNING: this does not work
    return body


IMAGES = {
    "data:image/png": "png",
    "data:image/jpg": "jpg",
    "data:image/jpeg": "jpg",
}

b64 = ";base64,"


def extract_imageparts(bodyparts, prefix):
    # recursive loop
    newparts = list(bodyparts)
    for current in bodyparts:
        i = bodyparts.index(current)
        for mime in IMAGES.keys():
            if mime == current[-len(mime) :] and (i + 1 < len(bodyparts)):
                print("[extract] " + mime)
                next = bodyparts[i + 1]
                ext = IMAGES[mime]
                b64end = next.index(")")
                b64encoded = next[:b64end]
                name = prefix + "-" + str(len(cache))
                link = "/upload/image-" + name + "." + ext
                print("[extract] name: " + name)
                print("[extract] link: " + link)
                print("[extract] %d bytes" % len(b64encoded))
                if b64encoded not in cache:
                    try:
                        content = base64.b64decode(b64encoded + "==")
                        open(public + link, "wb").write(content)
                        print(
                            "[extract] "
                            + str(len(content))
                            + " image bytes been written"
                        )
                        cache[b64encoded] = name
                    except Exception:
                        raise Exception
                        # raise Exception('[extract] error decoding image %r' %b64encoded)
                else:
                    print("[extract] cached link " + cache[b64encoded])
                    name = cache[b64encoded]
                    link = cdn + "/upload/image-" + name + "." + ext
                newparts[i] = (
                    current[: -len(mime)]
                    + current[-len(mime) :]
                    + link
                    + next[-b64end:]
                )
                newparts[i + 1] = next[:-b64end]
                break
    return (
        extract_imageparts(
            newparts[i] + newparts[i + 1] + b64.join(bodyparts[(i + 2) :]), prefix
        )
        if len(bodyparts) > (i + 1)
        else "".join(newparts)
    )


def extract_dataimages(parts, prefix):
    newparts = list(parts)
    for part in parts:
        i = parts.index(part)
        if part.endswith("]("):
            [ext, rest] = parts[i + 1].split(b64)
            name = prefix + "-" + str(len(cache))
            if ext == "/jpeg":
                ext = "jpg"
            else:
                ext = ext.replace("/", "")
            link = "/upload/image-" + name + "." + ext
            print("[extract] filename: " + link)
            b64end = rest.find(")")
            if b64end != -1:
                b64encoded = rest[:b64end]
                print("[extract] %d text bytes" % len(b64encoded))
                # write if not cached
                if b64encoded not in cache:
                    try:
                        content = base64.b64decode(b64encoded + "==")
                        open(public + link, "wb").write(content)
                        print("[extract] " + str(len(content)) + " image bytes")
                        cache[b64encoded] = name
                    except Exception:
                        raise Exception
                        # raise Exception('[extract] error decoding image %r' %b64encoded)
                else:
                    print("[extract] 0 image bytes, cached for " + cache[b64encoded])
                    name = cache[b64encoded]

                # update link with CDN
                link = cdn + "/upload/image-" + name + "." + ext

                # patch newparts
                newparts[i + 1] = link + rest[b64end:]
            else:
                raise Exception("cannot find the end of base64 encoded string")
        else:
            print("[extract] dataimage skipping part " + str(i))
            continue
    return "".join(newparts)


di = "data:image"


def extract_md_images(body, prefix):
    newbody = ""
    body = (
        body.replace("\n! [](" + di, "\n ![](" + di)
        .replace("\n[](" + di, "\n![](" + di)
        .replace(" [](" + di, " ![](" + di)
    )
    parts = body.split(di)
    if len(parts) > 1:
        newbody = extract_dataimages(parts, prefix)
    else:
        newbody = body
    return newbody


def cleanup_md(body):
    newbody = (
        body.replace("<", "")
        .replace(">", "")
        .replace("{", "(")
        .replace("}", ")")
        .replace("…", "...")
        .replace(" __ ", " ")
        .replace("_ _", " ")
        .replace("****", "")
        .replace("\u00a0", " ")
        .replace("\u02c6", "^")
        .replace("\ufeff", "")
        .replace("\u200b", "")
        .replace("\u200c", "")
    )  # .replace('\u2212', '-')
    return newbody


def extract_md(body, shout_dict=None):
    newbody = body
    if newbody:
        newbody = cleanup_md(newbody)
        if not newbody:
            raise Exception("cleanup error")

        if shout_dict:

            uid = shout_dict['id'] or uuid.uuid4()
            newbody = extract_md_images(newbody, uid)
            if not newbody:
                raise Exception("extract_images error")

            newbody, placed = extract_footnotes(body, shout_dict)
            if not newbody:
                raise Exception("extract_footnotes error")

    return newbody


def extract_media(entry):
    ''' normalized media extraction method '''
    # media [ { title pic url body } ]}
    kind = entry.get("type")
    if not kind:
        print(entry)
        raise Exception("shout no layout")
    media = []
    for m in entry.get("media") or []:
        # title
        title = m.get("title", "").replace("\n", " ").replace("  ", " ")
        artist = m.get("performer") or m.get("artist")
        if artist:
            title = artist + " - " + title

        # pic
        url = m.get("fileUrl") or m.get("url", "")
        pic = ""
        if m.get("thumborId"):
            pic = cdn + "/unsafe/1600x/" + m["thumborId"]

        # url
        if not url:
            if kind == "Image":
                url = pic
            elif "youtubeId" in m:
                url = "https://youtube.com/?watch=" + m["youtubeId"]
            elif "vimeoId" in m:
                url = "https://vimeo.com/" + m["vimeoId"]
        # body
        body = m.get("body") or m.get("literatureBody") or ""
        media.append({
            "url": url,
            "pic": pic,
            "title": title,
            "body": body
        })
    return media


def prepare_html_body(entry):
    # body modifications
    body = ""
    kind = entry.get("type")
    addon = ""
    if kind == "Video":
        addon = ""
        for m in entry.get("media") or []:
            if "youtubeId" in m:
                addon += '<iframe width="420" height="345" src="http://www.youtube.com/embed/'
                addon += m["youtubeId"]
                addon += '?autoplay=1" frameborder="0" allowfullscreen></iframe>\n'
            elif "vimeoId" in m:
                addon += '<iframe src="https://player.vimeo.com/video/'
                addon += m["vimeoId"]
                addon += ' width="420" height="345" frameborder="0" allow="autoplay; fullscreen"'
                addon += " allowfullscreen></iframe>"
            else:
                print("[extract] media is not supported")
                print(m)
        body += addon

    elif kind == "Music":
        addon = ""
        for m in entry.get("media") or []:
            artist = m.get("performer")
            trackname = ""
            if artist:
                trackname += artist + " - "
            if "title" in m:
                trackname += m.get("title", "")
            addon += "<figure><figcaption>"
            addon += trackname
            addon += '</figcaption><audio controls src="'
            addon += m.get("fileUrl", "")
            addon += '"></audio></figure>'
        body += addon

    body = extract_html(entry)
    # if body_orig: body += extract_md(html2text(body_orig), entry['_id'])
    return body


def cleanup_html(body: str) -> str:
    new_body = body
    regex_remove = [
        r"style=\"width:\s*\d+px;height:\s*\d+px;\"",
        r"style=\"width:\s*\d+px;\"",
        r"style=\"color: #000000;\"",
        r"style=\"float: none;\"",
        r"style=\"background: white;\"",
        r"class=\"Apple-interchange-newline\"",
        r"class=\"MsoNormalCxSpMiddle\"",
        r"class=\"MsoNormal\"",
        r"lang=\"EN-US\"",
        r"id=\"docs-internal-guid-[\w-]+\"",
        r"<p>\s*</p>",
        r"<span></span>",
        r"<i>\s*</i>",
        r"<b>\s*</b>",
        r"<h1>\s*</h1>",
        r"<h2>\s*</h2>",
        r"<h3>\s*</h3>",
        r"<h4>\s*</h4>",
        r"<div>\s*</div>",
    ]
    regex_replace = {
        r"<br>\s*</p>": "</p>"
    }
    changed = True
    while changed:
        # we need several iterations to clean nested tags this way
        changed = False
        new_body_iteration = new_body
        for regex in regex_remove:
            new_body = re.sub(regex, "", new_body)
        for regex, replace in regex_replace.items():
            new_body = re.sub(regex, replace, new_body)
        if new_body_iteration != new_body:
            changed = True
    return new_body


def extract_html(entry, shout_id=None, cleanup=False):
    body_orig = (entry.get("body") or "").replace(r'\(', '(').replace(r'\)', ')')
    if cleanup:
        # we do that before bs parsing to catch the invalid html
        body_clean = cleanup_html(body_orig)
        if body_clean != body_orig:
            print(f"[migration] html cleaned for slug {entry.get('slug', None)}")
            body_orig = body_clean
    if shout_id:
        extract_footnotes(body_orig, shout_id)
    body_html = str(BeautifulSoup(body_orig, features="html.parser"))
    if cleanup:
        # we do that after bs parsing because it can add dummy tags
        body_clean_html = cleanup_html(body_html)
        if body_clean_html != body_html:
            print(f"[migration] html cleaned after bs4 for slug {entry.get('slug', None)}")
            body_html = body_clean_html
    return body_html
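To illustrate how the extractors above compose, here is a minimal sketch with a fabricated content_items entry; the field names come from extract_media and extract_html, but the entry itself is invented:

entry = {
    "type": "Music",
    "body": "<p>Hello <b></b>world</p>",
    "media": [{"title": "Track", "performer": "Artist", "fileUrl": "https://example.com/a.mp3"}],
}
print(extract_html(entry, cleanup=True))  # "<p>Hello world</p>" -- the empty <b></b> is removed
print(extract_media(entry))  # [{'url': 'https://example.com/a.mp3', 'pic': '', 'title': 'Artist - Track', 'body': ''}]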
File diff suppressed because it is too large
@@ -1,3 +0,0 @@

from .cli import main

main()
@@ -1,323 +0,0 @@

import argparse
import sys

from . import HTML2Text, __version__, config


# noinspection DuplicatedCode
def main() -> None:
    baseurl = ""

    class bcolors:
        HEADER = "\033[95m"
        OKBLUE = "\033[94m"
        OKGREEN = "\033[92m"
        WARNING = "\033[93m"
        FAIL = "\033[91m"
        ENDC = "\033[0m"
        BOLD = "\033[1m"
        UNDERLINE = "\033[4m"

    p = argparse.ArgumentParser()
    p.add_argument(
        "--default-image-alt",
        dest="default_image_alt",
        default=config.DEFAULT_IMAGE_ALT,
        help="The default alt string for images with missing ones",
    )
    p.add_argument(
        "--pad-tables",
        dest="pad_tables",
        action="store_true",
        default=config.PAD_TABLES,
        help="pad the cells to equal column width in tables",
    )
    p.add_argument(
        "--no-wrap-links",
        dest="wrap_links",
        action="store_false",
        default=config.WRAP_LINKS,
        help="don't wrap links during conversion",
    )
    p.add_argument(
        "--wrap-list-items",
        dest="wrap_list_items",
        action="store_true",
        default=config.WRAP_LIST_ITEMS,
        help="wrap list items during conversion",
    )
    p.add_argument(
        "--wrap-tables",
        dest="wrap_tables",
        action="store_true",
        default=config.WRAP_TABLES,
        help="wrap tables",
    )
    p.add_argument(
        "--ignore-emphasis",
        dest="ignore_emphasis",
        action="store_true",
        default=config.IGNORE_EMPHASIS,
        help="don't include any formatting for emphasis",
    )
    p.add_argument(
        "--reference-links",
        dest="inline_links",
        action="store_false",
        default=config.INLINE_LINKS,
        help="use reference style links instead of inline links",
    )
    p.add_argument(
        "--ignore-links",
        dest="ignore_links",
        action="store_true",
        default=config.IGNORE_ANCHORS,
        help="don't include any formatting for links",
    )
    p.add_argument(
        "--ignore-mailto-links",
        action="store_true",
        dest="ignore_mailto_links",
        default=config.IGNORE_MAILTO_LINKS,
        help="don't include mailto: links",
    )
    p.add_argument(
        "--protect-links",
        dest="protect_links",
        action="store_true",
        default=config.PROTECT_LINKS,
        help="protect links from line breaks surrounding them with angle brackets",
    )
    p.add_argument(
        "--ignore-images",
        dest="ignore_images",
        action="store_true",
        default=config.IGNORE_IMAGES,
        help="don't include any formatting for images",
    )
    p.add_argument(
        "--images-as-html",
        dest="images_as_html",
        action="store_true",
        default=config.IMAGES_AS_HTML,
        help=(
            "Always write image tags as raw html; preserves `height`, `width` and "
            "`alt` if possible."
        ),
    )
    p.add_argument(
        "--images-to-alt",
        dest="images_to_alt",
        action="store_true",
        default=config.IMAGES_TO_ALT,
        help="Discard image data, only keep alt text",
    )
    p.add_argument(
        "--images-with-size",
        dest="images_with_size",
        action="store_true",
        default=config.IMAGES_WITH_SIZE,
        help=(
            "Write image tags with height and width attrs as raw html to retain "
            "dimensions"
        ),
    )
    p.add_argument(
        "-g",
        "--google-doc",
        action="store_true",
        dest="google_doc",
        default=False,
        help="convert an html-exported Google Document",
    )
    p.add_argument(
        "-d",
        "--dash-unordered-list",
        action="store_true",
        dest="ul_style_dash",
        default=False,
        help="use a dash rather than a star for unordered list items",
    )
    p.add_argument(
        "-e",
        "--asterisk-emphasis",
        action="store_true",
        dest="em_style_asterisk",
        default=False,
        help="use an asterisk rather than an underscore for emphasized text",
    )
    p.add_argument(
        "-b",
        "--body-width",
        dest="body_width",
        type=int,
        default=config.BODY_WIDTH,
        help="number of characters per output line, 0 for no wrap",
    )
    p.add_argument(
        "-i",
        "--google-list-indent",
        dest="list_indent",
        type=int,
        default=config.GOOGLE_LIST_INDENT,
        help="number of pixels Google indents nested lists",
    )
    p.add_argument(
        "-s",
        "--hide-strikethrough",
        action="store_true",
        dest="hide_strikethrough",
        default=False,
        help="hide strike-through text. only relevant when -g is specified as well",
    )
    p.add_argument(
        "--escape-all",
        action="store_true",
        dest="escape_snob",
        default=False,
        help=(
            "Escape all special characters. Output is less readable, but avoids "
            "corner case formatting issues."
        ),
    )
    p.add_argument(
        "--bypass-tables",
        action="store_true",
        dest="bypass_tables",
        default=config.BYPASS_TABLES,
        help="Format tables in HTML rather than Markdown syntax.",
    )
    p.add_argument(
        "--ignore-tables",
        action="store_true",
        dest="ignore_tables",
        default=config.IGNORE_TABLES,
        help="Ignore table-related tags (table, th, td, tr) while keeping rows.",
    )
    p.add_argument(
        "--single-line-break",
        action="store_true",
        dest="single_line_break",
        default=config.SINGLE_LINE_BREAK,
        help=(
            "Use a single line break after a block element rather than two line "
            "breaks. NOTE: Requires --body-width=0"
        ),
    )
    p.add_argument(
        "--unicode-snob",
        action="store_true",
        dest="unicode_snob",
        default=config.UNICODE_SNOB,
        help="Use unicode throughout document",
    )
    p.add_argument(
        "--no-automatic-links",
        action="store_false",
        dest="use_automatic_links",
        default=config.USE_AUTOMATIC_LINKS,
        help="Do not use automatic links wherever applicable",
    )
    p.add_argument(
        "--no-skip-internal-links",
        action="store_false",
        dest="skip_internal_links",
        default=config.SKIP_INTERNAL_LINKS,
        help="Do not skip internal links",
    )
    p.add_argument(
        "--links-after-para",
        action="store_true",
        dest="links_each_paragraph",
        default=config.LINKS_EACH_PARAGRAPH,
        help="Put links after each paragraph instead of document",
    )
    p.add_argument(
        "--mark-code",
        action="store_true",
        dest="mark_code",
        default=config.MARK_CODE,
        help="Mark program code blocks with [code]...[/code]",
    )
    p.add_argument(
        "--decode-errors",
        dest="decode_errors",
        default=config.DECODE_ERRORS,
        help=(
            "What to do in case of decode errors. 'ignore', 'strict' and 'replace' are "
            "acceptable values"
        ),
    )
    p.add_argument(
        "--open-quote",
        dest="open_quote",
        default=config.OPEN_QUOTE,
        help="The character used to open quotes",
    )
    p.add_argument(
        "--close-quote",
        dest="close_quote",
        default=config.CLOSE_QUOTE,
        help="The character used to close quotes",
    )
    p.add_argument(
        "--version", action="version", version=".".join(map(str, __version__))
    )
    p.add_argument("filename", nargs="?")
    p.add_argument("encoding", nargs="?", default="utf-8")
    args = p.parse_args()

    if args.filename and args.filename != "-":
        with open(args.filename, "rb") as fp:
            data = fp.read()
    else:
        data = sys.stdin.buffer.read()

    try:
        html = data.decode(args.encoding, args.decode_errors)
    except UnicodeDecodeError as err:
        warning = bcolors.WARNING + "Warning:" + bcolors.ENDC
        warning += " Use the " + bcolors.OKGREEN
        warning += "--decode-errors=ignore" + bcolors.ENDC + " flag."
        print(warning)
        raise err

    h = HTML2Text(baseurl=baseurl)
    # handle options
    if args.ul_style_dash:
        h.ul_item_mark = "-"
    if args.em_style_asterisk:
        h.emphasis_mark = "*"
        h.strong_mark = "__"

    h.body_width = args.body_width
    h.google_list_indent = args.list_indent
    h.ignore_emphasis = args.ignore_emphasis
    h.ignore_links = args.ignore_links
    h.ignore_mailto_links = args.ignore_mailto_links
    h.protect_links = args.protect_links
    h.ignore_images = args.ignore_images
    h.images_as_html = args.images_as_html
    h.images_to_alt = args.images_to_alt
    h.images_with_size = args.images_with_size
    h.google_doc = args.google_doc
    h.hide_strikethrough = args.hide_strikethrough
    h.escape_snob = args.escape_snob
    h.bypass_tables = args.bypass_tables
    h.ignore_tables = args.ignore_tables
    h.single_line_break = args.single_line_break
    h.inline_links = args.inline_links
    h.unicode_snob = args.unicode_snob
    h.use_automatic_links = args.use_automatic_links
    h.skip_internal_links = args.skip_internal_links
    h.links_each_paragraph = args.links_each_paragraph
    h.mark_code = args.mark_code
    h.wrap_links = args.wrap_links
    h.wrap_list_items = args.wrap_list_items
    h.wrap_tables = args.wrap_tables
    h.pad_tables = args.pad_tables
    h.default_image_alt = args.default_image_alt
    h.open_quote = args.open_quote
    h.close_quote = args.close_quote

    sys.stdout.write(h.handle(html))
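The converter is also usable programmatically, bypassing the CLI above; a minimal sketch, assuming the vendored package is importable as html2text:

from html2text import HTML2Text  # import path assumed

h = HTML2Text(baseurl="")
h.body_width = 0        # same effect as --body-width=0 (disable wrapping)
h.ignore_images = True  # same effect as --ignore-images
print(h.handle("<h1>Title</h1><p>A <a href='https://example.com'>link</a>.</p>"))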
@@ -1,164 +0,0 @@

import re

# Use Unicode characters instead of their ascii pseudo-replacements
UNICODE_SNOB = True

# Marker to use for marking tables for padding post processing
TABLE_MARKER_FOR_PAD = "special_marker_for_table_padding"
# Escape all special characters. Output is less readable, but avoids
# corner case formatting issues.
ESCAPE_SNOB = True

# Put the links after each paragraph instead of at the end.
LINKS_EACH_PARAGRAPH = False

# Wrap long lines at position. 0 for no wrapping.
BODY_WIDTH = 0

# Don't show internal links (href="#local-anchor") -- corresponding link
# targets won't be visible in the plain text file anyway.
SKIP_INTERNAL_LINKS = False

# Use inline, rather than reference, formatting for images and links
INLINE_LINKS = True

# Protect links from line breaks surrounding them with angle brackets (in
# addition to their square brackets)
PROTECT_LINKS = True
WRAP_LINKS = True

# Wrap list items.
WRAP_LIST_ITEMS = False

# Wrap tables
WRAP_TABLES = False

# Number of pixels Google indents nested lists
GOOGLE_LIST_INDENT = 36

# Values Google and others may use to indicate bold text
BOLD_TEXT_STYLE_VALUES = ("bold", "700", "800", "900")

IGNORE_ANCHORS = False
IGNORE_MAILTO_LINKS = False
IGNORE_IMAGES = False
IMAGES_AS_HTML = False
IMAGES_TO_ALT = False
IMAGES_WITH_SIZE = False
IGNORE_EMPHASIS = False
MARK_CODE = True
DECODE_ERRORS = "strict"
DEFAULT_IMAGE_ALT = ""
PAD_TABLES = True

# Convert links with same href and text to <href> format
# if they are absolute links
USE_AUTOMATIC_LINKS = True

# For checking space-only lines on line 771
RE_SPACE = re.compile(r"\s\+")

RE_ORDERED_LIST_MATCHER = re.compile(r"\d+\.\s")
RE_UNORDERED_LIST_MATCHER = re.compile(r"[-\*\+]\s")
RE_MD_CHARS_MATCHER = re.compile(r"([\\\[\]\(\)])")
RE_MD_CHARS_MATCHER_ALL = re.compile(r"([`\*_{}\[\]\(\)#!])")

# to find links in the text
RE_LINK = re.compile(r"(\[.*?\] ?\(.*?\))|(\[.*?\]:.*?)")

# to find table separators
RE_TABLE = re.compile(r" \| ")

RE_MD_DOT_MATCHER = re.compile(
    r"""
    ^             # start of line
    (\s*\d+)      # optional whitespace and a number
    (\.)          # dot
    (?=\s)        # lookahead assert whitespace
    """,
    re.MULTILINE | re.VERBOSE,
)
RE_MD_PLUS_MATCHER = re.compile(
    r"""
    ^
    (\s*)
    (\+)
    (?=\s)
    """,
    flags=re.MULTILINE | re.VERBOSE,
)
RE_MD_DASH_MATCHER = re.compile(
    r"""
    ^
    (\s*)
    (-)
    (?=\s|\-)     # followed by whitespace (bullet list, or spaced out hr)
                  # or another dash (header or hr)
    """,
    flags=re.MULTILINE | re.VERBOSE,
)
RE_SLASH_CHARS = r"\`*_{}[]()#+-.!"
RE_MD_BACKSLASH_MATCHER = re.compile(
    r"""
    (\\)          # match one slash
    (?=[%s])      # followed by a char that requires escaping
    """
    % re.escape(RE_SLASH_CHARS),
    flags=re.VERBOSE,
)

UNIFIABLE = {
    "rsquo": "'",
    "lsquo": "'",
    "rdquo": '"',
    "ldquo": '"',
    "copy": "(C)",
    "mdash": "--",
    "nbsp": " ",
    "rarr": "->",
    "larr": "<-",
    "middot": "*",
    "ndash": "-",
    "oelig": "oe",
    "aelig": "ae",
    "agrave": "a",
    "aacute": "a",
    "acirc": "a",
    "atilde": "a",
    "auml": "a",
    "aring": "a",
    "egrave": "e",
    "eacute": "e",
    "ecirc": "e",
    "euml": "e",
    "igrave": "i",
    "iacute": "i",
    "icirc": "i",
    "iuml": "i",
    "ograve": "o",
    "oacute": "o",
    "ocirc": "o",
    "otilde": "o",
    "ouml": "o",
    "ugrave": "u",
    "uacute": "u",
    "ucirc": "u",
    "uuml": "u",
    "lrm": "",
    "rlm": "",
}

# Format tables in HTML rather than Markdown syntax
BYPASS_TABLES = False
# Ignore table-related tags (table, th, td, tr) while keeping rows
IGNORE_TABLES = False


# Use a single line break after a block element rather than two line breaks.
# NOTE: Requires body width setting to be 0.
SINGLE_LINE_BREAK = False


# Use double quotation marks when converting the <q> tag.
OPEN_QUOTE = '"'
CLOSE_QUOTE = '"'
@@ -1,18 +0,0 @@

from typing import Dict, Optional


class AnchorElement:
    __slots__ = ["attrs", "count", "outcount"]

    def __init__(self, attrs: Dict[str, Optional[str]], count: int, outcount: int):
        self.attrs = attrs
        self.count = count
        self.outcount = outcount


class ListElement:
    __slots__ = ["name", "num"]

    def __init__(self, name: str, num: int):
        self.name = name
        self.num = num
@@ -1,3 +0,0 @@

class OutCallback:
    def __call__(self, s: str) -> None:
        ...
@ -1,287 +0,0 @@
|
||||||
import html.entities
|
|
||||||
from typing import Dict, List, Optional
|
|
||||||
|
|
||||||
from . import config
|
|
||||||
|
|
||||||
unifiable_n = {
|
|
||||||
html.entities.name2codepoint[k]: v
|
|
||||||
for k, v in config.UNIFIABLE.items()
|
|
||||||
if k != "nbsp"
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def hn(tag: str) -> int:
|
|
||||||
if tag[0] == "h" and len(tag) == 2:
|
|
||||||
n = tag[1]
|
|
||||||
if "0" < n <= "9":
|
|
||||||
return int(n)
|
|
||||||
return 0
|
|
||||||
|
|
||||||
|
|
||||||
def dumb_property_dict(style: str) -> Dict[str, str]:
|
|
||||||
"""
|
|
||||||
:returns: A hash of css attributes
|
|
||||||
"""
|
|
||||||
return {
|
|
||||||
x.strip().lower(): y.strip().lower()
|
|
||||||
for x, y in [z.split(":", 1) for z in style.split(";") if ":" in z]
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def dumb_css_parser(data: str) -> Dict[str, Dict[str, str]]:
|
|
||||||
"""
|
|
||||||
:type data: str
|
|
||||||
|
|
||||||
:returns: A hash of css selectors, each of which contains a hash of
|
|
||||||
css attributes.
|
|
||||||
:rtype: dict
|
|
||||||
"""
|
|
||||||
# remove @import sentences
|
|
||||||
data += ";"
|
|
||||||
importIndex = data.find("@import")
|
|
||||||
while importIndex != -1:
|
|
||||||
data = data[0:importIndex] + data[data.find(";", importIndex) + 1 :]
|
|
||||||
importIndex = data.find("@import")
|
|
||||||
|
|
||||||
# parse the css. reverted from dictionary comprehension in order to
|
|
||||||
# support older pythons
|
|
||||||
pairs = [x.split("{") for x in data.split("}") if "{" in x.strip()]
|
|
||||||
try:
|
|
||||||
elements = {a.strip(): dumb_property_dict(b) for a, b in pairs}
|
|
||||||
except ValueError:
|
|
||||||
elements = {} # not that important
|
|
||||||
|
|
||||||
return elements
|
|
||||||
|
|
||||||
|
|
||||||
def element_style(
|
|
||||||
attrs: Dict[str, Optional[str]],
|
|
||||||
style_def: Dict[str, Dict[str, str]],
|
|
||||||
parent_style: Dict[str, str],
|
|
||||||
) -> Dict[str, str]:
|
|
||||||
"""
|
|
||||||
:type attrs: dict
|
|
||||||
:type style_def: dict
|
|
||||||
:type style_def: dict
|
|
||||||
|
|
||||||
:returns: A hash of the 'final' style attributes of the element
|
|
||||||
:rtype: dict
|
|
||||||
"""
|
|
||||||
style = parent_style.copy()
|
|
||||||
if attrs.get("class"):
|
|
||||||
for css_class in attrs["class"].split():
|
|
||||||
css_style = style_def.get("." + css_class, {})
|
|
||||||
style.update(css_style)
|
|
||||||
if attrs.get("style"):
|
|
||||||
immediate_style = dumb_property_dict(attrs["style"])
|
|
||||||
style.update(immediate_style)
|
|
||||||
|
|
||||||
return style
|
|
||||||
|
|
||||||
|
|
||||||
def google_list_style(style: Dict[str, str]) -> str:
|
|
||||||
"""
|
|
||||||
Finds out whether this is an ordered or unordered list
|
|
||||||
|
|
||||||
:type style: dict
|
|
||||||
|
|
||||||
:rtype: str
|
|
||||||
"""
|
|
||||||
if "list-style-type" in style:
|
|
||||||
list_style = style["list-style-type"]
|
|
||||||
if list_style in ["disc", "circle", "square", "none"]:
|
|
||||||
return "ul"
|
|
||||||
|
|
||||||
return "ol"
|
|
||||||
|
|
||||||
|
|
||||||
def google_has_height(style: Dict[str, str]) -> bool:
|
|
||||||
"""
|
|
||||||
Check if the style of the element has the 'height' attribute
|
|
||||||
explicitly defined
|
|
||||||
|
|
||||||
:type style: dict
|
|
||||||
|
|
||||||
:rtype: bool
|
|
||||||
"""
|
|
||||||
return "height" in style
|
|
||||||
|
|
||||||
|
|
||||||
def google_text_emphasis(style: Dict[str, str]) -> List[str]:
|
|
||||||
"""
|
|
||||||
:type style: dict
|
|
||||||
|
|
||||||
:returns: A list of all emphasis modifiers of the element
|
|
||||||
:rtype: list
|
|
||||||
"""
|
|
||||||
emphasis = []
|
|
||||||
if "text-decoration" in style:
|
|
||||||
emphasis.append(style["text-decoration"])
|
|
||||||
if "font-style" in style:
|
|
||||||
emphasis.append(style["font-style"])
|
|
||||||
if "font-weight" in style:
|
|
||||||
emphasis.append(style["font-weight"])
|
|
||||||
|
|
||||||
return emphasis
|
|
||||||
|
|
||||||
|
|
||||||
def google_fixed_width_font(style: Dict[str, str]) -> bool:
|
|
||||||
"""
|
|
||||||
Check if the css of the current element defines a fixed width font
|
|
||||||
|
|
||||||
:type style: dict
|
|
||||||
|
|
||||||
:rtype: bool
|
|
||||||
"""
|
|
||||||
font_family = ""
|
|
||||||
if "font-family" in style:
|
|
||||||
font_family = style["font-family"]
|
|
||||||
return "courier new" == font_family or "consolas" == font_family
|
|
||||||
|
|
||||||
|
|
||||||
def list_numbering_start(attrs: Dict[str, Optional[str]]) -> int:
|
|
||||||
"""
|
|
||||||
Extract numbering from list element attributes
|
|
||||||
|
|
||||||
:type attrs: dict
|
|
||||||
|
|
||||||
:rtype: int or None
|
|
||||||
"""
|
|
||||||
if attrs.get("start"):
|
|
||||||
try:
|
|
||||||
return int(attrs["start"]) - 1
|
|
||||||
except ValueError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
return 0
|
|
||||||
|
|
||||||
|
|
||||||
def skipwrap(
    para: str, wrap_links: bool, wrap_list_items: bool, wrap_tables: bool
) -> bool:
    # If it appears to contain a link, don't wrap
    if not wrap_links and config.RE_LINK.search(para):
        return True
    # If the text begins with four spaces or one tab, it's a code block;
    # don't wrap
    if para[0:4] == "    " or para[0] == "\t":
        return True

    # If the text begins with only two "--", possibly preceded by
    # whitespace, that's an emdash; so wrap.
    stripped = para.lstrip()
    if stripped[0:2] == "--" and len(stripped) > 2 and stripped[2] != "-":
        return False

    # I'm not sure what this is for; I thought it was to detect lists,
    # but there's a <br>-inside-<span> case in one of the tests that
    # also depends upon it.
    if stripped[0:1] in ("-", "*") and not stripped[0:2] == "**":
        return not wrap_list_items

    # If text contains a pipe character it is likely a table
    if not wrap_tables and config.RE_TABLE.search(para):
        return True

    # If the text begins with a single -, *, or +, followed by a space,
    # or an integer, followed by a ., followed by a space (in either
    # case optionally preceded by whitespace), it's a list; don't wrap.
    return bool(
        config.RE_ORDERED_LIST_MATCHER.match(stripped)
        or config.RE_UNORDERED_LIST_MATCHER.match(stripped)
    )


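# A sketch of skipwrap's decisions (added for illustration; assumes the
# RE_* patterns from this module's config): code blocks and tables are
# never wrapped, emdash lines always are, and list items are wrapped only
# when wrap_list_items is True.
#     skipwrap("    indented code", True, True, True)  -> True
#     skipwrap("-- an emdash line", True, True, True)  -> False
#     skipwrap("* list item", True, False, True)       -> True

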
def escape_md(text: str) -> str:
    """
    Escapes markdown-sensitive characters within other markdown
    constructs.
    """
    return config.RE_MD_CHARS_MATCHER.sub(r"\\\1", text)


def escape_md_section(text: str, snob: bool = False) -> str:
    """
    Escapes markdown-sensitive characters across whole document sections.
    """
    text = config.RE_MD_BACKSLASH_MATCHER.sub(r"\\\1", text)

    if snob:
        text = config.RE_MD_CHARS_MATCHER_ALL.sub(r"\\\1", text)

    text = config.RE_MD_DOT_MATCHER.sub(r"\1\\\2", text)
    text = config.RE_MD_PLUS_MATCHER.sub(r"\1\\\2", text)
    text = config.RE_MD_DASH_MATCHER.sub(r"\1\\\2", text)

    return text


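# Illustration (not in the original file), assuming the usual html2text
# config patterns: escape_md only protects characters inside markdown
# constructs, while escape_md_section also escapes leading list markers
# such as "1." or "+" so a converted paragraph cannot be re-parsed as a
# list.
#     escape_md_section("1. not a list", snob=False) -> "1\\. not a list"

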
def reformat_table(lines: List[str], right_margin: int) -> List[str]:
    """
    Given the lines of a table, pads the cells and returns the new lines
    """
    # find the maximum width of the columns
    max_width = [len(x.rstrip()) + right_margin for x in lines[0].split("|")]
    max_cols = len(max_width)
    for line in lines:
        cols = [x.rstrip() for x in line.split("|")]
        num_cols = len(cols)

        # don't drop any data if colspan attributes result in unequal lengths
        if num_cols < max_cols:
            cols += [""] * (max_cols - num_cols)
        elif max_cols < num_cols:
            max_width += [len(x) + right_margin for x in cols[-(num_cols - max_cols) :]]
            max_cols = num_cols

        max_width = [
            max(len(x) + right_margin, old_len) for x, old_len in zip(cols, max_width)
        ]

    # reformat
    new_lines = []
    for line in lines:
        cols = [x.rstrip() for x in line.split("|")]
        if set(line.strip()) == set("-|"):
            filler = "-"
            new_cols = [
                x.rstrip() + (filler * (M - len(x.rstrip())))
                for x, M in zip(cols, max_width)
            ]
            new_lines.append("|-" + "|".join(new_cols) + "|")
        else:
            filler = " "
            new_cols = [
                x.rstrip() + (filler * (M - len(x.rstrip())))
                for x, M in zip(cols, max_width)
            ]
            new_lines.append("| " + "|".join(new_cols) + "|")
    return new_lines


def pad_tables_in_text(text: str, right_margin: int = 1) -> str:
    """
    Provide padding for tables in the text
    """
    lines = text.split("\n")
    table_buffer = []  # type: List[str]
    table_started = False
    new_lines = []
    for line in lines:
        # Toggle table started
        if config.TABLE_MARKER_FOR_PAD in line:
            table_started = not table_started
            if not table_started:
                table = reformat_table(table_buffer, right_margin)
                new_lines.extend(table)
                table_buffer = []
                new_lines.append("")
            continue
        # Process lines
        if table_started:
            table_buffer.append(line)
        else:
            new_lines.append(line)
    return "\n".join(new_lines)

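# End-to-end sketch (illustrative, not from the original module): lines
# between two config.TABLE_MARKER_FOR_PAD markers are buffered and handed
# to reformat_table, which pads every cell to the widest column.
def _pad_tables_example() -> str:
    marker = config.TABLE_MARKER_FOR_PAD
    text = "\n".join([marker, "a|bb", "-|-", "ccc|d", marker])
    return pad_tables_in_text(text)
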
@@ -1 +0,0 @@
__all__ = ["users", "topics", "content_items", "comments"]
@@ -1,209 +0,0 @@
from datetime import datetime, timezone

from dateutil.parser import parse as date_parse

from services.db import local_session
from migration.html2text import html2text
from orm.reaction import Reaction, ReactionKind
from orm.shout import Shout, ShoutReactionsFollower
from orm.topic import TopicFollower
from orm.user import User

ts = datetime.now(tz=timezone.utc)


def auto_followers(session, topics, reaction_dict):
    # creating shout's reactions following for reaction author
    following1 = (
        session.query(ShoutReactionsFollower)
        .where(ShoutReactionsFollower.follower == reaction_dict["createdBy"])
        .filter(ShoutReactionsFollower.shout == reaction_dict["shout"])
        .first()
    )
    if not following1:
        following1 = ShoutReactionsFollower.create(
            follower=reaction_dict["createdBy"], shout=reaction_dict["shout"], auto=True
        )
        session.add(following1)
    # creating topics followings for reaction author
    for t in topics:
        tf = (
            session.query(TopicFollower)
            .where(TopicFollower.follower == reaction_dict["createdBy"])
            .filter(TopicFollower.topic == t["id"])
            .first()
        )
        if not tf:
            topic_following = TopicFollower.create(
                follower=reaction_dict["createdBy"], topic=t["id"], auto=True
            )
            session.add(topic_following)


def migrate_ratings(session, entry, reaction_dict):
    for comment_rating_old in entry.get("ratings", []):
        rater = (
            session.query(User)
            .filter(User.oid == comment_rating_old["createdBy"])
            .first()
        )
        re_reaction_dict = {
            "shout": reaction_dict["shout"],
            "replyTo": reaction_dict["id"],
            "kind": ReactionKind.LIKE
            if comment_rating_old["value"] > 0
            else ReactionKind.DISLIKE,
            "createdBy": rater.id if rater else 1,
        }
        cts = comment_rating_old.get("createdAt")
        if cts:
            re_reaction_dict["createdAt"] = date_parse(cts)
        try:
            # creating reaction from old rating
            rr = Reaction.create(**re_reaction_dict)
            following2 = (
                session.query(ShoutReactionsFollower)
                .where(ShoutReactionsFollower.follower == re_reaction_dict["createdBy"])
                .filter(ShoutReactionsFollower.shout == rr.shout)
                .first()
            )
            if not following2:
                following2 = ShoutReactionsFollower.create(
                    follower=re_reaction_dict["createdBy"], shout=rr.shout, auto=True
                )
                session.add(following2)
            session.add(rr)

        except Exception as e:
            print("[migration] comment rating error: %r" % re_reaction_dict)
            raise e
    session.commit()


async def migrate(entry, storage):
    """
    {
      "_id": "hdtwS8fSyFLxXCgSC",
      "body": "<p>",
      "contentItem": "mnK8KsJHPRi8DrybQ",
      "createdBy": "bMFPuyNg6qAD2mhXe",
      "thread": "01/",
      "createdAt": "2016-04-19 04:33:53+00:00",
      "ratings": [
        { "createdBy": "AqmRukvRiExNpAe8C", "value": 1 },
        { "createdBy": "YdE76Wth3yqymKEu5", "value": 1 }
      ],
      "rating": 2,
      "updatedAt": "2020-05-27 19:22:57.091000+00:00",
      "updatedBy": "0"
    }
    ->
    type Reaction {
      id: Int!
      shout: Shout!
      createdAt: DateTime!
      createdBy: User!
      updatedAt: DateTime
      deletedAt: DateTime
      deletedBy: User
      range: String  # full / 0:2340
      kind: ReactionKind!
      body: String
      replyTo: Reaction
      stat: Stat
      old_id: String
      old_thread: String
    }
    """
    old_ts = entry.get("createdAt")
    reaction_dict = {
        "createdAt": (ts if not old_ts else date_parse(old_ts)),
        "body": html2text(entry.get("body", "")),
        "oid": entry["_id"],
    }
    shout_oid = entry.get("contentItem")
    if shout_oid not in storage["shouts"]["by_oid"]:
        if len(storage["shouts"]["by_oid"]) > 0:
            return shout_oid
        else:
            print("[migration] no shouts migrated yet")
            raise Exception
        return
    else:
        stage = "started"
        reaction = None
        with local_session() as session:
            author = session.query(User).filter(User.oid == entry["createdBy"]).first()
            old_shout = storage["shouts"]["by_oid"].get(shout_oid)
            if not old_shout:
                raise Exception("no old shout in storage")
            else:
                stage = "author and old id found"
                try:
                    shout = (
                        session.query(Shout)
                        .where(Shout.slug == old_shout["slug"])
                        .one()
                    )
                    if shout:
                        reaction_dict["shout"] = shout.id
                        reaction_dict["createdBy"] = author.id if author else 1
                        reaction_dict["kind"] = ReactionKind.COMMENT

                        # creating reaction from old comment
                        reaction = Reaction.create(**reaction_dict)
                        session.add(reaction)
                        # session.commit()
                        stage = "new reaction committed"
                        reaction_dict = reaction.dict()
                        topics = [t.dict() for t in shout.topics]
                        auto_followers(session, topics, reaction_dict)

                        migrate_ratings(session, entry, reaction_dict)

                        return reaction
                except Exception as e:
                    print(e)
                    print(reaction)
                    raise Exception(stage)
                return


def migrate_2stage(old_comment, idmap):
    if old_comment.get("body"):
        # the id map is keyed by the old mongo id; fall back to "_id"
        # for entries that were stored under that key
        new_id = idmap.get(old_comment.get("oid")) or idmap.get(old_comment.get("_id"))
        if new_id:
            new_replyto_id = None
            old_replyto_id = old_comment.get("replyTo")
            if old_replyto_id:
                new_replyto_id = int(idmap.get(old_replyto_id, "0"))
            with local_session() as session:
                comment = session.query(Reaction).where(Reaction.id == new_id).first()
                try:
                    if new_replyto_id:
                        new_reply = (
                            session.query(Reaction)
                            .where(Reaction.id == new_replyto_id)
                            .first()
                        )
                        if not new_reply:
                            print(new_replyto_id)
                            raise Exception("cannot find reply by id!")
                        comment.replyTo = new_reply.id
                        session.add(comment)
                    srf = (
                        session.query(ShoutReactionsFollower)
                        .where(ShoutReactionsFollower.shout == comment.shout)
                        .filter(ShoutReactionsFollower.follower == comment.createdBy)
                        .first()
                    )
                    if not srf:
                        srf = ShoutReactionsFollower.create(
                            shout=comment.shout, follower=comment.createdBy, auto=True
                        )
                        session.add(srf)
                    session.commit()
                except Exception:
                    raise Exception("cannot find a comment by oldid")

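# A pure-Python sketch (added for illustration) of the two-stage contract
# used above: stage 1 migrates bodies and collects old oid -> new id,
# stage 2 (migrate_2stage) then rewires replyTo through that map, falling
# back to "0" for parents that were never migrated.
#     idmap = {"hdtwS8fSyFLxXCgSC": "101", "mnK8KsJHPRi8DrybQ": "102"}
#     int(idmap.get("hdtwS8fSyFLxXCgSC", "0"))  -> 101
#     int(idmap.get("someMissingOid", "0"))     -> 0
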
@@ -1,420 +0,0 @@
import json
import re
from datetime import datetime, timezone

from dateutil.parser import parse as date_parse
from sqlalchemy.exc import IntegrityError
from transliterate import translit

from services.db import local_session
from services.viewed import ViewedStorage
from migration.extract import extract_html, extract_media
from orm.reaction import Reaction, ReactionKind
from orm.shout import Shout, ShoutTopic, ShoutReactionsFollower
from orm.user import User
from orm.topic import TopicFollower, Topic

OLD_DATE = "2016-03-05 22:22:00.350000"
ts = datetime.now(tz=timezone.utc)
type2layout = {
    "Article": "article",
    "Literature": "literature",
    "Music": "audio",
    "Video": "video",
    "Image": "image",
}

anondict = {"slug": "anonymous", "id": 1, "name": "Аноним"}
discours = {"slug": "discours", "id": 2, "name": "Дискурс"}


def get_shout_slug(entry):
    slug = entry.get("slug", "")
    if not slug:
        for friend in entry.get("friendlySlugs", []):
            slug = friend.get("slug", "")
            if slug:
                break
    slug = re.sub("[^0-9a-zA-Z]+", "-", slug)
    return slug


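# For instance (illustration only), an entry without a top-level slug
# falls back to the first non-empty friendly slug, sanitised to ASCII:
#     get_shout_slug({"slug": ""})                                  -> ""
#     get_shout_slug({"friendlySlugs": [{"slug": "old_post.v2"}]})  -> "old-post-v2"

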
def create_author_from_app(app):
    user = None
    userdata = None
    # check if email is used
    if app["email"]:
        with local_session() as session:
            user = session.query(User).where(User.email == app["email"]).first()
            if not user:
                # print('[migration] app %r' % app)
                name = app.get("name")
                if name:
                    slug = translit(name, "ru", reversed=True).lower()
                    slug = re.sub("[^0-9a-zA-Z]+", "-", slug)
                    print("[migration] created slug %s" % slug)
                # check if slug is used
                if slug:
                    user = session.query(User).where(User.slug == slug).first()

                    # get slug from email
                    if user:
                        slug = app["email"].split("@")[0]
                        user = session.query(User).where(User.slug == slug).first()
                        # one more try
                        if user:
                            slug += "-author"
                            user = (
                                session.query(User).where(User.slug == slug).first()
                            )

                # create user with application data
                if not user:
                    userdata = {
                        "username": app["email"],
                        "email": app["email"],
                        "name": app.get("name", ""),
                        "bio": app.get("bio", ""),
                        "emailConfirmed": False,
                        "slug": slug,
                        "createdAt": ts,
                        "lastSeen": ts,
                    }
                    # print('[migration] userdata %r' % userdata)
                    user = User.create(**userdata)
                    session.add(user)
                    session.commit()
                    userdata["id"] = user.id

            userdata = user.dict()
        return userdata
    else:
        raise Exception("app is not ok", app)


async def create_shout(shout_dict):
    s = Shout.create(**shout_dict)
    author = s.authors[0]
    with local_session() as session:
        srf = (
            session.query(ShoutReactionsFollower)
            .where(ShoutReactionsFollower.shout == s.id)
            .filter(ShoutReactionsFollower.follower == author.id)
            .first()
        )
        if not srf:
            srf = ShoutReactionsFollower.create(
                shout=s.id, follower=author.id, auto=True
            )
            session.add(srf)
        session.commit()
    return s


async def get_user(entry, storage):
    app = entry.get("application")
    userdata = None
    user_oid = None
    if app:
        userdata = create_author_from_app(app)
    else:
        user_oid = entry.get("createdBy")
        if user_oid == "0":
            userdata = discours
        elif user_oid:
            userdata = storage["users"]["by_oid"].get(user_oid)
        if not userdata:
            print("no userdata by oid, anonymous")
            userdata = anondict
            print(app)
    # cleanup slug
    if userdata:
        slug = userdata.get("slug", "")
        if slug:
            slug = re.sub("[^0-9a-zA-Z]+", "-", slug)
            userdata["slug"] = slug
    else:
        userdata = anondict

    user = await process_user(userdata, storage, user_oid)
    return user, user_oid


async def migrate(entry, storage):
    author, user_oid = await get_user(entry, storage)
    r = {
        "layout": type2layout[entry["type"]],
        "title": entry["title"],
        "authors": [
            author,
        ],
        "slug": get_shout_slug(entry),
        "cover": (
            "https://assets.discours.io/unsafe/1600x/" + entry["thumborId"]
            if entry.get("thumborId")
            else entry.get("image", {}).get("url")
        ),
        "visibility": "public" if entry.get("published") else "authors",
        "publishedAt": date_parse(entry.get("publishedAt"))
        if entry.get("published")
        else None,
        "deletedAt": date_parse(entry.get("deletedAt"))
        if entry.get("deletedAt")
        else None,
        "createdAt": date_parse(entry.get("createdAt", OLD_DATE)),
        "updatedAt": date_parse(entry["updatedAt"]) if "updatedAt" in entry else ts,
        "topics": await add_topics_follower(entry, storage, author),
        "body": extract_html(entry, cleanup=True),
    }

    # main topic patch
    r["mainTopic"] = r["topics"][0]

    # published author auto-confirm
    if entry.get("published"):
        with local_session() as session:
            # update user.emailConfirmed if published
            author.emailConfirmed = True
            session.add(author)
            session.commit()

    # media
    media = extract_media(entry)
    r["media"] = json.dumps(media, ensure_ascii=True) if media else None

    # ----------------------------------- copy
    shout_dict = r.copy()
    del shout_dict["topics"]

    try:
        # save shout to db
        shout_dict["oid"] = entry.get("_id", "")
        shout = await create_shout(shout_dict)
    except IntegrityError as e:
        print("[migration] create_shout integrity error", e)
        shout = await resolve_create_shout(shout_dict)
    except Exception as e:
        raise Exception(e)

    # update data
    shout_dict = shout.dict()
    shout_dict["authors"] = [
        author.dict(),
    ]

    # shout topics aftermath
    shout_dict["topics"] = await topics_aftermath(r, storage)

    # content_item ratings to reactions
    await content_ratings_to_reactions(entry, shout_dict["slug"])

    # shout views
    await ViewedStorage.increment(
        shout_dict["slug"], amount=entry.get("views", 1), viewer="old-discours"
    )
    # del shout_dict['ratings']

    storage["shouts"]["by_oid"][entry["_id"]] = shout_dict
    storage["shouts"]["by_slug"][shout_dict["slug"]] = shout_dict
    return shout_dict


async def add_topics_follower(entry, storage, user):
    topics = set()
    category = entry.get("category")
    topics_by_oid = storage["topics"]["by_oid"]
    oids = [
        category,
    ] + entry.get("tags", [])
    for toid in oids:
        tslug = topics_by_oid.get(toid, {}).get("slug")
        if tslug:
            topics.add(tslug)
    ttt = list(topics)
    # add author as TopicFollower
    with local_session() as session:
        for tpcslug in topics:
            try:
                tpc = session.query(Topic).where(Topic.slug == tpcslug).first()
                if tpc:
                    tf = (
                        session.query(TopicFollower)
                        .where(TopicFollower.follower == user.id)
                        .filter(TopicFollower.topic == tpc.id)
                        .first()
                    )
                    if not tf:
                        tf = TopicFollower.create(
                            topic=tpc.id, follower=user.id, auto=True
                        )
                        session.add(tf)
                        session.commit()
            except IntegrityError:
                print("[migration.shout] hidden by topic " + tpc.slug)
    # main topic
    maintopic = storage["replacements"].get(topics_by_oid.get(category, {}).get("slug"))
    if maintopic in ttt:
        ttt.remove(maintopic)
    ttt.insert(0, maintopic)
    return ttt


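# The main-topic reordering above reduces to this list manipulation
# (shown standalone for clarity; slugs are made up):
#     ttt = ["cinema", "society", "culture"]
#     maintopic = "culture"
#     if maintopic in ttt: ttt.remove(maintopic)
#     ttt.insert(0, maintopic)  # -> ["culture", "cinema", "society"]

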
async def process_user(userdata, storage, oid):
    with local_session() as session:
        uid = userdata.get("id")  # anonymous as
        if not uid:
            print(userdata)
            print("has no id field, set it @anonymous")
            userdata = anondict
            uid = 1
        user = session.query(User).filter(User.id == uid).first()
        if not user:
            try:
                slug = userdata["slug"].lower().strip()
                slug = re.sub("[^0-9a-zA-Z]+", "-", slug)
                userdata["slug"] = slug
                user = User.create(**userdata)
                session.add(user)
                session.commit()
            except IntegrityError:
                print(f"[migration] user creating with slug {userdata['slug']}")
                print("[migration] from userdata")
                print(userdata)
                raise Exception(
                    "[migration] cannot create user in content_items.get_user()"
                )
        if user.id == 946:
            print("[migration] ***************** ALPINA")
        if user.id == 2:
            print("[migration] +++++++++++++++++ DISCOURS")
        userdata["id"] = user.id
        userdata["createdAt"] = user.createdAt
        storage["users"]["by_slug"][userdata["slug"]] = userdata
        storage["users"]["by_oid"][oid] = userdata
        if not user:
            raise Exception("could not get a user")
        return user


async def resolve_create_shout(shout_dict):
    with local_session() as session:
        s = session.query(Shout).filter(Shout.slug == shout_dict["slug"]).first()
        bump = False
        if s:
            if s.createdAt != shout_dict["createdAt"]:
                # create new with different slug
                shout_dict["slug"] += "-" + shout_dict["layout"]
                try:
                    await create_shout(shout_dict)
                except IntegrityError as e:
                    print(e)
                    bump = True
            else:
                # update old
                for key in shout_dict:
                    if key in s.__dict__:
                        if s.__dict__[key] != shout_dict[key]:
                            print(
                                "[migration] shout already exists, but differs in %s"
                                % key
                            )
                            bump = True
                    else:
                        print("[migration] shout already exists, but lacks %s" % key)
                        bump = True
                if bump:
                    s.update(shout_dict)
        else:
            print("[migration] something went wrong with shout: \n%r" % shout_dict)
            raise Exception("")
        session.commit()
    return s


async def topics_aftermath(entry, storage):
    r = []
    for tpc in filter(lambda x: bool(x), entry["topics"]):
        oldslug = tpc
        newslug = storage["replacements"].get(oldslug, oldslug)

        if newslug:
            with local_session() as session:
                shout = session.query(Shout).where(Shout.slug == entry["slug"]).first()
                new_topic = session.query(Topic).where(Topic.slug == newslug).first()

                shout_topic_old = (
                    session.query(ShoutTopic)
                    .join(Shout)
                    .join(Topic)
                    .filter(Shout.slug == entry["slug"])
                    .filter(Topic.slug == oldslug)
                    .first()
                )
                if shout_topic_old:
                    shout_topic_old.update({"topic": new_topic.id})
                else:
                    shout_topic_new = (
                        session.query(ShoutTopic)
                        .join(Shout)
                        .join(Topic)
                        .filter(Shout.slug == entry["slug"])
                        .filter(Topic.slug == newslug)
                        .first()
                    )
                    if not shout_topic_new:
                        try:
                            ShoutTopic.create(
                                **{"shout": shout.id, "topic": new_topic.id}
                            )
                        except Exception:
                            print("[migration] shout topic error: " + newslug)
                session.commit()
            if newslug not in r:
                r.append(newslug)
        else:
            # tpc here is a plain slug string, not a dict
            print("[migration] ignored topic slug: \n%r" % tpc)
            # raise Exception
    return r


async def content_ratings_to_reactions(entry, slug):
    try:
        with local_session() as session:
            for content_rating in entry.get("ratings", []):
                rater = (
                    session.query(User)
                    .filter(User.oid == content_rating["createdBy"])
                    .first()
                ) or User.default_user
                shout = session.query(Shout).where(Shout.slug == slug).first()
                cts = content_rating.get("createdAt")
                reaction_dict = {
                    "createdAt": date_parse(cts) if cts else None,
                    "kind": ReactionKind.LIKE
                    if content_rating["value"] > 0
                    else ReactionKind.DISLIKE,
                    "createdBy": rater.id,
                    "shout": shout.id,
                }
                reaction = (
                    session.query(Reaction)
                    .filter(Reaction.shout == reaction_dict["shout"])
                    .filter(Reaction.createdBy == reaction_dict["createdBy"])
                    .filter(Reaction.kind == reaction_dict["kind"])
                    .first()
                )
                if reaction:
                    k = (
                        ReactionKind.AGREE
                        if content_rating["value"] > 0
                        else ReactionKind.DISAGREE
                    )
                    reaction_dict["kind"] = k
                    reaction.update(reaction_dict)
                    session.add(reaction)
                else:
                    rea = Reaction.create(**reaction_dict)
                    session.add(rea)
                # shout_dict['ratings'].append(reaction_dict)

            session.commit()
    except Exception:
        print("[migration] content_item.ratings error: \n%r" % content_rating)

@@ -1,34 +0,0 @@
from services.db import local_session
from migration.extract import extract_md
from migration.html2text import html2text
from orm.reaction import Reaction, ReactionKind


def migrate(entry, storage):
    post_oid = entry["contentItem"]
    print(post_oid)
    shout_dict = storage["shouts"]["by_oid"].get(post_oid)
    if shout_dict:
        print(shout_dict["body"])
        remark = {
            "shout": shout_dict["id"],
            "body": extract_md(html2text(entry["body"]), shout_dict),
            "kind": ReactionKind.REMARK,
        }

        if entry.get("textBefore"):
            remark["range"] = (
                str(shout_dict["body"].index(entry["textBefore"] or ""))
                + ":"
                + str(
                    shout_dict["body"].index(entry["textAfter"] or "")
                    + len(entry["textAfter"] or "")
                )
            )

        with local_session() as session:
            rmrk = Reaction.create(**remark)
            session.commit()
            # return a plain dict: Reaction instances carry SQLAlchemy state
            return rmrk.dict()
    return

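# How the "range" string above is computed, on toy data (illustration only):
#     body = "hello brave new world"
#     start = body.index("brave")           # 6
#     end = body.index("new") + len("new")  # 15
#     range_str = f"{start}:{end}"          # "6:15"
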
@@ -1,828 +0,0 @@
{
  "207": "207",
  "1990-e": "90s",
  "2000-e": "2000s",
  "90-e": "90s",
  "Georgia": "georgia",
  "Japan": "japan",
  "Sweden": "sweden",
  "abstraktsiya": "abstract",
  "absurdism": "absurdism",
  "acclimatization": "acclimatisation",
  "activism": "activism",
  "adolf-gitler": "adolf-hitler",
  "afrika": "africa",
  "agata-kristi": "agatha-christie",
  "agressivnoe-povedenie": "agression",
  "agressiya": "agression",
  "aktsii": "actions",
  "aktsionizm": "actionism",
  "alber-kamyu": "albert-kamus",
  "albomy": "albums",
  "aleksandr-griboedov": "aleksander-griboedov",
  "aleksandr-pushkin": "aleksander-pushkin",
  "aleksandr-solzhenitsyn": "aleksander-solzhenitsyn",
  "aleksandr-vvedenskiy": "aleksander-vvedensky",
  "aleksey-navalnyy": "alexey-navalny",
  "alfavit": "alphabet",
  "alkogol": "alcohol",
  "alternativa": "alternative",
  "alternative": "alternative",
  "alternativnaya-istoriya": "alternative-history",
  "amerika": "america",
  "anarhizm": "anarchism",
  "anatoliy-mariengof": "anatoly-mariengof",
  "ancient-russia": "ancient-russia",
  "andegraund": "underground",
  "andrey-platonov": "andrey-platonov",
  "andrey-rodionov": "andrey-rodionov",
  "andrey-tarkovskiy": "andrey-tarkovsky",
  "angliyskie-istorii": "english-stories",
  "angliyskiy-yazyk": "english-langugae",
  "ango": "ango",
  "animation": "animation",
  "animatsiya": "animation",
  "anime": "anime",
  "anri-volohonskiy": "anri-volohonsky",
  "antifashizm": "anti-faschism",
  "antiquity": "antiquity",
  "antiutopiya": "dystopia",
  "anton-dolin": "anton-dolin",
  "antropology": "antropology",
  "antropotsen": "antropocenus",
  "architecture": "architecture",
  "arheologiya": "archeology",
  "arhetipy": "archetypes",
  "arhiv": "archive",
  "aristokraty": "aristocracy",
  "aristotel": "aristotle",
  "arktika": "arctic",
  "armiya": "army",
  "armiya-1": "army",
  "art": "art",
  "art-is": "art-is",
  "artists": "artists",
  "ateizm": "atheism",
  "audio-poetry": "audio-poetry",
  "audiopoeziya": "audio-poetry",
  "audiospektakl": "audio-spectacles",
  "auktsyon": "auktsyon",
  "avangard": "avantgarde",
  "avtofikshn": "autofiction",
  "avtorskaya-pesnya": "bardsongs",
  "azbuka-immigratsii": "immigration-basics",
  "aziatskiy-kinematograf": "asian-cinema",
  "b-movie": "b-movie",
  "bannye-chteniya": "sauna-reading",
  "bardsongs": "bardsongs",
  "bdsm": "bdsm",
  "beecake": "beecake",
  "belarus": "belarus",
  "belgiya": "belgium",
  "bertold-breht": "berttold-brecht",
  "bezumie": "madness",
  "biography": "biography",
  "biologiya": "biology",
  "bipolyarnoe-rasstroystvo": "bipolar-disorder",
  "bitniki": "beatnics",
  "biznes": "business",
  "blizhniy-vostok": "middle-east",
  "blizost": "closeness",
  "blocked-in-russia": "blocked-in-russia",
  "blokada": "blockade",
  "bob-dilan": "bob-dylan",
  "bog": "god",
  "bol": "pain",
  "bolotnoe-delo": "bolotnaya-case",
  "books": "books",
  "boris-eltsin": "boris-eltsin",
  "boris-godunov": "boris-godunov",
  "boris-grebenschikov": "boris-grebenschikov",
  "boris-nemtsov": "boris-nemtsov",
  "boris-pasternak": "boris-pasternak",
  "brak": "marriage",
  "bret-iston-ellis": "bret-iston-ellis",
  "buddizm": "buddhism",
  "bullying": "bullying",
  "bunt": "riot",
  "burning-man": "burning-man",
  "bytie": "being",
  "byurokratiya": "bureaucracy",
  "capitalism": "capitalism",
  "censored-in-russia": "censored-in-russia",
  "ch-rno-beloe": "black-and-white",
  "ch-rnyy-yumor": "black-humour",
  "chapters": "chapters",
  "charity": "charity",
  "chayldfri": "childfree",
  "chechenskaya-voyna": "chechen-war",
  "chechnya": "chechnya",
  "chelovek": "male",
  "chernobyl": "chernobyl",
  "chernyy-yumor": "black-humour",
  "children": "children",
  "china": "china",
  "chinovniki": "bureaucracy",
  "chukotka": "chukotka",
  "chuma": "plague",
  "church": "church",
  "cinema": "cinema",
  "city": "city",
  "civil-position": "civil-position",
  "clips": "clips",
  "collage": "collage",
  "comics": "comics",
  "conspiracy-theory": "conspiracy-theory",
  "contemporary-art": "contemporary-art",
  "contemporary-poetry": "poetry",
  "contemporary-prose": "prose",
  "coronavirus": "coronavirus",
  "corruption": "corruption",
  "creative-writing-school": "creative-writing-school",
  "crime": "crime",
  "criticism": "criticism",
  "critiques": "reviews",
  "culture": "culture",
  "dadaizm": "dadaism",
  "daniel-defo": "daniel-defoe",
  "daniil-harms": "daniil-kharms",
  "dante-aligeri": "dante-alighieri",
  "darkveyv": "darkwave",
  "death": "death",
  "debaty": "debats",
  "delo-seti": "seti-case",
  "democracy": "democracy",
  "demografiya": "demographics",
  "demonstrations": "demonstrations",
  "depression": "depression",
  "derevnya": "village",
  "derrida": "derrida",
  "design": "design",
  "detskie-doma": "orphanages",
  "detstvo": "childhood",
  "devid-linch": "david-linch",
  "devyanostye": "90s",
  "dialog": "dialogue",
  "digital": "digital",
  "digital-art": "digital-art",
  "dinozavry": "dinosaurs",
  "directing": "directing",
  "diskurs": "discours",
  "diskurs-1": "discourse",
  "diskurs-analiz": "discourse-analytics",
  "dissidenty": "dissidents",
  "diy": "diy",
  "dmitriy-donskoy": "dmitriy-donskoy",
  "dmitriy-prigov": "dmitriy-prigov",
  "dnevnik-1": "dairy",
  "dnevniki": "dairies",
  "documentary": "documentary",
  "dokumentalnaya-poema": "documentary-poem",
  "dokumentalnaya-poeziya": "documentary-poetry",
  "dokumenty": "doсuments",
  "domashnee-nasilie": "home-terror",
  "donald-tramp": "donald-trump",
  "donbass": "donbass",
  "donbass-diary": "donbass-diary",
  "donorstvo": "donation",
  "dozhd": "rain",
  "drama": "drama",
  "dramaturgy": "dramaturgy",
  "drawing": "drawing",
  "drevo-zhizni": "tree-of-life",
  "drugs": "drugs",
  "duh": "spirit",
  "dzhaz": "jazz",
  "dzhek-keruak": "jack-keruak",
  "dzhim-morrison": "jim-morrison",
  "dzhordzh-romero": "george-romero",
  "dzhordzho-agamben": "giorgio-agamben",
  "ecology": "ecology",
  "economics": "economics",
  "eda": "food",
  "editorial-statements": "editorial-statements",
  "eduard-limonov": "eduard-limonov",
  "education": "education",
  "egor-letov": "egor-letov",
  "ekspat": "expat",
  "eksperiment": "experiments",
  "eksperimentalnaya-muzyka": "experimental-music",
  "ekspressionizm": "expressionism",
  "ekstremizm": "extremism",
  "ekzistentsializm-1": "existentialism",
  "ekzistentsiya": "existence",
  "elections": "elections",
  "electronic": "electronics",
  "electronics": "electronics",
  "elena-glinskaya": "elena-glinskaya",
  "elena-guro": "elena-guro",
  "elizaveta-mnatsakanova": "elizaveta-mnatsakanova",
  "embient": "ambient",
  "emigration": "emigration",
  "emil-dyurkgeym": "emile-durkheim",
  "emotsii": "emotions",
  "empiric": "empiric",
  "epidemiya": "pandemic",
  "erich-von-neff": "erich-von-neff",
  "erotika": "erotics",
  "essay": "essay",
  "estetika": "aestetics",
  "etika": "ethics",
  "etno": "ethno",
  "etnos": "ethnics",
  "everyday-life": "everyday-life",
  "evgeniy-onegin": "eugene-onegin",
  "evolyutsiya": "evolution",
  "exhibitions": "exhibitions",
  "experience": "experiences",
  "experimental": "experimental",
  "experimental-music": "experimental-music",
  "explanation": "explanation",
  "faktcheking": "fact-checking",
  "falsifikatsii": "falsifications",
  "family": "family",
  "fanfiki": "fan-fiction",
  "fantastika": "sci-fi",
  "fatalizm": "fatalism",
  "fedor-dostoevskiy": "fedor-dostoevsky",
  "fedor-ioannovich": "fedor-ioannovich",
  "feleton": "feuilleton",
  "feminism": "feminism",
  "fenomenologiya": "phenomenology",
  "fentezi": "fantasy",
  "festival": "festival",
  "festival-territoriya": "festival-territory",
  "folk": "folk",
  "folklor": "folklore",
  "fotoreportazh": "photoreports",
  "france": "france",
  "frants-kafka": "franz-kafka",
  "frederik-begbeder": "frederick-begbeder",
  "freedom": "freedom",
  "friendship": "friendship",
  "fsb": "fsb",
  "futbol": "footbool",
  "future": "future",
  "futuristy": "futurists",
  "futurizm": "futurism",
  "galereya": "gallery",
  "galereya-anna-nova": "gallery-anna-nova",
  "gdr": "gdr",
  "gender": "gender",
  "gendernyy-diskurs": "gender",
  "gennadiy-aygi": "gennadiy-aygi",
  "gerhard-rihter": "gerhard-rihter",
  "germaniya": "germany",
  "germenevtika": "hermeneutics",
  "geroi": "heroes",
  "girls": "girls",
  "gkchp": "gkchp",
  "glitch": "glitch",
  "globalizatsiya": "globalisation",
  "gollivud": "hollywood",
  "gonzo": "gonzo",
  "gore-ot-uma": "woe-from-wit",
  "graffiti": "graffiti",
  "graficheskaya-novella": "graphic-novell",
  "graphics": "graphics",
  "gravyura": "engraving",
  "grazhdanskaya-oborona": "grazhdanskaya-oborona",
  "gretsiya": "greece",
  "griby": "mushrooms",
  "gruziya-2": "georgia",
  "gulag": "gulag",
  "han-batyy": "khan-batyy",
  "hayku": "haiku",
  "health": "health",
  "himiya": "chemistry",
  "hip-hop": "hip-hop",
  "history": "history",
  "history-of-russia": "history-of-russia",
  "holokost": "holocaust",
  "horeografiya": "choreography",
  "horror": "horror",
  "hospis": "hospice",
  "hristianstvo": "christianity",
  "humans": "humans",
  "humour": "humour",
  "ideologiya": "ideology",
  "idm": "idm",
  "igil": "isis",
  "igor-pomerantsev": "igor-pomerantsev",
  "igra": "game",
  "igra-prestolov": "game-of-throne",
  "igry": "games",
  "iisus-hristos": "jesus-christ",
  "illness": "illness",
  "illustration-history": "illustration-history",
  "illustrations": "illustrations",
  "imazhinizm": "imagism",
  "immanuil-kant": "immanuel-kant",
  "impressionizm": "impressionism",
  "improvizatsiya": "improvisation",
  "indi": "indie",
  "individualizm": "individualism",
  "infografika": "infographics",
  "informatsiya": "information",
  "ingmar-bergman": "ingmar-bergman",
  "inklyuziya": "inclusion",
  "installyatsiya": "installation",
  "internet": "internet",
  "interview": "interview",
  "invalidnost": "disability",
  "investigations": "investigations",
  "iosif-brodskiy": "joseph-brodsky",
  "iosif-stalin": "joseph-stalin",
  "iskusstvennyy-intellekt": "artificial-intelligence",
  "islam": "islam",
  "istoriya-moskvy": "moscow-history",
  "istoriya-nauki": "history-of-sceince",
  "istoriya-o-medsestre": "nurse-story",
  "istoriya-teatra": "theatre-history",
  "italiya": "italy",
  "italyanskiy-yazyk": "italian-language",
  "iudaika": "judaica",
  "ivan-groznyy": "ivan-grozny",
  "ivan-iii-gorbatyy": "ivan-iii-gorbaty",
  "ivan-kalita": "ivan-kalita",
  "ivan-krylov": "ivan-krylov",
  "izobreteniya": "inventions",
  "izrail-1": "israel",
  "jazz": "jazz",
  "john-lennon": "john-lennon",
  "journalism": "journalism",
  "justice": "justice",
  "k-pop": "k-pop",
  "kalligrafiya": "calligraphy",
  "karikatura": "caricatures",
  "kartochki-rubinshteyna": "rubinstein-cards",
  "katrin-nenasheva": "katrin-nenasheva",
  "kavarga": "kavarga",
  "kavkaz": "caucasus",
  "kazan": "kazan",
  "kiberbezopasnost": "cybersecurity",
  "kinoklub": "cinema-club",
  "kinokritika": "film-criticism",
  "kirill-serebrennikov": "kirill-serebrennikov",
  "kladbische": "cemetery",
  "klassika": "classic",
  "kollektivnoe-bessoznatelnoe": "сollective-unconscious",
  "komediya": "comedy",
  "kommunikatsii": "communications",
  "kommunizm": "communism",
  "kommuny": "communes",
  "kompyuternye-igry": "computer-games",
  "konets-vesny": "end-of-spring",
  "konservatizm": "conservatism",
  "kontrkultura": "counter-culture",
  "kontseptualizm": "conceptualism",
  "korotkometrazhka": "cinema-shorts",
  "kosmos": "cosmos",
  "kraudfanding": "crowdfunding",
  "kriptovalyuty": "cryptocurrencies",
  "krizis": "crisis",
  "krov": "blood",
  "krym": "crimea",
  "kulturologiya": "culturology",
  "kulty": "cults",
  "kurdistan": "kurdistan",
  "kurt-kobeyn": "kurt-cobain",
  "kurt-vonnegut": "kurt-vonnegut",
  "kvir": "queer",
  "laboratoriya": "lab",
  "language": "languages",
  "lars-fon-trier": "lars-fon-trier",
  "laws": "laws",
  "lectures": "lectures",
  "leto": "summer",
  "lev-tolstoy": "leo-tolstoy",
  "lgbt": "lgbt",
  "liberalizm": "liberalism",
  "libertarianstvo": "libertarianism",
  "life": "life",
  "likbez": "likbez",
  "lingvistika": "linguistics",
  "lirika": "lirics",
  "literary-studies": "literary-studies",
  "literature": "literature",
  "literaturnyykaver": "literature-cover",
  "lo-fi": "lo-fi",
  "lomonosov": "lomonosov",
  "love": "love",
  "luzha-goluboy-krovi": "luzha-goluboy-krovi",
  "lyudvig-vitgenshteyn": "ludwig-wittgenstein",
  "lzhedmitriy": "false-dmitry",
  "lzhenauka": "pseudoscience",
  "magiya": "magic",
  "maks-veber": "max-weber",
  "manifests": "manifests",
  "manipulyatsii-soznaniem": "mind-manipulation",
  "marina-abramovich": "marina-abramovich",
  "marketing": "marketing",
  "marksizm": "marxism",
  "marsel-dyushan": "marchel-duchamp",
  "marsel-prust": "marcel-proust",
  "martin-haydegger": "martin-hidegger",
  "matematika": "maths",
  "mayakovskiy": "vladimir-mayakovsky",
  "media": "media",
  "medicine": "medicine",
  "memuary": "memoirs",
  "menedzhment": "management",
  "menty": "police",
  "merab-mamardashvili": "merab-mamardashvili",
  "mest": "revenge",
  "metamodernizm": "metamodern",
  "metavselennaya": "metaverse",
  "metro": "metro",
  "mifologiya": "mythology",
  "mify": "myth",
  "mihael-haneke": "michael-haneke",
  "mihail-baryshnikov": "mihail-baryshnikov",
  "mihail-bulgakov": "mihail-bulgakov",
  "mikrotonalnaya-muzyka": "mikrotone-muzyka",
  "minimalizm": "minimalism",
  "minkult-privet": "minkult-privet",
  "mir": "world",
  "mirovozzrenie": "mindsets",
  "mishel-fuko": "michel-foucault",
  "mistika": "mystics",
  "mitropolit-makariy": "mitropolit-makariy",
  "mlm": "mlm",
  "mobilizatsiya": "mobilisation",
  "moda": "fashion",
  "modernizm": "modernism",
  "mokyumentari": "mockumentary",
  "molodezh": "youth",
  "moloko-plus": "moloko-plus",
  "money": "money",
  "monologs": "monologues",
  "monstratsiya": "monstration",
  "moralnaya-otvetstvennost": "moral-responsibility",
  "more": "sea",
  "moscow": "moscow",
  "moshennichestvo": "frauds",
  "moskovskiy-romanticheskiy-kontseptualizm": "moscow-romantic-conceptualism",
  "moskovskoe-delo": "moscow-case",
  "movies": "movies",
  "mozg": "brain",
  "multiplikatsiya": "animation",
  "music": "music",
  "musulmanstvo": "islam",
  "muzei": "museum",
  "muzey": "museum",
  "muzhchiny": "man",
  "myshlenie": "thinking",
  "nagornyy-karabah": "nagorno-karabakh",
  "nasilie-1": "violence",
  "natsionalizm": "nationalism",
  "natsionalnaya-ideya": "national-idea",
  "natsizm": "nazism",
  "natyurmort": "nature-morte",
  "nauchpop": "pop-science",
  "nbp": "nbp",
  "nenavist": "hate",
  "neofitsialnaya-literatura": "unofficial-literature",
  "neoklassika": "neoclassic",
  "neprozrachnye-smysly": "hidden-meanings",
  "neravenstvo": "inequality",
  "net-voyne": "no-war",
  "new-year": "new-year",
  "neyronauka": "neuro-science",
  "neyroseti": "neural-networks",
  "niu-vshe": "hse",
  "nizhniy-novgorod": "nizhny-novgorod",
  "nko": "nonprofits",
  "nlo": "ufo",
  "nobelevskaya-premiya": "nobel-prize",
  "noize-mc": "noize-mc",
  "nonkonformizm": "nonconformism",
  "notforall": "notforall",
  "novaya-drama": "new-drama",
  "novosti": "news",
  "noyz": "noise",
  "nuar": "noir",
  "oberiu": "oberiu",
  "ocherk": "etudes",
  "ochevidnyy-nuar": "ochevidnyy-nuar",
  "odinochestvo": "loneliness",
  "odna-kniga-odna-istoriya": "one-book-one-story",
  "okrainy": "outskirts",
  "omon": "swat",
  "opinions": "opinions",
  "oppozitsiya": "opposition",
  "orhan-pamuk": "orhan-pamuk",
  "ornitologiya": "ornitology",
  "osen": "autumn",
  "osip-mandelshtam": "osip-mandelshtam",
  "oskar-uayld": "oscar-wilde",
  "osoznanie": "awareness",
  "otnosheniya": "relationship",
  "pablo-pikasso": "pablo-picasso",
  "painting": "painting",
  "paintings": "painting",
  "pamyat": "memory",
  "pandemiya": "pandemic",
  "parizh": "paris",
  "patriotizm": "patriotism",
  "patsifizm": "pacifism",
  "paul-tselan": "paul-tselan",
  "per-burd": "pierre-bourdieu",
  "perezhivaniya": "worries",
  "performance": "performance",
  "peyzazh": "landscape",
  "philology": "philology",
  "philosophy": "philosophy",
  "photo": "photography",
  "photography": "photography",
  "photoprojects": "photoprojects",
  "plakaty": "posters",
  "plastilin": "plasticine",
  "plays": "plays",
  "podrostki": "teenagers",
  "poema": "poem",
  "poems": "poems",
  "poeticheskaya-proza": "poetic-prose",
  "poetry": "poetry",
  "poetry-of-squares": "poetry-of-squares",
  "poetry-slam": "poetry-slam",
  "pokoy": "peace",
  "police": "police",
  "politicheskoe-fentezi": "political-fantasy",
  "politics": "politics",
  "politzaklyuchennye": "political-prisoners",
  "polsha": "poland",
  "pomosch": "help",
  "pop-art": "pop-art",
  "pop-culture": "pop-culture",
  "populyarnaya-psihologiya": "popular-psychology",
  "pornografiya": "pornography",
  "portret": "portrait",
  "poslovitsy": "proverbs",
  "post-pank": "post-punk",
  "post-rok": "post-rock",
  "postmodernism": "postmodernism",
  "povest": "novells",
  "povsednevnost": "everyday-life",
  "power": "power",
  "pravo": "right",
  "pravoslavie": "orthodox",
  "pravozaschitniki": "human-rights-activism",
  "prazdnik": "holidays",
  "predatelstvo": "betrayal",
  "predprinimatelstvo": "entrepreneurship",
  "premera": "premier",
  "premiya-oskar": "oscar-prize",
  "pribaltika-1": "baltic",
  "priroda": "nature",
  "prison": "prison",
  "pritcha": "parable",
  "privatnost": "privacy",
  "progress": "progress",
  "projects": "projects",
  "prokrastinatsiya": "procrastination",
  "propaganda": "propaganda",
  "proschenie": "forgiveness",
  "prose": "prose",
  "proshloe": "past",
  "prostitutsiya": "prostitution",
  "prosveschenie": "enlightenment",
  "protests": "protests",
  "psalmy": "psalms",
  "psihoanaliz": "psychoanalysis",
  "psihodeliki": "psychodelics",
  "pskov": "pskov",
  "psychiatry": "psychiatry",
  "psychology": "psychology",
  "ptitsy": "birds",
  "punk": "punk",
  "r-b": "rnb",
  "rasizm": "racism",
  "realizm": "realism",
  "redaktura": "editing",
  "refleksiya": "reflection",
  "reggi": "reggae",
  "religion": "religion",
  "rene-zhirar": "rene-girard",
  "renesanss": "renessance",
  "renovatsiya": "renovation",
  "rep": "rap",
  "reportage": "reportage",
  "reportazh-1": "reportage",
  "repressions": "repressions",
  "research": "research",
  "retroveyv": "retrowave",
  "review": "review",
  "revolution": "revolution",
  "rezo-gabriadze": "rezo-gabriadze",
  "risunki": "painting",
  "roboty": "robots",
  "rock": "rock",
  "roditeli": "parents",
  "romantizm": "romantism",
  "romany": "novell",
  "ronald-reygan": "ronald-reygan",
  "roskomnadzor": "roskomnadzor",
  "rossiyskoe-kino": "russian-cinema",
  "rouling": "rowling",
  "rozhava": "rojava",
  "rpts": "rpts",
  "rus-na-grani-sryva": "rus-na-grani-sryva",
  "russia": "russia",
  "russian-language": "russian-language",
  "russian-literature": "russian-literature",
  "russkaya-toska": "russian-toska",
  "russkiy-mir": "russkiy-mir",
  "salo": "lard",
  "salvador-dali": "salvador-dali",
  "samoidentifikatsiya": "self-identity",
  "samoopredelenie": "self-definition",
  "sankt-peterburg": "saint-petersburg",
  "sasha-skochilenko": "sasha-skochilenko",
  "satira": "satiric",
  "saund-art": "sound-art",
  "schaste": "happiness",
  "school": "school",
  "science": "science",
  "sculpture": "sculpture",
  "second-world-war": "second-world-war",
  "sekond-hend": "second-hand",
  "seksprosvet": "sex-education",
  "seksualizirovannoe-nasilie": "sexualized-violence",
  "seksualnoe-nasilie": "sexualized-violence",
  "sekty": "sects",
  "semi": "semi",
  "semiotics": "semiotics",
  "serbiya": "serbia",
  "sergey-bodrov-mladshiy": "sergey-bodrov-junior",
  "sergey-solov-v": "sergey-solovyov",
  "serialy": "series",
  "sever": "north",
  "severnaya-koreya": "north-korea",
  "sex": "sex",
  "shotlandiya": "scotland",
  "shugeyz": "shoegaze",
  "siloviki": "siloviki",
  "simeon-bekbulatovich": "simeon-bekbulatovich",
  "simvolizm": "simbolism",
  "siriya": "siria",
  "skulptura": "sculpture",
  "slavoy-zhizhek": "slavoj-zizek",
  "smert-1": "death",
  "smysl": "meaning",
  "sny": "dreams",
  "sobytiya": "events",
  "social": "society",
  "society": "society",
  "sociology": "sociology",
  "sofya-paleolog": "sofya-paleolog",
  "sofya-vitovtovna": "sofya-vitovtovna",
  "soobschestva": "communities",
  "soprotivlenie": "resistence",
  "sotsializm": "socialism",
  "sotsialnaya-filosofiya": "social-philosophy",
  "sotsiologiya-1": "sociology",
  "sotsseti": "social-networks",
  "sotvorenie-tretego-rima": "third-rome",
  "sovremennost": "modernity",
  "spaces": "spaces",
  "spektakl": "spectacles",
  "spetseffekty": "special-fx",
  "spetsoperatsiya": "special-operation",
  "spetssluzhby": "special-services",
  "sport": "sport",
  "srednevekove": "middle-age",
  "state": "state",
  "statistika": "statistics",
  "stendap": "stand-up",
  "stihi": "poetry",
  "stoitsizm": "stoicism",
  "stories": "stories",
  "stoyanie-na-ugre": "stoyanie-na-ugre",
  "strah": "fear",
  "street-art": "street-art",
  "stsenarii": "scenarios",
  "sud": "court",
  "summary": "summary",
  "supergeroi": "superheroes",
  "svetlana-aleksievich": "svetlana-aleksievich",
  "svobodu-ivanu-golunovu": "free-ivan-golunov",
  "syurrealizm": "surrealism",
  "tales": "tales",
  "tanets": "dance",
  "tataro-mongolskoe-igo": "mongol-tatar-yoke",
  "tatuirovki": "tattoo",
  "technology": "technology",
  "televidenie": "television",
  "telo": "body",
  "telo-kak-iskusstvo": "body-as-art",
  "terrorizm": "terrorism",
  "tests": "tests",
  "text": "texts",
  "the-beatles": "the-beatles",
  "theater": "theater",
  "theory": "theory",
  "tokio": "tokio",
  "torture": "torture",
  "totalitarizm": "totalitarism",
  "traditions": "traditions",
  "tragicomedy": "tragicomedy",
  "transgendernost": "transgender",
  "translation": "translation",
  "transport": "transport",
  "travel": "travel",
  "travma": "trauma",
  "trendy": "trends",
  "tretiy-reyh": "third-reich",
  "triller": "thriller",
  "tsar": "central-african-republic",
  "tsar-edip": "oedipus",
  "tsarevich-dmitriy": "tsarevich-dmitry",
  "tsennosti": "values",
  "tsenzura": "censorship",
  "tseremonii": "ceremonies",
  "turizm": "tourism",
  "tvorchestvo": "creativity",
  "ugnetennyy-zhilischnyy-klass": "oppressed-housing-class",
  "uilyam-shekspir": "william-shakespeare",
  "ukraina-2": "ukraine",
  "ukraine": "ukraine",
  "university": "university",
  "urban-studies": "urban-studies",
  "uroki-literatury": "literature-lessons",
  "usa": "usa",
  "ussr": "ussr",
  "utopiya": "utopia",
  "utrata": "loss",
  "valter-benyamin": "valter-benyamin",
  "varlam-shalamov": "varlam-shalamov",
  "vasiliy-ii-temnyy": "basil-ii-temnyy",
  "vasiliy-iii": "basil-iii",
  "vdnh": "vdnh",
  "vechnost": "ethernety",
  "velikobritaniya": "great-britain",
  "velimir-hlebnikov": "velimir-hlebnikov",
  "velkom-tu-greyt-britn": "welcome-to-great-britain",
  "venedikt-erofeev": "venedikt-erofeev",
  "venetsiya": "veneece",
  "vengriya": "hungary",
  "verlibry": "free-verse",
  "veschi": "things",
  "vessels": "vessels",
  "veterany": "veterans",
  "video": "video",
  "videoart": "videoart",
  "videoklip": "clips",
  "videopoeziya": "video-poetry",
  "viktor-astafev": "viktor-astafev",
  "viktor-pelevin": "viktor-pelevin",
  "vilgelm-rayh": "wilhelm-reich",
  "vinzavod": "vinzavod",
  "violence": "violence",
  "visual-culture": "visual-culture",
  "vizualnaya-poeziya": "visual-poetry",
  "vladimir-lenin": "vladimir-lenin",
  "vladimir-mayakovskiy": "vladimir-mayakovsky",
  "vladimir-nabokov": "vladimir-nabokov",
  "vladimir-putin": "vladimir-putin",
  "vladimir-sorokin": "vladimir-sorokin",
  "vladimir-voynovich": "vladimir-voynovich",
  "vnutrenniy-opyt": "inner-expirience",
  "volga": "volga",
  "volontery": "volonteurs",
  "vong-karvay": "wong-karwai",
  "vospominaniya": "memories",
  "vostok": "east",
  "voyna-na-ukraine": "war-in-ukraine",
  "voyna-v-ukraine": "war-in-ukraine",
  "vremya": "time",
  "vudi-allen": "woody-allen",
  "vynuzhdennye-otnosheniya": "forced-relationship",
  "war": "war",
  "war-in-ukraine-images": "war-in-ukrahine-images",
  "women": "women",
  "work": "work",
  "writers": "writers",
|
|
||||||
"xx-century": "xx-century",
|
|
||||||
"yakob-yordans": "yakob-yordans",
|
|
||||||
"yan-vermeer": "yan-vermeer",
|
|
||||||
"yanka-dyagileva": "yanka-dyagileva",
|
|
||||||
"yaponskaya-literatura": "japan-literature",
|
|
||||||
"yazychestvo": "paganism",
|
|
||||||
"youth": "youth",
|
|
||||||
"yozef-rot": "yozef-rot",
|
|
||||||
"yurgen-habermas": "jorgen-habermas",
|
|
||||||
"za-liniey-mannergeyma": "behind-mannerheim-line",
|
|
||||||
"zabota": "care",
|
|
||||||
"zahar-prilepin": "zahar-prilepin",
|
|
||||||
"zakonodatelstvo": "laws",
|
|
||||||
"zakony-mira": "world-laws",
|
|
||||||
"zametki": "notes",
|
|
||||||
"zhelanie": "wish",
|
|
||||||
"zhivotnye": "animals",
|
|
||||||
"zhoze-saramago": "jose-saramago",
|
|
||||||
"zigmund-freyd": "sigmund-freud",
|
|
||||||
"zolotaya-orda": "golden-horde",
|
|
||||||
"zombi": "zombie",
|
|
||||||
"zombi-simpsony": "zombie-simpsons"
|
|
||||||
}
|
|
|
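The table above maps legacy transliterated slugs to their canonical replacements. A minimal sketch of how such a map is consulted during migration; REPLACEMENTS and normalize_slug are illustrative names, not identifiers from this repository:

# Illustrative only: two entries copied from the table above.
REPLACEMENTS = {
    "zigmund-freyd": "sigmund-freud",
    "zolotaya-orda": "golden-horde",
}

def normalize_slug(old_slug):
    # fall back to the original slug when no replacement is defined
    return REPLACEMENTS.get(old_slug, old_slug)

assert normalize_slug("zigmund-freyd") == "sigmund-freud"
assert normalize_slug("never-renamed") == "never-renamed"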
@ -1,32 +0,0 @@
from services.db import local_session
from migration.extract import extract_md
from migration.html2text import html2text
from orm import Topic


def migrate(entry):
    # replace non-breaking space entities with regular spaces before conversion
    body_orig = entry.get("description", "").replace("&nbsp;", " ")
    topic_dict = {
        "slug": entry["slug"],
        "oid": entry["_id"],
        "title": entry["title"].replace("&nbsp;", " "),
        "body": extract_md(html2text(body_orig)),
    }

    with local_session() as session:
        slug = topic_dict["slug"]
        # reuse an existing topic with the same slug or create a new one
        topic = session.query(Topic).filter(Topic.slug == slug).first() or Topic.create(
            **topic_dict
        )
        if not topic:
            raise Exception("no topic!")
        # keep the shorter title and the longer body
        if len(topic.title) > len(topic_dict["title"]):
            Topic.update(topic, {"title": topic_dict["title"]})
        if len(topic.body) < len(topic_dict["body"]):
            Topic.update(topic, {"body": topic_dict["body"]})
        session.commit()
    rt = topic.__dict__.copy()
    del rt["_sa_instance_state"]
    return rt
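For context, a sketch of the Mongo-style record this deleted migrate() consumed; the keys (slug, _id, title, description) follow the code above, all values are invented:

# Hypothetical input record; field values are illustrative only.
sample_topic = {
    "_id": "topic0001",
    "slug": "poetry",
    "title": "Poetry",
    "description": "<p>Poems&nbsp;and verse</p>",
}
# migrate(sample_topic) would upsert the Topic row and return it as a plain dict.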
@ -1,167 +0,0 @@
import re

from bs4 import BeautifulSoup
from dateutil.parser import parse
from sqlalchemy.exc import IntegrityError

from services.db import local_session
from orm.user import AuthorFollower, User, UserRating


def migrate(entry):
    if "subscribedTo" in entry:
        del entry["subscribedTo"]
    email = entry["emails"][0]["address"]
    user_dict = {
        "oid": entry["_id"],
        "roles": [],
        "ratings": [],
        "username": email,
        "email": email,
        "createdAt": parse(entry["createdAt"]),
        "emailConfirmed": ("@discours.io" in email)
        or bool(entry["emails"][0]["verified"]),
        "muted": False,  # amnesty
        "bio": entry["profile"].get("bio", ""),
        "links": [],
        "name": "anonymous",
        "password": entry["services"]["password"].get("bcrypt"),
    }

    if "updatedAt" in entry:
        user_dict["updatedAt"] = parse(entry["updatedAt"])
    if "wasOnlineAt" in entry:
        user_dict["lastSeen"] = parse(entry["wasOnlineAt"])
    if entry.get("profile"):
        # slug
        slug = entry["profile"].get("path").lower()
        slug = re.sub("[^0-9a-zA-Z]+", "-", slug).strip("-")
        user_dict["slug"] = slug
        bio = (
            (entry.get("profile", {"bio": ""}).get("bio") or "")
            .replace("\\(", "(")
            .replace("\\)", ")")
        )
        bio_text = BeautifulSoup(bio, features="lxml").text

        if len(bio_text) > 120:
            user_dict["about"] = bio_text
        else:
            user_dict["bio"] = bio_text

        # userpic
        try:
            user_dict["userpic"] = (
                "https://assets.discours.io/unsafe/100x/"
                + entry["profile"]["thumborId"]
            )
        except KeyError:
            try:
                user_dict["userpic"] = entry["profile"]["image"]["url"]
            except KeyError:
                user_dict["userpic"] = ""

        # name
        fn = entry["profile"].get("firstName", "")
        ln = entry["profile"].get("lastName", "")
        name = fn if fn else ""
        name = (name + " " + ln) if ln else name
        if not name:
            name = slug if slug else "anonymous"
        name = (
            entry["profile"]["path"].lower().strip().replace(" ", "-")
            if len(name) < 2
            else name
        )
        user_dict["name"] = name

        # links
        fb = entry["profile"].get("facebook", False)
        if fb:
            user_dict["links"].append(fb)
        vk = entry["profile"].get("vkontakte", False)
        if vk:
            user_dict["links"].append(vk)
        tr = entry["profile"].get("twitter", False)
        if tr:
            user_dict["links"].append(tr)
        ws = entry["profile"].get("website", False)
        if ws:
            user_dict["links"].append(ws)

    # some checks
    if not user_dict.get("slug") and len(user_dict["links"]) > 0:
        user_dict["slug"] = user_dict["links"][0].split("/")[-1]

    user_dict["slug"] = user_dict.get("slug") or user_dict["email"].split("@")[0]
    oid = user_dict["oid"]
    user_dict["slug"] = user_dict["slug"].lower().strip().replace(" ", "-")
    try:
        user = User.create(**user_dict.copy())
    except IntegrityError:
        print("[migration] cannot create user " + user_dict["slug"])
        with local_session() as session:
            old_user = (
                session.query(User).filter(User.slug == user_dict["slug"]).first()
            )
            if not old_user:
                print("[migration] ERROR: cannot find user " + user_dict["slug"])
                raise Exception
            old_user.oid = oid
            old_user.password = user_dict["password"]
            session.commit()
            user = old_user
    user_dict["id"] = user.id
    return user_dict


def post_migrate():
    old_discours_dict = {
        "slug": "old-discours",
        "username": "old-discours",
        "email": "old@discours.io",
        "name": "Просмотры на старой версии сайта",
    }

    with local_session() as session:
        old_discours_user = User.create(**old_discours_dict)
        session.add(old_discours_user)
        session.commit()


def migrate_2stage(entry, id_map):
    ce = 0
    for rating_entry in entry.get("ratings", []):
        rater_oid = rating_entry["createdBy"]
        rater_slug = id_map.get(rater_oid)
        if not rater_slug:
            ce += 1
            continue
        oid = entry["_id"]
        author_slug = id_map.get(oid)

        with local_session() as session:
            try:
                rater = session.query(User).where(User.slug == rater_slug).one()
                user = session.query(User).where(User.slug == author_slug).one()

                user_rating_dict = {
                    "value": rating_entry["value"],
                    "rater": rater.id,
                    "user": user.id,
                }

                user_rating = UserRating.create(**user_rating_dict)
                # a positive rating also creates an automatic follow
                if user_rating_dict["value"] > 0:
                    af = AuthorFollower.create(
                        author=user.id, follower=rater.id, auto=True
                    )
                    session.add(af)
                session.add(user_rating)
                session.commit()
            except IntegrityError:
                print("[migration] cannot rate " + author_slug + "`s by " + rater_slug)
            except Exception as e:
                print(e)
    return ce
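For reference, a hypothetical Mongo-style user record covering the fields that the deleted migrate() and migrate_2stage() read; every value here is invented:

# Sketch of the expected input shape; values are illustrative only.
sample_user = {
    "_id": "user0001",
    "createdAt": "2018-01-15T12:00:00Z",
    "emails": [{"address": "reader@example.com", "verified": True}],
    "services": {"password": {"bcrypt": "$2b$12$examplehashvalue"}},
    "profile": {
        "path": "Reader One",
        "firstName": "Reader",
        "lastName": "One",
        "bio": "<p>Writes about cities.</p>",
        "website": "https://example.com/reader-one",
    },
    # consumed by migrate_2stage(); createdBy is another user's _id
    "ratings": [{"createdBy": "user0002", "value": 1}],
}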
@ -1,10 +0,0 @@
from datetime import datetime
from json import JSONEncoder


class DateTimeEncoder(JSONEncoder):
    def default(self, z):
        if isinstance(z, datetime):
            return str(z)
        else:
            return super().default(z)
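Usage is standard json-module fare; a quick sketch:

import json
from datetime import datetime

# DateTimeEncoder stringifies datetime values that json.dumps would
# otherwise reject with a TypeError.
payload = {"migrated_at": datetime(2023, 7, 1, 12, 30)}
print(json.dumps(payload, cls=DateTimeEncoder))
# -> {"migrated_at": "2023-07-01 12:30:00"}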
@ -6,7 +6,7 @@ pyjwt>=2.6.0
 git+https://github.com/encode/starlette.git#main
 sqlalchemy>=1.4.41
 graphql-core>=3.0.3
-gql~=3.4.0
+gql[httpx]
 uvicorn>=0.18.3
 pydantic>=1.10.2
 passlib~=1.7.4
@ -14,14 +14,11 @@ itsdangerous
 authlib>=1.1.0
 httpx>=0.23.0
 psycopg2-binary
-transliterate~=1.10.2
 bcrypt>=4.0.0
 websockets
-bson~=0.5.10
 flake8
 DateTime~=4.7
 python-dateutil~=2.8.2
-beautifulsoup4~=4.11.1
 lxml
 sentry-sdk>=1.14.0
 boto3~=1.28.2
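The gql pin moves to the httpx extra (installed as "gql[httpx]"), which pulls in the HTTPX-based transports. A one-line smoke test for the new dependency:

# Fails with ImportError unless gql was installed with its httpx extra,
# e.g. pip install "gql[httpx]".
from gql.transport.httpx import HTTPXAsyncTransport  # noqa: F401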
@ -2,18 +2,16 @@ import asyncio
 import time
 from datetime import timedelta, timezone, datetime
 from os import environ, path
-from ssl import create_default_context

 from gql import Client, gql
-from gql.transport.aiohttp import AIOHTTPTransport
+from gql.transport.httpx import HTTPXAsyncTransport

 from services.db import local_session
 from orm import Topic
 from orm.shout import ShoutTopic, Shout

 load_facts = gql(
-    """
-    query getDomains {
+    """ query getDomains {
     domains {
         id
         title
@ -23,14 +21,11 @@ query getDomains {
         viewsMonth
         viewsYear
     }
-    }
-    }
-    """
+    } } """
 )

 load_pages = gql(
-    """
-    query getDomains {
+    """ query getDomains {
     domains {
         title
         statistics {
@ -41,10 +36,9 @@
         value
     }
     }
-    }
-    }
-    """
+    } } """
 )

 schema_str = open(path.dirname(__file__) + "/ackee.graphql").read()
 token = environ.get("ACKEE_TOKEN", "")

@ -52,9 +46,8 @@ token = environ.get("ACKEE_TOKEN", "")
 def create_client(headers=None, schema=None):
     return Client(
         schema=schema,
-        transport=AIOHTTPTransport(
+        transport=HTTPXAsyncTransport(
             url="https://ackee.discours.io/api",
-            ssl=create_default_context(),
             headers=headers,
         ),
     )
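With the HTTPX transport in place, execution follows gql 3.x's documented async pattern; a hedged sketch (the Authorization header format is an assumption, not taken from this commit):

import asyncio

async def fetch_facts(headers=None):
    # create_client() is defined above; gql opens and closes the
    # async transport via the client's context manager.
    client = create_client(headers=headers)
    async with client as session:
        return await session.execute(load_facts)

# result = asyncio.run(fetch_facts({"Authorization": "Bearer " + token}))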