From 65532ea1a3b6082e0db965bb07852737f77fcbb2 Mon Sep 17 00:00:00 2001 From: tonyrewin Date: Thu, 11 Aug 2022 12:14:12 +0300 Subject: [PATCH] migration-is-back --- migrate.py | 303 ++++++++ migration/__init__.py | 1 + migration/bson2json.py | 28 + migration/export.py | 105 +++ migration/extract.py | 324 +++++++++ migration/html2text/__init__.py | 1041 ++++++++++++++++++++++++++++ migration/html2text/__main__.py | 3 + migration/html2text/cli.py | 322 +++++++++ migration/html2text/config.py | 164 +++++ migration/html2text/elements.py | 18 + migration/html2text/py.typed | 0 migration/html2text/typing.py | 3 + migration/html2text/utils.py | 290 ++++++++ migration/tables/__init__.py | 1 + migration/tables/comments.py | 108 +++ migration/tables/content_items.py | 226 ++++++ migration/tables/replacements.json | 768 ++++++++++++++++++++ migration/tables/topics.py | 28 + migration/tables/users.py | 106 +++ migration/utils.py | 9 + 20 files changed, 3848 insertions(+) create mode 100644 migrate.py create mode 100644 migration/__init__.py create mode 100644 migration/bson2json.py create mode 100644 migration/export.py create mode 100644 migration/extract.py create mode 100644 migration/html2text/__init__.py create mode 100644 migration/html2text/__main__.py create mode 100644 migration/html2text/cli.py create mode 100644 migration/html2text/config.py create mode 100644 migration/html2text/elements.py create mode 100644 migration/html2text/py.typed create mode 100644 migration/html2text/typing.py create mode 100644 migration/html2text/utils.py create mode 100644 migration/tables/__init__.py create mode 100644 migration/tables/comments.py create mode 100644 migration/tables/content_items.py create mode 100644 migration/tables/replacements.json create mode 100644 migration/tables/topics.py create mode 100644 migration/tables/users.py create mode 100644 migration/utils.py diff --git a/migrate.py b/migrate.py new file mode 100644 index 00000000..c5312a3a --- /dev/null +++ b/migrate.py @@ -0,0 +1,303 @@ +''' cmd managed migration ''' +from datetime import datetime +import json +import subprocess +import sys +import os + +# from migration.export import export_email_subscriptions +from migration.export import export_mdx, export_slug +from migration.tables.users import migrate as migrateUser +from migration.tables.users import migrate_2stage as migrateUser_2stage +from migration.tables.content_items import get_shout_slug, migrate as migrateShout +from migration.tables.topics import migrate as migrateTopic +from migration.tables.comments import migrate as migrateComment +from migration.tables.comments import migrate_2stage as migrateComment_2stage +from orm.reaction import Reaction +from settings import DB_URL + +TODAY = datetime.strftime(datetime.now(), '%Y%m%d') + +OLD_DATE = '2016-03-05 22:22:00.350000' + + +def users_handle(storage): + ''' migrating users first ''' + counter = 0 + id_map = {} + print('[migration] migrating %d users' % (len(storage['users']['data']))) + for entry in storage['users']['data']: + oid = entry['_id'] + user = migrateUser(entry) + storage['users']['by_oid'][oid] = user # full + del user['password'] + del user['notifications'] + del user['emailConfirmed'] + del user['username'] + del user['email'] + storage['users']['by_slug'][user['slug']] = user # public + id_map[user['oid']] = user['slug'] + counter += 1 + ce = 0 + for entry in storage['users']['data']: + ce += migrateUser_2stage(entry, id_map) + return storage + + +def topics_handle(storage): + ''' topics from categories and 
tags ''' + counter = 0 + for t in (storage['topics']['tags'] + storage['topics']['cats']): + if t['slug'] in storage['replacements']: + t['slug'] = storage['replacements'][t['slug']] + topic = migrateTopic(t) + storage['topics']['by_oid'][t['_id']] = topic + storage['topics']['by_slug'][t['slug']] = topic + counter += 1 + else: + print('[migration] topic ' + t['slug'] + ' ignored') + for oldslug, newslug in storage['replacements'].items(): + if oldslug != newslug and oldslug in storage['topics']['by_slug']: + oid = storage['topics']['by_slug'][oldslug]['_id'] + del storage['topics']['by_slug'][oldslug] + storage['topics']['by_oid'][oid] = storage['topics']['by_slug'][newslug] + print('[migration] ' + str(counter) + ' topics migrated') + print('[migration] ' + str(len(storage['topics'] + ['by_oid'].values())) + ' topics by oid') + print('[migration] ' + str(len(storage['topics'] + ['by_slug'].values())) + ' topics by slug') + # raise Exception + return storage + + +def shouts_handle(storage, args): + ''' migrating content items one by one ''' + counter = 0 + discours_author = 0 + pub_counter = 0 + for entry in storage['shouts']['data']: + # slug + slug = get_shout_slug(entry) + + # single slug mode + if '-' in args and slug not in args: continue + + # migrate + shout = migrateShout(entry, storage) + storage['shouts']['by_oid'][entry['_id']] = shout + storage['shouts']['by_slug'][shout['slug']] = shout + # shouts.topics + if not shout['topics']: print('[migration] no topics!') + + # wuth author + author = shout['authors'][0].slug + if author == 'discours': discours_author += 1 + # print('[migration] ' + shout['slug'] + ' with author ' + author) + + if entry.get('published'): + if 'mdx' in args: export_mdx(shout) + pub_counter += 1 + + # print main counter + counter += 1 + line = str(counter+1) + ': ' + shout['slug'] + " @" + author + print(line) + + print('[migration] ' + str(counter) + ' content items were migrated') + print('[migration] ' + str(pub_counter) + ' have been published') + print('[migration] ' + str(discours_author) + ' authored by @discours') + return storage + + +def comments_handle(storage): + id_map = {} + ignored_counter = 0 + missed_shouts = {} + for oldcomment in storage['reactions']['data']: + if not oldcomment.get('deleted'): + reaction = migrateComment(oldcomment, storage) + if type(reaction) == str: + missed_shouts[reaction] = oldcomment + elif type(reaction) == Reaction: + reaction = reaction.dict() + id = reaction['id'] + oid = reaction['oid'] + id_map[oid] = id + else: + ignored_counter += 1 + + for reaction in storage['reactions']['data']: migrateComment_2stage( + reaction, id_map) + print('[migration] ' + str(len(id_map)) + ' comments migrated') + print('[migration] ' + str(ignored_counter) + ' comments ignored') + print('[migration] ' + str(len(missed_shouts.keys())) + + ' commented shouts missed') + missed_counter = 0 + for missed in missed_shouts.values(): + missed_counter += len(missed) + print('[migration] ' + str(missed_counter) + ' comments dropped') + return storage + + +def bson_handle(): + # decode bson # preparing data + from migration import bson2json + bson2json.json_tables() + + +def export_one(slug, storage): + topics_handle(storage) + users_handle(storage) + shouts_handle(storage) + export_slug(slug, storage) + + +def all_handle(storage, args): + print('[migration] handle everything') + users_handle(storage) + topics_handle(storage) + shouts_handle(storage, args) + comments_handle(storage) + # export_email_subscriptions() + print('[migration] 
done!') + + +def data_load(): + storage = { + 'content_items': { + 'by_oid': {}, + 'by_slug': {}, + }, + 'shouts': { + 'by_oid': {}, + 'by_slug': {}, + 'data': [] + }, + 'reactions': { + 'by_oid': {}, + 'by_slug': {}, + 'by_content': {}, + 'data': [] + }, + 'topics': { + 'by_oid': {}, + 'by_slug': {}, + 'cats': [], + 'tags': [], + }, + 'users': { + 'by_oid': {}, + 'by_slug': {}, + 'data': [] + }, + 'replacements': json.loads(open('migration/tables/replacements.json').read()) + } + users_data = [] + tags_data = [] + cats_data = [] + comments_data = [] + content_data = [] + try: + users_data = json.loads(open('migration/data/users.json').read()) + print('[migration] ' + str(len(users_data)) + ' users ') + tags_data = json.loads(open('migration/data/tags.json').read()) + storage['topics']['tags'] = tags_data + print('[migration] ' + str(len(tags_data)) + ' tags ') + cats_data = json.loads( + open('migration/data/content_item_categories.json').read()) + storage['topics']['cats'] = cats_data + print('[migration] ' + str(len(cats_data)) + ' cats ') + comments_data = json.loads(open('migration/data/comments.json').read()) + storage['reactions']['data'] = comments_data + print('[migration] ' + str(len(comments_data)) + ' comments ') + content_data = json.loads(open('migration/data/content_items.json').read()) + storage['shouts']['data'] = content_data + print('[migration] ' + str(len(content_data)) + ' content items ') + # fill out storage + for x in users_data: + storage['users']['by_oid'][x['_id']] = x + # storage['users']['by_slug'][x['slug']] = x + # no user.slug yet + print('[migration] ' + str(len(storage['users'] + ['by_oid'].keys())) + ' users by oid') + for x in tags_data: + storage['topics']['by_oid'][x['_id']] = x + storage['topics']['by_slug'][x['slug']] = x + for x in cats_data: + storage['topics']['by_oid'][x['_id']] = x + storage['topics']['by_slug'][x['slug']] = x + print('[migration] ' + str(len(storage['topics'] + ['by_slug'].keys())) + ' topics by slug') + for item in content_data: + slug = get_shout_slug(item) + storage['content_items']['by_slug'][slug] = item + storage['content_items']['by_oid'][item['_id']] = item + print('[migration] ' + str(len(content_data)) + ' content items') + for x in comments_data: + storage['reactions']['by_oid'][x['_id']] = x + cid = x['contentItem'] + storage['reactions']['by_content'][cid] = x + ci = storage['content_items']['by_oid'].get(cid, {}) + if 'slug' in ci: storage['reactions']['by_slug'][ci['slug']] = x + print('[migration] ' + str(len(storage['reactions'] + ['by_content'].keys())) + ' with comments') + except Exception as e: raise e + storage['users']['data'] = users_data + storage['topics']['tags'] = tags_data + storage['topics']['cats'] = cats_data + storage['shouts']['data'] = content_data + storage['reactions']['data'] = comments_data + return storage + + +def mongo_download(url): + if not url: raise Exception('\n\nYou should set MONGODB_URL enviroment variable\n') + print('[migration] mongodump ' + url) + subprocess.call([ + 'mongodump', + '--uri', url + '/?authSource=admin', + '--forceTableScan', + ], stderr = subprocess.STDOUT) + + +def create_pgdump(): + pgurl = DB_URL + if not pgurl: raise Exception('\n\nYou should set DATABASE_URL enviroment variable\n') + subprocess.call( + [ 'pg_dump', pgurl, '-f', TODAY + '-pgdump.sql'], + stderr = subprocess.STDOUT + ) + subprocess.call([ + 'scp', + TODAY + '-pgdump.sql', + 'root@build.discours.io:/root/.' 
+ ]) + + +def handle_auto(): + print('[migration] no command given, auto mode') + mongo_download(os.getenv('MONGODB_URL')) + bson_handle() + all_handle(data_load(), sys.argv) + create_pgdump() + +def migrate(): + if len(sys.argv) > 1: + cmd=sys.argv[1] + if type(cmd) == str: print('[migration] command: ' + cmd) + if cmd == 'mongodb': + mongo_download(sys.argv[2]) + elif cmd == 'bson': + bson_handle() + else: + storage=data_load() + if cmd == '-': export_one(sys.argv[2], storage) + else: all_handle(storage, sys.argv) + elif len(sys.argv) == 1: + handle_auto() + else: + print('[migration] usage: python migrate.py ') + print('[migration] commands: mongodb, bson, all, all mdx, - ') + +if __name__ == '__main__': + migrate() diff --git a/migration/__init__.py b/migration/__init__.py new file mode 100644 index 00000000..e2750039 --- /dev/null +++ b/migration/__init__.py @@ -0,0 +1 @@ +__all__ = ["tables", "bson2json", "html2md"] \ No newline at end of file diff --git a/migration/bson2json.py b/migration/bson2json.py new file mode 100644 index 00000000..ba2802db --- /dev/null +++ b/migration/bson2json.py @@ -0,0 +1,28 @@ +import os +import bson +import json + +from migration.utils import DateTimeEncoder + +def json_tables(): + print('[migration] unpack dump/discours/*.bson to migration/data/*.json') + data = { + "content_items": [], + "content_item_categories": [], + "tags": [], + "email_subscriptions": [], + "users": [], + "comments": [] + } + for table in data.keys(): + lc = [] + with open('dump/discours/'+table+'.bson', 'rb') as f: + bs = f.read() + f.close() + base = 0 + while base < len(bs): + base, d = bson.decode_document(bs, base) + lc.append(d) + data[table] = lc + open(os.getcwd() + '/migration/data/'+table+'.json', 'w').write(json.dumps(lc,cls=DateTimeEncoder)) + diff --git a/migration/export.py b/migration/export.py new file mode 100644 index 00000000..d4463aa8 --- /dev/null +++ b/migration/export.py @@ -0,0 +1,105 @@ + +from datetime import datetime +import json +import os +import frontmatter +from migration.extract import extract_html, prepare_html_body +from migration.utils import DateTimeEncoder + +OLD_DATE = '2016-03-05 22:22:00.350000' +EXPORT_DEST = '../discoursio-web/data/' +parentDir = '/'.join(os.getcwd().split('/')[:-1]) +contentDir = parentDir + '/discoursio-web/content/' +ts = datetime.now() + +def get_metadata(r): + authors = [] + for a in r['authors']: + authors.append({ # a short version for public listings + 'slug': a.slug or 'discours', + 'name': a.name or 'Дискурс', + 'userpic': a.userpic or 'https://discours.io/static/img/discours.png' + }) + metadata = {} + metadata['title'] = r.get('title', '').replace('{', '(').replace('}', ')') + metadata['authors'] = authors + metadata['createdAt'] = r.get('createdAt', ts) + metadata['layout'] = r['layout'] + metadata['topics'] = [topic for topic in r['topics']] + metadata['topics'].sort() + if r.get('cover', False): metadata['cover'] = r.get('cover') + return metadata + +def export_mdx(r): + # print('[export] mdx %s' % r['slug']) + content = '' + metadata = get_metadata(r) + content = frontmatter.dumps(frontmatter.Post(r['body'], **metadata)) + ext = 'mdx' + filepath = contentDir + r['slug'] + bc = bytes(content,'utf-8').decode('utf-8','ignore') + open(filepath + '.' 
+ ext, 'w').write(bc) + +def export_body(shout, storage): + entry = storage['content_items']['by_oid'][shout['oid']] + if entry: + shout['body'] = prepare_html_body(entry) # prepare_md_body(entry) + export_mdx(shout) + print('[export] html for %s' % shout['slug']) + body = extract_html(entry) + open(contentDir + shout['slug'] + '.html', 'w').write(body) + else: + raise Exception('no content_items entry found') + +def export_slug(slug, storage): + shout = storage['shouts']['by_slug'][slug] + shout = storage['shouts']['by_slug'].get(slug) + assert shout, '[export] no shout found by slug: %s ' % slug + author = shout['authors'][0] + assert author, '[export] no author error' + export_body(shout, storage) + +def export_email_subscriptions(): + email_subscriptions_data = json.loads(open('migration/data/email_subscriptions.json').read()) + for data in email_subscriptions_data: + # migrate_email_subscription(data) + pass + print('[migration] ' + str(len(email_subscriptions_data)) + ' email subscriptions exported') + +def export_shouts(storage): + # update what was just migrated or load json again + if len(storage['users']['by_slugs'].keys()) == 0: + storage['users']['by_slugs'] = json.loads(open(EXPORT_DEST + 'authors.json').read()) + print('[migration] ' + str(len(storage['users']['by_slugs'].keys())) + ' exported authors ') + if len(storage['shouts']['by_slugs'].keys()) == 0: + storage['shouts']['by_slugs'] = json.loads(open(EXPORT_DEST + 'articles.json').read()) + print('[migration] ' + str(len(storage['shouts']['by_slugs'].keys())) + ' exported articles ') + for slug in storage['shouts']['by_slugs'].keys(): export_slug(slug, storage) + +def export_json(export_articles = {}, export_authors = {}, export_topics = {}, export_comments = {}): + open(EXPORT_DEST + 'authors.json', 'w').write(json.dumps(export_authors, + cls=DateTimeEncoder, + indent=4, + sort_keys=True, + ensure_ascii=False)) + print('[migration] ' + str(len(export_authors.items())) + ' authors exported') + open(EXPORT_DEST + 'topics.json', 'w').write(json.dumps(export_topics, + cls=DateTimeEncoder, + indent=4, + sort_keys=True, + ensure_ascii=False)) + print('[migration] ' + str(len(export_topics.keys())) + ' topics exported') + + open(EXPORT_DEST + 'articles.json', 'w').write(json.dumps(export_articles, + cls=DateTimeEncoder, + indent=4, + sort_keys=True, + ensure_ascii=False)) + print('[migration] ' + str(len(export_articles.items())) + ' articles exported') + open(EXPORT_DEST + 'comments.json', 'w').write(json.dumps(export_comments, + cls=DateTimeEncoder, + indent=4, + sort_keys=True, + ensure_ascii=False)) + print('[migration] ' + str(len(export_comments.items())) + ' exported articles with comments') + diff --git a/migration/extract.py b/migration/extract.py new file mode 100644 index 00000000..c8220609 --- /dev/null +++ b/migration/extract.py @@ -0,0 +1,324 @@ +import os +import re +import base64 +from migration.html2text import html2text + +TOOLTIP_REGEX = r'(\/\/\/(.+)\/\/\/)' +contentDir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', '..', 'discoursio-web', 'content') +s3 = 'https://discours-io.s3.amazonaws.com/' +cdn = 'https://assets.discours.io' + +def replace_tooltips(body): + # FIXME: if you prefer regexp + newbody = body + matches = list(re.finditer(TOOLTIP_REGEX, body, re.IGNORECASE | re.MULTILINE))[1:] + for match in matches: + newbody = body.replace(match.group(1), '') # FIXME: doesn't work + if len(matches) > 0: + print('[extract] found %d tooltips' % len(matches)) + return newbody + + +def 
place_tooltips(body): + parts = body.split('&&&') + l = len(parts) + newparts = list(parts) + placed = False + if l & 1: + if l > 1: + i = 1 + print('[extract] found %d tooltips' % (l-1)) + for part in parts[1:]: + if i & 1: + placed = True + if 'a class="footnote-url" href=' in part: + print('[extract] footnote: ' + part) + fn = 'a class="footnote-url" href="' + link = part.split(fn,1)[1].split('"', 1)[0] + extracted_part = part.split(fn,1)[0] + ' ' + part.split('/', 1)[-1] + newparts[i] = '' + extracted_part + '' + else: + newparts[i] = '%s' % part + # print('[extract] ' + newparts[i]) + else: + # print('[extract] ' + part[:10] + '..') + newparts[i] = part + i += 1 + return (''.join(newparts), placed) + +IMG_REGEX = r"\!\[(.*?)\]\((data\:image\/(png|jpeg|jpg);base64\,((?:[A-Za-z\d+\/]{4})*(?:[A-Za-z\d+\/]{3}=|[A-Za-z\d+\/]{2}==)))\)" + +parentDir = '/'.join(os.getcwd().split('/')[:-1]) +public = parentDir + '/discoursio-web/public' +cache = {} + + +def reextract_images(body, oid): + # FIXME: if you prefer regexp + matches = list(re.finditer(IMG_REGEX, body, re.IGNORECASE | re.MULTILINE))[1:] + i = 0 + for match in matches: + print('[extract] image ' + match.group(1)) + ext = match.group(3) + name = oid + str(i) + link = public + '/upload/image-' + name + '.' + ext + img = match.group(4) + title = match.group(1) # FIXME: this is not the title + if img not in cache: + content = base64.b64decode(img + '==') + print(str(len(img)) + ' image bytes been written') + open('../' + link, 'wb').write(content) + cache[img] = name + i += 1 + else: + print('[extract] image cached ' + cache[img]) + body.replace(str(match), '![' + title + '](' + cdn + link + ')') # FIXME: this does not work + return body + +IMAGES = { + 'data:image/png': 'png', + 'data:image/jpg': 'jpg', + 'data:image/jpeg': 'jpg', +} + +b64 = ';base64,' + +def extract_imageparts(bodyparts, prefix): + # recursive loop + newparts = list(bodyparts) + for current in bodyparts: + i = bodyparts.index(current) + for mime in IMAGES.keys(): + if mime == current[-len(mime):] and (i + 1 < len(bodyparts)): + print('[extract] ' + mime) + next = bodyparts[i+1] + ext = IMAGES[mime] + b64end = next.index(')') + b64encoded = next[:b64end] + name = prefix + '-' + str(len(cache)) + link = '/upload/image-' + name + '.' + ext + print('[extract] name: ' + name) + print('[extract] link: ' + link) + print('[extract] %d bytes' % len(b64encoded)) + if b64encoded not in cache: + try: + content = base64.b64decode(b64encoded + '==') + open(public + link, 'wb').write(content) + print('[extract] ' +str(len(content)) + ' image bytes been written') + cache[b64encoded] = name + except: + raise Exception + # raise Exception('[extract] error decoding image %r' %b64encoded) + else: + print('[extract] cached link ' + cache[b64encoded]) + name = cache[b64encoded] + link = cdn + '/upload/image-' + name + '.' + ext + newparts[i] = current[:-len(mime)] + current[-len(mime):] + link + next[-b64end:] + newparts[i+1] = next[:-b64end] + break + return extract_imageparts(newparts[i] + newparts[i+1] + b64.join(bodyparts[i+2:]), prefix) \ + if len(bodyparts) > (i + 1) else ''.join(newparts) + +def extract_dataimages(parts, prefix): + newparts = list(parts) + for part in parts: + i = parts.index(part) + if part.endswith(']('): + [ext, rest] = parts[i+1].split(b64) + name = prefix + '-' + str(len(cache)) + if ext == '/jpeg': ext = 'jpg' + else: ext = ext.replace('/', '') + link = '/upload/image-' + name + '.' 
+ ext + print('[extract] filename: ' + link) + b64end = rest.find(')') + if b64end !=-1: + b64encoded = rest[:b64end] + print('[extract] %d text bytes' % len(b64encoded)) + # write if not cached + if b64encoded not in cache: + try: + content = base64.b64decode(b64encoded + '==') + open(public + link, 'wb').write(content) + print('[extract] ' +str(len(content)) + ' image bytes') + cache[b64encoded] = name + except: + raise Exception + # raise Exception('[extract] error decoding image %r' %b64encoded) + else: + print('[extract] 0 image bytes, cached for ' + cache[b64encoded]) + name = cache[b64encoded] + + # update link with CDN + link = cdn + '/upload/image-' + name + '.' + ext + + # patch newparts + newparts[i+1] = link + rest[b64end:] + else: + raise Exception('cannot find the end of base64 encoded string') + else: + print('[extract] dataimage skipping part ' + str(i)) + continue + return ''.join(newparts) + +di = 'data:image' + +def extract_md_images(body, oid): + newbody = '' + body = body\ + .replace('\n! []('+di, '\n ![]('+di)\ + .replace('\n[]('+di, '\n![]('+di)\ + .replace(' []('+di, ' ![]('+di) + parts = body.split(di) + i = 0 + if len(parts) > 1: newbody = extract_dataimages(parts, oid) + else: newbody = body + return newbody + + +def cleanup(body): + newbody = body\ + .replace('<', '').replace('>', '')\ + .replace('{', '(').replace('}', ')')\ + .replace('…', '...')\ + .replace(' __ ', ' ')\ + .replace('_ _', ' ')\ + .replace('****', '')\ + .replace('\u00a0', ' ')\ + .replace('\u02c6', '^')\ + .replace('\u00a0',' ')\ + .replace('\ufeff', '')\ + .replace('\u200b', '')\ + .replace('\u200c', '')\ + # .replace('\u2212', '-') + return newbody + +def extract_md(body, oid): + newbody = body + if newbody: + newbody = extract_md_images(newbody, oid) + if not newbody: raise Exception('extract_images error') + newbody = cleanup(newbody) + if not newbody: raise Exception('cleanup error') + newbody, placed = place_tooltips(newbody) + if not newbody: raise Exception('place_tooltips error') + if placed: + newbody = 'import Tooltip from \'$/components/Article/Tooltip\'\n\n' + newbody + return newbody + +def prepare_md_body(entry): + # body modifications + body = '' + kind = entry.get('type') + addon = '' + if kind == 'Video': + addon = '' + for m in entry.get('media', []): + if 'youtubeId' in m: addon += '\n' + elif 'vimeoId' in m: addon += '\n' + else: + print('[extract] media is not supported') + print(m) + body = 'import VideoPlayer from \'$/components/Article/VideoPlayer\'\n\n' + addon + + elif kind == 'Music': + addon = '' + for m in entry.get('media', []): + artist = m.get('performer') + trackname = '' + if artist: trackname += artist + ' - ' + if 'title' in m: trackname += m.get('title','') + addon += '\n' + body = 'import MusicPlayer from \'$/components/Article/MusicPlayer\'\n\n' + addon + + body_orig = extract_html(entry) + if body_orig: body += extract_md(html2text(body_orig), entry['_id']) + if not body: print('[extract] empty MDX body') + return body + +def prepare_html_body(entry): + # body modifications + body = '' + kind = entry.get('type') + addon = '' + if kind == 'Video': + addon = '' + for m in entry.get('media', []): + if 'youtubeId' in m: + addon += '\n' + elif 'vimeoId' in m: + addon += '' + else: + print('[extract] media is not supported') + print(m) + body += addon + + elif kind == 'Music': + addon = '' + for m in entry.get('media', []): + artist = m.get('performer') + trackname = '' + if artist: trackname += artist + ' - ' + if 'title' in m: trackname += m.get('title','') 
+ addon += '<figure><figcaption>'
+ addon += trackname
+ addon += '</figcaption></figure>
' + body += addon + + body = extract_html(entry) + # if body_orig: body += extract_md(html2text(body_orig), entry['_id']) + if not body: print('[extract] empty HTML body') + return body + +def extract_html(entry): + body_orig = entry.get('body') or '' + media = entry.get('media', []) + kind = entry.get('type') or '' + print('[extract] kind: ' + kind) + mbodies = set([]) + if media: + # print('[extract] media is found') + for m in media: + mbody = m.get('body', '') + addon = '' + if kind == 'Literature': + mbody = m.get('literatureBody') or m.get('body', '') + elif kind == 'Image': + cover = '' + if 'thumborId' in entry: cover = cdn + '/unsafe/1600x/' + entry['thumborId'] + if not cover: + if 'image' in entry: cover = entry['image'].get('url', '') + if 'cloudinary' in cover: cover = '' + # else: print('[extract] cover: ' + cover) + title = m.get('title','').replace('\n', ' ').replace(' ', ' ') + u = m.get('thumborId') or cover or '' + if title: addon += '
<figure><figcaption>' + title + '</figcaption></figure>
\n' + if not u.startswith('http'): u = s3 + u + if not u: print('[extract] no image url for ' + str(m)) + if 'cloudinary' in u: u = 'img/lost.svg' + if u != cover or (u == cover and media.index(m) == 0): + addon += '\"'+\n' + if addon: + body_orig += addon + # print('[extract] item addon: ' + addon) + # if addon: print('[extract] addon: %s' % addon) + if mbody and mbody not in mbodies: + mbodies.add(mbody) + body_orig += mbody + if len(list(mbodies)) != len(media): + print('[extract] %d/%d media item bodies appended' % (len(list(mbodies)),len(media))) + # print('[extract] media items body: \n' + body_orig) + if not body_orig: + for up in entry.get('bodyHistory', []) or []: + body_orig = up.get('text', '') or '' + if body_orig: + print('[extract] got html body from history') + break + if not body_orig: print('[extract] empty HTML body') + # body_html = str(BeautifulSoup(body_orig, features="html.parser")) + return body_orig \ No newline at end of file diff --git a/migration/html2text/__init__.py b/migration/html2text/__init__.py new file mode 100644 index 00000000..26810d42 --- /dev/null +++ b/migration/html2text/__init__.py @@ -0,0 +1,1041 @@ +"""html2text: Turn HTML into equivalent Markdown-structured text.""" + +import html.entities +import html.parser +import re +import string +import urllib.parse as urlparse +from textwrap import wrap +from typing import Dict, List, Optional, Tuple, Union + +from . import config +from .elements import AnchorElement, ListElement +from .typing import OutCallback +from .utils import ( + dumb_css_parser, + element_style, + escape_md, + escape_md_section, + google_fixed_width_font, + google_has_height, + google_list_style, + google_text_emphasis, + hn, + list_numbering_start, + pad_tables_in_text, + skipwrap, + unifiable_n, +) + +__version__ = (2020, 1, 16) + + +# TODO: +# Support decoded entities with UNIFIABLE. + + +class HTML2Text(html.parser.HTMLParser): + def __init__( + self, + out: Optional[OutCallback] = None, + baseurl: str = "", + bodywidth: int = config.BODY_WIDTH, + ) -> None: + """ + Input parameters: + out: possible custom replacement for self.outtextf (which + appends lines of text). 
+ baseurl: base URL of the document we process + """ + super().__init__(convert_charrefs=False) + + # Config options + self.split_next_td = False + self.td_count = 0 + self.table_start = False + self.unicode_snob = config.UNICODE_SNOB # covered in cli + self.escape_snob = config.ESCAPE_SNOB # covered in cli + self.links_each_paragraph = config.LINKS_EACH_PARAGRAPH + self.body_width = bodywidth # covered in cli + self.skip_internal_links = config.SKIP_INTERNAL_LINKS # covered in cli + self.inline_links = config.INLINE_LINKS # covered in cli + self.protect_links = config.PROTECT_LINKS # covered in cli + self.google_list_indent = config.GOOGLE_LIST_INDENT # covered in cli + self.ignore_links = config.IGNORE_ANCHORS # covered in cli + self.ignore_mailto_links = config.IGNORE_MAILTO_LINKS # covered in cli + self.ignore_images = config.IGNORE_IMAGES # covered in cli + self.images_as_html = config.IMAGES_AS_HTML # covered in cli + self.images_to_alt = config.IMAGES_TO_ALT # covered in cli + self.images_with_size = config.IMAGES_WITH_SIZE # covered in cli + self.ignore_emphasis = config.IGNORE_EMPHASIS # covered in cli + self.bypass_tables = config.BYPASS_TABLES # covered in cli + self.ignore_tables = config.IGNORE_TABLES # covered in cli + self.google_doc = False # covered in cli + self.ul_item_mark = "*" # covered in cli + self.emphasis_mark = "_" # covered in cli + self.strong_mark = "**" + self.single_line_break = config.SINGLE_LINE_BREAK # covered in cli + self.use_automatic_links = config.USE_AUTOMATIC_LINKS # covered in cli + self.hide_strikethrough = False # covered in cli + self.mark_code = config.MARK_CODE + self.wrap_list_items = config.WRAP_LIST_ITEMS # covered in cli + self.wrap_links = config.WRAP_LINKS # covered in cli + self.wrap_tables = config.WRAP_TABLES + self.pad_tables = config.PAD_TABLES # covered in cli + self.default_image_alt = config.DEFAULT_IMAGE_ALT # covered in cli + self.tag_callback = None + self.open_quote = config.OPEN_QUOTE # covered in cli + self.close_quote = config.CLOSE_QUOTE # covered in cli + self.header_id = None + self.span_highlight = False + self.span_lead = False + + if out is None: + self.out = self.outtextf + else: + self.out = out + + # empty list to store output characters before they are "joined" + self.outtextlist = [] # type: List[str] + + self.quiet = 0 + self.p_p = 0 # number of newline character to print before next output + self.outcount = 0 + self.start = True + self.space = False + self.a = [] # type: List[AnchorElement] + self.astack = [] # type: List[Optional[Dict[str, Optional[str]]]] + self.maybe_automatic_link = None # type: Optional[str] + self.empty_link = False + self.absolute_url_matcher = re.compile(r"^[a-zA-Z+]+://") + self.acount = 0 + self.list = [] # type: List[ListElement] + self.blockquote = 0 + self.pre = False + self.startpre = False + self.code = False + self.quote = False + self.br_toggle = "" + self.lastWasNL = False + self.lastWasList = False + self.style = 0 + self.style_def = {} # type: Dict[str, Dict[str, str]] + self.tag_stack = ( + [] + ) # type: List[Tuple[str, Dict[str, Optional[str]], Dict[str, str]]] + self.emphasis = 0 + self.drop_white_space = 0 + self.inheader = False + # Current abbreviation definition + self.abbr_title = None # type: Optional[str] + # Last inner HTML (for abbr being defined) + self.abbr_data = None # type: Optional[str] + # Stack of abbreviations to write later + self.abbr_list = {} # type: Dict[str, str] + self.baseurl = baseurl + self.stressed = False + self.preceding_stressed = False 
+ self.preceding_data = "" + self.current_tag = "" + self.current_class = "" + + config.UNIFIABLE["nbsp"] = " _place_holder;" + + def feed(self, data: str) -> None: + data = data.replace("", "") + super().feed(data) + + def handle(self, data: str) -> str: + self.feed(data) + self.feed("") + markdown = self.optwrap(self.finish()) + if self.pad_tables: + return pad_tables_in_text(markdown) + else: + return markdown + + def outtextf(self, s: str) -> None: + self.outtextlist.append(s) + if s: + self.lastWasNL = s[-1] == "\n" + + def finish(self) -> str: + self.close() + + self.pbr() + self.o("", force="end") + + outtext = "".join(self.outtextlist) + + if self.unicode_snob: + nbsp = html.entities.html5["nbsp;"] + else: + nbsp = " " + outtext = outtext.replace(" _place_holder;", nbsp) + + # Clear self.outtextlist to avoid memory leak of its content to + # the next handling. + self.outtextlist = [] + + return outtext + + def handle_charref(self, c: str) -> None: + self.handle_data(self.charref(c), True) + + def handle_entityref(self, c: str) -> None: + ref = self.entityref(c) + + # ref may be an empty string (e.g. for ‎/‏ markers that should + # not contribute to the final output). + # self.handle_data cannot handle a zero-length string right after a + # stressed tag or mid-text within a stressed tag (text get split and + # self.stressed/self.preceding_stressed gets switched after the first + # part of that text). + if ref: + self.handle_data(ref, True) + + def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]) -> None: + self.handle_tag(tag, dict(attrs), start=True) + + def handle_endtag(self, tag: str) -> None: + self.handle_tag(tag, {}, start=False) + + def previousIndex(self, attrs: Dict[str, Optional[str]]) -> Optional[int]: + """ + :type attrs: dict + + :returns: The index of certain set of attributes (of a link) in the + self.a list. 
If the set of attributes is not found, returns None + :rtype: int + """ + if "href" not in attrs: + return None + + match = False + for i, a in enumerate(self.a): + if "href" in a.attrs and a.attrs["href"] == attrs["href"]: + if "title" in a.attrs or "title" in attrs: + if ( + "title" in a.attrs + and "title" in attrs + and a.attrs["title"] == attrs["title"] + ): + match = True + else: + match = True + + if match: + return i + return None + + def handle_emphasis( + self, start: bool, tag_style: Dict[str, str], parent_style: Dict[str, str] + ) -> None: + """ + Handles various text emphases + """ + tag_emphasis = google_text_emphasis(tag_style) + parent_emphasis = google_text_emphasis(parent_style) + + # handle Google's text emphasis + strikethrough = "line-through" in tag_emphasis and self.hide_strikethrough + + # google and others may mark a font's weight as `bold` or `700` + bold = False + for bold_marker in config.BOLD_TEXT_STYLE_VALUES: + bold = bold_marker in tag_emphasis and bold_marker not in parent_emphasis + if bold: + break + + italic = "italic" in tag_emphasis and "italic" not in parent_emphasis + fixed = ( + google_fixed_width_font(tag_style) + and not google_fixed_width_font(parent_style) + and not self.pre + ) + + if start: + # crossed-out text must be handled before other attributes + # in order not to output qualifiers unnecessarily + if bold or italic or fixed: + self.emphasis += 1 + if strikethrough: + self.quiet += 1 + if italic: + self.o(self.emphasis_mark) + self.drop_white_space += 1 + if bold: + self.o(self.strong_mark) + self.drop_white_space += 1 + if fixed: + self.o("`") + self.drop_white_space += 1 + self.code = True + else: + if bold or italic or fixed: + # there must not be whitespace before closing emphasis mark + self.emphasis -= 1 + self.space = False + if fixed: + if self.drop_white_space: + # empty emphasis, drop it + self.drop_white_space -= 1 + else: + self.o("`") + self.code = False + if bold: + if self.drop_white_space: + # empty emphasis, drop it + self.drop_white_space -= 1 + else: + self.o(self.strong_mark) + if italic: + if self.drop_white_space: + # empty emphasis, drop it + self.drop_white_space -= 1 + else: + self.o(self.emphasis_mark) + # space is only allowed after *all* emphasis marks + if (bold or italic) and not self.emphasis: + self.o(" ") + if strikethrough: + self.quiet -= 1 + + def handle_tag( + self, tag: str, attrs: Dict[str, Optional[str]], start: bool + ) -> None: + self.current_tag = tag + + if self.tag_callback is not None: + if self.tag_callback(self, tag, attrs, start) is True: + return + + # first thing inside the anchor tag is another tag + # that produces some output + if ( + start + and self.maybe_automatic_link is not None + and tag not in ["p", "div", "style", "dl", "dt"] + and (tag != "img" or self.ignore_images) + ): + self.o("[") + self.maybe_automatic_link = None + self.empty_link = False + + if self.google_doc: + # the attrs parameter is empty for a closing tag. in addition, we + # need the attributes of the parent nodes in order to get a + # complete style description for the current element. we assume + # that google docs export well formed html. 
+ parent_style = {} # type: Dict[str, str] + if start: + if self.tag_stack: + parent_style = self.tag_stack[-1][2] + tag_style = element_style(attrs, self.style_def, parent_style) + self.tag_stack.append((tag, attrs, tag_style)) + else: + dummy, attrs, tag_style = ( + self.tag_stack.pop() if self.tag_stack else (None, {}, {}) + ) + if self.tag_stack: + parent_style = self.tag_stack[-1][2] + + if hn(tag): + # check if nh is inside of an 'a' tag + # (incorrect but found in the wild) + if self.astack: + if start: + self.inheader = True + # are inside link name, so only add '#' if it can appear before '[' + if self.outtextlist and self.outtextlist[-1] == "[": + self.outtextlist.pop() + self.space = False + self.o(hn(tag) * "#" + " ") + self.o("[") + self.header_id = attrs.get('id') + else: + self.p() + if start: + self.inheader = True + self.o(hn(tag) * "#" + " ") + if self.header_id: + self.o(' {#' + self.header_id + '}') + self.header_id = None + else: + self.inheader = False + return # prevent redundant emphasis marks on headers + + if 'class' in attrs: + self.current_class = attrs.get('class', '') + # self.p() + if not start: + self.current_class = '' + + if tag == 'span': + if 'style' in attrs: + if attrs.get('style') == 'text-align: center': + self.current_class = 'center' + if not start: + self.current_class = '' + if start: + if self.current_class == 'highlight' and \ + self.inheader == False and \ + self.span_lead == False and \ + self.astack == False: + self.o('`') # NOTE: same as + self.span_highlight = True + elif self.current_class == 'lead' and \ + self.inheader == False and \ + self.span_highlight == False: + #self.o("==") # NOTE: CriticMarkup {== + self.span_lead = True + else: + if self.span_highlight: + self.o('`') + self.span_highlight = False + elif self.span_lead: + #self.o('==') + self.span_lead = False + + if tag in ["p", "div"]: + if self.google_doc: + if start and google_has_height(tag_style): + self.p() + else: + self.soft_br() + elif self.astack or self.inheader: + pass + else: + self.p() + + if tag == "br" and start: + if self.blockquote > 0: + self.o(" \n> ") + else: + self.o(" \n") + + if tag == "hr" and start: + self.p() + self.o("* * *") + self.p() + + if tag in ["head", "style", "script"]: + if start: + self.quiet += 1 + else: + self.quiet -= 1 + + if tag == "style": + if start: + self.style += 1 + else: + self.style -= 1 + + if tag in ["body"]: + self.quiet = 0 # sites like 9rules.com never close + + if tag == "blockquote": + if start: + self.p() + self.o("> ", force=True) + self.start = True + self.blockquote += 1 + else: + self.blockquote -= 1 + self.p() + + if tag in ["em", "i", "u"] and not self.ignore_emphasis: + # Separate with a space if we immediately follow an alphanumeric + # character, since otherwise Markdown won't render the emphasis + # marks, and we'll be left with eg 'foo_bar_' visible. + # (Don't add a space otherwise, though, since there isn't one in the + # original HTML.) + if ( + start + and self.preceding_data + and self.preceding_data[-1] not in string.whitespace + and self.preceding_data[-1] not in string.punctuation + ): + emphasis = " " + self.emphasis_mark + self.preceding_data += " " + else: + emphasis = self.emphasis_mark + + self.o(emphasis) + if start: + self.stressed = True + + if tag in ["strong", "b"] and not self.ignore_emphasis: + # Separate with space if we immediately follow an * character, since + # without it, Markdown won't render the resulting *** correctly. 
+ # (Don't add a space otherwise, though, since there isn't one in the + # original HTML.) + if not self.inheader and not self.astack \ + and not self.span_lead and not self.span_highlight: + if ( + start + and self.preceding_data + and self.preceding_data[-1] == self.strong_mark[0] + ): + strong = " " + self.strong_mark + self.preceding_data += " " + else: + strong = self.strong_mark + + self.o(strong) + if start: + self.stressed = True + + if tag in ["del", "strike", "s"]: + if start and self.preceding_data and self.preceding_data[-1] == "~": + strike = " ~~" + self.preceding_data += " " + else: + strike = "~~" + + self.o(strike) + if start: + self.stressed = True + + if self.google_doc: + if not self.inheader: + # handle some font attributes, but leave headers clean + self.handle_emphasis(start, tag_style, parent_style) + + if tag in ["kbd", "code", "tt"] and not self.pre: + self.o("`") # TODO: `` `this` `` + self.code = not self.code + + if tag == "abbr": + if start: + self.abbr_title = None + self.abbr_data = "" + if "title" in attrs: + self.abbr_title = attrs["title"] + else: + if self.abbr_title is not None: + assert self.abbr_data is not None + self.abbr_list[self.abbr_data] = self.abbr_title + self.abbr_title = None + self.abbr_data = None + + if tag == "q": + if not self.quote: + self.o(self.open_quote) + else: + self.o(self.close_quote) + self.quote = not self.quote + + def link_url(self: HTML2Text, link: str, title: str = "") -> None: + url = urlparse.urljoin(self.baseurl, link) + title = ' "{}"'.format(title) if title.strip() else "" + self.o("]({url}{title})".format(url=escape_md(url), title=title)) + + if tag == "a" and not self.ignore_links: + if start: + if 'data-original-title' in attrs: + # WARNING: old discours specific code + self.o('&&&%s&&&' % attrs['data-original-title']) + else: + if ( + "href" in attrs + and not attrs["href"].startswith('#_ftn') + and attrs["href"] is not None + and not (self.skip_internal_links and attrs["href"].startswith("#")) + and not (self.ignore_mailto_links and attrs["href"].startswith("mailto:")) + ): + self.astack.append(attrs) + self.maybe_automatic_link = attrs["href"] + self.empty_link = True + if self.protect_links: + attrs["href"] = "<" + attrs["href"] + ">" + else: + self.astack.append(None) + else: + if self.astack: + a = self.astack.pop() + if self.maybe_automatic_link and not self.empty_link: + self.maybe_automatic_link = None + elif a: + assert a["href"] is not None + if self.empty_link: + self.o("[") + self.empty_link = False + self.maybe_automatic_link = None + if self.inline_links: + self.p_p = 0 + title = a.get("title") or "" + title = escape_md(title) + link_url(self, a["href"], title) + else: + i = self.previousIndex(a) + if i is not None: + a_props = self.a[i] + else: + self.acount += 1 + a_props = AnchorElement(a, self.acount, self.outcount) + self.a.append(a_props) + self.o("][" + str(a_props.count) + "]") + + if tag == "img" and start and not self.ignore_images: + # skip cloudinary images + if "src" in attrs and 'cloudinary' not in attrs['src']: + assert attrs["src"] is not None + if not self.images_to_alt: + attrs["href"] = attrs["src"] + alt = attrs.get("alt") or self.default_image_alt + + # If we have images_with_size, write raw html including width, + # height, and alt attributes + if self.images_as_html or ( + self.images_with_size and ("width" in attrs or "height" in attrs) + ): + self.o("") + return + + # If we have a link to create, output the start + if self.maybe_automatic_link is not None: + href = 
self.maybe_automatic_link + if ( + self.images_to_alt + and escape_md(alt) == href + and self.absolute_url_matcher.match(href) + ): + self.o("<" + escape_md(alt) + ">") + self.empty_link = False + return + else: + self.o("[") + self.maybe_automatic_link = None + self.empty_link = False + + # If we have images_to_alt, we discard the image itself, + # considering only the alt text. + if self.images_to_alt: + self.o(escape_md(alt)) + else: + self.o("![" + escape_md(alt) + "]") + if self.inline_links: + href = attrs.get("href") or "" + self.o( + "(" + escape_md(urlparse.urljoin(self.baseurl, href)) + ")" + ) + else: + i = self.previousIndex(attrs) + if i is not None: + a_props = self.a[i] + else: + self.acount += 1 + a_props = AnchorElement(attrs, self.acount, self.outcount) + self.a.append(a_props) + self.o("[" + str(a_props.count) + "]") + + if tag == "dl" and start: + self.p() + if tag == "dt" and not start: + self.pbr() + if tag == "dd" and start: + self.o(" ") + if tag == "dd" and not start: + self.pbr() + + if tag in ["ol", "ul"]: + # Google Docs create sub lists as top level lists + if not self.list and not self.lastWasList: + self.p() + if start: + if self.google_doc: + list_style = google_list_style(tag_style) + else: + list_style = tag + numbering_start = list_numbering_start(attrs) + self.list.append(ListElement(list_style, numbering_start)) + else: + if self.list: + self.list.pop() + if not self.google_doc and not self.list: + self.o("\n") + self.lastWasList = True + else: + self.lastWasList = False + + if tag == "li": + self.pbr() + if start: + if self.list: + li = self.list[-1] + else: + li = ListElement("ul", 0) + if self.google_doc: + self.o(" " * self.google_nest_count(tag_style)) + else: + # Indent two spaces per list, except use three spaces for an + # unordered list inside an ordered list. + # https://spec.commonmark.org/0.28/#motivation + # TODO: line up
  1. s > 9 correctly. + parent_list = None + for list in self.list: + self.o( + " " if parent_list == "ol" and list.name == "ul" else " " + ) + parent_list = list.name + + if li.name == "ul": + self.o(self.ul_item_mark + " ") + elif li.name == "ol": + li.num += 1 + self.o(str(li.num) + ". ") + self.start = True + + if tag in ["table", "tr", "td", "th"]: + if self.ignore_tables: + if tag == "tr": + if start: + pass + else: + self.soft_br() + else: + pass + + elif self.bypass_tables: + if start: + self.soft_br() + if tag in ["td", "th"]: + if start: + self.o("<{}>\n\n".format(tag)) + else: + self.o("\n".format(tag)) + else: + if start: + self.o("<{}>".format(tag)) + else: + self.o("".format(tag)) + + else: + if tag == "table": + if start: + self.table_start = True + if self.pad_tables: + self.o("<" + config.TABLE_MARKER_FOR_PAD + ">") + self.o(" \n") + else: + if self.pad_tables: + # add break in case the table is empty or its 1 row table + self.soft_br() + self.o("") + self.o(" \n") + if tag in ["td", "th"] and start: + if self.split_next_td: + self.o("| ") + self.split_next_td = True + + if tag == "tr" and start: + self.td_count = 0 + if tag == "tr" and not start: + self.split_next_td = False + self.soft_br() + if tag == "tr" and not start and self.table_start: + # Underline table header + self.o("|".join(["---"] * self.td_count)) + self.soft_br() + self.table_start = False + if tag in ["td", "th"] and start: + self.td_count += 1 + + if tag == "pre": + if start: + self.startpre = True + self.pre = True + else: + self.pre = False + if self.mark_code: + self.out("\n[/code]") + self.p() + + # TODO: Add docstring for these one letter functions + def pbr(self) -> None: + "Pretty print has a line break" + if self.p_p == 0: + self.p_p = 1 + + def p(self) -> None: + "Set pretty print to 1 or 2 lines" + self.p_p = 1 if self.single_line_break else 2 + + def soft_br(self) -> None: + "Soft breaks" + self.pbr() + self.br_toggle = " " + + def o( + self, data: str, puredata: bool = False, force: Union[bool, str] = False + ) -> None: + """ + Deal with indentation and whitespace + """ + if self.abbr_data is not None: + self.abbr_data += data + + if not self.quiet: + if self.google_doc: + # prevent white space immediately after 'begin emphasis' + # marks ('**' and '_') + lstripped_data = data.lstrip() + if self.drop_white_space and not (self.pre or self.code): + data = lstripped_data + if lstripped_data != "": + self.drop_white_space = 0 + + if puredata and not self.pre: + # This is a very dangerous call ... it could mess up + # all handling of   when not handled properly + # (see entityref) + data = re.sub(r"\s+", r" ", data) + if data and data[0] == " ": + self.space = True + data = data[1:] + if not data and not force: + return + + if self.startpre: + # self.out(" :") #TODO: not output when already one there + if not data.startswith("\n") and not data.startswith("\r\n"): + #
<pre>stuff...
    +					data = "\n" + data
    +				if self.mark_code:
    +					self.out("\n[code]")
    +					self.p_p = 0
    +
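+			# one ">" per open blockquote level; a trailing space is added unless the data already starts with ">"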
    +			bq = ">" * self.blockquote
    +			if not (force and data and data[0] == ">") and self.blockquote:
    +				bq += " "
    +
    +			if self.pre:
    +				if not self.list:
    +					bq += "    "
    +				# else: list content is already partially indented
    +				bq += "    " * len(self.list)
    +				data = data.replace("\n", "\n" + bq)
    +
    +			if self.startpre:
    +				self.startpre = False
    +				if self.list:
    +					# use existing initial indentation
    +					data = data.lstrip("\n")
    +
    +			if self.start:
    +				self.space = False
    +				self.p_p = 0
    +				self.start = False
    +
    +			if force == "end":
    +				# It's the end.
    +				self.p_p = 0
    +				self.out("\n")
    +				self.space = False
    +
    +			if self.p_p:
    +				self.out((self.br_toggle + "\n" + bq) * self.p_p)
    +				self.space = False
    +				self.br_toggle = ""
    +
    +			if self.space:
    +				if not self.lastWasNL:
    +					self.out(" ")
    +				self.space = False
    +
    +			if self.a and (
    +				(self.p_p == 2 and self.links_each_paragraph) or force == "end"
    +			):
    +				if force == "end":
    +					self.out("\n")
    +
    +				newa = []
    +				for link in self.a:
    +					if self.outcount > link.outcount:
    +						self.out(
    +							"   ["
    +							+ str(link.count)
    +							+ "]: "
    +							+ urlparse.urljoin(self.baseurl, link.attrs["href"])
    +						)
    +						if "title" in link.attrs:
    +							assert link.attrs["title"] is not None
    +							self.out(" (" + link.attrs["title"] + ")")
    +						self.out("\n")
    +					else:
    +						newa.append(link)
    +
    +				# Don't need an extra line when nothing was done.
    +				if self.a != newa:
    +					self.out("\n")
    +
    +				self.a = newa
    +
    +			if self.abbr_list and force == "end":
    +				for abbr, definition in self.abbr_list.items():
    +					self.out("  *[" + abbr + "]: " + definition + "\n")
    +
    +			self.p_p = 0
    +			self.out(data)
    +			self.outcount += 1
    +
    +	def handle_data(self, data: str, entity_char: bool = False) -> None:
    +		if not data:
    +			# Data may be empty for some HTML entities. For example,
    +			# LEFT-TO-RIGHT MARK.
    +			return
    +
    +		if self.stressed:
    +			data = data.strip()
    +			self.stressed = False
    +			self.preceding_stressed = True
    +		elif self.preceding_stressed:
    +			if (
    +				re.match(r"[^][(){}\s.!?]", data[0])
    +				and not hn(self.current_tag)
    +				and self.current_tag not in ["a", "code", "pre"]
    +			):
    +				# should match a letter or common punctuation
    +				data = " " + data
    +			self.preceding_stressed = False
    +
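+		# while inside a <style> block, collect parsed CSS declarations for google-doc emphasis handling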
    +		if self.style:
    +			self.style_def.update(dumb_css_parser(data))
    +
    +		if self.maybe_automatic_link is not None:
    +			href = self.maybe_automatic_link
    +			if (
    +				href == data
    +				and self.absolute_url_matcher.match(href)
    +				and self.use_automatic_links
    +			):
    +				self.o("<" + data + ">")
    +				self.empty_link = False
    +				return
    +			else:
    +				self.o("[")
    +				self.maybe_automatic_link = None
    +				self.empty_link = False
    +
    +		if not self.code and not self.pre and not entity_char:
    +			data = escape_md_section(data, snob=self.escape_snob)
    +		self.preceding_data = data
    +		self.o(data, puredata=True)
    +
    +	def charref(self, name: str) -> str:
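+		# e.g. charref("x41") and charref("65") both return "A"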
    +		if name[0] in ["x", "X"]:
    +			c = int(name[1:], 16)
    +		else:
    +			c = int(name)
    +
    +		if not self.unicode_snob and c in unifiable_n:
    +			return unifiable_n[c]
    +		else:
    +			try:
    +				return chr(c)
    +			except ValueError:  # invalid unicode
    +				return ""
    +
    +	def entityref(self, c: str) -> str:
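+		# e.g. entityref("amp") returns "&"; unknown names pass through as "&name;"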
    +		if not self.unicode_snob and c in config.UNIFIABLE:
    +			return config.UNIFIABLE[c]
    +		try:
    +			ch = html.entities.html5[c + ";"]
    +		except KeyError:
    +			return "&" + c + ";"
    +		return config.UNIFIABLE[c] if c == "nbsp" else ch
    +
    +	def google_nest_count(self, style: Dict[str, str]) -> int:
    +		"""
    +		Calculate the nesting count of google doc lists
    +
    +		:type style: dict
    +
    +		:rtype: int
    +		"""
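+		# e.g. {"margin-left": "72px"} with the default google_list_indent of 36 gives 2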
    +		nest_count = 0
    +		if "margin-left" in style:
    +			nest_count = int(style["margin-left"][:-2]) // self.google_list_indent
    +
    +		return nest_count
    +
    +	def optwrap(self, text: str) -> str:
    +		"""
    +		Wrap all paragraphs in the provided text.
    +
    +		:type text: str
    +
    +		:rtype: str
    +		"""
    +		if not self.body_width:
    +			return text
    +
    +		result = ""
    +		newlines = 0
    +		# I cannot think of a better solution for now.
    +		# To avoid the non-wrap behaviour for entire paras
    +		# because of the presence of a link in it
    +		if not self.wrap_links:
    +			self.inline_links = False
    +		for para in text.split("\n"):
    +			if len(para) > 0:
    +				if not skipwrap(
    +					para, self.wrap_links, self.wrap_list_items, self.wrap_tables
    +				):
    +					indent = ""
    +					if para.startswith("  " + self.ul_item_mark):
    +						# list item continuation: add a double indent to the
    +						# new lines
    +						indent = "    "
    +					elif para.startswith("> "):
    +						# blockquote continuation: add the greater than symbol
    +						# to the new lines
    +						indent = "> "
    +					wrapped = wrap(
    +						para,
    +						self.body_width,
    +						break_long_words=False,
    +						subsequent_indent=indent,
    +					)
    +					result += "\n".join(wrapped)
    +					if para.endswith("  "):
    +						result += "  \n"
    +						newlines = 1
    +					elif indent:
    +						result += "\n"
    +						newlines = 1
    +					else:
    +						result += "\n\n"
    +						newlines = 2
    +				else:
    +					# Warning for the tempted!!!
    +					# Be aware that obvious replacement of this with
    +					# line.isspace()
    +					# DOES NOT work! Explanations are welcome.
    +					if not config.RE_SPACE.match(para):
    +						result += para + "\n"
    +						newlines = 1
    +			else:
    +				if newlines < 2:
    +					result += "\n"
    +					newlines += 1
    +		return result
    +
    +
    +def html2text(html: str, baseurl: str = "", bodywidth: Optional[int] = config.BODY_WIDTH) -> str:
+	h = html.strip()
+	if h:
+		parser = HTML2Text(baseurl=baseurl, bodywidth=bodywidth)
+		h = parser.handle(h)
    +		# print('[html2text] %d bytes' % len(html))
    +	return h
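+
+# minimal usage sketch:
+#   html2text('<p>Hello, <b>world</b>!</p>')  # -> 'Hello, **world**!\n\n'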
    diff --git a/migration/html2text/__main__.py b/migration/html2text/__main__.py
    new file mode 100644
    index 00000000..4e28416e
    --- /dev/null
    +++ b/migration/html2text/__main__.py
    @@ -0,0 +1,3 @@
    +from .cli import main
    +
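+# lets the vendored converter run as `python -m migration.html2text`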
    +main()
    diff --git a/migration/html2text/cli.py b/migration/html2text/cli.py
    new file mode 100644
    index 00000000..d0c62c97
    --- /dev/null
    +++ b/migration/html2text/cli.py
    @@ -0,0 +1,322 @@
    +import argparse
    +import sys
    +
    +from . import HTML2Text, __version__, config
    +
    +
    +def main() -> None:
    +    baseurl = ""
    +
    +    class bcolors:
    +        HEADER = "\033[95m"
    +        OKBLUE = "\033[94m"
    +        OKGREEN = "\033[92m"
    +        WARNING = "\033[93m"
    +        FAIL = "\033[91m"
    +        ENDC = "\033[0m"
    +        BOLD = "\033[1m"
    +        UNDERLINE = "\033[4m"
    +
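+    # the option flags below mirror the upstream html2text CLI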
    +    p = argparse.ArgumentParser()
    +    p.add_argument(
    +        "--default-image-alt",
    +        dest="default_image_alt",
    +        default=config.DEFAULT_IMAGE_ALT,
    +        help="The default alt string for images with missing ones",
    +    )
    +    p.add_argument(
    +        "--pad-tables",
    +        dest="pad_tables",
    +        action="store_true",
    +        default=config.PAD_TABLES,
    +        help="pad the cells to equal column width in tables",
    +    )
    +    p.add_argument(
    +        "--no-wrap-links",
    +        dest="wrap_links",
    +        action="store_false",
    +        default=config.WRAP_LINKS,
    +        help="don't wrap links during conversion",
    +    )
    +    p.add_argument(
    +        "--wrap-list-items",
    +        dest="wrap_list_items",
    +        action="store_true",
    +        default=config.WRAP_LIST_ITEMS,
    +        help="wrap list items during conversion",
    +    )
    +    p.add_argument(
    +        "--wrap-tables",
    +        dest="wrap_tables",
    +        action="store_true",
    +        default=config.WRAP_TABLES,
    +        help="wrap tables",
    +    )
    +    p.add_argument(
    +        "--ignore-emphasis",
    +        dest="ignore_emphasis",
    +        action="store_true",
    +        default=config.IGNORE_EMPHASIS,
    +        help="don't include any formatting for emphasis",
    +    )
    +    p.add_argument(
    +        "--reference-links",
    +        dest="inline_links",
    +        action="store_false",
    +        default=config.INLINE_LINKS,
    +        help="use reference style links instead of inline links",
    +    )
    +    p.add_argument(
    +        "--ignore-links",
    +        dest="ignore_links",
    +        action="store_true",
    +        default=config.IGNORE_ANCHORS,
    +        help="don't include any formatting for links",
    +    )
    +    p.add_argument(
    +        "--ignore-mailto-links",
    +        action="store_true",
    +        dest="ignore_mailto_links",
    +        default=config.IGNORE_MAILTO_LINKS,
    +        help="don't include mailto: links",
    +    )
    +    p.add_argument(
    +        "--protect-links",
    +        dest="protect_links",
    +        action="store_true",
    +        default=config.PROTECT_LINKS,
    +        help="protect links from line breaks surrounding them with angle brackets",
    +    )
    +    p.add_argument(
    +        "--ignore-images",
    +        dest="ignore_images",
    +        action="store_true",
    +        default=config.IGNORE_IMAGES,
    +        help="don't include any formatting for images",
    +    )
    +    p.add_argument(
    +        "--images-as-html",
    +        dest="images_as_html",
    +        action="store_true",
    +        default=config.IMAGES_AS_HTML,
    +        help=(
    +            "Always write image tags as raw html; preserves `height`, `width` and "
    +            "`alt` if possible."
    +        ),
    +    )
    +    p.add_argument(
    +        "--images-to-alt",
    +        dest="images_to_alt",
    +        action="store_true",
    +        default=config.IMAGES_TO_ALT,
    +        help="Discard image data, only keep alt text",
    +    )
    +    p.add_argument(
    +        "--images-with-size",
    +        dest="images_with_size",
    +        action="store_true",
    +        default=config.IMAGES_WITH_SIZE,
    +        help=(
    +            "Write image tags with height and width attrs as raw html to retain "
    +            "dimensions"
    +        ),
    +    )
    +    p.add_argument(
    +        "-g",
    +        "--google-doc",
    +        action="store_true",
    +        dest="google_doc",
    +        default=False,
    +        help="convert an html-exported Google Document",
    +    )
    +    p.add_argument(
    +        "-d",
    +        "--dash-unordered-list",
    +        action="store_true",
    +        dest="ul_style_dash",
    +        default=False,
    +        help="use a dash rather than a star for unordered list items",
    +    )
    +    p.add_argument(
    +        "-e",
    +        "--asterisk-emphasis",
    +        action="store_true",
    +        dest="em_style_asterisk",
    +        default=False,
    +        help="use an asterisk rather than an underscore for emphasized text",
    +    )
    +    p.add_argument(
    +        "-b",
    +        "--body-width",
    +        dest="body_width",
    +        type=int,
    +        default=config.BODY_WIDTH,
    +        help="number of characters per output line, 0 for no wrap",
    +    )
    +    p.add_argument(
    +        "-i",
    +        "--google-list-indent",
    +        dest="list_indent",
    +        type=int,
    +        default=config.GOOGLE_LIST_INDENT,
    +        help="number of pixels Google indents nested lists",
    +    )
    +    p.add_argument(
    +        "-s",
    +        "--hide-strikethrough",
    +        action="store_true",
    +        dest="hide_strikethrough",
    +        default=False,
    +        help="hide strike-through text. only relevant when -g is " "specified as well",
    +    )
    +    p.add_argument(
    +        "--escape-all",
    +        action="store_true",
    +        dest="escape_snob",
    +        default=False,
    +        help=(
    +            "Escape all special characters.  Output is less readable, but avoids "
    +            "corner case formatting issues."
    +        ),
    +    )
    +    p.add_argument(
    +        "--bypass-tables",
    +        action="store_true",
    +        dest="bypass_tables",
    +        default=config.BYPASS_TABLES,
    +        help="Format tables in HTML rather than Markdown syntax.",
    +    )
    +    p.add_argument(
    +        "--ignore-tables",
    +        action="store_true",
    +        dest="ignore_tables",
    +        default=config.IGNORE_TABLES,
    +        help="Ignore table-related tags (table, th, td, tr) " "while keeping rows.",
    +    )
    +    p.add_argument(
    +        "--single-line-break",
    +        action="store_true",
    +        dest="single_line_break",
    +        default=config.SINGLE_LINE_BREAK,
    +        help=(
    +            "Use a single line break after a block element rather than two line "
    +            "breaks. NOTE: Requires --body-width=0"
    +        ),
    +    )
    +    p.add_argument(
    +        "--unicode-snob",
    +        action="store_true",
    +        dest="unicode_snob",
    +        default=config.UNICODE_SNOB,
    +        help="Use unicode throughout document",
    +    )
    +    p.add_argument(
    +        "--no-automatic-links",
    +        action="store_false",
    +        dest="use_automatic_links",
    +        default=config.USE_AUTOMATIC_LINKS,
    +        help="Do not use automatic links wherever applicable",
    +    )
    +    p.add_argument(
    +        "--no-skip-internal-links",
    +        action="store_false",
    +        dest="skip_internal_links",
    +        default=config.SKIP_INTERNAL_LINKS,
    +        help="Do not skip internal links",
    +    )
    +    p.add_argument(
    +        "--links-after-para",
    +        action="store_true",
    +        dest="links_each_paragraph",
    +        default=config.LINKS_EACH_PARAGRAPH,
    +        help="Put links after each paragraph instead of document",
    +    )
    +    p.add_argument(
    +        "--mark-code",
    +        action="store_true",
    +        dest="mark_code",
    +        default=config.MARK_CODE,
    +        help="Mark program code blocks with [code]...[/code]",
    +    )
    +    p.add_argument(
    +        "--decode-errors",
    +        dest="decode_errors",
    +        default=config.DECODE_ERRORS,
    +        help=(
    +            "What to do in case of decode errors.'ignore', 'strict' and 'replace' are "
    +            "acceptable values"
    +        ),
    +    )
    +    p.add_argument(
    +        "--open-quote",
    +        dest="open_quote",
    +        default=config.OPEN_QUOTE,
    +        help="The character used to open quotes",
    +    )
    +    p.add_argument(
    +        "--close-quote",
    +        dest="close_quote",
    +        default=config.CLOSE_QUOTE,
    +        help="The character used to close quotes",
    +    )
    +    p.add_argument(
    +        "--version", action="version", version=".".join(map(str, __version__))
    +    )
    +    p.add_argument("filename", nargs="?")
    +    p.add_argument("encoding", nargs="?", default="utf-8")
    +    args = p.parse_args()
    +
    +    if args.filename and args.filename != "-":
    +        with open(args.filename, "rb") as fp:
    +            data = fp.read()
    +    else:
    +        data = sys.stdin.buffer.read()
    +
    +    try:
    +        html = data.decode(args.encoding, args.decode_errors)
    +    except UnicodeDecodeError as err:
    +        warning = bcolors.WARNING + "Warning:" + bcolors.ENDC
    +        warning += " Use the " + bcolors.OKGREEN
    +        warning += "--decode-errors=ignore" + bcolors.ENDC + " flag."
    +        print(warning)
    +        raise err
    +
    +    h = HTML2Text(baseurl=baseurl)
    +    # handle options
    +    if args.ul_style_dash:
    +        h.ul_item_mark = "-"
    +    if args.em_style_asterisk:
    +        h.emphasis_mark = "*"
    +        h.strong_mark = "__"
    +
    +    h.body_width = args.body_width
    +    h.google_list_indent = args.list_indent
    +    h.ignore_emphasis = args.ignore_emphasis
    +    h.ignore_links = args.ignore_links
    +    h.ignore_mailto_links = args.ignore_mailto_links
    +    h.protect_links = args.protect_links
    +    h.ignore_images = args.ignore_images
    +    h.images_as_html = args.images_as_html
    +    h.images_to_alt = args.images_to_alt
    +    h.images_with_size = args.images_with_size
    +    h.google_doc = args.google_doc
    +    h.hide_strikethrough = args.hide_strikethrough
    +    h.escape_snob = args.escape_snob
    +    h.bypass_tables = args.bypass_tables
    +    h.ignore_tables = args.ignore_tables
    +    h.single_line_break = args.single_line_break
    +    h.inline_links = args.inline_links
    +    h.unicode_snob = args.unicode_snob
    +    h.use_automatic_links = args.use_automatic_links
    +    h.skip_internal_links = args.skip_internal_links
    +    h.links_each_paragraph = args.links_each_paragraph
    +    h.mark_code = args.mark_code
    +    h.wrap_links = args.wrap_links
    +    h.wrap_list_items = args.wrap_list_items
    +    h.wrap_tables = args.wrap_tables
    +    h.pad_tables = args.pad_tables
    +    h.default_image_alt = args.default_image_alt
    +    h.open_quote = args.open_quote
    +    h.close_quote = args.close_quote
    +
    +    sys.stdout.write(h.handle(html))
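+
+# Typical invocation, assuming the package is executed as a module
+# (see __main__.py below):
+#   python -m migration.html2text --body-width=0 page.html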
    diff --git a/migration/html2text/config.py b/migration/html2text/config.py
    new file mode 100644
    index 00000000..0f4d29bc
    --- /dev/null
    +++ b/migration/html2text/config.py
    @@ -0,0 +1,164 @@
    +import re
    +
    +# Use Unicode characters instead of their ascii pseudo-replacements
    +UNICODE_SNOB = True
    +
    +# Marker to use for marking tables for padding post processing
    +TABLE_MARKER_FOR_PAD = "special_marker_for_table_padding"
    +# Escape all special characters.  Output is less readable, but avoids
    +# corner case formatting issues.
    +ESCAPE_SNOB = True
    +
    +# Put the links after each paragraph instead of at the end.
    +LINKS_EACH_PARAGRAPH = False
    +
    +# Wrap long lines at position. 0 for no wrapping.
    +BODY_WIDTH = 0
    +
    +# Don't show internal links (href="#local-anchor") -- corresponding link
    +# targets won't be visible in the plain text file anyway.
    +SKIP_INTERNAL_LINKS = False
    +
    +# Use inline, rather than reference, formatting for images and links
    +INLINE_LINKS = True
    +
    +# Protect links from line breaks surrounding them with angle brackets (in
    +# addition to their square brackets)
    +PROTECT_LINKS = True
    +WRAP_LINKS = True
    +
    +# Wrap list items.
    +WRAP_LIST_ITEMS = False
    +
    +# Wrap tables
    +WRAP_TABLES = False
    +
    +# Number of pixels Google indents nested lists
    +GOOGLE_LIST_INDENT = 36
    +
    +# Values Google and others may use to indicate bold text
    +BOLD_TEXT_STYLE_VALUES = ("bold", "700", "800", "900")
    +
    +IGNORE_ANCHORS = False
    +IGNORE_MAILTO_LINKS = False
    +IGNORE_IMAGES = False
    +IMAGES_AS_HTML = False
    +IMAGES_TO_ALT = False
    +IMAGES_WITH_SIZE = False
    +IGNORE_EMPHASIS = False
    +MARK_CODE = True
    +DECODE_ERRORS = "strict"
    +DEFAULT_IMAGE_ALT = ""
    +PAD_TABLES = True
    +
+# Convert links with same href and text to <href> format
    +# if they are absolute links
    +USE_AUTOMATIC_LINKS = True
    +
    +# For checking space-only lines on line 771
    +RE_SPACE = re.compile(r"\s\+")
    +
    +RE_ORDERED_LIST_MATCHER = re.compile(r"\d+\.\s")
    +RE_UNORDERED_LIST_MATCHER = re.compile(r"[-\*\+]\s")
    +RE_MD_CHARS_MATCHER = re.compile(r"([\\\[\]\(\)])")
    +RE_MD_CHARS_MATCHER_ALL = re.compile(r"([`\*_{}\[\]\(\)#!])")
    +
    +# to find links in the text
    +RE_LINK = re.compile(r"(\[.*?\] ?\(.*?\))|(\[.*?\]:.*?)")
    +
    +# to find table separators
    +RE_TABLE = re.compile(r" \| ")
    +
    +RE_MD_DOT_MATCHER = re.compile(
    +    r"""
    +    ^             # start of line
    +    (\s*\d+)      # optional whitespace and a number
    +    (\.)          # dot
    +    (?=\s)        # lookahead assert whitespace
    +    """,
    +    re.MULTILINE | re.VERBOSE,
    +)
    +RE_MD_PLUS_MATCHER = re.compile(
    +    r"""
    +    ^
    +    (\s*)
    +    (\+)
    +    (?=\s)
    +    """,
    +    flags=re.MULTILINE | re.VERBOSE,
    +)
    +RE_MD_DASH_MATCHER = re.compile(
    +    r"""
    +    ^
    +    (\s*)
    +    (-)
    +    (?=\s|\-)     # followed by whitespace (bullet list, or spaced out hr)
    +                  # or another dash (header or hr)
    +    """,
    +    flags=re.MULTILINE | re.VERBOSE,
    +)
    +RE_SLASH_CHARS = r"\`*_{}[]()#+-.!"
    +RE_MD_BACKSLASH_MATCHER = re.compile(
    +    r"""
    +    (\\)          # match one slash
    +    (?=[%s])      # followed by a char that requires escaping
    +    """
    +    % re.escape(RE_SLASH_CHARS),
    +    flags=re.VERBOSE,
    +)
    +
    +UNIFIABLE = {
    +    "rsquo": "'",
    +    "lsquo": "'",
    +    "rdquo": '"',
    +    "ldquo": '"',
    +    "copy": "(C)",
    +    "mdash": "--",
    +    "nbsp": " ",
    +    "rarr": "->",
    +    "larr": "<-",
    +    "middot": "*",
    +    "ndash": "-",
    +    "oelig": "oe",
    +    "aelig": "ae",
    +    "agrave": "a",
    +    "aacute": "a",
    +    "acirc": "a",
    +    "atilde": "a",
    +    "auml": "a",
    +    "aring": "a",
    +    "egrave": "e",
    +    "eacute": "e",
    +    "ecirc": "e",
    +    "euml": "e",
    +    "igrave": "i",
    +    "iacute": "i",
    +    "icirc": "i",
    +    "iuml": "i",
    +    "ograve": "o",
    +    "oacute": "o",
    +    "ocirc": "o",
    +    "otilde": "o",
    +    "ouml": "o",
    +    "ugrave": "u",
    +    "uacute": "u",
    +    "ucirc": "u",
    +    "uuml": "u",
    +    "lrm": "",
    +    "rlm": "",
    +}
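+# note: these ascii fallbacks are only consulted when unicode_snob is
+# turned off; with UNICODE_SNOB = True (set above) entities stay unicode.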
    +
    +# Format tables in HTML rather than Markdown syntax
    +BYPASS_TABLES = False
    +# Ignore table-related tags (table, th, td, tr) while keeping rows
    +IGNORE_TABLES = False
    +
    +
    +# Use a single line break after a block element rather than two line breaks.
    +# NOTE: Requires body width setting to be 0.
    +SINGLE_LINE_BREAK = False
    +
    +
+# Use double quotation marks when converting the <q> tag.
    +OPEN_QUOTE = '"'
    +CLOSE_QUOTE = '"'
    diff --git a/migration/html2text/elements.py b/migration/html2text/elements.py
    new file mode 100644
    index 00000000..2533ec08
    --- /dev/null
    +++ b/migration/html2text/elements.py
    @@ -0,0 +1,18 @@
    +from typing import Dict, Optional
    +
    +
    +class AnchorElement:
    +    __slots__ = ["attrs", "count", "outcount"]
    +
    +    def __init__(self, attrs: Dict[str, Optional[str]], count: int, outcount: int):
    +        self.attrs = attrs
    +        self.count = count
    +        self.outcount = outcount
    +
    +
    +class ListElement:
    +    __slots__ = ["name", "num"]
    +
    +    def __init__(self, name: str, num: int):
    +        self.name = name
    +        self.num = num
    diff --git a/migration/html2text/py.typed b/migration/html2text/py.typed
    new file mode 100644
    index 00000000..e69de29b
    diff --git a/migration/html2text/typing.py b/migration/html2text/typing.py
    new file mode 100644
    index 00000000..6e17fed2
    --- /dev/null
    +++ b/migration/html2text/typing.py
    @@ -0,0 +1,3 @@
    +class OutCallback:
    +    def __call__(self, s: str) -> None:
    +        ...
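+
+# any callable accepting a str chunk and returning None satisfies this
+# interface; HTML2Text uses it as the type of its output callback.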
    diff --git a/migration/html2text/utils.py b/migration/html2text/utils.py
    new file mode 100644
    index 00000000..366748b6
    --- /dev/null
    +++ b/migration/html2text/utils.py
    @@ -0,0 +1,290 @@
    +import html.entities
    +from typing import Dict, List, Optional
    +
    +from . import config
    +
    +unifiable_n = {
    +    html.entities.name2codepoint[k]: v
    +    for k, v in config.UNIFIABLE.items()
    +    if k != "nbsp"
    +}
    +
    +
    +def hn(tag: str) -> int:
    +    if tag[0] == "h" and len(tag) == 2:
    +        n = tag[1]
    +        if "0" < n <= "9":
    +            return int(n)
    +    return 0
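+
+# e.g. hn("h2") == 2, while hn("hr") and hn("html") return 0: only the
+# two-character tags "h1".."h9" count as headings.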
    +
    +
    +def dumb_property_dict(style: str) -> Dict[str, str]:
    +    """
    +    :returns: A hash of css attributes
    +    """
    +    return {
    +        x.strip().lower(): y.strip().lower()
    +        for x, y in [z.split(":", 1) for z in style.split(";") if ":" in z]
    +    }
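+
+# e.g. dumb_property_dict("Color: RED; font-weight: 700") returns
+# {"color": "red", "font-weight": "700"} -- keys and values are lowercased.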
    +
    +
    +def dumb_css_parser(data: str) -> Dict[str, Dict[str, str]]:
    +    """
    +    :type data: str
    +
    +    :returns: A hash of css selectors, each of which contains a hash of
    +    css attributes.
    +    :rtype: dict
    +    """
    +    # remove @import sentences
    +    data += ";"
    +    importIndex = data.find("@import")
    +    while importIndex != -1:
    +        data = data[0:importIndex] + data[data.find(";", importIndex) + 1 :]
    +        importIndex = data.find("@import")
    +
+    # parse the css into a dict of selectors
    +    pairs = [x.split("{") for x in data.split("}") if "{" in x.strip()]
    +    try:
    +        elements = {a.strip(): dumb_property_dict(b) for a, b in pairs}
    +    except ValueError:
    +        elements = {}  # not that important
    +
    +    return elements
    +
    +
    +def element_style(
    +    attrs: Dict[str, Optional[str]],
    +    style_def: Dict[str, Dict[str, str]],
    +    parent_style: Dict[str, str],
    +) -> Dict[str, str]:
    +    """
    +    :type attrs: dict
    +    :type style_def: dict
+    :type parent_style: dict
    +
    +    :returns: A hash of the 'final' style attributes of the element
    +    :rtype: dict
    +    """
    +    style = parent_style.copy()
    +    if "class" in attrs:
    +        assert attrs["class"] is not None
    +        for css_class in attrs["class"].split():
    +            css_style = style_def.get("." + css_class, {})
    +            style.update(css_style)
    +    if "style" in attrs:
    +        assert attrs["style"] is not None
    +        immediate_style = dumb_property_dict(attrs["style"])
    +        style.update(immediate_style)
    +
    +    return style
    +
    +
    +def google_list_style(style: Dict[str, str]) -> str:
    +    """
    +    Finds out whether this is an ordered or unordered list
    +
    +    :type style: dict
    +
    +    :rtype: str
    +    """
    +    if "list-style-type" in style:
    +        list_style = style["list-style-type"]
    +        if list_style in ["disc", "circle", "square", "none"]:
    +            return "ul"
    +
    +    return "ol"
    +
    +
    +def google_has_height(style: Dict[str, str]) -> bool:
    +    """
    +    Check if the style of the element has the 'height' attribute
    +    explicitly defined
    +
    +    :type style: dict
    +
    +    :rtype: bool
    +    """
    +    return "height" in style
    +
    +
    +def google_text_emphasis(style: Dict[str, str]) -> List[str]:
    +    """
    +    :type style: dict
    +
    +    :returns: A list of all emphasis modifiers of the element
    +    :rtype: list
    +    """
    +    emphasis = []
    +    if "text-decoration" in style:
    +        emphasis.append(style["text-decoration"])
    +    if "font-style" in style:
    +        emphasis.append(style["font-style"])
    +    if "font-weight" in style:
    +        emphasis.append(style["font-weight"])
    +
    +    return emphasis
    +
    +
    +def google_fixed_width_font(style: Dict[str, str]) -> bool:
    +    """
    +    Check if the css of the current element defines a fixed width font
    +
    +    :type style: dict
    +
    +    :rtype: bool
    +    """
    +    font_family = ""
    +    if "font-family" in style:
    +        font_family = style["font-family"]
    +    return "courier new" == font_family or "consolas" == font_family
    +
    +
    +def list_numbering_start(attrs: Dict[str, Optional[str]]) -> int:
    +    """
    +    Extract numbering from list element attributes
    +
    +    :type attrs: dict
    +
+    :rtype: int
    +    """
    +    if "start" in attrs:
    +        assert attrs["start"] is not None
    +        try:
    +            return int(attrs["start"]) - 1
    +        except ValueError:
    +            pass
    +
    +    return 0
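+
+# e.g. attrs {"start": "5"} yields 4 (a zero-based offset); missing or
+# non-numeric start values fall back to 0.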
    +
    +
    +def skipwrap(
    +    para: str, wrap_links: bool, wrap_list_items: bool, wrap_tables: bool
    +) -> bool:
    +    # If it appears to contain a link
    +    # don't wrap
    +    if not wrap_links and config.RE_LINK.search(para):
    +        return True
    +    # If the text begins with four spaces or one tab, it's a code block;
    +    # don't wrap
+    if para[0:4] == "    " or para[0:1] == "\t":
    +        return True
    +
    +    # If the text begins with only two "--", possibly preceded by
    +    # whitespace, that's an emdash; so wrap.
    +    stripped = para.lstrip()
    +    if stripped[0:2] == "--" and len(stripped) > 2 and stripped[2] != "-":
    +        return False
    +
    +    # I'm not sure what this is for; I thought it was to detect lists,
+    # but there's a <br>-inside-<span> case in one of the tests that
+    # also depends upon it.
+    if stripped[0:1] in ("-", "*") and not stripped[0:2] == "**":
+        return not wrap_list_items
+
+    # If text contains a pipe character it is likely a table
+    if not wrap_tables and config.RE_TABLE.search(para):
+        return True
+
+    # If the text begins with a single -, *, or +, followed by a space,
+    # or an integer, followed by a ., followed by a space (in either
+    # case optionally preceded by whitespace), it's a list; don't wrap.
+    return bool(
+        config.RE_ORDERED_LIST_MATCHER.match(stripped)
+        or config.RE_UNORDERED_LIST_MATCHER.match(stripped)
+    )
+
+
+def escape_md(text: str) -> str:
+    """
+    Escapes markdown-sensitive characters within other markdown
+    constructs.
+    """
+    return config.RE_MD_CHARS_MATCHER.sub(r"\\\1", text)
+
+
+def escape_md_section(text: str, snob: bool = False) -> str:
+    """
+    Escapes markdown-sensitive characters across whole document sections.
+    """
+    text = config.RE_MD_BACKSLASH_MATCHER.sub(r"\\\1", text)
+
+    if snob:
+        text = config.RE_MD_CHARS_MATCHER_ALL.sub(r"\\\1", text)
+
+    text = config.RE_MD_DOT_MATCHER.sub(r"\1\\\2", text)
+    text = config.RE_MD_PLUS_MATCHER.sub(r"\1\\\2", text)
+    text = config.RE_MD_DASH_MATCHER.sub(r"\1\\\2", text)
+
+    return text
+
+
+def reformat_table(lines: List[str], right_margin: int) -> List[str]:
+    """
+    Given the lines of a table, pads the cells and returns the new lines.
+    """
+    # find the maximum width of the columns
+    max_width = [len(x.rstrip()) + right_margin for x in lines[0].split("|")]
+    max_cols = len(max_width)
+    for line in lines:
+        cols = [x.rstrip() for x in line.split("|")]
+        num_cols = len(cols)
+
+        # don't drop any data if colspan attributes result in unequal lengths
+        if num_cols < max_cols:
+            cols += [""] * (max_cols - num_cols)
+        elif max_cols < num_cols:
+            max_width += [len(x) + right_margin for x in cols[-(num_cols - max_cols) :]]
+            max_cols = num_cols
+
+        max_width = [
+            max(len(x) + right_margin, old_len) for x, old_len in zip(cols, max_width)
+        ]
+
+    # reformat
+    new_lines = []
+    for line in lines:
+        cols = [x.rstrip() for x in line.split("|")]
+        if set(line.strip()) == set("-|"):
+            filler = "-"
+            new_cols = [
+                x.rstrip() + (filler * (M - len(x.rstrip())))
+                for x, M in zip(cols, max_width)
+            ]
+            new_lines.append("|-" + "|".join(new_cols) + "|")
+        else:
+            filler = " "
+            new_cols = [
+                x.rstrip() + (filler * (M - len(x.rstrip())))
+                for x, M in zip(cols, max_width)
+            ]
+            new_lines.append("| " + "|".join(new_cols) + "|")
+    return new_lines
+
+
+def pad_tables_in_text(text: str, right_margin: int = 1) -> str:
+    """
+    Provide padding for tables in the text
+    """
+    lines = text.split("\n")
+    table_buffer = []  # type: List[str]
+    table_started = False
+    new_lines = []
+    for line in lines:
+        # Toggle table started
+        if config.TABLE_MARKER_FOR_PAD in line:
+            table_started = not table_started
+            if not table_started:
+                table = reformat_table(table_buffer, right_margin)
+                new_lines.extend(table)
+                table_buffer = []
+                new_lines.append("")
+            continue
+        # Process lines
+        if table_started:
+            table_buffer.append(line)
+        else:
+            new_lines.append(line)
+    return "\n".join(new_lines)
diff --git a/migration/tables/__init__.py b/migration/tables/__init__.py
new file mode 100644
index 00000000..6cc37870
--- /dev/null
+++ b/migration/tables/__init__.py
@@ -0,0 +1 @@
+__all__ = ["users", "tags", "content_items", "comments"]
\ No newline at end of file
diff --git a/migration/tables/comments.py b/migration/tables/comments.py
new file mode 100644
index 00000000..d1147d7a
--- /dev/null
+++ b/migration/tables/comments.py
@@ -0,0 +1,108 @@
+from datetime import datetime
+from dateutil.parser import parse as date_parse
+from orm import Reaction, User
+from orm import reaction
+from orm.base import local_session
+from migration.html2text import html2text
+from orm.reaction import ReactionKind
+from orm.shout import Shout
+
+ts = datetime.now()
+
+def migrate(entry, storage):
+    '''
+    {
+        "_id": "hdtwS8fSyFLxXCgSC",
+        "body": "...",
+        "contentItem": "mnK8KsJHPRi8DrybQ",
+        "createdBy": "bMFPuyNg6qAD2mhXe",
+        "thread": "01/",
+        "createdAt": "2016-04-19 04:33:53+00:00",
+        "ratings": [
+            { "createdBy": "AqmRukvRiExNpAe8C", "value": 1 },
+            { "createdBy": "YdE76Wth3yqymKEu5", "value": 1 }
+        ],
+        "rating": 2,
+        "updatedAt": "2020-05-27 19:22:57.091000+00:00",
+        "updatedBy": "0"
+    }
+
+    ->
+
+    type Reaction {
+        id: Int!
+        shout: Shout!
+        createdAt: DateTime!
+        createdBy: User!
+        updatedAt: DateTime
+        deletedAt: DateTime
+        deletedBy: User
+        range: String # full / 0:2340
+        kind: ReactionKind!
+        body: String
+        replyTo: Reaction
+        stat: Stat
+        old_id: String
+        old_thread: String
+    }
+    '''
+    reaction_dict = {}
+    # FIXME: comment_dict['createdAt'] = ts if not entry.get('createdAt') else date_parse(entry.get('createdAt'))
+    # print('[migration] comment original date %r' % entry.get('createdAt'))
+    # print('[migration] comment date %r ' % comment_dict['createdAt'])
+    reaction_dict['body'] = html2text(entry.get('body', ''))
+    reaction_dict['oid'] = entry['_id']
+    if entry.get('createdAt'):
+        reaction_dict['createdAt'] = date_parse(entry.get('createdAt'))
+    shout_oid = entry.get('contentItem')
+    if shout_oid not in storage['shouts']['by_oid']:
+        if len(storage['shouts']['by_oid']) > 0:
+            return shout_oid
+        else:
+            print('[migration] no shouts migrated yet')
+            raise Exception
+    else:
+        with local_session() as session:
+            author = session.query(User).filter(User.oid == entry['createdBy']).first()
+            shout_dict = storage['shouts']['by_oid'][shout_oid]
+            if shout_dict:
+                reaction_dict['shout'] = shout_dict['slug']
+                reaction_dict['createdBy'] = author.slug if author else 'discours'
+                reaction_dict['kind'] = ReactionKind.COMMENT
+
+                # creating reaction from old comment
+                reaction = Reaction.create(**reaction_dict)
+
+                reaction_dict['id'] = reaction.id
+                for comment_rating_old in entry.get('ratings', []):
+                    rater = session.query(User).filter(User.oid == comment_rating_old['createdBy']).first()
+                    reactedBy = rater if rater else session.query(User).filter(User.slug == 'noname').first()
+                    re_reaction_dict = {
+                        'shout': reaction_dict['shout'],
+                        'replyTo': reaction.id,
+                        'kind': ReactionKind.LIKE if comment_rating_old['value'] > 0 else ReactionKind.DISLIKE,
+                        'createdBy': reactedBy.slug if reactedBy else 'discours'
+                    }
+                    cts = comment_rating_old.get('createdAt')
+                    if cts:
+                        re_reaction_dict['createdAt'] = date_parse(cts)
+                    try:
+                        # creating reaction from old rating
+                        Reaction.create(**re_reaction_dict)
+                    except Exception as e:
+                        print('[migration] comment rating error: %r' % re_reaction_dict)
+                        raise e
+            else:
+                print('[migration] error: cannot find shout for comment %r' % reaction_dict)
+    return reaction
+
+def migrate_2stage(rr, old_new_id):
+    reply_oid = rr.get('replyTo')
+    if not reply_oid:
+        return
+    new_id = old_new_id.get(rr.get('oid'))
+    if not new_id:
+        return
+    with local_session() as session:
+        comment = session.query(Reaction).filter(Reaction.id == new_id).first()
+        comment.replyTo = old_new_id.get(reply_oid)
+        comment.save()
+        session.commit()
+    if not rr['body']:
+        raise Exception(rr)
diff --git a/migration/tables/content_items.py b/migration/tables/content_items.py
new file mode 100644
index 00000000..c5f85840
--- /dev/null
+++ b/migration/tables/content_items.py
@@ -0,0 +1,226 @@
+from dateutil.parser import parse as date_parse
+import sqlalchemy
+from orm.shout import Shout, ShoutTopic, User
+from storages.viewed import ViewedByDay
+from transliterate import translit
+from datetime import datetime
+from orm.base import local_session
+from migration.extract import prepare_html_body
+from orm.community import Community
+from orm.reaction import Reaction, ReactionKind
+
+OLD_DATE = '2016-03-05 22:22:00.350000'
+ts = datetime.now()
+type2layout = {
+    'Article': 'article',
+    'Literature': 'prose',
+    'Music': 'music',
+    'Video': 'video',
+    'Image': 'image'
+}
+
+def get_shout_slug(entry):
+    slug = entry.get('slug', '')
+    if not slug:
+        for friend in entry.get('friendlySlugs', []):
+            slug = friend.get('slug', '')
+            if slug:
+                break
+    return slug
+
+def migrate(entry, storage):
+    # init, set title and layout
+    r = {
+        'layout': type2layout[entry['type']],
+        'title': entry['title'],
+        'community': Community.default_community.id,
+        'authors': [],
+        'topics': set([]),
+        # 'rating': 0,
+        # 'ratings': [],
+        'createdAt': []
+    }
+    topics_by_oid = storage['topics']['by_oid']
+    users_by_oid = storage['users']['by_oid']
+
+    # author
+    oid = entry.get('createdBy', entry.get('_id', entry.get('oid')))
+    userdata = users_by_oid.get(oid)
+    if not userdata:
+        app = entry.get('application')
+        if app:
+            userslug = translit(app['name'], 'ru', reversed=True)\
+                .replace(' ', '-')\
+                .replace('\'', '')\
+                .replace('.', '-').lower()
+            userdata = {
+                'username': app['email'],
+                'email': app['email'],
+                'name': app['name'],
+                'bio': app.get('bio', ''),
+                'emailConfirmed': False,
+                'slug': userslug,
+                'createdAt': ts,
+                'wasOnlineAt': ts
+            }
+        else:
+            userdata = User.default_user.dict()
+    assert userdata, 'no user found for %s from %d users' % (oid, len(users_by_oid.keys()))
+    r['authors'] = [userdata, ]
+
+    # slug
+    slug = get_shout_slug(entry)
+    if slug:
+        r['slug'] = slug
+    else:
+        raise Exception
+
+    # cover
+    c = ''
+    if entry.get('thumborId'):
+        c = 'https://assets.discours.io/unsafe/1600x/' + entry['thumborId']
+    else:
+        c = entry.get('image', {}).get('url')
+    if not c or 'cloudinary' in c:
+        c = ''
+    r['cover'] = c
+
+    # timestamps
+    r['createdAt'] = date_parse(entry.get('createdAt', OLD_DATE))
+    r['updatedAt'] = date_parse(entry['updatedAt']) if 'updatedAt' in entry else ts
+    if entry.get('published'):
+        r['publishedAt'] = date_parse(entry.get('publishedAt', OLD_DATE))
+        if r['publishedAt'] == date_parse(OLD_DATE):
+            r['publishedAt'] = ts
+    if 'deletedAt' in entry:
+        r['deletedAt'] = date_parse(entry['deletedAt'])
+
+    # topics
+    category = entry['category']
+    mainTopic = topics_by_oid.get(category)
+    if mainTopic:
+        r['mainTopic'] = storage['replacements'].get(mainTopic["slug"], mainTopic["slug"])
+    topic_oids = [category, ]
+    topic_oids.extend(entry.get('tags', []))
+    for oid in topic_oids:
+        if oid in storage['topics']['by_oid']:
+            r['topics'].add(storage['topics']['by_oid'][oid]['slug'])
+        else:
+            print('[migration] unknown old topic id: ' + oid)
+    r['topics'] = list(r['topics'])
+
+    entry['topics'] = r['topics']
+    entry['cover'] = r['cover']
+    entry['authors'] = r['authors']
+
+    # body
+    r['body'] = prepare_html_body(entry)
+
+    # save shout to db
+    s = object()
+    shout_dict = r.copy()
+    user = None
+    del shout_dict['topics']  # FIXME: AttributeError: 'str' object has no attribute '_sa_instance_state'
+    # del shout_dict['rating']  # FIXME: TypeError: 'rating' is an invalid keyword argument for Shout
+    # del shout_dict['ratings']
+    email = userdata.get('email')
+    slug = userdata.get('slug')
+    with local_session() as session:
+        # c = session.query(Community).all().pop()
+        if email:
+            user = session.query(User).filter(User.email == email).first()
+        if not user and slug:
+            user = session.query(User).filter(User.slug == slug).first()
+        if not user and userdata:
+            try:
+                user = User.create(**userdata)
+            except sqlalchemy.exc.IntegrityError:
+                print('[migration] user error: %r' % userdata)
+            else:
+                userdata['id'] = user.id
+                userdata['createdAt'] = user.createdAt
+                storage['users']['by_slug'][userdata['slug']] = userdata
+                storage['users']['by_oid'][entry['_id']] = userdata
+    assert user, 'could not get a user'
+    shout_dict['authors'] = [user, ]
+
+    try:
+        s = Shout.create(**shout_dict)
+    except sqlalchemy.exc.IntegrityError as e:
+        with local_session() as session:
+            s = session.query(Shout).filter(Shout.slug == shout_dict['slug']).first()
+            bump = False
+            if s:
+                for key in shout_dict:
+                    if key in s.__dict__:
+                        if s.__dict__[key] != shout_dict[key]:
+                            print('[migration] shout already exists, but differs in %s' % key)
+                            bump = True
+                    else:
+                        print('[migration] shout already exists, but lacks %s' % key)
+                        bump = True
+                if bump:
+                    s.update(shout_dict)
+            else:
+                print('[migration] something went wrong with shout: \n%r' % shout_dict)
+                raise e
+            session.commit()
+    except Exception:
+        print(s)
+        raise
+
+    # shout topics aftermath
+    shout_dict['topics'] = []
+    for tpc in r['topics']:
+        oldslug = tpc
+        newslug = storage['replacements'].get(oldslug, oldslug)
+        if newslug:
+            with local_session() as session:
+                shout_topic_old = session.query(ShoutTopic)\
+                    .filter(ShoutTopic.shout == shout_dict['slug'])\
+                    .filter(ShoutTopic.topic == oldslug).first()
+                if shout_topic_old:
+                    shout_topic_old.update({'slug': newslug})
+                else:
+                    shout_topic_new = session.query(ShoutTopic)\
+                        .filter(ShoutTopic.shout == shout_dict['slug'])\
+                        .filter(ShoutTopic.topic == newslug).first()
+                    if not shout_topic_new:
+                        try:
+                            ShoutTopic.create(**{'shout': shout_dict['slug'], 'topic': newslug})
+                        except Exception:
+                            print('[migration] shout topic error: ' + newslug)
+                session.commit()
+            if newslug not in shout_dict['topics']:
+                shout_dict['topics'].append(newslug)
+        else:
+            print('[migration] ignored topic slug: \n%r' % tpc)
+            # raise Exception
+
+    # content_item ratings to reactions
+    try:
+        for content_rating in entry.get('ratings', []):
+            with local_session() as session:
+                rater = session.query(User).filter(User.oid == content_rating['createdBy']).first()
+                reactedBy = rater if rater else session.query(User).filter(User.slug == 'noname').first()
+                if rater:
+                    reaction_dict = {
+                        'kind': ReactionKind.LIKE if content_rating['value'] > 0 else ReactionKind.DISLIKE,
+                        'createdBy': reactedBy.slug,
+                        'shout': shout_dict['slug']
+                    }
+                    cts = content_rating.get('createdAt')
+                    if cts:
+                        reaction_dict['createdAt'] = date_parse(cts)
+                    reaction = session.query(Reaction).\
+                        filter(Reaction.shout == reaction_dict['shout']).\
+                        filter(Reaction.createdBy == reaction_dict['createdBy']).\
+                        filter(Reaction.kind == reaction_dict['kind']).first()
+                    if reaction:
+                        reaction_dict['kind'] = ReactionKind.AGREE if content_rating['value'] > 0 else ReactionKind.DISAGREE
+                        reaction.update(reaction_dict)
+                    else:
+                        Reaction.create(**reaction_dict)
+                    # shout_dict['ratings'].append(reaction_dict)
+    except Exception:
+        print('[migration] content_item.ratings error: \n%r' % content_rating)
+        raise
+
+    # shout views
+    ViewedByDay.create(shout=shout_dict['slug'], value=entry.get('views', 1))
+    # del shout_dict['ratings']
+    shout_dict['oid'] = entry.get('_id')
+    storage['shouts']['by_oid'][entry['_id']] = shout_dict
+    storage['shouts']['by_slug'][slug] = shout_dict
+    return shout_dict
diff --git a/migration/tables/replacements.json b/migration/tables/replacements.json
new file mode 100644
index 00000000..e53a0886
--- /dev/null
+++ b/migration/tables/replacements.json
@@ -0,0 +1,768 @@
+{
+  "1990-e": "90s",
+  "2000-e": "2000s",
+  "90-e": "90s",
+  "207": "207",
+  "kartochki-rubinshteyna": "rubinstein-cards",
+  "Georgia": "georgia",
+  "Japan": "japan",
+  "Sweden": "sweden",
+  "abstraktsiya": "abstract",
+  "absurdism": "absurdism",
+  "acclimatization": "acclimatisation",
+  "activism": "activism",
+  "adolf-gitler": "adolf-hitler",
+  "afrika": "africa",
+  "agata-kristi": "agatha-christie",
+  "agressiya": "agression",
+  "agressivnoe-povedenie": "agression",
+  "aktsii": "actions",
+  "aktsionizm": "actionism",
+  "alber-kamyu": "albert-kamus",
+  "albomy": "albums",
+  "aleksandr-griboedov": "aleksander-griboedov",
+  "aleksandr-pushkin": "aleksander-pushkin",
+  "aleksandr-solzhenitsyn": "aleksander-solzhenitsyn",
+  "aleksandr-vvedenskiy": "aleksander-vvedensky",
+  "aleksey-navalnyy": "alexey-navalny",
+  "alfavit": "alphabet",
+  "alkogol": "alcohol",
+  "alternativa": "alternative",
+  "alternative": "alternative",
+  "alternativnaya-istoriya": "alternative-history",
+  "amerika": "america",
+  "anarhizm": "anarchism",
+  "anatoliy-mariengof": "anatoly-mariengof",
+  "ancient-russia": "ancient-russia",
+  "andegraund": "underground",
+  "andrey-platonov": "andrey-platonov",
+  "andrey-rodionov": "andrey-rodionov",
+  "andrey-tarkovskiy": "andrey-tarkovsky",
+  "angliyskie-istorii": "english-stories",
+  "angliyskiy-yazyk": "english-langugae",
+  "animation": "animation",
+  "animatsiya": "animation",
+  "anime": "anime",
+  "anri-volohonskiy": "anri-volohonsky",
+  "antifashizm": "anti-faschism",
+  "antiquity": "antiquity",
+  "antiutopiya": "dystopia",
+  "antropology": "antropology",
+  "antropotsen": "antropocenus",
+  "architecture": "architecture",
+  "arheologiya": "archeology",
+  "arhetipy": "archetypes",
+  "arhiv": "archive",
+  "aristokraty": "aristocracy",
+  "aristotel": "aristotle",
+  "arktika": "arctic",
+  "armiya": "army",
+  "art": "art",
+  "art-is": "art-is",
+  "artists": "artists",
+  "ateizm": "atheism",
+  "audiopoeziya": "audio-poetry",
+  "audio-poetry": "audio-poetry",
+  "audiospektakl": "audio-spectacles",
+  "auktsyon": "auktsyon",
+  "avangard": "avantgarde",
+  "avtofikshn": "autofiction",
+  "avtorskaya-pesnya": "bardsongs",
+  "azbuka-immigratsii": "immigration-basics",
+  "aziatskiy-kinematograf": "asian-cinema",
+  "b-movie": "b-movie",
+  "bannye-chteniya": "sauna-reading",
+  "bardsongs": "bardsongs",
+  "bdsm": "bdsm",
+  "belarus": "belarus",
+  "belgiya": "belgium",
+  "bertold-breht": "berttold-brecht",
+  "bezumie": "madness",
+  "biography": "biography",
+  "biologiya": "biology",
+  "bipolyarnoe-rasstroystvo": "bipolar-disorder",
+  "bitniki": "beatnics",
+  "biznes": "business",
+  "blizhniy-vostok": "middle-east",
+  "blizost": "closeness",
+  "blokada": "blockade",
+  "bob-dilan": "bob-dylan",
+  "bog": "god",
+  "bol": "pain",
+  "bolotnoe-delo": "bolotnaya-case",
+  "books": "books",
+  "boris-eltsin": "boris-eltsin",
+  "boris-godunov": "boris-godunov",
+  "boris-grebenschikov": "boris-grebenschikov",
+  "boris-nemtsov": "boris-nemtsov",
+  "boris-pasternak": "boris-pasternak",
+  "brak": "marriage",
+  "bret-iston-ellis": "bret-iston-ellis",
+  "buddizm": "buddhism",
+  "bullying": "bullying",
+  "bunt": "riot",
+  "burning-man": "burning-man",
+  "bytie": "being",
+  "byurokratiya": "bureaucracy",
+  "capitalism": "capitalism",
+  "censored-in-russia": "censored-in-russia",
+  "ch-rno-beloe": "black-and-white",
+  "ch-rnyy-yumor": "black-humour",
+  "chapters": "chapters",
+  "charity": "charity",
+  "chayldfri": "childfree",
+  "chechenskaya-voyna": "chechen-war",
+  "chechnya": "chechnya",
+  "chelovek": "male",
+  "chernobyl": "chernobyl",
+  "chernyy-yumor": "black-humour",
+  "children": "children",
+  "china": "china",
+  "chinovniki": "bureaucracy",
+  "chukotka": "chukotka",
+  "chuma": "plague",
+  "church": "church",
+  "cinema": "cinema",
+  "city": "city",
+  "civil-position": "civil-position",
+  "clips": "clips",
+  "collage": "collage",
+  "comics": "comics",
+  "conspiracy-theory": "conspiracy-theory",
+  "contemporary-art": "contemporary-art",
+  "contemporary-poetry": "poetry",
+  "contemporary-prose": "prose",
+  "coronavirus": "coronavirus",
+  "corruption": "corruption",
+  "creative-writing-school": "creative-writing-school",
+  "crime": "crime",
+  "criticism": "criticism",
+  "critiques": "reviews",
+  "culture": "culture",
+  "dadaizm": "dadaism",
+  "daniel-defo": "daniel-defoe",
+  "daniil-harms": "daniil-kharms",
+  "dante-aligeri": "dante-alighieri",
+  "darkveyv": "darkwave",
+  "death": "death",
+  "debaty": "debats",
+  "delo-seti": "seti-case",
+  "democracy": "democracy",
+  "demografiya": "demographics",
+  "demonstrations": "demonstrations",
+  "depression": "depression",
+  "derevnya": "village",
+  "design": "design",
+  "detskie-doma": "orphanages",
+  "detstvo": "childhood",
+  "digital": "digital",
+  "digital-art": "digital-art",
+  "directing": "directing",
+  "diskurs": "discours",
+  "diskurs-1": "discourse",
+  "dissidenty": "dissidents",
+  "diy": "diy",
+  "dmitriy-donskoy": "dmitriy-donskoy",
+  "dmitriy-prigov": "dmitriy-prigov",
+  "dnevniki": "dairies",
+  "documentary": "documentary",
+  "dokumenty": "doсuments",
+  "domashnee-nasilie": "home-terror",
+  "donald-tramp": "donald-trump",
+  "donbass": "donbass",
+  "donorstvo": "donation",
+  "drama": "drama",
+  "dramaturgy": "dramaturgy",
+  "drawing": "drawing",
+  "drevo-zhizni": "tree-of-life",
+  "drugs": "drugs",
+  "dzhaz": "jazz",
+  "dzhek-keruak": "jack-keruak",
+  "dzhim-morrison": "jim-morrison",
+  "dzhordzh-romero": "george-romero",
+  "dzhordzho-agamben": "giorgio-agamben",
+  "ecology": "ecology",
+  "economics": "economics",
+  "eda": "food",
+  "editing": "editing",
+  "editorial-statements": "editorial-statements",
+  "eduard-limonov": "eduard-limonov",
+  "education": "education",
+  "egor-letov": "egor-letov",
+  "eksperiment": "experiments",
+  "eksperimentalnaya-muzyka": "experimental-music",
+  "ekspressionizm": "expressionism",
+  "ekstremizm": "extremism",
+  "ekzistentsializm-1": "existentialism",
+  "elections": "elections",
+  "electronic": "electronics",
+  "electronics": "electronics",
+  "elena-glinskaya": "elena-glinskaya",
+  "elena-guro": "elena-guro",
+  "elizaveta-mnatsakanova": "elizaveta-mnatsakanova",
+  "embient": "ambient",
+  "emigration": "emigration",
+  "emil-dyurkgeym": "emile-durkheim",
+  "emotsii": "emotions",
+  "empiric": "empiric",
+  "epidemiya": "pandemic",
+  "erich-von-neff": "erich-von-neff",
+  "erotika": "erotics",
+  "essay": "essay",
+  "estetika": "aestetics",
+  "etika": "ethics",
+  "etnos": "ethnics",
+  "everyday-life": "everyday-life",
+  "evgeniy-onegin": "eugene-onegin",
+  "evolyutsiya": "evolution",
+  "exhibitions": "exhibitions",
+  "experience": "experiences",
+  "experimental": "experimental",
+  "experimental-music": "experimental-music",
+  "explanation": "explanation",
+  "faktcheking": "fact-checking",
+  "falsifikatsii": "falsifications",
+  "family": "family",
+  "fanfiki": "fan-fiction",
+  "fantastika": "sci-fi",
+  "fatalizm": "fatalism",
+  "fedor-dostoevskiy": "fedor-dostoevsky",
+  "fedor-ioannovich": "fedor-ioannovich",
+  "feleton": "feuilleton",
+  "feminism": "feminism",
+  "fenomenologiya": "phenomenology",
+  "fentezi": "fantasy",
+  "festival": "festival",
+  "festival-territoriya": "festival-territory",
+  "folk": "folk",
+  "folklor": "folklore",
+  "fotoreportazh": "photoreports",
+  "france": "france",
+  "frants-kafka": "franz-kafka",
+  "frederik-begbeder": "frederick-begbeder",
+  "freedom": "freedom",
+  "friendship": "friendship",
+  "fsb": "fsb",
+  "futbol": "footbool",
+  "future": "future",
+  "futuristy": "futurists",
+  "futurizm": "futurism",
+  "galereya": "gallery",
+  "gdr": "gdr",
+  "gender": "gender",
+  "gendernyy-diskurs": "gender",
+  "gennadiy-aygi": "gennadiy-aygi",
+  "gerhard-rihter": "gerhard-rihter",
+  "germaniya": "germany",
+  "germenevtika": "hermeneutics",
+  "geroi": "heroes",
+  "girls": "girls",
+  "gkchp": "gkchp",
+  "glitch": "glitch",
+  "globalizatsiya": "globalisation",
+  "gollivud": "hollywood",
+  "gonzo": "gonzo",
+  "gore-ot-uma": "woe-from-wit",
+  "graffiti": "graffiti",
+  "graphics": "graphics",
+  "gravyura": "engraving",
+  "grazhdanskaya-oborona": "grazhdanskaya-oborona",
+  "gretsiya": "greece",
+  "gulag": "gulag",
+  "han-batyy": "khan-batyy",
+  "health": "health",
+  "himiya": "chemistry",
+  "hip-hop": "hip-hop",
+  "history": "history",
+  "history-of-russia": "history-of-russia",
+  "holokost": "holocaust",
+  "horeografiya": "choreography",
+  "horror": "horror",
+  "hospis": "hospice",
+  "hristianstvo": "christianity",
+  "humans": "humans",
+  "humour": "humour",
+  "ideologiya": "ideology",
+  "idm": "idm",
+  "igil": "isis",
+  "igor-pomerantsev": "igor-pomerantsev",
+  "igra-prestolov": "game-of-throne",
+  "igry": "games",
+  "iisus-hristos": "jesus-christ",
+  "illness": "illness",
+  "illustration-history": "illustration-history",
+  "illustrations": "illustrations",
+  "imazhinizm": "imagism",
+  "immanuil-kant": "immanuel-kant",
+  "impressionizm": "impressionism",
+  "improvizatsiya": "improvisation",
+  "indi": "indie",
+  "individualizm": "individualism",
+  "infografika": "infographics",
+  "informatsiya": "information",
+  "ingmar-bergman": "ingmar-bergman",
+  "inklyuziya": "inclusion",
+  "installyatsiya": "installation",
+  "internet": "internet",
+  "interview": "interview",
+  "invalidnost": "disability",
+  "investigations": "investigations",
+  "iosif-brodskiy": "joseph-brodsky",
+  "iosif-stalin": "joseph-stalin",
+  "iskusstvennyy-intellekt": "artificial-intelligence",
+  "islam": "islam",
+  "istoriya-moskvy": "moscow-history",
+  "istoriya-teatra": "theatre-history",
+  "italiya": "italy",
+  "italyanskiy-yazyk": "italian-language",
+  "iudaika": "judaica",
+  "ivan-groznyy": "ivan-grozny",
+  "ivan-iii-gorbatyy": "ivan-iii-gorbaty",
+  "ivan-kalita": "ivan-kalita",
+  "ivan-krylov": "ivan-krylov",
+  "izobreteniya": "inventions",
+  "izrail-1": "israel",
+  "jazz": "jazz",
+  "john-lennon": "john-lennon",
+  "journalism": "journalism",
+  "justice": "justice",
+  "k-pop": "k-pop",
+  "kalligrafiya": "calligraphy",
+  "karikatura": "caricatures",
+  "katrin-nenasheva": "katrin-nenasheva",
+  "kavkaz": "caucasus",
+  "kazan": "kazan",
+  "kiberbezopasnost": "cybersecurity",
+  "kinoklub": "cinema-club",
+  "kirill-serebrennikov": "kirill-serebrennikov",
+  "klassika": "classic",
+  "kollektivnoe-bessoznatelnoe": "сollective-unconscious",
+  "komediya": "comedy",
+  "kommunikatsii": "communications",
+  "kommunizm": "communism",
+  "kommuny": "communes",
+  "kompyuternye-igry": "computer-games",
+  "konservatizm": "conservatism",
+  "kontrkultura": "counter-culture",
+  "kontseptualizm": "conceptualism",
+  "korotkometrazhka": "cinema-shorts",
+  "kosmos": "cosmos",
+  "kraudfanding": "crowdfunding",
+  "krizis": "crisis",
+  "krov": "blood",
+  "krym": "crimea",
+  "kulturologiya": "culturology",
+  "kulty": "cults",
+  "kurdistan": "kurdistan",
+  "kurt-kobeyn": "kurt-cobain",
+  "kurt-vonnegut": "kurt-vonnegut",
+  "kvir": "queer",
+  "laboratoriya": "lab",
+  "language": "languages",
+  "lars-fon-trier": "lars-fon-trier",
+  "laws": "laws",
+  "lectures": "lectures",
+  "leto": "summer",
+  "lev-tolstoy": "leo-tolstoy",
+  "lgbt": "lgbt",
+  "liberalizm": "liberalism",
+  "libertarianstvo": "libertarianism",
+  "life": "life",
+  "likbez": "likbez",
+  "lingvistika": "linguistics",
+  "lirika": "lirics",
+  "literary-studies": "literary-studies",
+  "literature": "literature",
+  "lo-fi": "lo-fi",
+  "love": "love",
+  "luzha-goluboy-krovi": "luzha-goluboy-krovi",
+  "lyudvig-vitgenshteyn": "ludwig-wittgenstein",
+  "lzhedmitriy": "false-dmitry",
+  "lzhenauka": "pseudoscience",
+  "maks-veber": "max-weber",
+  "manifests": "manifests",
+  "manipulyatsii-soznaniem": "mind-manipulation",
+  "marina-abramovich": "marina-abramovich",
+  "marketing": "marketing",
+  "marksizm": "marxism",
+  "marsel-dyushan": "marchel-duchamp",
+  "martin-haydegger": "martin-hidegger",
+  "matematika": "maths",
+  "vladimir-mayakovskiy": "vladimir-mayakovsky",
+  "mayakovskiy": "vladimir-mayakovsky",
+  "ekzistentsiya": "existence",
+  "media": "media",
+  "medicine": "medicine",
+  "memuary": "memoirs",
+  "menedzhment": "management",
+  "merab-mamardashvili": "merab-mamardashvili",
+  "mest": "revenge",
+  "metamodernizm": "metamodern",
+  "metavselennaya": "metaverse",
+  "metro": "metro",
+  "mifologiya": "mythology",
+  "mify": "myth",
+  "mihael-haneke": "michael-haneke",
+  "mihail-baryshnikov": "mihail-baryshnikov",
+  "mihail-bulgakov": "mihail-bulgakov",
+  "mikrotonalnaya-muzyka": "mikrotone-muzyka",
+  "minimalizm": "minimalism",
+  "minkult-privet": "minkult-privet",
+  "mir": "world",
+  "mirovozzrenie": "mindsets",
+  "mishel-fuko": "michel-foucault",
+  "mistika": "mystics",
+  "mitropolit-makariy": "mitropolit-makariy",
+  "mlm": "mlm",
+  "moda": "fashion",
+  "modernizm": "modernism",
+  "mokyumentari": "mockumentary",
+  "moloko-plus": "moloko-plus",
+  "money": "money",
+  "monologs": "monologues",
+  "monstratsiya": "monstration",
+  "moralnaya-otvetstvennost": "moral-responsibility",
+  "more": "sea",
+  "moscow": "moscow",
+  "moshennichestvo": "frauds",
+  "moskovskiy-romanticheskiy-kontseptualizm": "moscow-romantic-conceptualism",
+  "moskovskoe-delo": "moscow-case",
+  "movies": "movies",
+  "mozg": "brain",
+  "multiplikatsiya": "animation",
+  "music": "music",
+  "muzei": "museum",
+  "muzey": "museum",
+  "muzhchiny": "man",
+  "myshlenie": "thinking",
+  "nagornyy-karabah": "nagorno-karabakh",
+  "natsionalizm": "nationalism",
+  "natsionalnaya-ideya": "national-idea",
+  "natsizm": "nazism",
+  "natyurmort": "nature-morte",
+  "nauchpop": "pop-science",
+  "nbp": "nbp",
+  "nenavist": "hate",
+  "neofitsialnaya-literatura": "unofficial-literature",
+  "neoklassika": "neoclassic",
+  "neprozrachnye-smysly": "hidden-meanings",
+  "neravenstvo": "inequality",
+  "new-year": "new-year",
+  "neyronauka": "neuro-science",
+  "neyroseti": "neural-networks",
+  "niu-vshe": "hse",
+  "nizhniy-novgorod": "nizhny-novgorod",
+  "nko": "nonprofits",
+  "nlo": "ufo",
+  "nobelevskaya-premiya": "nobel-prize",
+  "noize-mc": "noize-mc",
+  "nonkonformizm": "nonconformism",
+  "novaya-drama": "new-drama",
+  "novosti": "news",
+  "noyz": "noise",
+  "oberiu": "oberiu",
+  "ocherk": "etudes",
+  "ochevidnyy-nuar": "ochevidnyy-nuar",
+  "odinochestvo": "loneliness",
+  "odna-kniga-odna-istoriya": "one-book-one-story",
+  "okrainy": "outskirts",
+  "opinions": "opinions",
+  "oppozitsiya": "opposition",
+  "orhan-pamuk": "orhan-pamuk",
+  "ornitologiya": "ornitology",
+  "osip-mandelshtam": "osip-mandelshtam",
+  "oskar-uayld": "oscar-wilde",
+  "osoznanie": "awareness",
+  "otnosheniya": "relationship",
+  "pablo-pikasso": "pablo-picasso",
+  "painting": "painting",
+  "paintings": "painting",
+  "pamyat": "memory",
+  "pandemiya": "pandemic",
+  "parizh": "paris",
+  "patriotizm": "patriotism",
+  "paul-tselan": "paul-tselan",
+  "per-burd": "pierre-bourdieu",
+  "performance": "performance",
+  "peyzazh": "landscape",
+  "philology": "philology",
+  "philosophy": "philosophy",
+  "photo": "photography",
+  "photography": "photography",
+  "photoprojects": "photoprojects",
+  "plakaty": "posters",
+  "plastilin": "plasticine",
+  "plays": "plays",
+  "podrostki": "teenagers",
+  "poema": "poem",
+  "poems": "poems",
+  "poeticheskaya-proza": "poetic-prose",
+  "poetry": "poetry",
+  "poetry-of-squares": "poetry-of-squares",
+  "poetry-slam": "poetry-slam",
+  "police": "police",
+  "politics": "politics",
+  "polsha": "poland",
+  "pop-art": "pop-art",
+  "pop-culture": "pop-culture",
+  "pornografiya": "pornography",
+  "portret": "portrait",
+  "poslovitsy": "proverbs",
+  "post-pank": "post-punk",
+  "post-rok": "post-rock",
+  "postmodernism": "postmodernism",
+  "povest": "novells",
+  "povsednevnost": "everyday-life",
+  "power": "power",
+  "pravo": "right",
+  "pravoslavie": "orthodox",
+  "pravozaschitniki": "human-rights-activism",
+  "prazdnik": "holidays",
+  "predatelstvo": "betrayal",
+  "predprinimatelstvo": "entrepreneurship",
+  "premera": "premier",
+  "premiya-oskar": "oscar-prize",
+  "pribaltika-1": "baltic",
+  "priroda": "nature",
+  "prison": "prison",
+  "pritcha": "parable",
+  "privatnost": "privacy",
+  "progress": "progress",
+  "projects": "projects",
+  "prokrastinatsiya": "procrastination",
+  "propaganda": "propaganda",
+  "proschenie": "forgiveness",
+  "prose": "prose",
+  "proshloe": "past",
+  "prostitutsiya": "prostitution",
+  "prosveschenie": "enlightenment",
+  "protests": "protests",
+  "psalmy": "psalms",
+  "psihoanaliz": "psychoanalysis",
+  "psihodeliki": "psychodelics",
+  "pskov": "pskov",
+  "psychiatry": "psychiatry",
+  "psychology": "psychology",
+  "punk": "punk",
+  "r-b": "rnb",
+  "realizm": "realism",
+  "redaktura": "editorial",
+  "refleksiya": "reflection",
+  "reggi": "reggae",
+  "religion": "religion",
+  "rene-zhirar": "rene-girard",
+  "renesanss": "renessance",
+  "renovatsiya": "renovation",
+  "rep": "rap",
+  "reportage": "reportage",
+  "repressions": "repressions",
+  "research": "research",
+  "retroveyv": "retrowave",
+  "review": "review",
+  "revolution": "revolution",
+  "rezo-gabriadze": "rezo-gabriadze",
+  "risunki": "painting",
+  "roboty": "robots",
+  "rock": "rock",
+  "roditeli": "parents",
+  "romantizm": "romantism",
+  "romany": "novell",
+  "ronald-reygan": "ronald-reygan",
+  "roskomnadzor": "roskomnadzor",
+  "rossiyskoe-kino": "russian-cinema",
+  "rozhava": "rojava",
+  "rpts": "rpts",
+  "rus-na-grani-sryva": "rus-na-grani-sryva",
+  "russia": "russia",
+  "russian-language": "russian-language",
+  "russian-literature": "russian-literature",
+  "russkiy-mir": "russkiy-mir",
+  "salvador-dali": "salvador-dali",
+  "samoidentifikatsiya": "self-identity",
+  "samoopredelenie": "self-definition",
+  "sankt-peterburg": "saint-petersburg",
+  "sasha-skochilenko": "sasha-skochilenko",
+  "satira": "satiric",
+  "saund-art": "sound-art",
+  "schaste": "hapiness",
+  "school": "school",
+  "science": "science",
+  "sculpture": "sculpture",
+  "second-world-war": "second-world-war",
+  "sekond-hend": "second-hand",
+  "seksprosvet": "sex-education",
+  "sekty": "sects",
+  "semiotics": "semiotics",
+  "serbiya": "serbia",
+  "serialy": "series",
+  "sever": "north",
+  "severnaya-koreya": "north-korea",
+  "sex": "sex",
+  "shotlandiya": "scotland",
+  "shugeyz": "shoegaze",
+  "siloviki": "siloviki",
+  "simeon-bekbulatovich": "simeon-bekbulatovich",
+  "simvolizm": "simbolism",
+  "siriya": "siria",
+  "skulptura": "sculpture",
+  "slavoy-zhizhek": "slavoj-zizek",
+  "smysl": "meaning",
+  "sny": "dreams",
+  "sobytiya": "events",
+  "social": "society",
+  "society": "society",
+  "sociology": "sociology",
+  "sofya-paleolog": "sofya-paleolog",
+  "sofya-vitovtovna": "sofya-vitovtovna",
+  "soobschestva": "communities",
+  "soprotivlenie": "resistence",
+  "sotsializm": "socialism",
+  "sotsialnaya-filosofiya": "social-philosophy",
+  "sotsseti": "social-networks",
+  "sotvorenie-tretego-rima": "third-rome",
+  "sovremennost": "modernity",
+  "spaces": "spaces",
+  "spektakl": "spectacles",
+  "spetseffekty": "special-fx",
+  "spetsoperatsiya": "special-operation",
+  "spetssluzhby": "special-services",
+  "sport": "sport",
+  "srednevekove": "middle-age",
+  "state": "state",
+  "statistika": "statistics",
+  "stendap": "stand-up",
+  "stoitsizm": "stoicism",
+  "stories": "stories",
+  "stoyanie-na-ugre": "stoyanie-na-ugre",
+  "strah": "fear",
+  "street-art": "street-art",
+  "stsenarii": "scenarios",
+  "summary": "summary",
+  "supergeroi": "superheroes",
+  "svetlana-aleksievich": "svetlana-aleksievich",
+  "svobodu-ivanu-golunovu": "free-ivan-golunov",
+  "syurrealizm": "surrealism",
+  "tales": "tales",
+  "tanets": "dance",
+  "tataro-mongolskoe-igo": "mongol-tatar-yoke",
+  "tatuirovki": "tattoo",
+  "technology": "technology",
+  "televidenie": "tv",
+  "telo": "body",
+  "telo-kak-iskusstvo": "body-as-art",
+  "terrorizm": "terrorism",
+  "tests": "tests",
+  "text": "texts",
+  "the-beatles": "the-beatles",
+  "theater": "theater",
+  "theory": "theory",
+  "tokio": "tokio",
+  "torture": "torture",
+  "totalitarizm": "totalitarism",
+  "traditions": "traditions",
+  "tragicomedy": "tragicomedy",
+  "transgendernost": "transgender",
+  "translation": "translation",
+  "transport": "transport",
+  "travel": "travel",
+  "travma": "trauma",
+  "trendy": "trends",
+  "tretiy-reyh": "third-reich",
+  "triller": "thriller",
+  "tsar": "central-african-republic",
+  "tsar-edip": "oedipus",
+  "tsarevich-dmitriy": "tsarevich-dmitry",
+  "tsennosti": "values",
+  "tsenzura": "censorship",
+  "tseremonii": "ceremonies",
+  "turizm": "tourism",
+  "tvorchestvo": "creativity",
+  "ugnetennyy-zhilischnyy-klass": "oppressed-housing-class",
+  "uilyam-shekspir": "william-shakespeare",
+  "ukraine": "ukraine",
+  "university": "university",
+  "urban-studies": "urban-studies",
+  "uroki-literatury": "literature-lessons",
+  "usa": "usa",
+  "ussr": "ussr",
+  "utopiya": "utopia",
+  "valter-benyamin": "valter-benyamin",
+  "varlam-shalamov": "varlam-shalamov",
+  "vasiliy-ii-temnyy": "basil-ii-temnyy",
+  "vasiliy-iii": "basil-iii",
+  "vdnh": "vdnh",
+  "vechnost": "ethernety",
+  "velikobritaniya": "great-britain",
+  "velimir-hlebnikov": "velimir-hlebnikov",
+  "velkom-tu-greyt-britn": "welcome-to-great-britain",
+  "venedikt-erofeev": "venedikt-erofeev",
+  "venetsiya": "veneece",
+  "vengriya": "hungary",
+  "verlibry": "free-verse",
+  "veschi": "things",
+  "vessels": "vessels",
+  "veterany": "veterans",
+  "video": "video",
+  "videoart": "videoart",
+  "videoklip": "clips",
"videopoeziya": "video-poetry", + "viktor-astafev": "viktor-astafev", + "viktor-pelevin": "viktor-pelevin", + "vilgelm-rayh": "wilhelm-reich", + "vinzavod": "vinzavod", + "violence": "violence", + "visual-culture": "visual-culture", + "vizualnaya-poeziya": "visual-poetry", + "vladimir-lenin": "vladimir-lenin", + "vladimir-nabokov": "vladimir-nabokov", + "vladimir-putin": "vladimir-putin", + "vladimir-sorokin": "vladimir-sorokin", + "vladimir-voynovich": "vladimir-voynovich", + "volga": "volga", + "volontery": "volonteurs", + "vong-karvay": "wong-karwai", + "vospominaniya": "memories", + "vostok": "east", + "vremya": "time", + "vudi-allen": "woody-allen", + "vynuzhdennye-otnosheniya": "forced-relationship", + "war": "war", + "war-in-ukraine-images": "war-in-ukrahine-images", + "women": "women", + "work": "work", + "writers": "writers", + "xx-century": "xx-century", + "yakob-yordans": "yakob-yordans", + "yan-vermeer": "yan-vermeer", + "yanka-dyagileva": "yanka-dyagileva", + "yaponskaya-literatura": "japan-literature", + "youth": "youth", + "yozef-rot": "yozef-rot", + "yurgen-habermas": "jorgen-habermas", + "za-liniey-mannergeyma": "behind-mannerheim-line", + "zahar-prilepin": "zahar-prilepin", + "zakonodatelstvo": "laws", + "zakony-mira": "world-laws", + "zametki": "notes", + "zhelanie": "wish", + "konets-vesny": "end-of-spring", + "zhivotnye": "animals", + "zhoze-saramago": "jose-saramago", + "zigmund-freyd": "sigmund-freud", + "zolotaya-orda": "golden-horde", + "zombi": "zombie", + "zombi-simpsony": "zombie-simpsons", + "rouling": "rowling", + "diskurs-analiz": "discourse-analytics", + "menty": "police", + "ptitsy": "birds", + "salo": "lard", + "rasizm": "racism", + "griby": "mushrooms", + "politzaklyuchennye": "political-prisoners", + "molodezh": "youth", + "blocked-in-russia": "blocked-in-russia", + "kavarga": "kavarga", + "galereya-anna-nova": "gallery-anna-nova", + "derrida": "derrida" +} \ No newline at end of file diff --git a/migration/tables/topics.py b/migration/tables/topics.py new file mode 100644 index 00000000..57084ecb --- /dev/null +++ b/migration/tables/topics.py @@ -0,0 +1,28 @@ +from migration.extract import extract_md, html2text +from orm.base import local_session +from orm import Topic, Community + +def migrate(entry): + body_orig = entry.get('description', '').replace(' ', ' ') + topic_dict = { + 'slug': entry['slug'], + 'oid': entry['_id'], + 'title': entry['title'].replace(' ', ' '), #.lower(), + 'children': [], + 'community' : Community.default_community.slug + } + topic_dict['body'] = extract_md(html2text(body_orig), entry['_id']) + with local_session() as session: + slug = topic_dict['slug'] + topic = session.query(Topic).filter(Topic.slug == slug).first() + if not topic: + topic = Topic.create(**topic_dict) + if len(topic.title) > len(topic_dict['title']): + topic.update({ 'title': topic_dict['title'] }) + if len(topic.body) < len(topic_dict['body']): + topic.update({ 'body': topic_dict['body'] }) + session.commit() + # print(topic.__dict__) + rt = topic.__dict__.copy() + del rt['_sa_instance_state'] + return rt diff --git a/migration/tables/users.py b/migration/tables/users.py new file mode 100644 index 00000000..40b0eaf4 --- /dev/null +++ b/migration/tables/users.py @@ -0,0 +1,106 @@ +import sqlalchemy +from migration.html2text import html2text +from orm import User, UserRating +from dateutil.parser import parse +from orm.base import local_session + +def migrate(entry): + if 'subscribedTo' in entry: del entry['subscribedTo'] + email = 
diff --git a/migration/tables/users.py b/migration/tables/users.py
new file mode 100644
index 00000000..40b0eaf4
--- /dev/null
+++ b/migration/tables/users.py
@@ -0,0 +1,106 @@
+import sqlalchemy
+from dateutil.parser import parse
+from migration.html2text import html2text
+from orm import User, UserRating
+from orm.base import local_session
+
+
+def migrate(entry):
+    if 'subscribedTo' in entry: del entry['subscribedTo']
+    email = entry['emails'][0]['address']
+    user_dict = {
+        'oid': entry['_id'],
+        'roles': [],  # entry['roles'] # roles by community
+        'ratings': [],  # entry['ratings']
+        'username': email,
+        'email': email,
+        'password': entry['services']['password'].get('bcrypt', ''),
+        'createdAt': parse(entry['createdAt']),
+        'emailConfirmed': bool(entry['emails'][0]['verified']),
+        'muted': False,  # amnesty
+        'bio': entry.get('profile', {}).get('bio', ''),
+        'notifications': [],
+        'links': [],
+        'name': 'anonymous'
+    }
+    if 'updatedAt' in entry: user_dict['updatedAt'] = parse(entry['updatedAt'])
+    if 'wasOnlineAt' in entry: user_dict['wasOnlineAt'] = parse(entry['wasOnlineAt'])
+    if entry.get('profile'):
+        # slug
+        user_dict['slug'] = entry['profile'].get('path')
+        user_dict['bio'] = html2text(entry.get('profile').get('bio') or '')
+
+        # userpic
+        try:
+            user_dict['userpic'] = 'https://assets.discours.io/unsafe/100x/' + entry['profile']['thumborId']
+        except KeyError:
+            try:
+                user_dict['userpic'] = entry['profile']['image']['url']
+            except KeyError:
+                user_dict['userpic'] = ''
+
+        # name
+        fn = entry['profile'].get('firstName', '')
+        ln = entry['profile'].get('lastName', '')
+        name = user_dict['slug'] if user_dict['slug'] else 'noname'
+        name = fn if fn else name
+        name = (name + ' ' + ln) if ln else name
+        name = entry['profile']['path'].lower().replace(' ', '-') if len(name) < 2 else name
+        user_dict['name'] = name
+
+        # links
+        fb = entry['profile'].get('facebook', False)
+        if fb: user_dict['links'].append(fb)
+        vk = entry['profile'].get('vkontakte', False)
+        if vk: user_dict['links'].append(vk)
+        tr = entry['profile'].get('twitter', False)
+        if tr: user_dict['links'].append(tr)
+        ws = entry['profile'].get('website', False)
+        if ws: user_dict['links'].append(ws)
+
+    # fall back to a slug derived from the first link, then the email local part
+    if not user_dict['slug'] and len(user_dict['links']) > 0:
+        user_dict['slug'] = user_dict['links'][0].split('/')[-1]
+    user_dict['slug'] = user_dict.get('slug') or user_dict['email'].split('@')[0]
+
+    oid = user_dict['oid']
+    try:
+        user = User.create(**user_dict.copy())
+    except sqlalchemy.exc.IntegrityError:
+        # slug already taken: reuse the existing user and remember the old oid
+        print('[migration] cannot create user ' + user_dict['slug'])
+        with local_session() as session:
+            old_user = session.query(User).filter(User.slug == user_dict['slug']).first()
+            if not old_user:
+                print('[migration] ERROR: cannot find user ' + user_dict['slug'])
+                raise Exception
+            old_user.oid = oid
+            user = old_user
+    user_dict['id'] = user.id
+    return user_dict
+
+
+def migrate_2stage(entry, id_map):
+    ce = 0
+    for rating_entry in entry.get('ratings', []):
+        rater_oid = rating_entry['createdBy']
+        rater_slug = id_map.get(rater_oid)
+        if not rater_slug:
+            ce += 1
+            # print(rating_entry)
+            continue
+        author_slug = id_map.get(entry['_id'])
+        user_rating_dict = {
+            'value': rating_entry['value'],
+            'rater': rater_slug,
+            'user': author_slug
+        }
+        with local_session() as session:
+            try:
+                user_rating = UserRating.create(**user_rating_dict)
+            except sqlalchemy.exc.IntegrityError:
+                # this rater already rated this user: concatenate the values instead
+                old_rating = session.query(UserRating).filter(
+                    UserRating.rater == rater_slug, UserRating.user == author_slug).first()
+                print('[migration] cannot create ' + author_slug + '`s rate from ' + rater_slug)
+                print('[migration] concat rating value %d+%d=%d' %
+                      (old_rating.value, rating_entry['value'], old_rating.value + rating_entry['value']))
+                old_rating.update({'value': old_rating.value + rating_entry['value']})
+                session.commit()
+            except Exception as e:
+                print(e)
+    return ce
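`migrate` and `migrate_2stage` are meant to run as two passes: the first creates users and yields oid-to-slug pairs, and only with that map can the second resolve each rating's `createdBy` oid to a rater slug. A rough driver sketch of that calling contract (the `old_users` list is hypothetical, and nothing here runs without the project's orm package):

id_map = {}
for entry in old_users:                        # old_users: hypothetical list of Mongo user documents
    user = migrate(entry)                      # first pass: create User rows
    id_map[user['oid']] = user['slug']

missed = 0
for entry in old_users:
    missed += migrate_2stage(entry, id_map)    # second pass: attach ratings via the map
print('[migration] %d ratings missed their rater' % missed)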
diff --git a/migration/utils.py b/migration/utils.py
new file mode 100644
index 00000000..9a19c556
--- /dev/null
+++ b/migration/utils.py
@@ -0,0 +1,9 @@
+from datetime import datetime
+from json import JSONEncoder
+
+
+class DateTimeEncoder(JSONEncoder):
+    def default(self, z):
+        # serialize datetimes via str(); defer everything else to JSONEncoder
+        if isinstance(z, datetime):
+            return str(z)
+        return super().default(z)
\ No newline at end of file
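A quick usage sketch for the encoder above, standalone apart from this module (the example payload is illustrative):

import json
from datetime import datetime

from migration.utils import DateTimeEncoder

payload = {'slug': 'example', 'createdAt': datetime(2022, 8, 11, 12, 14, 12)}
print(json.dumps(payload, cls=DateTimeEncoder))
# prints: {"slug": "example", "createdAt": "2022-08-11 12:14:12"}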