''' cmd managed migration '''
import sys
import json
import base64
import re
import frontmatter

from migration.tables.users import migrate as migrateUser
from migration.tables.content_items import get_metadata, migrate as migrateShout
from migration.tables.content_item_categories import migrate as migrateCategory
from migration.tables.tags import migrate as migrateTag
from migration.tables.comments import migrate as migrateComment
from migration.utils import DateTimeEncoder
from orm import Community
from dateutil.parser import parse as date_parse


IMG_REGEX = r"\!\[(.*?)\]\((data\:image\/(png|jpeg|jpg);base64\,(.*?))\)"
OLD_DATE = '2016-03-05 22:22:00.350000'
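# NOTE: IMG_REGEX captures inline base64 markdown images: group(1) is the alt text,
# group(2) the full data URI, group(3) the extension (png/jpeg/jpg), group(4) the
# base64 payload. OLD_DATE is the fallback timestamp used when sorting shouts without
# createdAt and when creating the default community.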
def extract_images(article):
    ''' extract b64 encoded images from markdown in article body '''
    body = article['body']
    images = []
    matches = re.finditer(IMG_REGEX, body, re.IGNORECASE | re.MULTILINE)
    for i, match in enumerate(matches, start=1):
        ext = match.group(3)
        link = '/static/upload/image-' + \
            article['old_id'] + str(i) + '.' + ext
        img = match.group(4)
        if img not in images:
            open('..' + link, 'wb').write(base64.b64decode(img))
            images.append(img)
        body = body.replace(match.group(2), link)
        print(link)
    article['body'] = body
    return article
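# For example, extract_images() rewrites a body containing
# '![pic](data:image/png;base64,iVBORw...)' to '![pic](/static/upload/image-<old_id>1.png)'
# and writes the decoded bytes to '../static/upload/image-<old_id>1.png' (one level above the cwd).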
def users():
    ''' migrating users first '''
    print('migrating users...')
    newdata = {}
    data = json.loads(open('migration/data/users.json').read())
    counter = 0
    export_data = {}
    for entry in data:
        oid = entry['_id']
        user = migrateUser(entry)
        newdata[oid] = user
        del user['password']
        del user['notifications']
        # del user['oauth']
        del user['emailConfirmed']
        del user['username']
        del user['email']
        export_data[user['slug']] = user
        counter += 1
    export_list = sorted(export_data.items(), key=lambda item: item[1]['rating'])[-10:]
    open('migration/data/users.dict.json', 'w').write(json.dumps(newdata, cls=DateTimeEncoder))  # NOTE: by old_id
    open('../src/data/authors.json', 'w').write(json.dumps(dict(export_list),
                                                           cls=DateTimeEncoder,
                                                           indent=4,
                                                           sort_keys=True,
                                                           ensure_ascii=False))
    print(str(len(newdata.items())) + ' user accounts were migrated')
    print(str(len(export_list)) + ' authors were exported')
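# NOTE: users() keeps every migrated user in migration/data/users.dict.json (keyed by old _id),
# while ../src/data/authors.json only receives the ten highest-rated users.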
def topics():
    ''' topics from categories and tags '''
    print('migrating topics...')
    cats_data = json.loads(open('migration/data/content_item_categories.json').read())
    cat_topics = {}
    slug_topics = {}
    counter = 0
    try:
        for cat in cats_data:
            topic = migrateCategory(cat)
            cat_topics[topic['cat_id']] = topic
            slug_topics[topic['slug']] = topic
            counter += 1
    except Exception as e:
        print('cats exception, try to remove database first')
        raise e
    '''
    try:
        for tag in tag_data:
            topic = migrateTag(tag)
            newdata[topic['slug']] = topic
            counter += 1
    except Exception:
        print('tags exception, try to remove database first')
        raise Exception
    '''
    export_list = sorted(slug_topics.items(), key=lambda item: str(
        item[1]['createdAt']))
    open('migration/data/topics.dict.json', 'w').write(json.dumps(cat_topics,
                                                                  cls=DateTimeEncoder,
                                                                  indent=4,
                                                                  sort_keys=True,
                                                                  ensure_ascii=False))
    open('../src/data/topics.json', 'w').write(json.dumps(dict(export_list),
                                                          cls=DateTimeEncoder,
                                                          indent=4,
                                                          sort_keys=True,
                                                          ensure_ascii=False))
    # ' tags and ' + str(len(tag_data)) +
    print(str(counter) + ' / ' + str(len(cats_data)) + ' migrated')
    print(str(len(export_list)) + ' topics were exported')
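# NOTE: tag migration via migrateTag is disabled in the quoted-out block inside topics(),
# so topics are currently built from categories only.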
def shouts():
    ''' migrating content items one by one '''
    print('loading shouts...')
    counter = 0
    discours_author = 0
    content_data = json.loads(open('migration/data/content_items.json').read())
    content_dict = { x['_id']: x for x in content_data }
    newdata = {}
    print(str(len(content_data)) + ' entries loaded. now migrating...')
    errored = []
    for entry in content_data:
        try:
            shout = migrateShout(entry)
            newdata[shout['slug']] = shout
            author = newdata[shout['slug']]['authors'][0]['slug']
            line = str(counter + 1) + ': ' + shout['slug'] + " @" + str(author)
            print(line)
            counter += 1
            if author == 'discours':
                discours_author += 1
            open('./shouts.id.log', 'a').write(line + '\n')
        except Exception as e:
            print(entry['_id'])
            errored.append(entry)
            raise e
    try:
        # NOTE: the optional numeric argument is parsed here but not applied to the loop above
        limit = int(sys.argv[2]) if len(sys.argv) > 2 else len(content_data)
    except ValueError:
        limit = len(content_data)
    open('migration/data/shouts.dict.json',
         'w').write(json.dumps(newdata, cls=DateTimeEncoder))
    print(str(counter) + '/' + str(len(content_data)) +
          ' content items were migrated')
    print(str(discours_author) + ' of them are by @discours')
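# Invocation sketch: `python migrate.py shouts` first tries to create the default
# 'discours.io' community (see __main__), then shouts() migrates every entry from
# migration/data/content_items.json and appends one line per shout to ./shouts.id.log.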
def comments():
    ''' migrating comments on content items one by one '''
    content_data = json.loads(open('migration/data/content_items.json').read())  # old content
    content_dict = { x['_id']: x for x in content_data }  # by old_id
    shouts_dict = json.loads(open('migration/data/shouts.dict.json', 'r').read())  # all shouts by slug
    print(str(len(shouts_dict.keys())) + ' migrated shouts loaded')
    shouts_old = { x['old_id']: x for slug, x in shouts_dict.items() }  # shouts by old_id
    print(str(len(content_data)) + ' content items loaded')
    comments_data = json.loads(open('migration/data/comments.json').read())
    print(str(len(comments_data)) + ' comments loaded')
    comments_by_post = {}
    # group comments by old post id
    for old_comment in comments_data:
        cid = old_comment['contentItem']
        comments_by_post[cid] = comments_by_post.get(cid, [])
        comments_by_post[cid].append(old_comment)
    # migrate comments
    comments_by_shoutslug = {}
    for content_item in content_data:
        old_id = content_item['_id']
        if content_item.get('commentedAt', False):
            comments = [ migrateComment(c) for c in comments_by_post.get(old_id, []) ]
            if len(comments) > 0:
                shout = shouts_old.get(old_id, { 'slug': 'abandoned-comments' })
                comments_by_shoutslug[shout['slug']] = comments
    export_articles = json.loads(open('../src/data/articles.json').read())
    print(str(len(export_articles.items())) + ' articles were exported')
    export_comments = {}
    c = 0
    for slug, article in export_articles.items():
        comments = comments_by_shoutslug.get(slug, [])
        if len(comments) > 0:
            export_comments[slug] = comments
            c += len(comments)
    print(str(len(export_comments.items())) + ' exported articles have comments')
    open('../src/data/comments.json', 'w').write(json.dumps(dict(export_comments),
                                                            cls=DateTimeEncoder,
                                                            indent=4,
                                                            sort_keys=True,
                                                            ensure_ascii=False))
    print(str(c) + ' comments were exported')
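# NOTE: in comments(), comments whose content item was never migrated are grouped under
# the 'abandoned-comments' slug; only slugs present in ../src/data/articles.json
# end up in ../src/data/comments.json.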
def export_shouts(limit):
    ''' export migrated articles as markdown + html and rebuild the json data files '''
    print('reading json...')
    content_data = json.loads(open('migration/data/content_items.json').read())
    content_dict = { x['_id']: x for x in content_data }
    print(str(len(content_data)) + ' content items loaded')
    newdata = json.loads(open('migration/data/shouts.dict.json', 'r').read())
    print(str(len(newdata.keys())) + ' migrated shouts loaded')
    users_old = json.loads(open('migration/data/users.dict.json').read())
    print(str(len(users_old.keys())) + ' migrated users loaded')
    export_authors = json.loads(open('../src/data/authors.json').read())
    print(str(len(export_authors.items())) + ' exported authors loaded')
    users_slug = { u['slug']: u for old_id, u in users_old.items() }
    print(str(len(users_slug.items())) + ' users loaded')

    export_list = [i for i in newdata.items() if i[1]['layout'] == 'article' and i[1]['published']]
    export_list = sorted(export_list, key=lambda item: item[1]['createdAt'] or OLD_DATE, reverse=True)
    print(str(len(export_list)) + ' filtered')

    export_list = export_list[:limit or len(export_list)]
    export_clean = {}
    for (slug, article) in export_list:
        if article['layout'] == 'article':
            for author in article['authors']:
                export_authors[author['slug']] = users_slug[author['slug']]
            export_clean[article['slug']] = extract_images(article)
            metadata = get_metadata(article)
            content = frontmatter.dumps(frontmatter.Post(article['body'], **metadata))
            open('../content/discours.io/' + slug + '.md', 'w').write(content)
            # print(slug)
            open('../content/discours.io/' + slug + '.html', 'w').write(content_dict[article['old_id']]['body'])
    open('../src/data/articles.json', 'w').write(json.dumps(dict(export_clean),
                                                            cls=DateTimeEncoder,
                                                            indent=4,
                                                            sort_keys=True,
                                                            ensure_ascii=False))
    print(str(len(export_clean.items())) + ' articles exported')
    open('../src/data/authors.json', 'w').write(json.dumps(export_authors,
                                                           cls=DateTimeEncoder,
                                                           indent=4,
                                                           sort_keys=True,
                                                           ensure_ascii=False))
    comments()
    print(str(len(export_authors.items())) + ' total authors exported')
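# Output sketch for export_shouts(limit), assuming the target directories already exist:
#   ../content/discours.io/<slug>.md    frontmatter + markdown body
#   ../content/discours.io/<slug>.html  original body from content_items.json
#   ../src/data/articles.json and ../src/data/authors.json (plus ../src/data/comments.json
#   via the comments() call at the end)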
def export_slug(slug):
    ''' export a single shout by slug into the exported json data files '''
    shouts_dict = json.loads(open('migration/data/shouts.dict.json').read())
    print(str(len(shouts_dict.items())) + ' migrated shouts loaded')
    users_old = json.loads(open('migration/data/users.dict.json').read())  # NOTE: this exact file is by old_id
    print(str(len(users_old.items())) + ' migrated users loaded')
    users_dict = { u['slug']: u for old_id, u in users_old.items() }
    exported_authors = json.loads(open('../src/data/authors.json').read())
    print(str(len(exported_authors.items())) + ' exported authors loaded')
    exported_articles = json.loads(open('../src/data/articles.json').read())
    print(str(len(exported_articles.items())) + ' exported articles loaded')
    shout = shouts_dict.get(slug, False)
    if shout:
        author = users_dict.get(shout['authors'][0]['slug'], None)
        exported_authors.update({shout['authors'][0]['slug']: author})
        exported_articles.update({shout['slug']: shout})
        print(shout)
        open('../src/data/articles.json', 'w').write(json.dumps(exported_articles,
                                                                cls=DateTimeEncoder,
                                                                indent=4,
                                                                sort_keys=True,
                                                                ensure_ascii=False))
        open('../src/data/authors.json', 'w').write(json.dumps(exported_authors,
                                                               cls=DateTimeEncoder,
                                                               indent=4,
                                                               sort_keys=True,
                                                               ensure_ascii=False))
    else:
        # print(str(len(shouts_dict)) + ' shouts were migrated')
        print('error: ' + slug + ' was not found among migrated shouts')
    comments()
    print('finished.')
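# Usage sketch: `python migrate.py slug <slug>`; the slug must already exist in
# migration/data/shouts.dict.json, i.e. `python migrate.py shouts` has to be run first.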
if __name__ == '__main__':
    if len(sys.argv) > 1:
        if sys.argv[1] == "users":
            users()
        elif sys.argv[1] == "topics":
            topics()
        elif sys.argv[1] == "shouts":
            try:
                Community.create(**{
                    'slug': 'discours.io',
                    'name': 'Дискурс',
                    'pic': 'https://discours.io/images/logo-min.svg',
                    'createdBy': '0',
                    'createdAt': date_parse(OLD_DATE)
                })
            except Exception:
                pass
            shouts()
        elif sys.argv[1] == "comments":
            comments()
        elif sys.argv[1] == "export_shouts":
            limit = int(sys.argv[2]) if len(sys.argv) > 2 else None
            export_shouts(limit)
        elif sys.argv[1] == "all":
            users()
            topics()
            shouts()
            comments()
        elif sys.argv[1] == "bson":
            from migration import bson2json
            bson2json.json_tables()
        elif sys.argv[1] == 'slug':
            export_slug(sys.argv[2])
        else:
            print('usage: python migrate.py <command>\n'
                  '.. \tbson\n'
                  '.. \tusers\n'
                  '.. \ttopics\n'
                  '.. \tshouts\n'
                  '.. \tcomments\n'
                  '.. \texport_shouts <limit>\n'
                  '.. \tslug <slug>\n'
                  '.. \tall')