''' cmd managed migration '''
import base64
import json
import re
import sys

import frontmatter
from dateutil.parser import parse as date_parse

from migration.tables.comments import migrate as migrateComment
from migration.tables.content_item_categories import migrate as migrateCategory
from migration.tables.content_items import get_metadata, migrate as migrateShout
from migration.tables.tags import migrate as migrateTag
from migration.tables.users import migrate as migrateUser
from migration.utils import DateTimeEncoder
from orm import Community

# markdown image with an inline base64 data URI:
# group 1 = alt text, 2 = whole data URI, 3 = file extension, 4 = b64 payload
IMG_REGEX = r"\!\[(.*?)\]\((data\:image\/(png|jpeg|jpg);base64\,(.*?))\)"

# fallback timestamp used when a shout has no createdAt
OLD_DATE = '2016-03-05 22:22:00.350000'


def extract_images(article):
    ''' extract b64 encoded images from markdown in article body

    Decodes every inline base64 image to a file under ../static/upload/
    and rewrites the markdown to reference the written file instead.
    Mutates and returns the same ``article`` dict.
    '''
    body = article['body']
    images = []
    matches = re.finditer(IMG_REGEX, body, re.IGNORECASE | re.MULTILINE)
    for i, match in enumerate(matches, start=1):
        ext = match.group(3)
        # NOTE(review): assumes article['old_id'] is a string — verify upstream
        link = '/static/upload/image-' + article['old_id'] + str(i) + '.' + ext
        img = match.group(4)
        if img not in images:
            # '..' prefix: images land one directory above the CWD
            with open('..' + link, 'wb') as f:
                f.write(base64.b64decode(img))
            images.append(img)
        # swap the whole data URI for the static link
        body = body.replace(match.group(2), link)
        print(link)
    article['body'] = body
    return article


def users():
    ''' migrating users first

    Writes migration/data/users.dict.json (all users, keyed by old _id)
    and ../src/data/authors.json (top-10 users by rating, keyed by slug).
    '''
    print('migrating users...')
    newdata = {}
    with open('migration/data/users.json') as f:
        data = json.loads(f.read())
    counter = 0
    export_data = {}
    for entry in data:
        oid = entry['_id']
        user = migrateUser(entry)
        newdata[oid] = user
        # strip credentials and private fields before export
        del user['password']
        del user['notifications']
        # del user['oauth']
        del user['emailConfirmed']
        del user['username']
        del user['email']
        export_data[user['slug']] = user
        counter += 1
    # keep only the ten highest-rated authors for the public export
    export_list = sorted(export_data.items(),
                         key=lambda item: item[1]['rating'])[-10:]
    with open('migration/data/users.dict.json', 'w') as f:
        f.write(json.dumps(newdata, cls=DateTimeEncoder))  # NOTE: by old_id
    with open('../src/data/authors.json', 'w') as f:
        f.write(json.dumps(dict(export_list), cls=DateTimeEncoder,
                           indent=4, sort_keys=True, ensure_ascii=False))
    print(str(len(newdata.items())) + ' user accounts were migrated')
    print(str(len(export_list)) + ' authors were exported')


def topics():
    ''' topics from categories and tags

    Writes migration/data/topics.dict.json (by cat_id) and
    ../src/data/topics.json (by slug, sorted by creation date).
    '''
    print('migrating topics...')
    with open('migration/data/content_item_categories.json') as f:
        cats_data = json.loads(f.read())
    cat_topics = {}
    slug_topics = {}
    counter = 0
    try:
        for cat in cats_data:
            topic = migrateCategory(cat)
            cat_topics[topic['cat_id']] = topic
            slug_topics[topic['slug']] = topic
            counter += 1
    except Exception as e:
        print('cats exception, try to remove database first')
        raise e
    # TODO: tag migration is currently disabled
    # try:
    #     for tag in tag_data:
    #         topic = migrateTag(tag)
    #         newdata[topic['slug']] = topic
    #         counter += 1
    # except Exception:
    #     print('tags exception, try to remove database first')
    #     raise Exception
    export_list = sorted(slug_topics.items(),
                         key=lambda item: str(item[1]['createdAt']))
    with open('migration/data/topics.dict.json', 'w') as f:
        f.write(json.dumps(cat_topics, cls=DateTimeEncoder,
                           indent=4, sort_keys=True, ensure_ascii=False))
    with open('../src/data/topics.json', 'w') as f:
        f.write(json.dumps(dict(export_list), cls=DateTimeEncoder,
                           indent=4, sort_keys=True, ensure_ascii=False))
    # ' tags and ' + str(len(tag_data)) +
    print(str(counter) + ' / ' + str(len(cats_data)) + ' migrated')
    print(str(len(export_list)) + ' topics were exported')


def shouts():
    ''' migrating content items one by one

    Writes migration/data/shouts.dict.json keyed by slug and appends a
    per-item line to ./shouts.id.log. Re-raises on the first failing item.
    '''
    print('loading shouts...')
    counter = 0
    discours_author = 0
    with open('migration/data/content_items.json') as f:
        content_data = json.loads(f.read())
    newdata = {}
    print(str(len(content_data)) + ' entries loaded. now migrating...')
    errored = []
    for entry in content_data:
        try:
            shout = migrateShout(entry)
            newdata[shout['slug']] = shout
            author = newdata[shout['slug']]['authors'][0]['slug']
            line = str(counter + 1) + ': ' + shout['slug'] + " @" + str(author)
            print(line)
            counter += 1
            if author == 'discours':
                discours_author += 1
            with open('./shouts.id.log', 'a') as log:
                log.write(line + '\n')
        except Exception as e:
            print(entry['_id'])
            errored.append(entry)
            raise e
    # FIX: removed dead limit/sys.argv computation — `limit` was never used
    # and `sys` was not imported at module level in the original
    with open('migration/data/shouts.dict.json', 'w') as f:
        f.write(json.dumps(newdata, cls=DateTimeEncoder))
    print(str(counter) + '/' + str(len(content_data)) +
          ' content items were migrated')
    print(str(discours_author) + ' from them by @discours')


def comments():
    ''' migrating comments on content items one by one

    Groups old comments by their content item, migrates them, maps them to
    the new shout slugs, and rewrites ../src/data/comments.json for the
    articles that were actually exported.
    '''
    with open('migration/data/content_items.json') as f:
        content_data = json.loads(f.read())  # old content
    with open('migration/data/shouts.dict.json', 'r') as f:
        shouts_dict = json.loads(f.read())  # all shouts by slug
    print(str(len(shouts_dict.keys())) + ' migrated shouts loaded')
    # shouts by old_id
    shouts_old = {x['old_id']: x for slug, x in shouts_dict.items()}
    print(str(len(content_data)) + ' content items loaded')
    with open('migration/data/comments.json') as f:
        comments_data = json.loads(f.read())
    print(str(len(comments_data)) + ' comments loaded')
    # sort comments by old posts ids
    comments_by_post = {}
    for old_comment in comments_data:
        cid = old_comment['contentItem']
        comments_by_post.setdefault(cid, []).append(old_comment)
    # migrate comments
    comments_by_shoutslug = {}
    for content_item in content_data:
        old_id = content_item['_id']
        if content_item.get('commentedAt', False):
            comments = [migrateComment(c)
                        for c in comments_by_post.get(old_id, [])]
            # FIX: original read `comments.length` (a JS-ism) which raises
            # AttributeError on a Python list
            if len(comments) > 0:
                shout = shouts_old.get(old_id, {'slug': 'abandoned-comments'})
                comments_by_shoutslug[shout['slug']] = comments
    with open('../src/data/articles.json') as f:
        export_articles = json.loads(f.read())
    print(str(len(export_articles.items())) + ' articles were exported')
    export_comments = {}
    c = 0
    for slug, article in export_articles.items():
        comments = comments_by_shoutslug.get(slug, [])
        if len(comments) > 0:
            export_comments[slug] = comments
            c += len(comments)
    print(str(len(export_comments.items())) +
          ' after adding those having comments')
    with open('../src/data/comments.json', 'w') as f:
        f.write(json.dumps(dict(export_comments), cls=DateTimeEncoder,
                           indent=4, sort_keys=True, ensure_ascii=False))
    print(str(c) + ' comments were exported')


def export_shouts(limit):
    ''' export migrated articles as frontmatter markdown + html

    :param limit: max number of newest articles to export; falsy = all.
    Rewrites ../src/data/articles.json and ../src/data/authors.json, then
    re-runs comments().
    '''
    print('reading json...')
    with open('migration/data/content_items.json') as f:
        content_data = json.loads(f.read())
    content_dict = {x['_id']: x for x in content_data}
    print(str(len(content_data)) + ' content items loaded')
    with open('migration/data/shouts.dict.json', 'r') as f:
        newdata = json.loads(f.read())
    print(str(len(newdata.keys())) + ' migrated shouts loaded')
    with open('migration/data/users.dict.json') as f:
        users_old = json.loads(f.read())
    # FIX: original counted newdata here (copy-paste) — count the users
    print(str(len(users_old.keys())) + ' migrated users loaded')
    with open('../src/data/authors.json') as f:
        export_authors = json.loads(f.read())
    print(str(len(export_authors.items())) + ' exported authors loaded')
    users_slug = {u['slug']: u for old_id, u in users_old.items()}
    print(str(len(users_slug.items())) + ' users loaded')
    # published articles only, newest first
    export_list = [i for i in newdata.items()
                   if i[1]['layout'] == 'article' and i[1]['published']]
    export_list = sorted(export_list,
                         key=lambda item: item[1]['createdAt'] or OLD_DATE,
                         reverse=True)
    print(str(len(export_list)) + ' filtered')
    export_list = export_list[:limit or len(export_list)]
    export_clean = {}
    for slug, article in export_list:
        if article['layout'] == 'article':
            for author in article['authors']:
                export_authors[author['slug']] = users_slug[author['slug']]
            export_clean[article['slug']] = extract_images(article)
            metadata = get_metadata(article)
            content = frontmatter.dumps(
                frontmatter.Post(article['body'], **metadata))
            with open('../content/discours.io/' + slug + '.md', 'w') as f:
                f.write(content)
            # print(slug)
            with open('../content/discours.io/' + slug + '.html', 'w') as f:
                f.write(content_dict[article['old_id']]['body'])
    with open('../src/data/articles.json', 'w') as f:
        f.write(json.dumps(dict(export_clean), cls=DateTimeEncoder,
                           indent=4, sort_keys=True, ensure_ascii=False))
    print(str(len(export_clean.items())) + ' articles exported')
    with open('../src/data/authors.json', 'w') as f:
        f.write(json.dumps(export_authors, cls=DateTimeEncoder,
                           indent=4, sort_keys=True, ensure_ascii=False))
    comments()
    print(str(len(export_authors.items())) + ' total authors exported')


def export_slug(slug):
    ''' export a single shout (and its author) by slug

    Merges the shout into ../src/data/articles.json and its author into
    ../src/data/authors.json, then re-runs comments().
    '''
    with open('migration/data/shouts.dict.json') as f:
        shouts_dict = json.loads(f.read())
    print(str(len(shouts_dict.items())) + ' migrated shouts loaded')
    # NOTE: this exact file is by old_id
    with open('migration/data/users.dict.json') as f:
        users_old = json.loads(f.read())
    print(str(len(users_old.items())) + ' migrated users loaded')
    users_dict = {x[1]['slug']: x for x in users_old.items()}
    with open('../src/data/authors.json') as f:
        exported_authors = json.loads(f.read())
    print(str(len(exported_authors.items())) + ' exported authors loaded')
    with open('../src/data/articles.json') as f:
        exported_articles = json.loads(f.read())
    print(str(len(exported_articles.items())) + ' exported articles loaded')
    shout = shouts_dict.get(slug, False)
    if shout:
        author = users_dict.get(shout['authors'][0]['slug'], None)
        exported_authors.update({shout['authors'][0]['slug']: author})
        exported_articles.update({shout['slug']: shout})
        print(shout)
        with open('../src/data/articles.json', 'w') as f:
            f.write(json.dumps(exported_articles, cls=DateTimeEncoder,
                               indent=4, sort_keys=True, ensure_ascii=False))
        with open('../src/data/authors.json', 'w') as f:
            f.write(json.dumps(exported_authors, cls=DateTimeEncoder,
                               indent=4, sort_keys=True, ensure_ascii=False))
    else:
        print('no old id error!')
    # print(str(len(shouts_dict)) + ' shouts were migrated')
    # NOTE(review): original indentation was unrecoverable; assuming the
    # trailing steps run regardless of lookup success — confirm against VCS
    print(slug)
    comments()
    print('finished.')


if __name__ == '__main__':
    if len(sys.argv) > 1:
        if sys.argv[1] == "users":
            users()
        elif sys.argv[1] == "topics":
            topics()
        elif sys.argv[1] == "shouts":
            # ensure the default community exists before migrating content;
            # best-effort: swallow the error if it is already there
            try:
                Community.create(**{
                    'slug': 'discours.io',
                    'name': 'Дискурс',
                    'pic': 'https://discours.io/images/logo-min.svg',
                    'createdBy': '0',
                    'createdAt': date_parse(OLD_DATE)
                })
            except Exception:
                pass
            shouts()
        elif sys.argv[1] == "comments":
            comments()
        elif sys.argv[1] == "export_shouts":
            limit = int(sys.argv[2]) if len(sys.argv) > 2 else None
            export_shouts(limit)
        elif sys.argv[1] == "all":
            users()
            topics()
            shouts()
            comments()
        elif sys.argv[1] == "bson":
            from migration import bson2json
            bson2json.json_tables()
        elif sys.argv[1] == 'slug':
            export_slug(sys.argv[2])
        else:
            print('usage: python migrate.py bson\n.. \ttopics \n.. \tusers \n.. \tshouts \n.. \tcomments\n.. \texport_shouts \n.. \tslug \n.. \tall>')