upgrade migration

This commit is contained in:
Untone 2021-10-08 07:42:59 +03:00
parent 3efba57cf3
commit c3e0c5720a
4 changed files with 260 additions and 174 deletions

View File

@ -1,18 +1,45 @@
''' cmd managed migration '''
import json import json
import base64
import re
from migration.tables.users import migrate as migrateUser from migration.tables.users import migrate as migrateUser
from migration.tables.content_items import migrate as migrateShout from migration.tables.content_items import migrate as migrateShout
from migration.tables.content_item_categories import migrate as migrateTopic from migration.tables.content_item_categories import migrate as migrateCategory
from migration.tables.tags import migrate as migrateTag
from migration.utils import DateTimeEncoder from migration.utils import DateTimeEncoder
from orm import Community from orm import Community
def users(limit):
# Markdown image whose target is an inline base64 data URI.
# Groups: 1 = alt text, 2 = whole data URI, 3 = image type, 4 = base64 payload.
IMG_REGEX = r"\!\[(.*?)\]\((data\:image\/(png|jpeg|jpg);base64\,(.*?))\)"
# Fallback timestamp string used where a record has no usable creation date.
OLD_DATE = '2016-03-05 22:22:00.350000'


def extract_images(article, upload_root='..'):
    ''' extract b64 encoded images from markdown in article body

    Every inline ``data:image/...;base64,...`` image found in
    ``article['body']`` is decoded and written to
    ``<upload_root>/static/upload/image-<old_id><n>.<ext>`` (``n`` is the
    1-based position of the match), and the data URI in the body is replaced
    by the ``/static/upload/...`` link.

    article -- dict with at least the keys 'body' and 'old_id'; it is
               modified in place.
    upload_root -- directory that contains ``static/upload``; defaults to
                   '..', the location the original code always used.

    Returns the same ``article`` dict with the rewritten body.
    '''
    import os  # local import: only this function needs it

    body = article['body']
    written = {}  # base64 payload -> link of the file that holds it
    matches = re.finditer(IMG_REGEX, body, re.IGNORECASE | re.MULTILINE)
    for i, match in enumerate(matches, start=1):
        data_uri = match.group(2)
        ext = match.group(3)
        img = match.group(4)
        if img in written:
            # Same payload seen before (possibly under another mime
            # spelling): point at the file that was really written instead
            # of a link for which no file exists.
            body = body.replace(data_uri, written[img])
            continue
        link = '/static/upload/image-' + \
            str(article['old_id']) + str(i) + '.' + ext
        path = upload_root + link
        os.makedirs(os.path.dirname(path), exist_ok=True)
        with open(path, 'wb') as image_file:
            image_file.write(base64.b64decode(img))
        written[img] = link
        # str.replace swaps every occurrence of this exact data URI at once.
        body = body.replace(data_uri, link)
        print(link)
    article['body'] = body
    return article
def users():
''' migrating users first '''
print('migrating users...') print('migrating users...')
data = json.loads(open('migration/data/users.json').read())
newdata = {} newdata = {}
exportData = {} data = json.loads(open('migration/data/users.json').read())
counter = 0 counter = 0
# limit = 100 export_data = {}
#try:
for entry in data: for entry in data:
oid = entry['_id'] oid = entry['_id']
user = migrateUser(entry) user = migrateUser(entry)
@ -23,96 +50,127 @@ def users(limit):
del user['emailConfirmed'] del user['emailConfirmed']
del user['username'] del user['username']
del user['email'] del user['email']
exportData[user['slug']] = user export_data[user['slug']] = user
counter += 1 counter += 1
if counter > limit: export_list = sorted(export_data.items(),
break key=lambda item: item[1]['rating'])[-10:]
#except Exception: open('migration/data/users.dict.json',
# print(str(counter) + '/' + str(len(data)) + ' users entries were migrated') 'w').write(json.dumps(newdata, cls=DateTimeEncoder)) # NOTE: by old_id
# print('try to remove database first') open('../src/data/authors.json', 'w').write(json.dumps(dict(export_list),
open('migration/data/users.dict.json','w').write( json.dumps(newdata, cls=DateTimeEncoder) ) cls=DateTimeEncoder,
open('../src/data/authors.json','w').write( json.dumps(exportData, cls=DateTimeEncoder) ) indent=4,
print(str(counter) + ' users entries were migrated') sort_keys=True,
ensure_ascii=False))
print(str(len(newdata.items())) + ' user accounts were migrated')
print(str(len(export_list)) + ' authors were exported')
def topics():
    ''' topics from categories and tags

    Reads migration/data/content_item_categories.json and
    migration/data/tags.json, converts every entry with migrateCategory /
    migrateTag and stores the results keyed by slug (a tag replaces a
    category that has the same slug).

    Writes all topics to migration/data/topics.dict.json and the last ten
    topics, ordered by the string form of 'createdAt', to
    ../src/data/topics.json.

    A failure while migrating categories is reported and the run continues
    with the tags; a failure while migrating tags is reported and re-raised.
    '''
    print('migrating topics...')
    with open('migration/data/content_item_categories.json',
              encoding='utf-8') as cat_file:
        cat_data = json.loads(cat_file.read())
    with open('migration/data/tags.json', encoding='utf-8') as tag_file:
        tag_data = json.loads(tag_file.read())
    newdata = {}
    counter = 0
    try:
        for cat in cat_data:
            topic = migrateCategory(cat)
            newdata[topic['slug']] = topic
            counter += 1
    except Exception:
        # best-effort, as in the original: report and carry on with tags
        print('cats exception, try to remove database first')
    try:
        for tag in tag_data:
            topic = migrateTag(tag)
            newdata[topic['slug']] = topic
            counter += 1
    except Exception:
        print('tags exception, try to remove database first')
        # bare raise keeps the real error and traceback
        raise
    export_list = sorted(newdata.items(), key=lambda item: str(
        item[1]['createdAt']))[-10:]
    with open('migration/data/topics.dict.json', 'w',
              encoding='utf-8') as dict_file:
        dict_file.write(json.dumps(newdata, cls=DateTimeEncoder))
    # ensure_ascii=False emits raw non-ASCII text, so pin the file encoding
    with open('../src/data/topics.json', 'w', encoding='utf-8') as out_file:
        out_file.write(json.dumps(dict(export_list),
                                  cls=DateTimeEncoder,
                                  indent=4,
                                  sort_keys=True,
                                  ensure_ascii=False))
    # labels fixed: cat_data holds the categories, tag_data the tags
    print(str(counter) + ' from ' + str(len(cat_data)) +
          ' cats and ' + str(len(tag_data)) + ' tags were migrated')
    print(str(len(export_list)) + ' topics were exported')
def shouts():
    ''' migrating content items one by one

    Reads migration/data/content_items.json and passes every entry to
    migrateShout, which returns a ``(shout, content)`` pair. Shouts are
    collected by slug and one line per shout is appended to ./shouts.id.log.

    The export limit is taken from the second command line argument when it
    is an integer, otherwise every entry is exported. For each exported
    shout the inline images are extracted with extract_images and the
    content returned for that same shout is written to
    ../content/discours.io/<slug>.md.

    Also writes migration/data/shouts.dict.json (all shouts) and
    ../src/data/articles.json (exported shouts).

    Raises Exception(" error"), chained to the original error, as soon as
    one entry fails to migrate.
    '''
    import sys  # local import: the module itself imports sys only under __main__

    print('loading shouts...')
    counter = 0
    discours_author = 0
    with open('migration/data/content_items.json',
              encoding='utf-8') as src_file:
        content_data = json.loads(src_file.read())
    newdata = {}
    contents = {}  # slug -> content returned by migrateShout for that shout
    print(str(len(content_data)) + ' entries loaded. now migrating...')
    # open the log once instead of once per entry
    with open('./shouts.id.log', 'a', encoding='utf-8') as id_log:
        for entry in content_data:
            try:
                (shout, content) = migrateShout(entry)
                newdata[shout['slug']] = shout
                contents[shout['slug']] = content
                author = shout['authors'][0]['slug']
                line = str(counter+1) + ': ' + \
                    shout['slug'] + " @" + str(author)
                print(line)
                counter += 1
                if author == 'discours.io':
                    discours_author += 1
                id_log.write(line + '\n')
            except Exception as err:
                print(entry['_id'])
                # chain the cause so the real failure stays in the traceback
                raise Exception(" error") from err
    try:
        limit = int(sys.argv[2]) if len(sys.argv) > 2 else len(content_data)
    except ValueError:
        limit = len(content_data)
    # str() keeps the key comparable with OLD_DATE even if createdAt is not
    # already a string (topics() sorts the same way)
    export_list = sorted(
        newdata.items(),
        key=lambda item: str(item[1]['createdAt'])
        if item[1]['layout'] == 'article' else OLD_DATE)[:limit]
    export_clean = {}
    for slug, a in export_list:
        export_clean[slug] = extract_images(a)
        # write this shout's own content, not whatever the migration loop
        # happened to leave in its last iteration
        with open('../content/discours.io/'+slug+'.md', 'w',
                  encoding='utf-8') as md_file:
            md_file.write(contents[slug])
    with open('migration/data/shouts.dict.json', 'w',
              encoding='utf-8') as dict_file:
        dict_file.write(json.dumps(newdata, cls=DateTimeEncoder))
    with open('../src/data/articles.json', 'w',
              encoding='utf-8') as out_file:
        out_file.write(json.dumps(dict(export_clean),
                                  cls=DateTimeEncoder,
                                  indent=4,
                                  sort_keys=True,
                                  ensure_ascii=False))
    print(str(counter) + '/' + str(len(content_data)) +
          ' content items were migrated')
    print(str(len(export_list)) + ' shouts were exported')
    print(str(discours_author) + ' from them by @discours.io')
if __name__ == '__main__':
    # Command line entry point: python migrate.py <command> [limit]
    import sys

    USAGE = 'usage: python migrate.py <bson|all|topics|users|shouts>'
    command = sys.argv[1] if len(sys.argv) > 1 else None

    if command == "users":
        users()
    elif command == "topics":
        topics()
    elif command == "shouts":
        try:
            Community.create(**{
                'slug': 'discours.io',
                'name': 'Дискурс',
                'pic': 'https://discours.io/images/logo-min.svg',
                'createdBy': '0',
                'createdAt': OLD_DATE
            })
        except Exception as err:
            # Still best-effort (the migration goes on), but the reason is
            # reported instead of being dropped silently.
            print('community was not created: ' + repr(err))
        shouts()
    elif command == "all":
        users()
        topics()
        shouts()
    elif command == "bson":
        from migration import bson2json
        bson2json.json_tables()
    else:
        # no command given, or one that is not recognised
        print(USAGE)

View File

@ -16,8 +16,9 @@ users_dict['0'] = {
'id': 9999999, 'id': 9999999,
'slug': 'discours.io', 'slug': 'discours.io',
'name': 'Дискурс', 'name': 'Дискурс',
'userpic': 'https://discours.io/images/logo-mini.svg' 'userpic': 'https://discours.io/images/logo-mini.svg',
} 'createdAt': '2016-03-05 22:22:00.350000'
}
ts = datetime.now() ts = datetime.now()
@ -29,8 +30,9 @@ type2layout = {
'Image': 'image' 'Image': 'image'
} }
def migrate(entry, limit=3626, start=0):
''' def migrate(entry):
'''
type Shout { type Shout {
slug: String! slug: String!
author: Int! author: Int!
@ -41,7 +43,7 @@ def migrate(entry, limit=3626, start=0):
deletedBy: Int deletedBy: Int
rating: Int rating: Int
ratigns: [Rating] ratigns: [Rating]
published: Bool! published: Bool!
publishedAt: DateTime # if there is no published field - it is not published publishedAt: DateTime # if there is no published field - it is not published
replyTo: String # another shout replyTo: String # another shout
tags: [String] # actual values tags: [String] # actual values
@ -53,17 +55,19 @@ def migrate(entry, limit=3626, start=0):
views: Int views: Int
} }
''' '''
content = ''
r = { r = {
'layout': type2layout[entry['type']], 'layout': type2layout[entry['type']],
'title': entry['title'], 'title': entry['title'],
'community': 0, 'community': 0,
'authors': [], 'authors': [],
'topics': [], 'topics': [],
'published': entry.get('published', False), 'published': entry.get('published', False),
'views': entry.get('views', 0), 'views': entry.get('views', 0),
'rating': entry.get('rating', 0), 'rating': entry.get('rating', 0),
'ratings': [] 'ratings': [],
} 'createdAt': '2016-03-05 22:22:00.350000'
}
r['slug'] = entry.get('slug', '') r['slug'] = entry.get('slug', '')
body_orig = entry.get('body', '') body_orig = entry.get('body', '')
if not r['slug'] and entry.get('friendlySlugs') is not None: if not r['slug'] and entry.get('friendlySlugs') is not None:
@ -88,7 +92,8 @@ def migrate(entry, limit=3626, start=0):
if body_orig == '': if body_orig == '':
print('EMPTY BODY!') print('EMPTY BODY!')
else: else:
body_html = str(BeautifulSoup(body_orig, features="html.parser")) body_html = str(BeautifulSoup(
body_orig, features="html.parser"))
r['body'] = html2text(body_html).replace('****', '**') r['body'] = html2text(body_html).replace('****', '**')
r['old_id'] = entry.get('_id') r['old_id'] = entry.get('_id')
else: else:
@ -103,20 +108,20 @@ def migrate(entry, limit=3626, start=0):
if videoUrl == '#': if videoUrl == '#':
print(entry.get('media', 'NO MEDIA!')) print(entry.get('media', 'NO MEDIA!'))
# raise Exception # raise Exception
r['body'] = '<ShoutVideo src=\"' + videoUrl + '\" />' + html2text(m.get('body', '')) # FIXME r['body'] = '<ShoutVideo src=\"' + videoUrl + \
'\" />' + html2text(m.get('body', '')) # FIXME
elif entry.get('type') == 'Music': elif entry.get('type') == 'Music':
r['body'] = '<ShoutMusic media={\"' + json.dumps(entry['media']) +'\"} />' # FIXME r['body'] = '<ShoutMusic media={\"' + \
json.dumps(entry['media']) + '\"} />' # FIXME
if r.get('body') is None: if r.get('body') is None:
body_orig = entry.get('body', '') body_orig = entry.get('body', '')
body_html = str(BeautifulSoup(body_orig, features="html.parser")) body_html = str(BeautifulSoup(body_orig, features="html.parser"))
r['body'] = html2text(body_html).replace('****', '**') r['body'] = html2text(body_html).replace('****', '**')
r['old_id'] = entry.get('_id') r['old_id'] = entry.get('_id')
body = r.get('body') body = r.get('body')
user = None user = None
try: try:
userdata = users_dict[entry['createdBy']] userdata = users_dict.get(entry['createdBy'], users_dict['0'])
slug = userdata['slug'] slug = userdata['slug']
name = userdata['name'] name = userdata['name']
userpic = userdata['userpic'] userpic = userdata['userpic']
@ -137,10 +142,11 @@ def migrate(entry, limit=3626, start=0):
user = User.create(**authordata) user = User.create(**authordata)
except IntegrityError: except IntegrityError:
with local_session() as session: with local_session() as session:
user = session.query(User).filter(User.email == authordata['email']).first() user = session.query(User).filter(
User.email == authordata['email']).first()
if user is None: if user is None:
user = session.query(User).filter(User.slug == authordata['slug']).first() user = session.query(User).filter(
User.slug == authordata['slug']).first()
slug = user['slug'] slug = user['slug']
name = user['name'] name = user['name']
userpic = user.userpic userpic = user.userpic
@ -167,15 +173,15 @@ def migrate(entry, limit=3626, start=0):
post = frontmatter.Post(body, **metadata) post = frontmatter.Post(body, **metadata)
dumped = frontmatter.dumps(post) dumped = frontmatter.dumps(post)
if entry['published']: if entry['published']:
#if r.get('old_id', None): # if r.get('old_id', None):
# ext = 'html' # ext = 'html'
# content = str(body).replace('<p></p>', '').replace('<p> </p>', '') # content = str(body).replace('<p></p>', '').replace('<p> </p>', '')
#else: # else:
ext = 'md' ext = 'md'
content = dumped content = dumped
open('migration/content/' + metadata['layout'] + '/' + r['slug'] + '.' + ext, 'w').write(content) open('migration/content/' +
metadata['layout'] + '/' + r['slug'] + '.' + ext, 'w').write(content)
try: try:
shout_dict = r.copy() shout_dict = r.copy()
@ -190,8 +196,8 @@ def migrate(entry, limit=3626, start=0):
else: else:
shout_dict['publishedAt'] = ts shout_dict['publishedAt'] = ts
del shout_dict['published'] del shout_dict['published']
del shout_dict['views'] # FIXME del shout_dict['views'] # FIXME
del shout_dict['rating'] # FIXME del shout_dict['rating'] # FIXME
del shout_dict['ratings'] del shout_dict['ratings']
try: try:
s = Shout.create(**shout_dict) s = Shout.create(**shout_dict)
@ -203,4 +209,4 @@ def migrate(entry, limit=3626, start=0):
print(r) print(r)
# print(s) # print(s)
raise Exception raise Exception
return r return (r, content)

View File

@ -1,20 +1,36 @@
import json
from os.path import abspath
from datetime import datetime
# Users keyed by their old id, as written to users.dict.json by the users
# migration step. The file is closed as soon as it has been parsed.
with open(abspath('migration/data/users.dict.json'),
          encoding='utf-8') as users_file:
    users_dict = json.loads(users_file.read())
# Default author, used whenever an entry's creator is not found in users_dict.
users_dict['0'] = {
    'id': 9999999,
    'slug': 'discours.io',
    'name': 'Дискурс',
    'userpic': 'https://discours.io/images/logo-mini.svg',
    'createdAt': '2016-03-05 22:22:00.350000'
}
# Timestamp taken once, when the module is loaded.
ts = datetime.now()
def migrate(entry):
    '''
    Convert one category entry into a topic dict.

    type Topic {
        slug: String! # ID
        createdBy: Int! # User
        createdAt: DateTime!
        title: String
        parents: [String] # NOTE: topic can have parent topics
        children: [String] # and children
    }

    entry -- dict with the keys 'slug', 'createdAt' and 'title'; 'createdBy'
             is optional.

    Returns the topic dict; 'title' is lower-cased, 'parents' and 'children'
    start out empty.
    '''
    # entry.get(): an entry without 'createdBy' uses the default author
    # users_dict['0'], the same fallback as for an unknown creator id.
    creator = users_dict.get(entry.get('createdBy'), users_dict['0'])
    return {
        'slug': entry['slug'],
        # NOTE(review): the original comment read "uses an old user id";
        # the lookup key is the old id - confirm what creator['id'] holds.
        'createdBy': creator['id'],
        'createdAt': entry['createdAt'],
        'title': entry['title'].lower(),
        'parents': [],
        'children': []
    }

View File

@ -7,76 +7,82 @@ from migration.html2text import html2text
counter = 0 counter = 0
def migrate(entry, limit=668): def migrate(entry, limit=668):
''' '''
type User {
username: String! # email
createdAt: DateTime!
email: String
password: String
oauth: String # provider:token
name: String # to display
userpic: String
links: [String]
emailConfirmed: Boolean # should contain all emails too
id: Int!
muted: Boolean
rating: Int
roles: [Role]
updatedAt: DateTime
wasOnlineAt: DateTime
ratings: [Rating]
slug: String
bio: String
notifications: [Int]
}
''' type User {
res = {} username: String! # email
res['old_id'] = entry['_id'] createdAt: DateTime!
res['password'] = entry['services']['password'].get('bcrypt', '') email: String
res['username'] = entry['emails'][0]['address'] password: String
res['email'] = res['username'] oauth: String # provider:token
res['wasOnlineAt'] = parse(entry.get('loggedInAt', entry['createdAt'])) name: String # to display
res['emailConfirmed'] = entry['emails'][0]['verified'] userpic: String
res['createdAt'] = parse(entry['createdAt']) links: [String]
res['rating'] = entry['rating'] # number emailConfirmed: Boolean # should contain all emails too
res['roles'] = [] # entry['roles'] # roles by community id: Int!
res['ratings'] = [] # entry['ratings'] muted: Boolean
res['notifications'] = [] rating: Int
res['links'] = [] roles: [Role]
res['muted'] = False updatedAt: DateTime
res['bio'] = html2text(entry.get('bio', '')) wasOnlineAt: DateTime
if entry['profile']: ratings: [Rating]
res['slug'] = entry['profile'].get('path') slug: String
res['userpic'] = entry['profile'].get('image', {'thumborId': ''}).get('thumborId', '') # adding 'https://assets.discours.io/unsafe/1600x' in web ui bio: String
fn = entry['profile'].get('firstName', '') notifications: [Int]
ln = entry['profile'].get('lastName', '') }
name = res['slug'] if res['slug'] else 'anonymous'
name = fn if fn else name '''
name = (name + ' ' + ln) if ln else name res = {}
name = entry['profile']['path'] if len(name) < 2 else name res['old_id'] = entry['_id']
res['name'] = name res['password'] = entry['services']['password'].get('bcrypt', '')
fb = entry['profile'].get('facebook', False) res['username'] = entry['emails'][0]['address']
if fb: res['email'] = res['username']
res['links'].append(fb) res['wasOnlineAt'] = parse(entry.get('loggedInAt', entry['createdAt']))
vk = entry['profile'].get('vkontakte', False) res['emailConfirmed'] = entry['emails'][0]['verified']
if vk: res['createdAt'] = parse(entry['createdAt'])
res['links'].append(vk) res['rating'] = entry['rating'] # number
tr = entry['profile'].get('twitter', False) res['roles'] = [] # entry['roles'] # roles by community
if tr: res['ratings'] = [] # entry['ratings']
res['links'].append(tr) res['notifications'] = []
ws = entry['profile'].get('website', False) res['links'] = []
if ws: res['muted'] = False
res['links'].append(ws) res['bio'] = html2text(entry.get('bio', ''))
if not res['slug']: if entry['profile']:
res['slug'] = res['links'][0].split('/')[-1] res['slug'] = entry['profile'].get('path')
if not res['slug']: try:
res['slug'] = res['email'].split('@')[0] res['userpic'] = 'https://assets.discours.io/unsafe/100x/' + entry['profile']['thumborId']
else: except KeyError:
old = res['old_id'] try:
del res['old_id'] res['userpic'] = entry['profile']['image']['url']
user = User.create(**res.copy()) except KeyError:
res['id'] = user.id res['userpic'] = ''
res['old_id'] = old fn = entry['profile'].get('firstName', '')
return res ln = entry['profile'].get('lastName', '')
name = res['slug'] if res['slug'] else 'anonymous'
name = fn if fn else name
name = (name + ' ' + ln) if ln else name
name = entry['profile']['path'] if len(name) < 2 else name
res['name'] = name
fb = entry['profile'].get('facebook', False)
if fb:
res['links'].append(fb)
vk = entry['profile'].get('vkontakte', False)
if vk:
res['links'].append(vk)
tr = entry['profile'].get('twitter', False)
if tr:
res['links'].append(tr)
ws = entry['profile'].get('website', False)
if ws:
res['links'].append(ws)
if not res['slug']:
res['slug'] = res['links'][0].split('/')[-1]
if not res['slug']:
res['slug'] = res['email'].split('@')[0]
else:
old = res['old_id']
del res['old_id']
user = User.create(**res.copy())
res['id'] = user.id
res['old_id'] = old
return res