migration fix, new html2text, export wip

This commit is contained in:
2021-10-15 13:00:26 +03:00
parent 7ec763391b
commit 14fdfe71e5
21 changed files with 3358 additions and 564 deletions

View File

@@ -6,9 +6,6 @@ from orm import Shout, Comment, CommentRating, User
from orm.base import local_session
from migration.html2text import html2text
# users_dict = json.loads(open(abspath('migration/data/users.dict.json')).read())
# topics_dict = json.loads(open(abspath('migration/data/topics.dict.json')).read()) # old_id keyed
def migrate(entry):
'''
{
@@ -55,33 +52,38 @@ def migrate(entry):
'author': author.id if author else 0,
'createdAt': date_parse(entry['createdAt']),
'body': html2text(entry['body']),
'shout': shout
'shout': shout.id
}
if 'rating' in entry:
comment_dict['rating'] = entry['rating']
if entry.get('deleted'):
comment_dict['deletedAt'] = entry['updatedAt']
comment_dict['deletedBy'] = entry['updatedBy']
comment_dict['deletedAt'] = date_parse(entry['updatedAt'])
comment_dict['deletedBy'] = str(entry['updatedBy'])
if entry.get('updatedAt'):
comment_dict['updatedAt'] = date_parse(entry['updatedAt'])
# comment_dict['updatedBy'] = str(entry.get('updatedBy', 0)) invalid keyword for Comment
if 'thread' in entry:
comment_dict['old_thread'] = entry['thread']
print(comment_dict)
# print(comment_dict)
comment = Comment.create(**comment_dict)
print(comment)
comment_dict['id'] = comment.id
comment_dict['ratings'] = []
# print(comment)
for comment_rating_old in entry.get('ratings',[]):
rater_id = session.query(User).filter(User.old_id == comment_rating_old['createdBy']).first()
createdTs = comment_rating_old.get('createdAt', datetime.datetime.now())
u = entry.get('updatedAt', False)
comment_rating_dict = {
'value': comment_rating_old['value'],
'createdBy': rater_id or 0,
'createdAt': createdTs,
'comment_id': comment.id
}
try:
comment_rating = CommentRating.create(**comment_rating_dict)
# TODO: comment rating append resolver
# comment['ratings'].append(comment_rating)
except Exception as e:
print(comment_rating)
pass # raise e
return comment
rater = session.query(User).filter(User.old_id == comment_rating_old['createdBy']).first()
if rater and comment:
comment_rating_dict = {
'value': comment_rating_old['value'],
'createdBy': rater.id,
'comment_id': comment.id
}
cts = comment_rating_old.get('createdAt')
if cts: comment_rating_dict['createdAt'] = date_parse(cts)
try:
comment_rating = CommentRating.create(**comment_rating_dict)
# comment_rating_dict['id'] = comment_rating.id
comment_dict['ratings'].append(comment_rating_dict)
except Exception as e:
print(comment_rating_dict)
raise e
return comment_dict

View File

@@ -1,6 +1,6 @@
from orm.base import local_session
from orm import Topic
# from dateutil.parser import parse as date_parse
from dateutil.parser import parse as date_parse
def migrate(entry):
'''
@@ -16,7 +16,7 @@ def migrate(entry):
topic_dict = {
'slug': entry['slug'],
'createdBy': entry['createdBy'], # NOTE: uses an old user id
'createdAt': entry['createdAt'],
'createdAt': date_parse(entry['createdAt']),
'title': entry['title'].lower(),
'parents': [],
'children': [],
@@ -31,4 +31,4 @@ def migrate(entry):
return topic_dict
except Exception as e:
print(e)
return {}
raise e

View File

@@ -1,7 +1,8 @@
from dateutil.parser import parse as date_parse
from os.path import abspath
import frontmatter
import json
import sqlite3
import sqlalchemy
from orm import Shout, Comment, Topic, ShoutRating, User #, TODO: CommentRating
from bs4 import BeautifulSoup
from migration.html2text import html2text
@@ -10,24 +11,8 @@ from transliterate import translit
from datetime import datetime
from sqlalchemy.exc import IntegrityError
from orm.base import local_session
users_dict = json.loads(open(abspath('migration/data/users.dict.json')).read())
print(str(len(users_dict.items())) + ' users loaded')
cats_data = json.loads(open(abspath('migration/data/content_item_categories.json')).read()) # old_id keyed
cats_dict = { x['_id']: x for x in cats_data }
print(str(len(cats_data)) + ' categories loaded')
comments_data = json.loads(open(abspath('migration/data/comments.json')).read())
print(str(len(comments_data)) + ' comments loaded')
comments_by_post = {}
for comment in comments_data:
p = comment['contentItem']
comments_by_post[p] = comments_by_post.get(p, [])
comments_by_post[p].append(comment)
users_dict['0'] = {
DISCOURS_USER = {
'id': 9999999,
'slug': 'discours',
'name': 'Дискурс',
@@ -57,7 +42,7 @@ def get_metadata(r):
metadata['cover'] = r.get('cover')
return metadata
def migrate(entry):
def migrate(entry, users_by_oid, topics_by_oid):
'''
type Shout {
slug: String!
@@ -92,7 +77,6 @@ def migrate(entry):
'views': entry.get('views', 0),
'rating': entry.get('rating', 0),
'ratings': [],
'comments': [],
'createdAt': entry.get('createdAt', '2016-03-05 22:22:00.350000')
}
r['slug'] = entry.get('slug', '')
@@ -106,7 +90,7 @@ def migrate(entry):
# print(entry)
raise Exception
try:
r['topics'].append(cats_dict[entry['category']]['slug'])
r['topics'].append(topics_by_oid[entry['category']]['slug'])
except Exception:
print(entry['category'])
if entry.get('image') is not None:
@@ -149,110 +133,102 @@ def migrate(entry):
r['body'] = html2text(body_html)
body = r.get('body', '')
r['old_id'] = entry.get('_id')
user = None
try:
userdata = users_dict.get(entry['createdBy'], users_dict['0'])
slug = userdata['slug']
name = userdata['name']
userpic = userdata['userpic']
# get author data
userdata = {}
try: userdata = users_by_oid[entry['createdBy']]
except KeyError:
app = entry.get('application')
if app is not None:
authordata = {
if app:
userdata = {
'username': app['email'],
'email': app['email'],
'name': app['name'],
'bio': app.get('bio', ''),
'emailConfirmed': False,
'slug': translit(app['name'], 'ru', reversed=True).replace(' ', '-').lower(),
'slug': translit(app['name'], 'ru', reversed=True).replace(' ', '-').replace('\'', '').lower(),
'createdAt': ts,
'wasOnlineAt': ts
}
try:
user = User.create(**authordata)
except IntegrityError:
with local_session() as session:
user = session.query(User).filter(
User.email == authordata['email']).first()
if user is None:
user = session.query(User).filter(
User.slug == authordata['slug']).first()
slug = user['slug']
name = user['name']
userpic = user['userpic']
else:
# no application, no author!
slug = 'discours'
name = 'Дискурс'
userpic = 'https://discours.io/images/logo-mini.svg'
with local_session() as session:
user = session.query(User).filter(User.slug == slug).first()
r['authors'].append({
'id': user.id,
'slug': slug,
'name': name,
'userpic': userpic
})
r['layout'] = type2layout[entry['type']]
metadata = get_metadata(r)
content = frontmatter.dumps(frontmatter.Post(body, **metadata))
if userdata == {}:
userdata = {
'name': 'Дискурс',
'slug': 'discours',
'userpic': 'https://discours.io/image/logo-mini.svg'
}
# set author data
shout_dict = r.copy()
author = { # a short version for public listings
'slug': userdata.get('slug', 'discours'),
'name': userdata.get('name', 'Дискурс'),
'userpic': userdata.get('userpic', '')
}
shout_dict['authors'] = [ author, ]
if entry['published']:
metadata = get_metadata(r)
content = frontmatter.dumps(frontmatter.Post(body, **metadata))
ext = 'md'
open('migration/content/' +
r['layout'] + '/' + r['slug'] + '.' + ext, 'w').write(content)
try:
shout_dict = r.copy()
shout_dict['authors'] = [user, ]
if entry.get('createdAt') is not None:
shout_dict['createdAt'] = parse(r.get('createdAt'))
else:
shout_dict['createdAt'] = ts
if entry.get('published'):
if entry.get('publishedAt') is not None:
shout_dict['publishedAt'] = parse(entry.get('publishedAt'))
else:
shout_dict['publishedAt'] = ts
del shout_dict['published']
open('migration/content/' + r['layout'] + '/' + r['slug'] + '.' + ext, 'w').write(content)
try:
shout_dict['createdAt'] = date_parse(r.get('createdAt')) if entry.get('createdAt') else ts
shout_dict['publishedAt'] = date_parse(entry.get('publishedAt')) if entry.get('published') else ts
if entry.get('deletedAt') is not None:
shout_dict['deletedAt'] = date_parse(entry.get('deletedAt'))
shout_dict['deletedBy'] = entry.get('deletedBy', '0')
del shout_dict['published'] # invalid keyword argument for Shout
del shout_dict['topics'] # FIXME: AttributeError: 'str' object has no attribute '_sa_instance_state'
del shout_dict['views'] # FIXME: TypeError: 'views' is an invalid keyword argument for Shout
del shout_dict['rating'] # FIXME: TypeError: 'rating' is an invalid keyword argument for Shout
del shout_dict['ratings']
# get user
user = None
email = userdata.get('email')
slug = userdata.get('slug')
with local_session() as session:
try:
del shout_dict['topics'] # FIXME: AttributeError: 'str' object has no attribute '_sa_instance_state'
del shout_dict['views'] # FIXME: TypeError: 'views' is an invalid keyword argument for Shout
del shout_dict['rating'] # FIXME: TypeError: 'rating' is an invalid keyword argument for Shout
del shout_dict['ratings']
s = Shout.create(**shout_dict)
r['id'] = s.id
if len(entry.get('ratings', [])) > 0:
# TODO: migrate shout ratings
shout_dict['ratings'] = []
for shout_rating_old in entry['ratings']:
shout_rating = ShoutRating.create(
rater_id = users_dict[shout_rating_old['createdBy']]['id'],
shout_id = s.id,
value = shout_rating_old['value']
)
s.ratings.append(shout_rating.id)
s.save()
# TODO: migrate topics
'''
with local_session() as session:
for topic_slug in topic_slugs:
topic = session.query(Topic).filter(Topic.slug == topic_slug).first()
if not topic:
topic_dict = migrateCategory()
if topic_dict:
topic = Topic.create(**topic_dict)
s.topics = [ topic, ]
s.save()
'''
except Exception as e:
r['error'] = 'db error'
# pass
raise e
except Exception as e:
if not r['body']: r['body'] = 'body moved'
raise e
return r
if email: user = session.query(User).filter(User.email == email).first()
if not user and slug: user = session.query(User).filter(User.slug == slug).first()
if not user and userdata: user = User.create(**userdata)
except:
print(userdata)
assert user, 'could not get a user'
shout_dict['authors'] = [ user, ]
try: s = Shout.create(**shout_dict)
except Exception as e: raise e
with local_session() as session:
shout_dict['id'] = s.id
# shout ratings
shout_dict['ratings'] = []
for shout_rating_old in entry.get('ratings',[]):
rater = session.query(User).filter(User.old_id == shout_rating_old['createdBy']).first()
if rater:
shout_rating_dict = {
'value': shout_rating_old['value'],
'rater_id': rater.id,
'shout_id': s.id
}
cts = shout_rating_old.get('createdAt')
if cts: shout_rating_dict['rater_id'] = date_parse(cts)
try: shout_rating = ShoutRating.create(**shout_rating_dict)
except sqlalchemy.exc.IntegrityError: pass
shout_dict['ratings'].append(shout_rating_dict)
# shout topics
shout_dict['topics'] = []
for topic_slug in r['topics']:
topic = session.query(Topic).filter(Topic.slug == topic_slug).first()
if not topic:
try: topic = Topic.create(**{ 'slug': topic_slug, 'title': topic_slug })
except Exception as e: raise e
shout_dict['topics'].append(topic.slug)
except Exception as e:
if not shout_dict['body']: r['body'] = 'body moved'
raise e
return shout_dict # for json

View File

@@ -1,18 +1,8 @@
import json
from os.path import abspath
from datetime import datetime
users_dict = json.loads(open(abspath('migration/data/users.dict.json')).read())
users_dict['0'] = {
'id': 9999999,
'slug': 'discours.io',
'name': 'Дискурс',
'userpic': 'https://discours.io/images/logo-mini.svg',
'createdAt': '2016-03-05 22:22:00.350000'
}
ts = datetime.now()
from orm.base import local_session
from orm import Topic
from dateutil.parser import parse as date_parse
def migrate(entry):
'''
@@ -25,12 +15,26 @@ def migrate(entry):
children: [String] # and children
}
'''
creator = users_dict.get(entry['createdBy'], users_dict['0'])
return {
if type(entry['createdAt']) == type(''):
ts = date_parse(entry['createdAt'])
else:
ts = datetime.fromtimestamp(entry['createdAt']/1000)
topic_dict = {
'slug': entry['slug'],
'createdBy': creator['id'], # NOTE: uses an old user id
'createdAt': entry['createdAt'],
'createdBy': 0,
'createdAt': ts,
'title': entry['title'].lower(),
'parents': [],
'children': []
}
}
try:
with local_session() as session:
topic = session.query(Topic).filter(Topic.slug == entry['slug']).first()
if not topic: topic = Topic.create(**topic_dict)
topic_dict['id'] = topic.id
except Exception as e:
print(e)
raise e
topic_dict['tag_id'] = entry['_id']
return topic_dict

View File

@@ -1,86 +1,110 @@
from orm import User, Role
from orm import User, Role, UserRating
import frontmatter
from dateutil.parser import parse
from migration.html2text import html2text
# from migration.html2md import Converter
# markdown = Converter()
from orm.base import local_session
counter = 0
def migrate(entry, limit=668):
    '''
    Convert one legacy user record into a new ``User`` row plus its ratings.

    Target schema, for reference:

        type User {
            username: String! # email
            createdAt: DateTime!
            email: String
            password: String
            oauth: String # provider:token
            name: String # to display
            userpic: String
            links: [String]
            emailConfirmed: Boolean # should contain all emails too
            id: Int!
            muted: Boolean
            rating: Int
            roles: [Role]
            updatedAt: DateTime
            wasOnlineAt: DateTime
            ratings: [Rating]
            slug: String
            bio: String
            notifications: [Int]
        }

    Args:
        entry: legacy user document (dict). Keys read here: ``_id``,
            ``services.password.bcrypt``, ``emails[0].address``,
            ``emails[0].verified``, ``createdAt``, ``rating`` and, optionally,
            ``loggedInAt``, ``bio``, ``profile`` and ``ratings``.
        limit: not used by this function; kept so existing callers that pass
            it keep working.

    Returns:
        A dict with the values handed to ``User.create`` plus ``id`` (taken
        from the created user) and ``ratings`` (the rating dicts that were
        handed to ``UserRating.create``).

    Raises:
        KeyError: when a required key is missing from ``entry``.
        Exception: anything raised by ``User.create`` or
            ``UserRating.create`` is propagated unchanged.
    '''
    email = entry['emails'][0]['address']
    res = {
        'old_id': entry['_id'],
        'password': entry['services']['password'].get('bcrypt', ''),
        'username': email,
        'email': email,
        'wasOnlineAt': parse(entry.get('loggedInAt', entry['createdAt'])),
        'emailConfirmed': entry['emails'][0]['verified'],
        'createdAt': parse(entry['createdAt']),
        'rating': entry['rating'],  # number
        'roles': [],  # entry['roles'] # roles by community
        'ratings': [],  # entry['ratings']
        'notifications': [],
        'links': [],
        'muted': False,
        'name': 'anonymous',
    }

    # A blank bio is left out entirely instead of being stored as whitespace.
    bio = html2text(entry.get('bio', ''))
    if bio.strip():
        res['bio'] = bio

    profile = entry.get('profile')
    if profile:
        # slug
        path = profile.get('path')
        res['slug'] = path

        # userpic: prefer the thumbor asset, then the raw image url
        try:
            res['userpic'] = 'https://assets.discours.io/unsafe/100x/' + profile['thumborId']
        except KeyError:
            try:
                res['userpic'] = profile['image']['url']
            except KeyError:
                res['userpic'] = ''

        # name: first name (or slug, or 'anonymous'), then the last name
        name = profile.get('firstName', '') or path or 'anonymous'
        last_name = profile.get('lastName', '')
        if last_name:
            name = name + ' ' + last_name
        # Only fall back to the path when there is one; indexing
        # profile['path'] directly raised KeyError for profiles without it.
        if len(name) < 2 and path:
            name = path.lower().replace(' ', '-')
        res['name'] = name

        # links
        for network in ('facebook', 'vkontakte', 'twitter', 'website'):
            link = profile.get(network, False)
            if link:
                res['links'].append(link)

    # Slug fallbacks: the last segment of the first link, then the email local part.
    if not res.get('slug') and res['links']:
        # rstrip so a trailing '/' does not produce an empty slug
        res['slug'] = res['links'][0].rstrip('/').split('/')[-1]
    # `or` (not a .get default) so a present-but-empty slug also falls back.
    res['slug'] = res.get('slug') or email.split('@')[0]

    user = User.create(**res.copy())
    res['id'] = user.id

    res['ratings'] = []
    old_ratings = entry.get('ratings') or []
    if old_ratings:
        # One session serves all the rater lookups below.
        with local_session() as session:
            for old_rating in old_ratings:
                # Match on the User.old_id column; comparing two plain Python
                # strings here would hand a bool to filter() instead of a
                # column expression.
                rater = session.query(User).filter(
                    User.old_id == old_rating['createdBy']).first()
                if not rater:
                    # No user row with that old id was found, so this rating
                    # is skipped.
                    continue
                user_rating_dict = {
                    'value': old_rating['value'],
                    'rater_id': rater.id,
                    'user_id': user.id
                }
                cts = old_rating.get('createdAt')
                if cts:
                    # dateutil's parser is imported as `parse` in this module
                    user_rating_dict['createdAt'] = parse(cts)
                try:
                    UserRating.create(**user_rating_dict)
                except Exception:
                    # Print the failing payload, then re-raise with the
                    # original traceback intact.
                    print(user_rating_dict)
                    raise
                res['ratings'].append(user_rating_dict)
    return res