diff --git a/migrate.py b/migrate.py index d7c60a47..b4228ba4 100644 --- a/migrate.py +++ b/migrate.py @@ -26,8 +26,8 @@ def extract_images(article): article['old_id'] + str(i) + '.' + ext img = match.group(4) if img not in images: - open('..' + link, 'wb').write(base64.b64decode(img)) - images.append(img) + open('..' + link, 'wb').write(base64.b64decode(img)) + images.append(img) body = body.replace(match.group(2), link) print(link) article['body'] = body @@ -55,13 +55,12 @@ def users(): counter += 1 export_list = sorted(export_data.items(), key=lambda item: item[1]['rating'])[-10:] - open('migration/data/users.dict.json', - 'w').write(json.dumps(newdata, cls=DateTimeEncoder)) # NOTE: by old_id + open('migration/data/users.dict.json', 'w').write(json.dumps(newdata, cls=DateTimeEncoder)) # NOTE: by old_id open('../src/data/authors.json', 'w').write(json.dumps(dict(export_list), - cls=DateTimeEncoder, - indent=4, - sort_keys=True, - ensure_ascii=False)) + cls=DateTimeEncoder, + indent=4, + sort_keys=True, + ensure_ascii=False)) print(str(len(newdata.items())) + ' user accounts were migrated') print(str(len(export_list)) + ' authors were exported') @@ -96,15 +95,14 @@ def topics(): export_list = sorted(new_data.items(), key=lambda item: str( item[1]['createdAt'])) open('migration/data/topics.dict.json', - 'w').write(json.dumps(old_data, cls=DateTimeEncoder)) + 'w').write(json.dumps(old_data, cls=DateTimeEncoder)) open('../src/data/topics.json', 'w').write(json.dumps(dict(export_list), - cls=DateTimeEncoder, - indent=4, - sort_keys=True, - ensure_ascii=False)) - print(str(counter) + ' from ' + str(len(cat_data)) + - #' tags and ' + str(len(tag_data)) + - ' cats were migrated') + cls=DateTimeEncoder, + indent=4, + sort_keys=True, + ensure_ascii=False)) + print(str(counter) + ' from ' + str(len(cat_data)) + ' cats were migrated') + #' tags and ' + str(len(tag_data)) + print(str(len(export_list)) + ' topics were exported') @@ -114,7 +112,7 @@ def shouts(): counter = 0 discours_author = 0 content_data = json.loads(open('migration/data/content_items.json').read()) - # content_dict = { x['_id']:x for x in content_data } + content_dict = { x['_id']:x for x in content_data } newdata = {} print(str(len(content_data)) + ' entries loaded. now migrating...') errored = [] @@ -129,18 +127,18 @@ def shouts(): if author == 'discours': discours_author += 1 open('./shouts.id.log', 'a').write(line + '\n') - except Exception: + except Exception as e: print(entry['_id']) errored.append(entry) - raise Exception(" error") + raise e try: limit = int(sys.argv[2]) if len(sys.argv) > 2 else len(content_data) except ValueError: limit = len(content_data) open('migration/data/shouts.dict.json', - 'w').write(json.dumps(newdata, cls=DateTimeEncoder)) + 'w').write(json.dumps(newdata, cls=DateTimeEncoder)) print(str(counter) + '/' + str(len(content_data)) + - ' content items were migrated') + ' content items were migrated') print(str(discours_author) + ' from them by @discours') def comments(): @@ -156,13 +154,13 @@ def comments(): print(str(len(export_articles.items())) + ' articles were exported') export_comments = {} c = 0 - for article in export_articles: - print(article['slug']) - print( comments_by_post.get(article['slug'], '') ) - print( export_comments[article['slug']] ) # = comments_by_post.get(article['slug']) - c += len(export_comments[article['slug']]) - print(str(len(export_comments.items())) + ' articles with comments') - open('../src/data/coments.json', 'w').write(json.dumps(dict(export_comments), + for slug, article in export_articles.items(): + comments = comments_by_post.get(slug, []) + if len(comments) > 0: + export_comments[slug] = comments + c += len(comments) + print(str(len(export_comments.items())) + ' after adding those having comments') + open('../src/data/comments.json', 'w').write(json.dumps(dict(export_comments), cls=DateTimeEncoder, indent=4, sort_keys=True, @@ -192,7 +190,7 @@ def export_shouts(limit): for (slug, article) in export_list: if article['layout'] == 'article': for author in article['authors']: - export_authors[author['slug']] = users_slug[author['slug']] + export_authors[author['slug']] = users_slug[author['slug']] export_clean[article['slug']] = extract_images(article) metadata = get_metadata(article) content = frontmatter.dumps(frontmatter.Post(article['body'], **metadata)) @@ -223,23 +221,28 @@ def export_slug(slug): print(str(len(exported_authors.items())) + ' authors were exported before') exported_articles = json.loads(open('../src/data/articles.json').read()) print(str(len(exported_articles.items())) + ' articles were exported before') - shout = shouts_dict.get(slug, None) - author = users_dict.get(shout['authors'][0]['slug'], None) - exported_authors.update({shout['authors'][0]['slug']: author}) - exported_articles.update({shout['slug']: shout}) - print(shout) - open('../src/data/articles.json', 'w').write(json.dumps(exported_articles, - cls=DateTimeEncoder, - indent=4, - sort_keys=True, - ensure_ascii=False)) - open('../src/data/authors.json', 'w').write(json.dumps(exported_authors, - cls=DateTimeEncoder, - indent=4, - sort_keys=True, - ensure_ascii=False)) + shout = shouts_dict.get(slug, False) + if shout: + author = users_dict.get(shout['authors'][0]['slug'], None) + exported_authors.update({shout['authors'][0]['slug']: author}) + exported_articles.update({shout['slug']: shout}) + print(shout) + open('../src/data/articles.json', 'w').write(json.dumps(exported_articles, + cls=DateTimeEncoder, + indent=4, + sort_keys=True, + ensure_ascii=False)) + open('../src/data/authors.json', 'w').write(json.dumps(exported_authors, + cls=DateTimeEncoder, + indent=4, + sort_keys=True, + ensure_ascii=False)) + else: + print('no old id error!') + print(str(len(shouts_dict)) + ' shouts were migrated') + print(slug) comments() - print('exported.') + print('finished.') if __name__ == '__main__': diff --git a/migration/tables/comments.py b/migration/tables/comments.py index 271ddc4a..aa67278c 100644 --- a/migration/tables/comments.py +++ b/migration/tables/comments.py @@ -1,7 +1,7 @@ -import datetime +from dateutil.parser import parse as date_parse import json from os.path import abspath -from orm import Shout +from orm import Shout, Comment, CommentRating, User from orm.base import local_session from migration.html2text import html2text @@ -46,14 +46,32 @@ def migrate(entry): ''' with local_session() as session: shout_id = session.query(Shout).filter(Shout.old_id == entry['_id']).first() - return { + author_dict = users_dict[entry['createdBy']] + print(author_dict) + author_id = author_dict['id'] + comment_dict = { 'old_id': entry['_id'], - 'old_thread': entry['thread'], - 'createdBy': users_dict[entry['createdBy']], - 'createdAt': entry['createdAt'], + 'author': author_id, + 'createdAt': date_parse(entry['createdAt']), 'body': html2text(entry['body']), - 'shout': shout_id, - 'rating': entry['rating'], - 'ratings': [] # TODO: ratings in comments + 'shout': shout_id } - return None + if 'rating' in entry: + comment_dict['rating'] = entry['rating'] + if 'deleted' in entry: + comment_dict['deleted'] = entry['deleted'] + if 'thread' in entry: + comment_dict['old_thread'] = entry['thread'] + print(entry.keys()) + comment = Comment.create(**comment_dict) + + for comment_rating_old in entry.get('ratings',[]): + rater_id = session.query(User).filter(User.old_id == comment_rating_old['createdBy']).first() + comment_rating_dict = { + 'value': cr['value'], + 'createdBy': rater_id, + 'createdAt': date_parse(comment_rating_old['createdAt']) or ts + } + comment_rating = CommentRating.create(**comment_rating_dict) + comment['ratings'].append(comment_rating) + return comment diff --git a/migration/tables/content_items.py b/migration/tables/content_items.py index 5acd59dc..915b9f9c 100644 --- a/migration/tables/content_items.py +++ b/migration/tables/content_items.py @@ -1,20 +1,28 @@ from dateutil.parser import parse -from orm import User from os.path import abspath import frontmatter import json -from orm import Shout +from orm import Shout, Comment, Topic, ShoutRating, User #, TODO: CommentRating from bs4 import BeautifulSoup from migration.html2text import html2text +from migration.tables.comments import migrate as migrateComment from transliterate import translit from datetime import datetime from sqlalchemy.exc import IntegrityError from orm.base import local_session -comments_data = json.loads(open(abspath('migration/data/comments.json')).read()) -comments_dict = { x['_id']: x for x in comments_data } users_dict = json.loads(open(abspath('migration/data/users.dict.json')).read()) +print(str(len(users_dict.items())) + ' users loaded') topics_dict = json.loads(open(abspath('migration/data/topics.dict.json')).read()) # old_id keyed +print(str(len(topics_dict.items())) + ' topics loaded') +comments_data = json.loads(open(abspath('migration/data/comments.json')).read()) +print(str(len(comments_data)) + ' comments loaded') +comments_by_post = {} +for comment in comments_data: + p = comment['contentItem'] + comments_by_post[p] = comments_by_post.get(p, []) + comments_by_post[p].append(comment) + users_dict['0'] = { 'id': 9999999, 'slug': 'discours', @@ -40,6 +48,7 @@ def get_metadata(r): metadata['authors'] = r.get('authors') metadata['createdAt'] = r.get('createdAt', ts) metadata['layout'] = r['layout'] + metadata['topics'] = r['topics'] if r.get('cover', False): metadata['cover'] = r.get('cover') return metadata @@ -79,7 +88,7 @@ def migrate(entry): 'views': entry.get('views', 0), 'rating': entry.get('rating', 0), 'ratings': [], - 'comments': entry.get('comments', []), + 'comments': [], 'createdAt': entry.get('createdAt', '2016-03-05 22:22:00.350000') } r['slug'] = entry.get('slug', '') @@ -112,8 +121,7 @@ def migrate(entry): else: body_html = str(BeautifulSoup( body_orig, features="html.parser")) - r['body'] = html2text(body_html).replace('****', '**') - r['old_id'] = entry.get('_id') + r['body'] = html2text(body_html) else: print(r['slug'] + ': literature has no media') elif entry.get('type') == 'Video': @@ -134,9 +142,9 @@ def migrate(entry): if r.get('body') is None: body_orig = entry.get('body', '') body_html = str(BeautifulSoup(body_orig, features="html.parser")) - r['body'] = html2text(body_html).replace('****', '**') - r['old_id'] = entry.get('_id') - body = r.get('body') + r['body'] = html2text(body_html) + body = r.get('body', '') + r['old_id'] = entry.get('_id') user = None try: userdata = users_dict.get(entry['createdBy'], users_dict['0']) @@ -167,7 +175,7 @@ def migrate(entry): User.slug == authordata['slug']).first() slug = user['slug'] name = user['name'] - userpic = user.userpic + userpic = user['userpic'] else: # no application, no author! slug = 'discours' @@ -203,43 +211,55 @@ def migrate(entry): else: shout_dict['publishedAt'] = ts del shout_dict['published'] - - shout_dict['comments'] = [] - for cid in r['comments']: - comment = comments_dict[cid] - comment_ratings = [] - for cr in comment['ratings']: - comment_ratings.append({ - 'value': cr['value'], - 'createdBy': users_dict[cr['createdBy']], - 'createdAt': cr['createdAt'] or ts}) - shout_dict['comments'].append({ - 'old_id': comment['_id'], - 'old_thread': comment['thread'], # TODO: old_thread to replyTo logix - 'createdBy': users_dict[comment['createdBy']], - 'createdAt': comment['createdAt'] or ts, - 'body': html2text(comment['body']), - 'shout': shout_dict['old_id'], - 'rating': comment['rating'], - 'ratings': comment_ratings - }) - - shout_dict['ratings'] = [] - for rating in r['ratings']: - shout_dict['ratings'].append({ - 'value': rating['value'], - 'createdBy': users_dict[rating['createdBy']], - 'createdAt': r['createdAt'] or ts}) + + # shout comments + if entry.get('commentedAt', False): + try: + old_comments = comments_by_post.get(shout_dict['old_id'], []) + if len(old_comments) > 0: + shout_dict['comments'] = [] + + # migrate comments + for entry in old_comments: + comment = migrateComment(entry) + shout_dict['comments'].append(comment) + except KeyError: + print(shout_dict.keys()) + raise 'error' try: - del shout_dict['views'] # FIXME - del shout_dict['rating'] # FIXME - del shout_dict['ratings'] # FIXME - # del shout_dict['comments'] - s = Shout.create(**shout_dict) # FIXME: AttributeError: 'str' object has no attribute '_sa_instance_state' + topic_slugs = shout_dict['topics'] + del shout_dict['topics'] # FIXME: AttributeError: 'str' object has no attribute '_sa_instance_state' + del shout_dict['views'] # FIXME: TypeError: 'views' is an invalid keyword argument for Shout + del shout_dict['rating'] # FIXME: TypeError: 'rating' is an invalid keyword argument for Shout + del shout_dict['ratings'] + s = Shout.create(**shout_dict) r['id'] = s.id + + if len(entry.get('ratings', [])) > 0: + # TODO: adding shout ratings + ''' + shout_dict['ratings'] = [] + for shout_rating_old in entry['ratings']: + shout_rating = ShoutRating.create( + rater_id = users_dict[shout_rating_old['createdBy']]['id'], + shout_id = s.id, + value = shout_rating_old['value'] + ) + shout.ratings.append(shout_rating.id) + ''' + + for topic_slug in topic_slugs: + topic_dict = topics_dict.get(topic_slug) + if topic_dict: + topic = Topic.create(**topic_dict) + shout.topics = [ topic, ] + shout.save() + except Exception as e: - pass # raise e + r['error'] = 'db error' + # pass + raise e except Exception as e: if not r['body']: r['body'] = 'body moved' raise e diff --git a/orm/__init__.py b/orm/__init__.py index a15a6561..ed50dd66 100644 --- a/orm/__init__.py +++ b/orm/__init__.py @@ -7,9 +7,9 @@ from orm.notification import Notification from orm.shout import Shout, ShoutAuthor, ShoutTopic, ShoutRating, ShoutViewByDay,\ ShoutRatingStorage, ShoutViewStorage from orm.base import Base, engine, local_session -from orm.comment import Comment +from orm.comment import Comment, CommentRating -__all__ = ["User", "Role", "Operation", "Permission", "Message", "Shout", "Topic", "Notification"] +__all__ = ["User", "Role", "Operation", "Permission", "Message", "Shout", "Topic", "Notification", "ShoutRating", "Comment", "CommentRating"] Base.metadata.create_all(engine) Operation.init_table() diff --git a/orm/comment.py b/orm/comment.py index 3cf4f4dd..082a2751 100644 --- a/orm/comment.py +++ b/orm/comment.py @@ -1,7 +1,7 @@ from typing import List from datetime import datetime -from sqlalchemy import Column, Integer, String, ForeignKey, DateTime +from sqlalchemy import Column, Integer, String, ForeignKey, DateTime, Boolean from sqlalchemy.orm import relationship from orm.base import Base @@ -19,14 +19,16 @@ class Comment(Base): __tablename__ = 'comment' author: int = Column(ForeignKey("user.id"), nullable=False, comment="Sender") - body: str = Column(String, nullable=False, comment="Body") + body: str = Column(String, nullable=False, comment="Comment Body") createdAt = Column(DateTime, nullable=False, default = datetime.now, comment="Created at") updatedAt = Column(DateTime, nullable=True, comment="Updated at") deletedAt = Column(DateTime, nullable=True, comment="Deleted at") deletedBy = Column(ForeignKey("user.id"), nullable=True, comment="Deleted by") shout: int = Column(ForeignKey("shout.id"), nullable=True, comment="Shout ID") + rating: int = Column(Integer, nullable=True, comment="Comment Rating") ratings = relationship(CommentRating, foreign_keys=CommentRating.comment_id) old_id: str = Column(String, nullable = True) + deleted: bool = Column(Boolean, nullable = True) # TODO: work in progress, udpate this code diff --git a/orm/user.py b/orm/user.py index 289fc577..b8bbf460 100644 --- a/orm/user.py +++ b/orm/user.py @@ -58,6 +58,7 @@ class User(Base): ratings = relationship(UserRatings, foreign_keys=UserRatings.user_id) roles = relationship(lambda: Role, secondary=UserRoles) topics = relationship(lambda: Topic, secondary=UserTopics) + old_id: str = Column(String, nullable = True) @classmethod def get_permission(cls, user_id):