diff --git a/migrate.py b/migrate.py index 7e9f4916..d233c6a6 100644 --- a/migrate.py +++ b/migrate.py @@ -9,6 +9,7 @@ from migration.tables.content_items import get_metadata, migrate as migrateShout from migration.tables.content_item_categories import migrate as migrateCategory from migration.tables.tags import migrate as migrateTag from migration.tables.comments import migrate as migrateComment +from migration.tables.comments import migrate_2stage as migrateComment_2stage from migration.utils import DateTimeEncoder from orm import Community, Topic from dateutil.parser import parse as date_parse @@ -206,41 +207,16 @@ def export_slug(slug, export_articles, export_authors, content_dict): export_body(shout, content_dict) comments([slug, ]) -def comments(sluglist, export_comments, export_articles, shouts_by_slug, content_dict): - ''' migrating comments on content items one ''' - if len(sluglist) == 0: - export_articles = json.loads(open('../src/data/articles.json').read()) - print(str(len(export_articles.items())) + ' articles were exported before') - if len(sluglist) == 0: sluglist = list(export_articles.keys()) - - if len(sluglist) > 0: - print('exporting comments for: ') - print(' '.join(sluglist)) - for slug in sluglist: - shout = shouts_by_slug[slug] - old_id = shout['old_id'] - content_item = content_dict.get(old_id, {}) - if content_item.get('commentedAt', False): - comments = [ migrateComment(c) for c in comments_by_post.get(old_id, []) ] - if len(comments) > 0: - export_comments[slug] = comments - sys.stdout.write('.') - else: - - print('exporting comments for top 10 commented articles...') - comments_by_shoutslug = {} - for content_item in content_data: - old_id = content_item['_id'] - if content_item.get('commentedAt', False): - comments = [ migrateComment(c) for c in comments_by_post.get(old_id, []) ] - if len(comments) > 0: - shout = shouts_by_oid.get(old_id, { 'slug': 'abandoned-comments' }) - comments_by_shoutslug[shout['slug']] = comments - - top = dict(sorted(comments_by_shoutslug.items(), reverse=True, key=lambda c: len(c[1]))[:10]) - export_comments.update(top) - - print(str(len(export_comments.keys())) + ' articls with comments exported\n') +def comments(comments_data): + id_map = {} + for comment in comments_data: + comment = migrateComment(comment) + id = comment.get('id') + old_id = comment.get('old_id') + id_map[old_id] = id + for comment in comments_data: + migrateComment_2stage(comment, id_map) + print(str(len(id_map)) + ' comments exported') def export_finish(export_articles = {}, export_authors = {}, export_topics = {}, export_comments = {}): @@ -342,16 +318,14 @@ if __name__ == '__main__': elif cmd == "shouts": shouts(content_data, shouts_by_slug, shouts_by_oid) # NOTE: listens limit elif cmd == "comments": - for comment in comments_data: - migrateComment(comment) + comments(comments_data) elif cmd == "export_shouts": export_shouts(shouts_by_slug, export_articles, export_authors, content_dict) elif cmd == "all": users(users_by_oid, users_by_slug, users_data) topics(export_topics, topics_by_slug, topics_by_cat, topics_by_tag, cats_data, tags_data) shouts(content_data, shouts_by_slug, shouts_by_oid) - for comment in comments_data: - migrateComment(comment) + comments(comments_data) elif cmd == 'slug': export_slug(sys.argv[2], export_articles, export_authors, content_dict) #export_finish(export_articles, export_authors, export_topics, export_comments) diff --git a/migration/tables/comments.py b/migration/tables/comments.py index febe4c85..d38ed714 100644 --- a/migration/tables/comments.py +++ b/migration/tables/comments.py @@ -7,83 +7,88 @@ from orm.base import local_session from migration.html2text import html2text def migrate(entry): - ''' - { - "_id": "hdtwS8fSyFLxXCgSC", - "body": "

", - "contentItem": "mnK8KsJHPRi8DrybQ", - "createdBy": "bMFPuyNg6qAD2mhXe", - "thread": "01/", - "createdAt": "2016-04-19 04:33:53+00:00", - "ratings": [ - { "createdBy": "AqmRukvRiExNpAe8C", "value": 1 }, - { "createdBy": "YdE76Wth3yqymKEu5", "value": 1 } - ], - "rating": 2, - "updatedAt": "2020-05-27 19:22:57.091000+00:00", - "updatedBy": "0" - } + ''' + { + "_id": "hdtwS8fSyFLxXCgSC", + "body": "

", + "contentItem": "mnK8KsJHPRi8DrybQ", + "createdBy": "bMFPuyNg6qAD2mhXe", + "thread": "01/", + "createdAt": "2016-04-19 04:33:53+00:00", + "ratings": [ + { "createdBy": "AqmRukvRiExNpAe8C", "value": 1 }, + { "createdBy": "YdE76Wth3yqymKEu5", "value": 1 } + ], + "rating": 2, + "updatedAt": "2020-05-27 19:22:57.091000+00:00", + "updatedBy": "0" + } - -> + -> - type Comment { - id: Int! - author: Int! - body: String! - replyTo: Int! - createdAt: DateTime! - updatedAt: DateTime - shout: Int! - deletedAt: DateTime - deletedBy: Int - ratings: [CommentRating] - views: Int - old_id: String - old_thread: String - } - ''' - with local_session() as session: - shout = session.query(Shout).filter(Shout.old_id == entry['_id']).first() - if not shout: shout = session.query(Shout).first() - author = session.query(User).filter(User.old_id == entry['_id']).first() # FIXME - comment_dict = { - 'old_id': entry['_id'], - 'author': author.id if author else 0, - 'createdAt': date_parse(entry['createdAt']), - 'body': html2text(entry['body']), - 'shout': shout.id - } - #TODO save as CommentRating - #if 'rating' in entry: - # comment_dict['rating'] = entry['rating'] - if entry.get('deleted'): - comment_dict['deletedAt'] = date_parse(entry['updatedAt']) - comment_dict['deletedBy'] = str(entry['updatedBy']) - if entry.get('updatedAt'): - comment_dict['updatedAt'] = date_parse(entry['updatedAt']) - # comment_dict['updatedBy'] = str(entry.get('updatedBy', 0)) invalid keyword for Comment - if 'thread' in entry: - comment_dict['old_thread'] = entry['thread'] - # print(comment_dict) - comment = Comment.create(**comment_dict) - comment_dict['id'] = comment.id - comment_dict['ratings'] = [] - # print(comment) - for comment_rating_old in entry.get('ratings',[]): - rater = session.query(User).filter(User.old_id == comment_rating_old['createdBy']).first() - if rater and comment: - comment_rating_dict = { - 'value': comment_rating_old['value'], - 'createdBy': rater.id, - 'comment_id': comment.id - } - cts = comment_rating_old.get('createdAt') - if cts: comment_rating_dict['createdAt'] = date_parse(cts) - try: - comment_rating = CommentRating.create(**comment_rating_dict) - # comment_rating_dict['id'] = comment_rating.id - comment_dict['ratings'].append(comment_rating_dict) - except Exception as e: - print(comment_rating_dict) - raise e - return comment_dict + type Comment { + id: Int! + author: Int! + body: String! + replyTo: Int! + createdAt: DateTime! + updatedAt: DateTime + shout: Int! + deletedAt: DateTime + deletedBy: Int + ratings: [CommentRating] + views: Int + } + ''' + with local_session() as session: + shout = session.query(Shout).filter(Shout.old_id == entry['contentItem']).first() + if not shout: shout = session.query(Shout).first() + author = session.query(User).filter(User.old_id == entry['createdBy']).first() + comment_dict = { + 'author': author.id if author else 0, + 'createdAt': date_parse(entry['createdAt']), + 'body': html2text(entry['body']), + 'shout': shout.id + } + if entry.get('deleted'): + comment_dict['deletedAt'] = date_parse(entry['updatedAt']) + comment_dict['deletedBy'] = str(entry['updatedBy']) + if entry.get('updatedAt'): + comment_dict['updatedAt'] = date_parse(entry['updatedAt']) + # comment_dict['updatedBy'] = str(entry.get('updatedBy', 0)) invalid keyword for Comment + # print(comment_dict) + comment = Comment.create(**comment_dict) + comment_dict['id'] = comment.id + comment_dict['ratings'] = [] + comment_dict['old_id'] = entry['_id'] + # print(comment) + for comment_rating_old in entry.get('ratings',[]): + rater = session.query(User).filter(User.old_id == comment_rating_old['createdBy']).first() + if rater and comment: + comment_rating_dict = { + 'value': comment_rating_old['value'], + 'createdBy': rater.id, + 'comment_id': comment.id + } + cts = comment_rating_old.get('createdAt') + if cts: comment_rating_dict['createdAt'] = date_parse(cts) + try: + comment_rating = CommentRating.create(**comment_rating_dict) + # comment_rating_dict['id'] = comment_rating.id + comment_dict['ratings'].append(comment_rating_dict) + except Exception as e: + print(comment_rating_dict) + raise e + return comment_dict + +def migrate_2stage(entry, id_map): + old_reply_to = entry.get('replyTo') + if not old_reply_to: + return + old_id = entry['_id'] + id = id_map.get(old_id) + with local_session() as session: + comment = session.query(Comment).filter(Comment.id == id).first() + reply_to = id_map.get(old_reply_to) + comment.replyTo = reply_to + session.commit() diff --git a/orm/comment.py b/orm/comment.py index f87aafec..f497e23b 100644 --- a/orm/comment.py +++ b/orm/comment.py @@ -28,8 +28,5 @@ class Comment(Base): shout: int = Column(ForeignKey("shout.id"), nullable=False, comment="Shout ID") replyTo: int = Column(ForeignKey("comment.id"), nullable=True, comment="comment ID") ratings = relationship(CommentRating, foreign_keys=CommentRating.comment_id) - old_id: str = Column(String, nullable = True) - old_thread: str = Column(String, nullable = True) - # TODO: work in progress, udpate this code