diff --git a/migrate.py b/migrate.py index b4228ba4..6133c65d 100644 --- a/migrate.py +++ b/migrate.py @@ -7,6 +7,7 @@ from migration.tables.users import migrate as migrateUser from migration.tables.content_items import get_metadata, migrate as migrateShout from migration.tables.content_item_categories import migrate as migrateCategory from migration.tables.tags import migrate as migrateTag +from migration.tables.comments import migrate as migrateComment from migration.utils import DateTimeEncoder from orm import Community @@ -53,8 +54,7 @@ def users(): del user['email'] export_data[user['slug']] = user counter += 1 - export_list = sorted(export_data.items(), - key=lambda item: item[1]['rating'])[-10:] + export_list = sorted(export_data.items(), key=lambda item: item[1]['rating'])[-10:] open('migration/data/users.dict.json', 'w').write(json.dumps(newdata, cls=DateTimeEncoder)) # NOTE: by old_id open('../src/data/authors.json', 'w').write(json.dumps(dict(export_list), cls=DateTimeEncoder, @@ -143,19 +143,35 @@ def shouts(): def comments(): ''' migrating comments on content items one by one ''' - comments_data = json.loads(open('migration/data/comments.json').read()) + content_data = json.loads(open('migration/data/content_items.json').read()) # old content + content_dict = { x['_id']: x for x in content_data } # by slug + shouts_dict = json.loads(open('migration/data/shouts.dict.json', 'r').read()) # all shouts by slug + print(str(len(shouts_dict.keys())) + ' migrated shouts loaded') + shouts_old = { x['old_id']: x for slug, x in shouts_dict.items() } # shouts by old_id + print(str(len(content_data)) + ' content items loaded') + comments_data = json.loads(open('migration/data/comments.json').read()) # by slug print(str(len(comments_data)) + ' comments loaded') comments_by_post = {} - for comment in comments_data: - p = comment['contentItem'] - comments_by_post[p] = comments_by_post.get(p, []) - comments_by_post[p].append(comment) + # sort comments by old posts ids + for old_comment in comments_data: + cid = old_comment['contentItem'] + comments_by_post[cid] = comments_by_post.get(cid, []) + comments_by_post[cid].append(old_comment) + # migrate comments + comments_by_shoutslug = {} + for content_item in content_data: + old_id = content_item['_id'] + if content_item.get('commentedAt', False): + comments = [ migrateComment(c) for c in comments_by_post.get(old_id, []) ] + if comments.length > 0: + shout = shouts_old.get(old_id, { 'slug': 'abandoned-comments' }) + comments_by_shoutslug[shout['slug']] = comments export_articles = json.loads(open('../src/data/articles.json').read()) print(str(len(export_articles.items())) + ' articles were exported') export_comments = {} c = 0 for slug, article in export_articles.items(): - comments = comments_by_post.get(slug, []) + comments = comments_by_shoutslug.get(slug, []) if len(comments) > 0: export_comments[slug] = comments c += len(comments) @@ -171,13 +187,15 @@ def comments(): def export_shouts(limit): print('reading json...') - newdata = json.loads(open('migration/data/shouts.dict.json', 'r').read()) - print(str(len(newdata.keys())) + ' shouts loaded') content_data = json.loads(open('migration/data/content_items.json').read()) content_dict = { x['_id']:x for x in content_data } + print(str(len(content_data)) + ' content items loaded') + newdata = json.loads(open('migration/data/shouts.dict.json', 'r').read()) + print(str(len(newdata.keys())) + ' migrated shouts loaded') users_old = json.loads(open('migration/data/users.dict.json').read()) + print(str(len(newdata.keys())) + ' migrated users loaded') export_authors = json.loads(open('../src/data/authors.json').read()) - print(str(len(export_authors.items())) + ' pre-exported authors loaded') + print(str(len(export_authors.items())) + ' exported authors loaded') users_slug = { u['slug']: u for old_id, u in users_old.items()} print(str(len(users_slug.items())) + ' users loaded') @@ -204,23 +222,23 @@ def export_shouts(limit): ensure_ascii=False)) print(str(len(export_clean.items())) + ' articles exported') open('../src/data/authors.json', 'w').write(json.dumps(export_authors, - cls=DateTimeEncoder, - indent=4, - sort_keys=True, - ensure_ascii=False)) + cls=DateTimeEncoder, + indent=4, + sort_keys=True, + ensure_ascii=False)) comments() print(str(len(export_authors.items())) + ' total authors exported') def export_slug(slug): shouts_dict = json.loads(open('migration/data/shouts.dict.json').read()) - print(str(len(shouts_dict.items())) + ' shouts loaded') - users_old = json.loads(open('migration/data/users.dict.json').read()) - print(str(len(users_old.items())) + ' users loaded') + print(str(len(shouts_dict.items())) + ' migrated shouts loaded') + users_old = json.loads(open('migration/data/users.dict.json').read()) # NOTE: this exact file is by old_id + print(str(len(users_old.items())) + ' migrated users loaded') users_dict = { x[1]['slug']:x for x in users_old.items() } exported_authors = json.loads(open('../src/data/authors.json').read()) - print(str(len(exported_authors.items())) + ' authors were exported before') + print(str(len(exported_authors.items())) + ' exported authors loaded') exported_articles = json.loads(open('../src/data/articles.json').read()) - print(str(len(exported_articles.items())) + ' articles were exported before') + print(str(len(exported_articles.items())) + ' exported articles loaded') shout = shouts_dict.get(slug, False) if shout: author = users_dict.get(shout['authors'][0]['slug'], None) @@ -239,7 +257,7 @@ def export_slug(slug): ensure_ascii=False)) else: print('no old id error!') - print(str(len(shouts_dict)) + ' shouts were migrated') + # print(str(len(shouts_dict)) + ' shouts were migrated') print(slug) comments() print('finished.') @@ -252,8 +270,6 @@ if __name__ == '__main__': users() elif sys.argv[1] == "topics": topics() - elif sys.argv[1] == "comments": - comments() elif sys.argv[1] == "shouts": try: Community.create(**{ @@ -266,17 +282,20 @@ if __name__ == '__main__': except Exception: pass shouts() + elif sys.argv[1] == "comments": + comments() elif sys.argv[1] == "export_shouts": - limit = int(sys.argv[2]) if len(sys.argv) > 2 else None - export_shouts(limit) + limit = int(sys.argv[2]) if len(sys.argv) > 2 else None + export_shouts(limit) elif sys.argv[1] == "all": users() topics() shouts() + comments() elif sys.argv[1] == "bson": from migration import bson2json bson2json.json_tables() elif sys.argv[1] == 'slug': export_slug(sys.argv[2]) else: - print('usage: python migrate.py ') + print('usage: python migrate.py bson\n.. \ttopics \n.. \tusers \n.. \tshouts \n.. \tcomments\n.. \texport_shouts \n.. \tslug \n.. \tall>') diff --git a/migration/html2text.py b/migration/html2text.py index 22fe1539..5a150b13 100644 --- a/migration/html2text.py +++ b/migration/html2text.py @@ -388,7 +388,7 @@ class HTML2Text(HTMLParser.HTMLParser): parent_style = {} if start: if self.tag_stack: - parent_style = self.tag_stack[-1][2] + parent_style = self.tag_stack[-1][2] tag_style = element_style(attrs, self.style_def, parent_style) self.tag_stack.append((tag, attrs, tag_style)) else: @@ -418,7 +418,7 @@ class HTML2Text(HTMLParser.HTMLParser): elif attrs['class'] == 'lead': self.o('==') # NOTE: but CriticMarkup uses {== ==} self.span_lead = True - elif self.: + else: if self.span_hightlight: self.o('`') self.span_hightlight = False @@ -600,7 +600,7 @@ class HTML2Text(HTMLParser.HTMLParser): # if self.google_doc: # prevent white space immediately after 'begin emphasis' marks ('**' and '_') lstripped_data = data.lstrip() - if self.drop_white_space and not (self.pre or self.code or self.span_hightlight or self.span_lead): + if self.drop_white_space and not (self.pre or self.code): data = lstripped_data if puredata: # and not self.pre: data = re.sub('\s+', ' ', data) diff --git a/migration/tables/comments.py b/migration/tables/comments.py index aa67278c..b27f5dea 100644 --- a/migration/tables/comments.py +++ b/migration/tables/comments.py @@ -1,12 +1,13 @@ from dateutil.parser import parse as date_parse import json +import datetime from os.path import abspath from orm import Shout, Comment, CommentRating, User from orm.base import local_session from migration.html2text import html2text -users_dict = json.loads(open(abspath('migration/data/users.dict.json')).read()) -topics_dict = json.loads(open(abspath('migration/data/topics.dict.json')).read()) # old_id keyed +# users_dict = json.loads(open(abspath('migration/data/users.dict.json')).read()) +# topics_dict = json.loads(open(abspath('migration/data/topics.dict.json')).read()) # old_id keyed def migrate(entry): ''' @@ -39,39 +40,46 @@ def migrate(entry): deletedAt: DateTime deletedBy: Int rating: Int - ratigns: [Rating] + ratigns: [CommentRating] views: Int old_id: String + old_thread: String } ''' with local_session() as session: - shout_id = session.query(Shout).filter(Shout.old_id == entry['_id']).first() - author_dict = users_dict[entry['createdBy']] - print(author_dict) - author_id = author_dict['id'] + shout = session.query(Shout).filter(Shout.old_id == entry['_id']).first() + if not shout: print(entry) + assert shout, '=== NO SHOUT IN COMMENT ERROR ===' + author = session.query(User).filter(User.old_id == entry['_id']).first() comment_dict = { 'old_id': entry['_id'], - 'author': author_id, + 'author': author.id if author else 0, 'createdAt': date_parse(entry['createdAt']), 'body': html2text(entry['body']), - 'shout': shout_id + 'shout': shout } if 'rating' in entry: comment_dict['rating'] = entry['rating'] - if 'deleted' in entry: - comment_dict['deleted'] = entry['deleted'] + if entry.get('deleted'): + comment_dict['deletedAt'] = entry['updatedAt'] + comment_dict['deletedBy'] = entry['updatedBy'] if 'thread' in entry: comment_dict['old_thread'] = entry['thread'] - print(entry.keys()) + # print(entry.keys()) comment = Comment.create(**comment_dict) - for comment_rating_old in entry.get('ratings',[]): rater_id = session.query(User).filter(User.old_id == comment_rating_old['createdBy']).first() comment_rating_dict = { - 'value': cr['value'], - 'createdBy': rater_id, - 'createdAt': date_parse(comment_rating_old['createdAt']) or ts + 'value': comment_rating_old['value'], + 'createdBy': rater_id or 0, + 'createdAt': comment_rating_old.get('createdAt', datetime.datetime.now()), + 'comment_id': comment.id } - comment_rating = CommentRating.create(**comment_rating_dict) - comment['ratings'].append(comment_rating) + try: + comment_rating = CommentRating.create(**comment_rating_dict) + # TODO: comment rating append resolver + # comment['ratings'].append(comment_rating) + except Exception as e: + print(comment_rating) + pass # raise e return comment diff --git a/migration/tables/content_item_categories.py b/migration/tables/content_item_categories.py index 2271005a..88aeda28 100644 --- a/migration/tables/content_item_categories.py +++ b/migration/tables/content_item_categories.py @@ -9,7 +9,7 @@ def migrate(entry): children: [String] # and children } ''' - return { + topic_dict = { 'slug': entry['slug'], 'createdBy': entry['createdBy'], # NOTE: uses an old user id 'createdAt': entry['createdAt'], @@ -17,4 +17,11 @@ def migrate(entry): 'parents': [], 'children': [], 'old_id': entry['_id'] - } \ No newline at end of file + } + + with local_session() as session: + topic = session.query(Topic).filter(Topic.slug == topic_slug).first() + if not topic: + topic = Topic.create(**topic_dict) + topic_dict['id'] = topic.id + return topic_dict \ No newline at end of file diff --git a/migration/tables/content_items.py b/migration/tables/content_items.py index 915b9f9c..5ac2f7fd 100644 --- a/migration/tables/content_items.py +++ b/migration/tables/content_items.py @@ -184,6 +184,7 @@ def migrate(entry): with local_session() as session: user = session.query(User).filter(User.slug == slug).first() r['authors'].append({ + 'id': user.id, 'slug': slug, 'name': name, 'userpic': userpic @@ -197,7 +198,7 @@ def migrate(entry): if entry['published']: ext = 'md' open('migration/content/' + - r['layout'] + '/' + r['slug'] + '.' + ext, 'w').write(content) + r['layout'] + '/' + r['slug'] + '.' + ext, 'w').write(content) try: shout_dict = r.copy() shout_dict['authors'] = [user, ] @@ -211,21 +212,6 @@ def migrate(entry): else: shout_dict['publishedAt'] = ts del shout_dict['published'] - - # shout comments - if entry.get('commentedAt', False): - try: - old_comments = comments_by_post.get(shout_dict['old_id'], []) - if len(old_comments) > 0: - shout_dict['comments'] = [] - - # migrate comments - for entry in old_comments: - comment = migrateComment(entry) - shout_dict['comments'].append(comment) - except KeyError: - print(shout_dict.keys()) - raise 'error' try: topic_slugs = shout_dict['topics'] @@ -248,18 +234,18 @@ def migrate(entry): ) shout.ratings.append(shout_rating.id) ''' - + # adding topics to created shout for topic_slug in topic_slugs: - topic_dict = topics_dict.get(topic_slug) - if topic_dict: - topic = Topic.create(**topic_dict) + if not topic: + topic_dict = topics_dict.get(topic_slug) + if topic_dict: + topic = Topic.create(**topic_dict) shout.topics = [ topic, ] shout.save() - except Exception as e: - r['error'] = 'db error' - # pass - raise e + r['error'] = 'db error' + # pass + raise e except Exception as e: if not r['body']: r['body'] = 'body moved' raise e diff --git a/migration/tables/users.py b/migration/tables/users.py index 22917a92..8386c61d 100644 --- a/migration/tables/users.py +++ b/migration/tables/users.py @@ -81,8 +81,6 @@ def migrate(entry, limit=668): res['slug'] = res['email'].split('@')[0] else: old = res['old_id'] - del res['old_id'] user = User.create(**res.copy()) res['id'] = user.id - res['old_id'] = old return res diff --git a/orm/comment.py b/orm/comment.py index 082a2751..1cbe90ec 100644 --- a/orm/comment.py +++ b/orm/comment.py @@ -10,9 +10,9 @@ class CommentRating(Base): __tablename__ = "comment_rating" id = None - rater_id = Column(ForeignKey('user.id'), primary_key = True) comment_id = Column(ForeignKey('comment.id'), primary_key = True) - ts: str = Column(DateTime, nullable=False, default = datetime.now, comment="Timestamp") + createdBy = Column(ForeignKey('user.id'), primary_key = True) + createdAt: str = Column(DateTime, nullable=False, default = datetime.now, comment="Timestamp") value = Column(Integer) class Comment(Base): @@ -28,7 +28,7 @@ class Comment(Base): rating: int = Column(Integer, nullable=True, comment="Comment Rating") ratings = relationship(CommentRating, foreign_keys=CommentRating.comment_id) old_id: str = Column(String, nullable = True) - deleted: bool = Column(Boolean, nullable = True) + old_thread: str = Column(String, nullable = True) # TODO: work in progress, udpate this code diff --git a/schema.graphql b/schema.graphql index 7e1933f5..28ef01aa 100644 --- a/schema.graphql +++ b/schema.graphql @@ -152,7 +152,7 @@ type UserNotification { type User { id: Int! - username: String! # email + username: String! # to login, ex. email createdAt: DateTime! slug: String! name: String # to display @@ -196,10 +196,18 @@ type Comment { deletedAt: DateTime deletedBy: Int rating: Int - ratigns: [Rating] + ratigns: [CommentRating] views: Int old_id: String - deleted: Boolean + old_thread: String +} + +type CommentRating { + id: Int! + comment_id: Int! + createdBy: Int! + createdAt: DateTime! + value: Int! } # is publication