"""Command-line entry point that migrates legacy JSON dumps table by table."""
import json
import sys

from migration.utils import DateTimeEncoder

DATA_DIR = 'migration/data/'
USAGE = 'usage: python migrate.py <all|topics|users|shouts|comments>'


def _migrate_table(source, target, migrate_entry, label, best_effort):
    """Run ``migrate_entry`` over every record of one dump and store the result.

    :param source: file name of the input JSON list inside ``DATA_DIR``
    :param target: file name of the ``{old _id: migrated record}`` JSON written
        to ``DATA_DIR``
    :param migrate_entry: callable converting one legacy record
    :param label: noun used in the progress messages
    :param best_effort: when true, a failing record stops the loop but the
        records migrated so far are still written; when false the error
        propagates and nothing is written
    """
    with open(DATA_DIR + source, encoding='utf-8') as src:
        data = json.load(src)
    newdata = {}
    counter = 0
    try:
        for entry in data:
            newdata[entry['_id']] = migrate_entry(entry)
            counter += 1
    except Exception as error:
        if not best_effort:
            raise
        print(str(counter) + '/' + str(len(data)) + ' ' + label + ' were migrated')
        # Show what actually went wrong instead of silently dropping it.
        print('failed with: ' + repr(error))
        print('try to remove database first')
    with open(DATA_DIR + target, 'w') as dst:
        dst.write(json.dumps(newdata, cls=DateTimeEncoder))
    print(str(counter) + ' ' + label + ' were migrated')


def users():
    """Migrate ``users.json`` and write ``users.dict.json`` (best effort)."""
    # Imported lazily so that one table's import-time requirements cannot
    # break the migration of another table.
    from migration.tables.users import migrate as migrate_user
    print('migrating users...')
    _migrate_table('users.json', 'users.dict.json', migrate_user,
                   'users entries', True)


def topics():
    """Migrate ``content_item_categories.json`` into ``topics.dict.json`` (best effort)."""
    from migration.tables.content_item_categories import migrate as migrate_topic
    print('migrating topics...')
    _migrate_table('content_item_categories.json', 'topics.dict.json',
                   migrate_topic, 'topics', True)


def shouts():
    """Migrate ``content_items.json`` into ``shouts.dict.json``; errors propagate."""
    # content_items loads migration/data/users.dict.json when it is imported,
    # so it must not be imported before users() has produced that file.
    from migration.tables.content_items import migrate as migrate_shout
    print('migrating shouts...')
    _migrate_table('content_items.json', 'shouts.dict.json', migrate_shout,
                   'shouts', False)


if __name__ == '__main__':
    # "comments" is accepted but has no migration step yet.
    commands = {
        'users': (users,),
        'topics': (topics,),
        'shouts': (shouts,),
        'comments': (),
        'all': (topics, users, shouts),
    }
    steps = commands.get(sys.argv[1]) if len(sys.argv) > 1 else None
    if steps is None:
        print(USAGE)
    else:
        for step in steps:
            step()
class Converter(HTMLParser):
    """Convert an HTML document into Markdown-flavoured text.

    Feed the markup with ``feed()``; the converted text accumulates in
    ``md_file`` and every image source or non-absolute link target seen on
    the way is appended to ``related_data``. Tables are not converted: their
    raw HTML is copied into the output verbatim.

    The instance keeps state between calls, so use a fresh ``Converter`` per
    document. Table extraction reads ``self.rawdata`` and therefore assumes
    the whole document was passed in a single ``feed()`` call.
    """

    md_file: str
    temp_tag: str
    code_box: bool
    div_count: int
    code_box_div_num: int
    ol_count: int
    related_data: list
    is_link: bool
    link_ref: str
    ignore_data: bool
    class_div_count: int
    ignore_div: bool
    # (line, column) of the currently open <table>, None when no table is open
    table_start: "Tuple[int, int] | None"

    # Elements whose whole content is dropped from the output.
    _SKIPPED_TAGS = ('style', 'symbol', 'svg', 'path')

    def __init__(self):
        super().__init__()
        self.md_file = ''
        self.code_box = False
        self.div_count = 0
        self.code_box_div_num = 0
        self.ol_count = 0
        self.temp_tag = ''
        self.related_data = []
        self.is_link = False
        self.link_ref = ''
        self.ignore_data = False
        self.class_div_count = 0
        self.ignore_div = False
        self.table_start = None
        # Name of the tag that switched ignore_data on; only its own end tag
        # may switch it off again.
        self._ignore_tag = ''

    def _start_ignoring(self, tag):
        """Drop everything until the end tag matching ``tag``."""
        self.ignore_data = True
        self._ignore_tag = tag

    def _stop_ignoring(self):
        """Resume normal conversion."""
        self.ignore_data = False
        self._ignore_tag = ''

    def _absolute_offset(self, lineno, column):
        """Translate a ``getpos()`` pair into an index into ``self.rawdata``."""
        line_start = 0
        for _ in range(lineno - 1):
            line_start = self.rawdata.find('\n', line_start) + 1
        return line_start + column

    def handle_starttag(self, tag, attrs):
        """Emit the Markdown opener for ``tag`` and update the parser state."""
        if self.ignore_data:
            return None
        attrs_dict = dict(attrs)
        if tag == 'br':
            self.md_file += ' \n'
        elif tag == 'hr':
            self.md_file += '\n*** \n'
        elif tag in ('title', 'h1'):
            self.md_file += '# '
        elif tag == 'h2':
            self.md_file += '## '
        elif tag == 'h3':
            self.md_file += '### '
        elif tag in ('b', 'strong'):
            self.md_file += '**'
        elif tag == 'ul':
            self.temp_tag = 'ul'
            self.md_file += ' \n'
        elif tag == 'ol':
            self.ol_count = 0
            self.temp_tag = 'ol'
            self.md_file += ' \n'
        elif tag == 'li':
            if self.temp_tag == 'ul':
                self.md_file += '* '
            elif self.temp_tag == 'ol':
                self.ol_count += 1
                self.md_file += f'{self.ol_count}. '
        elif tag == 'div':
            self.div_count += 1
            # A valueless attribute is reported as None by HTMLParser.
            style = attrs_dict.get('style') or ''
            if 'codeblock' in style:
                self.code_box_div_num = self.div_count
                self.code_box = True
                self.md_file += '```\n'
            elif 'class' in attrs_dict:
                self.class_div_count = self.div_count
                self.ignore_div = True
        elif tag == 'en-codeblock':
            self.code_box = True
            self.md_file += '\n```\n'
        elif tag == 'a':
            self.is_link = True
            self.link_ref = attrs_dict.get('href') or '#'
            is_absolute = self.link_ref.startswith('http')
            is_page = self.link_ref.endswith('html')
            if not is_absolute and not is_page and '@' not in self.link_ref:
                self.related_data.append(self.link_ref)
        elif tag in self._SKIPPED_TAGS:
            self._start_ignoring(tag)
        elif tag == 'img':
            img_ref = attrs_dict.get('src')
            if img_ref:  # an image without a source cannot be referenced
                alt_name = attrs_dict.get('alt')
                if alt_name is None:
                    alt_name = 'Placeholder'
                self.related_data.append(img_ref)
                if self.is_link:
                    self.md_file += f'[![{alt_name}]({img_ref})]({self.link_ref})'
                else:
                    self.md_file += f'![{alt_name}]({img_ref})'
        elif tag == 'table':
            self._start_ignoring(tag)
            self.table_start = self.getpos()
        return None

    def get_rawdata(self, start, stop, offset):
        """Return columns ``start:stop`` of line number ``offset`` (1-based).

        Kept unchanged for existing callers; table extraction no longer uses
        it because it can only address a span that lies on a single line.
        """
        temp_rawdata = self.rawdata
        for i in range(offset-1):
            next_section = temp_rawdata.find('\n')
            temp_rawdata = temp_rawdata[next_section+1:]
        return temp_rawdata[start:stop]

    def handle_endtag(self, tag):
        """Emit the Markdown closer for ``tag`` and update the parser state."""
        if self.ignore_data and self._ignore_tag and tag != self._ignore_tag:
            # Still inside a dropped element: nested end tags must neither
            # produce output nor disturb the div bookkeeping.
            return
        if tag in ('b', 'strong'):
            self.md_file += '** \n'
        elif tag == 'div':
            if self.code_box and self.code_box_div_num == self.div_count:
                self.code_box = False
                self.md_file += '```\n'
            elif self.ignore_div and self.class_div_count == self.div_count:
                self.ignore_div = False
            else:
                self.md_file += ' \n'
            self.div_count -= 1
        elif tag == 'en-codeblock':
            self.code_box = False
            self.md_file += '```\n'
        elif tag == 'a':
            self.is_link = False
        elif tag in self._SKIPPED_TAGS:
            self._stop_ignoring()
        elif tag == 'li':
            self.md_file += ' \n'
        elif tag == 'table':
            if self.table_start is not None:
                # getpos() points at the "<" of the end tag; include the tag
                # itself by searching for its closing ">".
                start = self._absolute_offset(*self.table_start)
                end_tag_at = self._absolute_offset(*self.getpos())
                closing = self.rawdata.find('>', end_tag_at)
                if closing >= 0:
                    stop = closing + 1
                else:
                    stop = end_tag_at + len(tag) + 3
                self.md_file += '\n' + self.rawdata[start:stop]
                self.table_start = None
            self._stop_ignoring()

    def handle_startendtag(self, tag, attrs):
        """Handle self-closing ``<br/>``, ``<hr/>`` and ``<img/>`` tags."""
        if self.ignore_data:
            return
        if tag == 'br':
            self.md_file += ' \n'
        elif tag == 'hr':
            self.md_file += '\n*** \n'
        elif tag == 'img':
            attr_dict = dict(attrs)
            img_ref = attr_dict.get('src')
            if not img_ref:
                return
            # Prefer the original file name, then the alt text.
            name = attr_dict.get('data-filename')
            if name is None:
                name = attr_dict.get('alt')
            if name is None:
                name = 'Placeholder'
            self.related_data.append(img_ref)
            self.md_file += f'![{name}]({img_ref})'

    def handle_data(self, data):
        """Append text content, wrapping it in a Markdown link inside ``<a>``."""
        if self.ignore_data:
            return
        if self.is_link:
            self.md_file += f'[{data}]({self.link_ref})'
        else:
            self.md_file += data
def migrate(entry):
    '''
    Convert one legacy comment record into a partial Shout dict.

    Target schema (a comment is stored as a Shout):

    type Shout {
        org: String!
        slug: String!
        author: Int!
        body: String!
        createdAt: DateTime!
        updatedAt: DateTime!
        deletedAt: DateTime
        deletedBy: Int
        rating: Int
        published: DateTime # if there is no published field - it is not published
        replyTo: String # another shout
        tags: [String] # actual values
        topics: [String] # topic-slugs
        title: String
        versionOf: String
        visibleForRoles: [String] # role ids are strings
        visibleForUsers: [Int]
    }

    :param entry: legacy record; ``slug``, ``createdAt`` and ``body`` are read
    :return: dict with ``org``, ``slug``, ``createdAt``, ``body`` and ``replyTo``
    '''
    # TODO: implement comments migration
    # A Converter accumulates output across feed() calls and is not callable,
    # so every comment gets its own instance.
    converter = Converter()
    converter.feed(entry['body'])
    converter.close()
    return {
        'org': 'discours.io',
        'slug': entry['slug'],
        'createdAt': entry['createdAt'],
        'body': converter.md_file,
        # TODO: the previous code read entry[''], which always raised
        # KeyError; the legacy field that names the parent shout still has
        # to be identified.
        'replyTo': None
    }
def _first_friendly_slug(friendly_slugs):
    '''Return the first slug stored in a legacy ``friendlySlugs`` value, or None.'''
    # NOTE(review): the previous code indexed this value both as
    # {'slug': [{'slug': ...}]} and as [{'slug': ...}]; both shapes are
    # accepted here - confirm which one the dump really contains.
    candidates = friendly_slugs
    if isinstance(friendly_slugs, dict):
        candidates = friendly_slugs.get('slug')
    if isinstance(candidates, (list, tuple)) and candidates:
        first = candidates[0]
        if isinstance(first, dict):
            return first.get('slug')
    return None


def migrate(entry):
    '''
    Convert one legacy content item into a Shout, store it and write its body
    to ``migration/content/<slug>.md``.

    type Shout {
        org_id: Int!
        slug: String!
        author: Int!
        body: String!
        createdAt: DateTime!
        updatedAt: DateTime!
        deletedAt: DateTime
        deletedBy: Int
        rating: Int
        ratings: [Rating]
        published: Bool!
        publishedAt: DateTime # if there is no published field - it is not published
        replyTo: String # another shout
        tags: [String] # actual values
        topics: [String] # topic-slugs, order has matter
        title: String
        versionOf: String
        visibleForRoles: [String] # role ids are strings
        visibleForUsers: [Int]
        views: Int
    }

    :param entry: legacy record from content_items.json
    :return: the dict passed to ``Shout.create`` plus the new ``id``
    :raises ValueError: when no slug can be derived for the entry
    '''
    r = {
        'org_id': 0,
        'layout': type2layout[entry['type']],
        'title': entry['title'],
        'authors': [users_dict[entry['createdBy']]['id']],
        'topics': [],
        'published': entry['published'],
        'views': entry['views'],
        'rating': entry['rating'],
        'ratings': []
    }

    r['slug'] = entry.get('slug')
    if not r['slug'] and entry.get('friendlySlugs') is not None:
        r['slug'] = _first_friendly_slug(entry['friendlySlugs'])
    if not r['slug']:
        # The slug names the output file, so fail with a clear message
        # instead of a TypeError from string concatenation.
        raise ValueError('content item ' + str(entry.get('_id')) + ' has no slug')

    if entry.get('image') is not None:
        r['cover'] = entry['image']['url']
    elif entry.get('thumborId') is not None:
        r['cover'] = 'https://discours.io/' + entry['thumborId']

    for field in ('publishedAt', 'createdAt', 'updatedAt'):
        if entry.get(field) is not None:
            r[field] = entry[field]

    content_type = entry.get('type')
    if content_type == 'Literature':
        r['body'] = entry['media'][0]['literatureBody']
    elif content_type in ('Video', 'Music', 'Image'):
        # NOTE(review): the Image branch used to read r['body'] before it was
        # ever set; it now gets an empty body like the other media types.
        r['body'] = ''
    else:
        r['body'] = '## ' + r['title']

    # TODO: compile md with graymatter
    with open('migration/content/' + r['slug'] + '.md', 'w', encoding='utf-8') as md_file:
        md_file.write(r['body'])
    shout = Shout.create(**r.copy())
    # Attribute access, matching how users.py reads the id of a created row.
    r['id'] = shout.id
    return r
def migrate(entry):
    '''
    Convert one legacy user record, create the User row and return its data.

    type User {
        username: String! # email
        createdAt: DateTime!
        email: String
        password: String
        oauth: String # provider:token
        viewname: String # to display
        userpic: String
        links: [String]
        emailConfirmed: Boolean # should contain all emails too
        id: Int!
        muted: Boolean
        rating: Int
        roles: [Role]
        updatedAt: DateTime
        wasOnlineAt: DateTime
        ratings: [Rating]
        slug: String
        bio: String
        notifications: [Int]
    }

    :param entry: legacy record from users.json
    :return: the dict passed to ``User.create`` plus ``id`` (new) and
        ``old_id`` (the legacy ``_id``)
    :raises Exception: the original error is re-raised after the offending
        profile has been printed
    '''
    res = {}
    try:
        res['old_id'] = entry['_id']
        # Not every account necessarily has a password service entry.
        services = entry.get('services') or {}
        res['password'] = (services.get('password') or {}).get('bcrypt', '')
        primary_email = entry['emails'][0]
        res['username'] = primary_email['address']
        res['email'] = res['username']
        res['wasOnlineAt'] = parse(entry.get('loggedInAt', entry['createdAt']))
        res['emailConfirmed'] = primary_email['verified']
        res['createdAt'] = parse(entry['createdAt'])
        res['rating'] = entry['rating']  # number
        res['roles'] = []  # entry['roles'] # roles without org is for discours.io
        res['ratings'] = []  # entry['ratings']
        res['notifications'] = []
        res['links'] = []
        res['muted'] = False
        res['viewname'] = 'anonymous'
        res['slug'] = None

        profile = entry.get('profile')
        if profile:
            res['slug'] = profile.get('path')
            res['userpic'] = (profile.get('image') or {}).get('url', '')
            name_parts = (profile.get('firstName'), profile.get('lastName'))
            full_name = ' '.join(part for part in name_parts if part)
            # Fall back to the profile path, then to the 'anonymous' default.
            res['viewname'] = full_name or profile.get('path') or res['viewname']
            for network in ('facebook', 'vkontakte', 'twitter', 'website'):
                link = profile.get(network)
                if link:
                    res['links'].append(link)

        # Slug fallbacks: last path segment of the first link, then the
        # local part of the e-mail address.
        if not res['slug'] and res['links']:
            res['slug'] = res['links'][0].rstrip('/').split('/')[-1]
        if not res['slug']:
            res['slug'] = res['email'].split('@')[0]
    except Exception:
        print(entry.get('profile'))
        # Re-raise the real error so the caller can see what went wrong.
        raise

    # old_id is not passed to User.create; it is restored on the result so
    # callers can map legacy ids to new ones.
    old = res.pop('old_id')
    user = User.create(**res.copy())
    res['id'] = user.id
    res['old_id'] = old
    return res
class DateTimeEncoder(JSONEncoder):
    """JSON encoder that serialises ``datetime`` values through ``str()``."""

    def default(self, z):
        """Return ``str(z)`` for a datetime; defer any other type to ``JSONEncoder``."""
        if not isinstance(z, datetime):
            return super().default(z)
        return str(z)
+22,7 @@ class Topic(Base): createdAt: str = Column(DateTime, nullable=False, default = datetime.now, comment="Created at") createdBy: str = Column(ForeignKey("user.id"), nullable=False, comment="Author") value: str = Column(String, nullable=False, comment="Value") - alters = relationship(lambda: Topic, backref=backref("topic", remote_side=[slug])) - alter_id: str = Column(ForeignKey("topic.slug")) - # TODO: add all the fields + # list of Topics where the current node is the "other party" or "child" + parents = relationship(lambda: Topic, secondary=Connection, primaryjoin=slug==Connection.c.parent, secondaryjoin=slug==Connection.c.child, viewonly=True) + # list of Topics where the current node is the "parent" + children = relationship(lambda: Topic, secondary=Connection, primaryjoin=slug==Connection.c.child, secondaryjoin=slug==Connection.c.parent) diff --git a/orm/user.py b/orm/user.py index e3c8a8ca..aa3e24df 100644 --- a/orm/user.py +++ b/orm/user.py @@ -25,7 +25,7 @@ UserRatings = Table("user_ratings", UserRoles = Table("user_roles", Base.metadata, Column('user_id', Integer, ForeignKey('user.id')), - Column('role', String, ForeignKey('role.name')) + Column('role_id', Integer, ForeignKey('role.id')) ) class User(Base): diff --git a/requirements.txt b/requirements.txt index 75e48073..bd0ab4b2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,4 +9,6 @@ passlib itsdangerous authlib httpx -psycopg2-binary \ No newline at end of file +psycopg2-binary +bson +python-frontmatter \ No newline at end of file diff --git a/schema.graphql b/schema.graphql index d9de063d..bef45f26 100644 --- a/schema.graphql +++ b/schema.graphql @@ -118,10 +118,12 @@ type Subscription { ############################################ Entities type Role { + id: Int! name: String! - org: String! - level: Int! # 1-8 + org_id: Int! + # level: Int! # 1-8 desc: String + permissions: [Int!]! } type Rating {