role_id and topic relations fixes

2021-08-20 12:27:19 +03:00
parent ee3b186ba1
commit 3075dbb64b
19 changed files with 592 additions and 12 deletions
--- a/migration/README.md
+++ b/migration/README.md
@@ -0,0 +1,41 @@
+# discours-migration
+
+First, put the `data` folder (the unpacked mongodump) into this folder.
+
+## Install
+
+```sh
+pipenv install -r requirements.txt
+```
+
+## Using
+
+Put the unpacked mongodump into the `data` folder, then run `pipenv shell && python` and execute the steps below in the interactive interpreter.
+
+
+1. Convert the old BSON dump to JSON files
+
+```py
+import bson2json
+
+bson2json.json_tables() # creates all the needed data json from bson mongodump
+```
+
+2. Migrate users
+
+```py
+import json
+# NOTE(review): the module lives at migration/tables/users.py; this path assumes
+# the interpreter was started inside the migration folder - confirm
+from tables.users import migrate
+
+# load the users table produced by bson2json.json_tables()
+with open('data/users.json') as f:
+    data = json.load(f)
+newdata = {}
+
+for u in data:
+    try:
+        newdata[u['_id']] = migrate(u)
+    except Exception:
+        # report the record that failed and keep migrating the rest
+        print('FAIL!')
+        print(u)
+
+
+```
--- a/migration/__init__.py
+++ b/migration/__init__.py
@@ -0,0 +1 @@
+__all__ = ["tables", "bson2json", "html2md"]
--- a/migration/bson2json.py
+++ b/migration/bson2json.py
@@ -0,0 +1,30 @@
+import bson
+import datetime
+import json
+import importlib
+
+from utils import DateTimeEncoder
+
+# Tables to convert. Each value is replaced by the list of decoded documents
+# once json_tables() has processed the corresponding dump.
+data = {
+    "content_items": [],
+    "content_item_categories": [],
+    "tags": [],
+    "email_subscriptions": [],
+    "users": [],
+    "comments": []
+}
+
+def json_tables():
+    '''
+    Convert every data/<table>.bson dump listed in `data` to data/<table>.json.
+
+    The decoded documents are also stored in the module-level `data` dict
+    under the table name.
+    '''
+    print('creating json files at data/')
+
+    for table in data:
+        lc = []
+        with open('data/'+table+'.bson', 'rb') as f:
+            bs = f.read()
+        # decode document after document until the whole buffer is consumed;
+        # decode_document returns the position where the next document starts
+        base = 0
+        while base < len(bs):
+            base, d = bson.decode_document(bs, base)
+            lc.append(d)
+        data[table] = lc
+        # DateTimeEncoder is passed so datetime values can be serialized
+        with open('data/'+table+'.json', 'w') as out:
+            out.write(json.dumps(lc,cls=DateTimeEncoder))
+
--- a/migration/html2md.py
+++ b/migration/html2md.py
@@ -0,0 +1,166 @@
+from html.parser import HTMLParser
+import os
+import codecs
+from typing import Tuple
+
+
+class Converter(HTMLParser):
+    '''
+    Incremental HTML-to-Markdown converter.
+
+    Feed HTML through feed(); the Markdown built so far is kept in `md_file`
+    and the link targets / image sources met on the way in `related_data`.
+    '''
+    md_file: str  # markdown text produced so far
+    temp_tag: str  # kind of the list opened last: 'ul' or 'ol'
+    code_box: bool  # True while inside a code block
+    div_count: int  # current <div> nesting depth
+    code_box_div_num: int  # depth of the <div> that opened the code block
+    ol_count: int  # number of the current ordered-list item
+    related_data: list  # collected link targets and image sources
+    is_link: bool  # True while inside <a>
+    link_ref: str  # href of the <a> opened last
+    ignore_data: bool  # True while tags and text are being skipped
+    class_div_count: int  # depth of the last <div> that carried a class
+    ignore_div: bool  # True while inside such a <div>
+    table_start: Tuple[int, int]  # getpos() of the last <table>; None before the first one
+
+    def __init__(self):
+        super().__init__()
+        self.md_file = ''
+        self.code_box = False
+        self.div_count = 0
+        self.code_box_div_num = 0
+        self.ol_count = 0
+        self.temp_tag = ''
+        self.related_data = []
+        self.is_link = False
+        self.link_ref = ''
+        self.ignore_data = False
+        self.class_div_count = 0
+        self.ignore_div = False
+        # set explicitly so a stray </table> cannot hit a missing attribute
+        self.table_start = None
+
+    def handle_starttag(self, tag, attrs):
+        '''Append the markdown that opens `tag` and update the parser state.'''
+        if self.ignore_data:
+            return None
+        elif tag == 'br':
+            self.md_file += '  \n'
+        elif tag == 'hr':
+            self.md_file += '\n***  \n'
+        elif tag == 'title':
+            self.md_file += '# '
+        elif tag == 'h1':
+            self.md_file += '# '
+        elif tag == 'h2':
+            self.md_file += '## '
+        elif tag == 'h3':
+            self.md_file += '### '
+        elif tag in ('b', 'strong'):
+            self.md_file += '**'
+        elif tag == 'ul':
+            self.temp_tag = 'ul'
+            self.md_file += '  \n'
+        elif tag == 'ol':
+            self.ol_count = 0
+            self.temp_tag = 'ol'
+            self.md_file += '  \n'
+        elif tag == 'li':
+            if self.temp_tag == 'ul':
+                self.md_file += '* '
+            elif self.temp_tag == 'ol':
+                self.ol_count += 1
+                self.md_file += f'{self.ol_count}. '
+        elif tag == 'div':
+            self.div_count += 1
+            attrs_dict = dict(attrs)
+            if 'style' in attrs_dict and 'codeblock' in attrs_dict['style']:
+                # remember the depth so the matching </div> closes the fence
+                self.code_box_div_num = self.div_count
+                self.code_box = True
+                self.md_file += '```\n'
+            elif 'class' in attrs_dict:
+                self.class_div_count = self.div_count
+                self.ignore_div = True
+        elif tag == 'en-codeblock':
+            self.code_box = True
+            self.md_file += '\n```\n'
+        elif tag == 'a':
+            self.is_link = True
+            attrs_dict = dict(attrs)
+            href = attrs_dict.get('href', '#')
+            # a valueless href attribute is reported as None by the parser
+            self.link_ref = '#' if href is None else href
+            if not self.link_ref.startswith('http') and not self.link_ref.endswith('html') and '@' not in self.link_ref:
+                self.related_data.append(self.link_ref)
+        elif tag in ('style', 'symbol', 'svg', 'path'):
+            # NOTE(review): handle_endtag never resets the flag for 'path' - confirm this is intended
+            self.ignore_data = True
+        elif tag == 'img':
+            attrs_dict = dict(attrs)
+            img_ref = attrs_dict.get('src')
+            if img_ref is None:
+                # an image without a source has nothing to point at
+                return None
+            alt_name = attrs_dict.get('alt', 'Placeholder')
+            self.related_data.append(img_ref)
+            if self.is_link:
+                self.md_file += f'[![{alt_name}]({img_ref})]({self.link_ref})'
+            else:
+                self.md_file += f'![{alt_name}]({img_ref})'
+        elif tag == 'table':
+            # tables are copied verbatim on </table>; skip everything inside
+            self.ignore_data = True
+            self.table_start = self.getpos()
+
+    def get_rawdata(self, start, stop, offset):
+        '''
+        Return raw input characters start..stop, counted from the beginning
+        of line number `offset` (1-based) of the parser buffer.
+        '''
+        temp_rawdata = self.rawdata
+        for _ in range(offset-1):
+            next_section = temp_rawdata.find('\n')
+            temp_rawdata = temp_rawdata[next_section+1:]
+        return temp_rawdata[start:stop]
+
+    def handle_endtag(self, tag):
+        '''Append the markdown that closes `tag` and update the parser state.'''
+        if tag in ('b', 'strong'):
+            self.md_file += '**  \n'
+        elif tag == 'div':
+            if self.code_box and self.code_box_div_num == self.div_count:
+                self.code_box = False
+                self.md_file += '```\n'
+            elif self.ignore_div and self.class_div_count == self.div_count:
+                self.ignore_div = False
+            else:
+                self.md_file += '  \n'
+            self.div_count -= 1
+        elif tag == 'en-codeblock':
+            self.code_box = False
+            self.md_file += '```\n'
+        elif tag == 'a':
+            self.is_link = False
+        elif tag in ('style', 'symbol', 'svg'):
+            self.ignore_data = False
+        elif tag == 'li':
+            self.md_file += '  \n'
+        elif tag == 'table':
+            if self.table_start is not None:
+                # getpos() returns (line number, column)
+                lineno, col_stop = self.getpos()
+                # extend the slice over the closing tag itself: '</' + tag + '>'
+                col_stop = col_stop + len(tag) + 3
+                _, col_start = self.table_start
+                # NOTE(review): both columns are applied to the closing tag's line,
+                # so this presumably expects the table to start on that line - confirm
+                raw_data = self.get_rawdata(col_start, col_stop, lineno)
+                self.md_file += '\n' + raw_data
+            self.ignore_data = False
+
+    def handle_startendtag(self, tag, attrs):
+        '''Handle self-closing tags such as <br/>, <hr/> and <img/>.'''
+        if tag == 'br':
+            self.md_file += '  \n'
+        elif tag == 'hr':
+            self.md_file += '\n***  \n'
+        elif tag == 'img':
+            attr_dict = dict(attrs)
+            img_ref = attr_dict.get('src')
+            if img_ref is None:
+                return None
+            # fall back to the alt text when the image carries no data-filename
+            name = attr_dict.get('data-filename') or attr_dict.get('alt') or 'Placeholder'
+            self.related_data.append(img_ref)
+            self.md_file += f'![{name}]({img_ref})'
+
+    def handle_data(self, data):
+        '''Append text: as a markdown link inside <a>, dropped while ignoring, verbatim otherwise.'''
+        if self.is_link:
+            self.md_file += f'[{data}]({self.link_ref})'
+        elif self.ignore_data:
+            pass
+        else:
+            self.md_file += data
--- a/migration/tables/__init__.py
+++ b/migration/tables/__init__.py
@@ -0,0 +1 @@
+__all__ = ["users"]
--- a/migration/tables/comments.py
+++ b/migration/tables/comments.py
@@ -0,0 +1,36 @@
+from html2md import Converter
+import datetime
+
+markdown = Converter()
+
+def migrate(entry):
+    '''
+    Convert an old comment record into a Shout-shaped dict.
+
+    # is comment
+    type Shout {
+        org: String!
+        slug: String!
+        author: Int!
+        body: String!
+        createdAt: DateTime!
+        updatedAt: DateTime!
+        deletedAt: DateTime
+        deletedBy: Int
+        rating: Int
+        published: DateTime # if there is no published field - it is not published
+        replyTo: String # another shout
+        tags: [String] # actual values
+        topics: [String] # topic-slugs
+        title: String
+        versionOf: String
+        visibleForRoles: [String] # role ids are strings
+        visibleForUsers: [Int]
+    }
+    '''
+    # TODO: implement comments migration
+    # a fresh parser per comment keeps markdown of earlier entries out of this one
+    converter = Converter()
+    converter.feed(entry['body'])
+    return {
+        'org': 'discours.io',
+        'slug': entry['slug'],
+        'createdAt': entry['createdAt'],
+        'body': converter.md_file,
+        # NOTE(review): the original looked up an empty key here; 'replyTo' is an
+        # assumed field name - confirm against the old comments schema
+        'replyTo': entry.get('replyTo')
+    }
--- a/migration/tables/content_item_categories.py
+++ b/migration/tables/content_item_categories.py
@@ -0,0 +1,19 @@
+def migrate(entry):
+    '''
+    Convert an old content item category into a Topic-shaped dict.
+
+    type Topic {
+        slug: String! # ID
+        createdBy: Int! # User
+        createdAt: DateTime!
+        value: String
+        parents: [String] # NOTE: topic can have parent topics
+        children: [String] # and children
+    }
+    '''
+    return {
+        'slug': entry['slug'],
+        'createdBy': entry['createdBy'], # NOTE: uses an old user id
+        'createdAt': entry['createdAt'],
+        # the lower-cased category title becomes the topic value
+        'value': entry['title'].lower(),
+        'parents': [],
+        'children': []
+    }
--- a/migration/tables/content_items.py
+++ b/migration/tables/content_items.py
@@ -0,0 +1,86 @@
+from migration.html2md import Converter
+from dateutil.parser import parse
+from os.path import abspath
+import json
+from orm import Shout
+
+# old user id -> migrated user record; migrate() reads the 'id' of each record
+with open(abspath('migration/data/users.dict.json')) as f:
+    users_dict = json.load(f)
+# extra record registered under the old id '0'
+users_dict['0'] = {'id': 99999 }
+
+markdown = Converter()
+
+# old content item type -> shout layout name
+type2layout = {
+    'Article': 'article',
+    'Literature': 'prose',
+    'Music': 'music',
+    'Video': 'video',
+    'Image': 'image'
+}
+
+def migrate(entry):
+    '''
+    Convert an old content item into a shout dict, write its body to
+    migration/content/<slug>.md and store it through Shout.create().
+
+    Returns the shout dict with the id of the created shout under 'id'.
+
+    type Shout {
+        org_id: Int!
+        slug: String!
+        author: Int!
+        body: String!
+        createdAt: DateTime!
+        updatedAt: DateTime!
+        deletedAt: DateTime
+        deletedBy: Int
+        rating: Int
+        ratings: [Rating]
+        published: Bool!
+        publishedAt: DateTime # if there is no published field - it is not published
+        replyTo: String # another shout
+        tags: [String] # actual values
+        topics: [String] # topic-slugs, order has matter
+        title: String
+        versionOf: String
+        visibleForRoles: [String] # role ids are strings
+        visibleForUsers: [Int]
+        views: Int
+    }
+    '''
+    r = {
+            'org_id': 0,
+            'layout': type2layout[entry['type']],
+            'title': entry['title'],
+            # the old author id is translated to the new one through users_dict
+            'authors': [ users_dict[entry['createdBy']]['id'], ],
+            'topics': [],
+            'published': entry['published'],
+            'views': entry['views'],
+            'rating': entry['rating'],
+            'ratings': []
+        }
+    r['slug'] = entry.get('slug')
+    if not r['slug'] and entry.get('friendlySlugs') is not None:
+        # NOTE(review): two different shapes of friendlySlugs are tried here - confirm both exist in the old data
+        r['slug'] = entry['friendlySlugs']['slug'][0]['slug']
+        if(r['slug'] is None):
+            r['slug'] = entry['friendlySlugs'][0]['slug']
+    if entry.get('image') is not None:
+        r['cover'] = entry['image']['url']
+    elif entry.get('thumborId') is not None:
+        r['cover'] = 'https://discours.io/' + entry['thumborId']
+
+    if entry.get('publishedAt') is not None:
+        r['publishedAt'] = entry['publishedAt']
+    if entry.get('createdAt') is not None:
+        r['createdAt'] = entry['createdAt']
+    if entry.get('updatedAt') is not None:
+        r['updatedAt'] = entry['updatedAt']
+    if entry.get('type') == 'Literature':
+        r['body'] = entry['media'][0]['literatureBody']
+    elif entry.get('type') == 'Video':
+        r['body'] = '<ShoutVideo src=\"' + entry['media'][0]['youtubeId'] + '\" />'
+    elif entry.get('type') == 'Music':
+        r['body'] = '<ShoutMusic media={\"' + json.dumps(entry['media']) +'\"} />'
+    elif entry.get('type') == 'Image':
+        # NOTE(review): no body source for images is known yet, so it stays empty - confirm
+        r['body'] = r.get('body', '')
+    else:
+        r['body'] = '## ' + r['title']
+    # TODO: compile md with graymatter
+    # until the front matter is compiled only the body is written out
+    with open('migration/content/' + r['slug'] + '.md', 'w') as f:
+        f.write(r['body'])
+    shout = Shout.create(**r.copy())
+    # NOTE(review): assumes create() returns a model instance exposing .id - confirm
+    r['id'] = shout.id
+    return r
--- a/migration/tables/email_subscriptions.py
+++ b/migration/tables/email_subscriptions.py
@@ -0,0 +1,2 @@
+def migrate(entry):
+    '''Return the email subscription record as it is.'''
+    return entry
--- a/migration/tables/tags.py
+++ b/migration/tables/tags.py
@@ -0,0 +1,20 @@
+def migrate(entry):
+    '''
+    Convert an old tag into a Topic-shaped dict.
+
+    type Topic {
+        slug: String! # ID
+        createdBy: Int! # User
+        createdAt: DateTime!
+        value: String
+        parents: [String] # NOTE: topic can have parent topics
+        children: [String] # and children
+    }
+    '''
+    return {
+        'slug': entry['slug'],
+        'createdBy': entry['createdBy'], # NOTE: uses an old user id
+        'createdAt': entry['createdAt'],
+        'value': entry['value'].lower(),
+        'parents': [],
+        'children': []
+    }
--- a/migration/tables/users.py
+++ b/migration/tables/users.py
@@ -0,0 +1,79 @@
+from orm import User
+from dateutil.parser import parse
+
+counter = 0
+
+def migrate(entry):
+    '''
+    Convert an old user record into the new user shape and store it
+    through User.create().
+
+    Returns the new user dict, extended with the id of the created user
+    ('id') and the original '_id' ('old_id'). Any error raised while
+    reading the old record is re-raised after the profile is printed.
+
+    type User {
+        username: String! # email
+        createdAt: DateTime!
+        email: String
+        password: String
+        oauth: String # provider:token
+        viewname: String # to display
+        userpic: String
+        links: [String]
+        emailConfirmed: Boolean # should contain all emails too
+        id: Int!
+        muted: Boolean
+        rating: Int
+        roles: [Role]
+        updatedAt: DateTime
+        wasOnlineAt: DateTime
+        ratings: [Rating]
+        slug: String
+        bio: String
+        notifications: [Int]
+    }
+
+    '''
+    res = {}
+    try:
+        res['old_id'] = entry['_id']
+        res['password'] = entry['services']['password'].get('bcrypt', '')
+        res['username'] = entry['emails'][0]['address']
+        res['email'] = res['username']
+        res['wasOnlineAt'] = parse(entry.get('loggedInAt', entry['createdAt']))
+        res['emailConfirmed'] = entry['emails'][0]['verified']
+        res['createdAt'] = parse(entry['createdAt'])
+        res['rating'] = entry['rating'] # number
+        res['roles'] = [] # entry['roles'] # roles without org is for discours.io
+        res['ratings'] = [] # entry['ratings']
+        res['notifications'] = []
+        res['links'] = []
+        res['muted'] = False
+        res['viewname'] = 'anonymous'
+        # set up front so the email fallback below also covers users without a profile
+        res['slug'] = None
+        profile = entry.get('profile')
+        if profile:
+            res['slug'] = profile.get('path')
+            res['userpic'] = profile.get('image', {'url': ''}).get('url', '')
+            viewname = profile.get('firstName', '') + ' ' + profile.get('lastName', '')
+            if len(viewname) < 2:
+                # no name given: show the profile path, or stay anonymous
+                viewname = profile.get('path') or res['viewname']
+            res['viewname'] = viewname
+            for network in ('facebook', 'vkontakte', 'twitter', 'website'):
+                link = profile.get(network, False)
+                if link:
+                    res['links'].append(link)
+            if not res['slug'] and res['links']:
+                # last path segment of the first link stands in for the slug
+                res['slug'] = res['links'][0].split('/')[-1]
+        if not res['slug']:
+            res['slug'] = res['email'].split('@')[0]
+    except Exception:
+        print(entry.get('profile'))
+        # re-raise the original error so its type and traceback are kept
+        raise
+    else:
+        # old_id is kept out of the stored user and restored on the result
+        old = res['old_id']
+        del res['old_id']
+        user = User.create(**res.copy())
+        res['id'] = user.id
+        res['old_id'] = old
+        return res
--- a/migration/utils.py
+++ b/migration/utils.py
@@ -0,0 +1,9 @@
+from datetime import datetime
+from json import JSONEncoder
+
+class DateTimeEncoder(JSONEncoder):
+    '''JSON encoder that writes datetime values as their str() form.'''
+    def default(self, z):
+        # anything that is not a datetime is left to the base encoder
+        if not isinstance(z, datetime):
+            return super().default(z)
+        return str(z)
				`@@ -0,0 +1 @@`
				`__all__ = ["tables", "bson2json", "html2md"]`