role_id and topic relations fixes

This commit is contained in:
2021-08-20 12:27:19 +03:00
parent ee3b186ba1
commit 3075dbb64b
19 changed files with 592 additions and 12 deletions

41
migration/README.md Normal file
View File

@@ -0,0 +1,41 @@
# discours-migration
First, put the `data` directory into this folder.
## Install
```sh
pipenv install -r requirements.txt
```
## Using
Put the unpacked mongodump into the `data` folder, then work from a Python REPL started with `pipenv shell && python`.
1. Export the old data to JSON
```py
import bson2json
bson2json.json_tables() # creates all the needed data json from bson mongodump
```
2. migrate users
```py
import json
from migrations.users import migrate
data = json.loads(open('data/users.json').read())
newdata = {}
for u in data:
try:
newdata[u['_id']] = migrate(u)
except:
print('FAIL!')
print(u)
```

1
migration/__init__.py Normal file
View File

@@ -0,0 +1 @@
# Names exported by `from <this package> import *`.
__all__ = ["tables", "bson2json", "html2md"]

30
migration/bson2json.py Normal file
View File

@@ -0,0 +1,30 @@
import bson
import datetime
import json
import importlib

from utils import DateTimeEncoder

# Collections taken from the legacy dump.  json_tables() walks the keys to
# locate `data/<name>.bson` and replaces each empty list with the documents
# decoded from that file.
data = {
    "content_items": [],
    "content_item_categories": [],
    "tags": [],
    "email_subscriptions": [],
    "users": [],
    "comments": []
}
def json_tables():
    """Convert every BSON collection dump in ``data/`` into a JSON file.

    For each collection name in the module-level ``data`` mapping this reads
    ``data/<name>.bson``, decodes the documents it contains one after
    another, stores the resulting list back into ``data[<name>]`` and writes
    the same list to ``data/<name>.json`` using ``DateTimeEncoder``.

    All paths are relative to the current working directory.
    """
    print('creating json files at data/')
    # Snapshot the keys: the mapping's values are reassigned inside the loop.
    for table in list(data):
        documents = []
        with open('data/' + table + '.bson', 'rb') as f:
            bs = f.read()
        base = 0
        # Each call returns the offset to continue from together with the
        # decoded document; stop once the whole buffer has been consumed.
        while base < len(bs):
            base, d = bson.decode_document(bs, base)
            documents.append(d)
        data[table] = documents
        # Context manager so the JSON file is flushed and closed right away.
        with open('data/' + table + '.json', 'w') as out:
            out.write(json.dumps(documents, cls=DateTimeEncoder))

166
migration/html2md.py Normal file
View File

@@ -0,0 +1,166 @@
from html.parser import HTMLParser
import os
import codecs
from typing import Tuple
class Converter(HTMLParser):
    """Incremental HTML-to-Markdown converter built on ``HTMLParser``.

    Feed HTML through ``feed()``; the Markdown accumulates in ``md_file``.
    Link targets that do not start with ``http``, do not end with ``html``
    and contain no ``@``, plus every image source, are collected in
    ``related_data``.

    Content of ``style``, ``symbol``, ``svg`` and ``path`` elements is
    dropped.  A ``table`` element is copied through verbatim as raw HTML.

    The table copy indexes ``rawdata`` with positions reported by
    ``getpos()``, so a document containing tables has to be passed to
    ``feed()`` in a single call.
    """

    md_file: str  # Markdown produced so far
    temp_tag: str  # kind of the most recently opened list: 'ul' or 'ol'
    code_box: bool  # True while a code block is open
    div_count: int  # current <div> nesting depth
    code_box_div_num: int  # div depth at which the open code block started
    ol_count: int  # number of the last emitted ordered-list item
    related_data: list  # local link targets and image sources seen so far
    is_link: bool  # True while inside an <a> element
    link_ref: str  # href of the most recently opened <a>
    ignore_data: bool  # True while inside an element whose content is dropped
    class_div_count: int  # div depth of the last <div class=...>
    ignore_div: bool  # True while such a classed div is open
    table_start: Tuple[int, int]  # getpos() of the last opening <table>

    # Elements whose whole subtree is skipped by the handlers.
    _IGNORED_TAGS = ('style', 'symbol', 'svg', 'path', 'table')

    # Start tags that only append a fixed Markdown fragment.
    _START_MARKUP = {
        'br': ' \n',
        'hr': '\n*** \n',
        'title': '# ',
        'h1': '# ',
        'h2': '## ',
        'h3': '### ',
        'b': '**',
        'strong': '**',
    }

    def __init__(self):
        super().__init__()
        self.md_file = ''
        self.code_box = False
        self.div_count = 0
        self.code_box_div_num = 0
        self.ol_count = 0
        self.temp_tag = ''
        self.related_data = []
        self.is_link = False
        self.link_ref = ''
        self.ignore_data = False
        self.class_div_count = 0
        self.ignore_div = False
        # Defined up front so the attribute always exists.
        self.table_start = (1, 0)
        # Which element switched ignore_data on, and how many elements of
        # that same kind are currently open inside it.
        self._ignore_tag = ''
        self._ignore_depth = 0

    def handle_starttag(self, tag, attrs):
        """Append the Markdown that opens ``tag`` and update parser state."""
        if self.ignore_data:
            # Count nested elements of the ignored kind so that an inner
            # closing tag does not end the ignored region too early.
            if tag == self._ignore_tag:
                self._ignore_depth += 1
            return None
        attrs_dict = dict(attrs)
        if tag in self._START_MARKUP:
            self.md_file += self._START_MARKUP[tag]
        elif tag == 'ul':
            self.temp_tag = 'ul'
            self.md_file += ' \n'
        elif tag == 'ol':
            self.ol_count = 0
            self.temp_tag = 'ol'
            self.md_file += ' \n'
        elif tag == 'li':
            if self.temp_tag == 'ul':
                self.md_file += '* '
            elif self.temp_tag == 'ol':
                self.ol_count += 1
                self.md_file += f'{self.ol_count}. '
        elif tag == 'div':
            self.div_count += 1
            # A valueless attribute is reported as None; treat it as empty.
            if 'codeblock' in (attrs_dict.get('style') or ''):
                self.code_box_div_num = self.div_count
                self.code_box = True
                self.md_file += '```\n'
            elif 'class' in attrs_dict:
                self.class_div_count = self.div_count
                self.ignore_div = True
        elif tag == 'en-codeblock':
            self.code_box = True
            self.md_file += '\n```\n'
        elif tag == 'a':
            self.is_link = True
            href = attrs_dict.get('href', '#')
            self.link_ref = '#' if href is None else href
            if (not self.link_ref.startswith('http')
                    and not self.link_ref.endswith('html')
                    and '@' not in self.link_ref):
                self.related_data.append(self.link_ref)
        elif tag == 'img':
            self._append_image(
                attrs_dict, attrs_dict.get('alt', 'Placeholder'), self.is_link)
        elif tag in self._IGNORED_TAGS:
            if tag == 'table':
                # Remember where the table begins; it is copied verbatim
                # when its closing tag arrives.
                self.table_start = self.getpos()
            self.ignore_data = True
            self._ignore_tag = tag
            self._ignore_depth = 1

    def get_rawdata(self, start, stop, offset):
        """Return ``[start:stop]`` of ``rawdata`` after dropping ``offset - 1`` lines."""
        remainder = self.rawdata
        for _ in range(offset - 1):
            remainder = remainder[remainder.find('\n') + 1:]
        return remainder[start:stop]

    def handle_endtag(self, tag):
        """Append the Markdown that closes ``tag`` and update parser state."""
        if self.ignore_data:
            # Inside an ignored region only the matching closing tag matters;
            # everything else must neither emit Markdown nor touch counters.
            if tag == self._ignore_tag:
                self._ignore_depth -= 1
                if self._ignore_depth <= 0:
                    if tag == 'table':
                        self.md_file += '\n' + self._raw_table()
                    self.ignore_data = False
                    self._ignore_tag = ''
                    self._ignore_depth = 0
            return
        if tag == 'b' or tag == 'strong':
            self.md_file += '** \n'
        elif tag == 'div':
            if self.code_box and self.code_box_div_num == self.div_count:
                self.code_box = False
                self.md_file += '```\n'
            elif self.ignore_div and self.class_div_count == self.div_count:
                self.ignore_div = False
            else:
                self.md_file += ' \n'
            self.div_count -= 1
        elif tag == 'en-codeblock':
            self.code_box = False
            self.md_file += '```\n'
        elif tag == 'a':
            self.is_link = False
        elif tag == 'li':
            self.md_file += ' \n'

    def handle_startendtag(self, tag, attrs):
        """Handle self-closing ``br``, ``hr`` and ``img`` tags."""
        if self.ignore_data:
            return
        if tag == 'br':
            self.md_file += ' \n'
        elif tag == 'hr':
            self.md_file += '\n*** \n'
        elif tag == 'img':
            attr_dict = dict(attrs)
            # Prefer the file name, fall back to alt text, then a placeholder.
            name = attr_dict.get(
                'data-filename', attr_dict.get('alt', 'Placeholder'))
            self._append_image(attr_dict, name, False)

    def handle_data(self, data):
        """Append text, as a Markdown link while inside an ``<a>`` element."""
        if self.ignore_data:
            return
        if self.is_link:
            self.md_file += f'[{data}]({self.link_ref})'
        else:
            self.md_file += data

    def _append_image(self, attrs_dict, name, as_link):
        """Emit a Markdown image and record its source; skip images without ``src``."""
        src = attrs_dict.get('src')
        if not src:
            return
        self.related_data.append(src)
        image = f'![{name}]({src})'
        if as_link:
            image = f'[{image}]({self.link_ref})'
        self.md_file += image

    def _absolute_index(self, lineno, column):
        """Translate a ``getpos()`` style (line, column) pair into a ``rawdata`` index."""
        index = 0
        for _ in range(lineno - 1):
            newline = self.rawdata.find('\n', index)
            if newline == -1:
                break
            index = newline + 1
        return index + column

    def _raw_table(self):
        """Return the raw HTML from the remembered ``<table>`` to the current ``</table>``."""
        start = self._absolute_index(*self.table_start)
        end_tag_start = self._absolute_index(*self.getpos())
        closing = self.rawdata.find('>', end_tag_start)
        if closing == -1:
            stop = end_tag_start + len('</table>')
        else:
            stop = closing + 1
        return self.rawdata[start:stop]

View File

@@ -0,0 +1 @@
# Names exported by a star-import of this package.
__all__ = ["users"]

View File

@@ -0,0 +1,36 @@
from html2md import Converter
import datetime
# Module-level html2md Converter instance.
markdown = Converter()
def migrate(entry):
    '''
    Convert a legacy comment document into a Shout-shaped dict.

    Target schema (a comment is stored as a Shout):

    type Shout {
        org: String!
        slug: String!
        author: Int!
        body: String!
        createdAt: DateTime!
        updatedAt: DateTime!
        deletedAt: DateTime
        deletedBy: Int
        rating: Int
        published: DateTime # if there is no published field - it is not published
        replyTo: String # another shout
        tags: [String] # actual values
        topics: [String] # topic-slugs
        title: String
        versionOf: String
        visibleForRoles: [String] # role ids are strings
        visibleForUsers: [Int]
    }

    Only org, slug, createdAt, body and replyTo are filled in so far.
    Raises KeyError when the entry lacks slug, createdAt or body.
    '''
    # TODO: implement comments migration
    # A fresh converter per comment: Converter accumulates its output in
    # md_file, so a shared instance would mix the bodies of all comments.
    converter = Converter()
    converter.feed(entry['body'])
    converter.close()
    return {
        'org': 'discours.io',
        'slug': entry['slug'],
        'createdAt': entry['createdAt'],
        'body': converter.md_file,
        # NOTE(review): the legacy key holding the parent comment is not
        # visible here; 'replyTo' is an assumption - confirm against the dump.
        'replyTo': entry.get('replyTo')
    }

View File

@@ -0,0 +1,19 @@
def migrate(entry):
    '''
    Convert a legacy category document into a Topic-shaped dict.

    type Topic {
        slug: String! # ID
        createdBy: Int! # User
        createdAt: DateTime!
        value: String
        parents: [String] # NOTE: topic can have parent topics
        children: [String] # and children
    }

    The topic value is the lower-cased legacy title; parents and children
    start out empty.  Raises KeyError when slug, createdBy, createdAt or
    title is missing.
    '''
    return {
        'slug': entry['slug'],
        'createdBy': entry['createdBy'],  # NOTE: uses an old user id
        'createdAt': entry['createdAt'],
        'value': entry['title'].lower(),
        'parents': [],
        'children': []
    }

View File

@@ -0,0 +1,86 @@
from migration.html2md import Converter
from dateutil.parser import parse
from os.path import abspath
import json
from orm import Shout
# Migrated user records loaded from disk; migrate() looks them up by the
# legacy `createdBy` value and reads their 'id'.  The file is read inside a
# context manager so the handle is closed as soon as it is parsed.
with open(abspath('migration/data/users.dict.json')) as users_file:
    users_dict = json.load(users_file)
# Placeholder record under key '0' - presumably for content whose author is
# unknown; TODO confirm.
users_dict['0'] = {'id': 99999 }
markdown = Converter()
# Legacy content type -> layout name stored on the migrated shout.
type2layout = {
    'Article': 'article',
    'Literature': 'prose',
    'Music': 'music',
    'Video': 'video',
    'Image': 'image'
}
def migrate(entry):
    '''
    Convert a legacy content item into a Shout, store it and return its fields.

    type Shout {
        org_id: Int!
        slug: String!
        author: Int!
        body: String!
        createdAt: DateTime!
        updatedAt: DateTime!
        deletedAt: DateTime
        deletedBy: Int
        rating: Int
        ratings: [Rating]
        published: Bool!
        publishedAt: DateTime # if there is no published field - it is not published
        replyTo: String # another shout
        tags: [String] # actual values
        topics: [String] # topic-slugs, order has matter
        title: String
        versionOf: String
        visibleForRoles: [String] # role ids are strings
        visibleForUsers: [Int]
        views: Int
    }

    Side effects: writes the body to migration/content/<slug>.md and creates
    a Shout through the ORM.  The returned dict carries the new shout id
    under 'id'.

    Raises KeyError for missing mandatory legacy fields or unknown
    types/authors, and ValueError when no slug can be determined.
    '''
    r = {
        'org_id': 0,
        'layout': type2layout[entry['type']],
        'title': entry['title'],
        'authors': [ users_dict[entry['createdBy']]['id'], ],
        'topics': [],
        'published': entry['published'],
        'views': entry['views'],
        'rating': entry['rating'],
        'ratings': []
    }

    # Slug: take the explicit one, else the first friendly slug.  The legacy
    # data appears to store friendlySlugs either as {'slug': [...]} or as a
    # plain list, so both shapes are accepted.
    slug = entry.get('slug')
    friendly = entry.get('friendlySlugs')
    if not slug and friendly is not None:
        if isinstance(friendly, dict):
            candidates = friendly.get('slug')
        else:
            candidates = friendly
        if candidates:
            slug = candidates[0]['slug']
    if not slug:
        # Without a slug there is no file name for the content below.
        raise ValueError('content item has neither slug nor friendlySlugs')
    r['slug'] = slug

    if entry.get('image') is not None:
        r['cover'] = entry['image']['url']
    elif entry.get('thumborId') is not None:
        r['cover'] = 'https://discours.io/' + entry['thumborId']

    # Optional timestamps are copied only when present.
    for stamp in ('publishedAt', 'createdAt', 'updatedAt'):
        if entry.get(stamp) is not None:
            r[stamp] = entry[stamp]

    kind = entry.get('type')
    if kind == 'Literature':
        r['body'] = entry['media'][0]['literatureBody']
    elif kind == 'Video':
        r['body'] = '<ShoutVideo src=\"' + entry['media'][0]['youtubeId'] + '\" />'
    elif kind == 'Music':
        r['body'] = '<ShoutMusic media={\"' + json.dumps(entry['media']) +'\"} />'
    elif kind == 'Image':
        # No body source for images is visible here; keep whatever is set
        # and default to an empty body.  TODO confirm the intended content.
        r['body'] = r.get('body', '')
    else:
        r['body'] = '## ' + r['title']

    # TODO: compile md with graymatter
    # Until then the plain body is written as the markdown file.
    with open('migration/content/' + r['slug'] + '.md', 'w') as md_out:
        md_out.write(r['body'])

    shout = Shout.create(**r.copy())
    # Attribute access, matching how the users migration reads `user.id`.
    r['id'] = shout.id
    return r

View File

@@ -0,0 +1,2 @@
def migrate(entry):
    '''Pass the legacy entry through unchanged; no conversion is applied.'''
    migrated = entry
    return migrated

20
migration/tables/tags.py Normal file
View File

@@ -0,0 +1,20 @@
def migrate(entry):
    '''
    Convert a legacy tag document into a Topic-shaped dict.

    type Topic {
        slug: String! # ID
        createdBy: Int! # User
        createdAt: DateTime!
        value: String
        parents: [String] # NOTE: topic can have parent topics
        children: [String] # and children
    }

    The topic value is the lower-cased legacy value; parents and children
    start out empty.  Raises KeyError when slug, createdBy, createdAt or
    value is missing.
    '''
    # No helper for mapping old user ids to new ones is defined or imported
    # in this module, so the legacy id is kept as-is, exactly like the
    # category migration does.
    creator_id = entry['createdBy']
    return {
        'slug': entry['slug'],
        'createdBy': creator_id,  # NOTE: uses an old user id
        'createdAt': entry['createdAt'],
        'value': entry['value'].lower(),
        'parents': [],
        'children': []
    }

79
migration/tables/users.py Normal file
View File

@@ -0,0 +1,79 @@
from orm import User
from dateutil.parser import parse
# Module-level counter initialised to zero; migrate() in this module does not reference it.
counter = 0
def migrate(entry):
    '''
    Convert a legacy user document, create the User and return its fields.

    type User {
        username: String! # email
        createdAt: DateTime!
        email: String
        password: String
        oauth: String # provider:token
        viewname: String # to display
        userpic: String
        links: [String]
        emailConfirmed: Boolean # should contain all emails too
        id: Int!
        muted: Boolean
        rating: Int
        roles: [Role]
        updatedAt: DateTime
        wasOnlineAt: DateTime
        ratings: [Rating]
        slug: String
        bio: String
        notifications: [Int]
    }

    The returned dict holds the values passed to User.create plus the new
    'id' and the legacy 'old_id'.

    Any error raised while reading the legacy entry is re-raised unchanged
    after the entry's profile has been printed for diagnosis.
    '''
    res = {}
    try:
        res['old_id'] = entry['_id']
        res['password'] = entry['services']['password'].get('bcrypt', '')
        res['username'] = entry['emails'][0]['address']
        res['email'] = res['username']
        res['wasOnlineAt'] = parse(entry.get('loggedInAt', entry['createdAt']))
        res['emailConfirmed'] = entry['emails'][0]['verified']
        res['createdAt'] = parse(entry['createdAt'])
        res['rating'] = entry['rating'] # number
        res['roles'] = [] # entry['roles'] # roles without org is for discours.io
        res['ratings'] = [] # entry['ratings']
        res['notifications'] = []
        res['links'] = []
        res['muted'] = False
        res['viewname'] = 'anonymous'

        profile = entry.get('profile')
        if profile:
            res['slug'] = profile.get('path')
            res['userpic'] = profile.get('image', {'url': ''}).get('url', '')
            viewname = profile.get('firstName', '') + ' ' + profile.get('lastName', '')
            if len(viewname) < 2:
                # No real name given: show the profile path, or keep the
                # 'anonymous' default when there is no path either.
                viewname = profile.get('path') or res['viewname']
            res['viewname'] = viewname
            for network in ('facebook', 'vkontakte', 'twitter', 'website'):
                link = profile.get(network, False)
                if link:
                    res['links'].append(link)

        # Slug fallbacks: last segment of the first link, then the local
        # part of the email address.
        if not res.get('slug') and res['links']:
            res['slug'] = res['links'][0].split('/')[-1]
        if not res.get('slug'):
            res['slug'] = res['email'].split('@')[0]
    except Exception:
        # .get() keeps the diagnostic from raising a second error; the bare
        # raise preserves the original exception and traceback.
        print(entry.get('profile'))
        raise

    # old_id is bookkeeping for the migration only and is not passed to
    # User.create.
    old = res.pop('old_id')
    user = User.create(**res.copy())
    res['id'] = user.id
    res['old_id'] = old
    return res

9
migration/utils.py Normal file
View File

@@ -0,0 +1,9 @@
from datetime import datetime
from json import JSONEncoder
class DateTimeEncoder(JSONEncoder):
    """JSON encoder that writes ``datetime`` values as their ``str()`` form."""

    def default(self, z):
        """Return ``str(z)`` for datetimes; defer everything else to the base class."""
        if not isinstance(z, datetime):
            return super().default(z)
        return str(z)