aftermerge, migration fixed

This commit is contained in:
Untone 2021-08-26 00:20:53 +03:00
parent 78b41cb9c4
commit 698de9114c
14 changed files with 161 additions and 96 deletions

View File

@@ -19,6 +19,7 @@ Authlib = "*"
bson = "*" bson = "*"
python-frontmatter = "*" python-frontmatter = "*"
bs4 = "*" bs4 = "*"
transliterate = "*"
[dev-packages] [dev-packages]

View File

@@ -48,22 +48,31 @@ def shouts(limit):
data = json.loads(open('migration/data/content_items.json').read()) data = json.loads(open('migration/data/content_items.json').read())
newdata = {} newdata = {}
print(str(len(data)) + ' entries was loaded. now migrating...') print(str(len(data)) + ' entries was loaded. now migrating...')
errored = []
for entry in data: for entry in data:
try:
oid = entry['_id'] oid = entry['_id']
newdata[oid] = migrateShout(entry) newdata[oid] = migrateShout(entry)
counter += 1 counter += 1
author = newdata[oid]['authors'][0]['slug'] author = newdata[oid]['authors'][0]['slug']
if author == 'discours': if author == 'discours':
discoursAuthor += 1 discoursAuthor += 1
line = str(counter) + ': ' + newdata[oid]['slug'] + " @" + author line = str(counter) + ': ' + newdata[oid]['slug'] + " @" + str(author)
print(line) print(line)
open('./shouts.id.log','a').write(line + '\n') open('./shouts.id.log','a').write(line + '\n')
if counter > limit: if counter > limit:
break break
except Exception:
print(entry['_id'])
errored.append(entry)
raise Exception
open('migration/data/shouts.dict.json','w').write( json.dumps(newdata, cls=DateTimeEncoder) ) open('migration/data/shouts.dict.json','w').write( json.dumps(newdata, cls=DateTimeEncoder) )
print(str(counter) + ' shouts were migrated') print(str(counter) + ' shouts were migrated')
print(str(discoursAuthor) + ' from them by uknown users') print(str(discoursAuthor) + ' from them by @discours')
print(str(len(errored)) + ' shouts without authors')
if __name__ == '__main__': if __name__ == '__main__':
import sys import sys

View File

@@ -18,18 +18,10 @@ def has_key(x, y):
if hasattr(x, 'has_key'): return x.has_key(y) if hasattr(x, 'has_key'): return x.has_key(y)
else: return y in x else: return y in x
try: import html.entities as htmlentitydefs
import htmlentitydefs import urllib.parse as urlparse
import urlparse import html.parser as HTMLParser
import HTMLParser import urllib.request as urllib
except ImportError: #Python3
import html.entities as htmlentitydefs
import urllib.parse as urlparse
import html.parser as HTMLParser
try: #Python3
import urllib.request as urllib
except:
import urllib
import optparse, re, sys, codecs, types import optparse, re, sys, codecs, types
try: from textwrap import wrap try: from textwrap import wrap
@@ -45,11 +37,11 @@ ESCAPE_SNOB = 0
LINKS_EACH_PARAGRAPH = 0 LINKS_EACH_PARAGRAPH = 0
# Wrap long lines at position. 0 for no wrapping. (Requires Python 2.3.) # Wrap long lines at position. 0 for no wrapping. (Requires Python 2.3.)
BODY_WIDTH = 78 BODY_WIDTH = 0
# Don't show internal links (href="#local-anchor") -- corresponding link targets # Don't show internal links (href="#local-anchor") -- corresponding link targets
# won't be visible in the plain text file anyway. # won't be visible in the plain text file anyway.
SKIP_INTERNAL_LINKS = True SKIP_INTERNAL_LINKS = False
# Use inline, rather than reference, formatting for images and links # Use inline, rather than reference, formatting for images and links
INLINE_LINKS = True INLINE_LINKS = True

View File

@@ -1,4 +1,4 @@
# from html2md import Converter from html2text import html2text
import datetime import datetime
# markdown = Converter() # markdown = Converter()
@@ -8,7 +8,7 @@ def migrate(entry):
# is comment # is comment
type Shout { type Shout {
org: String! org: String!
slug: String! slug: String
author: Int! author: Int!
body: String! body: String!
createdAt: DateTime! createdAt: DateTime!
@@ -28,7 +28,7 @@ def migrate(entry):
''' '''
# TODO: implement comments migration # TODO: implement comments migration
return { return {
'org': 'discours.io', 'org_id': 0,
'slug': entry['slug'], 'slug': entry['slug'],
'createdAt': entry['createdAt'], 'createdAt': entry['createdAt'],
'body': html2text(entry['body']), 'body': html2text(entry['body']),

View File

@@ -1,16 +1,25 @@
# from migration.html2md import Converter
from dateutil.parser import parse from dateutil.parser import parse
from orm import User
from os.path import abspath from os.path import abspath
import frontmatter import frontmatter
import json import json
from orm import Shout from orm import Shout
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from migration.html2text import html2text from migration.html2text import html2text
from transliterate import translit
from datetime import datetime
from sqlalchemy.exc import IntegrityError
from orm.base import local_session
users_dict = json.loads(open(abspath('migration/data/users.dict.json')).read()) users_dict = json.loads(open(abspath('migration/data/users.dict.json')).read())
users_dict['0'] = {'id': 9999999, 'slug': 'discours', 'viewname': 'Дискурс' } users_dict['0'] = {
'id': 9999999,
'slug': 'discours',
'viewname': 'Дискурс',
'userpic': 'https://discours.io/images/logo-mini.svg'
}
# markdown = Converter() ts = datetime.now()
type2layout = { type2layout = {
'Article': 'article', 'Article': 'article',
@@ -20,7 +29,7 @@ type2layout = {
'Image': 'image' 'Image': 'image'
} }
def migrate(entry, data=users_dict): def migrate(entry, limit=3626, start=0):
''' '''
type Shout { type Shout {
org_id: Int! org_id: Int!
@@ -45,21 +54,15 @@ def migrate(entry, data=users_dict):
views: Int views: Int
} }
''' '''
try:
author = data[entry['createdBy']]
except KeyError:
author = data['0']
# print(author)
r = { r = {
'org_id': 0, 'org_id': 0,
'layout': type2layout[entry['type']], 'layout': type2layout[entry['type']],
'title': entry['title'], 'title': entry['title'],
'authors': [ { 'slug': author['slug'], 'name': author['viewname'], 'pic': author.get('userpic', '') }, ], 'authors': [],
'topics': [], 'topics': [],
'published': entry['published'], 'published': entry.get('published', False),
'views': entry['views'], 'views': entry.get('views', 0),
'rating': entry['rating'], 'rating': entry.get('rating', 0),
'ratings': [] 'ratings': []
} }
r['slug'] = entry.get('slug', '') r['slug'] = entry.get('slug', '')
@@ -76,12 +79,8 @@ def migrate(entry, data=users_dict):
r['cover'] = entry['image']['url'] r['cover'] = entry['image']['url']
if entry.get('thumborId') is not None: if entry.get('thumborId') is not None:
r['cover'] = 'https://assets.discours.io/unsafe/1600x/' + entry['thumborId'] r['cover'] = 'https://assets.discours.io/unsafe/1600x/' + entry['thumborId']
if entry.get('publishedAt') is not None:
r['publishedAt'] = entry['publishedAt']
if entry.get('createdAt') is not None:
r['createdAt'] = entry['createdAt']
if entry.get('updatedAt') is not None: if entry.get('updatedAt') is not None:
r['updatedAt'] = entry['updatedAt'] r['updatedAt'] = parse(entry['updatedAt'])
if entry.get('type') == 'Literature': if entry.get('type') == 'Literature':
media = entry.get('media', '') media = entry.get('media', '')
# print(media[0]['literatureBody']) # print(media[0]['literatureBody'])
@@ -91,10 +90,9 @@ def migrate(entry, data=users_dict):
print('EMPTY BODY!') print('EMPTY BODY!')
else: else:
# body_html = str(BeautifulSoup(body_orig, features="html.parser")) # body_html = str(BeautifulSoup(body_orig, features="html.parser"))
#markdown.feed(body_html) # body = html2text(body_orig).replace('****', '**')
body = html2text(body_orig).replace('****', '**') r['old_id'] = entry.get('_id')
r['body'] = body r['body'] = body_orig
# r['body2'] = markdown.md_file
else: else:
print(r['slug'] + ': literature has no media') print(r['slug'] + ': literature has no media')
elif entry.get('type') == 'Video': elif entry.get('type') == 'Video':
@@ -105,7 +103,7 @@ def migrate(entry, data=users_dict):
if videoUrl == '#': if videoUrl == '#':
videoUrl = 'https://vimeo.com/' + vm if vm else '#' videoUrl = 'https://vimeo.com/' + vm if vm else '#'
if videoUrl == '#': if videoUrl == '#':
print(m) print(entry.get('media', 'NO MEDIA!'))
# raise Exception # raise Exception
r['body'] = '<ShoutVideo src=\"' + videoUrl + '\" />' + html2text(m.get('body', '')) # FIXME r['body'] = '<ShoutVideo src=\"' + videoUrl + '\" />' + html2text(m.get('body', '')) # FIXME
elif entry.get('type') == 'Music': elif entry.get('type') == 'Music':
@@ -113,30 +111,88 @@ def migrate(entry, data=users_dict):
if r.get('body') is None: if r.get('body') is None:
body_orig = entry.get('body', '') body_orig = entry.get('body', '')
# body_html = BeautifulSoup(body_orig, features="html.parser") body_html = BeautifulSoup(body_orig, features="html.parser")
r['body'] = html2text(body_orig).replace('****', '**') r['body'] = body_html # html2text(body_orig).replace('****', '**')
# markdown.feed(body_html) r['old_id'] = entry.get('_id')
# r['body2'] = markdown.md_file
if not r['body']: body = r.get('body')
r['body'] = entry.get('body') user = None
try:
userdata = users_dict[entry['createdBy']]
slug = userdata['slug']
name = userdata['viewname']
userpic = userdata['userpic']
except KeyError:
app = entry.get('application')
if app is not None:
authordata = {
'username': app['email'],
'email': app['email'],
'viewname': app['name'],
'bio': app.get('bio', ''),
'emailConfirmed': False,
'slug': translit(app['name'], 'ru', reversed=True).replace(' ', '-').lower(),
'createdAt': ts,
'wasOnlineAt': ts
}
try:
user = User.create(**authordata)
except IntegrityError:
with local_session() as session:
user = session.query(User).filter(User.email == authordata['email']).first()
if user is None:
user = session.query(User).filter(User.slug == authordata['slug']).first()
slug = user.slug
name = user.viewname
userpic = user.userpic
else:
# no application, no author!
slug = 'discours'
name = 'Дискурс'
userpic = 'https://discours.io/images/logo-mini.svg'
with local_session() as session:
user = session.query(User).filter(User.slug == slug).first()
r['authors'].append({
'slug': slug,
'name': name,
'pic': userpic
})
metadata = {} metadata = {}
metadata['title'] = r.get('title') metadata['title'] = r.get('title')
metadata['authors'] = r.get('authors') metadata['authors'] = r.get('authors')
if r.get('cover', False): if r.get('cover', False):
metadata['cover'] = r.get('cover') metadata['cover'] = r.get('cover')
body = r.get('body')
post = frontmatter.Post(body, **metadata) post = frontmatter.Post(body, **metadata)
dumped = frontmatter.dumps(post) dumped = frontmatter.dumps(post)
# raise Exception
open('migration/content/' + entry['type'].lower() + '/' + r['slug'] + '.md', 'w').write(dumped) if entry['published']:
# open('migration/content/' + entry['type'].lower() + '/' + r['slug'] + '.my.md', 'w').write(r['body2']) if r.get('old_id', None):
#if body_orig: ext = 'html'
# open('migration/content/' + entry['type'].lower() + '/' + r['slug'] + '.html', 'w').write(body_orig) content = str(body).replace('<p></p>', '').replace('<p> </p>', '')
#markdown.related_data = [] else:
#markdown.md_file = '' ext = 'md'
#markdown.reset() content = dumped
r['body'] = dumped open('migration/content/' + entry['type'].lower() + '/' + r['slug'] + '.' + ext, 'w').write(content)
# shout = Shout.create(**r.copy())
# r['id'] = shout['id'] try:
shout_dict = r.copy()
shout_dict['authors'] = [user, ]
if entry.get('createdAt') is not None:
shout_dict['createdAt'] = parse(entry.get('createdAt'))
else:
shout_dict['createdAt'] = ts
if entry.get('published'):
if entry.get('publishedAt') is not None:
shout_dict['publishedAt'] = parse(entry.get('publishedAt'))
else:
shout_dict['publishedAt'] = ts
s = Shout.create(**shout_dict)
r['id'] = s.id
except:
r['body'] = 'body moved'
print(r)
# print(s)
raise Exception
return r return r

View File

@@ -6,13 +6,7 @@ from migration.html2text import html2text
# markdown = Converter() # markdown = Converter()
counter = 0 counter = 0
def migrate(entry, limit=668):
def add(data):
data.emailConfirmed = False
user = User.create(**data)
return user
def migrate(entry):
''' '''
type User { type User {

View File

@@ -11,7 +11,7 @@ class Like(Base):
id: int = None id: int = None
user_id: str = Column(ForeignKey("user.id"), comment="Author", primary_key = True) user_id: str = Column(ForeignKey("user.id"), comment="Author", primary_key = True)
shout: str = Column(String, ForeignKey("shout.slug"), comment="Liked shout slug", primary_key = True) shout_id: int = Column(Integer, ForeignKey("shout.id"), comment="Liked shout id", primary_key = True)
value: int = Column(Integer, nullable=False, comment="Value") value: int = Column(Integer, nullable=False, comment="Value")
# TODO: add resolvers, debug, etc. # TODO: add resolvers, debug, etc.

View File

@@ -10,9 +10,9 @@ class Proposal(Base):
__tablename__ = 'proposal' __tablename__ = 'proposal'
author_id: int = Column(Integer, ForeignKey("user.id"), nullable=False, comment="Author") author_id: int = Column(Integer, ForeignKey("user.id"), nullable=False, comment="Author")
shout_id: int = Column(Integer, ForeignKey("shout.id"), nullable=False, comment="Shout")
body: str = Column(String, nullable=False, comment="Body") body: str = Column(String, nullable=False, comment="Body")
createdAt: str = Column(datetime, nullable=False, comment="Created at") createdAt: str = Column(datetime, nullable=False, comment="Created at")
shout: str = Column(String, ForeignKey("shout.slug"), nullable=False, comment="Updated at")
range: str = Column(String, nullable=True, comment="Range in format <start index>:<end>") range: str = Column(String, nullable=True, comment="Range in format <start index>:<end>")
# TODO: debug, logix # TODO: debug, logix

View File

@@ -1,3 +0,0 @@
from sqlalchemy import Column, Integer, String, ForeignKey
# from orm import Permission
from orm.base import Base

View File

@@ -24,7 +24,7 @@ class ClassType(TypeDecorator):
def process_result_value(self, value, dialect): def process_result_value(self, value, dialect):
class_ = REGISTRY.get(value) class_ = REGISTRY.get(value)
if class_ is None: if class_ is None:
warnings.warn(f"Can't find class <{value}>,find it yourself 😊", stacklevel=2) warnings.warn(f"Can't find class <{value}>,find it yourself!", stacklevel=2)
return class_ return class_
class Organization(Base): class Organization(Base):

View File

@@ -7,31 +7,46 @@ from orm.base import Base
ShoutAuthors = Table('shout_authors', ShoutAuthors = Table('shout_authors',
Base.metadata, Base.metadata,
Column('shout', String, ForeignKey('shout.slug')), Column('shout', Integer, ForeignKey('shout.id')),
Column('user_id', Integer, ForeignKey('user.id')) Column('user_id', Integer, ForeignKey('user.id'))
) )
ShoutTopics = Table('shout_topics', ShoutTopics = Table('shout_topics',
Base.metadata, Base.metadata,
Column('shout', String, ForeignKey('shout.slug')), Column('shout', Integer, ForeignKey('shout.id')),
Column('topic', String, ForeignKey('topic.slug')) Column('topic', Integer, ForeignKey('topic.id'))
) )
class ShoutRatings(Base):
__tablename__ = "user_ratings"
id = None
rater_id = Column(ForeignKey('user.id'), primary_key = True)
shout_id = Column(ForeignKey('shout.id'), primary_key = True)
value = Column(Integer)
class Shout(Base): class Shout(Base):
__tablename__ = 'shout' __tablename__ = 'shout'
slug: str = Column(String, primary_key=True) # NOTE: automatic ID here
slug: str = Column(String, nullable=False, unique=True)
org_id: int = Column(Integer, ForeignKey("organization.id"), nullable=False, comment="Organization") org_id: int = Column(Integer, ForeignKey("organization.id"), nullable=False, comment="Organization")
body: str = Column(String, nullable=False, comment="Body") body: str = Column(String, nullable=False, comment="Body")
createdAt: str = Column(DateTime, nullable=False, default = datetime.now, comment="Created at") createdAt: str = Column(DateTime, nullable=False, default = datetime.now, comment="Created at")
updatedAt: str = Column(DateTime, nullable=True, comment="Updated at") updatedAt: str = Column(DateTime, nullable=True, comment="Updated at")
replyTo: str = Column(ForeignKey("shout.slug"), nullable=True) replyTo: int = Column(ForeignKey("shout.id"), nullable=True)
versionOf: str = Column(ForeignKey("shout.slug"), nullable=True) versionOf: int = Column(ForeignKey("shout.id"), nullable=True)
tags: str = Column(String, nullable=True) tags: str = Column(String, nullable=True)
views: int = Column(Integer, default=0) views: int = Column(Integer, default=0)
published: bool = Column(Boolean, default=False) published: bool = Column(Boolean, default=False)
publishedAt: str = Column(DateTime, nullable=True) publishedAt: str = Column(DateTime, nullable=True)
cover: str = Column(String, nullable = True) cover: str = Column(String, nullable = True)
title: str = Column(String, nullable = True)
subtitle: str = Column(String, nullable = True)
layout: str = Column(String, nullable = True) layout: str = Column(String, nullable = True)
authors = relationship(lambda: User, secondary=ShoutAuthors) # NOTE: multiple authors authors = relationship(lambda: User, secondary=ShoutAuthors) # NOTE: multiple authors
topics = relationship(lambda: Topic, secondary=ShoutTopics) topics = relationship(lambda: Topic, secondary=ShoutTopics)
rating: int = Column(Integer, nullable=True, comment="Rating")
ratings = relationship(ShoutRatings, foreign_keys=ShoutRatings.shout_id)
old_id: str = Column(String, nullable = True)

View File

@@ -8,8 +8,8 @@ from orm.base import Base
Connection = Table('topic_connections', Connection = Table('topic_connections',
Base.metadata, Base.metadata,
Column('child', String, ForeignKey('topic.slug')), Column('child', Integer, ForeignKey('topic.id')),
Column('parent', String, ForeignKey('topic.slug')), Column('parent', Integer, ForeignKey('topic.id')),
UniqueConstraint('parent', 'child', name='unique_usage') UniqueConstraint('parent', 'child', name='unique_usage')
) )
@@ -17,8 +17,7 @@ Connection = Table('topic_connections',
class Topic(Base): class Topic(Base):
__tablename__ = 'topic' __tablename__ = 'topic'
id: int = None slug: str = Column(String, unique = True, nullable = False)
slug: str = Column(String, unique = True, nullable = False, primary_key=True)
org_id: str = Column(ForeignKey("organization.id"), nullable=False) org_id: str = Column(ForeignKey("organization.id"), nullable=False)
createdAt: str = Column(DateTime, nullable=False, default = datetime.now, comment="Created at") createdAt: str = Column(DateTime, nullable=False, default = datetime.now, comment="Created at")
createdBy: str = Column(ForeignKey("user.id"), nullable=False, comment="Author") createdBy: str = Column(ForeignKey("user.id"), nullable=False, comment="Author")

View File

@@ -12,4 +12,4 @@ httpx
psycopg2-binary psycopg2-binary
bson bson
python-frontmatter python-frontmatter
bs4 transliterate

View File

@@ -179,7 +179,7 @@ type Message {
# is publication # is publication
type Shout { type Shout {
org_id: Int! org_id: Int!
slug: String! slug: String
authors: [Int!]! authors: [Int!]!
cover: String cover: String
layout: String layout: String
@@ -196,10 +196,12 @@ type Shout {
tags: [String] # actual values tags: [String] # actual values
topics: [String] # topic-slugs, order has matter topics: [String] # topic-slugs, order has matter
title: String title: String
subtitle: String
versionOf: String versionOf: String
visibleForRoles: [String] # role ids are strings visibleForRoles: [String] # role ids are strings
visibleForUsers: [Int] visibleForUsers: [Int]
views: Int views: Int
old_id: String
} }
type Topic { type Topic {