aftermerge, migration fixed

This commit is contained in:
Untone 2021-08-26 00:20:53 +03:00
parent 78b41cb9c4
commit 698de9114c
14 changed files with 161 additions and 96 deletions

View File

@ -19,6 +19,7 @@ Authlib = "*"
bson = "*"
python-frontmatter = "*"
bs4 = "*"
transliterate = "*"
[dev-packages]

View File

@ -48,22 +48,31 @@ def shouts(limit):
data = json.loads(open('migration/data/content_items.json').read())
newdata = {}
print(str(len(data)) + ' entries was loaded. now migrating...')
errored = []
for entry in data:
oid = entry['_id']
newdata[oid] = migrateShout(entry)
counter += 1
author = newdata[oid]['authors'][0]['slug']
if author == 'discours':
discoursAuthor += 1
line = str(counter) + ': ' + newdata[oid]['slug'] + " @" + author
print(line)
open('./shouts.id.log','a').write(line + '\n')
if counter > limit:
break
try:
oid = entry['_id']
newdata[oid] = migrateShout(entry)
counter += 1
author = newdata[oid]['authors'][0]['slug']
if author == 'discours':
discoursAuthor += 1
line = str(counter) + ': ' + newdata[oid]['slug'] + " @" + str(author)
print(line)
open('./shouts.id.log','a').write(line + '\n')
if counter > limit:
break
except Exception:
print(entry['_id'])
errored.append(entry)
raise Exception
open('migration/data/shouts.dict.json','w').write( json.dumps(newdata, cls=DateTimeEncoder) )
print(str(counter) + ' shouts were migrated')
print(str(discoursAuthor) + ' from them by uknown users')
print(str(discoursAuthor) + ' from them by @discours')
print(str(len(errored)) + ' shouts without authors')
if __name__ == '__main__':
import sys

View File

@ -18,18 +18,10 @@ def has_key(x, y):
if hasattr(x, 'has_key'): return x.has_key(y)
else: return y in x
try:
import htmlentitydefs
import urlparse
import HTMLParser
except ImportError: #Python3
import html.entities as htmlentitydefs
import urllib.parse as urlparse
import html.parser as HTMLParser
try: #Python3
import urllib.request as urllib
except:
import urllib
import html.entities as htmlentitydefs
import urllib.parse as urlparse
import html.parser as HTMLParser
import urllib.request as urllib
import optparse, re, sys, codecs, types
try: from textwrap import wrap
@ -45,11 +37,11 @@ ESCAPE_SNOB = 0
LINKS_EACH_PARAGRAPH = 0
# Wrap long lines at position. 0 for no wrapping. (Requires Python 2.3.)
BODY_WIDTH = 78
BODY_WIDTH = 0
# Don't show internal links (href="#local-anchor") -- corresponding link targets
# won't be visible in the plain text file anyway.
SKIP_INTERNAL_LINKS = True
SKIP_INTERNAL_LINKS = False
# Use inline, rather than reference, formatting for images and links
INLINE_LINKS = True

View File

@ -1,4 +1,4 @@
# from html2md import Converter
from html2text import html2text
import datetime
# markdown = Converter()
@ -8,7 +8,7 @@ def migrate(entry):
# is comment
type Shout {
org: String!
slug: String!
slug: String
author: Int!
body: String!
createdAt: DateTime!
@ -28,7 +28,7 @@ def migrate(entry):
'''
# TODO: implement comments migration
return {
'org': 'discours.io',
'org_id': 0,
'slug': entry['slug'],
'createdAt': entry['createdAt'],
'body': html2text(entry['body']),

View File

@ -1,16 +1,25 @@
# from migration.html2md import Converter
from dateutil.parser import parse
from orm import User
from os.path import abspath
import frontmatter
import json
from orm import Shout
from bs4 import BeautifulSoup
from migration.html2text import html2text
from transliterate import translit
from datetime import datetime
from sqlalchemy.exc import IntegrityError
from orm.base import local_session
users_dict = json.loads(open(abspath('migration/data/users.dict.json')).read())
users_dict['0'] = {'id': 9999999, 'slug': 'discours', 'viewname': 'Дискурс' }
users_dict['0'] = {
'id': 9999999,
'slug': 'discours',
'viewname': 'Дискурс',
'userpic': 'https://discours.io/images/logo-mini.svg'
}
# markdown = Converter()
ts = datetime.now()
type2layout = {
'Article': 'article',
@ -20,7 +29,7 @@ type2layout = {
'Image': 'image'
}
def migrate(entry, data=users_dict):
def migrate(entry, limit=3626, start=0):
'''
type Shout {
org_id: Int!
@ -45,21 +54,15 @@ def migrate(entry, data=users_dict):
views: Int
}
'''
try:
author = data[entry['createdBy']]
except KeyError:
author = data['0']
# print(author)
r = {
'org_id': 0,
'layout': type2layout[entry['type']],
'title': entry['title'],
'authors': [ { 'slug': author['slug'], 'name': author['viewname'], 'pic': author.get('userpic', '') }, ],
'authors': [],
'topics': [],
'published': entry['published'],
'views': entry['views'],
'rating': entry['rating'],
'published': entry.get('published', False),
'views': entry.get('views', 0),
'rating': entry.get('rating', 0),
'ratings': []
}
r['slug'] = entry.get('slug', '')
@ -76,12 +79,8 @@ def migrate(entry, data=users_dict):
r['cover'] = entry['image']['url']
if entry.get('thumborId') is not None:
r['cover'] = 'https://assets.discours.io/unsafe/1600x/' + entry['thumborId']
if entry.get('publishedAt') is not None:
r['publishedAt'] = entry['publishedAt']
if entry.get('createdAt') is not None:
r['createdAt'] = entry['createdAt']
if entry.get('updatedAt') is not None:
r['updatedAt'] = entry['updatedAt']
r['updatedAt'] = parse(entry['updatedAt'])
if entry.get('type') == 'Literature':
media = entry.get('media', '')
# print(media[0]['literatureBody'])
@ -91,10 +90,9 @@ def migrate(entry, data=users_dict):
print('EMPTY BODY!')
else:
# body_html = str(BeautifulSoup(body_orig, features="html.parser"))
#markdown.feed(body_html)
body = html2text(body_orig).replace('****', '**')
r['body'] = body
# r['body2'] = markdown.md_file
# body = html2text(body_orig).replace('****', '**')
r['old_id'] = entry.get('_id')
r['body'] = body_orig
else:
print(r['slug'] + ': literature has no media')
elif entry.get('type') == 'Video':
@ -105,7 +103,7 @@ def migrate(entry, data=users_dict):
if videoUrl == '#':
videoUrl = 'https://vimeo.com/' + vm if vm else '#'
if videoUrl == '#':
print(m)
print(entry.get('media', 'NO MEDIA!'))
# raise Exception
r['body'] = '<ShoutVideo src=\"' + videoUrl + '\" />' + html2text(m.get('body', '')) # FIXME
elif entry.get('type') == 'Music':
@ -113,30 +111,88 @@ def migrate(entry, data=users_dict):
if r.get('body') is None:
body_orig = entry.get('body', '')
# body_html = BeautifulSoup(body_orig, features="html.parser")
r['body'] = html2text(body_orig).replace('****', '**')
# markdown.feed(body_html)
# r['body2'] = markdown.md_file
if not r['body']:
r['body'] = entry.get('body')
body_html = BeautifulSoup(body_orig, features="html.parser")
r['body'] = body_html # html2text(body_orig).replace('****', '**')
r['old_id'] = entry.get('_id')
body = r.get('body')
user = None
try:
userdata = users_dict[entry['createdBy']]
slug = userdata['slug']
name = userdata['viewname']
userpic = userdata['userpic']
except KeyError:
app = entry.get('application')
if app is not None:
authordata = {
'username': app['email'],
'email': app['email'],
'viewname': app['name'],
'bio': app.get('bio', ''),
'emailConfirmed': False,
'slug': translit(app['name'], 'ru', reversed=True).replace(' ', '-').lower(),
'createdAt': ts,
'wasOnlineAt': ts
}
try:
user = User.create(**authordata)
except IntegrityError:
with local_session() as session:
user = session.query(User).filter(User.email == authordata['email']).first()
if user is None:
user = session.query(User).filter(User.slug == authordata['slug']).first()
slug = user.slug
name = user.viewname
userpic = user.userpic
else:
# no application, no author!
slug = 'discours'
name = 'Дискурс'
userpic = 'https://discours.io/images/logo-mini.svg'
with local_session() as session:
user = session.query(User).filter(User.slug == slug).first()
r['authors'].append({
'slug': slug,
'name': name,
'pic': userpic
})
metadata = {}
metadata['title'] = r.get('title')
metadata['authors'] = r.get('authors')
if r.get('cover', False):
metadata['cover'] = r.get('cover')
body = r.get('body')
post = frontmatter.Post(body, **metadata)
dumped = frontmatter.dumps(post)
# raise Exception
open('migration/content/' + entry['type'].lower() + '/' + r['slug'] + '.md', 'w').write(dumped)
# open('migration/content/' + entry['type'].lower() + '/' + r['slug'] + '.my.md', 'w').write(r['body2'])
#if body_orig:
# open('migration/content/' + entry['type'].lower() + '/' + r['slug'] + '.html', 'w').write(body_orig)
#markdown.related_data = []
#markdown.md_file = ''
#markdown.reset()
r['body'] = dumped
# shout = Shout.create(**r.copy())
# r['id'] = shout['id']
if entry['published']:
if r.get('old_id', None):
ext = 'html'
content = str(body).replace('<p></p>', '').replace('<p> </p>', '')
else:
ext = 'md'
content = dumped
open('migration/content/' + entry['type'].lower() + '/' + r['slug'] + '.' + ext, 'w').write(content)
try:
shout_dict = r.copy()
shout_dict['authors'] = [user, ]
if entry.get('createdAt') is not None:
shout_dict['createdAt'] = parse(entry.get('createdAt'))
else:
shout_dict['createdAt'] = ts
if entry.get('published'):
if entry.get('publishedAt') is not None:
shout_dict['publishedAt'] = parse(entry.get('publishedAt'))
else:
shout_dict['publishedAt'] = ts
s = Shout.create(**shout_dict)
r['id'] = s.id
except:
r['body'] = 'body moved'
print(r)
# print(s)
raise Exception
return r

View File

@ -6,13 +6,7 @@ from migration.html2text import html2text
# markdown = Converter()
counter = 0
def add(data):
data.emailConfirmed = False
user = User.create(**data)
return user
def migrate(entry):
def migrate(entry, limit=668):
'''
type User {

View File

@ -11,7 +11,7 @@ class Like(Base):
id: int = None
user_id: str = Column(ForeignKey("user.id"), comment="Author", primary_key = True)
shout: str = Column(String, ForeignKey("shout.slug"), comment="Liked shout slug", primary_key = True)
shout_id: int = Column(Integer, ForeignKey("shout.id"), comment="Liked shout id", primary_key = True)
value: int = Column(Integer, nullable=False, comment="Value")
# TODO: add resolvers, debug, etc.

View File

@ -10,9 +10,9 @@ class Proposal(Base):
__tablename__ = 'proposal'
author_id: int = Column(Integer, ForeignKey("user.id"), nullable=False, comment="Author")
shout_id: int = Column(Integer, ForeignKey("shout.id"), nullable=False, comment="Shout")
body: str = Column(String, nullable=False, comment="Body")
createdAt: str = Column(datetime, nullable=False, comment="Created at")
shout: str = Column(String, ForeignKey("shout.slug"), nullable=False, comment="Updated at")
range: str = Column(String, nullable=True, comment="Range in format <start index>:<end>")
# TODO: debug, logix

View File

@ -1,3 +0,0 @@
from sqlalchemy import Column, Integer, String, ForeignKey
# from orm import Permission
from orm.base import Base

View File

@ -24,7 +24,7 @@ class ClassType(TypeDecorator):
def process_result_value(self, value, dialect):
class_ = REGISTRY.get(value)
if class_ is None:
warnings.warn(f"Can't find class <{value}>,find it yourself 😊", stacklevel=2)
warnings.warn(f"Can't find class <{value}>,find it yourself!", stacklevel=2)
return class_
class Organization(Base):

View File

@ -7,31 +7,46 @@ from orm.base import Base
ShoutAuthors = Table('shout_authors',
Base.metadata,
Column('shout', String, ForeignKey('shout.slug')),
Column('shout', Integer, ForeignKey('shout.id')),
Column('user_id', Integer, ForeignKey('user.id'))
)
ShoutTopics = Table('shout_topics',
Base.metadata,
Column('shout', String, ForeignKey('shout.slug')),
Column('topic', String, ForeignKey('topic.slug'))
Column('shout', Integer, ForeignKey('shout.id')),
Column('topic', Integer, ForeignKey('topic.id'))
)
class ShoutRatings(Base):
__tablename__ = "user_ratings"
id = None
rater_id = Column(ForeignKey('user.id'), primary_key = True)
shout_id = Column(ForeignKey('shout.id'), primary_key = True)
value = Column(Integer)
class Shout(Base):
__tablename__ = 'shout'
slug: str = Column(String, primary_key=True)
# NOTE: automatic ID here
slug: str = Column(String, nullable=False, unique=True)
org_id: int = Column(Integer, ForeignKey("organization.id"), nullable=False, comment="Organization")
body: str = Column(String, nullable=False, comment="Body")
createdAt: str = Column(DateTime, nullable=False, default = datetime.now, comment="Created at")
updatedAt: str = Column(DateTime, nullable=True, comment="Updated at")
replyTo: str = Column(ForeignKey("shout.slug"), nullable=True)
versionOf: str = Column(ForeignKey("shout.slug"), nullable=True)
replyTo: int = Column(ForeignKey("shout.id"), nullable=True)
versionOf: int = Column(ForeignKey("shout.id"), nullable=True)
tags: str = Column(String, nullable=True)
views: int = Column(Integer, default=0)
published: bool = Column(Boolean, default=False)
publishedAt: str = Column(DateTime, nullable=True)
cover: str = Column(String, nullable = True)
title: str = Column(String, nullable = True)
subtitle: str = Column(String, nullable = True)
layout: str = Column(String, nullable = True)
authors = relationship(lambda: User, secondary=ShoutAuthors) # NOTE: multiple authors
topics = relationship(lambda: Topic, secondary=ShoutTopics)
rating: int = Column(Integer, nullable=True, comment="Rating")
ratings = relationship(ShoutRatings, foreign_keys=ShoutRatings.shout_id)
old_id: str = Column(String, nullable = True)

View File

@ -8,8 +8,8 @@ from orm.base import Base
Connection = Table('topic_connections',
Base.metadata,
Column('child', String, ForeignKey('topic.slug')),
Column('parent', String, ForeignKey('topic.slug')),
Column('child', Integer, ForeignKey('topic.id')),
Column('parent', Integer, ForeignKey('topic.id')),
UniqueConstraint('parent', 'child', name='unique_usage')
)
@ -17,8 +17,7 @@ Connection = Table('topic_connections',
class Topic(Base):
__tablename__ = 'topic'
id: int = None
slug: str = Column(String, unique = True, nullable = False, primary_key=True)
slug: str = Column(String, unique = True, nullable = False)
org_id: str = Column(ForeignKey("organization.id"), nullable=False)
createdAt: str = Column(DateTime, nullable=False, default = datetime.now, comment="Created at")
createdBy: str = Column(ForeignKey("user.id"), nullable=False, comment="Author")

View File

@ -12,4 +12,4 @@ httpx
psycopg2-binary
bson
python-frontmatter
bs4
transliterate

View File

@ -179,7 +179,7 @@ type Message {
# is publication
type Shout {
org_id: Int!
slug: String!
slug: String
authors: [Int!]!
cover: String
layout: String
@ -196,10 +196,12 @@ type Shout {
tags: [String] # actual values
topics: [String] # topic-slugs, order has matter
title: String
subtitle: String
versionOf: String
visibleForRoles: [String] # role ids are strings
visibleForUsers: [Int]
views: Int
old_id: String
}
type Topic {