Untone 2021-10-08 12:58:19 +03:00
parent c3e0c5720a
commit fe28c3918c
3 changed files with 66 additions and 47 deletions

View File

@@ -2,8 +2,9 @@
import json
import base64
import re
+import frontmatter
from migration.tables.users import migrate as migrateUser
-from migration.tables.content_items import migrate as migrateShout
+from migration.tables.content_items import get_metadata, migrate as migrateShout
from migration.tables.content_item_categories import migrate as migrateCategory
from migration.tables.tags import migrate as migrateTag
from migration.utils import DateTimeEncoder
@@ -70,16 +71,19 @@ def topics():
print('migrating topics...')
cat_data = json.loads(
open('migration/data/content_item_categories.json').read())
-tag_data = json.loads(open('migration/data/tags.json').read())
-newdata = {}
+# tag_data = json.loads(open('migration/data/tags.json').read())
+new_data = {}
+old_data = {}
counter = 0
try:
for cat in cat_data:
topic = migrateCategory(cat)
-newdata[topic['slug']] = topic
+old_data[topic['old_id']] = topic
+new_data[topic['slug']] = topic
counter += 1
except Exception:
print('cats exception, try to remove database first')
+'''
try:
for tag in tag_data:
topic = migrateTag(tag)
@@ -88,14 +92,19 @@ def topics():
except Exception:
print('tags exception, try to remove database first')
raise Exception
-export_list = sorted(newdata.items(), key=lambda item: str(
-item[1]['createdAt']))[-10:]
+'''
+export_list = sorted(new_data.items(), key=lambda item: str(
+item[1]['createdAt']))
open('migration/data/topics.dict.json',
-'w').write(json.dumps(newdata, cls=DateTimeEncoder))
+'w').write(json.dumps(old_data, cls=DateTimeEncoder))
open('../src/data/topics.json', 'w').write(json.dumps(dict(export_list),
-cls=DateTimeEncoder, indent=4, sort_keys=True, ensure_ascii=False))
+cls=DateTimeEncoder,
+indent=4,
+sort_keys=True,
+ensure_ascii=False))
print(str(counter) + ' from ' + str(len(cat_data)) +
-' tags and ' + str(len(tag_data)) + ' cats were migrated')
+#' tags and ' + str(len(tag_data)) +
+' cats were migrated')
print(str(len(export_list)) + ' topics were exported')
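Note: the dict dumped to topics.dict.json is now keyed by old_id (old_data) rather than slug, which is what the topics_dict lookup added in content_items.py below relies on to turn entry['category'] into a slug. A minimal self-contained sketch of the dual keying, with migrateCategory stubbed and the category id purely hypothetical:

```python
# Sketch of the dual keying introduced here; the real migrateCategory lives in
# migration/tables/content_item_categories and is assumed to yield these keys.
def migrateCategory(cat):
    return {'old_id': cat['_id'], 'slug': cat['slug'], 'title': cat.get('title')}

cat_data = [{'_id': 'cat-001', 'slug': 'society', 'title': 'Society'}]  # hypothetical

old_data = {}  # old_id -> topic, dumped to migration/data/topics.dict.json
new_data = {}  # slug -> topic, feeding the sorted export

for cat in cat_data:
    topic = migrateCategory(cat)
    old_data[topic['old_id']] = topic
    new_data[topic['slug']] = topic

# content_items.py can then resolve a legacy category id to a slug:
# old_data[entry['category']]['slug']
```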
@@ -110,7 +119,7 @@ def shouts():
errored = []
for entry in content_data:
try:
-(shout, content) = migrateShout(entry)
+shout = migrateShout(entry)
newdata[shout['slug']] = shout
author = newdata[shout['slug']]['authors'][0]['slug']
line = str(counter+1) + ': ' + shout['slug'] + " @" + str(author)
@@ -127,12 +136,14 @@ def shouts():
limit = int(sys.argv[2]) if len(sys.argv) > 2 else len(content_data)
except ValueError:
limit = len(content_data)
-export_list = sorted(newdata.items(
-), key=lambda item: item[1]['createdAt'] if item[1]['layout'] == 'article' else OLD_DATE)[:limit]
+export_list = [i for i in newdata.items() if i[1]['layout'] == 'article' and i[1]['published']]
+export_list = sorted(export_list, key=lambda item: item[1]['createdAt'] or OLD_DATE, reverse=True)[:limit]
export_clean = {}
-for slug, a in dict(export_list).items():
-export_clean[slug] = extract_images(a)
-open('../content/discours.io/'+slug+'.md', 'w').write(content)
+for (slug, a) in export_list:
+export_clean[a['slug']] = extract_images(a)
+metadata = get_metadata(a)
+content = frontmatter.dumps(frontmatter.Post(a['body'], **metadata))
+open('../content/discours.io/'+a['slug']+'.md', 'w').write(content)
open('migration/data/shouts.dict.json',
'w').write(json.dumps(newdata, cls=DateTimeEncoder))
open('../src/data/articles.json', 'w').write(json.dumps(dict(export_clean),
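Since migrateShout now returns only the shout dict, shouts() regenerates each markdown file itself: keep published articles, sort newest-first with createdAt falling back to OLD_DATE, then re-dump frontmatter. A compact sketch of that export step, assuming get_metadata and extract_images behave as in this repo and that OLD_DATE matches the fallback date used in content_items.py:

```python
import frontmatter

OLD_DATE = '2016-03-05 22:22:00.350000'  # assumed to match the module constant

def export_articles(newdata, limit, get_metadata, extract_images):
    # keep only published articles
    items = [i for i in newdata.items()
             if i[1]['layout'] == 'article' and i[1]['published']]
    # newest first; a missing createdAt sorts via OLD_DATE
    items = sorted(items, key=lambda item: item[1]['createdAt'] or OLD_DATE,
                   reverse=True)[:limit]
    export_clean = {}
    for slug, a in items:
        export_clean[a['slug']] = extract_images(a)
        metadata = get_metadata(a)
        content = frontmatter.dumps(frontmatter.Post(a['body'], **metadata))
        with open('../content/discours.io/' + a['slug'] + '.md', 'w') as f:
            f.write(content)
    return export_clean
```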

View File

@@ -3,10 +3,10 @@ import datetime
import json
import importlib
-import DateTimeEncoder from utils
+from migration.utils import DateTimeEncoder
def json_tables():
-print('creating json files at data/')
+print('creating json files at migration/data/')
data = {
"content_items": [],
"content_item_categories": [],
@@ -17,13 +17,13 @@ def json_tables():
}
for table in data.keys():
lc = []
-with open('data/'+table+'.bson', 'rb') as f:
+with open('migration/data/'+table+'.bson', 'rb') as f:
bs = f.read()
base = 0
while base < len(bs):
base, d = bson.decode_document(bs, base)
lc.append(d)
data[table] = lc
-open('data/'+table+'.json', 'w').write(json.dumps(lc,cls=DateTimeEncoder))
+open('migration/data/'+table+'.json', 'w').write(json.dumps(lc,cls=DateTimeEncoder))
return data
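For context, decode_document returns the offset of the next document along with the decoded dict, which is what advances the while loop; the path changes simply anchor reads and writes under migration/data/ regardless of the working directory. A self-contained sketch of the per-table decode (bson here stands for whichever module exposes decode_document, as used above):

```python
import bson  # assumed to expose decode_document(data, offset) -> (next_offset, dict)

def decode_bson_file(path):
    docs = []
    with open(path, 'rb') as f:
        bs = f.read()
    base = 0
    while base < len(bs):
        # each call decodes one document and tells us where the next one starts
        base, d = bson.decode_document(bs, base)
        docs.append(d)
    return docs

# usage mirroring json_tables():
# lc = decode_bson_file('migration/data/tags.bson')
# open('migration/data/tags.json', 'w').write(json.dumps(lc, cls=DateTimeEncoder))
```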

View File

@@ -12,6 +12,7 @@ from sqlalchemy.exc import IntegrityError
from orm.base import local_session
users_dict = json.loads(open(abspath('migration/data/users.dict.json')).read())
+topics_dict = json.loads(open(abspath('migration/data/topics.dict.json')).read()) # old_id keyed
users_dict['0'] = {
'id': 9999999,
'slug': 'discours.io',
@@ -31,6 +32,16 @@ type2layout = {
}
+def get_metadata(r):
+metadata = {}
+metadata['title'] = r.get('title')
+metadata['authors'] = r.get('authors')
+metadata['createdAt'] = r.get('createdAt', ts)
+metadata['layout'] = r['layout']
+if r.get('cover', False):
+metadata['cover'] = r.get('cover')
+return metadata
def migrate(entry):
'''
type Shout {
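The new get_metadata reads layout straight off the record, so callers must set r['layout'] before the call; the later hunk in this file does exactly that. A usage sketch for the helper added above, with a hypothetical record (ts is the module-level default timestamp this file defines elsewhere):

```python
import frontmatter

r = {  # hypothetical record shaped like the `r` dict built in migrate()
    'title': 'Example shout',
    'authors': [{'slug': 'discours.io'}],
    'createdAt': '2016-03-05 22:22:00.350000',
    'layout': 'article',
    'body': 'Hello, world',
}
# given the get_metadata defined above:
metadata = get_metadata(r)  # title, authors, createdAt, layout (+ cover if set)
content = frontmatter.dumps(frontmatter.Post(r['body'], **metadata))
# content is now a YAML frontmatter block followed by the body
```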
@@ -66,7 +77,7 @@ def migrate(entry):
'views': entry.get('views', 0),
'rating': entry.get('rating', 0),
'ratings': [],
-'createdAt': '2016-03-05 22:22:00.350000'
+'createdAt': entry.get('createdAt', '2016-03-05 22:22:00.350000')
}
r['slug'] = entry.get('slug', '')
body_orig = entry.get('body', '')
@@ -78,6 +89,10 @@ def migrate(entry):
print('NO SLUG ERROR')
# print(entry)
raise Exception
+try:
+r['topics'].append(topics_dict[entry['category']]['slug'])
+except Exception:
+print(entry['category'])
if entry.get('image') is not None:
r['cover'] = entry['image']['url']
if entry.get('thumborId') is not None:
@@ -99,16 +114,16 @@ def migrate(entry):
else:
print(r['slug'] + ': literature has no media')
elif entry.get('type') == 'Video':
-m = entry['media'][0]
-yt = m.get('youtubeId', '')
-vm = m.get('vimeoId', '')
-videoUrl = 'https://www.youtube.com/watch?v=' + yt if yt else '#'
-if videoUrl == '#':
-videoUrl = 'https://vimeo.com/' + vm if vm else '#'
-if videoUrl == '#':
-print(entry.get('media', 'NO MEDIA!'))
-# raise Exception
-r['body'] = '<ShoutVideo src=\"' + videoUrl + \
+m = entry['media'][0]
+yt = m.get('youtubeId', '')
+vm = m.get('vimeoId', '')
+video_url = 'https://www.youtube.com/watch?v=' + yt if yt else '#'
+if video_url == '#':
+video_url = 'https://vimeo.com/' + vm if vm else '#'
+if video_url == '#':
+print(entry.get('media', 'NO MEDIA!'))
+# raise Exception
+r['body'] = '<ShoutVideo src=\"' + video_url + \
'\" />' + html2text(m.get('body', '')) # FIXME
elif entry.get('type') == 'Music':
r['body'] = '<ShoutMusic media={\"' + \
@@ -163,15 +178,10 @@ def migrate(entry):
'pic': userpic
})
-metadata = {}
-metadata['title'] = r.get('title')
-metadata['authors'] = r.get('authors')
-metadata['createdAt'] = entry.get('createdAt', ts)
-metadata['layout'] = type2layout[entry['type']]
-if r.get('cover', False):
-metadata['cover'] = r.get('cover')
-post = frontmatter.Post(body, **metadata)
-dumped = frontmatter.dumps(post)
+r['layout'] = type2layout[entry['type']]
+metadata = get_metadata(r)
+content = frontmatter.dumps(frontmatter.Post(body, **metadata))
if entry['published']:
# if r.get('old_id', None):
@@ -179,15 +189,13 @@ def migrate(entry):
# content = str(body).replace('<p></p>', '').replace('<p> </p>', '')
# else:
ext = 'md'
-content = dumped
open('migration/content/' +
-metadata['layout'] + '/' + r['slug'] + '.' + ext, 'w').write(content)
+r['layout'] + '/' + r['slug'] + '.' + ext, 'w').write(content)
try:
shout_dict = r.copy()
shout_dict['authors'] = [user, ]
if entry.get('createdAt') is not None:
-shout_dict['createdAt'] = parse(entry.get('createdAt'))
+shout_dict['createdAt'] = parse(r.get('createdAt'))
else:
shout_dict['createdAt'] = ts
if entry.get('published'):
@@ -196,9 +204,9 @@ def migrate(entry):
else:
shout_dict['publishedAt'] = ts
del shout_dict['published']
-del shout_dict['views'] # FIXME
-del shout_dict['rating'] # FIXME
-del shout_dict['ratings']
+# del shout_dict['views']
+# del shout_dict['rating']
+del shout_dict['ratings'] # FIXME
try:
s = Shout.create(**shout_dict)
r['id'] = s.id
@@ -209,4 +217,4 @@ def migrate(entry):
print(r)
# print(s)
raise Exception
-return (r, content)
+return r
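With the createdAt default moved onto r, both the markdown dump and the ORM insert read the same string, and the file write now happens inside migrate(), so the return contract shrinks to the single shout dict that the shout = migrateShout(entry) call in the first file expects. A minimal sketch of the new flow, assuming parse is dateutil.parser.parse as imported at the top of this file:

```python
from dateutil.parser import parse  # assumed source of `parse` in this file

def migrate_sketch(entry):
    # createdAt now always lands on r: the entry value or the 2016 fallback
    r = {'createdAt': entry.get('createdAt', '2016-03-05 22:22:00.350000')}
    shout_dict = r.copy()
    shout_dict['createdAt'] = parse(r.get('createdAt'))  # datetime for Shout.create
    # ... write the .md and create the Shout here, then:
    return r  # callers now unpack a single value

migrate_sketch({'createdAt': '2021-10-08T12:58:19+03:00'})
```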