diff --git a/migration/tables/content_items.py b/migration/tables/content_items.py
index 94b30f82..a529d02f 100644
--- a/migration/tables/content_items.py
+++ b/migration/tables/content_items.py
@@ -1,19 +1,16 @@
from dateutil.parser import parse as date_parse
import frontmatter
import json
-import sqlite3
import sqlalchemy
-from orm import Shout, Comment, Topic, ShoutTopic, ShoutRating, ShoutViewByDay, User
-from bs4 import BeautifulSoup
+from orm import Shout, ShoutTopic, ShoutRating, ShoutViewByDay, User, shout
+# from bs4 import BeautifulSoup
from migration.html2text import html2text
-from migration.tables.comments import migrate as migrateComment
from transliterate import translit
from datetime import datetime
-from sqlalchemy.exc import IntegrityError
from orm.base import local_session
from orm.community import Community
+from migration.extract import extract
import os
-import string
DISCOURS_USER = {
'id': 9999999,
@@ -35,7 +32,7 @@ type2layout = {
def get_metadata(r):
metadata = {}
- metadata['title'] = r.get('title')
+ metadata['title'] = r.get('title', '').replace('{', '(').replace('}', ')')
metadata['authors'] = r.get('authors')
metadata['createdAt'] = r.get('createdAt', ts)
metadata['layout'] = r['layout']
@@ -84,15 +81,19 @@ def migrate(entry, users_by_oid, topics_by_oid):
'ratings': [],
'createdAt': entry.get('createdAt', '2016-03-05 22:22:00.350000')
}
- r['slug'] = entry.get('slug', '')
- if not r['slug'] and entry.get('friendlySlugs') is not None:
- r['slug'] = entry['friendlySlugs']['slug'][0]['slug']
- if(r['slug'] is None):
- r['slug'] = entry['friendlySlugs'][0]['slug']
- if not r['slug']:
- print('NO SLUG ERROR')
- # print(entry)
- raise Exception
+
+ # slug
+
+ s = entry.get('slug', '')
+ fslugs = entry.get('friendlySlugs')
+ if not s and fslugs:
+ if type(fslugs) != 'list': fslugs = fslugs.get('slug', [])
+ try: s = fslugs.pop(0).get('slug')
+ except: raise Exception
+ if s: r['slug'] = s
+ else: raise Exception
+
+ # topics
category = entry['category']
mainTopic = topics_by_oid.get(category)
@@ -107,68 +108,106 @@ def migrate(entry, users_by_oid, topics_by_oid):
else:
# print('ERROR: unknown old topic id: ' + oid)
topic_errors.append(oid)
+
+ # cover
+
if entry.get('image') is not None:
r['cover'] = entry['image']['url']
if entry.get('thumborId') is not None:
r['cover'] = 'https://assets.discours.io/unsafe/1600x/' + entry['thumborId']
if entry.get('updatedAt') is not None:
r['updatedAt'] = date_parse(entry['updatedAt'])
+
+ # body
+
+ body = ''
+ body_orig = entry.get('body')
+ if not body_orig: body_orig = ''
+
+ # body modifications
+
if entry.get('type') == 'Literature':
- media = entry.get('media', '')
- # print(media[0]['literatureBody'])
- if type(media) == list and media:
- body_orig = media[0].get('literatureBody', '')
- if body_orig == '':
- print('EMPTY BODY!')
- else:
- # body_html = str(BeautifulSoup(
- # body_orig, features="html.parser"))
- r['body'] = html2text(body_orig)
- else:
- print(r['slug'] + ': literature has no media')
+ for m in entry.get('media', []):
+ t = m.get('title', '')
+ if t: body_orig += '### ' + t + '\n'
+ body_orig += (m.get('body', '') or '')
+ body_orig += '\n' + m.get('literatureBody', '') + '\n'
+
+
elif entry.get('type') == 'Video':
- m = entry['media'][0]
- yt = m.get('youtubeId', '')
- vm = m.get('vimeoId', '')
- video_url = 'https://www.youtube.com/watch?v=' + yt if yt else '#'
- therestof = html2text(m.get('body', entry.get('body', '')))
- r['body'] = 'import { YouTube } from \'solid-social\'\n\n' + \
- '\n\n' + therestof
- if video_url == '#':
- video_url = 'https://vimeo.com/' + vm if vm else '#'
- r['body'] = 'import { Vimeo } from \'solid-social\'\n\n' + \
- '\n\n' + therestof
- if video_url == '#':
- print(entry.get('media', 'UNKNOWN MEDIA PROVIDER!'))
- # raise Exception
+ providers = set([])
+ video_url = ''
+ require = False
+ for m in entry.get('media', []):
+ yt = m.get('youtubeId', '')
+ vm = m.get('vimeoId', '')
+ if yt:
+ require = True
+ providers.add('YouTube')
+ video_url = 'https://www.youtube.com/watch?v=' + yt
+ body += '\n'
+ if vm:
+ require = True
+ providers.add('Vimeo')
+ video_url = 'https://vimeo.com/' + vm
+ body += '\n'
+ body += extract(html2text(m.get('body', '')), entry['_id'])
+ if video_url == '#': print(entry.get('media', 'UNKNOWN MEDIA PROVIDER!'))
+ if require: body = 'import { ' + ','.join(list(providers)) + ' } from \'solid-social\'\n\n' + body + '\n'
+ body += extract(html2text(body_orig), entry['_id'])
+
elif entry.get('type') == 'Music':
- r['body'] = ''
- for m in entry['media']:
- if m == { 'main': 'true' } or m == { 'main': True } or m == {}:
- continue
+ require = False
+ for m in entry.get('media', []):
+ if 'fileUrl' in m:
+ require = True
+ artist = m.get('performer')
+ trackname = ''
+ if artist: trackname += artist + ' - '
+ trackname += m.get('title','')
+ body += '\n'
+ body += extract(html2text(m.get('body', '')), entry['_id'])
else:
- # TODO: mark highlighted track isMain == True
- fileUrl = m.get('fileUrl', '')
- if not fileUrl:
- print(m)
- continue
- else:
- r['body'] = 'import MusicPlayer from \'../src/components/MusicPlayer\'\n\n'
- r['body'] += '\n'
- r['body'] += html2text(entry.get('body', ''))
+ print(m)
+ if require: body = 'import MusicPlayer from \'$/components/Article/MusicPlayer\'\n\n' + body + '\n'
+ body += extract(html2text(body_orig), entry['_id'])
+
elif entry.get('type') == 'Image':
- r['body'] = ''
- if 'cover' in r: r['body'] = '
'
- mbody = r.get('media', [{'body': ''},])[0].get('body', '')
- r['body'] += mbody + entry.get('body', '')
- if r['body'] == '': print(entry)
- if r.get('body') is None:
- body_orig = entry.get('body', entry.get('bodyHistory', [{ 'text': '' }, ])[0].get('text', ''))
+ cover = r.get('cover')
+ images = {}
+ for m in entry.get('media', []):
+ t = m.get('title', '')
+ if t: body += '#### ' + t + '\n'
+ u = m.get('image', {}).get('url', '')
+ if 'cloudinary' in u:
+ u = m.get('thumborId')
+ if not u: u = cover
+ if u not in images.keys():
+ if u.startswith('production'): u = 'https://discours-io.s3.amazonaws.com/' + u
+ body += '\n' # TODO: gallery here
+ images[u] = u
+ body += extract(html2text(m.get('body', '')), entry['_id']) + '\n'
+ body += extract(html2text(body_orig), entry['_id'])
+
+ # simple post or no body stored
+ if body == '':
+ if not body_orig:
+ print('[migration] using body history...')
+ try: body_orig += entry.get('bodyHistory', [{'body': ''}])[0].get('body', '')
+ except: pass
+ # need to extract
# body_html = str(BeautifulSoup(body_orig, features="html.parser"))
- r['body'] = html2text(body_orig)
- body = r.get('body', '')
+ body += extract(html2text(body_orig), entry['_id'])
+ else:
+ # EVERYTHING IS FINE HERE
+ pass
+
+ # replace some topics
for oldtopicslug, newtopicslug in retopics.items():
body.replace(oldtopicslug, newtopicslug)
+
+ # authors
+
# get author data
userdata = {}
try: userdata = users_by_oid[entry['createdBy']]
@@ -194,6 +233,7 @@ def migrate(entry, users_by_oid, topics_by_oid):
}
# set author data
+ r['body'] = body
shout_dict = r.copy()
author = { # a short version for public listings
'slug': userdata.get('slug', 'discours'),
@@ -202,15 +242,21 @@ def migrate(entry, users_by_oid, topics_by_oid):
}
shout_dict['authors'] = [ author, ]
+ # save mdx for prerender if published
+
if entry['published']:
metadata = get_metadata(shout_dict)
- content = frontmatter.dumps(frontmatter.Post(body, **metadata))
+ content = frontmatter.dumps(frontmatter.Post(r['body'], **metadata))
ext = 'mdx'
parentDir = '/'.join(os.getcwd().split('/')[:-1])
- filepath = parentDir + '/discoursio-web/content/' + r['slug'] + '.' + ext
+ filepath = parentDir + '/discoursio-web/content/' + r['slug']
# print(filepath)
bc = bytes(content,'utf-8').decode('utf-8','ignore')
- open(filepath, 'w').write(bc)
+ open(filepath + '.' + ext, 'w').write(bc)
+ # open(filepath + '.html', 'w').write(body_orig)
+
+ # save shout to db
+
try:
shout_dict['createdAt'] = date_parse(r.get('createdAt')) if entry.get('createdAt') else ts
shout_dict['publishedAt'] = date_parse(entry.get('publishedAt')) if entry.get('published') else None
@@ -234,51 +280,65 @@ def migrate(entry, users_by_oid, topics_by_oid):
if not user and slug: user = session.query(User).filter(User.slug == slug).first()
if not user and userdata: user = User.create(**userdata)
except:
- print(userdata)
+ print('[migration] content_items error: \n%r' % entry)
assert user, 'could not get a user'
-
shout_dict['authors'] = [ user, ]
- try:
- s = Shout.create(**shout_dict)
+
+ # create shout
- # shout ratings
- shout_dict['ratings'] = []
- for shout_rating_old in entry.get('ratings',[]):
- with local_session() as session:
- rater = session.query(User).\
- filter(User.old_id == shout_rating_old['createdBy']).first()
- if rater:
- shout_rating_dict = {
- 'value': shout_rating_old['value'],
- 'rater': rater.slug,
- 'shout': s.slug
- }
- cts = shout_rating_old.get('createdAt')
- if cts: shout_rating_dict['ts'] = date_parse(cts)
- try: shout_rating = ShoutRating.create(**shout_rating_dict)
- except sqlalchemy.exc.IntegrityError: pass
+ s = object()
+ try: s = Shout.create(**shout_dict)
+ except: print('[migration] content_items error: \n%r' % entry)
+
+ # shout ratings
+
+ shout_dict['ratings'] = []
+ for shout_rating_old in entry.get('ratings',[]):
+ with local_session() as session:
+ rater = session.query(User).\
+ filter(User.old_id == shout_rating_old['createdBy']).first()
+ if rater:
+ shout_rating_dict = {
+ 'value': shout_rating_old['value'],
+ 'rater': rater.slug,
+ 'shout': s.slug
+ }
+ cts = shout_rating_old.get('createdAt')
+ if cts: shout_rating_dict['ts'] = date_parse(cts)
+ try:
+ shout_rating = session.query(ShoutRating).\
+ filter(ShoutRating.shout == s.slug).\
+ filter(ShoutRating.rater == rater.slug).first()
+ if shout_rating:
+ shout_rating_dict['value'] += int(shout_rating.value or 0)
+ shout_rating.update(shout_rating_dict)
+ else: ShoutRating.create(**shout_rating_dict)
shout_dict['ratings'].append(shout_rating_dict)
-
- # shout topics
- shout_dict['topics'] = []
- for topic in r['topics']:
- try:
- tpc = topics_by_oid[topic['oid']]
- slug = retopics.get(tpc['slug'], tpc['slug'])
- ShoutTopic.create(**{ 'shout': s.slug, 'topic': slug })
- shout_dict['topics'].append(slug)
- except sqlalchemy.exc.IntegrityError:
+ except sqlalchemy.exc.IntegrityError:
+ print('[migration] shout_rating error: \n%r' % shout_rating_dict)
pass
- views = entry.get('views', 1)
- ShoutViewByDay.create(
- shout = s.slug,
- value = views
- )
+ # shout topics
- except Exception as e:
- raise e
- except Exception as e:
+ shout_dict['topics'] = []
+ for topic in r['topics']:
+ try:
+ tpc = topics_by_oid[topic['oid']]
+ slug = retopics.get(tpc['slug'], tpc['slug'])
+ ShoutTopic.create(**{ 'shout': s.slug, 'topic': slug })
+ shout_dict['topics'].append(slug)
+ except sqlalchemy.exc.IntegrityError:
+ pass
+
+ # shout views
+
+ views = entry.get('views', 1)
+ ShoutViewByDay.create(
+ shout = s.slug,
+ value = views
+ )
+
+ except Exception as e:
raise e
shout_dict['old_id'] = entry.get('_id')
return shout_dict, topic_errors