migration topics fixed, markdown fixed
This commit is contained in:
@@ -385,15 +385,15 @@ class HTML2Text(html.parser.HTMLParser):
|
||||
elif self.current_class == 'lead' and \
|
||||
self.inheader == False and \
|
||||
self.span_highlight == False:
|
||||
self.o("==") # NOTE: but CriticMarkup uses {== ==}
|
||||
#self.o("==") # NOTE: CriticMarkup {==
|
||||
self.span_lead = True
|
||||
else:
|
||||
if self.span_highlight:
|
||||
self.o('`')
|
||||
self.span_highlight = False
|
||||
elif self.span_lead:
|
||||
self.o('==')
|
||||
self.span_lead = False
|
||||
#self.o('==')
|
||||
self.span_lead = False
|
||||
|
||||
if tag in ["p", "div"]:
|
||||
if self.google_doc:
|
||||
@@ -401,7 +401,7 @@ class HTML2Text(html.parser.HTMLParser):
|
||||
self.p()
|
||||
else:
|
||||
self.soft_br()
|
||||
elif self.astack:
|
||||
elif self.astack or self.inheader:
|
||||
pass
|
||||
else:
|
||||
self.p()
|
||||
@@ -468,20 +468,21 @@ class HTML2Text(html.parser.HTMLParser):
|
||||
# without it, Markdown won't render the resulting *** correctly.
|
||||
# (Don't add a space otherwise, though, since there isn't one in the
|
||||
# original HTML.)
|
||||
if (
|
||||
start
|
||||
and self.preceding_data
|
||||
and self.preceding_data[-1] == self.strong_mark[0]
|
||||
):
|
||||
strong = " " + self.strong_mark
|
||||
self.preceding_data += " "
|
||||
else:
|
||||
strong = self.strong_mark
|
||||
if not self.inheader and not self.astack \
|
||||
and not self.span_lead and not self.span_highlight:
|
||||
if (
|
||||
start
|
||||
and self.preceding_data
|
||||
and self.preceding_data[-1] == self.strong_mark[0]
|
||||
):
|
||||
strong = " " + self.strong_mark
|
||||
self.preceding_data += " "
|
||||
else:
|
||||
strong = self.strong_mark
|
||||
|
||||
if not self.span_lead and not self.span_highlight:
|
||||
self.o(strong)
|
||||
if start:
|
||||
self.stressed = True
|
||||
if start:
|
||||
self.stressed = True
|
||||
|
||||
if tag in ["del", "strike", "s"]:
|
||||
if start and self.preceding_data and self.preceding_data[-1] == "~":
|
||||
@@ -1030,4 +1031,12 @@ def html2text(html: str, baseurl: str = "", bodywidth: Optional[int] = None) ->
|
||||
bodywidth = config.BODY_WIDTH
|
||||
h = HTML2Text(baseurl=baseurl, bodywidth=bodywidth)
|
||||
|
||||
return h.handle(html)
|
||||
return h.handle(html)\
|
||||
.replace('<...>', '**...**')\
|
||||
.replace('<…>', '***...**')\
|
||||
.replace('****', '')\
|
||||
.replace('\u00a0',' ')\
|
||||
.replace('\u200c', '')\
|
||||
.replace('\u200b', '')\
|
||||
.replace('\ufeff', '')
|
||||
# .replace('\u2212', '-')
|
||||
|
@@ -156,7 +156,7 @@ IGNORE_TABLES = False
|
||||
|
||||
# Use a single line break after a block element rather than two line breaks.
|
||||
# NOTE: Requires body width setting to be 0.
|
||||
SINGLE_LINE_BREAK = True
|
||||
SINGLE_LINE_BREAK = False
|
||||
|
||||
|
||||
# Use double quotation marks when converting the <q> tag.
|
||||
|
@@ -1,8 +1,11 @@
|
||||
from orm.base import local_session
|
||||
from orm import Topic, Community
|
||||
from dateutil.parser import parse as date_parse
|
||||
import json
|
||||
from migration.html2text import html2text
|
||||
import sqlalchemy
|
||||
|
||||
def migrate(entry):
|
||||
def migrate(entry, topics_by_oid):
|
||||
'''
|
||||
type Topic {
|
||||
slug: String! # ID
|
||||
@@ -14,22 +17,40 @@ def migrate(entry):
|
||||
'''
|
||||
topic_dict = {
|
||||
'slug': entry['slug'],
|
||||
'oid': entry['_id'],
|
||||
# 'createdBy': entry['createdBy'],
|
||||
# 'createdAt': date_parse(entry['createdAt']),
|
||||
'title': entry['title'].replace(' ', ' '), #.lower(),
|
||||
'children': [],
|
||||
'community' : Community.default_community.slug,
|
||||
'body' : entry.get('description')
|
||||
'body' : html2text(entry.get('description', '').replace(' ', ' '))
|
||||
}
|
||||
try:
|
||||
with local_session() as session:
|
||||
topic = session.query(Topic).filter(Topic.slug == topic_dict['slug']).first()
|
||||
if not topic:
|
||||
topic = session.query(Topic).filter(Topic.title == topic_dict['title']).first()
|
||||
retopics = json.loads(open('migration/tables/replacements.json').read())
|
||||
with local_session() as session:
|
||||
slug = topics_by_oid.get(topic_dict['oid'], topic_dict)['slug']
|
||||
if slug:
|
||||
try:
|
||||
topic = session.query(Topic).filter(Topic.slug == slug).first()
|
||||
if not topic:
|
||||
del topic_dict['oid']
|
||||
topic = Topic.create(**topic_dict)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
raise e
|
||||
topic_dict['cat_id'] = entry['_id']
|
||||
print('created')
|
||||
else:
|
||||
if len(topic.title) > len(topic_dict['title']) or \
|
||||
len(topic.body) < len(topic_dict['body']):
|
||||
print('updating topic')
|
||||
topic.update({
|
||||
'slug': slug,
|
||||
'title': topic_dict['title'] if len(topic.title) > len(topic_dict['title']) else topic.title,
|
||||
'body': topic_dict['body'] if len(topic.body) < len(topic_dict['body']) else topic.body,
|
||||
#'views': topic.views + topic_dict['views']
|
||||
#'authors': topic.views + topic_dict['views']
|
||||
#'followers': topic.views + topic_dict['views']
|
||||
})
|
||||
print(slug + ': ' + topic.title)
|
||||
except Exception as e:
|
||||
print('not found old topic: ' + slug)
|
||||
else:
|
||||
raise Exception
|
||||
topic_dict['oid'] = entry['_id']
|
||||
return topic_dict
|
||||
|
@@ -13,6 +13,7 @@ from sqlalchemy.exc import IntegrityError
|
||||
from orm.base import local_session
|
||||
from orm.community import Community
|
||||
import os
|
||||
import string
|
||||
|
||||
DISCOURS_USER = {
|
||||
'id': 9999999,
|
||||
@@ -32,7 +33,6 @@ type2layout = {
|
||||
'Image': 'image'
|
||||
}
|
||||
|
||||
|
||||
def get_metadata(r):
|
||||
metadata = {}
|
||||
metadata['title'] = r.get('title')
|
||||
@@ -45,6 +45,9 @@ def get_metadata(r):
|
||||
metadata['cover'] = r.get('cover')
|
||||
return metadata
|
||||
|
||||
|
||||
retopics = json.loads(open('migration/tables/replacements.json').read())
|
||||
|
||||
def migrate(entry, users_by_oid, topics_by_oid):
|
||||
'''
|
||||
type Shout {
|
||||
@@ -96,11 +99,14 @@ def migrate(entry, users_by_oid, topics_by_oid):
|
||||
if mainTopic:
|
||||
r['mainTopic'] = mainTopic["slug"]
|
||||
topic_oids = [category, ]
|
||||
taglist = entry.get("tags", [])
|
||||
topic_oids.extend(taglist)
|
||||
topic_errors = []
|
||||
topic_oids.extend(entry.get('tags', []))
|
||||
for oid in topic_oids:
|
||||
if oid in topics_by_oid:
|
||||
r['topics'].append(topics_by_oid[oid])
|
||||
else:
|
||||
# print('ERROR: unknown old topic id: ' + oid)
|
||||
topic_errors.append(oid)
|
||||
if entry.get('image') is not None:
|
||||
r['cover'] = entry['image']['url']
|
||||
if entry.get('thumborId') is not None:
|
||||
@@ -115,9 +121,9 @@ def migrate(entry, users_by_oid, topics_by_oid):
|
||||
if body_orig == '':
|
||||
print('EMPTY BODY!')
|
||||
else:
|
||||
body_html = str(BeautifulSoup(
|
||||
body_orig, features="html.parser"))
|
||||
r['body'] = html2text(body_html)
|
||||
# body_html = str(BeautifulSoup(
|
||||
# body_orig, features="html.parser"))
|
||||
r['body'] = html2text(body_orig)
|
||||
else:
|
||||
print(r['slug'] + ': literature has no media')
|
||||
elif entry.get('type') == 'Video':
|
||||
@@ -126,12 +132,12 @@ def migrate(entry, users_by_oid, topics_by_oid):
|
||||
vm = m.get('vimeoId', '')
|
||||
video_url = 'https://www.youtube.com/watch?v=' + yt if yt else '#'
|
||||
therestof = html2text(m.get('body', entry.get('body', '')))
|
||||
r['body'] = 'import { YouTube } from \"solid-social\"\n' + \
|
||||
'<YouTube youtubeId=\"''' + yt + '\" />\n\n' + therestof
|
||||
r['body'] = 'import { YouTube } from \'solid-social\'\n\n' + \
|
||||
'<YouTube youtubeId=\'' + yt + '\' />\n\n' + therestof
|
||||
if video_url == '#':
|
||||
video_url = 'https://vimeo.com/' + vm if vm else '#'
|
||||
r['body'] = 'import { Vimeo } from \"solid-social\"\n' + \
|
||||
'<Vimeo vimeoId=\"''' + vm + '\" />\n\n' + therestof
|
||||
r['body'] = 'import { Vimeo } from \'solid-social\'\n\n' + \
|
||||
'<Vimeo vimeoId=\'' + vm + '\' />\n\n' + therestof
|
||||
if video_url == '#':
|
||||
print(entry.get('media', 'UNKNOWN MEDIA PROVIDER!'))
|
||||
# raise Exception
|
||||
@@ -147,21 +153,22 @@ def migrate(entry, users_by_oid, topics_by_oid):
|
||||
print(m)
|
||||
continue
|
||||
else:
|
||||
r['body'] = 'import MusicPlayer from \"src/components/MusicPlayer\"\n\n'
|
||||
r['body'] += '<MusicPlayer src=\"' + fileUrl + '\" title=\"' + m.get('title','') + '\" />\n'
|
||||
r['body'] = 'import MusicPlayer from \'../src/components/MusicPlayer\'\n\n'
|
||||
r['body'] += '<MusicPlayer src=\'' + fileUrl + '\' title=\'' + m.get('title','') + '\' />\n'
|
||||
r['body'] += html2text(entry.get('body', ''))
|
||||
elif entry.get('type') == 'Image':
|
||||
m = r.get('media')
|
||||
r['body'] = ''
|
||||
if 'cover' in r: r['body'] = '<img src=\"' + r.get('cover', '') + '\" />'
|
||||
r['body'] += entry.get('body', '')
|
||||
mbody = r.get('media', [{'body': ''},])[0].get('body', '')
|
||||
r['body'] += mbody + entry.get('body', '')
|
||||
if r['body'] == '': print(entry)
|
||||
if r.get('body') is None:
|
||||
body_orig = entry.get('body', '')
|
||||
body_html = str(BeautifulSoup(body_orig, features="html.parser"))
|
||||
r['body'] = html2text(body_html)
|
||||
body_orig = entry.get('body', entry.get('bodyHistory', [{ 'text': '' }, ])[0].get('text', ''))
|
||||
# body_html = str(BeautifulSoup(body_orig, features="html.parser"))
|
||||
r['body'] = html2text(body_orig)
|
||||
body = r.get('body', '')
|
||||
|
||||
for oldtopicslug, newtopicslug in retopics.items():
|
||||
body.replace(oldtopicslug, newtopicslug)
|
||||
# get author data
|
||||
userdata = {}
|
||||
try: userdata = users_by_oid[entry['createdBy']]
|
||||
@@ -200,9 +207,10 @@ def migrate(entry, users_by_oid, topics_by_oid):
|
||||
content = frontmatter.dumps(frontmatter.Post(body, **metadata))
|
||||
ext = 'mdx'
|
||||
parentDir = '/'.join(os.getcwd().split('/')[:-1])
|
||||
filepath = parentDir + '/discoursio-web/content/' + r['layout'] + '/' + r['slug'] + '.' + ext
|
||||
filepath = parentDir + '/discoursio-web/content/' + r['slug'] + '.' + ext
|
||||
# print(filepath)
|
||||
open(filepath, 'w').write(content)
|
||||
bc = bytes(content,'utf-8').decode('utf-8','ignore')
|
||||
open(filepath, 'w').write(bc)
|
||||
try:
|
||||
shout_dict['createdAt'] = date_parse(r.get('createdAt')) if entry.get('createdAt') else ts
|
||||
shout_dict['publishedAt'] = date_parse(entry.get('publishedAt')) if entry.get('published') else None
|
||||
@@ -256,7 +264,9 @@ def migrate(entry, users_by_oid, topics_by_oid):
|
||||
for topic in r['topics']:
|
||||
try:
|
||||
ShoutTopic.create(**{ 'shout': s.slug, 'topic': topic['slug'] })
|
||||
shout_dict['topics'].append(topic['slug'])
|
||||
tpc = topics_by_oid[topic['oid']]
|
||||
slug = retopics.get(tpc['slug'], tpc['slug'])
|
||||
shout_dict['topics'].append(slug)
|
||||
except sqlalchemy.exc.IntegrityError:
|
||||
pass
|
||||
|
||||
@@ -269,7 +279,6 @@ def migrate(entry, users_by_oid, topics_by_oid):
|
||||
except Exception as e:
|
||||
raise e
|
||||
except Exception as e:
|
||||
if not shout_dict['body']: r['body'] = 'body moved'
|
||||
raise e
|
||||
shout_dict['old_id'] = entry.get('_id')
|
||||
return shout_dict # for json
|
||||
return shout_dict, topic_errors
|
||||
|
@@ -2,6 +2,8 @@
|
||||
"1990-e": "90s",
|
||||
"2000-e": "2000s",
|
||||
"90-e": "90s",
|
||||
"207": "207",
|
||||
"kartochki-rubinshteyna": "rubinstein-cards",
|
||||
"Georgia": "georgia",
|
||||
"Japan": "japan",
|
||||
"Sweden": "sweden",
|
||||
@@ -13,6 +15,7 @@
|
||||
"afrika": "africa",
|
||||
"agata-kristi": "agatha-christie",
|
||||
"agressiya": "agression",
|
||||
"agressivnoe-povedenie": "agression",
|
||||
"aktsii": "actions",
|
||||
"aktsionizm": "actionism",
|
||||
"alber-kamyu": "albert-kamus",
|
||||
@@ -59,6 +62,7 @@
|
||||
"artists": "artists",
|
||||
"ateizm": "atheism",
|
||||
"audiopoeziya": "audio-poetry",
|
||||
"audio-poetry": "audio-poetry",
|
||||
"audiospektakl": "audio-spectacles",
|
||||
"auktsyon": "auktsyon",
|
||||
"avangard": "avantgarde",
|
||||
@@ -385,6 +389,8 @@
|
||||
"martin-haydegger": "martin-hidegger",
|
||||
"matematika": "maths",
|
||||
"vladimir-mayakovskiy": "vladimir-mayakovsky",
|
||||
"mayakovskiy": "vladimir-mayakovsky",
|
||||
"ekzistentsiya": "existence",
|
||||
"media": "media",
|
||||
"medicine": "medicine",
|
||||
"memuary": "memoirs",
|
||||
@@ -738,6 +744,8 @@
|
||||
"zakonodatelstvo": "laws",
|
||||
"zakony-mira": "world-laws",
|
||||
"zametki": "notes",
|
||||
"zhelanie": "wish",
|
||||
"konets-vesny": "end-of-spring",
|
||||
"zhivotnye": "animals",
|
||||
"zhoze-saramago": "jose-saramago",
|
||||
"zigmund-freyd": "sigmund-freud",
|
||||
|
@@ -4,7 +4,7 @@ from orm.base import local_session
|
||||
from orm import Topic, Community
|
||||
from dateutil.parser import parse as date_parse
|
||||
|
||||
def migrate(entry):
|
||||
def migrate(entry, topics_by_oid):
|
||||
'''
|
||||
type Topic {
|
||||
slug: String! # ID
|
||||
@@ -21,23 +21,30 @@ def migrate(entry):
|
||||
ts = datetime.fromtimestamp(entry['createdAt']/1000)
|
||||
topic_dict = {
|
||||
'slug': entry['slug'],
|
||||
'oid': entry['_id'],
|
||||
# 'createdBy': entry['createdBy'],
|
||||
# 'createdAt': ts,
|
||||
'title': entry['title'].replace(' ', ' '), # .lower(),
|
||||
'children': [],
|
||||
'community' : Community.default_community.slug,
|
||||
'body' : entry.get('description')
|
||||
'body' : entry.get('description','').replace(' ', ' ')
|
||||
}
|
||||
try:
|
||||
retopics = json.loads(open('migration/tables/replacements.json').read())
|
||||
with local_session() as session:
|
||||
topic = session.query(Topic).filter(Topic.slug == topic_dict['slug']).first()
|
||||
if not topic:
|
||||
topic = session.query(Topic).filter(Topic.title == topic_dict['title']).first()
|
||||
if not topic:
|
||||
slug = topics_by_oid.get(topic_dict['oid'], topic_dict)['slug']
|
||||
if slug:
|
||||
topic = session.query(Topic).filter(Topic.slug == slug).first()
|
||||
if not topic:
|
||||
del topic_dict['oid']
|
||||
topic = Topic.create(**topic_dict)
|
||||
else:
|
||||
print(slug + ': ' + topic.title)
|
||||
else:
|
||||
print('not found topic: ' + slug)
|
||||
raise Exception
|
||||
except Exception as e:
|
||||
print(e)
|
||||
raise e
|
||||
|
||||
topic_dict['tag_id'] = entry['_id']
|
||||
topic_dict['oid'] = entry['_id']
|
||||
return topic_dict
|
||||
|
Reference in New Issue
Block a user