migration fixes
This commit is contained in:
parent
ddacbbb0d5
commit
a075bfaf10
|
@ -87,7 +87,7 @@ class HTML2Text(html.parser.HTMLParser):
|
|||
self.open_quote = config.OPEN_QUOTE # covered in cli
|
||||
self.close_quote = config.CLOSE_QUOTE # covered in cli
|
||||
self.header_id = None
|
||||
self.span_hightlight = False
|
||||
self.span_highlight = False
|
||||
self.span_lead = False
|
||||
|
||||
if out is None:
|
||||
|
@ -137,6 +137,7 @@ class HTML2Text(html.parser.HTMLParser):
|
|||
self.preceding_stressed = False
|
||||
self.preceding_data = ""
|
||||
self.current_tag = ""
|
||||
self.current_class = ""
|
||||
|
||||
config.UNIFIABLE["nbsp"] = " _place_holder;"
|
||||
|
||||
|
@ -340,7 +341,8 @@ class HTML2Text(html.parser.HTMLParser):
|
|||
parent_style = self.tag_stack[-1][2]
|
||||
|
||||
if hn(tag):
|
||||
# check if hn is inside of an 'a' tag (incorrect but found in the wild)
|
||||
# check if hn is inside of an 'a' tag
|
||||
# (incorrect but found in the wild)
|
||||
if self.astack:
|
||||
if start:
|
||||
self.inheader = True
|
||||
|
@ -362,19 +364,33 @@ class HTML2Text(html.parser.HTMLParser):
|
|||
else:
|
||||
self.inheader = False
|
||||
return # prevent redundant emphasis marks on headers
|
||||
|
||||
if 'class' in attrs:
|
||||
self.current_class = attrs.get('class')
|
||||
# self.p()
|
||||
if not start:
|
||||
self.current_class = ''
|
||||
if 'style' in attrs:
|
||||
if attrs.get('style') == 'text-align: center':
|
||||
self.current_class = 'center'
|
||||
if not start:
|
||||
self.current_class = ''
|
||||
if tag == 'span':
|
||||
if start and 'class' in attrs:
|
||||
if attrs['class'] == 'highlight':
|
||||
if start:
|
||||
if self.current_class == 'highlight' and \
|
||||
self.inheader == False and \
|
||||
self.span_lead == False and \
|
||||
self.astack == False:
|
||||
self.o('`') # NOTE: same as <code>
|
||||
self.span_hightlight = True
|
||||
elif attrs['class'] == 'lead':
|
||||
self.span_highlight = True
|
||||
elif self.current_class == 'lead' and \
|
||||
self.inheader == False and \
|
||||
self.span_highlight == False:
|
||||
self.o('==') # NOTE: but CriticMarkup uses {== ==}
|
||||
self.span_lead = True
|
||||
else:
|
||||
if self.span_hightlight:
|
||||
if self.span_highlight:
|
||||
self.o('`')
|
||||
self.span_hightlight = False
|
||||
self.span_highlight = False
|
||||
elif self.span_lead:
|
||||
self.o('==')
|
||||
self.span_lead = False
|
||||
|
|
|
@ -39,6 +39,7 @@ def get_metadata(r):
|
|||
metadata['createdAt'] = r.get('createdAt', ts)
|
||||
metadata['layout'] = r['layout']
|
||||
metadata['topics'] = [topic['slug'] for topic in r['topics']]
|
||||
metadata['topics'].sort()
|
||||
if r.get('cover', False):
|
||||
metadata['cover'] = r.get('cover')
|
||||
return metadata
|
||||
|
@ -80,7 +81,6 @@ def migrate(entry, users_by_oid, topics_by_oid):
|
|||
'createdAt': entry.get('createdAt', '2016-03-05 22:22:00.350000')
|
||||
}
|
||||
r['slug'] = entry.get('slug', '')
|
||||
body_orig = entry.get('body', '')
|
||||
if not r['slug'] and entry.get('friendlySlugs') is not None:
|
||||
r['slug'] = entry['friendlySlugs']['slug'][0]['slug']
|
||||
if(r['slug'] is None):
|
||||
|
@ -94,12 +94,12 @@ def migrate(entry, users_by_oid, topics_by_oid):
|
|||
mainTopic = topics_by_oid.get(category)
|
||||
if mainTopic:
|
||||
r['mainTopic'] = mainTopic["slug"]
|
||||
topic_oids = set([category])
|
||||
topic_oids.update(entry.get("tags", []))
|
||||
topic_oids = [category, ]
|
||||
taglist = entry.get("tags", [])
|
||||
topic_oids.extend(taglist)
|
||||
for oid in topic_oids:
|
||||
if oid in topics_by_oid:
|
||||
r['topics'].append(topics_by_oid[oid])
|
||||
|
||||
if entry.get('image') is not None:
|
||||
r['cover'] = entry['image']['url']
|
||||
if entry.get('thumborId') is not None:
|
||||
|
@ -116,7 +116,7 @@ def migrate(entry, users_by_oid, topics_by_oid):
|
|||
else:
|
||||
body_html = str(BeautifulSoup(
|
||||
body_orig, features="html.parser"))
|
||||
r['body'] = body_html # html2text(body_html)
|
||||
r['body'] = html2text(body_html)
|
||||
else:
|
||||
print(r['slug'] + ': literature has no media')
|
||||
elif entry.get('type') == 'Video':
|
||||
|
@ -127,17 +127,31 @@ def migrate(entry, users_by_oid, topics_by_oid):
|
|||
if video_url == '#':
|
||||
video_url = 'https://vimeo.com/' + vm if vm else '#'
|
||||
if video_url == '#':
|
||||
print(entry.get('media', 'NO MEDIA!'))
|
||||
print(entry.get('media', 'UNKNOWN MEDIA PROVIDER!'))
|
||||
# raise Exception
|
||||
r['body'] = '<ShoutVideo src=\"' + video_url + \
|
||||
'\" />' + html2text(m.get('body', '')) # FIXME
|
||||
therestof = html2text(m.get('body', ''))
|
||||
r['body'] = 'import VideoPlayer from \"src/components/Article/VideoPlayer\"\n' + \
|
||||
'<VideoPlayer src=\"''' + video_url + '\" />\n\n' + therestof
|
||||
elif entry.get('type') == 'Music':
|
||||
r['body'] = '<ShoutMusic media={\"' + \
|
||||
json.dumps(entry['media']) + '\"} />' # FIXME
|
||||
r['body'] = 'import MusicPlayer from \"src/components/MusicPlayer\"\n'
|
||||
for m in entry['media']:
|
||||
if m == { 'main': 'true' } or m == { 'main': True } or m == {}:
|
||||
continue
|
||||
# TODO: mark highlighted track isMain == True
|
||||
try: r['body'] += '<MusicPlayer src=\"' + m['fileUrl'] + '\"'
|
||||
except: print(m)
|
||||
try: r['body'] += ' title=\"' + m['title'] + '\"'
|
||||
except: print(m)
|
||||
r['body'] += ' />\n\n'
|
||||
r['body'] += html2text(m.get('body', ''))
|
||||
elif entry.get('type') == 'Image':
|
||||
m = r.get('media')
|
||||
try: r['body'] = '<img src=\"' + r['cover'] + '\" />'
|
||||
except: print(entry)
|
||||
if r.get('body') is None:
|
||||
body_orig = entry.get('body', '')
|
||||
body_html = str(BeautifulSoup(body_orig, features="html.parser"))
|
||||
r['body'] = body_html # html2text(body_html)
|
||||
r['body'] = html2text(body_html)
|
||||
body = r.get('body', '')
|
||||
|
||||
# get author data
|
||||
|
@ -174,10 +188,10 @@ def migrate(entry, users_by_oid, topics_by_oid):
|
|||
shout_dict['authors'] = [ author, ]
|
||||
|
||||
if entry['published']:
|
||||
metadata = get_metadata(r)
|
||||
metadata = get_metadata(shout_dict)
|
||||
content = frontmatter.dumps(frontmatter.Post(body, **metadata))
|
||||
ext = 'md'
|
||||
open('migration/content/' + r['layout'] + '/' + r['slug'] + '.' + ext, 'w').write(content)
|
||||
ext = 'mdx'
|
||||
open('../discoursio-web/content/' + r['layout'] + '/' + r['slug'] + '.' + ext, 'w').write(content)
|
||||
try:
|
||||
shout_dict['createdAt'] = date_parse(r.get('createdAt')) if entry.get('createdAt') else ts
|
||||
shout_dict['publishedAt'] = date_parse(entry.get('publishedAt')) if entry.get('published') else None
|
||||
|
|
Loading…
Reference in New Issue
Block a user