discours content decode
This commit is contained in:
parent
14fdfe71e5
commit
2a6baa7404
66
migrate.py
66
migrate.py
|
@ -21,7 +21,7 @@ if __name__ == '__main__':
|
|||
import sys
|
||||
|
||||
users_data = json.loads(open('migration/data/users.json').read())
|
||||
users_dict = { x['_id']: x for x in users_data } # by id
|
||||
# users_dict = { x['_id']: x for x in users_data } # by id
|
||||
print(str(len(users_data)) + ' users loaded')
|
||||
users_by_oid = {}
|
||||
users_by_slug = {}
|
||||
|
@ -49,7 +49,8 @@ if __name__ == '__main__':
|
|||
for old_comment in comments_data:
|
||||
cid = old_comment['contentItem']
|
||||
comments_by_post[cid] = comments_by_post.get(cid, [])
|
||||
comments_by_post[cid].append(old_comment)
|
||||
if 'deletedAt' not in old_comment:
|
||||
comments_by_post[cid].append(old_comment)
|
||||
print(str(len(comments_by_post.keys())) + ' articles with comments')
|
||||
|
||||
export_articles = {} # slug: shout
|
||||
|
@ -77,7 +78,7 @@ if __name__ == '__main__':
|
|||
return article
|
||||
|
||||
|
||||
def users():
|
||||
def users(users_by_oid, users_by_slug, users_data):
|
||||
''' migrating users first '''
|
||||
# limiting
|
||||
limit = len(users_data)
|
||||
|
@ -102,7 +103,7 @@ if __name__ == '__main__':
|
|||
print(str(len(users_by_slug.items())) + ' users migrated')
|
||||
|
||||
|
||||
def topics():
|
||||
def topics(export_topics, topics_by_slug, topics_by_cat, topics_by_tag, cats_data, tags_data):
|
||||
''' topics from categories and tags '''
|
||||
# limiting
|
||||
limit = len(cats_data) + len(tags_data)
|
||||
|
@ -133,7 +134,7 @@ if __name__ == '__main__':
|
|||
sort_keys=True,
|
||||
ensure_ascii=False))
|
||||
|
||||
def shouts():
|
||||
def shouts(content_data, shouts_by_slug, shouts_by_oid):
|
||||
''' migrating content items one by one '''
|
||||
# limiting
|
||||
limit = len(content_data)
|
||||
|
@ -168,7 +169,7 @@ if __name__ == '__main__':
|
|||
print(str(counter) + '/' + str(len(content_data)) + ' content items were migrated')
|
||||
print(str(discours_author) + ' authored by @discours')
|
||||
|
||||
def export_shouts(shouts_by_slug, export_articles, export_authors):
|
||||
def export_shouts(shouts_by_slug, export_articles, export_authors, content_dict):
|
||||
# update what was just migrated or load json again
|
||||
if len(export_authors.keys()) == 0:
|
||||
export_authors = json.loads(open('../src/data/authors.json').read())
|
||||
|
@ -190,33 +191,33 @@ if __name__ == '__main__':
|
|||
|
||||
for (slug, article) in export_list:
|
||||
if article['layout'] == 'article':
|
||||
export_slug(slug, export_articles, export_authors)
|
||||
export_slug(slug, export_articles, export_authors, content_dict)
|
||||
|
||||
def export_body(article):
|
||||
def export_body(article, content_dict):
|
||||
article = extract_images(article)
|
||||
metadata = get_metadata(article)
|
||||
content = frontmatter.dumps(frontmatter.Post(article['body'], **metadata))
|
||||
open('../content/discours.io/'+slug+'.md', 'w').write(content)
|
||||
open('../content/discours.io/'+slug+'.html', 'w').write(content_dict[article['old_id']]['body'])
|
||||
|
||||
def export_slug(slug, export_articles, export_authors):
|
||||
if exported_authors == {}:
|
||||
exported_authors = json.loads(open('../src/data/authors.json').read())
|
||||
print(str(len(exported_authors.items())) + ' exported authors loaded')
|
||||
if exported_articles == {}:
|
||||
exported_articles = json.loads(open('../src/data/articles.json').read())
|
||||
print(str(len(exported_articles.items())) + ' exported articles loaded')
|
||||
def export_slug(slug, export_articles, export_authors, content_dict):
|
||||
print('exporting %s ' % slug)
|
||||
if export_authors == {}:
|
||||
export_authors = json.loads(open('../src/data/authors.json').read())
|
||||
print(str(len(export_authors.items())) + ' exported authors loaded')
|
||||
if export_articles == {}:
|
||||
export_articles = json.loads(open('../src/data/articles.json').read())
|
||||
print(str(len(export_articles.items())) + ' exported articles loaded')
|
||||
|
||||
shout = shouts_by_slug.get(slug, False)
|
||||
assert shout, 'no data error'
|
||||
author = users_by_slug.get(shout['authors'][0]['slug'], None)
|
||||
exported_authors.update({shout['authors'][0]['slug']: author})
|
||||
exported_articles.update({shout['slug']: shout})
|
||||
export_body(shout)
|
||||
export_authors.update({shout['authors'][0]['slug']: author})
|
||||
export_articles.update({shout['slug']: shout})
|
||||
export_body(shout, content_dict)
|
||||
comments([slug, ])
|
||||
|
||||
|
||||
def comments(sluglist = []):
|
||||
def comments(sluglist, export_comments, export_articles, shouts_by_slug, content_dict):
|
||||
''' migrating comments on content items one '''
|
||||
if len(sluglist) == 0:
|
||||
export_articles = json.loads(open('../src/data/articles.json').read())
|
||||
|
@ -224,7 +225,8 @@ if __name__ == '__main__':
|
|||
if len(sluglist) == 0: sluglist = list(export_articles.keys())
|
||||
|
||||
if len(sluglist) > 0:
|
||||
print('exporting comments for exact articles...')
|
||||
print('exporting comments for: ')
|
||||
print(' '.join(sluglist))
|
||||
for slug in sluglist:
|
||||
shout = shouts_by_slug[slug]
|
||||
old_id = shout['old_id']
|
||||
|
@ -282,9 +284,9 @@ if __name__ == '__main__':
|
|||
if len(sys.argv) > 1:
|
||||
cmd = sys.argv[1]
|
||||
if cmd == "users":
|
||||
users(users_by_oid, users_by_slug, users_data, users_dict)
|
||||
users(users_by_oid, users_by_slug, users_data)
|
||||
elif cmd == "topics":
|
||||
topics(topics_by_cat, topics_by_tag, topics_by_slug)
|
||||
topics(export_topics, topics_by_slug, topics_by_cat, topics_by_tag, cats_data, tags_data)
|
||||
elif cmd == "shouts":
|
||||
try:
|
||||
Community.create(**{
|
||||
|
@ -298,19 +300,23 @@ if __name__ == '__main__':
|
|||
pass
|
||||
shouts(shouts_by_slug, shouts_by_oid) # NOTE: listens limit
|
||||
elif cmd == "comments":
|
||||
comments()
|
||||
cl = sys.argv[2] if len(sys.argv) > 2 else 10
|
||||
topCommented = sorted([ c[0] for c in comments_by_post.items()], reverse=True, key=lambda i: len(i[1]))[-cl:]
|
||||
comments(topCommented, export_comments, export_articles, shouts_by_slug, content_dict)
|
||||
elif cmd == "export_shouts":
|
||||
export_shouts(shouts_by_slug, export_articles, export_authors)
|
||||
export_shouts(shouts_by_slug, export_articles, export_authors, content_dict)
|
||||
elif cmd == "all":
|
||||
users()
|
||||
topics()
|
||||
shouts()
|
||||
comments()
|
||||
users(users_by_oid, users_by_slug, users_data)
|
||||
topics(export_topics, topics_by_slug, topics_by_cat, topics_by_tag, cats_data, tags_data)
|
||||
shouts(content_data, shouts_by_slug, shouts_by_oid)
|
||||
cl = sys.argv[2] if len(sys.argv) > 2 else 10
|
||||
topCommented = sorted([ c[0] for c in comments_by_post.items()], reverse=True, key=lambda i: len(i[1]))[-cl:]
|
||||
comments(topCommented, export_comments, export_articles, shouts_by_slug, content_dict)
|
||||
elif cmd == "bson":
|
||||
from migration import bson2json
|
||||
bson2json.json_tables()
|
||||
elif cmd == 'slug':
|
||||
export_slug(sys.argv[2], export_articles, export_authors)
|
||||
export_slug(sys.argv[2], export_articles, export_authors, content_dict)
|
||||
export_finish(export_articles, export_authors, export_topics, export_comments)
|
||||
else:
|
||||
print('''
|
||||
|
|
|
@ -86,6 +86,9 @@ class HTML2Text(html.parser.HTMLParser):
|
|||
self.tag_callback = None
|
||||
self.open_quote = config.OPEN_QUOTE # covered in cli
|
||||
self.close_quote = config.CLOSE_QUOTE # covered in cli
|
||||
self.header_id = None
|
||||
self.span_hightlight = False
|
||||
self.span_lead = False
|
||||
|
||||
if out is None:
|
||||
self.out = self.outtextf
|
||||
|
@ -347,18 +350,34 @@ class HTML2Text(html.parser.HTMLParser):
|
|||
self.space = False
|
||||
self.o(hn(tag) * "#" + " ")
|
||||
self.o("[")
|
||||
else:
|
||||
self.p_p = 0 # don't break up link name
|
||||
self.inheader = False
|
||||
return # prevent redundant emphasis marks on headers
|
||||
self.header_id = attrs.get('id')
|
||||
else:
|
||||
self.p()
|
||||
if start:
|
||||
self.inheader = True
|
||||
self.o(hn(tag) * "#" + " ")
|
||||
if self.header_id:
|
||||
self.o(' {#' + self.header_id + '}')
|
||||
self.header_id = None
|
||||
else:
|
||||
self.inheader = False
|
||||
return # prevent redundant emphasis marks on headers
|
||||
|
||||
if tag == 'span':
|
||||
if start and 'class' in attrs:
|
||||
if attrs['class'] == 'highlight':
|
||||
self.o('`') # NOTE: same as <code>
|
||||
self.span_hightlight = True
|
||||
elif attrs['class'] == 'lead':
|
||||
self.o('==') # NOTE: but CriticMarkup uses {== ==}
|
||||
self.span_lead = True
|
||||
else:
|
||||
if self.span_hightlight:
|
||||
self.o('`')
|
||||
self.span_hightlight = False
|
||||
elif self.span_lead:
|
||||
self.o('==')
|
||||
self.span_lead = False
|
||||
|
||||
if tag in ["p", "div"]:
|
||||
if self.google_doc:
|
||||
|
|
|
@ -17,7 +17,7 @@ BODY_WIDTH = 78
|
|||
|
||||
# Don't show internal links (href="#local-anchor") -- corresponding link
|
||||
# targets won't be visible in the plain text file anyway.
|
||||
SKIP_INTERNAL_LINKS = True
|
||||
SKIP_INTERNAL_LINKS = False
|
||||
|
||||
# Use inline, rather than reference, formatting for images and links
|
||||
INLINE_LINKS = True
|
||||
|
@ -25,7 +25,6 @@ INLINE_LINKS = True
|
|||
# Protect links from line breaks surrounding them with angle brackets (in
|
||||
# addition to their square brackets)
|
||||
PROTECT_LINKS = False
|
||||
# WRAP_LINKS = True
|
||||
WRAP_LINKS = True
|
||||
|
||||
# Wrap list items.
|
||||
|
|
Loading…
Reference in New Issue
Block a user