discours content decode
This commit is contained in:
parent
14fdfe71e5
commit
2a6baa7404
66
migrate.py
66
migrate.py
|
@ -21,7 +21,7 @@ if __name__ == '__main__':
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
users_data = json.loads(open('migration/data/users.json').read())
|
users_data = json.loads(open('migration/data/users.json').read())
|
||||||
users_dict = { x['_id']: x for x in users_data } # by id
|
# users_dict = { x['_id']: x for x in users_data } # by id
|
||||||
print(str(len(users_data)) + ' users loaded')
|
print(str(len(users_data)) + ' users loaded')
|
||||||
users_by_oid = {}
|
users_by_oid = {}
|
||||||
users_by_slug = {}
|
users_by_slug = {}
|
||||||
|
@ -49,7 +49,8 @@ if __name__ == '__main__':
|
||||||
for old_comment in comments_data:
|
for old_comment in comments_data:
|
||||||
cid = old_comment['contentItem']
|
cid = old_comment['contentItem']
|
||||||
comments_by_post[cid] = comments_by_post.get(cid, [])
|
comments_by_post[cid] = comments_by_post.get(cid, [])
|
||||||
comments_by_post[cid].append(old_comment)
|
if 'deletedAt' not in old_comment:
|
||||||
|
comments_by_post[cid].append(old_comment)
|
||||||
print(str(len(comments_by_post.keys())) + ' articles with comments')
|
print(str(len(comments_by_post.keys())) + ' articles with comments')
|
||||||
|
|
||||||
export_articles = {} # slug: shout
|
export_articles = {} # slug: shout
|
||||||
|
@ -77,7 +78,7 @@ if __name__ == '__main__':
|
||||||
return article
|
return article
|
||||||
|
|
||||||
|
|
||||||
def users():
|
def users(users_by_oid, users_by_slug, users_data):
|
||||||
''' migrating users first '''
|
''' migrating users first '''
|
||||||
# limiting
|
# limiting
|
||||||
limit = len(users_data)
|
limit = len(users_data)
|
||||||
|
@ -102,7 +103,7 @@ if __name__ == '__main__':
|
||||||
print(str(len(users_by_slug.items())) + ' users migrated')
|
print(str(len(users_by_slug.items())) + ' users migrated')
|
||||||
|
|
||||||
|
|
||||||
def topics():
|
def topics(export_topics, topics_by_slug, topics_by_cat, topics_by_tag, cats_data, tags_data):
|
||||||
''' topics from categories and tags '''
|
''' topics from categories and tags '''
|
||||||
# limiting
|
# limiting
|
||||||
limit = len(cats_data) + len(tags_data)
|
limit = len(cats_data) + len(tags_data)
|
||||||
|
@ -133,7 +134,7 @@ if __name__ == '__main__':
|
||||||
sort_keys=True,
|
sort_keys=True,
|
||||||
ensure_ascii=False))
|
ensure_ascii=False))
|
||||||
|
|
||||||
def shouts():
|
def shouts(content_data, shouts_by_slug, shouts_by_oid):
|
||||||
''' migrating content items one by one '''
|
''' migrating content items one by one '''
|
||||||
# limiting
|
# limiting
|
||||||
limit = len(content_data)
|
limit = len(content_data)
|
||||||
|
@ -168,7 +169,7 @@ if __name__ == '__main__':
|
||||||
print(str(counter) + '/' + str(len(content_data)) + ' content items were migrated')
|
print(str(counter) + '/' + str(len(content_data)) + ' content items were migrated')
|
||||||
print(str(discours_author) + ' authored by @discours')
|
print(str(discours_author) + ' authored by @discours')
|
||||||
|
|
||||||
def export_shouts(shouts_by_slug, export_articles, export_authors):
|
def export_shouts(shouts_by_slug, export_articles, export_authors, content_dict):
|
||||||
# update what was just migrated or load json again
|
# update what was just migrated or load json again
|
||||||
if len(export_authors.keys()) == 0:
|
if len(export_authors.keys()) == 0:
|
||||||
export_authors = json.loads(open('../src/data/authors.json').read())
|
export_authors = json.loads(open('../src/data/authors.json').read())
|
||||||
|
@ -190,33 +191,33 @@ if __name__ == '__main__':
|
||||||
|
|
||||||
for (slug, article) in export_list:
|
for (slug, article) in export_list:
|
||||||
if article['layout'] == 'article':
|
if article['layout'] == 'article':
|
||||||
export_slug(slug, export_articles, export_authors)
|
export_slug(slug, export_articles, export_authors, content_dict)
|
||||||
|
|
||||||
def export_body(article):
|
def export_body(article, content_dict):
|
||||||
article = extract_images(article)
|
article = extract_images(article)
|
||||||
metadata = get_metadata(article)
|
metadata = get_metadata(article)
|
||||||
content = frontmatter.dumps(frontmatter.Post(article['body'], **metadata))
|
content = frontmatter.dumps(frontmatter.Post(article['body'], **metadata))
|
||||||
open('../content/discours.io/'+slug+'.md', 'w').write(content)
|
open('../content/discours.io/'+slug+'.md', 'w').write(content)
|
||||||
open('../content/discours.io/'+slug+'.html', 'w').write(content_dict[article['old_id']]['body'])
|
open('../content/discours.io/'+slug+'.html', 'w').write(content_dict[article['old_id']]['body'])
|
||||||
|
|
||||||
def export_slug(slug, export_articles, export_authors):
|
def export_slug(slug, export_articles, export_authors, content_dict):
|
||||||
if exported_authors == {}:
|
print('exporting %s ' % slug)
|
||||||
exported_authors = json.loads(open('../src/data/authors.json').read())
|
if export_authors == {}:
|
||||||
print(str(len(exported_authors.items())) + ' exported authors loaded')
|
export_authors = json.loads(open('../src/data/authors.json').read())
|
||||||
if exported_articles == {}:
|
print(str(len(export_authors.items())) + ' exported authors loaded')
|
||||||
exported_articles = json.loads(open('../src/data/articles.json').read())
|
if export_articles == {}:
|
||||||
print(str(len(exported_articles.items())) + ' exported articles loaded')
|
export_articles = json.loads(open('../src/data/articles.json').read())
|
||||||
|
print(str(len(export_articles.items())) + ' exported articles loaded')
|
||||||
|
|
||||||
shout = shouts_by_slug.get(slug, False)
|
shout = shouts_by_slug.get(slug, False)
|
||||||
assert shout, 'no data error'
|
assert shout, 'no data error'
|
||||||
author = users_by_slug.get(shout['authors'][0]['slug'], None)
|
author = users_by_slug.get(shout['authors'][0]['slug'], None)
|
||||||
exported_authors.update({shout['authors'][0]['slug']: author})
|
export_authors.update({shout['authors'][0]['slug']: author})
|
||||||
exported_articles.update({shout['slug']: shout})
|
export_articles.update({shout['slug']: shout})
|
||||||
export_body(shout)
|
export_body(shout, content_dict)
|
||||||
comments([slug, ])
|
comments([slug, ])
|
||||||
|
|
||||||
|
|
||||||
def comments(sluglist = []):
|
def comments(sluglist, export_comments, export_articles, shouts_by_slug, content_dict):
|
||||||
''' migrating comments on content items one '''
|
''' migrating comments on content items one '''
|
||||||
if len(sluglist) == 0:
|
if len(sluglist) == 0:
|
||||||
export_articles = json.loads(open('../src/data/articles.json').read())
|
export_articles = json.loads(open('../src/data/articles.json').read())
|
||||||
|
@ -224,7 +225,8 @@ if __name__ == '__main__':
|
||||||
if len(sluglist) == 0: sluglist = list(export_articles.keys())
|
if len(sluglist) == 0: sluglist = list(export_articles.keys())
|
||||||
|
|
||||||
if len(sluglist) > 0:
|
if len(sluglist) > 0:
|
||||||
print('exporting comments for exact articles...')
|
print('exporting comments for: ')
|
||||||
|
print(' '.join(sluglist))
|
||||||
for slug in sluglist:
|
for slug in sluglist:
|
||||||
shout = shouts_by_slug[slug]
|
shout = shouts_by_slug[slug]
|
||||||
old_id = shout['old_id']
|
old_id = shout['old_id']
|
||||||
|
@ -282,9 +284,9 @@ if __name__ == '__main__':
|
||||||
if len(sys.argv) > 1:
|
if len(sys.argv) > 1:
|
||||||
cmd = sys.argv[1]
|
cmd = sys.argv[1]
|
||||||
if cmd == "users":
|
if cmd == "users":
|
||||||
users(users_by_oid, users_by_slug, users_data, users_dict)
|
users(users_by_oid, users_by_slug, users_data)
|
||||||
elif cmd == "topics":
|
elif cmd == "topics":
|
||||||
topics(topics_by_cat, topics_by_tag, topics_by_slug)
|
topics(export_topics, topics_by_slug, topics_by_cat, topics_by_tag, cats_data, tags_data)
|
||||||
elif cmd == "shouts":
|
elif cmd == "shouts":
|
||||||
try:
|
try:
|
||||||
Community.create(**{
|
Community.create(**{
|
||||||
|
@ -298,19 +300,23 @@ if __name__ == '__main__':
|
||||||
pass
|
pass
|
||||||
shouts(shouts_by_slug, shouts_by_oid) # NOTE: listens limit
|
shouts(shouts_by_slug, shouts_by_oid) # NOTE: listens limit
|
||||||
elif cmd == "comments":
|
elif cmd == "comments":
|
||||||
comments()
|
cl = sys.argv[2] if len(sys.argv) > 2 else 10
|
||||||
|
topCommented = sorted([ c[0] for c in comments_by_post.items()], reverse=True, key=lambda i: len(i[1]))[-cl:]
|
||||||
|
comments(topCommented, export_comments, export_articles, shouts_by_slug, content_dict)
|
||||||
elif cmd == "export_shouts":
|
elif cmd == "export_shouts":
|
||||||
export_shouts(shouts_by_slug, export_articles, export_authors)
|
export_shouts(shouts_by_slug, export_articles, export_authors, content_dict)
|
||||||
elif cmd == "all":
|
elif cmd == "all":
|
||||||
users()
|
users(users_by_oid, users_by_slug, users_data)
|
||||||
topics()
|
topics(export_topics, topics_by_slug, topics_by_cat, topics_by_tag, cats_data, tags_data)
|
||||||
shouts()
|
shouts(content_data, shouts_by_slug, shouts_by_oid)
|
||||||
comments()
|
cl = sys.argv[2] if len(sys.argv) > 2 else 10
|
||||||
|
topCommented = sorted([ c[0] for c in comments_by_post.items()], reverse=True, key=lambda i: len(i[1]))[-cl:]
|
||||||
|
comments(topCommented, export_comments, export_articles, shouts_by_slug, content_dict)
|
||||||
elif cmd == "bson":
|
elif cmd == "bson":
|
||||||
from migration import bson2json
|
from migration import bson2json
|
||||||
bson2json.json_tables()
|
bson2json.json_tables()
|
||||||
elif cmd == 'slug':
|
elif cmd == 'slug':
|
||||||
export_slug(sys.argv[2], export_articles, export_authors)
|
export_slug(sys.argv[2], export_articles, export_authors, content_dict)
|
||||||
export_finish(export_articles, export_authors, export_topics, export_comments)
|
export_finish(export_articles, export_authors, export_topics, export_comments)
|
||||||
else:
|
else:
|
||||||
print('''
|
print('''
|
||||||
|
|
|
@ -86,6 +86,9 @@ class HTML2Text(html.parser.HTMLParser):
|
||||||
self.tag_callback = None
|
self.tag_callback = None
|
||||||
self.open_quote = config.OPEN_QUOTE # covered in cli
|
self.open_quote = config.OPEN_QUOTE # covered in cli
|
||||||
self.close_quote = config.CLOSE_QUOTE # covered in cli
|
self.close_quote = config.CLOSE_QUOTE # covered in cli
|
||||||
|
self.header_id = None
|
||||||
|
self.span_hightlight = False
|
||||||
|
self.span_lead = False
|
||||||
|
|
||||||
if out is None:
|
if out is None:
|
||||||
self.out = self.outtextf
|
self.out = self.outtextf
|
||||||
|
@ -347,18 +350,34 @@ class HTML2Text(html.parser.HTMLParser):
|
||||||
self.space = False
|
self.space = False
|
||||||
self.o(hn(tag) * "#" + " ")
|
self.o(hn(tag) * "#" + " ")
|
||||||
self.o("[")
|
self.o("[")
|
||||||
else:
|
self.header_id = attrs.get('id')
|
||||||
self.p_p = 0 # don't break up link name
|
|
||||||
self.inheader = False
|
|
||||||
return # prevent redundant emphasis marks on headers
|
|
||||||
else:
|
else:
|
||||||
self.p()
|
self.p()
|
||||||
if start:
|
if start:
|
||||||
self.inheader = True
|
self.inheader = True
|
||||||
self.o(hn(tag) * "#" + " ")
|
self.o(hn(tag) * "#" + " ")
|
||||||
|
if self.header_id:
|
||||||
|
self.o(' {#' + self.header_id + '}')
|
||||||
|
self.header_id = None
|
||||||
else:
|
else:
|
||||||
self.inheader = False
|
self.inheader = False
|
||||||
return # prevent redundant emphasis marks on headers
|
return # prevent redundant emphasis marks on headers
|
||||||
|
|
||||||
|
if tag == 'span':
|
||||||
|
if start and 'class' in attrs:
|
||||||
|
if attrs['class'] == 'highlight':
|
||||||
|
self.o('`') # NOTE: same as <code>
|
||||||
|
self.span_hightlight = True
|
||||||
|
elif attrs['class'] == 'lead':
|
||||||
|
self.o('==') # NOTE: but CriticMarkup uses {== ==}
|
||||||
|
self.span_lead = True
|
||||||
|
else:
|
||||||
|
if self.span_hightlight:
|
||||||
|
self.o('`')
|
||||||
|
self.span_hightlight = False
|
||||||
|
elif self.span_lead:
|
||||||
|
self.o('==')
|
||||||
|
self.span_lead = False
|
||||||
|
|
||||||
if tag in ["p", "div"]:
|
if tag in ["p", "div"]:
|
||||||
if self.google_doc:
|
if self.google_doc:
|
||||||
|
|
|
@ -17,7 +17,7 @@ BODY_WIDTH = 78
|
||||||
|
|
||||||
# Don't show internal links (href="#local-anchor") -- corresponding link
|
# Don't show internal links (href="#local-anchor") -- corresponding link
|
||||||
# targets won't be visible in the plain text file anyway.
|
# targets won't be visible in the plain text file anyway.
|
||||||
SKIP_INTERNAL_LINKS = True
|
SKIP_INTERNAL_LINKS = False
|
||||||
|
|
||||||
# Use inline, rather than reference, formatting for images and links
|
# Use inline, rather than reference, formatting for images and links
|
||||||
INLINE_LINKS = True
|
INLINE_LINKS = True
|
||||||
|
@ -25,7 +25,6 @@ INLINE_LINKS = True
|
||||||
# Protect links from line breaks surrounding them with angle brackets (in
|
# Protect links from line breaks surrounding them with angle brackets (in
|
||||||
# addition to their square brackets)
|
# addition to their square brackets)
|
||||||
PROTECT_LINKS = False
|
PROTECT_LINKS = False
|
||||||
# WRAP_LINKS = True
|
|
||||||
WRAP_LINKS = True
|
WRAP_LINKS = True
|
||||||
|
|
||||||
# Wrap list items.
|
# Wrap list items.
|
||||||
|
|
Loading…
Reference in New Issue
Block a user