This commit is contained in:
tonyrewin 2022-11-13 19:44:54 +03:00
parent fbe490d322
commit 6ca4c9241e
4 changed files with 12 additions and 10 deletions

View File

@@ -1,4 +1,4 @@
WAIT=120
WAIT=30
TIMEOUT=10
ATTEMPTS=60 # 60 * 30 = 30 min

View File

@@ -50,10 +50,11 @@ def export_mdx(r):
def export_body(shout, storage):
entry = storage["content_items"]["by_oid"][shout["oid"]]
if entry:
shout["body"] = prepare_html_body(entry) # prepare_md_body(entry)
shout["body"], media = prepare_html_body(entry) # prepare_md_body(entry)
shout["media"] = media
export_mdx(shout)
print("[export] html for %s" % shout["slug"])
body = extract_html(entry)
body, _media = extract_html(entry)
open(contentDir + shout["slug"] + ".html", "w").write(body)
else:
raise Exception("no content_items entry found")

View File

@@ -293,12 +293,12 @@ def prepare_md_body(entry):
)
body = "import MusicPlayer from '$/components/Article/MusicPlayer'\n\n" + addon
body_orig = extract_html(entry)
body_orig, media = extract_html(entry)
if body_orig:
body += extract_md(html2text(body_orig), entry["_id"])
if not body:
print("[extract] empty MDX body")
return body
return body, media
def prepare_html_body(entry):
@@ -339,11 +339,11 @@ def prepare_html_body(entry):
addon += '"></audio></figure>'
body += addon
body = extract_html(entry)
body, media = extract_html(entry)
# if body_orig: body += extract_md(html2text(body_orig), entry['_id'])
if not body:
print("[extract] empty HTML body")
return body
return body, media
def extract_html(entry):
@@ -403,4 +403,4 @@ def extract_html(entry):
if not body_orig:
print("[extract] empty HTML body")
# body_html = str(BeautifulSoup(body_orig, features="html.parser"))
return body_orig
return body_orig, media

View File

@@ -190,8 +190,9 @@ async def migrate(entry, storage):
entry["cover"] = r["cover"]
# body
r["body"] = prepare_html_body(entry)
r["body"], media = prepare_html_body(entry)
if media:
print(media)
# save shout to db
s = object()
shout_dict = r.copy()