fixing-wip

2023-01-17 22:56:48 +03:00
parent b966ce6c24
commit 82c6236a7f
8 changed files with 106 additions and 99 deletions
--- a/migration/extract.py
+++ b/migration/extract.py
@@ -27,6 +27,39 @@ def replace_tooltips(body):
    return newbody


+
+def extract_footnotes(body, shout_dict):
+    """Store ``&&&``-delimited footnotes of *body* as FOOTNOTE reactions.
+
+    ``body`` is split on ``&&&``; when the delimiters are balanced (odd number
+    of parts, more than one) every odd-indexed part is a footnote. A footnote
+    containing ``<a class="footnote-url" href="...">`` is saved through
+    ``Reaction.create`` and replaced by an info-icon link; any other footnote
+    part is kept as is. The delimiters are always dropped from the result.
+
+    :param body: text that may contain ``&&&...&&&`` footnotes
+    :param shout_dict: mapping describing the owning shout; only ``'id'`` is read
+    :return: tuple ``(newbody, placed)``; ``placed`` is True when at least one
+        footnote part was found
+    """
+    delimiter = "&&&"
+    fn = 'a class="footnote-url" href="'
+    closing = '</a>'
+    parts = body.split(delimiter)
+    lll = len(parts)
+    newparts = list(parts)
+    placed = False
+    # an even number of parts means an unpaired delimiter: nothing is extracted
+    if lll > 1 and lll & 1:
+        print("[extract] found %d footnotes in body" % ((lll - 1) // 2))
+        offset = 0  # position of the current part inside body
+        for i, part in enumerate(parts):
+            if i & 1:
+                placed = True
+                anchor_at = part.find(fn)
+                if anchor_at != -1:
+                    print("[extract] footnote: " + part)
+                    anchor = part[anchor_at + len(fn):]
+                    extracted_link = anchor.split('"', 1)[0]
+                    extracted_body = anchor.partition('>')[2].partition(closing)[0]
+                    print("[extract] footnote link: " + extracted_link)
+                    # range spans the whole tag, from its '<' to the end of '</a>'
+                    # (or to the end of the footnote when the tag is not closed)
+                    start = offset + anchor_at - len('<')
+                    closing_at = part.find(closing, anchor_at)
+                    if closing_at == -1:
+                        end = offset + len(part)
+                    else:
+                        end = offset + closing_at + len(closing)
+                    with local_session() as session:
+                        # keyword arguments, like Reaction.create(**remark) in
+                        # migration/tables/remarks.py
+                        # NOTE(review): no commit is issued here - confirm that
+                        # Reaction.create persists the row on its own
+                        Reaction.create(
+                            shout=shout_dict['id'],
+                            kind=ReactionKind.FOOTNOTE,
+                            body=extracted_body,
+                            range=str(start) + ':' + str(end),
+                        )
+                    newparts[i] = "<a href='#'>ℹ️</a>"
+            offset += len(part) + len(delimiter)
+    return ("".join(newparts), placed)
+
+
 def place_tooltips(body):
    parts = body.split("&&&")
    lll = len(parts)
@@ -203,7 +236,7 @@ def extract_dataimages(parts, prefix):
 di = "data:image"


-def extract_md_images(body, oid):
+def extract_md_images(body, prefix):
    newbody = ""
    body = (
        body.replace("\n! [](" + di, "\n ![](" + di)
@@ -212,7 +245,7 @@ def extract_md_images(body, oid):
    )
    parts = body.split(di)
    if len(parts) > 1:
-        newbody = extract_dataimages(parts, oid)
+        newbody = extract_dataimages(parts, prefix)
    else:
        newbody = body
    return newbody
@@ -238,24 +271,24 @@ def cleanup(body):
    return newbody


-def extract_md(body, oid=""):
+def extract_md(body, shout_dict=None):
+    """Clean up a markdown body and, for shout content, extract images and footnotes.
+
+    :param body: markdown text; a falsy value is returned unchanged
+    :param shout_dict: optional mapping of the owning shout; when given, its
+        ``'id'`` (or a random UUID if that is falsy) is passed to
+        extract_md_images and footnotes are extracted via extract_footnotes
+    :return: the processed body
+    :raises Exception: when a processing step returns an empty body
+    """
    newbody = body
    if newbody:
-        uid = oid or uuid.uuid4()
-        newbody = extract_md_images(newbody, uid)
-        if not newbody:
-            raise Exception("extract_images error")
-
        newbody = cleanup(newbody)
        if not newbody:
            raise Exception("cleanup error")

-        newbody, placed = place_tooltips(newbody)
-        if not newbody:
-            raise Exception("place_tooltips error")
+        if shout_dict:
+            # images and footnotes are only extracted for content owned by a shout
+            uid = shout_dict['id'] or uuid.uuid4()
+            newbody = extract_md_images(newbody, uid)
+            if not newbody:
+                raise Exception("extract_images error")
+
+            # work on the already processed text: passing the untouched `body`
+            # would throw away the cleanup and image extraction done above
+            newbody, _placed = extract_footnotes(newbody, shout_dict)
+            if not newbody:
+                raise Exception("extract_footnotes error")

-        if placed:
-            newbody = "import Tooltip from '$/components/Article/Tooltip'\n\n" + newbody
    return newbody


@@ -342,7 +375,9 @@ def prepare_html_body(entry):
    return body


-def extract_html(entry):
+def extract_html(entry, shout_id=None):
+    """Return the entry body re-serialized by BeautifulSoup's html.parser.
+
+    Backslash-escaped parentheses in the body are unescaped first. When
+    ``shout_id`` is truthy, extract_footnotes is run on the unescaped body.
+
+    :param entry: mapping whose ``"body"`` value (may be missing or None) is used
+    :param shout_id: optional id of the owning shout (a shout mapping is accepted too)
+    :return: the HTML string produced by the parser
+    """
    body_orig = (entry.get("body") or "").replace('\(', '(').replace('\)', ')')
+    if shout_id:
+        # extract_footnotes reads shout_dict['id'], so a bare id is wrapped
+        if isinstance(shout_id, (int, str)):
+            shout_dict = {"id": shout_id}
+        else:
+            shout_dict = shout_id
+        # NOTE(review): the rewritten body returned by extract_footnotes is
+        # ignored here, so footnote markup stays in the HTML - confirm intent
+        extract_footnotes(body_orig, shout_dict)
    body_html = str(BeautifulSoup(body_orig, features="html.parser"))
    return body_html
--- a/migration/tables/remarks.py
+++ b/migration/tables/remarks.py
@@ -1,31 +1,42 @@
 from base.orm import local_session
 from migration.extract import extract_md
 from migration.html2text import html2text
-from orm.remark import Remark
+from orm.reaction import Reaction, ReactionKind


 def migrate(entry, storage):
+    """Create a REMARK reaction for the shout referenced by *entry*.
+
+    The shout is looked up in ``storage['shouts']['by_oid']`` by
+    ``entry['contentItem']``. Returns the created reaction with its
+    ``_sa_instance_state`` key removed, or None when no shout is found.
+    """
    post_oid = entry['contentItem']
    print(post_oid)
    shout_dict = storage['shouts']['by_oid'].get(post_oid)
-    remark = {
-        "shout": shout_dict['id'],
-        "body": extract_md(
-            html2text(entry['body']),
-            entry['_id']
-        ),
-        "desc": extract_md(
-            html2text(
-                entry['textAfter'] or '' + \
-                entry['textBefore'] or '' + \
-                entry['textSelected'] or ''
+    if shout_dict:
+        print(shout_dict['body'])
+        remark = {
+            "shout": shout_dict['id'],
+            "body": extract_md(
+                html2text(entry['body']),
+                shout_dict
            ),
-            entry["_id"]
-        )
-    }
+            "kind": ReactionKind.REMARK
+        }

-    with local_session() as session:
-        rmrk = Remark.create(**remark)
-        session.commit()
-        del rmrk["_sa_instance_state"]
-        return rmrk
+        # range is "<offset of textBefore>:<offset just past textAfter>", both
+        # measured in the shout body
+        # NOTE(review): str.index raises ValueError when the text is not found
+        # in the body, and an empty/None textAfter gives an end offset of 0
+        if entry.get('textBefore'):
+            remark['range'] = str(
+                    shout_dict['body']
+                        .index(
+                            entry['textBefore'] or ''
+                        )
+                ) + ':' + str(
+                    shout_dict['body']
+                        .index(
+                            entry['textAfter'] or ''
+                        ) + len(
+                            entry['textAfter'] or ''
+                        )
+                )
+
+        with local_session() as session:
+            rmrk = Reaction.create(**remark)
+            session.commit()
+            del rmrk["_sa_instance_state"]
+            return rmrk
+    # no shout is known for this remark: nothing is created
+    return
--- a/migration/tables/topics.py
+++ b/migration/tables/topics.py
@@ -10,7 +10,7 @@ def migrate(entry):
        "slug": entry["slug"],
        "oid": entry["_id"],
        "title": entry["title"].replace("&nbsp;", " "),
-        "body": extract_md(html2text(body_orig), entry["_id"])
+        "body": extract_md(html2text(body_orig))
    }

    with local_session() as session: