Revert "Feature/lint"

2023-10-27 00:07:35 +03:00
parent 05136699ee
commit b142949805
70 changed files with 1465 additions and 1223 deletions
--- a/migration/html2text/init.py
+++ b/migration/html2text/init.py
@@ -1,5 +1,13 @@
 """html2text: Turn HTML into equivalent Markdown-structured text."""

+import html.entities
+import html.parser
+import re
+import string
+import urllib.parse as urlparse
+from textwrap import wrap
+from typing import Dict, List, Optional, Tuple, Union
+
 from . import config
 from .elements import AnchorElement, ListElement
 from .typing import OutCallback
@@ -18,14 +26,6 @@ from .utils import (
    skipwrap,
    unifiable_n,
 )
-from textwrap import wrap
-from typing import Dict, List, Optional, Tuple, Union
-
-import html.entities
-import html.parser
-import re
-import string
-import urllib.parse as urlparse

 __version__ = (2020, 1, 16)

@@ -119,7 +119,9 @@ class HTML2Text(html.parser.HTMLParser):
        self.lastWasList = False
        self.style = 0
        self.style_def = {}  # type: Dict[str, Dict[str, str]]
-        self.tag_stack = []  # type: List[Tuple[str, Dict[str, Optional[str]], Dict[str, str]]]
+        self.tag_stack = (
+            []
+        )  # type: List[Tuple[str, Dict[str, Optional[str]], Dict[str, str]]]
        self.emphasis = 0
        self.drop_white_space = 0
        self.inheader = False
@@ -298,7 +300,9 @@ class HTML2Text(html.parser.HTMLParser):
            if strikethrough:
                self.quiet -= 1

-    def handle_tag(self, tag: str, attrs: Dict[str, Optional[str]], start: bool) -> None:
+    def handle_tag(
+        self, tag: str, attrs: Dict[str, Optional[str]], start: bool
+    ) -> None:
        self.current_tag = tag

        if self.tag_callback is not None:
@@ -329,7 +333,9 @@ class HTML2Text(html.parser.HTMLParser):
                tag_style = element_style(attrs, self.style_def, parent_style)
                self.tag_stack.append((tag, attrs, tag_style))
            else:
-                dummy, attrs, tag_style = self.tag_stack.pop() if self.tag_stack else (None, {}, {})
+                dummy, attrs, tag_style = (
+                    self.tag_stack.pop() if self.tag_stack else (None, {}, {})
+                )
                if self.tag_stack:
                    parent_style = self.tag_stack[-1][2]

@@ -379,7 +385,11 @@ class HTML2Text(html.parser.HTMLParser):
                ):
                    self.o("`")  # NOTE: same as <code>
                    self.span_highlight = True
-                elif self.current_class == "lead" and not self.inheader and not self.span_highlight:
+                elif (
+                    self.current_class == "lead"
+                    and not self.inheader
+                    and not self.span_highlight
+                ):
                    # self.o("==") # NOTE:  CriticMarkup {==
                    self.span_lead = True
            else:
@@ -469,7 +479,11 @@ class HTML2Text(html.parser.HTMLParser):
                and not self.span_lead
                and not self.span_highlight
            ):
-                if start and self.preceding_data and self.preceding_data[-1] == self.strong_mark[0]:
+                if (
+                    start
+                    and self.preceding_data
+                    and self.preceding_data[-1] == self.strong_mark[0]
+                ):
                    strong = " " + self.strong_mark
                    self.preceding_data += " "
                else:
@@ -534,8 +548,13 @@ class HTML2Text(html.parser.HTMLParser):
                        "href" in attrs
                        and not attrs["href"].startswith("#_ftn")
                        and attrs["href"] is not None
-                        and not (self.skip_internal_links and attrs["href"].startswith("#"))
-                        and not (self.ignore_mailto_links and attrs["href"].startswith("mailto:"))
+                        and not (
+                            self.skip_internal_links and attrs["href"].startswith("#")
+                        )
+                        and not (
+                            self.ignore_mailto_links
+                            and attrs["href"].startswith("mailto:")
+                        )
                    ):
                        self.astack.append(attrs)
                        self.maybe_automatic_link = attrs["href"]
@@ -619,7 +638,9 @@ class HTML2Text(html.parser.HTMLParser):
                    self.o("![" + escape_md(alt) + "]")
                    if self.inline_links:
                        href = attrs.get("href") or ""
-                        self.o("(" + escape_md(urlparse.urljoin(self.baseurl, href)) + ")")
+                        self.o(
+                            "(" + escape_md(urlparse.urljoin(self.baseurl, href)) + ")"
+                        )
                    else:
                        i = self.previousIndex(attrs)
                        if i is not None:
@@ -675,7 +696,9 @@ class HTML2Text(html.parser.HTMLParser):
                    # WARNING: does not line up <ol><li>s > 9 correctly.
                    parent_list = None
                    for list in self.list:
-                        self.o("   " if parent_list == "ol" and list.name == "ul" else "  ")
+                        self.o(
+                            "   " if parent_list == "ol" and list.name == "ul" else "  "
+                        )
                        parent_list = list.name

                if li.name == "ul":
@@ -764,7 +787,9 @@ class HTML2Text(html.parser.HTMLParser):
        self.pbr()
        self.br_toggle = "  "

-    def o(self, data: str, puredata: bool = False, force: Union[bool, str] = False) -> None:
+    def o(
+        self, data: str, puredata: bool = False, force: Union[bool, str] = False
+    ) -> None:
        """
        Deal with indentation and whitespace
        """
@@ -839,7 +864,9 @@ class HTML2Text(html.parser.HTMLParser):
                    self.out(" ")
                self.space = False

-            if self.a and ((self.p_p == 2 and self.links_each_paragraph) or force == "end"):
+            if self.a and (
+                (self.p_p == 2 and self.links_each_paragraph) or force == "end"
+            ):
                if force == "end":
                    self.out("\n")

@@ -898,7 +925,11 @@ class HTML2Text(html.parser.HTMLParser):

        if self.maybe_automatic_link is not None:
            href = self.maybe_automatic_link
-            if href == data and self.absolute_url_matcher.match(href) and self.use_automatic_links:
+            if (
+                href == data
+                and self.absolute_url_matcher.match(href)
+                and self.use_automatic_links
+            ):
                self.o("<" + data + ">")
                self.empty_link = False
                return
@@ -969,7 +1000,9 @@ class HTML2Text(html.parser.HTMLParser):
            self.inline_links = False
        for para in text.split("\n"):
            if len(para) > 0:
-                if not skipwrap(para, self.wrap_links, self.wrap_list_items, self.wrap_tables):
+                if not skipwrap(
+                    para, self.wrap_links, self.wrap_list_items, self.wrap_tables
+                ):
                    indent = ""
                    if para.startswith("  " + self.ul_item_mark):
                        # list item continuation: add a double indent to the
@@ -1010,7 +1043,9 @@ class HTML2Text(html.parser.HTMLParser):
        return result


-def html2text(html: str, baseurl: str = "", bodywidth: Optional[int] = config.BODY_WIDTH) -> str:
+def html2text(
+    html: str, baseurl: str = "", bodywidth: Optional[int] = config.BODY_WIDTH
+) -> str:
    h = html.strip() or ""
    if h:
        h = HTML2Text(baseurl=baseurl, bodywidth=bodywidth)
--- a/migration/html2text/cli.py
+++ b/migration/html2text/cli.py
@@ -1,8 +1,8 @@
-from . import __version__, config, HTML2Text
-
 import argparse
 import sys

+from . import HTML2Text, __version__, config
+

 # noinspection DuplicatedCode
 def main() -> None:
@@ -117,7 +117,10 @@ def main() -> None:
        dest="images_with_size",
        action="store_true",
        default=config.IMAGES_WITH_SIZE,
-        help=("Write image tags with height and width attrs as raw html to retain " "dimensions"),
+        help=(
+            "Write image tags with height and width attrs as raw html to retain "
+            "dimensions"
+        ),
    )
    p.add_argument(
        "-g",
@@ -257,7 +260,9 @@ def main() -> None:
        default=config.CLOSE_QUOTE,
        help="The character used to close quotes",
    )
-    p.add_argument("--version", action="version", version=".".join(map(str, __version__)))
+    p.add_argument(
+        "--version", action="version", version=".".join(map(str, __version__))
+    )
    p.add_argument("filename", nargs="?")
    p.add_argument("encoding", nargs="?", default="utf-8")
    args = p.parse_args()
--- a/migration/html2text/utils.py
+++ b/migration/html2text/utils.py
@@ -1,10 +1,12 @@
-from . import config
+import html.entities
 from typing import Dict, List, Optional

-import html.entities
+from . import config

 unifiable_n = {
-    html.entities.name2codepoint[k]: v for k, v in config.UNIFIABLE.items() if k != "nbsp"
+    html.entities.name2codepoint[k]: v
+    for k, v in config.UNIFIABLE.items()
+    if k != "nbsp"
 }


@@ -154,7 +156,9 @@ def list_numbering_start(attrs: Dict[str, Optional[str]]) -> int:
    return 0


-def skipwrap(para: str, wrap_links: bool, wrap_list_items: bool, wrap_tables: bool) -> bool:
+def skipwrap(
+    para: str, wrap_links: bool, wrap_list_items: bool, wrap_tables: bool
+) -> bool:
    # If it appears to contain a link
    # don't wrap
    if not wrap_links and config.RE_LINK.search(para):
@@ -232,7 +236,9 @@ def reformat_table(lines: List[str], right_margin: int) -> List[str]:
            max_width += [len(x) + right_margin for x in cols[-(num_cols - max_cols) :]]
            max_cols = num_cols

-        max_width = [max(len(x) + right_margin, old_len) for x, old_len in zip(cols, max_width)]
+        max_width = [
+            max(len(x) + right_margin, old_len) for x, old_len in zip(cols, max_width)
+        ]

    # reformat
    new_lines = []
@@ -241,13 +247,15 @@ def reformat_table(lines: List[str], right_margin: int) -> List[str]:
        if set(line.strip()) == set("-|"):
            filler = "-"
            new_cols = [
-                x.rstrip() + (filler * (M - len(x.rstrip()))) for x, M in zip(cols, max_width)
+                x.rstrip() + (filler * (M - len(x.rstrip())))
+                for x, M in zip(cols, max_width)
            ]
            new_lines.append("|-" + "|".join(new_cols) + "|")
        else:
            filler = " "
            new_cols = [
-                x.rstrip() + (filler * (M - len(x.rstrip()))) for x, M in zip(cols, max_width)
+                x.rstrip() + (filler * (M - len(x.rstrip())))
+                for x, M in zip(cols, max_width)
            ]
            new_lines.append("| " + "|".join(new_cols) + "|")
    return new_lines