Revert "Feature/lint"

This commit is contained in:
Kosta
2023-10-27 00:07:35 +03:00
committed by GitHub
parent 05136699ee
commit b142949805
70 changed files with 1465 additions and 1223 deletions

View File

@@ -1,5 +1,13 @@
"""html2text: Turn HTML into equivalent Markdown-structured text."""
import html.entities
import html.parser
import re
import string
import urllib.parse as urlparse
from textwrap import wrap
from typing import Dict, List, Optional, Tuple, Union
from . import config
from .elements import AnchorElement, ListElement
from .typing import OutCallback
@@ -18,14 +26,6 @@ from .utils import (
skipwrap,
unifiable_n,
)
from textwrap import wrap
from typing import Dict, List, Optional, Tuple, Union
import html.entities
import html.parser
import re
import string
import urllib.parse as urlparse
__version__ = (2020, 1, 16)
@@ -119,7 +119,9 @@ class HTML2Text(html.parser.HTMLParser):
self.lastWasList = False
self.style = 0
self.style_def = {} # type: Dict[str, Dict[str, str]]
self.tag_stack = [] # type: List[Tuple[str, Dict[str, Optional[str]], Dict[str, str]]]
self.tag_stack = (
[]
) # type: List[Tuple[str, Dict[str, Optional[str]], Dict[str, str]]]
self.emphasis = 0
self.drop_white_space = 0
self.inheader = False
@@ -298,7 +300,9 @@ class HTML2Text(html.parser.HTMLParser):
if strikethrough:
self.quiet -= 1
def handle_tag(self, tag: str, attrs: Dict[str, Optional[str]], start: bool) -> None:
def handle_tag(
self, tag: str, attrs: Dict[str, Optional[str]], start: bool
) -> None:
self.current_tag = tag
if self.tag_callback is not None:
@@ -329,7 +333,9 @@ class HTML2Text(html.parser.HTMLParser):
tag_style = element_style(attrs, self.style_def, parent_style)
self.tag_stack.append((tag, attrs, tag_style))
else:
dummy, attrs, tag_style = self.tag_stack.pop() if self.tag_stack else (None, {}, {})
dummy, attrs, tag_style = (
self.tag_stack.pop() if self.tag_stack else (None, {}, {})
)
if self.tag_stack:
parent_style = self.tag_stack[-1][2]
@@ -379,7 +385,11 @@ class HTML2Text(html.parser.HTMLParser):
):
self.o("`") # NOTE: same as <code>
self.span_highlight = True
elif self.current_class == "lead" and not self.inheader and not self.span_highlight:
elif (
self.current_class == "lead"
and not self.inheader
and not self.span_highlight
):
# self.o("==") # NOTE: CriticMarkup {==
self.span_lead = True
else:
@@ -469,7 +479,11 @@ class HTML2Text(html.parser.HTMLParser):
and not self.span_lead
and not self.span_highlight
):
if start and self.preceding_data and self.preceding_data[-1] == self.strong_mark[0]:
if (
start
and self.preceding_data
and self.preceding_data[-1] == self.strong_mark[0]
):
strong = " " + self.strong_mark
self.preceding_data += " "
else:
@@ -534,8 +548,13 @@ class HTML2Text(html.parser.HTMLParser):
"href" in attrs
and not attrs["href"].startswith("#_ftn")
and attrs["href"] is not None
and not (self.skip_internal_links and attrs["href"].startswith("#"))
and not (self.ignore_mailto_links and attrs["href"].startswith("mailto:"))
and not (
self.skip_internal_links and attrs["href"].startswith("#")
)
and not (
self.ignore_mailto_links
and attrs["href"].startswith("mailto:")
)
):
self.astack.append(attrs)
self.maybe_automatic_link = attrs["href"]
@@ -619,7 +638,9 @@ class HTML2Text(html.parser.HTMLParser):
self.o("![" + escape_md(alt) + "]")
if self.inline_links:
href = attrs.get("href") or ""
self.o("(" + escape_md(urlparse.urljoin(self.baseurl, href)) + ")")
self.o(
"(" + escape_md(urlparse.urljoin(self.baseurl, href)) + ")"
)
else:
i = self.previousIndex(attrs)
if i is not None:
@@ -675,7 +696,9 @@ class HTML2Text(html.parser.HTMLParser):
# WARNING: does not line up <ol><li>s > 9 correctly.
parent_list = None
for list in self.list:
self.o(" " if parent_list == "ol" and list.name == "ul" else " ")
self.o(
" " if parent_list == "ol" and list.name == "ul" else " "
)
parent_list = list.name
if li.name == "ul":
@@ -764,7 +787,9 @@ class HTML2Text(html.parser.HTMLParser):
self.pbr()
self.br_toggle = " "
def o(self, data: str, puredata: bool = False, force: Union[bool, str] = False) -> None:
def o(
self, data: str, puredata: bool = False, force: Union[bool, str] = False
) -> None:
"""
Deal with indentation and whitespace
"""
@@ -839,7 +864,9 @@ class HTML2Text(html.parser.HTMLParser):
self.out(" ")
self.space = False
if self.a and ((self.p_p == 2 and self.links_each_paragraph) or force == "end"):
if self.a and (
(self.p_p == 2 and self.links_each_paragraph) or force == "end"
):
if force == "end":
self.out("\n")
@@ -898,7 +925,11 @@ class HTML2Text(html.parser.HTMLParser):
if self.maybe_automatic_link is not None:
href = self.maybe_automatic_link
if href == data and self.absolute_url_matcher.match(href) and self.use_automatic_links:
if (
href == data
and self.absolute_url_matcher.match(href)
and self.use_automatic_links
):
self.o("<" + data + ">")
self.empty_link = False
return
@@ -969,7 +1000,9 @@ class HTML2Text(html.parser.HTMLParser):
self.inline_links = False
for para in text.split("\n"):
if len(para) > 0:
if not skipwrap(para, self.wrap_links, self.wrap_list_items, self.wrap_tables):
if not skipwrap(
para, self.wrap_links, self.wrap_list_items, self.wrap_tables
):
indent = ""
if para.startswith(" " + self.ul_item_mark):
# list item continuation: add a double indent to the
@@ -1010,7 +1043,9 @@ class HTML2Text(html.parser.HTMLParser):
return result
def html2text(html: str, baseurl: str = "", bodywidth: Optional[int] = config.BODY_WIDTH) -> str:
def html2text(
html: str, baseurl: str = "", bodywidth: Optional[int] = config.BODY_WIDTH
) -> str:
h = html.strip() or ""
if h:
h = HTML2Text(baseurl=baseurl, bodywidth=bodywidth)

View File

@@ -1,8 +1,8 @@
from . import __version__, config, HTML2Text
import argparse
import sys
from . import HTML2Text, __version__, config
# noinspection DuplicatedCode
def main() -> None:
@@ -117,7 +117,10 @@ def main() -> None:
dest="images_with_size",
action="store_true",
default=config.IMAGES_WITH_SIZE,
help=("Write image tags with height and width attrs as raw html to retain " "dimensions"),
help=(
"Write image tags with height and width attrs as raw html to retain "
"dimensions"
),
)
p.add_argument(
"-g",
@@ -257,7 +260,9 @@ def main() -> None:
default=config.CLOSE_QUOTE,
help="The character used to close quotes",
)
p.add_argument("--version", action="version", version=".".join(map(str, __version__)))
p.add_argument(
"--version", action="version", version=".".join(map(str, __version__))
)
p.add_argument("filename", nargs="?")
p.add_argument("encoding", nargs="?", default="utf-8")
args = p.parse_args()

View File

@@ -1,10 +1,12 @@
from . import config
import html.entities
from typing import Dict, List, Optional
import html.entities
from . import config
unifiable_n = {
html.entities.name2codepoint[k]: v for k, v in config.UNIFIABLE.items() if k != "nbsp"
html.entities.name2codepoint[k]: v
for k, v in config.UNIFIABLE.items()
if k != "nbsp"
}
@@ -154,7 +156,9 @@ def list_numbering_start(attrs: Dict[str, Optional[str]]) -> int:
return 0
def skipwrap(para: str, wrap_links: bool, wrap_list_items: bool, wrap_tables: bool) -> bool:
def skipwrap(
para: str, wrap_links: bool, wrap_list_items: bool, wrap_tables: bool
) -> bool:
# If it appears to contain a link
# don't wrap
if not wrap_links and config.RE_LINK.search(para):
@@ -232,7 +236,9 @@ def reformat_table(lines: List[str], right_margin: int) -> List[str]:
max_width += [len(x) + right_margin for x in cols[-(num_cols - max_cols) :]]
max_cols = num_cols
max_width = [max(len(x) + right_margin, old_len) for x, old_len in zip(cols, max_width)]
max_width = [
max(len(x) + right_margin, old_len) for x, old_len in zip(cols, max_width)
]
# reformat
new_lines = []
@@ -241,13 +247,15 @@ def reformat_table(lines: List[str], right_margin: int) -> List[str]:
if set(line.strip()) == set("-|"):
filler = "-"
new_cols = [
x.rstrip() + (filler * (M - len(x.rstrip()))) for x, M in zip(cols, max_width)
x.rstrip() + (filler * (M - len(x.rstrip())))
for x, M in zip(cols, max_width)
]
new_lines.append("|-" + "|".join(new_cols) + "|")
else:
filler = " "
new_cols = [
x.rstrip() + (filler * (M - len(x.rstrip()))) for x, M in zip(cols, max_width)
x.rstrip() + (filler * (M - len(x.rstrip())))
for x, M in zip(cols, max_width)
]
new_lines.append("| " + "|".join(new_cols) + "|")
return new_lines