From 5fedd007c7ac2cbd1e64a5340503d9580233893b Mon Sep 17 00:00:00 2001
From: Tony Rewin \s*\s*
",
- r"\s*
",
- r"\s*
",
- r"\s*
",
- r"
\s*
- self.span_highlight = True
- elif (
- self.current_class == "lead"
- and not self.inheader
- and not self.span_highlight
- ):
- # self.o("==") # NOTE: CriticMarkup {==
- self.span_lead = True
- else:
- if self.span_highlight:
- self.o("`")
- self.span_highlight = False
- elif self.span_lead:
- # self.o('==')
- self.span_lead = False
-
- if tag in ["p", "div"]:
- if self.google_doc:
- if start and google_has_height(tag_style):
- self.p()
- else:
- self.soft_br()
- elif self.astack or self.inheader:
- pass
- else:
- self.p()
-
- if tag == "br" and start:
- if self.blockquote > 0:
- self.o(" \n> ")
- else:
- self.o(" \n")
-
- if tag == "hr" and start:
- self.p()
- self.o("* * *")
- self.p()
-
- if tag in ["head", "style", "script"]:
- if start:
- self.quiet += 1
- else:
- self.quiet -= 1
-
- if tag == "style":
- if start:
- self.style += 1
- else:
- self.style -= 1
-
- if tag in ["body"]:
- self.quiet = 0 # sites like 9rules.com never close
-
- if tag == "blockquote":
- if start:
- self.p()
- self.o("> ", force=True)
- self.start = True
- self.blockquote += 1
- else:
- self.blockquote -= 1
- self.p()
-
- if tag in ["em", "i", "u"] and not self.ignore_emphasis:
- # Separate with a space if we immediately follow an alphanumeric
- # character, since otherwise Markdown won't render the emphasis
- # marks, and we'll be left with eg 'foo_bar_' visible.
- # (Don't add a space otherwise, though, since there isn't one in the
- # original HTML.)
- if (
- start
- and self.preceding_data
- and self.preceding_data[-1] not in string.whitespace
- and self.preceding_data[-1] not in string.punctuation
- ):
- emphasis = " " + self.emphasis_mark
- self.preceding_data += " "
- else:
- emphasis = self.emphasis_mark
-
- self.o(emphasis)
- if start:
- self.stressed = True
-
- if tag in ["strong", "b"] and not self.ignore_emphasis:
- # Separate with space if we immediately follow an * character, since
- # without it, Markdown won't render the resulting *** correctly.
- # (Don't add a space otherwise, though, since there isn't one in the
- # original HTML.)
- if (
- not self.inheader
- and not self.astack
- and not self.span_lead
- and not self.span_highlight
- ):
- if (
- start
- and self.preceding_data
- and self.preceding_data[-1] == self.strong_mark[0]
- ):
- strong = " " + self.strong_mark
- self.preceding_data += " "
- else:
- strong = self.strong_mark
-
- self.o(strong)
- if start:
- self.stressed = True
-
- if tag in ["del", "strike", "s"]:
- if start and self.preceding_data and self.preceding_data[-1] == "~":
- strike = " ~~"
- self.preceding_data += " "
- else:
- strike = "~~"
-
- self.o(strike)
- if start:
- self.stressed = True
-
- if self.google_doc:
- if not self.inheader:
- # handle some font attributes, but leave headers clean
- self.handle_emphasis(start, tag_style, parent_style)
-
- if tag in ["kbd", "code", "tt"] and not self.pre:
- self.o("`") # `` `this` ``
- self.code = not self.code
-
- if tag == "abbr":
- if start:
- self.abbr_title = None
- self.abbr_data = ""
- if "title" in attrs:
- self.abbr_title = attrs["title"]
- else:
- if self.abbr_title is not None:
- assert self.abbr_data is not None
- self.abbr_list[self.abbr_data] = self.abbr_title
- self.abbr_title = None
- self.abbr_data = None
-
- if tag == "q":
- if not self.quote:
- self.o(self.open_quote)
- else:
- self.o(self.close_quote)
- self.quote = not self.quote
-
- def link_url(self: HTML2Text, link: str, title: str = "") -> None:
- url = urlparse.urljoin(self.baseurl, link)
- title = ' "{}"'.format(title) if title.strip() else ""
- self.o("]({url}{title})".format(url=escape_md(url), title=title))
-
- if tag == "a" and not self.ignore_links:
- if start:
- if "data-original-title" in attrs:
- # WARNING: old discours specific code
- self.o("&&&%s&&&" % attrs["data-original-title"])
- else:
- if (
- "href" in attrs
- and not attrs["href"].startswith("#_ftn")
- and attrs["href"] is not None
- and not (
- self.skip_internal_links and attrs["href"].startswith("#")
- )
- and not (
- self.ignore_mailto_links
- and attrs["href"].startswith("mailto:")
- )
- ):
- self.astack.append(attrs)
- self.maybe_automatic_link = attrs["href"]
- self.empty_link = True
- if self.protect_links:
- attrs["href"] = "<" + attrs["href"] + ">"
- else:
- self.astack.append(None)
- else:
- if self.astack:
- a = self.astack.pop()
- if self.maybe_automatic_link and not self.empty_link:
- self.maybe_automatic_link = None
- elif a:
- assert a["href"] is not None
- if self.empty_link:
- self.o("[")
- self.empty_link = False
- self.maybe_automatic_link = None
- if self.inline_links:
- self.p_p = 0
- title = a.get("title") or ""
- title = escape_md(title)
- link_url(self, a["href"], title)
- else:
- i = self.previousIndex(a)
- if i is not None:
- a_props = self.a[i]
- else:
- self.acount += 1
- a_props = AnchorElement(a, self.acount, self.outcount)
- self.a.append(a_props)
- self.o("][" + str(a_props.count) + "]")
-
- if tag == "img" and start and not self.ignore_images:
- # skip cloudinary images
- if "src" in attrs and "cloudinary" not in attrs["src"]:
- assert attrs["src"] is not None
- if not self.images_to_alt:
- attrs["href"] = attrs["src"]
- alt = attrs.get("alt") or self.default_image_alt
-
- # If we have images_with_size, write raw html including width,
- # height, and alt attributes
- if self.images_as_html or (
- self.images_with_size and ("width" in attrs or "height" in attrs)
- ):
- self.o("
")
- return
-
- # If we have a link to create, output the start
- if self.maybe_automatic_link is not None:
- href = self.maybe_automatic_link
- if (
- self.images_to_alt
- and escape_md(alt) == href
- and self.absolute_url_matcher.match(href)
- ):
- self.o("<" + escape_md(alt) + ">")
- self.empty_link = False
- return
- else:
- self.o("[")
- self.maybe_automatic_link = None
- self.empty_link = False
-
- # If we have images_to_alt, we discard the image itself,
- # considering only the alt text.
- if self.images_to_alt:
- self.o(escape_md(alt))
- else:
- self.o("![" + escape_md(alt) + "]")
- if self.inline_links:
- href = attrs.get("href") or ""
- self.o(
- "(" + escape_md(urlparse.urljoin(self.baseurl, href)) + ")"
- )
- else:
- i = self.previousIndex(attrs)
- if i is not None:
- a_props = self.a[i]
- else:
- self.acount += 1
- a_props = AnchorElement(attrs, self.acount, self.outcount)
- self.a.append(a_props)
- self.o("[" + str(a_props.count) + "]")
-
- if tag == "dl" and start:
- self.p()
- if tag == "dt" and not start:
- self.pbr()
- if tag == "dd" and start:
- self.o(" ")
- if tag == "dd" and not start:
- self.pbr()
-
- if tag in ["ol", "ul"]:
- # Google Docs create sub lists as top level lists
- if not self.list and not self.lastWasList:
- self.p()
- if start:
- if self.google_doc:
- list_style = google_list_style(tag_style)
- else:
- list_style = tag
- numbering_start = list_numbering_start(attrs)
- self.list.append(ListElement(list_style, numbering_start))
- else:
- if self.list:
- self.list.pop()
- if not self.google_doc and not self.list:
- self.o("\n")
- self.lastWasList = True
- else:
- self.lastWasList = False
-
- if tag == "li":
- self.pbr()
- if start:
- if self.list:
- li = self.list[-1]
- else:
- li = ListElement("ul", 0)
- if self.google_doc:
- self.o(" " * self.google_nest_count(tag_style))
- else:
- # Indent two spaces per list, except use three spaces for an
- # unordered list inside an ordered list.
- # https://spec.commonmark.org/0.28/#motivation
- # WARNING: does not line up - s > 9 correctly.
- parent_list = None
- for list in self.list:
- self.o(
- " " if parent_list == "ol" and list.name == "ul" else " "
- )
- parent_list = list.name
-
- if li.name == "ul":
- self.o(self.ul_item_mark + " ")
- elif li.name == "ol":
- li.num += 1
- self.o(str(li.num) + ". ")
- self.start = True
-
- if tag in ["table", "tr", "td", "th"]:
- if self.ignore_tables:
- if tag == "tr":
- if start:
- pass
- else:
- self.soft_br()
- else:
- pass
-
- elif self.bypass_tables:
- if start:
- self.soft_br()
- if tag in ["td", "th"]:
- if start:
- self.o("<{}>\n\n".format(tag))
- else:
- self.o("\n{}>".format(tag))
- else:
- if start:
- self.o("<{}>".format(tag))
- else:
- self.o("{}>".format(tag))
-
- else:
- if tag == "table":
- if start:
- self.table_start = True
- if self.pad_tables:
- self.o("<" + config.TABLE_MARKER_FOR_PAD + ">")
- self.o(" \n")
- else:
- if self.pad_tables:
- # add break in case the table is empty or its 1 row table
- self.soft_br()
- self.o("" + config.TABLE_MARKER_FOR_PAD + ">")
- self.o(" \n")
- if tag in ["td", "th"] and start:
- if self.split_next_td:
- self.o("| ")
- self.split_next_td = True
-
- if tag == "tr" and start:
- self.td_count = 0
- if tag == "tr" and not start:
- self.split_next_td = False
- self.soft_br()
- if tag == "tr" and not start and self.table_start:
- # Underline table header
- self.o("|".join(["---"] * self.td_count))
- self.soft_br()
- self.table_start = False
- if tag in ["td", "th"] and start:
- self.td_count += 1
-
- if tag == "pre":
- if start:
- self.startpre = True
- self.pre = True
- else:
- self.pre = False
- if self.mark_code:
- self.out("\n[/code]")
- self.p()
-
- def pbr(self) -> None:
- "Pretty print has a line break"
- if self.p_p == 0:
- self.p_p = 1
-
- def p(self) -> None:
- "Set pretty print to 1 or 2 lines"
- self.p_p = 1 if self.single_line_break else 2
-
- def soft_br(self) -> None:
- "Soft breaks"
- self.pbr()
- self.br_toggle = " "
-
- def o(
- self, data: str, puredata: bool = False, force: Union[bool, str] = False
- ) -> None:
- """
- Deal with indentation and whitespace
- """
- if self.abbr_data is not None:
- self.abbr_data += data
-
- if not self.quiet:
- if self.google_doc:
- # prevent white space immediately after 'begin emphasis'
- # marks ('**' and '_')
- lstripped_data = data.lstrip()
- if self.drop_white_space and not (self.pre or self.code):
- data = lstripped_data
- if lstripped_data != "":
- self.drop_white_space = 0
-
- if puredata and not self.pre:
- # This is a very dangerous call ... it could mess up
- # all handling of when not handled properly
- # (see entityref)
- data = re.sub(r"\s+", r" ", data)
- if data and data[0] == " ":
- self.space = True
- data = data[1:]
- if not data and not force:
- return
-
- if self.startpre:
- # self.out(" :") # not an output when already one there
- if not data.startswith("\n") and not data.startswith("\r\n"):
- #
stuff...
- data = "\n" + data
- if self.mark_code:
- self.out("\n[code]")
- self.p_p = 0
-
- bq = ">" * self.blockquote
- if not (force and data and data[0] == ">") and self.blockquote:
- bq += " "
-
- if self.pre:
- if not self.list:
- bq += " "
- # else: list content is already partially indented
- bq += " " * len(self.list)
- data = data.replace("\n", "\n" + bq)
-
- if self.startpre:
- self.startpre = False
- if self.list:
- # use existing initial indentation
- data = data.lstrip("\n")
-
- if self.start:
- self.space = False
- self.p_p = 0
- self.start = False
-
- if force == "end":
- # It's the end.
- self.p_p = 0
- self.out("\n")
- self.space = False
-
- if self.p_p:
- self.out((self.br_toggle + "\n" + bq) * self.p_p)
- self.space = False
- self.br_toggle = ""
-
- if self.space:
- if not self.lastWasNL:
- self.out(" ")
- self.space = False
-
- if self.a and (
- (self.p_p == 2 and self.links_each_paragraph) or force == "end"
- ):
- if force == "end":
- self.out("\n")
-
- newa = []
- for link in self.a:
- if self.outcount > link.outcount:
- self.out(
- " ["
- + str(link.count)
- + "]: "
- + urlparse.urljoin(self.baseurl, link.attrs["href"])
- )
- if "title" in link.attrs:
- assert link.attrs["title"] is not None
- self.out(" (" + link.attrs["title"] + ")")
- self.out("\n")
- else:
- newa.append(link)
-
- # Don't need an extra line when nothing was done.
- if self.a != newa:
- self.out("\n")
-
- self.a = newa
-
- if self.abbr_list and force == "end":
- for abbr, definition in self.abbr_list.items():
- self.out(" *[" + abbr + "]: " + definition + "\n")
-
- self.p_p = 0
- self.out(data)
- self.outcount += 1
-
- def handle_data(self, data: str, entity_char: bool = False) -> None:
- if not data:
- # Data may be empty for some HTML entities. For example,
- # LEFT-TO-RIGHT MARK.
- return
-
- if self.stressed:
- data = data.strip()
- self.stressed = False
- self.preceding_stressed = True
- elif self.preceding_stressed:
- if (
- re.match(r"[^][(){}\s.!?]", data[0])
- and not hn(self.current_tag)
- and self.current_tag not in ["a", "code", "pre"]
- ):
- # should match a letter or common punctuation
- data = " " + data
- self.preceding_stressed = False
-
- if self.style:
- self.style_def.update(dumb_css_parser(data))
-
- if self.maybe_automatic_link is not None:
- href = self.maybe_automatic_link
- if (
- href == data
- and self.absolute_url_matcher.match(href)
- and self.use_automatic_links
- ):
- self.o("<" + data + ">")
- self.empty_link = False
- return
- else:
- self.o("[")
- self.maybe_automatic_link = None
- self.empty_link = False
-
- if not self.code and not self.pre and not entity_char:
- data = escape_md_section(data, snob=self.escape_snob)
- self.preceding_data = data
- self.o(data, puredata=True)
-
- def charref(self, name: str) -> str:
- if name[0] in ["x", "X"]:
- c = int(name[1:], 16)
- else:
- c = int(name)
-
- if not self.unicode_snob and c in unifiable_n:
- return unifiable_n[c]
- else:
- try:
- return chr(c)
- except ValueError: # invalid unicode
- return ""
-
- def entityref(self, c: str) -> str:
- if not self.unicode_snob and c in config.UNIFIABLE:
- return config.UNIFIABLE[c]
- try:
- ch = html.entities.html5[c + ";"]
- except KeyError:
- return "&" + c + ";"
- return config.UNIFIABLE[c] if c == "nbsp" else ch
-
- def google_nest_count(self, style: Dict[str, str]) -> int:
- """
- Calculate the nesting count of google doc lists
-
- :type style: dict
-
- :rtype: int
- """
- nest_count = 0
- if "margin-left" in style:
- nest_count = int(style["margin-left"][:-2]) // self.google_list_indent
-
- return nest_count
-
- def optwrap(self, text: str) -> str:
- """
- Wrap all paragraphs in the provided text.
-
- :type text: str
-
- :rtype: str
- """
- if not self.body_width:
- return text
-
- result = ""
- newlines = 0
- # I cannot think of a better solution for now.
- # To avoid the non-wrap behaviour for entire paras
- # because of the presence of a link in it
- if not self.wrap_links:
- self.inline_links = False
- for para in text.split("\n"):
- if len(para) > 0:
- if not skipwrap(
- para, self.wrap_links, self.wrap_list_items, self.wrap_tables
- ):
- indent = ""
- if para.startswith(" " + self.ul_item_mark):
- # list item continuation: add a double indent to the
- # new lines
- indent = " "
- elif para.startswith("> "):
- # blockquote continuation: add the greater than symbol
- # to the new lines
- indent = "> "
- wrapped = wrap(
- para,
- self.body_width,
- break_long_words=False,
- subsequent_indent=indent,
- )
- result += "\n".join(wrapped)
- if para.endswith(" "):
- result += " \n"
- newlines = 1
- elif indent:
- result += "\n"
- newlines = 1
- else:
- result += "\n\n"
- newlines = 2
- else:
- # Warning for the tempted!!!
- # Be aware that obvious replacement of this with
- # line.isspace()
- # DOES NOT work! Explanations are welcome.
- if not config.RE_SPACE.match(para):
- result += para + "\n"
- newlines = 1
- else:
- if newlines < 2:
- result += "\n"
- newlines += 1
- return result
-
-
-def html2text(
- html: str, baseurl: str = "", bodywidth: Optional[int] = config.BODY_WIDTH
-) -> str:
- h = html.strip() or ""
- if h:
- h = HTML2Text(baseurl=baseurl, bodywidth=bodywidth)
- h = h.handle(html.strip())
- # print('[html2text] %d bytes' % len(html))
- return h
diff --git a/migration/html2text/__main__.py b/migration/html2text/__main__.py
deleted file mode 100644
index 4e28416e..00000000
--- a/migration/html2text/__main__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from .cli import main
-
-main()
diff --git a/migration/html2text/cli.py b/migration/html2text/cli.py
deleted file mode 100644
index dbaba28b..00000000
--- a/migration/html2text/cli.py
+++ /dev/null
@@ -1,323 +0,0 @@
-import argparse
-import sys
-
-from . import HTML2Text, __version__, config
-
-
-# noinspection DuplicatedCode
-def main() -> None:
- baseurl = ""
-
- class bcolors:
- HEADER = "\033[95m"
- OKBLUE = "\033[94m"
- OKGREEN = "\033[92m"
- WARNING = "\033[93m"
- FAIL = "\033[91m"
- ENDC = "\033[0m"
- BOLD = "\033[1m"
- UNDERLINE = "\033[4m"
-
- p = argparse.ArgumentParser()
- p.add_argument(
- "--default-image-alt",
- dest="default_image_alt",
- default=config.DEFAULT_IMAGE_ALT,
- help="The default alt string for images with missing ones",
- )
- p.add_argument(
- "--pad-tables",
- dest="pad_tables",
- action="store_true",
- default=config.PAD_TABLES,
- help="pad the cells to equal column width in tables",
- )
- p.add_argument(
- "--no-wrap-links",
- dest="wrap_links",
- action="store_false",
- default=config.WRAP_LINKS,
- help="don't wrap links during conversion",
- )
- p.add_argument(
- "--wrap-list-items",
- dest="wrap_list_items",
- action="store_true",
- default=config.WRAP_LIST_ITEMS,
- help="wrap list items during conversion",
- )
- p.add_argument(
- "--wrap-tables",
- dest="wrap_tables",
- action="store_true",
- default=config.WRAP_TABLES,
- help="wrap tables",
- )
- p.add_argument(
- "--ignore-emphasis",
- dest="ignore_emphasis",
- action="store_true",
- default=config.IGNORE_EMPHASIS,
- help="don't include any formatting for emphasis",
- )
- p.add_argument(
- "--reference-links",
- dest="inline_links",
- action="store_false",
- default=config.INLINE_LINKS,
- help="use reference style links instead of inline links",
- )
- p.add_argument(
- "--ignore-links",
- dest="ignore_links",
- action="store_true",
- default=config.IGNORE_ANCHORS,
- help="don't include any formatting for links",
- )
- p.add_argument(
- "--ignore-mailto-links",
- action="store_true",
- dest="ignore_mailto_links",
- default=config.IGNORE_MAILTO_LINKS,
- help="don't include mailto: links",
- )
- p.add_argument(
- "--protect-links",
- dest="protect_links",
- action="store_true",
- default=config.PROTECT_LINKS,
- help="protect links from line breaks surrounding them with angle brackets",
- )
- p.add_argument(
- "--ignore-images",
- dest="ignore_images",
- action="store_true",
- default=config.IGNORE_IMAGES,
- help="don't include any formatting for images",
- )
- p.add_argument(
- "--images-as-html",
- dest="images_as_html",
- action="store_true",
- default=config.IMAGES_AS_HTML,
- help=(
- "Always write image tags as raw html; preserves `height`, `width` and "
- "`alt` if possible."
- ),
- )
- p.add_argument(
- "--images-to-alt",
- dest="images_to_alt",
- action="store_true",
- default=config.IMAGES_TO_ALT,
- help="Discard image data, only keep alt text",
- )
- p.add_argument(
- "--images-with-size",
- dest="images_with_size",
- action="store_true",
- default=config.IMAGES_WITH_SIZE,
- help=(
- "Write image tags with height and width attrs as raw html to retain "
- "dimensions"
- ),
- )
- p.add_argument(
- "-g",
- "--google-doc",
- action="store_true",
- dest="google_doc",
- default=False,
- help="convert an html-exported Google Document",
- )
- p.add_argument(
- "-d",
- "--dash-unordered-list",
- action="store_true",
- dest="ul_style_dash",
- default=False,
- help="use a dash rather than a star for unordered list items",
- )
- p.add_argument(
- "-e",
- "--asterisk-emphasis",
- action="store_true",
- dest="em_style_asterisk",
- default=False,
- help="use an asterisk rather than an underscore for emphasized text",
- )
- p.add_argument(
- "-b",
- "--body-width",
- dest="body_width",
- type=int,
- default=config.BODY_WIDTH,
- help="number of characters per output line, 0 for no wrap",
- )
- p.add_argument(
- "-i",
- "--google-list-indent",
- dest="list_indent",
- type=int,
- default=config.GOOGLE_LIST_INDENT,
- help="number of pixels Google indents nested lists",
- )
- p.add_argument(
- "-s",
- "--hide-strikethrough",
- action="store_true",
- dest="hide_strikethrough",
- default=False,
- help="hide strike-through text. only relevant when -g is " "specified as well",
- )
- p.add_argument(
- "--escape-all",
- action="store_true",
- dest="escape_snob",
- default=False,
- help=(
- "Escape all special characters. Output is less readable, but avoids "
- "corner case formatting issues."
- ),
- )
- p.add_argument(
- "--bypass-tables",
- action="store_true",
- dest="bypass_tables",
- default=config.BYPASS_TABLES,
- help="Format tables in HTML rather than Markdown syntax.",
- )
- p.add_argument(
- "--ignore-tables",
- action="store_true",
- dest="ignore_tables",
- default=config.IGNORE_TABLES,
- help="Ignore table-related tags (table, th, td, tr) " "while keeping rows.",
- )
- p.add_argument(
- "--single-line-break",
- action="store_true",
- dest="single_line_break",
- default=config.SINGLE_LINE_BREAK,
- help=(
- "Use a single line break after a block element rather than two line "
- "breaks. NOTE: Requires --body-width=0"
- ),
- )
- p.add_argument(
- "--unicode-snob",
- action="store_true",
- dest="unicode_snob",
- default=config.UNICODE_SNOB,
- help="Use unicode throughout document",
- )
- p.add_argument(
- "--no-automatic-links",
- action="store_false",
- dest="use_automatic_links",
- default=config.USE_AUTOMATIC_LINKS,
- help="Do not use automatic links wherever applicable",
- )
- p.add_argument(
- "--no-skip-internal-links",
- action="store_false",
- dest="skip_internal_links",
- default=config.SKIP_INTERNAL_LINKS,
- help="Do not skip internal links",
- )
- p.add_argument(
- "--links-after-para",
- action="store_true",
- dest="links_each_paragraph",
- default=config.LINKS_EACH_PARAGRAPH,
- help="Put links after each paragraph instead of document",
- )
- p.add_argument(
- "--mark-code",
- action="store_true",
- dest="mark_code",
- default=config.MARK_CODE,
- help="Mark program code blocks with [code]...[/code]",
- )
- p.add_argument(
- "--decode-errors",
- dest="decode_errors",
- default=config.DECODE_ERRORS,
- help=(
- "What to do in case of decode errors.'ignore', 'strict' and 'replace' are "
- "acceptable values"
- ),
- )
- p.add_argument(
- "--open-quote",
- dest="open_quote",
- default=config.OPEN_QUOTE,
- help="The character used to open quotes",
- )
- p.add_argument(
- "--close-quote",
- dest="close_quote",
- default=config.CLOSE_QUOTE,
- help="The character used to close quotes",
- )
- p.add_argument(
- "--version", action="version", version=".".join(map(str, __version__))
- )
- p.add_argument("filename", nargs="?")
- p.add_argument("encoding", nargs="?", default="utf-8")
- args = p.parse_args()
-
- if args.filename and args.filename != "-":
- with open(args.filename, "rb") as fp:
- data = fp.read()
- else:
- data = sys.stdin.buffer.read()
-
- try:
- html = data.decode(args.encoding, args.decode_errors)
- except UnicodeDecodeError as err:
- warning = bcolors.WARNING + "Warning:" + bcolors.ENDC
- warning += " Use the " + bcolors.OKGREEN
- warning += "--decode-errors=ignore" + bcolors.ENDC + " flag."
- print(warning)
- raise err
-
- h = HTML2Text(baseurl=baseurl)
- # handle options
- if args.ul_style_dash:
- h.ul_item_mark = "-"
- if args.em_style_asterisk:
- h.emphasis_mark = "*"
- h.strong_mark = "__"
-
- h.body_width = args.body_width
- h.google_list_indent = args.list_indent
- h.ignore_emphasis = args.ignore_emphasis
- h.ignore_links = args.ignore_links
- h.ignore_mailto_links = args.ignore_mailto_links
- h.protect_links = args.protect_links
- h.ignore_images = args.ignore_images
- h.images_as_html = args.images_as_html
- h.images_to_alt = args.images_to_alt
- h.images_with_size = args.images_with_size
- h.google_doc = args.google_doc
- h.hide_strikethrough = args.hide_strikethrough
- h.escape_snob = args.escape_snob
- h.bypass_tables = args.bypass_tables
- h.ignore_tables = args.ignore_tables
- h.single_line_break = args.single_line_break
- h.inline_links = args.inline_links
- h.unicode_snob = args.unicode_snob
- h.use_automatic_links = args.use_automatic_links
- h.skip_internal_links = args.skip_internal_links
- h.links_each_paragraph = args.links_each_paragraph
- h.mark_code = args.mark_code
- h.wrap_links = args.wrap_links
- h.wrap_list_items = args.wrap_list_items
- h.wrap_tables = args.wrap_tables
- h.pad_tables = args.pad_tables
- h.default_image_alt = args.default_image_alt
- h.open_quote = args.open_quote
- h.close_quote = args.close_quote
-
- sys.stdout.write(h.handle(html))
diff --git a/migration/html2text/config.py b/migration/html2text/config.py
deleted file mode 100644
index 0f4d29bc..00000000
--- a/migration/html2text/config.py
+++ /dev/null
@@ -1,164 +0,0 @@
-import re
-
-# Use Unicode characters instead of their ascii pseudo-replacements
-UNICODE_SNOB = True
-
-# Marker to use for marking tables for padding post processing
-TABLE_MARKER_FOR_PAD = "special_marker_for_table_padding"
-# Escape all special characters. Output is less readable, but avoids
-# corner case formatting issues.
-ESCAPE_SNOB = True
-
-# Put the links after each paragraph instead of at the end.
-LINKS_EACH_PARAGRAPH = False
-
-# Wrap long lines at position. 0 for no wrapping.
-BODY_WIDTH = 0
-
-# Don't show internal links (href="#local-anchor") -- corresponding link
-# targets won't be visible in the plain text file anyway.
-SKIP_INTERNAL_LINKS = False
-
-# Use inline, rather than reference, formatting for images and links
-INLINE_LINKS = True
-
-# Protect links from line breaks surrounding them with angle brackets (in
-# addition to their square brackets)
-PROTECT_LINKS = True
-WRAP_LINKS = True
-
-# Wrap list items.
-WRAP_LIST_ITEMS = False
-
-# Wrap tables
-WRAP_TABLES = False
-
-# Number of pixels Google indents nested lists
-GOOGLE_LIST_INDENT = 36
-
-# Values Google and others may use to indicate bold text
-BOLD_TEXT_STYLE_VALUES = ("bold", "700", "800", "900")
-
-IGNORE_ANCHORS = False
-IGNORE_MAILTO_LINKS = False
-IGNORE_IMAGES = False
-IMAGES_AS_HTML = False
-IMAGES_TO_ALT = False
-IMAGES_WITH_SIZE = False
-IGNORE_EMPHASIS = False
-MARK_CODE = True
-DECODE_ERRORS = "strict"
-DEFAULT_IMAGE_ALT = ""
-PAD_TABLES = True
-
-# Convert links with same href and text to format
-# if they are absolute links
-USE_AUTOMATIC_LINKS = True
-
-# For checking space-only lines on line 771
-RE_SPACE = re.compile(r"\s\+")
-
-RE_ORDERED_LIST_MATCHER = re.compile(r"\d+\.\s")
-RE_UNORDERED_LIST_MATCHER = re.compile(r"[-\*\+]\s")
-RE_MD_CHARS_MATCHER = re.compile(r"([\\\[\]\(\)])")
-RE_MD_CHARS_MATCHER_ALL = re.compile(r"([`\*_{}\[\]\(\)#!])")
-
-# to find links in the text
-RE_LINK = re.compile(r"(\[.*?\] ?\(.*?\))|(\[.*?\]:.*?)")
-
-# to find table separators
-RE_TABLE = re.compile(r" \| ")
-
-RE_MD_DOT_MATCHER = re.compile(
- r"""
- ^ # start of line
- (\s*\d+) # optional whitespace and a number
- (\.) # dot
- (?=\s) # lookahead assert whitespace
- """,
- re.MULTILINE | re.VERBOSE,
-)
-RE_MD_PLUS_MATCHER = re.compile(
- r"""
- ^
- (\s*)
- (\+)
- (?=\s)
- """,
- flags=re.MULTILINE | re.VERBOSE,
-)
-RE_MD_DASH_MATCHER = re.compile(
- r"""
- ^
- (\s*)
- (-)
- (?=\s|\-) # followed by whitespace (bullet list, or spaced out hr)
- # or another dash (header or hr)
- """,
- flags=re.MULTILINE | re.VERBOSE,
-)
-RE_SLASH_CHARS = r"\`*_{}[]()#+-.!"
-RE_MD_BACKSLASH_MATCHER = re.compile(
- r"""
- (\\) # match one slash
- (?=[%s]) # followed by a char that requires escaping
- """
- % re.escape(RE_SLASH_CHARS),
- flags=re.VERBOSE,
-)
-
-UNIFIABLE = {
- "rsquo": "'",
- "lsquo": "'",
- "rdquo": '"',
- "ldquo": '"',
- "copy": "(C)",
- "mdash": "--",
- "nbsp": " ",
- "rarr": "->",
- "larr": "<-",
- "middot": "*",
- "ndash": "-",
- "oelig": "oe",
- "aelig": "ae",
- "agrave": "a",
- "aacute": "a",
- "acirc": "a",
- "atilde": "a",
- "auml": "a",
- "aring": "a",
- "egrave": "e",
- "eacute": "e",
- "ecirc": "e",
- "euml": "e",
- "igrave": "i",
- "iacute": "i",
- "icirc": "i",
- "iuml": "i",
- "ograve": "o",
- "oacute": "o",
- "ocirc": "o",
- "otilde": "o",
- "ouml": "o",
- "ugrave": "u",
- "uacute": "u",
- "ucirc": "u",
- "uuml": "u",
- "lrm": "",
- "rlm": "",
-}
-
-# Format tables in HTML rather than Markdown syntax
-BYPASS_TABLES = False
-# Ignore table-related tags (table, th, td, tr) while keeping rows
-IGNORE_TABLES = False
-
-
-# Use a single line break after a block element rather than two line breaks.
-# NOTE: Requires body width setting to be 0.
-SINGLE_LINE_BREAK = False
-
-
-# Use double quotation marks when converting the tag.
-OPEN_QUOTE = '"'
-CLOSE_QUOTE = '"'
diff --git a/migration/html2text/elements.py b/migration/html2text/elements.py
deleted file mode 100644
index 2533ec08..00000000
--- a/migration/html2text/elements.py
+++ /dev/null
@@ -1,18 +0,0 @@
-from typing import Dict, Optional
-
-
-class AnchorElement:
- __slots__ = ["attrs", "count", "outcount"]
-
- def __init__(self, attrs: Dict[str, Optional[str]], count: int, outcount: int):
- self.attrs = attrs
- self.count = count
- self.outcount = outcount
-
-
-class ListElement:
- __slots__ = ["name", "num"]
-
- def __init__(self, name: str, num: int):
- self.name = name
- self.num = num
diff --git a/migration/html2text/py.typed b/migration/html2text/py.typed
deleted file mode 100644
index e69de29b..00000000
diff --git a/migration/html2text/typing.py b/migration/html2text/typing.py
deleted file mode 100644
index 6e17fed2..00000000
--- a/migration/html2text/typing.py
+++ /dev/null
@@ -1,3 +0,0 @@
-class OutCallback:
- def __call__(self, s: str) -> None:
- ...
diff --git a/migration/html2text/utils.py b/migration/html2text/utils.py
deleted file mode 100644
index 1cf22b52..00000000
--- a/migration/html2text/utils.py
+++ /dev/null
@@ -1,287 +0,0 @@
-import html.entities
-from typing import Dict, List, Optional
-
-from . import config
-
-unifiable_n = {
- html.entities.name2codepoint[k]: v
- for k, v in config.UNIFIABLE.items()
- if k != "nbsp"
-}
-
-
-def hn(tag: str) -> int:
- if tag[0] == "h" and len(tag) == 2:
- n = tag[1]
- if "0" < n <= "9":
- return int(n)
- return 0
-
-
-def dumb_property_dict(style: str) -> Dict[str, str]:
- """
- :returns: A hash of css attributes
- """
- return {
- x.strip().lower(): y.strip().lower()
- for x, y in [z.split(":", 1) for z in style.split(";") if ":" in z]
- }
-
-
-def dumb_css_parser(data: str) -> Dict[str, Dict[str, str]]:
- """
- :type data: str
-
- :returns: A hash of css selectors, each of which contains a hash of
- css attributes.
- :rtype: dict
- """
- # remove @import sentences
- data += ";"
- importIndex = data.find("@import")
- while importIndex != -1:
- data = data[0:importIndex] + data[data.find(";", importIndex) + 1 :]
- importIndex = data.find("@import")
-
- # parse the css. reverted from dictionary comprehension in order to
- # support older pythons
- pairs = [x.split("{") for x in data.split("}") if "{" in x.strip()]
- try:
- elements = {a.strip(): dumb_property_dict(b) for a, b in pairs}
- except ValueError:
- elements = {} # not that important
-
- return elements
-
-
-def element_style(
- attrs: Dict[str, Optional[str]],
- style_def: Dict[str, Dict[str, str]],
- parent_style: Dict[str, str],
-) -> Dict[str, str]:
- """
- :type attrs: dict
- :type style_def: dict
- :type style_def: dict
-
- :returns: A hash of the 'final' style attributes of the element
- :rtype: dict
- """
- style = parent_style.copy()
- if attrs.get("class"):
- for css_class in attrs["class"].split():
- css_style = style_def.get("." + css_class, {})
- style.update(css_style)
- if attrs.get("style"):
- immediate_style = dumb_property_dict(attrs["style"])
- style.update(immediate_style)
-
- return style
-
-
-def google_list_style(style: Dict[str, str]) -> str:
- """
- Finds out whether this is an ordered or unordered list
-
- :type style: dict
-
- :rtype: str
- """
- if "list-style-type" in style:
- list_style = style["list-style-type"]
- if list_style in ["disc", "circle", "square", "none"]:
- return "ul"
-
- return "ol"
-
-
-def google_has_height(style: Dict[str, str]) -> bool:
- """
- Check if the style of the element has the 'height' attribute
- explicitly defined
-
- :type style: dict
-
- :rtype: bool
- """
- return "height" in style
-
-
-def google_text_emphasis(style: Dict[str, str]) -> List[str]:
- """
- :type style: dict
-
- :returns: A list of all emphasis modifiers of the element
- :rtype: list
- """
- emphasis = []
- if "text-decoration" in style:
- emphasis.append(style["text-decoration"])
- if "font-style" in style:
- emphasis.append(style["font-style"])
- if "font-weight" in style:
- emphasis.append(style["font-weight"])
-
- return emphasis
-
-
-def google_fixed_width_font(style: Dict[str, str]) -> bool:
- """
- Check if the css of the current element defines a fixed width font
-
- :type style: dict
-
- :rtype: bool
- """
- font_family = ""
- if "font-family" in style:
- font_family = style["font-family"]
- return "courier new" == font_family or "consolas" == font_family
-
-
-def list_numbering_start(attrs: Dict[str, Optional[str]]) -> int:
- """
- Extract numbering from list element attributes
-
- :type attrs: dict
-
- :rtype: int or None
- """
- if attrs.get("start"):
- try:
- return int(attrs["start"]) - 1
- except ValueError:
- pass
-
- return 0
-
-
-def skipwrap(
- para: str, wrap_links: bool, wrap_list_items: bool, wrap_tables: bool
-) -> bool:
- # If it appears to contain a link
- # don't wrap
- if not wrap_links and config.RE_LINK.search(para):
- return True
- # If the text begins with four spaces or one tab, it's a code block;
- # don't wrap
- if para[0:4] == " " or para[0] == "\t":
- return True
-
- # If the text begins with only two "--", possibly preceded by
- # whitespace, that's an emdash; so wrap.
- stripped = para.lstrip()
- if stripped[0:2] == "--" and len(stripped) > 2 and stripped[2] != "-":
- return False
-
- # I'm not sure what this is for; I thought it was to detect lists,
- # but there's a
-inside- case in one of the tests that
- # also depends upon it.
- if stripped[0:1] in ("-", "*") and not stripped[0:2] == "**":
- return not wrap_list_items
-
- # If text contains a pipe character it is likely a table
- if not wrap_tables and config.RE_TABLE.search(para):
- return True
-
- # If the text begins with a single -, *, or +, followed by a space,
- # or an integer, followed by a ., followed by a space (in either
- # case optionally proceeded by whitespace), it's a list; don't wrap.
- return bool(
- config.RE_ORDERED_LIST_MATCHER.match(stripped)
- or config.RE_UNORDERED_LIST_MATCHER.match(stripped)
- )
-
-
-def escape_md(text: str) -> str:
- """
- Escapes markdown-sensitive characters within other markdown
- constructs.
- """
- return config.RE_MD_CHARS_MATCHER.sub(r"\\\1", text)
-
-
-def escape_md_section(text: str, snob: bool = False) -> str:
- """
- Escapes markdown-sensitive characters across whole document sections.
- """
- text = config.RE_MD_BACKSLASH_MATCHER.sub(r"\\\1", text)
-
- if snob:
- text = config.RE_MD_CHARS_MATCHER_ALL.sub(r"\\\1", text)
-
- text = config.RE_MD_DOT_MATCHER.sub(r"\1\\\2", text)
- text = config.RE_MD_PLUS_MATCHER.sub(r"\1\\\2", text)
- text = config.RE_MD_DASH_MATCHER.sub(r"\1\\\2", text)
-
- return text
-
-
-def reformat_table(lines: List[str], right_margin: int) -> List[str]:
- """
- Given the lines of a table
- padds the cells and returns the new lines
- """
- # find the maximum width of the columns
- max_width = [len(x.rstrip()) + right_margin for x in lines[0].split("|")]
- max_cols = len(max_width)
- for line in lines:
- cols = [x.rstrip() for x in line.split("|")]
- num_cols = len(cols)
-
- # don't drop any data if colspan attributes result in unequal lengths
- if num_cols < max_cols:
- cols += [""] * (max_cols - num_cols)
- elif max_cols < num_cols:
- max_width += [len(x) + right_margin for x in cols[-(num_cols - max_cols) :]]
- max_cols = num_cols
-
- max_width = [
- max(len(x) + right_margin, old_len) for x, old_len in zip(cols, max_width)
- ]
-
- # reformat
- new_lines = []
- for line in lines:
- cols = [x.rstrip() for x in line.split("|")]
- if set(line.strip()) == set("-|"):
- filler = "-"
- new_cols = [
- x.rstrip() + (filler * (M - len(x.rstrip())))
- for x, M in zip(cols, max_width)
- ]
- new_lines.append("|-" + "|".join(new_cols) + "|")
- else:
- filler = " "
- new_cols = [
- x.rstrip() + (filler * (M - len(x.rstrip())))
- for x, M in zip(cols, max_width)
- ]
- new_lines.append("| " + "|".join(new_cols) + "|")
- return new_lines
-
-
-def pad_tables_in_text(text: str, right_margin: int = 1) -> str:
- """
- Provide padding for tables in the text
- """
- lines = text.split("\n")
- table_buffer = [] # type: List[str]
- table_started = False
- new_lines = []
- for line in lines:
- # Toggle table started
- if config.TABLE_MARKER_FOR_PAD in line:
- table_started = not table_started
- if not table_started:
- table = reformat_table(table_buffer, right_margin)
- new_lines.extend(table)
- table_buffer = []
- new_lines.append("")
- continue
- # Process lines
- if table_started:
- table_buffer.append(line)
- else:
- new_lines.append(line)
- return "\n".join(new_lines)
diff --git a/migration/tables/__init__.py b/migration/tables/__init__.py
deleted file mode 100644
index 8e7ee938..00000000
--- a/migration/tables/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-__all__ = (["users", "topics", "content_items", "comments"],)
diff --git a/migration/tables/comments.py b/migration/tables/comments.py
deleted file mode 100644
index d5f35c20..00000000
--- a/migration/tables/comments.py
+++ /dev/null
@@ -1,209 +0,0 @@
-from datetime import datetime, timezone
-
-from dateutil.parser import parse as date_parse
-
-from services.db import local_session
-from migration.html2text import html2text
-from orm.reaction import Reaction, ReactionKind
-from orm.shout import ShoutReactionsFollower
-from orm.topic import TopicFollower
-from orm.user import User
-from orm.shout import Shout
-
-ts = datetime.now(tz=timezone.utc)
-
-
-def auto_followers(session, topics, reaction_dict):
- # creating shout's reactions following for reaction author
- following1 = (
- session.query(ShoutReactionsFollower)
- .where(ShoutReactionsFollower.follower == reaction_dict["createdBy"])
- .filter(ShoutReactionsFollower.shout == reaction_dict["shout"])
- .first()
- )
- if not following1:
- following1 = ShoutReactionsFollower.create(
- follower=reaction_dict["createdBy"], shout=reaction_dict["shout"], auto=True
- )
- session.add(following1)
- # creating topics followings for reaction author
- for t in topics:
- tf = (
- session.query(TopicFollower)
- .where(TopicFollower.follower == reaction_dict["createdBy"])
- .filter(TopicFollower.topic == t["id"])
- .first()
- )
- if not tf:
- topic_following = TopicFollower.create(
- follower=reaction_dict["createdBy"], topic=t["id"], auto=True
- )
- session.add(topic_following)
-
-
-def migrate_ratings(session, entry, reaction_dict):
- for comment_rating_old in entry.get("ratings", []):
- rater = (
- session.query(User)
- .filter(User.oid == comment_rating_old["createdBy"])
- .first()
- )
- re_reaction_dict = {
- "shout": reaction_dict["shout"],
- "replyTo": reaction_dict["id"],
- "kind": ReactionKind.LIKE
- if comment_rating_old["value"] > 0
- else ReactionKind.DISLIKE,
- "createdBy": rater.id if rater else 1,
- }
- cts = comment_rating_old.get("createdAt")
- if cts:
- re_reaction_dict["createdAt"] = date_parse(cts)
- try:
- # creating reaction from old rating
- rr = Reaction.create(**re_reaction_dict)
- following2 = (
- session.query(ShoutReactionsFollower)
- .where(ShoutReactionsFollower.follower == re_reaction_dict["createdBy"])
- .filter(ShoutReactionsFollower.shout == rr.shout)
- .first()
- )
- if not following2:
- following2 = ShoutReactionsFollower.create(
- follower=re_reaction_dict["createdBy"], shout=rr.shout, auto=True
- )
- session.add(following2)
- session.add(rr)
-
- except Exception as e:
- print("[migration] comment rating error: %r" % re_reaction_dict)
- raise e
- session.commit()
-
-
-async def migrate(entry, storage):
- """
- {
- "_id": "hdtwS8fSyFLxXCgSC",
- "body": "",
- "contentItem": "mnK8KsJHPRi8DrybQ",
- "createdBy": "bMFPuyNg6qAD2mhXe",
- "thread": "01/",
- "createdAt": "2016-04-19 04:33:53+00:00",
- "ratings": [
- { "createdBy": "AqmRukvRiExNpAe8C", "value": 1 },
- { "createdBy": "YdE76Wth3yqymKEu5", "value": 1 }
- ],
- "rating": 2,
- "updatedAt": "2020-05-27 19:22:57.091000+00:00",
- "updatedBy": "0"
- }
- ->
- type Reaction {
- id: Int!
- shout: Shout!
- createdAt: DateTime!
- createdBy: User!
- updatedAt: DateTime
- deletedAt: DateTime
- deletedBy: User
- range: String # full / 0:2340
- kind: ReactionKind!
- body: String
- replyTo: Reaction
- stat: Stat
- old_id: String
- old_thread: String
- }
- """
- old_ts = entry.get("createdAt")
- reaction_dict = {
- "createdAt": (ts if not old_ts else date_parse(old_ts)),
- "body": html2text(entry.get("body", "")),
- "oid": entry["_id"],
- }
- shout_oid = entry.get("contentItem")
- if shout_oid not in storage["shouts"]["by_oid"]:
- if len(storage["shouts"]["by_oid"]) > 0:
- return shout_oid
- else:
- print("[migration] no shouts migrated yet")
- raise Exception
- return
- else:
- stage = "started"
- reaction = None
- with local_session() as session:
- author = session.query(User).filter(User.oid == entry["createdBy"]).first()
- old_shout = storage["shouts"]["by_oid"].get(shout_oid)
- if not old_shout:
- raise Exception("no old shout in storage")
- else:
- stage = "author and old id found"
- try:
- shout = (
- session.query(Shout)
- .where(Shout.slug == old_shout["slug"])
- .one()
- )
- if shout:
- reaction_dict["shout"] = shout.id
- reaction_dict["createdBy"] = author.id if author else 1
- reaction_dict["kind"] = ReactionKind.COMMENT
-
- # creating reaction from old comment
- reaction = Reaction.create(**reaction_dict)
- session.add(reaction)
- # session.commit()
- stage = "new reaction commited"
- reaction_dict = reaction.dict()
- topics = [t.dict() for t in shout.topics]
- auto_followers(session, topics, reaction_dict)
-
- migrate_ratings(session, entry, reaction_dict)
-
- return reaction
- except Exception as e:
- print(e)
- print(reaction)
- raise Exception(stage)
- return
-
-
-def migrate_2stage(old_comment, idmap):
- if old_comment.get("body"):
- new_id = idmap.get(old_comment.get("oid"))
- new_id = idmap.get(old_comment.get("_id"))
- if new_id:
- new_replyto_id = None
- old_replyto_id = old_comment.get("replyTo")
- if old_replyto_id:
- new_replyto_id = int(idmap.get(old_replyto_id, "0"))
- with local_session() as session:
- comment = session.query(Reaction).where(Reaction.id == new_id).first()
- try:
- if new_replyto_id:
- new_reply = (
- session.query(Reaction)
- .where(Reaction.id == new_replyto_id)
- .first()
- )
- if not new_reply:
- print(new_replyto_id)
- raise Exception("cannot find reply by id!")
- comment.replyTo = new_reply.id
- session.add(comment)
- srf = (
- session.query(ShoutReactionsFollower)
- .where(ShoutReactionsFollower.shout == comment.shout)
- .filter(ShoutReactionsFollower.follower == comment.createdBy)
- .first()
- )
- if not srf:
- srf = ShoutReactionsFollower.create(
- shout=comment.shout, follower=comment.createdBy, auto=True
- )
- session.add(srf)
- session.commit()
- except Exception:
- raise Exception("cannot find a comment by oldid")
diff --git a/migration/tables/content_items.py b/migration/tables/content_items.py
deleted file mode 100644
index ec263fc6..00000000
--- a/migration/tables/content_items.py
+++ /dev/null
@@ -1,420 +0,0 @@
-from datetime import datetime, timezone
-import json
-from dateutil.parser import parse as date_parse
-from sqlalchemy.exc import IntegrityError
-from transliterate import translit
-from services.db import local_session
-from migration.extract import extract_html, extract_media
-from orm.reaction import Reaction, ReactionKind
-from orm.shout import Shout, ShoutTopic, ShoutReactionsFollower
-from orm.user import User
-from orm.topic import TopicFollower, Topic
-from services.viewed import ViewedStorage
-import re
-
-OLD_DATE = "2016-03-05 22:22:00.350000"
-ts = datetime.now(tz=timezone.utc)
-type2layout = {
- "Article": "article",
- "Literature": "literature",
- "Music": "audio",
- "Video": "video",
- "Image": "image",
-}
-
-anondict = {"slug": "anonymous", "id": 1, "name": "Аноним"}
-discours = {"slug": "discours", "id": 2, "name": "Дискурс"}
-
-
-def get_shout_slug(entry):
- slug = entry.get("slug", "")
- if not slug:
- for friend in entry.get("friendlySlugs", []):
- slug = friend.get("slug", "")
- if slug:
- break
- slug = re.sub("[^0-9a-zA-Z]+", "-", slug)
- return slug
-
-
-def create_author_from_app(app):
- user = None
- userdata = None
- # check if email is used
- if app["email"]:
- with local_session() as session:
- user = session.query(User).where(User.email == app["email"]).first()
- if not user:
- # print('[migration] app %r' % app)
- name = app.get("name")
- if name:
- slug = translit(name, "ru", reversed=True).lower()
- slug = re.sub("[^0-9a-zA-Z]+", "-", slug)
- print("[migration] created slug %s" % slug)
- # check if slug is used
- if slug:
- user = session.query(User).where(User.slug == slug).first()
-
- # get slug from email
- if user:
- slug = app["email"].split("@")[0]
- user = session.query(User).where(User.slug == slug).first()
- # one more try
- if user:
- slug += "-author"
- user = (
- session.query(User).where(User.slug == slug).first()
- )
-
- # create user with application data
- if not user:
- userdata = {
- "username": app["email"],
- "email": app["email"],
- "name": app.get("name", ""),
- "bio": app.get("bio", ""),
- "emailConfirmed": False,
- "slug": slug,
- "createdAt": ts,
- "lastSeen": ts,
- }
- # print('[migration] userdata %r' % userdata)
- user = User.create(**userdata)
- session.add(user)
- session.commit()
- userdata["id"] = user.id
-
- userdata = user.dict()
- return userdata
- else:
- raise Exception("app is not ok", app)
-
-
-async def create_shout(shout_dict):
- s = Shout.create(**shout_dict)
- author = s.authors[0]
- with local_session() as session:
- srf = (
- session.query(ShoutReactionsFollower)
- .where(ShoutReactionsFollower.shout == s.id)
- .filter(ShoutReactionsFollower.follower == author.id)
- .first()
- )
- if not srf:
- srf = ShoutReactionsFollower.create(
- shout=s.id, follower=author.id, auto=True
- )
- session.add(srf)
- session.commit()
- return s
-
-
-async def get_user(entry, storage):
- app = entry.get("application")
- userdata = None
- user_oid = None
- if app:
- userdata = create_author_from_app(app)
- else:
- user_oid = entry.get("createdBy")
- if user_oid == "0":
- userdata = discours
- elif user_oid:
- userdata = storage["users"]["by_oid"].get(user_oid)
- if not userdata:
- print("no userdata by oid, anonymous")
- userdata = anondict
- print(app)
- # cleanup slug
- if userdata:
- slug = userdata.get("slug", "")
- if slug:
- slug = re.sub("[^0-9a-zA-Z]+", "-", slug)
- userdata["slug"] = slug
- else:
- userdata = anondict
-
- user = await process_user(userdata, storage, user_oid)
- return user, user_oid
-
-
-async def migrate(entry, storage):
- author, user_oid = await get_user(entry, storage)
- r = {
- "layout": type2layout[entry["type"]],
- "title": entry["title"],
- "authors": [
- author,
- ],
- "slug": get_shout_slug(entry),
- "cover": (
- "https://assets.discours.io/unsafe/1600x/" + entry["thumborId"]
- if entry.get("thumborId")
- else entry.get("image", {}).get("url")
- ),
- "visibility": "public" if entry.get("published") else "authors",
- "publishedAt": date_parse(entry.get("publishedAt"))
- if entry.get("published")
- else None,
- "deletedAt": date_parse(entry.get("deletedAt"))
- if entry.get("deletedAt")
- else None,
- "createdAt": date_parse(entry.get("createdAt", OLD_DATE)),
- "updatedAt": date_parse(entry["updatedAt"]) if "updatedAt" in entry else ts,
- "topics": await add_topics_follower(entry, storage, author),
- "body": extract_html(entry, cleanup=True),
- }
-
- # main topic patch
- r["mainTopic"] = r["topics"][0]
-
- # published author auto-confirm
- if entry.get("published"):
- with local_session() as session:
- # update user.emailConfirmed if published
- author.emailConfirmed = True
- session.add(author)
- session.commit()
-
- # media
- media = extract_media(entry)
- r["media"] = json.dumps(media, ensure_ascii=True) if media else None
-
- # ----------------------------------- copy
- shout_dict = r.copy()
- del shout_dict["topics"]
-
- try:
- # save shout to db
- shout_dict["oid"] = entry.get("_id", "")
- shout = await create_shout(shout_dict)
- except IntegrityError as e:
- print("[migration] create_shout integrity error", e)
- shout = await resolve_create_shout(shout_dict)
- except Exception as e:
- raise Exception(e)
-
- # udpate data
- shout_dict = shout.dict()
- shout_dict["authors"] = [
- author.dict(),
- ]
-
- # shout topics aftermath
- shout_dict["topics"] = await topics_aftermath(r, storage)
-
- # content_item ratings to reactions
- await content_ratings_to_reactions(entry, shout_dict["slug"])
-
- # shout views
- await ViewedStorage.increment(
- shout_dict["slug"], amount=entry.get("views", 1), viewer="old-discours"
- )
- # del shout_dict['ratings']
-
- storage["shouts"]["by_oid"][entry["_id"]] = shout_dict
- storage["shouts"]["by_slug"][shout_dict["slug"]] = shout_dict
- return shout_dict
-
-
-async def add_topics_follower(entry, storage, user):
- topics = set([])
- category = entry.get("category")
- topics_by_oid = storage["topics"]["by_oid"]
- oids = [
- category,
- ] + entry.get("tags", [])
- for toid in oids:
- tslug = topics_by_oid.get(toid, {}).get("slug")
- if tslug:
- topics.add(tslug)
- ttt = list(topics)
- # add author as TopicFollower
- with local_session() as session:
- for tpcslug in topics:
- try:
- tpc = session.query(Topic).where(Topic.slug == tpcslug).first()
- if tpc:
- tf = (
- session.query(TopicFollower)
- .where(TopicFollower.follower == user.id)
- .filter(TopicFollower.topic == tpc.id)
- .first()
- )
- if not tf:
- tf = TopicFollower.create(
- topic=tpc.id, follower=user.id, auto=True
- )
- session.add(tf)
- session.commit()
- except IntegrityError:
- print("[migration.shout] hidden by topic " + tpc.slug)
- # main topic
- maintopic = storage["replacements"].get(topics_by_oid.get(category, {}).get("slug"))
- if maintopic in ttt:
- ttt.remove(maintopic)
- ttt.insert(0, maintopic)
- return ttt
-
-
-async def process_user(userdata, storage, oid):
- with local_session() as session:
- uid = userdata.get("id") # anonymous as
- if not uid:
- print(userdata)
- print("has no id field, set it @anonymous")
- userdata = anondict
- uid = 1
- user = session.query(User).filter(User.id == uid).first()
- if not user:
- try:
- slug = userdata["slug"].lower().strip()
- slug = re.sub("[^0-9a-zA-Z]+", "-", slug)
- userdata["slug"] = slug
- user = User.create(**userdata)
- session.add(user)
- session.commit()
- except IntegrityError:
- print(f"[migration] user creating with slug {userdata['slug']}")
- print("[migration] from userdata")
- print(userdata)
- raise Exception(
- "[migration] cannot create user in content_items.get_user()"
- )
- if user.id == 946:
- print("[migration] ***************** ALPINA")
- if user.id == 2:
- print("[migration] +++++++++++++++++ DISCOURS")
- userdata["id"] = user.id
- userdata["createdAt"] = user.createdAt
- storage["users"]["by_slug"][userdata["slug"]] = userdata
- storage["users"]["by_oid"][oid] = userdata
- if not user:
- raise Exception("could not get a user")
- return user
-
-
-async def resolve_create_shout(shout_dict):
- with local_session() as session:
- s = session.query(Shout).filter(Shout.slug == shout_dict["slug"]).first()
- bump = False
- if s:
- if s.createdAt != shout_dict["createdAt"]:
- # create new with different slug
- shout_dict["slug"] += "-" + shout_dict["layout"]
- try:
- await create_shout(shout_dict)
- except IntegrityError as e:
- print(e)
- bump = True
- else:
- # update old
- for key in shout_dict:
- if key in s.__dict__:
- if s.__dict__[key] != shout_dict[key]:
- print(
- "[migration] shout already exists, but differs in %s"
- % key
- )
- bump = True
- else:
- print("[migration] shout already exists, but lacks %s" % key)
- bump = True
- if bump:
- s.update(shout_dict)
- else:
- print("[migration] something went wrong with shout: \n%r" % shout_dict)
- raise Exception("")
- session.commit()
- return s
-
-
-async def topics_aftermath(entry, storage):
- r = []
- for tpc in filter(lambda x: bool(x), entry["topics"]):
- oldslug = tpc
- newslug = storage["replacements"].get(oldslug, oldslug)
-
- if newslug:
- with local_session() as session:
- shout = session.query(Shout).where(Shout.slug == entry["slug"]).first()
- new_topic = session.query(Topic).where(Topic.slug == newslug).first()
-
- shout_topic_old = (
- session.query(ShoutTopic)
- .join(Shout)
- .join(Topic)
- .filter(Shout.slug == entry["slug"])
- .filter(Topic.slug == oldslug)
- .first()
- )
- if shout_topic_old:
- shout_topic_old.update({"topic": new_topic.id})
- else:
- shout_topic_new = (
- session.query(ShoutTopic)
- .join(Shout)
- .join(Topic)
- .filter(Shout.slug == entry["slug"])
- .filter(Topic.slug == newslug)
- .first()
- )
- if not shout_topic_new:
- try:
- ShoutTopic.create(
- **{"shout": shout.id, "topic": new_topic.id}
- )
- except Exception:
- print("[migration] shout topic error: " + newslug)
- session.commit()
- if newslug not in r:
- r.append(newslug)
- else:
- print("[migration] ignored topic slug: \n%r" % tpc["slug"])
- # raise Exception
- return r
-
-
-async def content_ratings_to_reactions(entry, slug):
- try:
- with local_session() as session:
- for content_rating in entry.get("ratings", []):
- rater = (
- session.query(User)
- .filter(User.oid == content_rating["createdBy"])
- .first()
- ) or User.default_user
- shout = session.query(Shout).where(Shout.slug == slug).first()
- cts = content_rating.get("createdAt")
- reaction_dict = {
- "createdAt": date_parse(cts) if cts else None,
- "kind": ReactionKind.LIKE
- if content_rating["value"] > 0
- else ReactionKind.DISLIKE,
- "createdBy": rater.id,
- "shout": shout.id,
- }
- reaction = (
- session.query(Reaction)
- .filter(Reaction.shout == reaction_dict["shout"])
- .filter(Reaction.createdBy == reaction_dict["createdBy"])
- .filter(Reaction.kind == reaction_dict["kind"])
- .first()
- )
- if reaction:
- k = (
- ReactionKind.AGREE
- if content_rating["value"] > 0
- else ReactionKind.DISAGREE
- )
- reaction_dict["kind"] = k
- reaction.update(reaction_dict)
- session.add(reaction)
- else:
- rea = Reaction.create(**reaction_dict)
- session.add(rea)
- # shout_dict['ratings'].append(reaction_dict)
-
- session.commit()
- except Exception:
- print("[migration] content_item.ratings error: \n%r" % content_rating)
diff --git a/migration/tables/remarks.py b/migration/tables/remarks.py
deleted file mode 100644
index 9a426346..00000000
--- a/migration/tables/remarks.py
+++ /dev/null
@@ -1,34 +0,0 @@
-from services.db import local_session
-from migration.extract import extract_md
-from migration.html2text import html2text
-from orm.reaction import Reaction, ReactionKind
-
-
-def migrate(entry, storage):
- post_oid = entry["contentItem"]
- print(post_oid)
- shout_dict = storage["shouts"]["by_oid"].get(post_oid)
- if shout_dict:
- print(shout_dict["body"])
- remark = {
- "shout": shout_dict["id"],
- "body": extract_md(html2text(entry["body"]), shout_dict),
- "kind": ReactionKind.REMARK,
- }
-
- if entry.get("textBefore"):
- remark["range"] = (
- str(shout_dict["body"].index(entry["textBefore"] or ""))
- + ":"
- + str(
- shout_dict["body"].index(entry["textAfter"] or "")
- + len(entry["textAfter"] or "")
- )
- )
-
- with local_session() as session:
- rmrk = Reaction.create(**remark)
- session.commit()
- del rmrk["_sa_instance_state"]
- return rmrk
- return
diff --git a/migration/tables/replacements.json b/migration/tables/replacements.json
deleted file mode 100644
index 5e042e66..00000000
--- a/migration/tables/replacements.json
+++ /dev/null
@@ -1,828 +0,0 @@
-{
- "207": "207",
- "1990-e": "90s",
- "2000-e": "2000s",
- "90-e": "90s",
- "Georgia": "georgia",
- "Japan": "japan",
- "Sweden": "sweden",
- "abstraktsiya": "abstract",
- "absurdism": "absurdism",
- "acclimatization": "acclimatisation",
- "activism": "activism",
- "adolf-gitler": "adolf-hitler",
- "afrika": "africa",
- "agata-kristi": "agatha-christie",
- "agressivnoe-povedenie": "agression",
- "agressiya": "agression",
- "aktsii": "actions",
- "aktsionizm": "actionism",
- "alber-kamyu": "albert-kamus",
- "albomy": "albums",
- "aleksandr-griboedov": "aleksander-griboedov",
- "aleksandr-pushkin": "aleksander-pushkin",
- "aleksandr-solzhenitsyn": "aleksander-solzhenitsyn",
- "aleksandr-vvedenskiy": "aleksander-vvedensky",
- "aleksey-navalnyy": "alexey-navalny",
- "alfavit": "alphabet",
- "alkogol": "alcohol",
- "alternativa": "alternative",
- "alternative": "alternative",
- "alternativnaya-istoriya": "alternative-history",
- "amerika": "america",
- "anarhizm": "anarchism",
- "anatoliy-mariengof": "anatoly-mariengof",
- "ancient-russia": "ancient-russia",
- "andegraund": "underground",
- "andrey-platonov": "andrey-platonov",
- "andrey-rodionov": "andrey-rodionov",
- "andrey-tarkovskiy": "andrey-tarkovsky",
- "angliyskie-istorii": "english-stories",
- "angliyskiy-yazyk": "english-langugae",
- "ango": "ango",
- "animation": "animation",
- "animatsiya": "animation",
- "anime": "anime",
- "anri-volohonskiy": "anri-volohonsky",
- "antifashizm": "anti-faschism",
- "antiquity": "antiquity",
- "antiutopiya": "dystopia",
- "anton-dolin": "anton-dolin",
- "antropology": "antropology",
- "antropotsen": "antropocenus",
- "architecture": "architecture",
- "arheologiya": "archeology",
- "arhetipy": "archetypes",
- "arhiv": "archive",
- "aristokraty": "aristocracy",
- "aristotel": "aristotle",
- "arktika": "arctic",
- "armiya": "army",
- "armiya-1": "army",
- "art": "art",
- "art-is": "art-is",
- "artists": "artists",
- "ateizm": "atheism",
- "audio-poetry": "audio-poetry",
- "audiopoeziya": "audio-poetry",
- "audiospektakl": "audio-spectacles",
- "auktsyon": "auktsyon",
- "avangard": "avantgarde",
- "avtofikshn": "autofiction",
- "avtorskaya-pesnya": "bardsongs",
- "azbuka-immigratsii": "immigration-basics",
- "aziatskiy-kinematograf": "asian-cinema",
- "b-movie": "b-movie",
- "bannye-chteniya": "sauna-reading",
- "bardsongs": "bardsongs",
- "bdsm": "bdsm",
- "beecake": "beecake",
- "belarus": "belarus",
- "belgiya": "belgium",
- "bertold-breht": "berttold-brecht",
- "bezumie": "madness",
- "biography": "biography",
- "biologiya": "biology",
- "bipolyarnoe-rasstroystvo": "bipolar-disorder",
- "bitniki": "beatnics",
- "biznes": "business",
- "blizhniy-vostok": "middle-east",
- "blizost": "closeness",
- "blocked-in-russia": "blocked-in-russia",
- "blokada": "blockade",
- "bob-dilan": "bob-dylan",
- "bog": "god",
- "bol": "pain",
- "bolotnoe-delo": "bolotnaya-case",
- "books": "books",
- "boris-eltsin": "boris-eltsin",
- "boris-godunov": "boris-godunov",
- "boris-grebenschikov": "boris-grebenschikov",
- "boris-nemtsov": "boris-nemtsov",
- "boris-pasternak": "boris-pasternak",
- "brak": "marriage",
- "bret-iston-ellis": "bret-iston-ellis",
- "buddizm": "buddhism",
- "bullying": "bullying",
- "bunt": "riot",
- "burning-man": "burning-man",
- "bytie": "being",
- "byurokratiya": "bureaucracy",
- "capitalism": "capitalism",
- "censored-in-russia": "censored-in-russia",
- "ch-rno-beloe": "black-and-white",
- "ch-rnyy-yumor": "black-humour",
- "chapters": "chapters",
- "charity": "charity",
- "chayldfri": "childfree",
- "chechenskaya-voyna": "chechen-war",
- "chechnya": "chechnya",
- "chelovek": "male",
- "chernobyl": "chernobyl",
- "chernyy-yumor": "black-humour",
- "children": "children",
- "china": "china",
- "chinovniki": "bureaucracy",
- "chukotka": "chukotka",
- "chuma": "plague",
- "church": "church",
- "cinema": "cinema",
- "city": "city",
- "civil-position": "civil-position",
- "clips": "clips",
- "collage": "collage",
- "comics": "comics",
- "conspiracy-theory": "conspiracy-theory",
- "contemporary-art": "contemporary-art",
- "contemporary-poetry": "poetry",
- "contemporary-prose": "prose",
- "coronavirus": "coronavirus",
- "corruption": "corruption",
- "creative-writing-school": "creative-writing-school",
- "crime": "crime",
- "criticism": "criticism",
- "critiques": "reviews",
- "culture": "culture",
- "dadaizm": "dadaism",
- "daniel-defo": "daniel-defoe",
- "daniil-harms": "daniil-kharms",
- "dante-aligeri": "dante-alighieri",
- "darkveyv": "darkwave",
- "death": "death",
- "debaty": "debats",
- "delo-seti": "seti-case",
- "democracy": "democracy",
- "demografiya": "demographics",
- "demonstrations": "demonstrations",
- "depression": "depression",
- "derevnya": "village",
- "derrida": "derrida",
- "design": "design",
- "detskie-doma": "orphanages",
- "detstvo": "childhood",
- "devid-linch": "david-linch",
- "devyanostye": "90s",
- "dialog": "dialogue",
- "digital": "digital",
- "digital-art": "digital-art",
- "dinozavry": "dinosaurs",
- "directing": "directing",
- "diskurs": "discours",
- "diskurs-1": "discourse",
- "diskurs-analiz": "discourse-analytics",
- "dissidenty": "dissidents",
- "diy": "diy",
- "dmitriy-donskoy": "dmitriy-donskoy",
- "dmitriy-prigov": "dmitriy-prigov",
- "dnevnik-1": "dairy",
- "dnevniki": "dairies",
- "documentary": "documentary",
- "dokumentalnaya-poema": "documentary-poem",
- "dokumentalnaya-poeziya": "documentary-poetry",
- "dokumenty": "doсuments",
- "domashnee-nasilie": "home-terror",
- "donald-tramp": "donald-trump",
- "donbass": "donbass",
- "donbass-diary": "donbass-diary",
- "donorstvo": "donation",
- "dozhd": "rain",
- "drama": "drama",
- "dramaturgy": "dramaturgy",
- "drawing": "drawing",
- "drevo-zhizni": "tree-of-life",
- "drugs": "drugs",
- "duh": "spirit",
- "dzhaz": "jazz",
- "dzhek-keruak": "jack-keruak",
- "dzhim-morrison": "jim-morrison",
- "dzhordzh-romero": "george-romero",
- "dzhordzho-agamben": "giorgio-agamben",
- "ecology": "ecology",
- "economics": "economics",
- "eda": "food",
- "editorial-statements": "editorial-statements",
- "eduard-limonov": "eduard-limonov",
- "education": "education",
- "egor-letov": "egor-letov",
- "ekspat": "expat",
- "eksperiment": "experiments",
- "eksperimentalnaya-muzyka": "experimental-music",
- "ekspressionizm": "expressionism",
- "ekstremizm": "extremism",
- "ekzistentsializm-1": "existentialism",
- "ekzistentsiya": "existence",
- "elections": "elections",
- "electronic": "electronics",
- "electronics": "electronics",
- "elena-glinskaya": "elena-glinskaya",
- "elena-guro": "elena-guro",
- "elizaveta-mnatsakanova": "elizaveta-mnatsakanova",
- "embient": "ambient",
- "emigration": "emigration",
- "emil-dyurkgeym": "emile-durkheim",
- "emotsii": "emotions",
- "empiric": "empiric",
- "epidemiya": "pandemic",
- "erich-von-neff": "erich-von-neff",
- "erotika": "erotics",
- "essay": "essay",
- "estetika": "aestetics",
- "etika": "ethics",
- "etno": "ethno",
- "etnos": "ethnics",
- "everyday-life": "everyday-life",
- "evgeniy-onegin": "eugene-onegin",
- "evolyutsiya": "evolution",
- "exhibitions": "exhibitions",
- "experience": "experiences",
- "experimental": "experimental",
- "experimental-music": "experimental-music",
- "explanation": "explanation",
- "faktcheking": "fact-checking",
- "falsifikatsii": "falsifications",
- "family": "family",
- "fanfiki": "fan-fiction",
- "fantastika": "sci-fi",
- "fatalizm": "fatalism",
- "fedor-dostoevskiy": "fedor-dostoevsky",
- "fedor-ioannovich": "fedor-ioannovich",
- "feleton": "feuilleton",
- "feminism": "feminism",
- "fenomenologiya": "phenomenology",
- "fentezi": "fantasy",
- "festival": "festival",
- "festival-territoriya": "festival-territory",
- "folk": "folk",
- "folklor": "folklore",
- "fotoreportazh": "photoreports",
- "france": "france",
- "frants-kafka": "franz-kafka",
- "frederik-begbeder": "frederick-begbeder",
- "freedom": "freedom",
- "friendship": "friendship",
- "fsb": "fsb",
- "futbol": "footbool",
- "future": "future",
- "futuristy": "futurists",
- "futurizm": "futurism",
- "galereya": "gallery",
- "galereya-anna-nova": "gallery-anna-nova",
- "gdr": "gdr",
- "gender": "gender",
- "gendernyy-diskurs": "gender",
- "gennadiy-aygi": "gennadiy-aygi",
- "gerhard-rihter": "gerhard-rihter",
- "germaniya": "germany",
- "germenevtika": "hermeneutics",
- "geroi": "heroes",
- "girls": "girls",
- "gkchp": "gkchp",
- "glitch": "glitch",
- "globalizatsiya": "globalisation",
- "gollivud": "hollywood",
- "gonzo": "gonzo",
- "gore-ot-uma": "woe-from-wit",
- "graffiti": "graffiti",
- "graficheskaya-novella": "graphic-novell",
- "graphics": "graphics",
- "gravyura": "engraving",
- "grazhdanskaya-oborona": "grazhdanskaya-oborona",
- "gretsiya": "greece",
- "griby": "mushrooms",
- "gruziya-2": "georgia",
- "gulag": "gulag",
- "han-batyy": "khan-batyy",
- "hayku": "haiku",
- "health": "health",
- "himiya": "chemistry",
- "hip-hop": "hip-hop",
- "history": "history",
- "history-of-russia": "history-of-russia",
- "holokost": "holocaust",
- "horeografiya": "choreography",
- "horror": "horror",
- "hospis": "hospice",
- "hristianstvo": "christianity",
- "humans": "humans",
- "humour": "humour",
- "ideologiya": "ideology",
- "idm": "idm",
- "igil": "isis",
- "igor-pomerantsev": "igor-pomerantsev",
- "igra": "game",
- "igra-prestolov": "game-of-throne",
- "igry": "games",
- "iisus-hristos": "jesus-christ",
- "illness": "illness",
- "illustration-history": "illustration-history",
- "illustrations": "illustrations",
- "imazhinizm": "imagism",
- "immanuil-kant": "immanuel-kant",
- "impressionizm": "impressionism",
- "improvizatsiya": "improvisation",
- "indi": "indie",
- "individualizm": "individualism",
- "infografika": "infographics",
- "informatsiya": "information",
- "ingmar-bergman": "ingmar-bergman",
- "inklyuziya": "inclusion",
- "installyatsiya": "installation",
- "internet": "internet",
- "interview": "interview",
- "invalidnost": "disability",
- "investigations": "investigations",
- "iosif-brodskiy": "joseph-brodsky",
- "iosif-stalin": "joseph-stalin",
- "iskusstvennyy-intellekt": "artificial-intelligence",
- "islam": "islam",
- "istoriya-moskvy": "moscow-history",
- "istoriya-nauki": "history-of-sceince",
- "istoriya-o-medsestre": "nurse-story",
- "istoriya-teatra": "theatre-history",
- "italiya": "italy",
- "italyanskiy-yazyk": "italian-language",
- "iudaika": "judaica",
- "ivan-groznyy": "ivan-grozny",
- "ivan-iii-gorbatyy": "ivan-iii-gorbaty",
- "ivan-kalita": "ivan-kalita",
- "ivan-krylov": "ivan-krylov",
- "izobreteniya": "inventions",
- "izrail-1": "israel",
- "jazz": "jazz",
- "john-lennon": "john-lennon",
- "journalism": "journalism",
- "justice": "justice",
- "k-pop": "k-pop",
- "kalligrafiya": "calligraphy",
- "karikatura": "caricatures",
- "kartochki-rubinshteyna": "rubinstein-cards",
- "katrin-nenasheva": "katrin-nenasheva",
- "kavarga": "kavarga",
- "kavkaz": "caucasus",
- "kazan": "kazan",
- "kiberbezopasnost": "cybersecurity",
- "kinoklub": "cinema-club",
- "kinokritika": "film-criticism",
- "kirill-serebrennikov": "kirill-serebrennikov",
- "kladbische": "cemetery",
- "klassika": "classic",
- "kollektivnoe-bessoznatelnoe": "сollective-unconscious",
- "komediya": "comedy",
- "kommunikatsii": "communications",
- "kommunizm": "communism",
- "kommuny": "communes",
- "kompyuternye-igry": "computer-games",
- "konets-vesny": "end-of-spring",
- "konservatizm": "conservatism",
- "kontrkultura": "counter-culture",
- "kontseptualizm": "conceptualism",
- "korotkometrazhka": "cinema-shorts",
- "kosmos": "cosmos",
- "kraudfanding": "crowdfunding",
- "kriptovalyuty": "cryptocurrencies",
- "krizis": "crisis",
- "krov": "blood",
- "krym": "crimea",
- "kulturologiya": "culturology",
- "kulty": "cults",
- "kurdistan": "kurdistan",
- "kurt-kobeyn": "kurt-cobain",
- "kurt-vonnegut": "kurt-vonnegut",
- "kvir": "queer",
- "laboratoriya": "lab",
- "language": "languages",
- "lars-fon-trier": "lars-fon-trier",
- "laws": "laws",
- "lectures": "lectures",
- "leto": "summer",
- "lev-tolstoy": "leo-tolstoy",
- "lgbt": "lgbt",
- "liberalizm": "liberalism",
- "libertarianstvo": "libertarianism",
- "life": "life",
- "likbez": "likbez",
- "lingvistika": "linguistics",
- "lirika": "lirics",
- "literary-studies": "literary-studies",
- "literature": "literature",
- "literaturnyykaver": "literature-cover",
- "lo-fi": "lo-fi",
- "lomonosov": "lomonosov",
- "love": "love",
- "luzha-goluboy-krovi": "luzha-goluboy-krovi",
- "lyudvig-vitgenshteyn": "ludwig-wittgenstein",
- "lzhedmitriy": "false-dmitry",
- "lzhenauka": "pseudoscience",
- "magiya": "magic",
- "maks-veber": "max-weber",
- "manifests": "manifests",
- "manipulyatsii-soznaniem": "mind-manipulation",
- "marina-abramovich": "marina-abramovich",
- "marketing": "marketing",
- "marksizm": "marxism",
- "marsel-dyushan": "marchel-duchamp",
- "marsel-prust": "marcel-proust",
- "martin-haydegger": "martin-hidegger",
- "matematika": "maths",
- "mayakovskiy": "vladimir-mayakovsky",
- "media": "media",
- "medicine": "medicine",
- "memuary": "memoirs",
- "menedzhment": "management",
- "menty": "police",
- "merab-mamardashvili": "merab-mamardashvili",
- "mest": "revenge",
- "metamodernizm": "metamodern",
- "metavselennaya": "metaverse",
- "metro": "metro",
- "mifologiya": "mythology",
- "mify": "myth",
- "mihael-haneke": "michael-haneke",
- "mihail-baryshnikov": "mihail-baryshnikov",
- "mihail-bulgakov": "mihail-bulgakov",
- "mikrotonalnaya-muzyka": "mikrotone-muzyka",
- "minimalizm": "minimalism",
- "minkult-privet": "minkult-privet",
- "mir": "world",
- "mirovozzrenie": "mindsets",
- "mishel-fuko": "michel-foucault",
- "mistika": "mystics",
- "mitropolit-makariy": "mitropolit-makariy",
- "mlm": "mlm",
- "mobilizatsiya": "mobilisation",
- "moda": "fashion",
- "modernizm": "modernism",
- "mokyumentari": "mockumentary",
- "molodezh": "youth",
- "moloko-plus": "moloko-plus",
- "money": "money",
- "monologs": "monologues",
- "monstratsiya": "monstration",
- "moralnaya-otvetstvennost": "moral-responsibility",
- "more": "sea",
- "moscow": "moscow",
- "moshennichestvo": "frauds",
- "moskovskiy-romanticheskiy-kontseptualizm": "moscow-romantic-conceptualism",
- "moskovskoe-delo": "moscow-case",
- "movies": "movies",
- "mozg": "brain",
- "multiplikatsiya": "animation",
- "music": "music",
- "musulmanstvo": "islam",
- "muzei": "museum",
- "muzey": "museum",
- "muzhchiny": "man",
- "myshlenie": "thinking",
- "nagornyy-karabah": "nagorno-karabakh",
- "nasilie-1": "violence",
- "natsionalizm": "nationalism",
- "natsionalnaya-ideya": "national-idea",
- "natsizm": "nazism",
- "natyurmort": "nature-morte",
- "nauchpop": "pop-science",
- "nbp": "nbp",
- "nenavist": "hate",
- "neofitsialnaya-literatura": "unofficial-literature",
- "neoklassika": "neoclassic",
- "neprozrachnye-smysly": "hidden-meanings",
- "neravenstvo": "inequality",
- "net-voyne": "no-war",
- "new-year": "new-year",
- "neyronauka": "neuro-science",
- "neyroseti": "neural-networks",
- "niu-vshe": "hse",
- "nizhniy-novgorod": "nizhny-novgorod",
- "nko": "nonprofits",
- "nlo": "ufo",
- "nobelevskaya-premiya": "nobel-prize",
- "noize-mc": "noize-mc",
- "nonkonformizm": "nonconformism",
- "notforall": "notforall",
- "novaya-drama": "new-drama",
- "novosti": "news",
- "noyz": "noise",
- "nuar": "noir",
- "oberiu": "oberiu",
- "ocherk": "etudes",
- "ochevidnyy-nuar": "ochevidnyy-nuar",
- "odinochestvo": "loneliness",
- "odna-kniga-odna-istoriya": "one-book-one-story",
- "okrainy": "outskirts",
- "omon": "swat",
- "opinions": "opinions",
- "oppozitsiya": "opposition",
- "orhan-pamuk": "orhan-pamuk",
- "ornitologiya": "ornitology",
- "osen": "autumn",
- "osip-mandelshtam": "osip-mandelshtam",
- "oskar-uayld": "oscar-wilde",
- "osoznanie": "awareness",
- "otnosheniya": "relationship",
- "pablo-pikasso": "pablo-picasso",
- "painting": "painting",
- "paintings": "painting",
- "pamyat": "memory",
- "pandemiya": "pandemic",
- "parizh": "paris",
- "patriotizm": "patriotism",
- "patsifizm": "pacifism",
- "paul-tselan": "paul-tselan",
- "per-burd": "pierre-bourdieu",
- "perezhivaniya": "worries",
- "performance": "performance",
- "peyzazh": "landscape",
- "philology": "philology",
- "philosophy": "philosophy",
- "photo": "photography",
- "photography": "photography",
- "photoprojects": "photoprojects",
- "plakaty": "posters",
- "plastilin": "plasticine",
- "plays": "plays",
- "podrostki": "teenagers",
- "poema": "poem",
- "poems": "poems",
- "poeticheskaya-proza": "poetic-prose",
- "poetry": "poetry",
- "poetry-of-squares": "poetry-of-squares",
- "poetry-slam": "poetry-slam",
- "pokoy": "peace",
- "police": "police",
- "politicheskoe-fentezi": "political-fantasy",
- "politics": "politics",
- "politzaklyuchennye": "political-prisoners",
- "polsha": "poland",
- "pomosch": "help",
- "pop-art": "pop-art",
- "pop-culture": "pop-culture",
- "populyarnaya-psihologiya": "popular-psychology",
- "pornografiya": "pornography",
- "portret": "portrait",
- "poslovitsy": "proverbs",
- "post-pank": "post-punk",
- "post-rok": "post-rock",
- "postmodernism": "postmodernism",
- "povest": "novells",
- "povsednevnost": "everyday-life",
- "power": "power",
- "pravo": "right",
- "pravoslavie": "orthodox",
- "pravozaschitniki": "human-rights-activism",
- "prazdnik": "holidays",
- "predatelstvo": "betrayal",
- "predprinimatelstvo": "entrepreneurship",
- "premera": "premier",
- "premiya-oskar": "oscar-prize",
- "pribaltika-1": "baltic",
- "priroda": "nature",
- "prison": "prison",
- "pritcha": "parable",
- "privatnost": "privacy",
- "progress": "progress",
- "projects": "projects",
- "prokrastinatsiya": "procrastination",
- "propaganda": "propaganda",
- "proschenie": "forgiveness",
- "prose": "prose",
- "proshloe": "past",
- "prostitutsiya": "prostitution",
- "prosveschenie": "enlightenment",
- "protests": "protests",
- "psalmy": "psalms",
- "psihoanaliz": "psychoanalysis",
- "psihodeliki": "psychodelics",
- "pskov": "pskov",
- "psychiatry": "psychiatry",
- "psychology": "psychology",
- "ptitsy": "birds",
- "punk": "punk",
- "r-b": "rnb",
- "rasizm": "racism",
- "realizm": "realism",
- "redaktura": "editing",
- "refleksiya": "reflection",
- "reggi": "reggae",
- "religion": "religion",
- "rene-zhirar": "rene-girard",
- "renesanss": "renessance",
- "renovatsiya": "renovation",
- "rep": "rap",
- "reportage": "reportage",
- "reportazh-1": "reportage",
- "repressions": "repressions",
- "research": "research",
- "retroveyv": "retrowave",
- "review": "review",
- "revolution": "revolution",
- "rezo-gabriadze": "rezo-gabriadze",
- "risunki": "painting",
- "roboty": "robots",
- "rock": "rock",
- "roditeli": "parents",
- "romantizm": "romantism",
- "romany": "novell",
- "ronald-reygan": "ronald-reygan",
- "roskomnadzor": "roskomnadzor",
- "rossiyskoe-kino": "russian-cinema",
- "rouling": "rowling",
- "rozhava": "rojava",
- "rpts": "rpts",
- "rus-na-grani-sryva": "rus-na-grani-sryva",
- "russia": "russia",
- "russian-language": "russian-language",
- "russian-literature": "russian-literature",
- "russkaya-toska": "russian-toska",
- "russkiy-mir": "russkiy-mir",
- "salo": "lard",
- "salvador-dali": "salvador-dali",
- "samoidentifikatsiya": "self-identity",
- "samoopredelenie": "self-definition",
- "sankt-peterburg": "saint-petersburg",
- "sasha-skochilenko": "sasha-skochilenko",
- "satira": "satiric",
- "saund-art": "sound-art",
- "schaste": "happiness",
- "school": "school",
- "science": "science",
- "sculpture": "sculpture",
- "second-world-war": "second-world-war",
- "sekond-hend": "second-hand",
- "seksprosvet": "sex-education",
- "seksualizirovannoe-nasilie": "sexualized-violence",
- "seksualnoe-nasilie": "sexualized-violence",
- "sekty": "sects",
- "semi": "semi",
- "semiotics": "semiotics",
- "serbiya": "serbia",
- "sergey-bodrov-mladshiy": "sergey-bodrov-junior",
- "sergey-solov-v": "sergey-solovyov",
- "serialy": "series",
- "sever": "north",
- "severnaya-koreya": "north-korea",
- "sex": "sex",
- "shotlandiya": "scotland",
- "shugeyz": "shoegaze",
- "siloviki": "siloviki",
- "simeon-bekbulatovich": "simeon-bekbulatovich",
- "simvolizm": "simbolism",
- "siriya": "siria",
- "skulptura": "sculpture",
- "slavoy-zhizhek": "slavoj-zizek",
- "smert-1": "death",
- "smysl": "meaning",
- "sny": "dreams",
- "sobytiya": "events",
- "social": "society",
- "society": "society",
- "sociology": "sociology",
- "sofya-paleolog": "sofya-paleolog",
- "sofya-vitovtovna": "sofya-vitovtovna",
- "soobschestva": "communities",
- "soprotivlenie": "resistence",
- "sotsializm": "socialism",
- "sotsialnaya-filosofiya": "social-philosophy",
- "sotsiologiya-1": "sociology",
- "sotsseti": "social-networks",
- "sotvorenie-tretego-rima": "third-rome",
- "sovremennost": "modernity",
- "spaces": "spaces",
- "spektakl": "spectacles",
- "spetseffekty": "special-fx",
- "spetsoperatsiya": "special-operation",
- "spetssluzhby": "special-services",
- "sport": "sport",
- "srednevekove": "middle-age",
- "state": "state",
- "statistika": "statistics",
- "stendap": "stand-up",
- "stihi": "poetry",
- "stoitsizm": "stoicism",
- "stories": "stories",
- "stoyanie-na-ugre": "stoyanie-na-ugre",
- "strah": "fear",
- "street-art": "street-art",
- "stsenarii": "scenarios",
- "sud": "court",
- "summary": "summary",
- "supergeroi": "superheroes",
- "svetlana-aleksievich": "svetlana-aleksievich",
- "svobodu-ivanu-golunovu": "free-ivan-golunov",
- "syurrealizm": "surrealism",
- "tales": "tales",
- "tanets": "dance",
- "tataro-mongolskoe-igo": "mongol-tatar-yoke",
- "tatuirovki": "tattoo",
- "technology": "technology",
- "televidenie": "television",
- "telo": "body",
- "telo-kak-iskusstvo": "body-as-art",
- "terrorizm": "terrorism",
- "tests": "tests",
- "text": "texts",
- "the-beatles": "the-beatles",
- "theater": "theater",
- "theory": "theory",
- "tokio": "tokio",
- "torture": "torture",
- "totalitarizm": "totalitarism",
- "traditions": "traditions",
- "tragicomedy": "tragicomedy",
- "transgendernost": "transgender",
- "translation": "translation",
- "transport": "transport",
- "travel": "travel",
- "travma": "trauma",
- "trendy": "trends",
- "tretiy-reyh": "third-reich",
- "triller": "thriller",
- "tsar": "central-african-republic",
- "tsar-edip": "oedipus",
- "tsarevich-dmitriy": "tsarevich-dmitry",
- "tsennosti": "values",
- "tsenzura": "censorship",
- "tseremonii": "ceremonies",
- "turizm": "tourism",
- "tvorchestvo": "creativity",
- "ugnetennyy-zhilischnyy-klass": "oppressed-housing-class",
- "uilyam-shekspir": "william-shakespeare",
- "ukraina-2": "ukraine",
- "ukraine": "ukraine",
- "university": "university",
- "urban-studies": "urban-studies",
- "uroki-literatury": "literature-lessons",
- "usa": "usa",
- "ussr": "ussr",
- "utopiya": "utopia",
- "utrata": "loss",
- "valter-benyamin": "valter-benyamin",
- "varlam-shalamov": "varlam-shalamov",
- "vasiliy-ii-temnyy": "basil-ii-temnyy",
- "vasiliy-iii": "basil-iii",
- "vdnh": "vdnh",
- "vechnost": "ethernety",
- "velikobritaniya": "great-britain",
- "velimir-hlebnikov": "velimir-hlebnikov",
- "velkom-tu-greyt-britn": "welcome-to-great-britain",
- "venedikt-erofeev": "venedikt-erofeev",
- "venetsiya": "veneece",
- "vengriya": "hungary",
- "verlibry": "free-verse",
- "veschi": "things",
- "vessels": "vessels",
- "veterany": "veterans",
- "video": "video",
- "videoart": "videoart",
- "videoklip": "clips",
- "videopoeziya": "video-poetry",
- "viktor-astafev": "viktor-astafev",
- "viktor-pelevin": "viktor-pelevin",
- "vilgelm-rayh": "wilhelm-reich",
- "vinzavod": "vinzavod",
- "violence": "violence",
- "visual-culture": "visual-culture",
- "vizualnaya-poeziya": "visual-poetry",
- "vladimir-lenin": "vladimir-lenin",
- "vladimir-mayakovskiy": "vladimir-mayakovsky",
- "vladimir-nabokov": "vladimir-nabokov",
- "vladimir-putin": "vladimir-putin",
- "vladimir-sorokin": "vladimir-sorokin",
- "vladimir-voynovich": "vladimir-voynovich",
- "vnutrenniy-opyt": "inner-expirience",
- "volga": "volga",
- "volontery": "volonteurs",
- "vong-karvay": "wong-karwai",
- "vospominaniya": "memories",
- "vostok": "east",
- "voyna-na-ukraine": "war-in-ukraine",
- "voyna-v-ukraine": "war-in-ukraine",
- "vremya": "time",
- "vudi-allen": "woody-allen",
- "vynuzhdennye-otnosheniya": "forced-relationship",
- "war": "war",
- "war-in-ukraine-images": "war-in-ukrahine-images",
- "women": "women",
- "work": "work",
- "writers": "writers",
- "xx-century": "xx-century",
- "yakob-yordans": "yakob-yordans",
- "yan-vermeer": "yan-vermeer",
- "yanka-dyagileva": "yanka-dyagileva",
- "yaponskaya-literatura": "japan-literature",
- "yazychestvo": "paganism",
- "youth": "youth",
- "yozef-rot": "yozef-rot",
- "yurgen-habermas": "jorgen-habermas",
- "za-liniey-mannergeyma": "behind-mannerheim-line",
- "zabota": "care",
- "zahar-prilepin": "zahar-prilepin",
- "zakonodatelstvo": "laws",
- "zakony-mira": "world-laws",
- "zametki": "notes",
- "zhelanie": "wish",
- "zhivotnye": "animals",
- "zhoze-saramago": "jose-saramago",
- "zigmund-freyd": "sigmund-freud",
- "zolotaya-orda": "golden-horde",
- "zombi": "zombie",
- "zombi-simpsony": "zombie-simpsons"
-}
diff --git a/migration/tables/topics.py b/migration/tables/topics.py
deleted file mode 100644
index 9fb5c45f..00000000
--- a/migration/tables/topics.py
+++ /dev/null
@@ -1,32 +0,0 @@
-from services.db import local_session
-from migration.extract import extract_md
-from migration.html2text import html2text
-from orm import Topic
-
-
def migrate(entry):
    """Migrate one legacy topic entry into the Topic table.

    Upserts by slug: creates the topic when missing; for an existing row,
    replaces the title when the stored one is longer (prefers the incoming,
    shorter title) and replaces the body when the stored one is shorter
    (prefers the longer body). Returns the topic's attribute dict without
    SQLAlchemy bookkeeping state.
    """
    # Normalize non-breaking spaces (U+00A0) from the legacy HTML export.
    # NOTE(review): the pre-migration source read `.replace(" ", " ")`, a
    # visual no-op that is almost certainly a garbled NBSP — confirm against
    # the legacy data before relying on this.
    body_orig = entry.get("description", "").replace("\xa0", " ")
    topic_dict = {
        "slug": entry["slug"],
        "oid": entry["_id"],
        "title": entry["title"].replace("\xa0", " "),
        "body": extract_md(html2text(body_orig)),
    }

    with local_session() as session:
        slug = topic_dict["slug"]
        topic = (
            session.query(Topic).filter(Topic.slug == slug).first()
            or Topic.create(**topic_dict)
        )
        if not topic:
            # Defensive only: the `or Topic.create(...)` above makes this
            # unreachable unless create() returns a falsy value.
            raise Exception("no topic!")
        if len(topic.title) > len(topic_dict["title"]):
            Topic.update(topic, {"title": topic_dict["title"]})
        if len(topic.body) < len(topic_dict["body"]):
            Topic.update(topic, {"body": topic_dict["body"]})
        session.commit()
    rt = topic.__dict__.copy()
    del rt["_sa_instance_state"]
    return rt
diff --git a/migration/tables/users.py b/migration/tables/users.py
deleted file mode 100644
index 9e85a3df..00000000
--- a/migration/tables/users.py
+++ /dev/null
@@ -1,167 +0,0 @@
-import re
-
-from bs4 import BeautifulSoup
-from dateutil.parser import parse
-from sqlalchemy.exc import IntegrityError
-
-from services.db import local_session
-from orm.user import AuthorFollower, User, UserRating
-
-
def migrate(entry):
    """Migrate one legacy user document into a User row.

    Builds a user dict from the Mongo-style export, derives slug, name, bio,
    userpic and links from the embedded profile, then creates the User. On a
    slug collision the existing row is reused and its oid and password are
    refreshed. Returns the user dict augmented with the row's id.

    Raises:
        Exception: when a colliding user cannot be found after an
            IntegrityError (should not happen in practice).
    """
    if "subscribedTo" in entry:
        del entry["subscribedTo"]
    email = entry["emails"][0]["address"]
    user_dict = {
        "oid": entry["_id"],
        "roles": [],
        "ratings": [],
        "username": email,
        "email": email,
        "createdAt": parse(entry["createdAt"]),
        # Staff addresses are treated as confirmed even without verification.
        "emailConfirmed": ("@discours.io" in email)
        or bool(entry["emails"][0]["verified"]),
        "muted": False,  # amnesty
        "bio": entry["profile"].get("bio", ""),
        "links": [],
        "name": "anonymous",
        "password": entry["services"]["password"].get("bcrypt"),
    }

    if "updatedAt" in entry:
        user_dict["updatedAt"] = parse(entry["updatedAt"])
    # BUG FIX: the original tested the misspelled key "wasOnineAt" but then
    # read entry["wasOnlineAt"], so lastSeen was either never migrated or the
    # lookup raised KeyError. Test and read the same (correct) key.
    if "wasOnlineAt" in entry:
        user_dict["lastSeen"] = parse(entry["wasOnlineAt"])
    if entry.get("profile"):
        # slug
        slug = entry["profile"].get("path").lower()
        slug = re.sub("[^0-9a-zA-Z]+", "-", slug).strip()
        user_dict["slug"] = slug
        # Un-escape markdown-escaped parentheses. ("\\(" is the same string
        # value as the original's "\(", without the invalid-escape warning.)
        bio = (
            (entry.get("profile", {"bio": ""}).get("bio") or "")
            .replace("\\(", "(")
            .replace("\\)", ")")
        )
        bio_text = BeautifulSoup(bio, features="lxml").text

        # Long bios become the "about" field; short ones stay as "bio".
        if len(bio_text) > 120:
            user_dict["about"] = bio_text
        else:
            user_dict["bio"] = bio_text

        # userpic: prefer the thumbor asset, fall back to the raw image URL.
        try:
            user_dict["userpic"] = (
                "https://assets.discours.io/unsafe/100x/"
                + entry["profile"]["thumborId"]
            )
        except KeyError:
            try:
                user_dict["userpic"] = entry["profile"]["image"]["url"]
            except KeyError:
                user_dict["userpic"] = ""

        # name: "first last", falling back to slug, then to the profile path.
        fn = entry["profile"].get("firstName", "")
        ln = entry["profile"].get("lastName", "")
        name = fn if fn else ""
        name = (name + " " + ln) if ln else name
        if not name:
            name = slug if slug else "anonymous"
        name = (
            entry["profile"]["path"].lower().strip().replace(" ", "-")
            if len(name) < 2
            else name
        )
        user_dict["name"] = name

        # links: collect whichever social/website fields are present.
        for field in ("facebook", "vkontakte", "twitter", "website"):
            link = entry["profile"].get(field, False)
            if link:
                user_dict["links"].append(link)

    # some checks
    # BUG FIX: the original indexed user_dict["slug"] directly, which raised
    # KeyError for entries without a profile; .get() lets such entries fall
    # through to the email-derived slug below.
    if not user_dict.get("slug") and len(user_dict["links"]) > 0:
        user_dict["slug"] = user_dict["links"][0].split("/")[-1]

    user_dict["slug"] = user_dict.get("slug", user_dict["email"].split("@")[0])
    oid = user_dict["oid"]
    user_dict["slug"] = user_dict["slug"].lower().strip().replace(" ", "-")
    try:
        user = User.create(**user_dict.copy())
    except IntegrityError:
        # Slug already taken: reuse the existing row, refreshing its legacy
        # oid and password so later stages can still resolve it.
        print("[migration] cannot create user " + user_dict["slug"])
        with local_session() as session:
            old_user = (
                session.query(User).filter(User.slug == user_dict["slug"]).first()
            )
            old_user.oid = oid
            old_user.password = user_dict["password"]
            session.commit()
            user = old_user
        if not user:
            print("[migration] ERROR: cannot find user " + user_dict["slug"])
            raise Exception
    user_dict["id"] = user.id
    return user_dict
-
-
def post_migrate():
    """Create the service account that aggregates legacy-site view counts."""
    legacy_account = {
        "slug": "old-discours",
        "username": "old-discours",
        "email": "old@discours.io",
        "name": "Просмотры на старой версии сайта",
    }

    with local_session() as session:
        session.add(User.create(**legacy_account))
        session.commit()
-
-
def migrate_2stage(entry, id_map):
    """Second pass: attach user-to-user ratings once all users exist.

    For each legacy rating on ``entry``, resolves the rater and the rated
    author through ``id_map`` (oid -> slug), creates a UserRating row, and —
    for positive ratings — an automatic AuthorFollower link. Returns the
    number of ratings skipped because the rater's oid was not in ``id_map``.
    """
    ce = 0  # count of ratings whose rater could not be resolved
    for rating_entry in entry.get("ratings", []):
        rater_oid = rating_entry["createdBy"]
        rater_slug = id_map.get(rater_oid)
        if not rater_slug:
            ce += 1
            # print(rating_entry)
            continue
        oid = entry["_id"]
        author_slug = id_map.get(oid)

        # One short-lived session per rating so a failure rolls back only
        # that rating's rows.
        with local_session() as session:
            try:
                rater = session.query(User).where(User.slug == rater_slug).one()
                user = session.query(User).where(User.slug == author_slug).one()

                user_rating_dict = {
                    "value": rating_entry["value"],
                    "rater": rater.id,
                    "user": user.id,
                }

                user_rating = UserRating.create(**user_rating_dict)
                # A positive rating implies an automatic follow of the author.
                if user_rating_dict["value"] > 0:
                    af = AuthorFollower.create(
                        author=user.id, follower=rater.id, auto=True
                    )
                    session.add(af)
                session.add(user_rating)
                session.commit()
            except IntegrityError:
                # Duplicate rating/follow — presumably already migrated; log
                # and keep going.
                print("[migration] cannot rate " + author_slug + "`s by " + rater_slug)
            except Exception as e:
                # Best-effort pass: log anything else (e.g. .one() finding no
                # row) without aborting the whole migration.
                print(e)
    return ce
diff --git a/migration/utils.py b/migration/utils.py
deleted file mode 100644
index 39b47583..00000000
--- a/migration/utils.py
+++ /dev/null
@@ -1,10 +0,0 @@
-from datetime import datetime
-from json import JSONEncoder
-
-
class DateTimeEncoder(JSONEncoder):
    """JSONEncoder that renders datetime values via ``str()``."""

    def default(self, z):
        # Datetimes serialize as "YYYY-MM-DD HH:MM:SS[.ffffff]"; any other
        # unsupported type falls through to the base class, which raises
        # TypeError as usual.
        return str(z) if isinstance(z, datetime) else super().default(z)
diff --git a/requirements.txt b/requirements.txt
index a653cb31..c891a256 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,7 +6,7 @@ pyjwt>=2.6.0
git+https://github.com/encode/starlette.git#main
sqlalchemy>=1.4.41
graphql-core>=3.0.3
-gql~=3.4.0
+gql[httpx]
uvicorn>=0.18.3
pydantic>=1.10.2
passlib~=1.7.4
@@ -14,14 +14,11 @@ itsdangerous
authlib>=1.1.0
httpx>=0.23.0
psycopg2-binary
-transliterate~=1.10.2
bcrypt>=4.0.0
websockets
-bson~=0.5.10
flake8
DateTime~=4.7
python-dateutil~=2.8.2
-beautifulsoup4~=4.11.1
lxml
sentry-sdk>=1.14.0
boto3~=1.28.2
diff --git a/services/viewed.py b/services/viewed.py
index 21d688b7..a7b17a3b 100644
--- a/services/viewed.py
+++ b/services/viewed.py
@@ -2,18 +2,16 @@ import asyncio
import time
from datetime import timedelta, timezone, datetime
from os import environ, path
-from ssl import create_default_context
from gql import Client, gql
-from gql.transport.aiohttp import AIOHTTPTransport
+from gql.transport.httpx import HTTPXAsyncTransport
from services.db import local_session
from orm import Topic
from orm.shout import ShoutTopic, Shout
load_facts = gql(
- """
-query getDomains {
+ """ query getDomains {
domains {
id
title
@@ -23,14 +21,11 @@ query getDomains {
viewsMonth
viewsYear
}
- }
-}
-"""
+ } } """
)
load_pages = gql(
- """
-query getDomains {
+ """ query getDomains {
domains {
title
statistics {
@@ -41,10 +36,9 @@ query getDomains {
value
}
}
- }
-}
-"""
+ } } """
)
+
schema_str = open(path.dirname(__file__) + "/ackee.graphql").read()
token = environ.get("ACKEE_TOKEN", "")
@@ -52,9 +46,8 @@ token = environ.get("ACKEE_TOKEN", "")
def create_client(headers=None, schema=None):
return Client(
schema=schema,
- transport=AIOHTTPTransport(
+ transport=HTTPXAsyncTransport(
url="https://ackee.discours.io/api",
- ssl=create_default_context(),
headers=headers,
),
)