s > 9 correctly.
- if li['name'] == "ul": self.o(self.ul_item_mark + " ")
- elif li['name'] == "ol":
- li['num'] += 1
- self.o(str(li['num'])+". ")
- self.start = 1
-
- if tag in ["table", "tr"] and start: self.p()
- if tag == 'td': self.pbr()
-
- if tag == "pre":
- if start:
- self.startpre = 1
- self.pre = 1
- else:
- self.pre = 0
- self.p()
-
- def pbr(self):
- if self.p_p == 0:
- self.p_p = 1
-
- def p(self):
- self.p_p = 2
-
- def soft_br(self):
- self.pbr()
- self.br_toggle = ' '
-
- def o(self, data, puredata=0, force=0):
- if self.abbr_data is not None:
- self.abbr_data += data
-
- if not self.quiet:
- if self.google_doc:
- # prevent white space immediately after 'begin emphasis' marks ('**' and '_')
- lstripped_data = data.lstrip()
- if self.drop_white_space and not (self.pre or self.code):
- data = lstripped_data
- if lstripped_data != '':
- self.drop_white_space = 0
-
- if puredata and not self.pre:
- data = re.sub('\s+', ' ', data)
- if data and data[0] == ' ':
- self.space = 1
- data = data[1:]
- if not data and not force: return
-
- if self.startpre:
- #self.out(" :") #TODO: not output when already one there
- if not data.startswith("\n"): # stuff...
- data = "\n" + data
-
- bq = (">" * self.blockquote)
- if not (force and data and data[0] == ">") and self.blockquote: bq += " "
-
- if self.pre:
- if not self.list:
- bq += " "
- #else: list content is already partially indented
- for i in xrange(len(self.list)):
- bq += " "
- data = data.replace("\n", "\n"+bq)
-
- if self.startpre:
- self.startpre = 0
- if self.list:
- data = data.lstrip("\n") # use existing initial indentation
-
- if self.start:
- self.space = 0
- self.p_p = 0
- self.start = 0
-
- if force == 'end':
- # It's the end.
- self.p_p = 0
- self.out("\n")
- self.space = 0
-
- if self.p_p:
- self.out((self.br_toggle+'\n'+bq)*self.p_p)
- self.space = 0
- self.br_toggle = ''
-
- if self.space:
- if not self.lastWasNL: self.out(' ')
- self.space = 0
-
- if self.a and ((self.p_p == 2 and self.links_each_paragraph) or force == "end"):
- if force == "end": self.out("\n")
-
- newa = []
- for link in self.a:
- if self.outcount > link['outcount']:
- self.out(" ["+ str(link['count']) +"]: " + urlparse.urljoin(self.baseurl, link['href']))
- if has_key(link, 'title'): self.out(" ("+link['title']+")")
- self.out("\n")
- else:
- newa.append(link)
-
- if self.a != newa: self.out("\n") # Don't need an extra line when nothing was done.
-
- self.a = newa
-
- if self.abbr_list and force == "end":
- for abbr, definition in self.abbr_list.items():
- self.out(" *[" + abbr + "]: " + definition + "\n")
-
- self.p_p = 0
- self.out(data)
- self.outcount += 1
-
- def handle_data(self, data):
- if r'\/script>' in data: self.quiet -= 1
-
- if self.style:
- self.style_def.update(dumb_css_parser(data))
-
- if not self.maybe_automatic_link is None:
- href = self.maybe_automatic_link
- if href == data and self.absolute_url_matcher.match(href):
- self.o("<" + data + ">")
- return
- else:
- self.o("[")
- self.maybe_automatic_link = None
-
- if not self.code and not self.pre:
- data = escape_md_section(data, snob=self.escape_snob)
- self.o(data, 1)
-
- def unknown_decl(self, data): pass
-
- def charref(self, name):
- if name[0] in ['x','X']:
- c = int(name[1:], 16)
- else:
- c = int(name)
-
- if not self.unicode_snob and c in unifiable_n.keys():
- return unifiable_n[c]
- else:
- try:
- return unichr(c)
- except NameError: #Python3
- return chr(c)
-
- def entityref(self, c):
- if not self.unicode_snob and c in unifiable.keys():
- return unifiable[c]
- else:
- try: name2cp(c)
- except KeyError: return "&" + c + ';'
- else:
- try:
- return unichr(name2cp(c))
- except NameError: #Python3
- return chr(name2cp(c))
-
- def replaceEntities(self, s):
- s = s.group(1)
- if s[0] == "#":
- return self.charref(s[1:])
- else: return self.entityref(s)
-
- r_unescape = re.compile(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));")
- def unescape(self, s):
- return self.r_unescape.sub(self.replaceEntities, s)
-
- def google_nest_count(self, style):
- """calculate the nesting count of google doc lists"""
- nest_count = 0
- if 'margin-left' in style:
- nest_count = int(style['margin-left'][:-2]) / self.google_list_indent
- return nest_count
-
-
- def optwrap(self, text):
- """Wrap all paragraphs in the provided text."""
- if not self.body_width:
- return text
-
- assert wrap, "Requires Python 2.3."
- result = ''
- newlines = 0
- for para in text.split("\n"):
- if len(para) > 0:
- if not skipwrap(para):
- result += "\n".join(wrap(para, self.body_width))
- if para.endswith(' '):
- result += " \n"
- newlines = 1
- else:
- result += "\n\n"
- newlines = 2
- else:
- if not onlywhite(para):
- result += para + "\n"
- newlines = 1
- else:
- if newlines < 2:
- result += "\n"
- newlines += 1
- return result
-
-ordered_list_matcher = re.compile(r'\d+\.\s')
-unordered_list_matcher = re.compile(r'[-\*\+]\s')
-md_chars_matcher = re.compile(r"([\\\[\]\(\)])")
-md_chars_matcher_all = re.compile(r"([`\*_{}\[\]\(\)#!])")
-md_dot_matcher = re.compile(r"""
- ^ # start of line
- (\s*\d+) # optional whitespace and a number
- (\.) # dot
- (?=\s) # lookahead assert whitespace
- """, re.MULTILINE | re.VERBOSE)
-md_plus_matcher = re.compile(r"""
- ^
- (\s*)
- (\+)
- (?=\s)
- """, flags=re.MULTILINE | re.VERBOSE)
-md_dash_matcher = re.compile(r"""
- ^
- (\s*)
- (-)
- (?=\s|\-) # followed by whitespace (bullet list, or spaced out hr)
- # or another dash (header or hr)
- """, flags=re.MULTILINE | re.VERBOSE)
-slash_chars = r'\`*_{}[]()#+-.!'
-md_backslash_matcher = re.compile(r'''
- (\\) # match one slash
- (?=[%s]) # followed by a char that requires escaping
- ''' % re.escape(slash_chars),
- flags=re.VERBOSE)
-
-def skipwrap(para):
- # If the text begins with four spaces or one tab, it's a code block; don't wrap
- if para[0:4] == ' ' or para[0] == '\t':
- return True
- # If the text begins with only two "--", possibly preceded by whitespace, that's
- # an emdash; so wrap.
- stripped = para.lstrip()
- if stripped[0:2] == "--" and len(stripped) > 2 and stripped[2] != "-":
- return False
- # I'm not sure what this is for; I thought it was to detect lists, but there's
- # a
-inside- case in one of the tests that also depends upon it.
- if stripped[0:1] == '-' or stripped[0:1] == '*':
- return True
- # If the text begins with a single -, *, or +, followed by a space, or an integer,
- # followed by a ., followed by a space (in either case optionally preceeded by
- # whitespace), it's a list; don't wrap.
- if ordered_list_matcher.match(stripped) or unordered_list_matcher.match(stripped):
- return True
- return False
-
-def wrapwrite(text):
- text = text.encode('utf-8')
- try: #Python3
- sys.stdout.buffer.write(text)
- except AttributeError:
- sys.stdout.write(text)
-
-def html2text(html, baseurl=''):
- h = HTML2Text(baseurl=baseurl)
- return h.handle(html)
-
-def unescape(s, unicode_snob=False):
- h = HTML2Text()
- h.unicode_snob = unicode_snob
- return h.unescape(s)
-
-def escape_md(text):
- """Escapes markdown-sensitive characters within other markdown constructs."""
- return md_chars_matcher.sub(r"\\\1", text)
-
-def escape_md_section(text, snob=False):
- """Escapes markdown-sensitive characters across whole document sections."""
- text = md_backslash_matcher.sub(r"\\\1", text)
- if snob:
- text = md_chars_matcher_all.sub(r"\\\1", text)
- text = md_dot_matcher.sub(r"\1\\\2", text)
- text = md_plus_matcher.sub(r"\1\\\2", text)
- text = md_dash_matcher.sub(r"\1\\\2", text)
- return text
-
-
-def main():
- baseurl = ''
-
- p = optparse.OptionParser('%prog [(filename|url) [encoding]]', version='%prog ' + __version__)
- p.add_option("--ignore-emphasis", dest="ignore_emphasis", action="store_true",
- default=IGNORE_EMPHASIS, help="don't include any formatting for emphasis")
- p.add_option("--ignore-links", dest="ignore_links", action="store_true",
- default=IGNORE_ANCHORS, help="don't include any formatting for links")
- p.add_option("--ignore-images", dest="ignore_images", action="store_true",
- default=IGNORE_IMAGES, help="don't include any formatting for images")
- p.add_option("-g", "--google-doc", action="store_true", dest="google_doc",
- default=False, help="convert an html-exported Google Document")
- p.add_option("-d", "--dash-unordered-list", action="store_true", dest="ul_style_dash",
- default=False, help="use a dash rather than a star for unordered list items")
- p.add_option("-e", "--asterisk-emphasis", action="store_true", dest="em_style_asterisk",
- default=False, help="use an asterisk rather than an underscore for emphasized text")
- p.add_option("-b", "--body-width", dest="body_width", action="store", type="int",
- default=BODY_WIDTH, help="number of characters per output line, 0 for no wrap")
- p.add_option("-i", "--google-list-indent", dest="list_indent", action="store", type="int",
- default=GOOGLE_LIST_INDENT, help="number of pixels Google indents nested lists")
- p.add_option("-s", "--hide-strikethrough", action="store_true", dest="hide_strikethrough",
- default=False, help="hide strike-through text. only relevant when -g is specified as well")
- p.add_option("--escape-all", action="store_true", dest="escape_snob",
- default=False, help="Escape all special characters. Output is less readable, but avoids corner case formatting issues.")
- (options, args) = p.parse_args()
-
- # process input
- encoding = "utf-8"
- if len(args) > 0:
- file_ = args[0]
- if len(args) == 2:
- encoding = args[1]
- if len(args) > 2:
- p.error('Too many arguments')
-
- if file_.startswith('http://') or file_.startswith('https://'):
- baseurl = file_
- j = urllib.urlopen(baseurl)
- data = j.read()
- if encoding is None:
- try:
- from feedparser import _getCharacterEncoding as enc
- except ImportError:
- enc = lambda x, y: ('utf-8', 1)
- encoding = enc(j.headers, data)[0]
- if encoding == 'en-ascii':
- encoding = 'utf-8'
- else:
- data = open(file_, 'rb').read()
- if encoding is None:
- try:
- from chardet import detect
- except ImportError:
- detect = lambda x: {'encoding': 'utf-8'}
- encoding = detect(data)['encoding']
- else:
- data = sys.stdin.read()
-
- data = data.decode(encoding)
- h = HTML2Text(baseurl=baseurl)
- # handle options
- if options.ul_style_dash: h.ul_item_mark = '-'
- if options.em_style_asterisk:
- h.emphasis_mark = '*'
- h.strong_mark = '__'
-
- h.body_width = options.body_width
- h.list_indent = options.list_indent
- h.ignore_emphasis = options.ignore_emphasis
- h.ignore_links = options.ignore_links
- h.ignore_images = options.ignore_images
- h.google_doc = options.google_doc
- h.hide_strikethrough = options.hide_strikethrough
- h.escape_snob = options.escape_snob
-
- wrapwrite(h.handle(data))
-
-
-if __name__ == "__main__":
- main()
\ No newline at end of file
diff --git a/migration/bson2json.py b/migration/bson2json.py
index bbe909f7..9559665c 100644
--- a/migration/bson2json.py
+++ b/migration/bson2json.py
@@ -24,5 +24,5 @@ def json_tables():
base, d = bson.decode_document(bs, base)
lc.append(d)
data[table] = lc
- open('migration/data/'+table+'.json', 'w').write(json.dumps(lc,cls=DateTimeEncoder))
+ open('dump/discours/'+table+'.json', 'w').write(json.dumps(lc,cls=DateTimeEncoder))
diff --git a/migration/html2md.py b/migration/html2md.py
deleted file mode 100644
index d2f23e1c..00000000
--- a/migration/html2md.py
+++ /dev/null
@@ -1,215 +0,0 @@
-from html.parser import HTMLParser
-import os
-import codecs
-from typing import Tuple
-
-
-class Converter(HTMLParser):
- md_file: str
- temp_tag: str
- code_box: bool
- div_count: int
- code_box_div_num: int
- ol_count: int
- related_data: list
- is_link: bool
- link_ref: str
- ignore_data: bool
- class_div_count: int
- ignore_div: bool
- table_start: Tuple[int, int]
-
- def __init__(self):
- super().__init__()
- self.md_file = ''
- self.code_box = False
- self.div_count = 0
- self.span_count = 0
- self.code_box_div_num = 0
- self.ol_count = 0
- self.temp_tag = ''
- self.related_data = []
- self.is_link = False
- self.link_ref = ''
- self.ignore_data = False
- self.class_div_count = 0
- self.ignore_div = False
-
- def handle_starttag(self, tag, attrs):
- if self.ignore_data:
- return None
- elif tag == 'sup':
- self.md_file += ''
- elif tag == 'p':
- self.temp_tag = 'p'
- self.md_file += '\n'
- elif tag == 'i':
- self.temp_tag = 'i'
- self.md_file += '*'
- elif tag == 'wbr':
- self.temp_tag = 'wbr'
- self.md_file += ''
- elif tag == 'span':
- self.temp_tag = 'span'
- self.span_count += 1
- self.md_file += ' '
- elif tag == 'figcaption':
- self.md_file += ''
- elif tag == 'hr':
- self.md_file += '\n*** \n'
- elif tag == 'title':
- self.md_file += '# '
- elif tag == 'h1':
- self.md_file += '# '
- elif tag == 'h2':
- self.md_file += '## '
- elif tag == 'h3':
- self.md_file += '### '
- elif tag == 'b' or tag == 'strong':
- self.md_file += '**'
- elif tag == 'ul':
- self.temp_tag = 'ul'
- self.md_file += ' \n'
- elif tag == 'ol':
- self.ol_count = 0
- self.temp_tag = 'ol'
- self.md_file += ' \n'
- elif tag == 'li':
- if self.temp_tag == 'ul':
- self.md_file += '* '
- elif self.temp_tag == 'ol':
- self.ol_count += 1
- self.md_file += f'{self.ol_count}. '
- elif tag == 'div':
- self.div_count += 1
- attrs_dict = dict(attrs)
- if 'style' in attrs_dict and 'codeblock' in attrs_dict['style']:
- self.code_box_div_num = self.div_count
- self.code_box = True
- self.md_file += '```\n'
- elif 'class' in attrs_dict:
- self.class_div_count = self.div_count
- self.ignore_div = True
- elif tag == 'pre' or tag == 'code':
- self.code_box = True
- self.md_file += '\n```\n'
- elif tag == 'a':
- self.is_link = True
- attrs_dict = dict(attrs)
- self.link_ref = attrs_dict.get('href', '#')
- if not self.link_ref.startswith('http') and not self.link_ref.endswith('html') and not '@' in self.link_ref:
- self.related_data.append(self.link_ref)
- elif tag == 'style':
- self.ignore_data = True
- elif tag == 'symbol':
- self.ignore_data = True
- elif tag == 'svg':
- self.ignore_data = True
- elif tag == 'path':
- self.ignore_data = True
- elif tag == 'img':
- attrs_dict = dict(attrs)
- img_ref = attrs_dict['src']
- alt_name = attrs_dict['alt'] if 'alt' in attrs_dict else 'x'
- if self.is_link:
- self.related_data.append(img_ref)
- self.md_file += f'[]({self.link_ref})'
- else:
- self.related_data.append(img_ref)
- self.md_file += f''
- elif tag == 'table':
- self.ignore_data = True
- self.table_start = self.getpos()
- else:
- print('<' + tag + '>')
-
- def get_rawdata(self, start, stop, offset):
- temp_rawdata = self.rawdata
- for i in range(offset-1):
- next_section = temp_rawdata.find('\n')
- temp_rawdata = temp_rawdata[next_section+1:]
- return temp_rawdata[start:stop]
-
- def handle_endtag(self, tag):
- if tag == 'b' or tag == 'strong':
- self.md_file += '** '
- elif tag == 'sup':
- self.md_file += ''
- elif tag == 'iframe':
- self.ignore_data = False
- elif tag == 'wbr':
- self.md_file += ''
- elif tag == 'title':
- self.md_file += '\n'
- elif tag == 'h1':
- self.md_file += '\n'
- elif tag == 'h2':
- self.md_file += '\n'
- elif tag == 'h3':
- self.md_file += '\n'
- elif tag == 'h4':
- self.md_file += '\n'
- elif tag == 'span':
- self.span_count -= 1
- self.md_file += ' '
- elif tag == 'figcaption':
- self.md_file += '\n'
- elif tag == 'i':
- self.md_file += '* '
- elif tag == 'p':
- self.md_file += '\n'
- elif tag == 'div':
- if self.code_box and self.code_box_div_num == self.div_count:
- self.code_box = False
- self.md_file += '```\n'
- elif self.ignore_div and self.class_div_count == self.div_count:
- self.ignore_div = False
- else:
- self.md_file += ' \n'
- self.div_count -= 1
- elif tag == 'pre' or tag == 'code':
- self.code_box = False
- self.md_file += '```\n'
- elif tag == 'a':
- self.is_link = False
- elif tag == 'style':
- self.ignore_data = False
- elif tag == 'symbol':
- self.ignore_data = False
- elif tag == 'svg':
- self.ignore_data = False
- elif tag == 'li':
- self.md_file += ' \n'
- elif tag == 'table':
- offset, lineno_stop = self.getpos()
- lineno_stop = lineno_stop + len(tag) + 3
- _, lineno_start = self.table_start
- raw_data = self.get_rawdata(lineno_start, lineno_stop, offset)
- self.md_file += '\n' + raw_data
- self.ignore_data = False
- else:
- print('' + tag + '>')
-
- def handle_startendtag(self, tag, attrs):
- if tag == 'br':
- self.md_file += ' \n'
- elif tag == 'wbr':
- self.md_file += ''
- elif tag == 'hr':
- self.md_file += '\n*** \n'
- elif tag == 'img':
- attr_dict = dict(attrs)
- name = attr_dict.get('data-filename', 'image')
- img_ref = attr_dict['src']
- self.related_data.append(img_ref)
- self.md_file += f''
- else:
- print("<" + tag + " />")
-
- def handle_data(self, data):
- if self.is_link:
- self.md_file += f'[{data}]({self.link_ref})'
- elif self.ignore_data:
- pass
- else:
- self.md_file += data
diff --git a/migration/html2text/config.py b/migration/html2text/config.py
index 9c10445a..9962b125 100644
--- a/migration/html2text/config.py
+++ b/migration/html2text/config.py
@@ -1,7 +1,7 @@
import re
# Use Unicode characters instead of their ascii pseudo-replacements
-UNICODE_SNOB = False
+UNICODE_SNOB = True
# Marker to use for marking tables for padding post processing
TABLE_MARKER_FOR_PAD = "special_marker_for_table_padding"
@@ -13,7 +13,7 @@ ESCAPE_SNOB = False
LINKS_EACH_PARAGRAPH = False
# Wrap long lines at position. 0 for no wrapping.
-BODY_WIDTH = 78
+BODY_WIDTH = 0
# Don't show internal links (href="#local-anchor") -- corresponding link
# targets won't be visible in the plain text file anyway.
@@ -24,7 +24,7 @@ INLINE_LINKS = True
# Protect links from line breaks surrounding them with angle brackets (in
# addition to their square brackets)
-PROTECT_LINKS = False
+PROTECT_LINKS = True
WRAP_LINKS = True
# Wrap list items.
@@ -156,7 +156,7 @@ IGNORE_TABLES = False
# Use a single line break after a block element rather than two line breaks.
# NOTE: Requires body width setting to be 0.
-SINGLE_LINE_BREAK = False
+SINGLE_LINE_BREAK = True
# Use double quotation marks when converting the tag.
diff --git a/migration/tables/replacements.json b/migration/tables/replacements.json
index 234715d3..544ef95e 100644
--- a/migration/tables/replacements.json
+++ b/migration/tables/replacements.json
@@ -82,7 +82,7 @@
"blizhniy-vostok": "middle-east",
"blizost": "closeness",
"blokada": "blockade",
- "bob-dilan": "bob-dilan",
+ "bob-dilan": "bob-dylan",
"bog": "god",
"bol": "pain",
"bolotnoe-delo": "bolotnaya-case",
@@ -205,7 +205,7 @@
"erich-von-neff": "erich-von-neff",
"erotika": "erotics",
"essay": "essay",
- "estetika": "aestetic",
+ "estetika": "aesthetics",
"etika": "ethics",
"etnos": "ethnics",
"everyday-life": "everyday-life",
@@ -219,7 +219,7 @@
"faktcheking": "fact-checking",
"falsifikatsii": "falsifications",
"family": "family",
- "fanfiki": "fanfiction",
+ "fanfiki": "fan-fiction",
"fantastika": "sci-fi",
"fatalizm": "fatalism",
"fedor-dostoevskiy": "fedor-dostoevsky",
@@ -234,7 +234,7 @@
"folklor": "folklore",
"fotoreportazh": "photoreports",
"france": "france",
- "frants-kafka": "Franz-Kafka",
+ "frants-kafka": "franz-kafka",
"frederik-begbeder": "frederick-begbeder",
"freedom": "freedom",
"friendship": "friendship",
@@ -262,7 +262,7 @@
"graffiti": "graffiti",
"graphics": "graphics",
"gravyura": "engraving",
- "grazhdanskaya-oborona": "grob",
+ "grazhdanskaya-oborona": "grazhdanskaya-oborona",
"gretsiya": "greece",
"gulag": "gulag",
"han-batyy": "khan-batyy",
@@ -332,7 +332,7 @@
"kinoklub": "cinema-club",
"kirill-serebrennikov": "kirill-serebrennikov",
"klassika": "classic",
- "kollektivnoe-bessoznatelnoe": "kollektivnoe-bessoznatelnoe",
+ "kollektivnoe-bessoznatelnoe": "collective-unconscious",
"komediya": "comedy",
"kommunikatsii": "communications",
"kommunizm": "communism",
@@ -429,18 +429,18 @@
"muzey": "museum",
"muzhchiny": "man",
"myshlenie": "thinking",
- "nagornyy-karabah": "nagornyy-karabah",
+ "nagornyy-karabah": "nagorno-karabakh",
"natsionalizm": "nationalism",
"natsionalnaya-ideya": "national-idea",
"natsizm": "nazism",
- "natyurmort": "natyurmort",
+ "natyurmort": "nature-morte",
"nauchpop": "pop-science",
"nbp": "nbp",
"nenavist": "hate",
"neofitsialnaya-literatura": "unofficial-literature",
"neoklassika": "neoclassic",
"neprozrachnye-smysly": "hidden-meanings",
- "neravenstvo": "non-equality",
+ "neravenstvo": "inequality",
"new-year": "new-year",
"neyronauka": "neuro-science",
"neyroseti": "neural-networks",
@@ -458,7 +458,7 @@
"ocherk": "etudes",
"ochevidnyy-nuar": "ochevidnyy-nuar",
"odinochestvo": "loneliness",
- "odna-kniga-odna-istoriya": "odna-kniga-odna-istoriya",
+ "odna-kniga-odna-istoriya": "one-book-one-story",
"okrainy": "outskirts",
"opinions": "opinions",
"oppozitsiya": "opposition",
@@ -467,7 +467,7 @@
"osip-mandelshtam": "osip-mandelshtam",
"oskar-uayld": "oscar-wilde",
"osoznanie": "awareness",
- "otnosheniya": "relationships",
+ "otnosheniya": "relationship",
"pablo-pikasso": "pablo-picasso",
"painting": "painting",
"paintings": "painting",
@@ -613,7 +613,7 @@
"sotsializm": "socialism",
"sotsialnaya-filosofiya": "social-philosophy",
"sotsseti": "social-networks",
- "sotvorenie-tretego-rima": "sotvorenie-tretego-rima",
+ "sotvorenie-tretego-rima": "third-rome",
"sovremennost": "modernity",
"spaces": "spaces",
"spektakl": "spectacles",
@@ -638,7 +638,7 @@
"syurrealizm": "surrealism",
"tales": "tales",
"tanets": "dance",
- "tataro-mongolskoe-igo": "tataro-mongolskoe-igo",
+ "tataro-mongolskoe-igo": "mongol-tatar-yoke",
"tatuirovki": "tattoo",
"technology": "technology",
"televidenie": "tv",
@@ -663,8 +663,8 @@
"trendy": "trends",
"tretiy-reyh": "third-reich",
"triller": "thriller",
- "tsar": "tsar",
- "tsar-edip": "tsar-edip",
+ "tsar": "central-african-republic",
+ "tsar-edip": "oedipus",
"tsarevich-dmitriy": "tsarevich-dmitry",
"tsennosti": "values",
"tsenzura": "censorship",
@@ -702,11 +702,11 @@
"videopoeziya": "video-poetry",
"viktor-astafev": "viktor-astafev",
"viktor-pelevin": "viktor-pelevin",
- "vilgelm-rayh": "vilgelm-rayh",
+ "vilgelm-rayh": "wilhelm-reich",
"vinzavod": "vinzavod",
"violence": "violence",
"visual-culture": "visual-culture",
- "vizualnaya-poeziya": "vizual-poetry",
+ "vizualnaya-poeziya": "visual-poetry",
"vladimir-lenin": "vladimir-lenin",
"vladimir-nabokov": "vladimir-nabokov",
"vladimir-putin": "vladimir-putin",
@@ -716,10 +716,10 @@
"volontery": "volonteurs",
"vong-karvay": "wong-karwai",
"vospominaniya": "memories",
- "vostok": "vostok",
+ "vostok": "east",
"vremya": "time",
"vudi-allen": "woody-allen",
- "vynuzhdennye-otnosheniya": "forced-relationships",
+ "vynuzhdennye-otnosheniya": "forced-relationship",
"war": "war",
"war-in-ukraine-images": "war-in-ukrahine-images",
"women": "women",
diff --git a/migration/tables/users.py b/migration/tables/users.py
index f40f942e..0d51c291 100644
--- a/migration/tables/users.py
+++ b/migration/tables/users.py
@@ -88,9 +88,6 @@ def migrate(entry):
old = res['old_id']
user = User.create(**res.copy())
res['id'] = user.id
- if res['slug'] == 'vorovich':
- print(entry)
- print(res)
return res
def migrate_email_subscription(entry):