diff --git a/.gitignore b/.gitignore index 5ef42bcf..41b0b7ec 100644 --- a/.gitignore +++ b/.gitignore @@ -145,4 +145,5 @@ migration/content/**/*.md .DS_Store dump .vscode -*.sql \ No newline at end of file +*.sql +*.csv \ No newline at end of file diff --git a/migration/__init__.py b/migration/__init__.py index edc2f6b4..74f26c8e 100644 --- a/migration/__init__.py +++ b/migration/__init__.py @@ -1,10 +1,12 @@ ''' cmd managed migration ''' +import csv from datetime import datetime import json import subprocess import sys import os - +import bs4 +import numpy as np # from export import export_email_subscriptions from .export import export_mdx, export_slug from orm.reaction import Reaction @@ -77,6 +79,8 @@ def shouts_handle(storage, args): counter = 0 discours_author = 0 pub_counter = 0 + topics_dataset_bodies = [] + topics_dataset_tlist = [] for entry in storage['shouts']['data']: # slug slug = get_shout_slug(entry) @@ -104,6 +108,14 @@ def shouts_handle(storage, args): counter += 1 line = str(counter+1) + ': ' + shout['slug'] + " @" + author print(line) + b = bs4.BeautifulSoup(shout['body'], 'html.parser') + texts = [] + texts.append(shout['title'].lower().replace(r'[^а-яА-Яa-zA-Z]', '')) + texts = b.findAll(text=True) + topics_dataset_bodies.append(u" ".join([x.strip().lower() for x in texts])) + topics_dataset_tlist.append(shout['topics']) + + np.savetxt('topics_dataset.csv', (topics_dataset_bodies, topics_dataset_tlist), delimiter=',', fmt='%s') print('[migration] ' + str(counter) + ' content items were migrated') print('[migration] ' + str(pub_counter) + ' have been published') diff --git a/migration/tables/comments.py b/migration/tables/comments.py index 0f426476..72dca84f 100644 --- a/migration/tables/comments.py +++ b/migration/tables/comments.py @@ -91,7 +91,7 @@ def migrate(entry, storage): # creating reaction from old rating rr = Reaction.create(**re_reaction_dict) day = (re_reaction_dict.get('createdAt') or ts).replace(hour=0, minute=0, second=0, microsecond=0) - ReactedByDay.create(shout=rr.shout, reaction=rr.id, kind=rr.kind, day=day) + ReactedByDay.create(shout=rr.shout, reaction=rr.id, kind=rr.kind, day=day, replyTo=reaction.id) except Exception as e: print('[migration] comment rating error: %r' % re_reaction_dict) diff --git a/requirements.txt b/requirements.txt index 1ce909d6..db3565b9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ frontmatter +numpy aioredis ariadne pyjwt>=2.0.0 diff --git a/services/stat/reacted.py b/services/stat/reacted.py index 40a71cca..21b2dbcc 100644 --- a/services/stat/reacted.py +++ b/services/stat/reacted.py @@ -13,7 +13,7 @@ class ReactedByDay(Base): id = None reaction = Column(ForeignKey("reaction.id"), primary_key = True) shout = Column(ForeignKey('shout.slug'), primary_key=True) - reply = Column(ForeignKey('reaction.id'), nullable=True) + replyTo = Column(ForeignKey('reaction.id'), nullable=True) kind: int = Column(Enum(ReactionKind), nullable=False, comment="Reaction kind") day = Column(DateTime, primary_key=True, default=datetime.now)