format and lint orm
@@ -1,4 +1,4 @@
|
||||
''' cmd managed migration '''
|
||||
""" cmd managed migration """
|
||||
import csv
|
||||
import asyncio
|
||||
from datetime import datetime
|
||||
@@ -8,6 +8,7 @@ import sys
|
||||
import os
|
||||
import bs4
|
||||
import numpy as np
|
||||
|
||||
# from export import export_email_subscriptions
|
||||
from .export import export_mdx, export_slug
|
||||
from orm.reaction import Reaction
|
||||
@@ -21,293 +22,308 @@ from .tables.comments import migrate_2stage as migrateComment_2stage
|
||||
from settings import DB_URL
|
||||
|
||||
|
||||
TODAY = datetime.strftime(datetime.now(), '%Y%m%d')
|
||||
TODAY = datetime.strftime(datetime.now(), "%Y%m%d")
|
||||
|
||||
OLD_DATE = '2016-03-05 22:22:00.350000'
|
||||
OLD_DATE = "2016-03-05 22:22:00.350000"
|
||||
|
||||
|
||||
def users_handle(storage):
|
||||
''' migrating users first '''
|
||||
counter = 0
|
||||
id_map = {}
|
||||
print('[migration] migrating %d users' % (len(storage['users']['data'])))
|
||||
for entry in storage['users']['data']:
|
||||
oid = entry['_id']
|
||||
user = migrateUser(entry)
|
||||
storage['users']['by_oid'][oid] = user # full
|
||||
del user['password']
|
||||
del user['notifications']
|
||||
del user['emailConfirmed']
|
||||
del user['username']
|
||||
del user['email']
|
||||
storage['users']['by_slug'][user['slug']] = user # public
|
||||
id_map[user['oid']] = user['slug']
|
||||
counter += 1
|
||||
ce = 0
|
||||
for entry in storage['users']['data']:
|
||||
ce += migrateUser_2stage(entry, id_map)
|
||||
return storage
|
||||
"""migrating users first"""
|
||||
counter = 0
|
||||
id_map = {}
|
||||
print("[migration] migrating %d users" % (len(storage["users"]["data"])))
|
||||
for entry in storage["users"]["data"]:
|
||||
oid = entry["_id"]
|
||||
user = migrateUser(entry)
|
||||
storage["users"]["by_oid"][oid] = user # full
|
||||
del user["password"]
|
||||
del user["notifications"]
|
||||
del user["emailConfirmed"]
|
||||
del user["username"]
|
||||
del user["email"]
|
||||
storage["users"]["by_slug"][user["slug"]] = user # public
|
||||
id_map[user["oid"]] = user["slug"]
|
||||
counter += 1
|
||||
ce = 0
|
||||
for entry in storage["users"]["data"]:
|
||||
ce += migrateUser_2stage(entry, id_map)
|
||||
return storage
|
||||
|
||||
|
||||
def topics_handle(storage):
|
||||
''' topics from categories and tags '''
|
||||
counter = 0
|
||||
for t in (storage['topics']['tags'] + storage['topics']['cats']):
|
||||
if t['slug'] in storage['replacements']:
|
||||
t['slug'] = storage['replacements'][t['slug']]
|
||||
topic = migrateTopic(t)
|
||||
storage['topics']['by_oid'][t['_id']] = topic
|
||||
storage['topics']['by_slug'][t['slug']] = topic
|
||||
counter += 1
|
||||
else:
|
||||
print('[migration] topic ' + t['slug'] + ' ignored')
|
||||
for oldslug, newslug in storage['replacements'].items():
|
||||
if oldslug != newslug and oldslug in storage['topics']['by_slug']:
|
||||
oid = storage['topics']['by_slug'][oldslug]['_id']
|
||||
del storage['topics']['by_slug'][oldslug]
|
||||
storage['topics']['by_oid'][oid] = storage['topics']['by_slug'][newslug]
|
||||
print('[migration] ' + str(counter) + ' topics migrated')
|
||||
print('[migration] ' + str(len(storage['topics']
|
||||
['by_oid'].values())) + ' topics by oid')
|
||||
print('[migration] ' + str(len(storage['topics']
|
||||
['by_slug'].values())) + ' topics by slug')
|
||||
# raise Exception
|
||||
return storage
|
||||
"""topics from categories and tags"""
|
||||
counter = 0
|
||||
for t in storage["topics"]["tags"] + storage["topics"]["cats"]:
|
||||
if t["slug"] in storage["replacements"]:
|
||||
t["slug"] = storage["replacements"][t["slug"]]
|
||||
topic = migrateTopic(t)
|
||||
storage["topics"]["by_oid"][t["_id"]] = topic
|
||||
storage["topics"]["by_slug"][t["slug"]] = topic
|
||||
counter += 1
|
||||
else:
|
||||
print("[migration] topic " + t["slug"] + " ignored")
|
||||
for oldslug, newslug in storage["replacements"].items():
|
||||
if oldslug != newslug and oldslug in storage["topics"]["by_slug"]:
|
||||
oid = storage["topics"]["by_slug"][oldslug]["_id"]
|
||||
del storage["topics"]["by_slug"][oldslug]
|
||||
storage["topics"]["by_oid"][oid] = storage["topics"]["by_slug"][newslug]
|
||||
print("[migration] " + str(counter) + " topics migrated")
|
||||
print(
|
||||
"[migration] "
|
||||
+ str(len(storage["topics"]["by_oid"].values()))
|
||||
+ " topics by oid"
|
||||
)
|
||||
print(
|
||||
"[migration] "
|
||||
+ str(len(storage["topics"]["by_slug"].values()))
|
||||
+ " topics by slug"
|
||||
)
|
||||
# raise Exception
|
||||
return storage
|
||||
|
||||
|
||||
async def shouts_handle(storage, args):
|
||||
''' migrating content items one by one '''
|
||||
counter = 0
|
||||
discours_author = 0
|
||||
pub_counter = 0
|
||||
topics_dataset_bodies = []
|
||||
topics_dataset_tlist = []
|
||||
for entry in storage['shouts']['data']:
|
||||
# slug
|
||||
slug = get_shout_slug(entry)
|
||||
"""migrating content items one by one"""
|
||||
counter = 0
|
||||
discours_author = 0
|
||||
pub_counter = 0
|
||||
topics_dataset_bodies = []
|
||||
topics_dataset_tlist = []
|
||||
for entry in storage["shouts"]["data"]:
|
||||
# slug
|
||||
slug = get_shout_slug(entry)
|
||||
|
||||
# single slug mode
|
||||
if '-' in args and slug not in args: continue
|
||||
# single slug mode
|
||||
if "-" in args and slug not in args:
|
||||
continue
|
||||
|
||||
# migrate
|
||||
shout = await migrateShout(entry, storage)
|
||||
storage['shouts']['by_oid'][entry['_id']] = shout
|
||||
storage['shouts']['by_slug'][shout['slug']] = shout
|
||||
# shouts.topics
|
||||
if not shout['topics']: print('[migration] no topics!')
|
||||
# migrate
|
||||
shout = await migrateShout(entry, storage)
|
||||
storage["shouts"]["by_oid"][entry["_id"]] = shout
|
||||
storage["shouts"]["by_slug"][shout["slug"]] = shout
|
||||
# shouts.topics
|
||||
if not shout["topics"]:
|
||||
print("[migration] no topics!")
|
||||
|
||||
# with author
|
||||
author = shout['authors'][0].slug
|
||||
if author == 'discours': discours_author += 1
|
||||
# print('[migration] ' + shout['slug'] + ' with author ' + author)
|
||||
# with author
|
||||
author = shout["authors"][0].slug
|
||||
if author == "discours":
|
||||
discours_author += 1
|
||||
# print('[migration] ' + shout['slug'] + ' with author ' + author)
|
||||
|
||||
if entry.get('published'):
|
||||
if 'mdx' in args: export_mdx(shout)
|
||||
pub_counter += 1
|
||||
if entry.get("published"):
|
||||
if "mdx" in args:
|
||||
export_mdx(shout)
|
||||
pub_counter += 1
|
||||
|
||||
# print main counter
|
||||
counter += 1
|
||||
line = str(counter+1) + ': ' + shout['slug'] + " @" + author
|
||||
print(line)
|
||||
b = bs4.BeautifulSoup(shout['body'], 'html.parser')
|
||||
texts = []
|
||||
texts.append(shout['title'].lower().replace(r'[^а-яА-Яa-zA-Z]', ''))
|
||||
texts = b.findAll(text=True)
|
||||
topics_dataset_bodies.append(u" ".join([x.strip().lower() for x in texts]))
|
||||
topics_dataset_tlist.append(shout['topics'])
|
||||
|
||||
# np.savetxt('topics_dataset.csv', (topics_dataset_bodies, topics_dataset_tlist), delimiter=',', fmt='%s')
|
||||
# print main counter
|
||||
counter += 1
|
||||
line = str(counter + 1) + ": " + shout["slug"] + " @" + author
|
||||
print(line)
|
||||
b = bs4.BeautifulSoup(shout["body"], "html.parser")
|
||||
texts = []
|
||||
texts.append(shout["title"].lower().replace(r"[^а-яА-Яa-zA-Z]", ""))
|
||||
texts = b.findAll(text=True)
|
||||
topics_dataset_bodies.append(" ".join([x.strip().lower() for x in texts]))
|
||||
topics_dataset_tlist.append(shout["topics"])
|
||||
|
||||
print('[migration] ' + str(counter) + ' content items were migrated')
|
||||
print('[migration] ' + str(pub_counter) + ' have been published')
|
||||
print('[migration] ' + str(discours_author) + ' authored by @discours')
|
||||
return storage
|
||||
# np.savetxt('topics_dataset.csv', (topics_dataset_bodies, topics_dataset_tlist), delimiter=',', fmt='%s')
|
||||
|
||||
print("[migration] " + str(counter) + " content items were migrated")
|
||||
print("[migration] " + str(pub_counter) + " have been published")
|
||||
print("[migration] " + str(discours_author) + " authored by @discours")
|
||||
return storage
|
||||
|
||||
|
||||
async def comments_handle(storage):
|
||||
id_map = {}
|
||||
ignored_counter = 0
|
||||
missed_shouts = {}
|
||||
for oldcomment in storage['reactions']['data']:
|
||||
if not oldcomment.get('deleted'):
|
||||
reaction = await migrateComment(oldcomment, storage)
|
||||
if type(reaction) == str:
|
||||
missed_shouts[reaction] = oldcomment
|
||||
elif type(reaction) == Reaction:
|
||||
reaction = reaction.dict()
|
||||
id = reaction['id']
|
||||
oid = reaction['oid']
|
||||
id_map[oid] = id
|
||||
else:
|
||||
ignored_counter += 1
|
||||
id_map = {}
|
||||
ignored_counter = 0
|
||||
missed_shouts = {}
|
||||
for oldcomment in storage["reactions"]["data"]:
|
||||
if not oldcomment.get("deleted"):
|
||||
reaction = await migrateComment(oldcomment, storage)
|
||||
if type(reaction) == str:
|
||||
missed_shouts[reaction] = oldcomment
|
||||
elif type(reaction) == Reaction:
|
||||
reaction = reaction.dict()
|
||||
id = reaction["id"]
|
||||
oid = reaction["oid"]
|
||||
id_map[oid] = id
|
||||
else:
|
||||
ignored_counter += 1
|
||||
|
||||
for reaction in storage['reactions']['data']: migrateComment_2stage(
|
||||
reaction, id_map)
|
||||
print('[migration] ' + str(len(id_map)) + ' comments migrated')
|
||||
print('[migration] ' + str(ignored_counter) + ' comments ignored')
|
||||
print('[migration] ' + str(len(missed_shouts.keys())) +
|
||||
' commented shouts missed')
|
||||
missed_counter = 0
|
||||
for missed in missed_shouts.values():
|
||||
missed_counter += len(missed)
|
||||
print('[migration] ' + str(missed_counter) + ' comments dropped')
|
||||
return storage
|
||||
for reaction in storage["reactions"]["data"]:
|
||||
migrateComment_2stage(reaction, id_map)
|
||||
print("[migration] " + str(len(id_map)) + " comments migrated")
|
||||
print("[migration] " + str(ignored_counter) + " comments ignored")
|
||||
print("[migration] " + str(len(missed_shouts.keys())) + " commented shouts missed")
|
||||
missed_counter = 0
|
||||
for missed in missed_shouts.values():
|
||||
missed_counter += len(missed)
|
||||
print("[migration] " + str(missed_counter) + " comments dropped")
|
||||
return storage
|
||||
|
||||
|
||||
def bson_handle():
|
||||
# decode bson # preparing data
|
||||
from migration import bson2json
|
||||
bson2json.json_tables()
|
||||
# decode bson # preparing data
|
||||
from migration import bson2json
|
||||
|
||||
bson2json.json_tables()
|
||||
|
||||
|
||||
def export_one(slug, storage, args = None):
|
||||
topics_handle(storage)
|
||||
users_handle(storage)
|
||||
shouts_handle(storage, args)
|
||||
export_slug(slug, storage)
|
||||
def export_one(slug, storage, args=None):
|
||||
topics_handle(storage)
|
||||
users_handle(storage)
|
||||
shouts_handle(storage, args)
|
||||
export_slug(slug, storage)
|
||||
|
||||
|
||||
async def all_handle(storage, args):
|
||||
print('[migration] handle everything')
|
||||
users_handle(storage)
|
||||
topics_handle(storage)
|
||||
await shouts_handle(storage, args)
|
||||
await comments_handle(storage)
|
||||
# export_email_subscriptions()
|
||||
print('[migration] done!')
|
||||
print("[migration] handle everything")
|
||||
users_handle(storage)
|
||||
topics_handle(storage)
|
||||
await shouts_handle(storage, args)
|
||||
await comments_handle(storage)
|
||||
# export_email_subscriptions()
|
||||
print("[migration] done!")
|
||||
|
||||
|
||||
def data_load():
|
||||
storage = {
|
||||
'content_items': {
|
||||
'by_oid': {},
|
||||
'by_slug': {},
|
||||
},
|
||||
'shouts': {
|
||||
'by_oid': {},
|
||||
'by_slug': {},
|
||||
'data': []
|
||||
},
|
||||
'reactions': {
|
||||
'by_oid': {},
|
||||
'by_slug': {},
|
||||
'by_content': {},
|
||||
'data': []
|
||||
},
|
||||
'topics': {
|
||||
'by_oid': {},
|
||||
'by_slug': {},
|
||||
'cats': [],
|
||||
'tags': [],
|
||||
},
|
||||
'users': {
|
||||
'by_oid': {},
|
||||
'by_slug': {},
|
||||
'data': []
|
||||
},
|
||||
'replacements': json.loads(open('migration/tables/replacements.json').read())
|
||||
}
|
||||
users_data = []
|
||||
tags_data = []
|
||||
cats_data = []
|
||||
comments_data = []
|
||||
content_data = []
|
||||
try:
|
||||
users_data = json.loads(open('migration/data/users.json').read())
|
||||
print('[migration.load] ' + str(len(users_data)) + ' users ')
|
||||
tags_data = json.loads(open('migration/data/tags.json').read())
|
||||
storage['topics']['tags'] = tags_data
|
||||
print('[migration.load] ' + str(len(tags_data)) + ' tags ')
|
||||
cats_data = json.loads(
|
||||
open('migration/data/content_item_categories.json').read())
|
||||
storage['topics']['cats'] = cats_data
|
||||
print('[migration.load] ' + str(len(cats_data)) + ' cats ')
|
||||
comments_data = json.loads(open('migration/data/comments.json').read())
|
||||
storage['reactions']['data'] = comments_data
|
||||
print('[migration.load] ' + str(len(comments_data)) + ' comments ')
|
||||
content_data = json.loads(open('migration/data/content_items.json').read())
|
||||
storage['shouts']['data'] = content_data
|
||||
print('[migration.load] ' + str(len(content_data)) + ' content items ')
|
||||
# fill out storage
|
||||
for x in users_data:
|
||||
storage['users']['by_oid'][x['_id']] = x
|
||||
# storage['users']['by_slug'][x['slug']] = x
|
||||
# no user.slug yet
|
||||
print('[migration.load] ' + str(len(storage['users']
|
||||
['by_oid'].keys())) + ' users by oid')
|
||||
for x in tags_data:
|
||||
storage['topics']['by_oid'][x['_id']] = x
|
||||
storage['topics']['by_slug'][x['slug']] = x
|
||||
for x in cats_data:
|
||||
storage['topics']['by_oid'][x['_id']] = x
|
||||
storage['topics']['by_slug'][x['slug']] = x
|
||||
print('[migration.load] ' + str(len(storage['topics']
|
||||
['by_slug'].keys())) + ' topics by slug')
|
||||
for item in content_data:
|
||||
slug = get_shout_slug(item)
|
||||
storage['content_items']['by_slug'][slug] = item
|
||||
storage['content_items']['by_oid'][item['_id']] = item
|
||||
print('[migration.load] ' + str(len(content_data)) + ' content items')
|
||||
for x in comments_data:
|
||||
storage['reactions']['by_oid'][x['_id']] = x
|
||||
cid = x['contentItem']
|
||||
storage['reactions']['by_content'][cid] = x
|
||||
ci = storage['content_items']['by_oid'].get(cid, {})
|
||||
if 'slug' in ci: storage['reactions']['by_slug'][ci['slug']] = x
|
||||
print('[migration.load] ' + str(len(storage['reactions']
|
||||
['by_content'].keys())) + ' with comments')
|
||||
except Exception as e: raise e
|
||||
storage['users']['data'] = users_data
|
||||
storage['topics']['tags'] = tags_data
|
||||
storage['topics']['cats'] = cats_data
|
||||
storage['shouts']['data'] = content_data
|
||||
storage['reactions']['data'] = comments_data
|
||||
return storage
|
||||
storage = {
|
||||
"content_items": {
|
||||
"by_oid": {},
|
||||
"by_slug": {},
|
||||
},
|
||||
"shouts": {"by_oid": {}, "by_slug": {}, "data": []},
|
||||
"reactions": {"by_oid": {}, "by_slug": {}, "by_content": {}, "data": []},
|
||||
"topics": {
|
||||
"by_oid": {},
|
||||
"by_slug": {},
|
||||
"cats": [],
|
||||
"tags": [],
|
||||
},
|
||||
"users": {"by_oid": {}, "by_slug": {}, "data": []},
|
||||
"replacements": json.loads(open("migration/tables/replacements.json").read()),
|
||||
}
|
||||
users_data = []
|
||||
tags_data = []
|
||||
cats_data = []
|
||||
comments_data = []
|
||||
content_data = []
|
||||
try:
|
||||
users_data = json.loads(open("migration/data/users.json").read())
|
||||
print("[migration.load] " + str(len(users_data)) + " users ")
|
||||
tags_data = json.loads(open("migration/data/tags.json").read())
|
||||
storage["topics"]["tags"] = tags_data
|
||||
print("[migration.load] " + str(len(tags_data)) + " tags ")
|
||||
cats_data = json.loads(
|
||||
open("migration/data/content_item_categories.json").read()
|
||||
)
|
||||
storage["topics"]["cats"] = cats_data
|
||||
print("[migration.load] " + str(len(cats_data)) + " cats ")
|
||||
comments_data = json.loads(open("migration/data/comments.json").read())
|
||||
storage["reactions"]["data"] = comments_data
|
||||
print("[migration.load] " + str(len(comments_data)) + " comments ")
|
||||
content_data = json.loads(open("migration/data/content_items.json").read())
|
||||
storage["shouts"]["data"] = content_data
|
||||
print("[migration.load] " + str(len(content_data)) + " content items ")
|
||||
# fill out storage
|
||||
for x in users_data:
|
||||
storage["users"]["by_oid"][x["_id"]] = x
|
||||
# storage['users']['by_slug'][x['slug']] = x
|
||||
# no user.slug yet
|
||||
print(
|
||||
"[migration.load] "
|
||||
+ str(len(storage["users"]["by_oid"].keys()))
|
||||
+ " users by oid"
|
||||
)
|
||||
for x in tags_data:
|
||||
storage["topics"]["by_oid"][x["_id"]] = x
|
||||
storage["topics"]["by_slug"][x["slug"]] = x
|
||||
for x in cats_data:
|
||||
storage["topics"]["by_oid"][x["_id"]] = x
|
||||
storage["topics"]["by_slug"][x["slug"]] = x
|
||||
print(
|
||||
"[migration.load] "
|
||||
+ str(len(storage["topics"]["by_slug"].keys()))
|
||||
+ " topics by slug"
|
||||
)
|
||||
for item in content_data:
|
||||
slug = get_shout_slug(item)
|
||||
storage["content_items"]["by_slug"][slug] = item
|
||||
storage["content_items"]["by_oid"][item["_id"]] = item
|
||||
print("[migration.load] " + str(len(content_data)) + " content items")
|
||||
for x in comments_data:
|
||||
storage["reactions"]["by_oid"][x["_id"]] = x
|
||||
cid = x["contentItem"]
|
||||
storage["reactions"]["by_content"][cid] = x
|
||||
ci = storage["content_items"]["by_oid"].get(cid, {})
|
||||
if "slug" in ci:
|
||||
storage["reactions"]["by_slug"][ci["slug"]] = x
|
||||
print(
|
||||
"[migration.load] "
|
||||
+ str(len(storage["reactions"]["by_content"].keys()))
|
||||
+ " with comments"
|
||||
)
|
||||
except Exception as e:
|
||||
raise e
|
||||
storage["users"]["data"] = users_data
|
||||
storage["topics"]["tags"] = tags_data
|
||||
storage["topics"]["cats"] = cats_data
|
||||
storage["shouts"]["data"] = content_data
|
||||
storage["reactions"]["data"] = comments_data
|
||||
return storage
|
||||
|
||||
|
||||
def mongo_download(url):
|
||||
if not url: raise Exception('\n\nYou should set MONGODB_URL environment variable\n')
|
||||
print('[migration] mongodump ' + url)
|
||||
subprocess.call([
|
||||
'mongodump',
|
||||
'--uri', url + '/?authSource=admin',
|
||||
'--forceTableScan',
|
||||
], stderr = subprocess.STDOUT)
|
||||
if not url:
|
||||
raise Exception("\n\nYou should set MONGODB_URL enviroment variable\n")
|
||||
print("[migration] mongodump " + url)
|
||||
subprocess.call(
|
||||
[
|
||||
"mongodump",
|
||||
"--uri",
|
||||
url + "/?authSource=admin",
|
||||
"--forceTableScan",
|
||||
],
|
||||
stderr=subprocess.STDOUT,
|
||||
)
|
||||
|
||||
|
||||
def create_pgdump():
|
||||
pgurl = DB_URL
|
||||
if not pgurl: raise Exception('\n\nYou should set DATABASE_URL environment variable\n')
|
||||
subprocess.call(
|
||||
[ 'pg_dump', pgurl, '-f', TODAY + '-pgdump.sql'],
|
||||
stderr = subprocess.STDOUT
|
||||
)
|
||||
subprocess.call([
|
||||
'scp',
|
||||
TODAY + '-pgdump.sql',
|
||||
'root@build.discours.io:/root/.'
|
||||
])
|
||||
pgurl = DB_URL
|
||||
if not pgurl:
|
||||
raise Exception("\n\nYou should set DATABASE_URL enviroment variable\n")
|
||||
subprocess.call(
|
||||
["pg_dump", pgurl, "-f", TODAY + "-pgdump.sql"], stderr=subprocess.STDOUT
|
||||
)
|
||||
subprocess.call(["scp", TODAY + "-pgdump.sql", "root@build.discours.io:/root/."])
|
||||
|
||||
|
||||
async def handle_auto():
|
||||
print('[migration] no command given, auto mode')
|
||||
url = os.getenv('MONGODB_URL')
|
||||
if url: mongo_download(url)
|
||||
bson_handle()
|
||||
await all_handle(data_load(), sys.argv)
|
||||
create_pgdump()
|
||||
print("[migration] no command given, auto mode")
|
||||
url = os.getenv("MONGODB_URL")
|
||||
if url:
|
||||
mongo_download(url)
|
||||
bson_handle()
|
||||
await all_handle(data_load(), sys.argv)
|
||||
create_pgdump()
|
||||
|
||||
|
||||
async def main():
|
||||
if len(sys.argv) > 1:
|
||||
cmd=sys.argv[1]
|
||||
if type(cmd) == str: print('[migration] command: ' + cmd)
|
||||
await handle_auto()
|
||||
else:
|
||||
print('[migration] usage: python server.py migrate')
|
||||
if len(sys.argv) > 1:
|
||||
cmd = sys.argv[1]
|
||||
if type(cmd) == str:
|
||||
print("[migration] command: " + cmd)
|
||||
await handle_auto()
|
||||
else:
|
||||
print("[migration] usage: python server.py migrate")
|
||||
|
||||
|
||||
def migrate():
|
||||
loop = asyncio.get_event_loop()
|
||||
loop.run_until_complete(main())
|
||||
|
||||
if __name__ == '__main__':
|
||||
migrate()
|
||||
loop = asyncio.get_event_loop()
|
||||
loop.run_until_complete(main())
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
migrate()
|
||||
|
@@ -4,25 +4,27 @@ import json
|
||||
|
||||
from .utils import DateTimeEncoder
|
||||
|
||||
def json_tables():
|
||||
print('[migration] unpack dump/discours/*.bson to migration/data/*.json')
|
||||
data = {
|
||||
"content_items": [],
|
||||
"content_item_categories": [],
|
||||
"tags": [],
|
||||
"email_subscriptions": [],
|
||||
"users": [],
|
||||
"comments": []
|
||||
}
|
||||
for table in data.keys():
|
||||
lc = []
|
||||
with open('dump/discours/'+table+'.bson', 'rb') as f:
|
||||
bs = f.read()
|
||||
f.close()
|
||||
base = 0
|
||||
while base < len(bs):
|
||||
base, d = bson.decode_document(bs, base)
|
||||
lc.append(d)
|
||||
data[table] = lc
|
||||
open(os.getcwd() + '/migration/data/'+table+'.json', 'w').write(json.dumps(lc,cls=DateTimeEncoder))
|
||||
|
||||
def json_tables():
|
||||
print("[migration] unpack dump/discours/*.bson to migration/data/*.json")
|
||||
data = {
|
||||
"content_items": [],
|
||||
"content_item_categories": [],
|
||||
"tags": [],
|
||||
"email_subscriptions": [],
|
||||
"users": [],
|
||||
"comments": [],
|
||||
}
|
||||
for table in data.keys():
|
||||
lc = []
|
||||
with open("dump/discours/" + table + ".bson", "rb") as f:
|
||||
bs = f.read()
|
||||
f.close()
|
||||
base = 0
|
||||
while base < len(bs):
|
||||
base, d = bson.decode_document(bs, base)
|
||||
lc.append(d)
|
||||
data[table] = lc
|
||||
open(os.getcwd() + "/migration/data/" + table + ".json", "w").write(
|
||||
json.dumps(lc, cls=DateTimeEncoder)
|
||||
)
|
||||
|
@@ -1,4 +1,3 @@
|
||||
|
||||
from datetime import datetime
|
||||
import json
|
||||
import os
|
||||
@@ -6,100 +5,150 @@ import frontmatter
|
||||
from .extract import extract_html, prepare_html_body
|
||||
from .utils import DateTimeEncoder
|
||||
|
||||
OLD_DATE = '2016-03-05 22:22:00.350000'
|
||||
EXPORT_DEST = '../discoursio-web/data/'
|
||||
parentDir = '/'.join(os.getcwd().split('/')[:-1])
|
||||
contentDir = parentDir + '/discoursio-web/content/'
|
||||
OLD_DATE = "2016-03-05 22:22:00.350000"
|
||||
EXPORT_DEST = "../discoursio-web/data/"
|
||||
parentDir = "/".join(os.getcwd().split("/")[:-1])
|
||||
contentDir = parentDir + "/discoursio-web/content/"
|
||||
ts = datetime.now()
|
||||
|
||||
|
||||
def get_metadata(r):
|
||||
authors = []
|
||||
for a in r['authors']:
|
||||
authors.append({ # a short version for public listings
|
||||
'slug': a.slug or 'discours',
|
||||
'name': a.name or 'Дискурс',
|
||||
'userpic': a.userpic or 'https://discours.io/static/img/discours.png'
|
||||
})
|
||||
metadata = {}
|
||||
metadata['title'] = r.get('title', '').replace('{', '(').replace('}', ')')
|
||||
metadata['authors'] = authors
|
||||
metadata['createdAt'] = r.get('createdAt', ts)
|
||||
metadata['layout'] = r['layout']
|
||||
metadata['topics'] = [topic for topic in r['topics']]
|
||||
metadata['topics'].sort()
|
||||
if r.get('cover', False): metadata['cover'] = r.get('cover')
|
||||
return metadata
|
||||
|
||||
authors = []
|
||||
for a in r["authors"]:
|
||||
authors.append(
|
||||
{ # a short version for public listings
|
||||
"slug": a.slug or "discours",
|
||||
"name": a.name or "Дискурс",
|
||||
"userpic": a.userpic or "https://discours.io/static/img/discours.png",
|
||||
}
|
||||
)
|
||||
metadata = {}
|
||||
metadata["title"] = r.get("title", "").replace("{", "(").replace("}", ")")
|
||||
metadata["authors"] = authors
|
||||
metadata["createdAt"] = r.get("createdAt", ts)
|
||||
metadata["layout"] = r["layout"]
|
||||
metadata["topics"] = [topic for topic in r["topics"]]
|
||||
metadata["topics"].sort()
|
||||
if r.get("cover", False):
|
||||
metadata["cover"] = r.get("cover")
|
||||
return metadata
|
||||
|
||||
|
||||
def export_mdx(r):
|
||||
# print('[export] mdx %s' % r['slug'])
|
||||
content = ''
|
||||
metadata = get_metadata(r)
|
||||
content = frontmatter.dumps(frontmatter.Post(r['body'], **metadata))
|
||||
ext = 'mdx'
|
||||
filepath = contentDir + r['slug']
|
||||
bc = bytes(content,'utf-8').decode('utf-8','ignore')
|
||||
open(filepath + '.' + ext, 'w').write(bc)
|
||||
# print('[export] mdx %s' % r['slug'])
|
||||
content = ""
|
||||
metadata = get_metadata(r)
|
||||
content = frontmatter.dumps(frontmatter.Post(r["body"], **metadata))
|
||||
ext = "mdx"
|
||||
filepath = contentDir + r["slug"]
|
||||
bc = bytes(content, "utf-8").decode("utf-8", "ignore")
|
||||
open(filepath + "." + ext, "w").write(bc)
|
||||
|
||||
|
||||
def export_body(shout, storage):
|
||||
entry = storage['content_items']['by_oid'][shout['oid']]
|
||||
if entry:
|
||||
shout['body'] = prepare_html_body(entry) # prepare_md_body(entry)
|
||||
export_mdx(shout)
|
||||
print('[export] html for %s' % shout['slug'])
|
||||
body = extract_html(entry)
|
||||
open(contentDir + shout['slug'] + '.html', 'w').write(body)
|
||||
else:
|
||||
raise Exception('no content_items entry found')
|
||||
entry = storage["content_items"]["by_oid"][shout["oid"]]
|
||||
if entry:
|
||||
shout["body"] = prepare_html_body(entry) # prepare_md_body(entry)
|
||||
export_mdx(shout)
|
||||
print("[export] html for %s" % shout["slug"])
|
||||
body = extract_html(entry)
|
||||
open(contentDir + shout["slug"] + ".html", "w").write(body)
|
||||
else:
|
||||
raise Exception("no content_items entry found")
|
||||
|
||||
|
||||
def export_slug(slug, storage):
|
||||
shout = storage['shouts']['by_slug'][slug]
|
||||
shout = storage['shouts']['by_slug'].get(slug)
|
||||
assert shout, '[export] no shout found by slug: %s ' % slug
|
||||
author = shout['authors'][0]
|
||||
assert author, '[export] no author error'
|
||||
export_body(shout, storage)
|
||||
shout = storage["shouts"]["by_slug"][slug]
|
||||
shout = storage["shouts"]["by_slug"].get(slug)
|
||||
assert shout, "[export] no shout found by slug: %s " % slug
|
||||
author = shout["authors"][0]
|
||||
assert author, "[export] no author error"
|
||||
export_body(shout, storage)
|
||||
|
||||
|
||||
def export_email_subscriptions():
|
||||
email_subscriptions_data = json.loads(open('migration/data/email_subscriptions.json').read())
|
||||
for data in email_subscriptions_data:
|
||||
# migrate_email_subscription(data)
|
||||
pass
|
||||
print('[migration] ' + str(len(email_subscriptions_data)) + ' email subscriptions exported')
|
||||
email_subscriptions_data = json.loads(
|
||||
open("migration/data/email_subscriptions.json").read()
|
||||
)
|
||||
for data in email_subscriptions_data:
|
||||
# migrate_email_subscription(data)
|
||||
pass
|
||||
print(
|
||||
"[migration] "
|
||||
+ str(len(email_subscriptions_data))
|
||||
+ " email subscriptions exported"
|
||||
)
|
||||
|
||||
|
||||
def export_shouts(storage):
|
||||
# update what was just migrated or load json again
|
||||
if len(storage['users']['by_slugs'].keys()) == 0:
|
||||
storage['users']['by_slugs'] = json.loads(open(EXPORT_DEST + 'authors.json').read())
|
||||
print('[migration] ' + str(len(storage['users']['by_slugs'].keys())) + ' exported authors ')
|
||||
if len(storage['shouts']['by_slugs'].keys()) == 0:
|
||||
storage['shouts']['by_slugs'] = json.loads(open(EXPORT_DEST + 'articles.json').read())
|
||||
print('[migration] ' + str(len(storage['shouts']['by_slugs'].keys())) + ' exported articles ')
|
||||
for slug in storage['shouts']['by_slugs'].keys(): export_slug(slug, storage)
|
||||
# update what was just migrated or load json again
|
||||
if len(storage["users"]["by_slugs"].keys()) == 0:
|
||||
storage["users"]["by_slugs"] = json.loads(
|
||||
open(EXPORT_DEST + "authors.json").read()
|
||||
)
|
||||
print(
|
||||
"[migration] "
|
||||
+ str(len(storage["users"]["by_slugs"].keys()))
|
||||
+ " exported authors "
|
||||
)
|
||||
if len(storage["shouts"]["by_slugs"].keys()) == 0:
|
||||
storage["shouts"]["by_slugs"] = json.loads(
|
||||
open(EXPORT_DEST + "articles.json").read()
|
||||
)
|
||||
print(
|
||||
"[migration] "
|
||||
+ str(len(storage["shouts"]["by_slugs"].keys()))
|
||||
+ " exported articles "
|
||||
)
|
||||
for slug in storage["shouts"]["by_slugs"].keys():
|
||||
export_slug(slug, storage)
|
||||
|
||||
def export_json(export_articles = {}, export_authors = {}, export_topics = {}, export_comments = {}):
|
||||
open(EXPORT_DEST + 'authors.json', 'w').write(json.dumps(export_authors,
|
||||
cls=DateTimeEncoder,
|
||||
indent=4,
|
||||
sort_keys=True,
|
||||
ensure_ascii=False))
|
||||
print('[migration] ' + str(len(export_authors.items())) + ' authors exported')
|
||||
open(EXPORT_DEST + 'topics.json', 'w').write(json.dumps(export_topics,
|
||||
cls=DateTimeEncoder,
|
||||
indent=4,
|
||||
sort_keys=True,
|
||||
ensure_ascii=False))
|
||||
print('[migration] ' + str(len(export_topics.keys())) + ' topics exported')
|
||||
|
||||
open(EXPORT_DEST + 'articles.json', 'w').write(json.dumps(export_articles,
|
||||
cls=DateTimeEncoder,
|
||||
indent=4,
|
||||
sort_keys=True,
|
||||
ensure_ascii=False))
|
||||
print('[migration] ' + str(len(export_articles.items())) + ' articles exported')
|
||||
open(EXPORT_DEST + 'comments.json', 'w').write(json.dumps(export_comments,
|
||||
cls=DateTimeEncoder,
|
||||
indent=4,
|
||||
sort_keys=True,
|
||||
ensure_ascii=False))
|
||||
print('[migration] ' + str(len(export_comments.items())) + ' exported articles with comments')
|
||||
|
||||
def export_json(
|
||||
export_articles={}, export_authors={}, export_topics={}, export_comments={}
|
||||
):
|
||||
open(EXPORT_DEST + "authors.json", "w").write(
|
||||
json.dumps(
|
||||
export_authors,
|
||||
cls=DateTimeEncoder,
|
||||
indent=4,
|
||||
sort_keys=True,
|
||||
ensure_ascii=False,
|
||||
)
|
||||
)
|
||||
print("[migration] " + str(len(export_authors.items())) + " authors exported")
|
||||
open(EXPORT_DEST + "topics.json", "w").write(
|
||||
json.dumps(
|
||||
export_topics,
|
||||
cls=DateTimeEncoder,
|
||||
indent=4,
|
||||
sort_keys=True,
|
||||
ensure_ascii=False,
|
||||
)
|
||||
)
|
||||
print("[migration] " + str(len(export_topics.keys())) + " topics exported")
|
||||
|
||||
open(EXPORT_DEST + "articles.json", "w").write(
|
||||
json.dumps(
|
||||
export_articles,
|
||||
cls=DateTimeEncoder,
|
||||
indent=4,
|
||||
sort_keys=True,
|
||||
ensure_ascii=False,
|
||||
)
|
||||
)
|
||||
print("[migration] " + str(len(export_articles.items())) + " articles exported")
|
||||
open(EXPORT_DEST + "comments.json", "w").write(
|
||||
json.dumps(
|
||||
export_comments,
|
||||
cls=DateTimeEncoder,
|
||||
indent=4,
|
||||
sort_keys=True,
|
||||
ensure_ascii=False,
|
||||
)
|
||||
)
|
||||
print(
|
||||
"[migration] "
|
||||
+ str(len(export_comments.items()))
|
||||
+ " exported articles with comments"
|
||||
)
|
||||
|
@@ -3,322 +3,397 @@ import re
|
||||
import base64
|
||||
from .html2text import html2text
|
||||
|
||||
TOOLTIP_REGEX = r'(\/\/\/(.+)\/\/\/)'
|
||||
contentDir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', '..', 'discoursio-web', 'content')
|
||||
s3 = 'https://discours-io.s3.amazonaws.com/'
|
||||
cdn = 'https://assets.discours.io'
|
||||
TOOLTIP_REGEX = r"(\/\/\/(.+)\/\/\/)"
|
||||
contentDir = os.path.join(
|
||||
os.path.dirname(os.path.realpath(__file__)), "..", "..", "discoursio-web", "content"
|
||||
)
|
||||
s3 = "https://discours-io.s3.amazonaws.com/"
|
||||
cdn = "https://assets.discours.io"
|
||||
|
||||
def replace_tooltips(body):
|
||||
# change if you prefer regexp
|
||||
newbody = body
|
||||
matches = list(re.finditer(TOOLTIP_REGEX, body, re.IGNORECASE | re.MULTILINE))[1:]
|
||||
for match in matches:
|
||||
newbody = body.replace(match.group(1), '<Tooltip text="' + match.group(2) + '" />') # NOTE: doesn't work
|
||||
if len(matches) > 0:
|
||||
print('[extract] found %d tooltips' % len(matches))
|
||||
return newbody
|
||||
|
||||
def replace_tooltips(body):
|
||||
# change if you prefer regexp
|
||||
newbody = body
|
||||
matches = list(re.finditer(TOOLTIP_REGEX, body, re.IGNORECASE | re.MULTILINE))[1:]
|
||||
for match in matches:
|
||||
newbody = body.replace(
|
||||
match.group(1), '<Tooltip text="' + match.group(2) + '" />'
|
||||
) # NOTE: doesn't work
|
||||
if len(matches) > 0:
|
||||
print("[extract] found %d tooltips" % len(matches))
|
||||
return newbody
|
||||
|
||||
|
||||
def place_tooltips(body):
|
||||
parts = body.split('&&&')
|
||||
l = len(parts)
|
||||
newparts = list(parts)
|
||||
placed = False
|
||||
if l & 1:
|
||||
if l > 1:
|
||||
i = 1
|
||||
print('[extract] found %d tooltips' % (l-1))
|
||||
for part in parts[1:]:
|
||||
if i & 1:
|
||||
placed = True
|
||||
if 'a class="footnote-url" href=' in part:
|
||||
print('[extract] footnote: ' + part)
|
||||
fn = 'a class="footnote-url" href="'
|
||||
link = part.split(fn,1)[1].split('"', 1)[0]
|
||||
extracted_part = part.split(fn,1)[0] + ' ' + part.split('/', 1)[-1]
|
||||
newparts[i] = '<Tooltip' + (' link="' + link + '" ' if link else '') + '>' + extracted_part + '</Tooltip>'
|
||||
else:
|
||||
newparts[i] = '<Tooltip>%s</Tooltip>' % part
|
||||
# print('[extract] ' + newparts[i])
|
||||
else:
|
||||
# print('[extract] ' + part[:10] + '..')
|
||||
newparts[i] = part
|
||||
i += 1
|
||||
return (''.join(newparts), placed)
|
||||
parts = body.split("&&&")
|
||||
l = len(parts)
|
||||
newparts = list(parts)
|
||||
placed = False
|
||||
if l & 1:
|
||||
if l > 1:
|
||||
i = 1
|
||||
print("[extract] found %d tooltips" % (l - 1))
|
||||
for part in parts[1:]:
|
||||
if i & 1:
|
||||
placed = True
|
||||
if 'a class="footnote-url" href=' in part:
|
||||
print("[extract] footnote: " + part)
|
||||
fn = 'a class="footnote-url" href="'
|
||||
link = part.split(fn, 1)[1].split('"', 1)[0]
|
||||
extracted_part = (
|
||||
part.split(fn, 1)[0] + " " + part.split("/", 1)[-1]
|
||||
)
|
||||
newparts[i] = (
|
||||
"<Tooltip"
|
||||
+ (' link="' + link + '" ' if link else "")
|
||||
+ ">"
|
||||
+ extracted_part
|
||||
+ "</Tooltip>"
|
||||
)
|
||||
else:
|
||||
newparts[i] = "<Tooltip>%s</Tooltip>" % part
|
||||
# print('[extract] ' + newparts[i])
|
||||
else:
|
||||
# print('[extract] ' + part[:10] + '..')
|
||||
newparts[i] = part
|
||||
i += 1
|
||||
return ("".join(newparts), placed)
|
||||
|
||||
|
||||
IMG_REGEX = r"\!\[(.*?)\]\((data\:image\/(png|jpeg|jpg);base64\,((?:[A-Za-z\d+\/]{4})*(?:[A-Za-z\d+\/]{3}=|[A-Za-z\d+\/]{2}==)))\)"
|
||||
|
||||
parentDir = '/'.join(os.getcwd().split('/')[:-1])
|
||||
public = parentDir + '/discoursio-web/public'
|
||||
parentDir = "/".join(os.getcwd().split("/")[:-1])
|
||||
public = parentDir + "/discoursio-web/public"
|
||||
cache = {}
|
||||
|
||||
|
||||
def reextract_images(body, oid):
|
||||
# change if you prefer regexp
|
||||
matches = list(re.finditer(IMG_REGEX, body, re.IGNORECASE | re.MULTILINE))[1:]
|
||||
i = 0
|
||||
for match in matches:
|
||||
print('[extract] image ' + match.group(1))
|
||||
ext = match.group(3)
|
||||
name = oid + str(i)
|
||||
link = public + '/upload/image-' + name + '.' + ext
|
||||
img = match.group(4)
|
||||
title = match.group(1) # NOTE: this is not the title
|
||||
if img not in cache:
|
||||
content = base64.b64decode(img + '==')
|
||||
print(str(len(img)) + ' image bytes been written')
|
||||
open('../' + link, 'wb').write(content)
|
||||
cache[img] = name
|
||||
i += 1
|
||||
else:
|
||||
print('[extract] image cached ' + cache[img])
|
||||
body.replace(str(match), '') # WARNING: this does not work
|
||||
return body
|
||||
def reextract_images(body, oid):
|
||||
# change if you prefer regexp
|
||||
matches = list(re.finditer(IMG_REGEX, body, re.IGNORECASE | re.MULTILINE))[1:]
|
||||
i = 0
|
||||
for match in matches:
|
||||
print("[extract] image " + match.group(1))
|
||||
ext = match.group(3)
|
||||
name = oid + str(i)
|
||||
link = public + "/upload/image-" + name + "." + ext
|
||||
img = match.group(4)
|
||||
title = match.group(1) # NOTE: this is not the title
|
||||
if img not in cache:
|
||||
content = base64.b64decode(img + "==")
|
||||
print(str(len(img)) + " image bytes been written")
|
||||
open("../" + link, "wb").write(content)
|
||||
cache[img] = name
|
||||
i += 1
|
||||
else:
|
||||
print("[extract] image cached " + cache[img])
|
||||
body.replace(
|
||||
str(match), ""
|
||||
) # WARNING: this does not work
|
||||
return body
|
||||
|
||||
|
||||
IMAGES = {
|
||||
'data:image/png': 'png',
|
||||
'data:image/jpg': 'jpg',
|
||||
'data:image/jpeg': 'jpg',
|
||||
"data:image/png": "png",
|
||||
"data:image/jpg": "jpg",
|
||||
"data:image/jpeg": "jpg",
|
||||
}
|
||||
|
||||
b64 = ';base64,'
|
||||
b64 = ";base64,"
|
||||
|
||||
|
||||
def extract_imageparts(bodyparts, prefix):
|
||||
# recursive loop
|
||||
newparts = list(bodyparts)
|
||||
for current in bodyparts:
|
||||
i = bodyparts.index(current)
|
||||
for mime in IMAGES.keys():
|
||||
if mime == current[-len(mime):] and (i + 1 < len(bodyparts)):
|
||||
print('[extract] ' + mime)
|
||||
next = bodyparts[i+1]
|
||||
ext = IMAGES[mime]
|
||||
b64end = next.index(')')
|
||||
b64encoded = next[:b64end]
|
||||
name = prefix + '-' + str(len(cache))
|
||||
link = '/upload/image-' + name + '.' + ext
|
||||
print('[extract] name: ' + name)
|
||||
print('[extract] link: ' + link)
|
||||
print('[extract] %d bytes' % len(b64encoded))
|
||||
if b64encoded not in cache:
|
||||
try:
|
||||
content = base64.b64decode(b64encoded + '==')
|
||||
open(public + link, 'wb').write(content)
|
||||
print('[extract] ' +str(len(content)) + ' image bytes been written')
|
||||
cache[b64encoded] = name
|
||||
except:
|
||||
raise Exception
|
||||
# raise Exception('[extract] error decoding image %r' %b64encoded)
|
||||
else:
|
||||
print('[extract] cached link ' + cache[b64encoded])
|
||||
name = cache[b64encoded]
|
||||
link = cdn + '/upload/image-' + name + '.' + ext
|
||||
newparts[i] = current[:-len(mime)] + current[-len(mime):] + link + next[-b64end:]
|
||||
newparts[i+1] = next[:-b64end]
|
||||
break
|
||||
return extract_imageparts(newparts[i] + newparts[i+1] + b64.join(bodyparts[i+2:]), prefix) \
|
||||
if len(bodyparts) > (i + 1) else ''.join(newparts)
|
||||
# recursive loop
|
||||
newparts = list(bodyparts)
|
||||
for current in bodyparts:
|
||||
i = bodyparts.index(current)
|
||||
for mime in IMAGES.keys():
|
||||
if mime == current[-len(mime) :] and (i + 1 < len(bodyparts)):
|
||||
print("[extract] " + mime)
|
||||
next = bodyparts[i + 1]
|
||||
ext = IMAGES[mime]
|
||||
b64end = next.index(")")
|
||||
b64encoded = next[:b64end]
|
||||
name = prefix + "-" + str(len(cache))
|
||||
link = "/upload/image-" + name + "." + ext
|
||||
print("[extract] name: " + name)
|
||||
print("[extract] link: " + link)
|
||||
print("[extract] %d bytes" % len(b64encoded))
|
||||
if b64encoded not in cache:
|
||||
try:
|
||||
content = base64.b64decode(b64encoded + "==")
|
||||
open(public + link, "wb").write(content)
|
||||
print(
|
||||
"[extract] "
|
||||
+ str(len(content))
|
||||
+ " image bytes been written"
|
||||
)
|
||||
cache[b64encoded] = name
|
||||
except:
|
||||
raise Exception
|
||||
# raise Exception('[extract] error decoding image %r' %b64encoded)
|
||||
else:
|
||||
print("[extract] cached link " + cache[b64encoded])
|
||||
name = cache[b64encoded]
|
||||
link = cdn + "/upload/image-" + name + "." + ext
|
||||
newparts[i] = (
|
||||
current[: -len(mime)]
|
||||
+ current[-len(mime) :]
|
||||
+ link
|
||||
+ next[-b64end:]
|
||||
)
|
||||
newparts[i + 1] = next[:-b64end]
|
||||
break
|
||||
return (
|
||||
extract_imageparts(
|
||||
newparts[i] + newparts[i + 1] + b64.join(bodyparts[i + 2 :]), prefix
|
||||
)
|
||||
if len(bodyparts) > (i + 1)
|
||||
else "".join(newparts)
|
||||
)
|
||||
|
||||
|
||||
def extract_dataimages(parts, prefix):
|
||||
newparts = list(parts)
|
||||
for part in parts:
|
||||
i = parts.index(part)
|
||||
if part.endswith(']('):
|
||||
[ext, rest] = parts[i+1].split(b64)
|
||||
name = prefix + '-' + str(len(cache))
|
||||
if ext == '/jpeg': ext = 'jpg'
|
||||
else: ext = ext.replace('/', '')
|
||||
link = '/upload/image-' + name + '.' + ext
|
||||
print('[extract] filename: ' + link)
|
||||
b64end = rest.find(')')
|
||||
if b64end !=-1:
|
||||
b64encoded = rest[:b64end]
|
||||
print('[extract] %d text bytes' % len(b64encoded))
|
||||
# write if not cached
|
||||
if b64encoded not in cache:
|
||||
try:
|
||||
content = base64.b64decode(b64encoded + '==')
|
||||
open(public + link, 'wb').write(content)
|
||||
print('[extract] ' +str(len(content)) + ' image bytes')
|
||||
cache[b64encoded] = name
|
||||
except:
|
||||
raise Exception
|
||||
# raise Exception('[extract] error decoding image %r' %b64encoded)
|
||||
else:
|
||||
print('[extract] 0 image bytes, cached for ' + cache[b64encoded])
|
||||
name = cache[b64encoded]
|
||||
newparts = list(parts)
|
||||
for part in parts:
|
||||
i = parts.index(part)
|
||||
if part.endswith("]("):
|
||||
[ext, rest] = parts[i + 1].split(b64)
|
||||
name = prefix + "-" + str(len(cache))
|
||||
if ext == "/jpeg":
|
||||
ext = "jpg"
|
||||
else:
|
||||
ext = ext.replace("/", "")
|
||||
link = "/upload/image-" + name + "." + ext
|
||||
print("[extract] filename: " + link)
|
||||
b64end = rest.find(")")
|
||||
if b64end != -1:
|
||||
b64encoded = rest[:b64end]
|
||||
print("[extract] %d text bytes" % len(b64encoded))
|
||||
# write if not cached
|
||||
if b64encoded not in cache:
|
||||
try:
|
||||
content = base64.b64decode(b64encoded + "==")
|
||||
open(public + link, "wb").write(content)
|
||||
print("[extract] " + str(len(content)) + " image bytes")
|
||||
cache[b64encoded] = name
|
||||
except:
|
||||
raise Exception
|
||||
# raise Exception('[extract] error decoding image %r' %b64encoded)
|
||||
else:
|
||||
print("[extract] 0 image bytes, cached for " + cache[b64encoded])
|
||||
name = cache[b64encoded]
|
||||
|
||||
# update link with CDN
|
||||
link = cdn + '/upload/image-' + name + '.' + ext
|
||||
|
||||
# patch newparts
|
||||
newparts[i+1] = link + rest[b64end:]
|
||||
else:
|
||||
raise Exception('cannot find the end of base64 encoded string')
|
||||
else:
|
||||
print('[extract] dataimage skipping part ' + str(i))
|
||||
continue
|
||||
return ''.join(newparts)
|
||||
# update link with CDN
|
||||
link = cdn + "/upload/image-" + name + "." + ext
|
||||
|
||||
# patch newparts
|
||||
newparts[i + 1] = link + rest[b64end:]
|
||||
else:
|
||||
raise Exception("cannot find the end of base64 encoded string")
|
||||
else:
|
||||
print("[extract] dataimage skipping part " + str(i))
|
||||
continue
|
||||
return "".join(newparts)
|
||||
|
||||
|
||||
di = "data:image"
|
||||
|
||||
di = 'data:image'
|
||||
|
||||
def extract_md_images(body, oid):
|
||||
newbody = ''
|
||||
body = body\
|
||||
.replace('\n! []('+di, '\n ![]('+di)\
|
||||
.replace('\n[]('+di, '\n![]('+di)\
|
||||
.replace(' []('+di, ' ![]('+di)
|
||||
parts = body.split(di)
|
||||
i = 0
|
||||
if len(parts) > 1: newbody = extract_dataimages(parts, oid)
|
||||
else: newbody = body
|
||||
return newbody
|
||||
newbody = ""
|
||||
body = (
|
||||
body.replace("\n! [](" + di, "\n 
|
||||
.replace("\n[](" + di, "\n
|
||||
.replace(" [](" + di, " 
|
||||
)
|
||||
parts = body.split(di)
|
||||
i = 0
|
||||
if len(parts) > 1:
|
||||
newbody = extract_dataimages(parts, oid)
|
||||
else:
|
||||
newbody = body
|
||||
return newbody
|
||||
|
||||
|
||||
def cleanup(body):
|
||||
newbody = body\
|
||||
.replace('<', '').replace('>', '')\
|
||||
.replace('{', '(').replace('}', ')')\
|
||||
.replace('…', '...')\
|
||||
.replace(' __ ', ' ')\
|
||||
.replace('_ _', ' ')\
|
||||
.replace('****', '')\
|
||||
.replace('\u00a0', ' ')\
|
||||
.replace('\u02c6', '^')\
|
||||
.replace('\u00a0',' ')\
|
||||
.replace('\ufeff', '')\
|
||||
.replace('\u200b', '')\
|
||||
.replace('\u200c', '')\
|
||||
# .replace('\u2212', '-')
|
||||
return newbody
|
||||
newbody = (
|
||||
body.replace("<", "")
|
||||
.replace(">", "")
|
||||
.replace("{", "(")
|
||||
.replace("}", ")")
|
||||
.replace("…", "...")
|
||||
.replace(" __ ", " ")
|
||||
.replace("_ _", " ")
|
||||
.replace("****", "")
|
||||
.replace("\u00a0", " ")
|
||||
.replace("\u02c6", "^")
|
||||
.replace("\u00a0", " ")
|
||||
.replace("\ufeff", "")
|
||||
.replace("\u200b", "")
|
||||
.replace("\u200c", "")
|
||||
) # .replace('\u2212', '-')
|
||||
return newbody
|
||||
|
||||
|
||||
def extract_md(body, oid):
|
||||
newbody = body
|
||||
if newbody:
|
||||
newbody = extract_md_images(newbody, oid)
|
||||
if not newbody: raise Exception('extract_images error')
|
||||
newbody = cleanup(newbody)
|
||||
if not newbody: raise Exception('cleanup error')
|
||||
newbody, placed = place_tooltips(newbody)
|
||||
if not newbody: raise Exception('place_tooltips error')
|
||||
if placed:
|
||||
newbody = 'import Tooltip from \'$/components/Article/Tooltip\'\n\n' + newbody
|
||||
return newbody
|
||||
newbody = body
|
||||
if newbody:
|
||||
newbody = extract_md_images(newbody, oid)
|
||||
if not newbody:
|
||||
raise Exception("extract_images error")
|
||||
newbody = cleanup(newbody)
|
||||
if not newbody:
|
||||
raise Exception("cleanup error")
|
||||
newbody, placed = place_tooltips(newbody)
|
||||
if not newbody:
|
||||
raise Exception("place_tooltips error")
|
||||
if placed:
|
||||
newbody = "import Tooltip from '$/components/Article/Tooltip'\n\n" + newbody
|
||||
return newbody
|
||||
|
||||
|
||||
def prepare_md_body(entry):
|
||||
# body modifications
|
||||
body = ''
|
||||
kind = entry.get('type')
|
||||
addon = ''
|
||||
if kind == 'Video':
|
||||
addon = ''
|
||||
for m in entry.get('media', []):
|
||||
if 'youtubeId' in m: addon += '<VideoPlayer youtubeId=\'' + m['youtubeId'] + '\' />\n'
|
||||
elif 'vimeoId' in m: addon += '<VideoPlayer vimeoId=\'' + m['vimeoId'] + '\' />\n'
|
||||
else:
|
||||
print('[extract] media is not supported')
|
||||
print(m)
|
||||
body = 'import VideoPlayer from \'$/components/Article/VideoPlayer\'\n\n' + addon
|
||||
|
||||
elif kind == 'Music':
|
||||
addon = ''
|
||||
for m in entry.get('media', []):
|
||||
artist = m.get('performer')
|
||||
trackname = ''
|
||||
if artist: trackname += artist + ' - '
|
||||
if 'title' in m: trackname += m.get('title','')
|
||||
addon += '<MusicPlayer src=\"' + m.get('fileUrl','') + '\" title=\"' + trackname + '\" />\n'
|
||||
body = 'import MusicPlayer from \'$/components/Article/MusicPlayer\'\n\n' + addon
|
||||
# body modifications
|
||||
body = ""
|
||||
kind = entry.get("type")
|
||||
addon = ""
|
||||
if kind == "Video":
|
||||
addon = ""
|
||||
for m in entry.get("media", []):
|
||||
if "youtubeId" in m:
|
||||
addon += "<VideoPlayer youtubeId='" + m["youtubeId"] + "' />\n"
|
||||
elif "vimeoId" in m:
|
||||
addon += "<VideoPlayer vimeoId='" + m["vimeoId"] + "' />\n"
|
||||
else:
|
||||
print("[extract] media is not supported")
|
||||
print(m)
|
||||
body = "import VideoPlayer from '$/components/Article/VideoPlayer'\n\n" + addon
|
||||
|
||||
elif kind == "Music":
|
||||
addon = ""
|
||||
for m in entry.get("media", []):
|
||||
artist = m.get("performer")
|
||||
trackname = ""
|
||||
if artist:
|
||||
trackname += artist + " - "
|
||||
if "title" in m:
|
||||
trackname += m.get("title", "")
|
||||
addon += (
|
||||
'<MusicPlayer src="'
|
||||
+ m.get("fileUrl", "")
|
||||
+ '" title="'
|
||||
+ trackname
|
||||
+ '" />\n'
|
||||
)
|
||||
body = "import MusicPlayer from '$/components/Article/MusicPlayer'\n\n" + addon
|
||||
|
||||
body_orig = extract_html(entry)
|
||||
if body_orig:
|
||||
body += extract_md(html2text(body_orig), entry["_id"])
|
||||
if not body:
|
||||
print("[extract] empty MDX body")
|
||||
return body
|
||||
|
||||
body_orig = extract_html(entry)
|
||||
if body_orig: body += extract_md(html2text(body_orig), entry['_id'])
|
||||
if not body: print('[extract] empty MDX body')
|
||||
return body
|
||||
|
||||
def prepare_html_body(entry):
|
||||
# body modifications
|
||||
body = ''
|
||||
kind = entry.get('type')
|
||||
addon = ''
|
||||
if kind == 'Video':
|
||||
addon = ''
|
||||
for m in entry.get('media', []):
|
||||
if 'youtubeId' in m:
|
||||
addon += '<iframe width="420" height="345" src="http://www.youtube.com/embed/'
|
||||
addon += m['youtubeId']
|
||||
addon += '?autoplay=1" frameborder="0" allowfullscreen></iframe>\n'
|
||||
elif 'vimeoId' in m:
|
||||
addon += '<iframe src="https://player.vimeo.com/video/'
|
||||
addon += m['vimeoId']
|
||||
addon += ' width="420" height="345" frameborder="0" allow="autoplay; fullscreen" allowfullscreen></iframe>'
|
||||
else:
|
||||
print('[extract] media is not supported')
|
||||
print(m)
|
||||
body += addon
|
||||
|
||||
elif kind == 'Music':
|
||||
addon = ''
|
||||
for m in entry.get('media', []):
|
||||
artist = m.get('performer')
|
||||
trackname = ''
|
||||
if artist: trackname += artist + ' - '
|
||||
if 'title' in m: trackname += m.get('title','')
|
||||
addon += '<figure><figcaption>'
|
||||
addon += trackname
|
||||
addon += '</figcaption><audio controls src="'
|
||||
addon += m.get('fileUrl','')
|
||||
addon += '"></audio></figure>'
|
||||
body += addon
|
||||
# body modifications
|
||||
body = ""
|
||||
kind = entry.get("type")
|
||||
addon = ""
|
||||
if kind == "Video":
|
||||
addon = ""
|
||||
for m in entry.get("media", []):
|
||||
if "youtubeId" in m:
|
||||
addon += '<iframe width="420" height="345" src="http://www.youtube.com/embed/'
|
||||
addon += m["youtubeId"]
|
||||
addon += '?autoplay=1" frameborder="0" allowfullscreen></iframe>\n'
|
||||
elif "vimeoId" in m:
|
||||
addon += '<iframe src="https://player.vimeo.com/video/'
|
||||
addon += m["vimeoId"]
|
||||
addon += ' width="420" height="345" frameborder="0" allow="autoplay; fullscreen" allowfullscreen></iframe>'
|
||||
else:
|
||||
print("[extract] media is not supported")
|
||||
print(m)
|
||||
body += addon
|
||||
|
||||
elif kind == "Music":
|
||||
addon = ""
|
||||
for m in entry.get("media", []):
|
||||
artist = m.get("performer")
|
||||
trackname = ""
|
||||
if artist:
|
||||
trackname += artist + " - "
|
||||
if "title" in m:
|
||||
trackname += m.get("title", "")
|
||||
addon += "<figure><figcaption>"
|
||||
addon += trackname
|
||||
addon += '</figcaption><audio controls src="'
|
||||
addon += m.get("fileUrl", "")
|
||||
addon += '"></audio></figure>'
|
||||
body += addon
|
||||
|
||||
body = extract_html(entry)
|
||||
# if body_orig: body += extract_md(html2text(body_orig), entry['_id'])
|
||||
if not body:
|
||||
print("[extract] empty HTML body")
|
||||
return body
|
||||
|
||||
body = extract_html(entry)
|
||||
# if body_orig: body += extract_md(html2text(body_orig), entry['_id'])
|
||||
if not body: print('[extract] empty HTML body')
|
||||
return body
|
||||
|
||||
def extract_html(entry):
|
||||
body_orig = entry.get('body') or ''
|
||||
media = entry.get('media', [])
|
||||
kind = entry.get('type') or ''
|
||||
print('[extract] kind: ' + kind)
|
||||
mbodies = set([])
|
||||
if media:
|
||||
# print('[extract] media is found')
|
||||
for m in media:
|
||||
mbody = m.get('body', '')
|
||||
addon = ''
|
||||
if kind == 'Literature':
|
||||
mbody = m.get('literatureBody') or m.get('body', '')
|
||||
elif kind == 'Image':
|
||||
cover = ''
|
||||
if 'thumborId' in entry: cover = cdn + '/unsafe/1600x/' + entry['thumborId']
|
||||
if not cover:
|
||||
if 'image' in entry: cover = entry['image'].get('url', '')
|
||||
if 'cloudinary' in cover: cover = ''
|
||||
# else: print('[extract] cover: ' + cover)
|
||||
title = m.get('title','').replace('\n', ' ').replace(' ', ' ')
|
||||
u = m.get('thumborId') or cover or ''
|
||||
if title: addon += '<h4>' + title + '</h4>\n'
|
||||
if not u.startswith('http'): u = s3 + u
|
||||
if not u: print('[extract] no image url for ' + str(m))
|
||||
if 'cloudinary' in u: u = 'img/lost.svg'
|
||||
if u != cover or (u == cover and media.index(m) == 0):
|
||||
addon += '<img src=\"' + u + '\" alt=\"'+ title +'\" />\n'
|
||||
if addon:
|
||||
body_orig += addon
|
||||
# print('[extract] item addon: ' + addon)
|
||||
# if addon: print('[extract] addon: %s' % addon)
|
||||
if mbody and mbody not in mbodies:
|
||||
mbodies.add(mbody)
|
||||
body_orig += mbody
|
||||
if len(list(mbodies)) != len(media):
|
||||
print('[extract] %d/%d media item bodies appended' % (len(list(mbodies)),len(media)))
|
||||
# print('[extract] media items body: \n' + body_orig)
|
||||
if not body_orig:
|
||||
for up in entry.get('bodyHistory', []) or []:
|
||||
body_orig = up.get('text', '') or ''
|
||||
if body_orig:
|
||||
print('[extract] got html body from history')
|
||||
break
|
||||
if not body_orig: print('[extract] empty HTML body')
|
||||
# body_html = str(BeautifulSoup(body_orig, features="html.parser"))
|
||||
return body_orig
|
||||
body_orig = entry.get("body") or ""
|
||||
media = entry.get("media", [])
|
||||
kind = entry.get("type") or ""
|
||||
print("[extract] kind: " + kind)
|
||||
mbodies = set([])
|
||||
if media:
|
||||
# print('[extract] media is found')
|
||||
for m in media:
|
||||
mbody = m.get("body", "")
|
||||
addon = ""
|
||||
if kind == "Literature":
|
||||
mbody = m.get("literatureBody") or m.get("body", "")
|
||||
elif kind == "Image":
|
||||
cover = ""
|
||||
if "thumborId" in entry:
|
||||
cover = cdn + "/unsafe/1600x/" + entry["thumborId"]
|
||||
if not cover:
|
||||
if "image" in entry:
|
||||
cover = entry["image"].get("url", "")
|
||||
if "cloudinary" in cover:
|
||||
cover = ""
|
||||
# else: print('[extract] cover: ' + cover)
|
||||
title = m.get("title", "").replace("\n", " ").replace(" ", " ")
|
||||
u = m.get("thumborId") or cover or ""
|
||||
if title:
|
||||
addon += "<h4>" + title + "</h4>\n"
|
||||
if not u.startswith("http"):
|
||||
u = s3 + u
|
||||
if not u:
|
||||
print("[extract] no image url for " + str(m))
|
||||
if "cloudinary" in u:
|
||||
u = "img/lost.svg"
|
||||
if u != cover or (u == cover and media.index(m) == 0):
|
||||
addon += '<img src="' + u + '" alt="' + title + '" />\n'
|
||||
if addon:
|
||||
body_orig += addon
|
||||
# print('[extract] item addon: ' + addon)
|
||||
# if addon: print('[extract] addon: %s' % addon)
|
||||
if mbody and mbody not in mbodies:
|
||||
mbodies.add(mbody)
|
||||
body_orig += mbody
|
||||
if len(list(mbodies)) != len(media):
|
||||
print(
|
||||
"[extract] %d/%d media item bodies appended"
|
||||
% (len(list(mbodies)), len(media))
|
||||
)
|
||||
# print('[extract] media items body: \n' + body_orig)
|
||||
if not body_orig:
|
||||
for up in entry.get("bodyHistory", []) or []:
|
||||
body_orig = up.get("text", "") or ""
|
||||
if body_orig:
|
||||
print("[extract] got html body from history")
|
||||
break
|
||||
if not body_orig:
|
||||
print("[extract] empty HTML body")
|
||||
# body_html = str(BeautifulSoup(body_orig, features="html.parser"))
|
||||
return body_orig
|
||||
|
File diff suppressed because it is too large
@@ -1 +1 @@
|
||||
__all__ = ["users", "tags", "content_items", "comments"],
|
||||
__all__ = (["users", "tags", "content_items", "comments"],)
|
||||
|
@@ -8,104 +8,128 @@ from services.stat.reacted import ReactedStorage
ts = datetime.now()


async def migrate(entry, storage):
'''
{
"_id": "hdtwS8fSyFLxXCgSC",
"body": "<p>",
"contentItem": "mnK8KsJHPRi8DrybQ",
"createdBy": "bMFPuyNg6qAD2mhXe",
"thread": "01/",
"createdAt": "2016-04-19 04:33:53+00:00",
"ratings": [
{ "createdBy": "AqmRukvRiExNpAe8C", "value": 1 },
{ "createdBy": "YdE76Wth3yqymKEu5", "value": 1 }
],
"rating": 2,
"updatedAt": "2020-05-27 19:22:57.091000+00:00",
"updatedBy": "0"
}
"""
{
"_id": "hdtwS8fSyFLxXCgSC",
"body": "<p>",
"contentItem": "mnK8KsJHPRi8DrybQ",
"createdBy": "bMFPuyNg6qAD2mhXe",
"thread": "01/",
"createdAt": "2016-04-19 04:33:53+00:00",
"ratings": [
{ "createdBy": "AqmRukvRiExNpAe8C", "value": 1 },
{ "createdBy": "YdE76Wth3yqymKEu5", "value": 1 }
],
"rating": 2,
"updatedAt": "2020-05-27 19:22:57.091000+00:00",
"updatedBy": "0"
}

->
->

type Reaction {
id: Int!
shout: Shout!
createdAt: DateTime!
createdBy: User!
updatedAt: DateTime
deletedAt: DateTime
deletedBy: User
range: String # full / 0:2340
kind: ReactionKind!
body: String
replyTo: Reaction
stat: Stat
old_id: String
old_thread: String
}
'''
reaction_dict = {}
reaction_dict['createdAt'] = ts if not entry.get('createdAt') else date_parse(entry.get('createdAt'))
print('[migration] reaction original date %r' % entry.get('createdAt'))
# print('[migration] comment date %r ' % comment_dict['createdAt'])
reaction_dict['body'] = html2text(entry.get('body', ''))
reaction_dict['oid'] = entry['_id']
if entry.get('createdAt'): reaction_dict['createdAt'] = date_parse(entry.get('createdAt'))
shout_oid = entry.get('contentItem')
if not shout_oid in storage['shouts']['by_oid']:
if len(storage['shouts']['by_oid']) > 0:
return shout_oid
else:
print('[migration] no shouts migrated yet')
raise Exception
return
else:
with local_session() as session:
author = session.query(User).filter(User.oid == entry['createdBy']).first()
shout_dict = storage['shouts']['by_oid'][shout_oid]
if shout_dict:
reaction_dict['shout'] = shout_dict['slug']
reaction_dict['createdBy'] = author.slug if author else 'discours'
reaction_dict['kind'] = ReactionKind.COMMENT
type Reaction {
id: Int!
shout: Shout!
createdAt: DateTime!
createdBy: User!
updatedAt: DateTime
deletedAt: DateTime
deletedBy: User
range: String # full / 0:2340
kind: ReactionKind!
body: String
replyTo: Reaction
stat: Stat
old_id: String
old_thread: String
}
"""
reaction_dict = {}
reaction_dict["createdAt"] = (
ts if not entry.get("createdAt") else date_parse(entry.get("createdAt"))
)
print("[migration] reaction original date %r" % entry.get("createdAt"))
# print('[migration] comment date %r ' % comment_dict['createdAt'])
reaction_dict["body"] = html2text(entry.get("body", ""))
reaction_dict["oid"] = entry["_id"]
if entry.get("createdAt"):
reaction_dict["createdAt"] = date_parse(entry.get("createdAt"))
shout_oid = entry.get("contentItem")
if not shout_oid in storage["shouts"]["by_oid"]:
if len(storage["shouts"]["by_oid"]) > 0:
return shout_oid
else:
print("[migration] no shouts migrated yet")
raise Exception
return
else:
with local_session() as session:
author = session.query(User).filter(User.oid == entry["createdBy"]).first()
shout_dict = storage["shouts"]["by_oid"][shout_oid]
if shout_dict:
reaction_dict["shout"] = shout_dict["slug"]
reaction_dict["createdBy"] = author.slug if author else "discours"
reaction_dict["kind"] = ReactionKind.COMMENT

# creating reaction from old comment
day = (reaction_dict.get("createdAt") or ts).replace(
hour=0, minute=0, second=0, microsecond=0
)
reaction = Reaction.create(**reaction_dict)
await ReactedStorage.increment(reaction)

reaction_dict["id"] = reaction.id
for comment_rating_old in entry.get("ratings", []):
rater = (
session.query(User)
.filter(User.oid == comment_rating_old["createdBy"])
.first()
)
reactedBy = (
rater
if rater
else session.query(User).filter(User.slug == "noname").first()
)
re_reaction_dict = {
"shout": reaction_dict["shout"],
"replyTo": reaction.id,
"kind": ReactionKind.LIKE
if comment_rating_old["value"] > 0
else ReactionKind.DISLIKE,
"createdBy": reactedBy.slug if reactedBy else "discours",
}
cts = comment_rating_old.get("createdAt")
if cts:
re_reaction_dict["createdAt"] = date_parse(cts)
try:
# creating reaction from old rating
rr = Reaction.create(**re_reaction_dict)
await ReactedStorage.increment(rr)

except Exception as e:
print("[migration] comment rating error: %r" % re_reaction_dict)
raise e
else:
print(
"[migration] error: cannot find shout for comment %r"
% reaction_dict
)
return reaction

# creating reaction from old comment
day = (reaction_dict.get('createdAt') or ts).replace(hour=0, minute=0, second=0, microsecond=0)
reaction = Reaction.create(**reaction_dict)
await ReactedStorage.increment(reaction)

reaction_dict['id'] = reaction.id
for comment_rating_old in entry.get('ratings',[]):
rater = session.query(User).filter(User.oid == comment_rating_old['createdBy']).first()
reactedBy = rater if rater else session.query(User).filter(User.slug == 'noname').first()
re_reaction_dict = {
'shout': reaction_dict['shout'],
'replyTo': reaction.id,
'kind': ReactionKind.LIKE if comment_rating_old['value'] > 0 else ReactionKind.DISLIKE,
'createdBy': reactedBy.slug if reactedBy else 'discours'
}
cts = comment_rating_old.get('createdAt')
if cts: re_reaction_dict['createdAt'] = date_parse(cts)
try:
# creating reaction from old rating
rr = Reaction.create(**re_reaction_dict)
await ReactedStorage.increment(rr)

except Exception as e:
print('[migration] comment rating error: %r' % re_reaction_dict)
raise e
else:
print('[migration] error: cannot find shout for comment %r' % reaction_dict)
return reaction

def migrate_2stage(rr, old_new_id):
reply_oid = rr.get('replyTo')
if not reply_oid: return
new_id = old_new_id.get(rr.get('oid'))
if not new_id: return
with local_session() as session:
comment = session.query(Reaction).filter(Reaction.id == new_id).first()
comment.replyTo = old_new_id.get(reply_oid)
comment.save()
session.commit()
if not rr['body']: raise Exception(rr)
reply_oid = rr.get("replyTo")
if not reply_oid:
return
new_id = old_new_id.get(rr.get("oid"))
if not new_id:
return
with local_session() as session:
comment = session.query(Reaction).filter(Reaction.id == new_id).first()
comment.replyTo = old_new_id.get(reply_oid)
comment.save()
session.commit()
if not rr["body"]:
raise Exception(rr)
@@ -10,224 +10,279 @@ from migration.extract import prepare_html_body
from orm.community import Community
from orm.reaction import Reaction, ReactionKind

OLD_DATE = '2016-03-05 22:22:00.350000'
OLD_DATE = "2016-03-05 22:22:00.350000"
ts = datetime.now()
type2layout = {
'Article': 'article',
'Literature': 'prose',
'Music': 'music',
'Video': 'video',
'Image': 'image'
"Article": "article",
"Literature": "prose",
"Music": "music",
"Video": "video",
"Image": "image",
}


def get_shout_slug(entry):
slug = entry.get('slug', '')
if not slug:
for friend in entry.get('friendlySlugs', []):
slug = friend.get('slug', '')
if slug: break
return slug
slug = entry.get("slug", "")
if not slug:
for friend in entry.get("friendlySlugs", []):
slug = friend.get("slug", "")
if slug:
break
return slug


async def migrate(entry, storage):
# init, set title and layout
r = {
'layout': type2layout[entry['type']],
'title': entry['title'],
'community': Community.default_community.id,
'authors': [],
'topics': set([]),
# 'rating': 0,
# 'ratings': [],
'createdAt': []
}
topics_by_oid = storage['topics']['by_oid']
users_by_oid = storage['users']['by_oid']
# init, set title and layout
r = {
"layout": type2layout[entry["type"]],
"title": entry["title"],
"community": Community.default_community.id,
"authors": [],
"topics": set([]),
# 'rating': 0,
# 'ratings': [],
"createdAt": [],
}
topics_by_oid = storage["topics"]["by_oid"]
users_by_oid = storage["users"]["by_oid"]

# author
# author

oid = entry.get('createdBy', entry.get('_id', entry.get('oid')))
userdata = users_by_oid.get(oid)
if not userdata:
app = entry.get('application')
if app:
userslug = translit(app['name'], 'ru', reversed=True)\
.replace(' ', '-')\
.replace('\'', '')\
.replace('.', '-').lower()
userdata = {
'username': app['email'],
'email': app['email'],
'name': app['name'],
'bio': app.get('bio', ''),
'emailConfirmed': False,
'slug': userslug,
'createdAt': ts,
'wasOnlineAt': ts
}
else:
userdata = User.default_user.dict()
assert userdata, 'no user found for %s from ' % [oid, len(users_by_oid.keys())]
r['authors'] = [userdata, ]
oid = entry.get("createdBy", entry.get("_id", entry.get("oid")))
userdata = users_by_oid.get(oid)
if not userdata:
app = entry.get("application")
if app:
userslug = (
translit(app["name"], "ru", reversed=True)
.replace(" ", "-")
.replace("'", "")
.replace(".", "-")
.lower()
)
userdata = {
"username": app["email"],
"email": app["email"],
"name": app["name"],
"bio": app.get("bio", ""),
"emailConfirmed": False,
"slug": userslug,
"createdAt": ts,
"wasOnlineAt": ts,
}
else:
userdata = User.default_user.dict()
assert userdata, "no user found for %s from " % [oid, len(users_by_oid.keys())]
r["authors"] = [
userdata,
]

# slug
# slug

slug = get_shout_slug(entry)
if slug: r['slug'] = slug
else: raise Exception

# cover
c = ''
if entry.get('thumborId'):
c = 'https://assets.discours.io/unsafe/1600x/' + entry['thumborId']
else:
c = entry.get('image', {}).get('url')
if not c or 'cloudinary' in c: c = ''
r['cover'] = c
slug = get_shout_slug(entry)
if slug:
r["slug"] = slug
else:
raise Exception

# timestamps
# cover
c = ""
if entry.get("thumborId"):
c = "https://assets.discours.io/unsafe/1600x/" + entry["thumborId"]
else:
c = entry.get("image", {}).get("url")
if not c or "cloudinary" in c:
c = ""
r["cover"] = c

r['createdAt'] = date_parse(entry.get('createdAt', OLD_DATE))
r['updatedAt'] = date_parse(entry['updatedAt']) if 'updatedAt' in entry else ts
if entry.get('published'):
r['publishedAt'] = date_parse(entry.get('publishedAt', OLD_DATE))
if 'deletedAt' in entry: r['deletedAt'] = date_parse(entry['deletedAt'])
# timestamps

# topics
category = entry['category']
mainTopic = topics_by_oid.get(category)
if mainTopic:
r['mainTopic'] = storage['replacements'].get(mainTopic["slug"], mainTopic["slug"])
topic_oids = [category, ]
topic_oids.extend(entry.get('tags', []))
for oid in topic_oids:
if oid in storage['topics']['by_oid']:
r['topics'].add(storage['topics']['by_oid'][oid]['slug'])
else:
print('[migration] unknown old topic id: ' + oid)
r['topics'] = list(r['topics'])

entry['topics'] = r['topics']
entry['cover'] = r['cover']
entry['authors'] = r['authors']
r["createdAt"] = date_parse(entry.get("createdAt", OLD_DATE))
r["updatedAt"] = date_parse(entry["updatedAt"]) if "updatedAt" in entry else ts
if entry.get("published"):
r["publishedAt"] = date_parse(entry.get("publishedAt", OLD_DATE))
if "deletedAt" in entry:
r["deletedAt"] = date_parse(entry["deletedAt"])

# body
r['body'] = prepare_html_body(entry)
# topics
category = entry["category"]
mainTopic = topics_by_oid.get(category)
if mainTopic:
r["mainTopic"] = storage["replacements"].get(
mainTopic["slug"], mainTopic["slug"]
)
topic_oids = [
category,
]
topic_oids.extend(entry.get("tags", []))
for oid in topic_oids:
if oid in storage["topics"]["by_oid"]:
r["topics"].add(storage["topics"]["by_oid"][oid]["slug"])
else:
print("[migration] unknown old topic id: " + oid)
r["topics"] = list(r["topics"])

# save shout to db
entry["topics"] = r["topics"]
entry["cover"] = r["cover"]
entry["authors"] = r["authors"]

s = object()
shout_dict = r.copy()
user = None
del shout_dict['topics'] # NOTE: AttributeError: 'str' object has no attribute '_sa_instance_state'
#del shout_dict['rating'] # NOTE: TypeError: 'rating' is an invalid keyword argument for Shout
#del shout_dict['ratings']
email = userdata.get('email')
slug = userdata.get('slug')
if not slug: raise Exception
with local_session() as session:
# c = session.query(Community).all().pop()
if email: user = session.query(User).filter(User.email == email).first()
if not user and slug: user = session.query(User).filter(User.slug == slug).first()
if not user and userdata:
try:
userdata['slug'] = userdata['slug'].lower().strip().replace(' ', '-')
user = User.create(**userdata)
except sqlalchemy.exc.IntegrityError:
print('[migration] user error: ' + userdata)
userdata['id'] = user.id
userdata['createdAt'] = user.createdAt
storage['users']['by_slug'][userdata['slug']] = userdata
storage['users']['by_oid'][entry['_id']] = userdata
assert user, 'could not get a user'
shout_dict['authors'] = [ user, ]
# body
r["body"] = prepare_html_body(entry)

try:
s = Shout.create(**shout_dict)
except sqlalchemy.exc.IntegrityError as e:
with local_session() as session:
s = session.query(Shout).filter(Shout.slug == shout_dict['slug']).first()
bump = False
if s:
for key in shout_dict:
if key in s.__dict__:
if s.__dict__[key] != shout_dict[key]:
print('[migration] shout already exists, but differs in %s' % key)
bump = True
else:
print('[migration] shout already exists, but lacks %s' % key)
bump = True
if bump:
s.update(shout_dict)
else:
print('[migration] something went wrong with shout: \n%r' % shout_dict)
raise e
session.commit()
except Exception as e:
print(e)
print(s)
raise Exception

# save shout to db

# shout topics aftermath
shout_dict['topics'] = []
for tpc in r['topics']:
oldslug = tpc
newslug = storage['replacements'].get(oldslug, oldslug)
if newslug:
with local_session() as session:
shout_topic_old = session.query(ShoutTopic)\
.filter(ShoutTopic.shout == shout_dict['slug'])\
.filter(ShoutTopic.topic == oldslug).first()
if shout_topic_old:
shout_topic_old.update({ 'slug': newslug })
else:
shout_topic_new = session.query(ShoutTopic)\
.filter(ShoutTopic.shout == shout_dict['slug'])\
.filter(ShoutTopic.topic == newslug).first()
if not shout_topic_new:
try: ShoutTopic.create(**{ 'shout': shout_dict['slug'], 'topic': newslug })
except: print('[migration] shout topic error: ' + newslug)
session.commit()
if newslug not in shout_dict['topics']:
shout_dict['topics'].append(newslug)
else:
print('[migration] ignored topic slug: \n%r' % tpc['slug'])
# raise Exception
s = object()
shout_dict = r.copy()
user = None
del shout_dict[
"topics"
] # NOTE: AttributeError: 'str' object has no attribute '_sa_instance_state'
# del shout_dict['rating'] # NOTE: TypeError: 'rating' is an invalid keyword argument for Shout
# del shout_dict['ratings']
email = userdata.get("email")
slug = userdata.get("slug")
if not slug:
raise Exception
with local_session() as session:
# c = session.query(Community).all().pop()
if email:
user = session.query(User).filter(User.email == email).first()
if not user and slug:
user = session.query(User).filter(User.slug == slug).first()
if not user and userdata:
try:
userdata["slug"] = userdata["slug"].lower().strip().replace(" ", "-")
user = User.create(**userdata)
except sqlalchemy.exc.IntegrityError:
print("[migration] user error: " + userdata)
userdata["id"] = user.id
userdata["createdAt"] = user.createdAt
storage["users"]["by_slug"][userdata["slug"]] = userdata
storage["users"]["by_oid"][entry["_id"]] = userdata
assert user, "could not get a user"
shout_dict["authors"] = [
user,
]

# content_item ratings to reactions
try:
for content_rating in entry.get('ratings',[]):
with local_session() as session:
rater = session.query(User).filter(User.oid == content_rating['createdBy']).first()
reactedBy = rater if rater else session.query(User).filter(User.slug == 'noname').first()
if rater:
reaction_dict = {
'kind': ReactionKind.LIKE if content_rating['value'] > 0 else ReactionKind.DISLIKE,
'createdBy': reactedBy.slug,
'shout': shout_dict['slug']
}
cts = content_rating.get('createdAt')
if cts: reaction_dict['createdAt'] = date_parse(cts)
reaction = session.query(Reaction).\
filter(Reaction.shout == reaction_dict['shout']).\
filter(Reaction.createdBy == reaction_dict['createdBy']).\
filter(Reaction.kind == reaction_dict['kind']).first()
if reaction:
reaction_dict['kind'] = ReactionKind.AGREE if content_rating['value'] > 0 else ReactionKind.DISAGREE,
reaction.update(reaction_dict)
else:
day = (reaction_dict.get('createdAt') or ts).replace(hour=0, minute=0, second=0, microsecond=0)
rea = Reaction.create(**reaction_dict)
await ReactedStorage.increment(rea)
# shout_dict['ratings'].append(reaction_dict)
except:
print('[migration] content_item.ratings error: \n%r' % content_rating)
raise Exception
try:
s = Shout.create(**shout_dict)
except sqlalchemy.exc.IntegrityError as e:
with local_session() as session:
s = session.query(Shout).filter(Shout.slug == shout_dict["slug"]).first()
bump = False
if s:
for key in shout_dict:
if key in s.__dict__:
if s.__dict__[key] != shout_dict[key]:
print(
"[migration] shout already exists, but differs in %s"
% key
)
bump = True
else:
print("[migration] shout already exists, but lacks %s" % key)
bump = True
if bump:
s.update(shout_dict)
else:
print("[migration] something went wrong with shout: \n%r" % shout_dict)
raise e
session.commit()
except Exception as e:
print(e)
print(s)
raise Exception

# shout views
ViewedByDay.create( shout = shout_dict['slug'], value = entry.get('views', 1) )
# del shout_dict['ratings']
shout_dict['oid'] = entry.get('_id')
storage['shouts']['by_oid'][entry['_id']] = shout_dict
storage['shouts']['by_slug'][slug] = shout_dict
return shout_dict
# shout topics aftermath
shout_dict["topics"] = []
for tpc in r["topics"]:
oldslug = tpc
newslug = storage["replacements"].get(oldslug, oldslug)
if newslug:
with local_session() as session:
shout_topic_old = (
session.query(ShoutTopic)
.filter(ShoutTopic.shout == shout_dict["slug"])
.filter(ShoutTopic.topic == oldslug)
.first()
)
if shout_topic_old:
shout_topic_old.update({"slug": newslug})
else:
shout_topic_new = (
session.query(ShoutTopic)
.filter(ShoutTopic.shout == shout_dict["slug"])
.filter(ShoutTopic.topic == newslug)
.first()
)
if not shout_topic_new:
try:
ShoutTopic.create(
**{"shout": shout_dict["slug"], "topic": newslug}
)
except:
print("[migration] shout topic error: " + newslug)
session.commit()
if newslug not in shout_dict["topics"]:
shout_dict["topics"].append(newslug)
else:
print("[migration] ignored topic slug: \n%r" % tpc["slug"])
# raise Exception

# content_item ratings to reactions
try:
for content_rating in entry.get("ratings", []):
with local_session() as session:
rater = (
session.query(User)
.filter(User.oid == content_rating["createdBy"])
.first()
)
reactedBy = (
rater
if rater
else session.query(User).filter(User.slug == "noname").first()
)
if rater:
reaction_dict = {
"kind": ReactionKind.LIKE
if content_rating["value"] > 0
else ReactionKind.DISLIKE,
"createdBy": reactedBy.slug,
"shout": shout_dict["slug"],
}
cts = content_rating.get("createdAt")
if cts:
reaction_dict["createdAt"] = date_parse(cts)
reaction = (
session.query(Reaction)
.filter(Reaction.shout == reaction_dict["shout"])
.filter(Reaction.createdBy == reaction_dict["createdBy"])
.filter(Reaction.kind == reaction_dict["kind"])
.first()
)
if reaction:
reaction_dict["kind"] = (
ReactionKind.AGREE
if content_rating["value"] > 0
else ReactionKind.DISAGREE,
)
reaction.update(reaction_dict)
else:
day = (reaction_dict.get("createdAt") or ts).replace(
hour=0, minute=0, second=0, microsecond=0
)
rea = Reaction.create(**reaction_dict)
await ReactedStorage.increment(rea)
# shout_dict['ratings'].append(reaction_dict)
except:
print("[migration] content_item.ratings error: \n%r" % content_rating)
raise Exception

# shout views
ViewedByDay.create(shout=shout_dict["slug"], value=entry.get("views", 1))
# del shout_dict['ratings']
shout_dict["oid"] = entry.get("_id")
storage["shouts"]["by_oid"][entry["_id"]] = shout_dict
storage["shouts"]["by_slug"][slug] = shout_dict
return shout_dict
@@ -4,104 +4,144 @@ from orm import User, UserRating
from dateutil.parser import parse
from base.orm import local_session


def migrate(entry):
if 'subscribedTo' in entry: del entry['subscribedTo']
email = entry['emails'][0]['address']
user_dict = {
'oid': entry['_id'],
'roles': [],
'ratings': [],
'username': email,
'email': email,
'password': entry['services']['password'].get('bcrypt', ''),
'createdAt': parse(entry['createdAt']),
'emailConfirmed': bool(entry['emails'][0]['verified']),
'muted': False, # amnesty
'bio': entry['profile'].get('bio', ''),
'notifications': [],
'createdAt': parse(entry['createdAt']),
'roles': [], # entry['roles'] # roles by community
'ratings': [], # entry['ratings']
'links': [],
'name': 'anonymous'
}
if 'updatedAt' in entry: user_dict['updatedAt'] = parse(entry['updatedAt'])
if 'wasOnineAt' in entry: user_dict['wasOnlineAt'] = parse(entry['wasOnlineAt'])
if entry.get('profile'):
# slug
user_dict['slug'] = entry['profile'].get('path').lower().replace(' ', '-').strip()
user_dict['bio'] = html2text(entry.get('profile').get('bio') or '')
if "subscribedTo" in entry:
del entry["subscribedTo"]
email = entry["emails"][0]["address"]
user_dict = {
"oid": entry["_id"],
"roles": [],
"ratings": [],
"username": email,
"email": email,
"password": entry["services"]["password"].get("bcrypt", ""),
"createdAt": parse(entry["createdAt"]),
"emailConfirmed": bool(entry["emails"][0]["verified"]),
"muted": False, # amnesty
"bio": entry["profile"].get("bio", ""),
"notifications": [],
"createdAt": parse(entry["createdAt"]),
"roles": [], # entry['roles'] # roles by community
"ratings": [], # entry['ratings']
"links": [],
"name": "anonymous",
}
if "updatedAt" in entry:
user_dict["updatedAt"] = parse(entry["updatedAt"])
if "wasOnineAt" in entry:
user_dict["wasOnlineAt"] = parse(entry["wasOnlineAt"])
if entry.get("profile"):
# slug
user_dict["slug"] = (
entry["profile"].get("path").lower().replace(" ", "-").strip()
)
user_dict["bio"] = html2text(entry.get("profile").get("bio") or "")

# userpic
try: user_dict['userpic'] = 'https://assets.discours.io/unsafe/100x/' + entry['profile']['thumborId']
except KeyError:
try: user_dict['userpic'] = entry['profile']['image']['url']
except KeyError: user_dict['userpic'] = ''
# userpic
try:
user_dict["userpic"] = (
"https://assets.discours.io/unsafe/100x/"
+ entry["profile"]["thumborId"]
)
except KeyError:
try:
user_dict["userpic"] = entry["profile"]["image"]["url"]
except KeyError:
user_dict["userpic"] = ""

# name
fn = entry['profile'].get('firstName', '')
ln = entry['profile'].get('lastName', '')
name = user_dict['slug'] if user_dict['slug'] else 'anonymous'
name = fn if fn else name
name = (name + ' ' + ln) if ln else name
name = entry['profile']['path'].lower().strip().replace(' ', '-') if len(name) < 2 else name
user_dict['name'] = name
# name
fn = entry["profile"].get("firstName", "")
ln = entry["profile"].get("lastName", "")
name = user_dict["slug"] if user_dict["slug"] else "anonymous"
name = fn if fn else name
name = (name + " " + ln) if ln else name
name = (
entry["profile"]["path"].lower().strip().replace(" ", "-")
if len(name) < 2
else name
)
user_dict["name"] = name

# links
fb = entry['profile'].get('facebook', False)
if fb: user_dict['links'].append(fb)
vk = entry['profile'].get('vkontakte', False)
if vk: user_dict['links'].append(vk)
tr = entry['profile'].get('twitter', False)
if tr: user_dict['links'].append(tr)
ws = entry['profile'].get('website', False)
if ws: user_dict['links'].append(ws)
# links
fb = entry["profile"].get("facebook", False)
if fb:
user_dict["links"].append(fb)
vk = entry["profile"].get("vkontakte", False)
if vk:
user_dict["links"].append(vk)
tr = entry["profile"].get("twitter", False)
if tr:
user_dict["links"].append(tr)
ws = entry["profile"].get("website", False)
if ws:
user_dict["links"].append(ws)

# some checks
if not user_dict['slug'] and len(user_dict['links']) > 0:
user_dict['slug'] = user_dict['links'][0].split('/')[-1]
# some checks
if not user_dict["slug"] and len(user_dict["links"]) > 0:
user_dict["slug"] = user_dict["links"][0].split("/")[-1]

user_dict["slug"] = user_dict.get("slug", user_dict["email"].split("@")[0])
oid = user_dict["oid"]
user_dict["slug"] = user_dict["slug"].lower().strip().replace(" ", "-")
try:
user = User.create(**user_dict.copy())
except sqlalchemy.exc.IntegrityError:
print("[migration] cannot create user " + user_dict["slug"])
with local_session() as session:
old_user = (
session.query(User).filter(User.slug == user_dict["slug"]).first()
)
old_user.oid = oid
user = old_user
if not user:
print("[migration] ERROR: cannot find user " + user_dict["slug"])
raise Exception
user_dict["id"] = user.id
return user_dict

user_dict['slug'] = user_dict.get('slug', user_dict['email'].split('@')[0])
oid = user_dict['oid']
user_dict['slug'] = user_dict['slug'].lower().strip().replace(' ', '-')
try: user = User.create(**user_dict.copy())
except sqlalchemy.exc.IntegrityError:
print('[migration] cannot create user ' + user_dict['slug'])
with local_session() as session:
old_user = session.query(User).filter(User.slug == user_dict['slug']).first()
old_user.oid = oid
user = old_user
if not user:
print('[migration] ERROR: cannot find user ' + user_dict['slug'])
raise Exception
user_dict['id'] = user.id
return user_dict

def migrate_2stage(entry, id_map):
ce = 0
for rating_entry in entry.get('ratings',[]):
rater_oid = rating_entry['createdBy']
rater_slug = id_map.get(rater_oid)
if not rater_slug:
ce +=1
# print(rating_entry)
continue
oid = entry['_id']
author_slug = id_map.get(oid)
user_rating_dict = {
'value': rating_entry['value'],
'rater': rater_slug,
'user': author_slug
}
with local_session() as session:
try:
user_rating = UserRating.create(**user_rating_dict)
except sqlalchemy.exc.IntegrityError:
old_rating = session.query(UserRating).filter(UserRating.rater == rater_slug).first()
print('[migration] cannot create ' + author_slug + '`s rate from ' + rater_slug)
print('[migration] concat rating value %d+%d=%d' % (old_rating.value, rating_entry['value'], old_rating.value + rating_entry['value']))
old_rating.update({ 'value': old_rating.value + rating_entry['value'] })
session.commit()
except Exception as e:
print(e)
return ce
ce = 0
for rating_entry in entry.get("ratings", []):
rater_oid = rating_entry["createdBy"]
rater_slug = id_map.get(rater_oid)
if not rater_slug:
ce += 1
# print(rating_entry)
continue
oid = entry["_id"]
author_slug = id_map.get(oid)
user_rating_dict = {
"value": rating_entry["value"],
"rater": rater_slug,
"user": author_slug,
}
with local_session() as session:
try:
user_rating = UserRating.create(**user_rating_dict)
except sqlalchemy.exc.IntegrityError:
old_rating = (
session.query(UserRating)
.filter(UserRating.rater == rater_slug)
.first()
)
print(
"[migration] cannot create "
+ author_slug
+ "`s rate from "
+ rater_slug
)
print(
"[migration] concat rating value %d+%d=%d"
% (
old_rating.value,
rating_entry["value"],
old_rating.value + rating_entry["value"],
)
)
old_rating.update({"value": old_rating.value + rating_entry["value"]})
session.commit()
except Exception as e:
print(e)
return ce
@@ -1,9 +1,10 @@
from datetime import datetime
from json import JSONEncoder


class DateTimeEncoder(JSONEncoder):
def default(self, z):
if isinstance(z, datetime):
return (str(z))
return str(z)
else:
return super().default(z)
return super().default(z)