31 lines
832 B
Python
31 lines
832 B
Python
import sys
|
|
from collections import defaultdict
|
|
import json
|
|
|
|
# Keys for which a value-frequency file (data/stats_<key>.json) is written.
stats_for = ['language', 'rights', 'subject']


def strip_entries(data):
    """Return the entries of *data* as a list, without their 'formaturi' field.

    Args:
        data: Mapping of book id to that book's metadata dict.

    Returns:
        A list with one new dict per book, in the iteration order of *data*.
        Each dict holds every field of the source entry except 'formaturi',
        plus the book id stored under 'id'. The input is not modified.
    """
    stripped = []
    for book_id, entry in data.items():
        slim = {key: value for key, value in entry.items()
                if key != 'formaturi'}
        # Assigned after the copy, so an 'id' field already present in the
        # entry is replaced by the book id.
        slim['id'] = book_id
        stripped.append(slim)
    return stripped


def count_stats(data, keys):
    """Count how often each value occurs under each of *keys*.

    Args:
        data: Mapping of book id to that book's metadata dict.
        keys: Names of the fields to count. Each field is iterated, and
            every element it yields is counted once.

    Returns:
        A dict mapping each key to a ``defaultdict(int)`` of value -> count.

    Raises:
        KeyError: If an entry lacks one of *keys*.
    """
    stats = {key: defaultdict(int) for key in keys}
    for entry in data.values():
        for stat_key in keys:
            for value in entry[stat_key]:
                stats[stat_key][value] += 1
    return stats


def main(argv=None):
    """Read the metadata JSON named on the command line and write the outputs.

    Writes one 'data/stats_<key>.json' file per key in ``stats_for`` and then
    'data/gutenberg.json' with the stripped entries. The 'data' directory
    must already exist.

    Args:
        argv: Command-line arguments without the program name; defaults to
            ``sys.argv[1:]``. The first argument is the input file path.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        sys.exit('usage: {} METADATA_JSON'.format(sys.argv[0]))

    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(args[0], 'r', encoding='utf-8') as f:
        data = json.load(f)

    new_data = strip_entries(data)
    stats = count_stats(data, stats_for)

    for stat, counts in stats.items():
        with open('data/stats_' + stat + '.json', 'w', encoding='utf-8') as f:
            json.dump(counts, f)

    with open('data/gutenberg.json', 'w', encoding='utf-8') as f:
        json.dump(new_data, f)


if __name__ == '__main__':
    main()
|