rsvp-reader/scripts/process-metadata.py
2019-12-20 13:27:14 +01:00

27 lines
719 B
Python

import sys
from collections import defaultdict
import json
# Keys whose values are tallied into per-key statistics files.
stats_for = ['language', 'rights', 'subject']


def collect(data, keys=None):
    """Tally values for the stat keys and strip 'formaturi' from each entry.

    Args:
        data: Mapping whose values are the per-entry dicts; only the values
            are used. Each entry must contain every key in *keys*, and the
            value stored under such a key is iterated, with every item it
            yields counted once (presumably lists of strings -- confirm
            against the input file).
        keys: Entry keys to build statistics for. Defaults to ``stats_for``.

    Returns:
        A ``(new_data, stats)`` tuple. ``new_data`` is a list with one dict
        per entry, holding every key of that entry except 'formaturi'.
        ``stats`` maps each stat key to a ``defaultdict(int)`` of
        value -> number of occurrences.

    Raises:
        KeyError: If an entry lacks one of the stat keys.
    """
    if keys is None:
        keys = stats_for
    stats = {key: defaultdict(int) for key in keys}
    # new data with stripped information
    new_data = []
    for entry in data.values():
        for stat_key in keys:
            for value in entry[stat_key]:
                stats[stat_key][value] += 1
        # Appended once per entry, not once per stat key.
        new_data.append(
            {key: value for key, value in entry.items() if key != 'formaturi'}
        )
    return new_data, stats


def write_json(path, payload):
    """Serialize *payload* as JSON into the file at *path*, overwriting it."""
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(payload, f)


def main(argv=None):
    """Read the metadata JSON named on the command line and write the outputs.

    Writes one ``data/stats_<key>.json`` file per key in ``stats_for`` and the
    stripped entries to ``data/gutenberg.json``. The paths are relative to the
    current working directory; the ``data`` directory is not created here.

    Args:
        argv: Argument vector including the program name. Defaults to
            ``sys.argv``.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        # Exit with a usage message rather than an IndexError traceback.
        sys.exit('usage: process-metadata.py METADATA_JSON')

    # JSON is UTF-8 by specification; do not rely on the locale default.
    with open(argv[1], 'r', encoding='utf-8') as f:
        data = json.load(f)

    new_data, stats = collect(data)

    # generate stat files for several keys
    for stat, counts in stats.items():
        write_json('data/stats_' + stat + '.json', counts)
    write_json('data/gutenberg.json', new_data)


if __name__ == '__main__':
    main()