# Code-viewer metadata for this file: 27 lines, 719 B, Python.
"""Strip a JSON metadata dump and write per-key value counts.

Usage: run with the path of the input JSON file as the first argument.

The input is loaded as a mapping whose values are per-entry dicts. The
script writes, relative to the current working directory:

* ``data/gutenberg.json`` -- a list of the entries with the ``formaturi``
  key removed;
* ``data/stats_<key>.json`` -- for each key in ``stats_for``, a mapping from
  each value seen under that key to the number of times it occurred.

The ``data/`` directory must already exist.
"""
import json
import sys
from collections import defaultdict

# Entry keys for which a frequency table is generated.
stats_for = ['language', 'rights', 'subject']


def _summarize(data):
    """Return ``(new_data, stats)`` computed from the loaded mapping.

    ``new_data`` is a list with one dict per entry, copied without the
    ``formaturi`` key. ``stats`` maps each key in ``stats_for`` to a
    ``defaultdict(int)`` counting every value found under that key.

    Raises ``KeyError`` if an entry lacks one of the ``stats_for`` keys.
    """
    stats = {key: defaultdict(int) for key in stats_for}
    new_data = []

    for entry in data.values():
        for stat_key in stats_for:
            # NOTE(review): assumes entry[stat_key] is a list of hashable
            # values; a bare string here would be counted per character.
            for value in entry[stat_key]:
                stats[stat_key][value] += 1
        # One stripped copy per entry, independent of how many stat keys exist.
        new_data.append(
            {key: value for key, value in entry.items() if key != 'formaturi'}
        )

    return new_data, stats


def main(argv=None):
    """Read the input file named by ``argv[1]`` and write the output files.

    ``argv`` defaults to ``sys.argv``. Exits with a usage message when no
    input path is supplied instead of failing with an ``IndexError``.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.exit('usage: <script> INPUT_JSON')

    # Explicit UTF-8 so decoding does not depend on the platform's locale.
    with open(argv[1], 'r', encoding='utf-8') as f:
        data = json.load(f)

    new_data, stats = _summarize(data)

    for stat_key, counts in stats.items():
        out_path = 'data/stats_{}.json'.format(stat_key)
        with open(out_path, 'w', encoding='utf-8') as f:
            json.dump(counts, f)

    with open('data/gutenberg.json', 'w', encoding='utf-8') as f:
        json.dump(new_data, f)


if __name__ == '__main__':
    main()