"""Strip a JSON metadata dump and generate value-frequency statistics.

Usage: python <this script> INPUT_JSON

The input file must contain a JSON object whose values are entry objects.
Two kinds of output are written into the ``data`` directory:

* ``gutenberg.json`` -- a list of all entries with the ``formaturi`` key
  removed.
* ``stats_<key>.json`` -- for every key in ``STATS_FOR``, an object mapping
  each value seen under that key to the number of times it occurred.
"""
import json
import os
import sys
from collections import defaultdict

# Entry keys for which a statistics file is generated.
STATS_FOR = ['language', 'rights', 'subject']

# Entry key dropped from the stripped output.
STRIPPED_KEY = 'formaturi'

# Directory that receives all generated files.
OUTPUT_DIR = 'data'


def load_entries(path):
    """Return the parsed JSON document stored at *path*.

    The file is read as UTF-8 explicitly so the result does not depend on
    the platform's locale encoding.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)


def process_entries(data, stats_for=STATS_FOR):
    """Build the stripped entry list and the per-key value counts.

    Args:
        data: mapping whose values are entry dicts. Every entry must have
            each key in *stats_for*, holding an iterable of hashable values.
        stats_for: entry keys to count values for.

    Returns:
        A ``(new_data, stats)`` tuple. ``new_data`` is a list with one copy
        of each entry minus ``STRIPPED_KEY``; ``stats`` maps each key in
        *stats_for* to a ``defaultdict(int)`` of value -> occurrence count.

    Raises:
        KeyError: if an entry lacks one of the *stats_for* keys.
    """
    stats = {key: defaultdict(int) for key in stats_for}
    new_data = []
    for entry in data.values():
        for stat_key in stats_for:
            for value in entry[stat_key]:
                stats[stat_key][value] += 1
        new_data.append(
            {key: value for key, value in entry.items() if key != STRIPPED_KEY}
        )
    return new_data, stats


def write_outputs(new_data, stats, output_dir=OUTPUT_DIR):
    """Write the statistics files and the stripped data into *output_dir*.

    The directory is created when missing, so a fresh checkout does not
    fail with ``FileNotFoundError`` after all the processing is done.
    """
    os.makedirs(output_dir, exist_ok=True)
    for stat, counts in stats.items():
        stat_path = os.path.join(output_dir, 'stats_' + stat + '.json')
        with open(stat_path, 'w', encoding='utf-8') as f:
            json.dump(counts, f)
    with open(os.path.join(output_dir, 'gutenberg.json'), 'w',
              encoding='utf-8') as f:
        json.dump(new_data, f)


def main(argv=None):
    """Run the conversion for the input file named in *argv*.

    Args:
        argv: argument vector, program name first; defaults to ``sys.argv``.

    Raises:
        SystemExit: with a usage message when no input file is given.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        program = argv[0] if argv else 'script'
        sys.exit('usage: ' + program + ' INPUT_JSON')
    new_data, stats = process_entries(load_entries(argv[1]))
    write_outputs(new_data, stats)


if __name__ == '__main__':
    main()