'''
normalize_cities.py

A throwaway script for turning the populations table
    (city, year, population)
into two tables:
    pops:   (city_id, year, population)
    cities: (id, name)

Reads poprows.csv and writes cities.csv and pops.csv in the current
directory. City ids start at 1 and are handed out in order of first
appearance in the input; the same id is used in both output files.
'''

import csv


def build_tables(rows):
    '''Split (city, year, population) rows into the two normalized tables.

    rows: iterable of 3-item sequences (city, year, population). Empty
        rows (what csv.reader yields for blank lines) are skipped.

    Returns a (city_rows, population_rows) pair:
        city_rows:       sorted list of (id, name)
        population_rows: sorted list of (city_id, year, population)
    Values other than the id are passed through untouched, so when the
    rows come from csv.reader the year and population stay strings and
    sort as strings.

    Raises ValueError if a (city, year) pair occurs more than once, or if
    a non-empty row does not have exactly three fields.
    '''
    city_ids = {}
    populations = {}

    for row in rows:
        if not row:
            continue
        city, year, population = row
        if city not in city_ids:
            # 1-based ids, assigned once here so that both output tables
            # are guaranteed to agree on them.
            city_ids[city] = len(city_ids) + 1
        if (city, year) in populations:
            # A real exception rather than an assert: asserts vanish
            # under `python -O` and the duplicate would silently win.
            raise ValueError(
                'duplicate population row for %r in %r' % (city, year))
        populations[(city, year)] = population

    city_rows = sorted(
        (city_id, city) for city, city_id in city_ids.items())
    population_rows = sorted(
        (city_ids[city], year, population)
        for (city, year), population in populations.items())
    return city_rows, population_rows


def write_rows(path, rows):
    '''Write rows to path as CSV, replacing any existing file.'''
    # newline='' lets the csv module control line endings itself.
    with open(path, 'w', newline='') as out:
        csv.writer(out).writerows(rows)


def main():
    '''Read poprows.csv and write cities.csv and pops.csv.'''
    # All input is read and validated before any output file is opened.
    with open('poprows.csv', newline='') as src:
        city_rows, population_rows = build_tables(csv.reader(src))

    write_rows('cities.csv', city_rows)
    write_rows('pops.csv', population_rows)


if __name__ == '__main__':
    main()