# coding: utf-8
"""Min-max normalise the numeric columns of ``all_data.csv``, year by year.

The input CSV has a header line; in every data row the first column is the
year and the last three columns are the textual MODEL (pilot region), DOMAIN
and SCENARIO labels.  Everything in between is numeric.  For each year, every
numeric column is rescaled to ``(x - min) / (max - min)`` over that year's
rows, and the result is written to ``normalized_data.csv`` with the same
column layout as the input.
"""

from csv import reader, writer

import numpy as np

fname = "../src/assets/data/all_data.csv"
outname = "../src/assets/data/normalized_data.csv"

# A year is kept only if it has at least this many data rows (the original
# check for "data for each pilot region").
MIN_ROWS_PER_YEAR = 9
# Number of trailing label columns: MODEL, DOMAIN, SCENARIO.
LABEL_COLUMNS = 3


def read_rows_by_year(path):
    """Read the CSV at *path* and group its data rows by year.

    Returns ``(headers, rows_by_year)`` where ``headers`` is the first
    non-empty row and ``rows_by_year`` maps the year string (first column) to
    the list of remaining columns of every row of that year, in file order.
    Blank lines are skipped.
    """
    headers = []
    rows_by_year = {}
    with open(path, encoding="utf-8") as fh:
        for row in reader(fh, delimiter=','):
            if not row:
                # csv yields [] for blank lines; they carry no year or data.
                continue
            if not headers:
                # The first line holds the column names, not data.
                headers = row
                continue
            rows_by_year.setdefault(row[0], []).append(row[1:])
    return headers, rows_by_year


def drop_incomplete_years(rows_by_year, min_rows=MIN_ROWS_PER_YEAR):
    """Return a copy of *rows_by_year* without years having < *min_rows* rows.

    Upstream preprocessing is expected to have removed such years already;
    this is kept as a safeguard.
    """
    return {
        year: rows
        for year, rows in rows_by_year.items()
        if len(rows) >= min_rows
    }


def split_labels(rows):
    """Return the ``(regions, domains, scenarios)`` lists for *rows*.

    The labels are the last three columns of each row: MODEL (region) is
    third from the end, DOMAIN second from the end and SCENARIO last.
    """
    regions = [row[-3] for row in rows]
    domains = [row[-2] for row in rows]
    scenarios = [row[-1] for row in rows]
    return regions, domains, scenarios


def normalize_columns(rows):
    """Min-max normalise the numeric columns of one year's *rows*.

    The trailing label columns are dropped, the rest is converted to float
    and every column is rescaled to ``(x - min) / (max - min)``.  Returns a
    2-D float ndarray with one row per input row.

    A column whose values are all identical has a zero span and therefore
    comes out as ``nan`` (0/0), exactly as in the original script.
    """
    values = np.array([row[:-LABEL_COLUMNS] for row in rows]).astype(float)
    lows = values.min(axis=0)
    spans = values.max(axis=0) - lows
    return (values - lows) / spans


def build_output_rows(normalized_by_year, regions, domains, scenarios):
    """Assemble the output rows (without the header line).

    Rows are ordered by row index first and by year second, i.e. row ``i`` of
    every year is emitted before row ``i + 1`` of any year.  Each output row
    is ``[year, <normalised values as strings>, region, domain, scenario]``.

    The number of rows comes from the label lists instead of a hard-coded
    constant.  Because the labels are shared by all years, every year must
    have exactly that many rows.

    Raises:
        ValueError: if a year's row count differs from ``len(regions)``.
    """
    expected = len(regions)
    for year, values in normalized_by_year.items():
        if values.shape[0] != expected:
            raise ValueError(
                f"year {year} has {values.shape[0]} rows, expected {expected}"
            )

    out_rows = []
    for i, labels in enumerate(zip(regions, domains, scenarios)):
        for year, values in normalized_by_year.items():
            out_rows.append([year, *values[i].astype(str).tolist(), *labels])
    return out_rows


def main():
    """Run the whole pipeline: read ``fname``, normalise, write ``outname``."""
    headers, vars_by_year = read_rows_by_year(fname)
    vars_by_year = drop_incomplete_years(vars_by_year)

    regions, domains, scenarios = [], [], []
    updated_by_year = {}
    for year, rows in vars_by_year.items():
        if not regions:
            # The labels are taken from the first remaining year only and
            # reused for every year when writing the output.
            regions, domains, scenarios = split_labels(rows)
            print(regions)
        updated_by_year[year] = normalize_columns(rows)

    # Build (and validate) everything before touching the output file so a
    # failure does not leave a truncated CSV behind.
    out_rows = build_output_rows(updated_by_year, regions, domains, scenarios)

    with open(outname, 'w', encoding="utf-8", newline="") as out:
        cw = writer(out, delimiter=',', quotechar='"')
        cw.writerow(headers)
        cw.writerows(out_rows)


if __name__ == "__main__":
    main()