| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677 |
# coding: utf-8
"""Min-max normalise the per-year indicator table.

Reads ``fname``, groups the rows by the value of their first column (the
year), rescales every numeric column to the range [0, 1] independently within
each year, and writes the result to ``outname`` using the original header row.

The last three columns of every input row are treated as text labels
(MODEL = pilot region, DOMAIN, SCENARIO) and are copied through unchanged.
"""
from csv import reader, writer

import numpy as np

fname = "../src/assets/data/all_data.csv"
outname = "../src/assets/data/normalized_data.csv"

# Years with fewer rows than this are dropped because they do not cover every
# pilot region.  NOTE: upstream preprocessing is expected to have removed such
# years already, so this is only a safety net.
MIN_ROWS_PER_YEAR = 9

# Number of trailing label columns (MODEL, DOMAIN, SCENARIO) in each row.
LABEL_COLUMNS = 3


def _read_rows_by_year(path):
    """Return ``(headers, rows_by_year)`` read from the CSV file at *path*.

    ``headers`` is the first non-empty row.  ``rows_by_year`` maps the year
    string (first column) to the list of remaining columns of every row of
    that year, in file order.
    """
    headers = []
    rows_by_year = {}
    with open(path, encoding="utf-8", newline="") as fh:
        for row in reader(fh, delimiter=','):
            if not row:
                # The csv reader yields [] for blank lines; they carry no data.
                continue
            if not headers:
                # Keep the header row for the output file; it is not data.
                headers = row
                continue
            rows_by_year.setdefault(row[0], []).append(row[1:])
    return headers, rows_by_year


def _normalize_columns(values):
    """Rescale each column of the 2-D float array *values* to [0, 1]."""
    lows = values.min(axis=0)
    spans = values.max(axis=0) - lows
    # A column that is constant within the year has a span of 0.  Dividing by
    # 1 instead maps it to 0.0 rather than producing nan from 0/0.
    safe_spans = np.where(spans == 0, 1.0, spans)
    return (values - lows) / safe_spans


def _normalize_year(rows):
    """Split one year's rows into normalised values and their text labels.

    Returns ``(values, labels)`` where ``values`` is the normalised float
    array and ``labels[i]`` is the ``[region, domain, scenario]`` list that
    belongs to row ``i``.
    """
    table = np.array(rows)
    labels = table[:, -LABEL_COLUMNS:].tolist()
    values = table[:, :-LABEL_COLUMNS].astype(float)
    return _normalize_columns(values), labels


def main(in_path=fname, out_path=outname, min_rows=MIN_ROWS_PER_YEAR):
    """Normalise the CSV at *in_path* and write the result to *out_path*.

    Years with fewer than *min_rows* rows are left out of the output.
    """
    headers, rows_by_year = _read_rows_by_year(in_path)

    normalized = {}
    for year, rows in rows_by_year.items():
        if len(rows) < min_rows:
            continue
        normalized[year] = _normalize_year(rows)

    if normalized:
        # Show the pilot regions of the first retained year as a sanity check.
        first_labels = next(iter(normalized.values()))[1]
        print([row_labels[0] for row_labels in first_labels])

    # Rows are emitted position by position across all years (row 0 of every
    # year, then row 1 of every year, ...).  The row count comes from the data
    # itself, and each row carries the labels of its own year.
    max_rows = max((len(labels) for _, labels in normalized.values()), default=0)
    with open(out_path, 'w', encoding="utf-8", newline="") as out:
        cw = writer(out, delimiter=',', quotechar='"')
        cw.writerow(headers)
        for i in range(max_rows):
            for year, (values, labels) in normalized.items():
                if i >= len(labels):
                    continue
                cw.writerow([year, *values[i].astype(str).tolist(), *labels[i]])


if __name__ == "__main__":
    main()
|