vor 3 Jahren · 95ecadfcab
--- a/scripts/normalize_input.py
+++ b/scripts/normalize_input.py
@@ -38,6 +38,8 @@ for (year, data) in vars_by_year.copy().items():
 
				 	#print(year)
			
 
				 	#print(y_mat.shape)
			
 
				 
			
 
				+# For all datasets we assume that a higher value means a higher attractiveness of the region
			
 
				+# TODO: the above assumption should be made optional via some (positive/negative) effect switch
			
 
				 for (year, data) in vars_by_year.items():
			
 
				 	y_mat = np.matrix(data)
			
 
				 	# Save all pilot region names (can't be read elsewhere), as well as domain and scenario names
			
@@ -49,17 +51,19 @@ for (year, data) in vars_by_year.items():
 
				 		domains = np.squeeze(y_mat[:, -2]).tolist()[0]
			
 
				 		# Scenario name is stored in the "SCENARIO" column which is 1st from the end
			
 
				 		scenarios = np.squeeze(y_mat[:, -1]).tolist()[0]
			
 
				+	# Drop the last three columns (MODEL, DOMAIN, SCENARIO) and convert the remaining values to float
			
 
				 	y_mat = y_mat[:, :-3].astype(float)
			
 
				 	#print(y_mat.dtype)
			
 
				+	# Maximum of each column (each dataset)
			
 
				 	maxs = y_mat.max(axis=0)
			
 
				-	#print(maxs)
			
 
				+	# Minimum of each column (each dataset)
			
 
				 	mins = y_mat.min(axis=0)
			
 
				-	#print(mins)
			
 
				+	# Value range of each column (each dataset)
			
 
				 	spans = maxs-mins
			
 
				-	#print(spans)
			
 
				+	# Shift each column (dataset) so that its minimum becomes 0
			
 
				 	y_mat = y_mat - mins
			
 
				+	# Scale each column (dataset) into the range 0 to 1 (NOTE: a constant column has span 0, so this divides by zero)
			
 
				 	y_mat = y_mat / spans
			
 
				-	#print(y_mat)
			
 
				 	updated_by_year[year] = y_mat
			
 
				 
			
 
				 out = open(outname, 'w', encoding="utf-8", newline="")