Quellcode durchsuchen

📝 improve normalization script docs

jmacura vor 3 Jahren
Ursprung
Commit
95ecadfcab
1 geänderte Dateien mit 8 neuen und 4 gelöschten Zeilen
  1. 8 4
      scripts/normalize_input.py

+ 8 - 4
scripts/normalize_input.py

@@ -38,6 +38,8 @@ for (year, data) in vars_by_year.copy().items():
 	#print(year)
 	#print(y_mat.shape)
 
+# For all the datasets we assume that the higher the value, the higher the region's attractiveness
+# TODO: the above assumption should be made configurable via some (positive/negative) effect switch
 for (year, data) in vars_by_year.items():
 	y_mat = np.matrix(data)
 	# Save all pilot region names (can't be read elsewhere), as well as domain and scenario names
@@ -49,17 +51,19 @@ for (year, data) in vars_by_year.items():
 		domains = np.squeeze(y_mat[:, -2]).tolist()[0]
 		# Scenario name is stored in the "SCENARIO" column which is 1st from the end
 		scenarios = np.squeeze(y_mat[:, -1]).tolist()[0]
+	# Drop the last three columns (MODEL, DOMAIN, SCENARIO) and convert the remaining values to float
 	y_mat = y_mat[:, :-3].astype(float)
 	#print(y_mat.dtype)
+	# Maximum of each column (each dataset)
 	maxs = y_mat.max(axis=0)
-	#print(maxs)
+	# Minimum of each column (each dataset)
 	mins = y_mat.min(axis=0)
-	#print(mins)
+	# Value range of each column (each dataset)
 	spans = maxs-mins
-	#print(spans)
+	# Align the range of each column (dataset) so it starts at 0
 	y_mat = y_mat - mins
+	# Rescale each column (dataset) by its value range so it spans 0 to 1
 	y_mat = y_mat / spans
-	#print(y_mat)
 	updated_by_year[year] = y_mat
 
 out = open(outname, 'w', encoding="utf-8", newline="")