// nuts-data.js
  1. const fs = require('fs');
  2. const csv = require('csv-parse');
  3. const stringify = require('csv-stringify');
  4. /* Helper method to load the datasets from CSV and store it in server object */
  5. module.exports.loadDatasets = async function(filePath) {
  6. //console.log('Datasets structure loading.');
  7. let datasets = [];
  8. let columns = undefined;
  9. return new Promise((resolve, reject) => {
  10. fs.createReadStream(filePath)
  11. .pipe(csv({ separator: ';' }))
  12. .on('data', (row) => {
  13. if (!columns) {
  14. columns = row;
  15. }
  16. else {
  17. let ds = {};
  18. for (let i = 0; i < columns.length; i++) {
  19. ds[columns[i]] = row[i];
  20. }
  21. datasets.push(ds);
  22. }
  23. })
  24. .on('end', () => {
  25. //console.log('Datasets structure loaded.');
  26. resolve(datasets);
  27. })
  28. .on('error', reject);
  29. })
  30. }
  31. /* Load all the attractivness data from CSV into a server object.
  32. The data don't need to be loaded for each request then. */
  33. module.exports.loadRuralData = async function (filePath, datasets) {
  34. console.log('Reading rural data file processing started.');
  35. let ruralData = [];
  36. let columns = undefined;
  37. return new Promise((resolve, reject) => {
  38. fs.createReadStream(filePath)
  39. .pipe(csv())
  40. .on('data', (row) => {
  41. if (!columns) {
  42. columns = row;
  43. }
  44. else {
  45. let item = {};
  46. for (let i = 0; i < columns.length; i++) {
  47. let colName = columns[i].toLowerCase();
  48. if (colName == "nuts_id") // ID of the NUTS region
  49. item.nuts = row[i];
  50. else if (colName == "datasets") // empty datasets count
  51. item.availableDS = datasets.length - row[i];
  52. else if (colName == "quality")
  53. item.quality = row[i];
  54. else if (colName == "lau2")
  55. item.lau2 = row[i];
  56. else {
  57. let factor = getDataSetFactor(datasets, colName);
  58. if (factor) {
  59. if (!item[factor])
  60. item[factor] = {};
  61. //item[factor].push({ dataset: columns[i], value: row[i] });
  62. item[factor][columns[i]] = Number(row[i]);
  63. }
  64. }
  65. }
  66. ruralData.push(item);
  67. }
  68. })
  69. .on('end', () => {
  70. //console.log('Rural data file processing finished.');
  71. resolve(ruralData);
  72. })
  73. .on('error', reject);
  74. })
  75. }
  76. module.exports.loadOntology = async function(filePath) {
  77. return new Promise((resolve, reject) => {
  78. fs.readFile(filePath, (err, data) => {
  79. if (err) reject(err)
  80. const ontology = JSON.parse(data)
  81. resolve(ontology)
  82. })
  83. })
  84. }
  85. /**
  86. * Resolves with an array representing rows of CSV file
  87. * @param {string} inputFileName path to the CSV file with input data for clustering calculation
  88. */
  89. module.exports.loadClusteringInput = async function (inputFileName) {
  90. const clusteringData = [];
  91. /*
  92. * The parsed CSV array keeps the native csv-parser structure
  93. * for future easier serialization back to CSV file
  94. */
  95. return new Promise((resolve, reject) => {
  96. fs.createReadStream(inputFileName)
  97. .pipe(csv())
  98. .on('data', (row) => {
  99. clusteringData.push(row);
  100. })
  101. .on('end', () => {
  102. resolve(clusteringData);
  103. })
  104. .on('error', reject);
  105. });
  106. }
  107. /**
  108. * Resolves once the modified CSV file is written to fs
  109. */
  110. module.exports.modifyClusteringData = async function ({datasets, data, params, idString, outputFileName}) {
  111. // regional ID must be copied to the output as well
  112. let allowedDatasets = [idString];
  113. for (const factor of params.factors) {
  114. allowedDatasets = [...allowedDatasets, ...factor.datasets];
  115. }
  116. const factorMultipliers = data[0].map((dataset) => {
  117. if (dataset === idString) return 1;
  118. const factor = datasets.find(ds => ds.Name === dataset);
  119. if (!factor) {
  120. /* If the factor is unknown for this dataset, it will effectivelly turn it off */
  121. console.log(`Undefined factor for dataset ${dataset}`);
  122. return 0;
  123. } else if (!allowedDatasets.includes(dataset)) {
  124. return 0;
  125. } else {
  126. return params.factors.find(f => f.factor === factor.Factor).weight;
  127. }
  128. })
  129. //console.log(factorMultipliers);
  130. /* The actual modification logic resides here */
  131. const modifiedData = data.map((row, idx) => {
  132. return row.map((value, i) => {
  133. if (idx == 0) {
  134. /* These are the headers */
  135. /* Have to check for both allowed datasets and zero multiplications */
  136. return allowedDatasets.includes(value) && factorMultipliers[i] !== 0 ? value : null;
  137. } else if (isNaN(value)) {
  138. /* This is the NUTS ID record at the beginning of each line */
  139. return value;
  140. }
  141. return factorMultipliers[i] === 0 ? null : value*factorMultipliers[i];
  142. }).filter(val => val !== null);
  143. });
  144. //console.log(modifiedData);
  145. if (modifiedData[0].length <= 1) {
  146. throw new Error('All datasets turned off. No data to create clusters.');
  147. }
  148. return new Promise((resolve, reject) => {
  149. stringify(modifiedData, (err, output) => {
  150. if (err) return reject(err);
  151. fs.writeFile(outputFileName, output, (err) => {
  152. if (err) reject(err);
  153. else resolve();
  154. console.log('Data modification finished.');
  155. })
  156. })
  157. });
  158. }
  159. /**
  160. * Reads the out_file.csv created by R script and saves it into an object
  161. */
  162. module.exports.loadClusters = function (filePath, idString, dataLoadedCallback) {
  163. //console.log('Reading clustering data file processing started.');
  164. let clusters = [];
  165. let columns = undefined;
  166. fs.createReadStream(filePath)
  167. .pipe(csv())
  168. .on('data', (row) => {
  169. if (!columns) {
  170. columns = row;
  171. }
  172. else {
  173. let item = {};
  174. for (let i = 0; i < columns.length; i++) {
  175. const colName = columns[i].length > 0 ? columns[i].toLowerCase() : idString;
  176. item[colName] = row[i];
  177. }
  178. clusters.push(item);
  179. }
  180. })
  181. .on('end', () => {
  182. console.log('Cluster data file processing finished.');
  183. dataLoadedCallback(clusters);
  184. });
  185. }
  186. module.exports.getFactorIndex = function (region, factor) {
  187. //console.log('getFactorIndex');
  188. //console.log('region: ' + region.nuts);
  189. //console.log('factor: ' + JSON.stringify(factor, null, 4));
  190. let sumValue = 0;
  191. let count = 0;
  192. factor.datasets.forEach(ds => {
  193. //console.log('factor: ' + factor.factor);
  194. let value = region[factor.factor][ds];
  195. if (value) {
  196. sumValue += value;
  197. count++;
  198. }
  199. });
  200. return { index: sumValue / count, sumValue: sumValue, sumWeight: count * factor.weight };
  201. }
  202. function getDataSetFactor(datasets, colName) {
  203. for (let i = 0; i < datasets.length; i++) {
  204. if (datasets[i].Name.toLowerCase() == colName)
  205. return datasets[i].Factor;
  206. }
  207. return undefined;
  208. }