// nuts-data.js 7.2 KB
  const fs = require('fs');
  const csv = require('csv-parse');
  const stringify = require('csv-stringify');
  /**
   * Reads the dataset definitions from a CSV file and hands them to the caller
   * as an array of plain objects.
   *
   * The first parsed row supplies the property names; every following row
   * becomes one object keyed by those names.
   *
   * NOTE(review): `separator` is the option name used by the `csv-parser`
   * package; `csv-parse` documents this setting as `delimiter`. Confirm which
   * one is intended before relying on ';' being the field separator.
   *
   * @param {string} filePath path to the CSV file describing the datasets
   * @param {function(Object[]): void} dataLoadedCallback invoked with all
   *   records once the parser signals the end of the input
   */
  module.exports.loadDatasets = function (filePath, dataLoadedCallback) {
    const records = [];
    let header;
    const parser = fs.createReadStream(filePath).pipe(csv({ separator: ';' }));

    parser.on('data', (cells) => {
      // The very first row only names the columns.
      if (!header) {
        header = cells;
        return;
      }
      const record = {};
      header.forEach((name, position) => {
        record[name] = cells[position];
      });
      records.push(record);
    });

    parser.on('end', () => {
      dataLoadedCallback(records);
    });
  };
  /**
   * Loads all the attractiveness data from CSV into a server-side object, so
   * the file does not need to be parsed again for each request.
   *
   * Every data row becomes one object. Column names are matched
   * case-insensitively:
   *  - `nuts_id`  -> `item.nuts` (ID of the NUTS region)
   *  - `datasets` -> `item.availableDS`, computed as `datasets.length` minus
   *                  the cell value (the cell holds the empty datasets count)
   *  - `quality`  -> `item.quality`
   *  - `lau2`     -> `item.lau2`
   *  - any other column for which `getDataSetFactor` returns a factor is
   *    stored as a number under `item[<factor>][<original column name>]`;
   *    columns without a factor are skipped.
   *
   * @param {string} filePath path to the CSV file with the rural data
   * @param {Object[]} datasets dataset definitions used to resolve the factor
   *   of each data column
   * @param {function(Object[]): void} dataLoadedCallback invoked with all
   *   parsed rows once the parser signals the end of the input
   */
  module.exports.loadRuralData = function (filePath, datasets, dataLoadedCallback) {
    console.log('Reading rural data file processing started.');
    const ruralData = [];

    // Builds the function that stores one cell of the given column into a row
    // object. What a column means depends only on its header, so this runs
    // once per column instead of once per cell.
    const createColumnWriter = (column) => {
      const colName = column.toLowerCase();
      switch (colName) {
        case 'nuts_id': // ID of the NUTS region
          return (item, cell) => { item.nuts = cell; };
        case 'datasets': // empty datasets count
          return (item, cell) => { item.availableDS = datasets.length - cell; };
        case 'quality':
          return (item, cell) => { item.quality = cell; };
        case 'lau2':
          return (item, cell) => { item.lau2 = cell; };
        default: {
          const factor = getDataSetFactor(datasets, colName);
          if (!factor) {
            // Column does not belong to any known dataset: ignore it.
            return () => {};
          }
          return (item, cell) => {
            if (!item[factor]) {
              item[factor] = {};
            }
            item[factor][column] = Number(cell);
          };
        }
      }
    };

    let columnWriters;
    fs.createReadStream(filePath)
      .pipe(csv())
      .on('data', (row) => {
        // The first row is the header; it only defines the column writers.
        if (!columnWriters) {
          columnWriters = row.map(createColumnWriter);
          return;
        }
        const item = {};
        for (let i = 0; i < columnWriters.length; i++) {
          columnWriters[i](item, row[i]);
        }
        ruralData.push(item);
      })
      .on('end', () => {
        dataLoadedCallback(ruralData);
      });
  };
  /**
   * Reads a CSV file and resolves with an array of its rows.
   *
   * Rows are stored exactly as the CSV parser emits them (the header row is
   * not treated specially), which keeps later serialization back to a CSV
   * file straightforward.
   *
   * @param {string} inputFileName path to the CSV file with input data for clustering calculation
   * @returns {Promise<Array>} resolves with the parsed rows; rejects when the
   *   file cannot be read or the parser reports an error
   */
  module.exports.loadClusteringInput = async function (inputFileName) {
    const clusteringData = [];

    return new Promise((resolve, reject) => {
      const source = fs.createReadStream(inputFileName);
      const parser = csv();

      // pipe() does not forward errors from the source to the destination, so
      // without this listener a missing or unreadable file would surface as an
      // unhandled 'error' event instead of a rejected promise.
      source.on('error', reject);

      parser
        .on('data', (row) => {
          clusteringData.push(row);
        })
        .on('end', () => {
          resolve(clusteringData);
        })
        .on('error', (err) => {
          // Stop reading and release the file handle once parsing has failed.
          source.destroy();
          reject(err);
        });

      source.pipe(parser);
    });
  };
  /**
   * Applies the factor weights to the clustering input and writes the result
   * to a CSV file.
   *
   * A column is kept only when its header is the region ID (`idString`) or a
   * dataset listed in one of `params.factors[].datasets`, and its multiplier is
   * not zero. The multiplier of a dataset column is the `weight` of the
   * `params.factors` entry whose `factor` equals the dataset's `Factor`;
   * datasets that are unknown, not listed, or whose factor has no entry in
   * `params.factors` get multiplier 0 and are dropped.
   *
   * Kept columns are dropped or kept by position for every row, so data rows
   * always stay aligned with the header. Cells of the ID column and
   * non-numeric cells are copied unchanged; all other cells are multiplied.
   *
   * @param {Object} options
   * @param {Object[]} options.datasets dataset definitions; `Name` and `Factor` are read
   * @param {Array[]} options.data parsed CSV rows, `data[0]` being the header row
   * @param {Object} options.params request parameters; `params.factors` is a
   *   list of objects with `factor`, `weight` and `datasets`
   * @param {string} options.idString header of the region ID column
   * @param {string} options.outputFileName path of the CSV file to write
   * @returns {Promise<void>} resolves once the modified CSV file is written to fs
   * @throws {Error} (as a rejection) when `data` is empty or when no dataset
   *   column is left besides the region ID
   */
  module.exports.modifyClusteringData = async function ({datasets, data, params, idString, outputFileName}) {
    if (!Array.isArray(data) || data.length === 0) {
      throw new Error('No clustering input data to modify.');
    }
    const header = data[0];

    // regional ID must be copied to the output as well
    const allowedDatasets = new Set([idString]);
    for (const factor of params.factors) {
      for (const dataset of factor.datasets) {
        allowedDatasets.add(dataset);
      }
    }

    const factorMultipliers = header.map((dataset) => {
      if (dataset === idString) return 1;
      const definition = datasets.find(ds => ds.Name === dataset);
      if (!definition) {
        /* If the factor is unknown for this dataset, it will effectively turn it off */
        console.log(`Undefined factor for dataset ${dataset}`);
        return 0;
      }
      if (!allowedDatasets.has(dataset)) {
        return 0;
      }
      const selectedFactor = params.factors.find(f => f.factor === definition.Factor);
      if (!selectedFactor) {
        /* The dataset is listed, but its own factor was not requested: turn it off as well */
        console.log(`No weight for factor ${definition.Factor} of dataset ${dataset}`);
        return 0;
      }
      return selectedFactor.weight;
    });

    /* Have to check for both allowed datasets and zero multiplications */
    const keptColumns = header.map((name, i) => allowedDatasets.has(name) && factorMultipliers[i] !== 0);

    /* The actual modification logic resides here */
    const modifiedData = data.map((row, rowIndex) => {
      const cells = [];
      row.forEach((value, i) => {
        // Decide by column position, so a non-numeric cell in a disabled
        // column cannot slip through and shift the row against the header.
        if (!keptColumns[i]) return;
        if (rowIndex === 0 || header[i] === idString || isNaN(value)) {
          // Headers, region IDs and non-numeric cells are copied verbatim
          // (multiplying a numeric ID would e.g. strip its leading zeros).
          cells.push(value);
        } else {
          cells.push(value * factorMultipliers[i]);
        }
      });
      return cells;
    });

    if (modifiedData[0].length <= 1) {
      throw new Error('All datasets turned off. No data to create clusters.');
    }

    return new Promise((resolve, reject) => {
      stringify(modifiedData, (err, output) => {
        if (err) return reject(err);
        fs.writeFile(outputFileName, output, (writeErr) => {
          if (writeErr) return reject(writeErr);
          // Report success only when the file has really been written.
          console.log('Data modification finished.');
          resolve();
        });
      });
    });
  };
  /**
   * Reads the out_file.csv created by the R script and converts it into an
   * array of objects, one per data row.
   *
   * Property names are the lower-cased header cells; a column whose header
   * cell is empty is stored under `idString` instead.
   *
   * @param {string} filePath path to the CSV file with the clustering result
   * @param {string} idString property name used for a column with an empty header
   * @param {function(Object[]): void} dataLoadedCallback invoked with all
   *   parsed rows once the parser signals the end of the input
   */
  module.exports.loadClusters = function (filePath, idString, dataLoadedCallback) {
    const parsedRows = [];
    let keys = null;

    const handleRow = (cells) => {
      if (!keys) {
        // Header row: settle the property name of every column up front.
        keys = cells.map((name) => (name.length > 0 ? name.toLowerCase() : idString));
        return;
      }
      const cluster = {};
      keys.forEach((key, position) => {
        cluster[key] = cells[position];
      });
      parsedRows.push(cluster);
    };

    const handleEnd = () => {
      console.log('Cluster data file processing finished.');
      dataLoadedCallback(parsedRows);
    };

    fs.createReadStream(filePath)
      .pipe(csv())
      .on('data', handleRow)
      .on('end', handleEnd);
  };
  /**
   * Averages the values a region holds for the datasets of one factor.
   *
   * Values are read from `region[factor.factor][<dataset>]`. Only truthy
   * values are counted, so missing, NaN and zero values contribute to neither
   * the sum nor the count.
   * NOTE(review): skipping zero looks like a way to ignore empty cells
   * (an empty CSV cell converts to 0) - confirm that a genuine 0 should be
   * left out of the average as well.
   *
   * @param {Object} region region record
   * @param {Object} factor object with `factor` (name), `weight` and
   *   `datasets` (list of dataset names)
   * @returns {{index: number, sumValue: number, sumWeight: number}}
   *   `sumValue` is the sum of the counted values, `index` is their mean
   *   (NaN when nothing was counted) and `sumWeight` is the number of counted
   *   values multiplied by `factor.weight`
   */
  module.exports.getFactorIndex = function (region, factor) {
    // A region may have no record for this factor at all; treat that like a
    // record without usable values instead of throwing a TypeError.
    const factorValues = region[factor.factor] || {};
    let sumValue = 0;
    let count = 0;
    for (const ds of factor.datasets) {
      const value = factorValues[ds];
      if (value) {
        sumValue += value;
        count++;
      }
    }
    return { index: sumValue / count, sumValue: sumValue, sumWeight: count * factor.weight };
  };
  /**
   * Looks up the factor of the dataset with the given name.
   *
   * The comparison lower-cases each dataset `Name`, so `colName` is expected
   * to be lower-cased by the caller. Records without a string `Name` are
   * skipped instead of raising a TypeError.
   *
   * @param {Object[]} datasets dataset definitions; `Name` and `Factor` are read
   * @param {string} colName lower-cased dataset (column) name to look for
   * @returns {*} the `Factor` of the first matching record, or `undefined`
   *   when no record matches
   */
  function getDataSetFactor(datasets, colName) {
    const match = datasets.find(
      (ds) => Boolean(ds) && typeof ds.Name === 'string' && ds.Name.toLowerCase() === colName
    );
    return match ? match.Factor : undefined;
  }