nuts-data.js 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
  1. const fs = require('fs');
  2. const csv = require('csv-parse');
  3. const stringify = require('csv-stringify');
  4. /* Helper method to load the datasets from CSV and store it in server object */
  5. module.exports.loadDatasets = async function(filePath) {
  6. //console.log('Datasets structure loading.')
  7. let datasets = undefined
  8. return new Promise((resolve, reject) => {
  9. const stream = fs.createReadStream(filePath).pipe(csv())
  10. stream
  11. .on('data', (row) => {
  12. if (!datasets) {
  13. datasets = row
  14. }
  15. stream.end()
  16. })
  17. .on('end', () => {
  18. datasets = datasets.slice(3)
  19. console.log('Datasets structure loaded.')
  20. resolve(datasets)
  21. })
  22. .on('error', reject)
  23. })
  24. }
  25. /* Load all the attractivness data from CSV into a server object.
  26. The data don't need to be loaded for each request then. */
  27. module.exports.loadRuralData = async function (filePath) {
  28. //console.log('Reading rural data file processing started.')
  29. let ruralData = []
  30. let columns
  31. return new Promise((resolve, reject) => {
  32. fs.createReadStream(filePath)
  33. .pipe(csv())
  34. .on('data', (row) => {
  35. if (!columns) {
  36. columns = row
  37. return
  38. }
  39. let item = {
  40. values: {}
  41. }
  42. for (let i = 0; i < columns.length; i++) {
  43. let colName = columns[i].toLowerCase()
  44. if (colName == "nuts_id") // ID of the NUTS region
  45. item.nuts = row[i]
  46. // else if (colName == "datasets") // empty datasets count
  47. // item.availableDS = datasets.length - row[i];
  48. // else if (colName == "quality")
  49. // item.quality = row[i];
  50. else if (colName == "lau2")
  51. item.lau2 = row[i]
  52. else if (colName == "eurostat_code" || colName == "name")
  53. continue
  54. else {
  55. item.values[colName] = Number(row[i])
  56. }
  57. }
  58. ruralData.push(item)
  59. })
  60. .on('end', () => {
  61. console.log('Rural data file processing finished.');
  62. resolve(ruralData);
  63. })
  64. .on('error', reject);
  65. })
  66. }
  67. module.exports.loadOntology = async function(filePath) {
  68. return new Promise((resolve, reject) => {
  69. fs.readFile(filePath, (err, data) => {
  70. if (err) reject(err)
  71. const ontology = JSON.parse(data)
  72. resolve(ontology)
  73. })
  74. })
  75. }
  76. /**
  77. * Resolves with an array representing rows of CSV file
  78. * @param {string} inputFileName path to the CSV file with input data for clustering calculation
  79. */
  80. module.exports.loadClusteringInput = async function (inputFileName) {
  81. const clusteringData = [];
  82. /*
  83. * The parsed CSV array keeps the native csv-parser structure
  84. * for future easier serialization back to CSV file
  85. */
  86. return new Promise((resolve, reject) => {
  87. fs.createReadStream(inputFileName)
  88. .pipe(csv())
  89. .on('data', (row) => {
  90. clusteringData.push(row);
  91. })
  92. .on('end', () => {
  93. resolve(clusteringData);
  94. })
  95. .on('error', reject);
  96. });
  97. }
  98. /**
  99. * Resolves once the modified CSV file is written to fs
  100. */
  101. module.exports.modifyClusteringData = async function ({datasets, data, params, idString, outputFileName}) {
  102. // regional ID must be copied to the output as well
  103. const allowedDatasets = [idString, ...params.datasets.map(ds => ds.id)]
  104. const factorMultipliers = data[0].map((dataset) => {
  105. if (dataset === idString) return 1
  106. if (!allowedDatasets.includes(dataset)) {
  107. return 0
  108. } else {
  109. return params.datasets.find(ds => ds.id === dataset).weight
  110. }
  111. })
  112. /* The actual modification logic resides here */
  113. const modifiedData = data.map((row, idx) => {
  114. return row.map((value, i) => {
  115. if (idx == 0) {
  116. /* These are the headers */
  117. /* Have to check for both allowed datasets and zero multiplications */
  118. return allowedDatasets.includes(value) && factorMultipliers[i] !== 0 ? value : null;
  119. } else if (isNaN(value)) {
  120. /* This is the NUTS ID record at the beginning of each line */
  121. return value;
  122. }
  123. return factorMultipliers[i] === 0 ? null : value*factorMultipliers[i];
  124. }).filter(val => val !== null);
  125. });
  126. //console.log(modifiedData);
  127. if (modifiedData[0].length <= 1) {
  128. throw new Error('All datasets turned off. No data to create clusters.');
  129. }
  130. return new Promise((resolve, reject) => {
  131. stringify(modifiedData, (err, output) => {
  132. if (err) return reject(err);
  133. fs.writeFile(outputFileName, output, (err) => {
  134. if (err) reject(err);
  135. else resolve();
  136. console.log('Data modification finished.');
  137. })
  138. })
  139. });
  140. }
  141. /**
  142. * Reads the out_file.csv created by R script and saves it into an object
  143. */
  144. module.exports.loadClusters = function (filePath, idString, dataLoadedCallback) {
  145. //console.log('Reading clustering data file processing started.');
  146. let clusters = [];
  147. let columns = undefined;
  148. fs.createReadStream(filePath)
  149. .pipe(csv())
  150. .on('data', (row) => {
  151. if (!columns) {
  152. columns = row;
  153. }
  154. else {
  155. let item = {};
  156. for (let i = 0; i < columns.length; i++) {
  157. const colName = columns[i].length > 0 ? columns[i].toLowerCase() : idString;
  158. item[colName] = row[i];
  159. }
  160. clusters.push(item);
  161. }
  162. })
  163. .on('end', () => {
  164. console.log('Cluster data file processing finished.');
  165. dataLoadedCallback(clusters);
  166. });
  167. }
  168. module.exports.getFactorIndex = function (region, factor) {
  169. //console.log('getFactorIndex');
  170. //console.log('region: ' + JSON.stringify(region, null, 4));
  171. //console.log('factor: ' + JSON.stringify(factor, null, 4));
  172. let sumValue = 0;
  173. let count = 0;
  174. factor.datasets.forEach(ds => {
  175. const dataset = ds.split('/').slice(-1).pop()
  176. //console.log('factor: ' + factor.factor);
  177. const value = region.values[dataset];
  178. if (value) {
  179. sumValue += value;
  180. count++;
  181. }
  182. });
  183. return { index: sumValue / count, sumValue: sumValue, sumWeight: count * factor.weight };
  184. }
  185. /* Unused */
  186. function getDatasetFactor(datasets, colName) {
  187. for (let i = 0; i < datasets.length; i++) {
  188. if (datasets[i].Name.toLowerCase() == colName)
  189. return datasets[i].Factor;
  190. }
  191. return undefined;
  192. }