nuts-data.js 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251
  1. const fs = require('fs');
  2. const csv = require('csv-parse');
  3. const stringify = require('csv-stringify');
  4. /* Helper method to load the datasets from CSV and store it in server object */
  5. module.exports.loadDatasets = async function(filePath) {
  6. //console.log('Datasets structure loading.')
  7. let datasets = undefined
  8. return new Promise((resolve, reject) => {
  9. const stream = fs.createReadStream(filePath).pipe(csv({to_line: 1}))
  10. stream
  11. .on('data', (row) => {
  12. if (!datasets) {
  13. datasets = row
  14. }
  15. })
  16. .on('end', () => {
  17. datasets = datasets.slice(2) //TODO: FIXME: unify this number (only one ID column in the beginning)
  18. console.log('Datasets structure loaded.')
  19. resolve(datasets)
  20. })
  21. .on('error', reject)
  22. })
  23. }
  24. /* Load all the attractiveness data from CSV into a server object.
  25. The data don't need to be loaded for each request then. */
  26. module.exports.loadRuralData = async function (filePath) {
  27. //console.log('Reading rural data file processing started.')
  28. let ruralData = []
  29. let columns
  30. return new Promise((resolve, reject) => {
  31. fs.createReadStream(filePath)
  32. .pipe(csv())
  33. .on('data', (row) => {
  34. if (!columns) {
  35. columns = row
  36. return
  37. }
  38. let item = {
  39. values: {}
  40. }
  41. for (let i = 0; i < columns.length; i++) {
  42. let colName = columns[i].toLowerCase()
  43. if (colName == "nuts_id") // ID of the NUTS region (EU)
  44. item.nuts = row[i]
  45. // else if (colName == "datasets") // empty datasets count
  46. // item.availableDS = datasets.length - row[i];
  47. // else if (colName == "quality")
  48. // item.quality = row[i];
  49. else if (colName == "lau2") // ID of the municipality (CZ)
  50. item.lau2 = row[i]
  51. else if (colName == "district_code") // ID of the district (Kenya+Uganda)
  52. item.district = row[i]
  53. else if (colName == "eurostat_code" || colName == "name")
  54. continue
  55. else {
  56. item.values[colName] = Number(row[i])
  57. }
  58. }
  59. ruralData.push(item)
  60. })
  61. .on('end', () => {
  62. console.log('Rural data file processing finished.');
  63. resolve(ruralData);
  64. })
  65. .on('error', reject);
  66. })
  67. }
  68. module.exports.loadOntology = async function(filePath) {
  69. return new Promise((resolve, reject) => {
  70. fs.readFile(filePath, (err, data) => {
  71. if (err) reject(err)
  72. const ontology = JSON.parse(data)
  73. resolve(ontology)
  74. })
  75. })
  76. }
  77. module.exports.parseDatasetsMetadata = function(ontology) {
  78. let factorId; //used twice so store it in a variable
  79. return ontology
  80. //Only datasets ...
  81. .filter((entity) => entity['@type'] ? entity['@type'].includes('http://www.semanticweb.org/attractiveness/Dataset') : null)
  82. //only with coverage of Europe ...
  83. .filter(
  84. (entity) => entity['http://www.semanticweb.org/attractiveness/hasCoverage']
  85. .some((coverage) => coverage['@id'] == 'http://www.semanticweb.org/attractiveness/Europe')
  86. )
  87. //only with a level of detail equal to NUTS3 regions ...
  88. .filter(
  89. (entity) => entity['http://www.semanticweb.org/attractiveness/hasLoD']
  90. .some((lod) => lod['@id'] == 'http://www.semanticweb.org/attractiveness/NUTS3')
  91. )
  92. //and only part of a factor, which belongs to the Polirural schema
  93. .filter(
  94. (entity) => {
  95. const entityDatasetGroups = entity['http://www.semanticweb.org/attractiveness/isDatasetOf'] || []
  96. for (const group of entityDatasetGroups) {
  97. const groupEntity = ontology.find((entity) => entity['@id'] == group['@id'])
  98. if (groupEntity['http://www.semanticweb.org/attractiveness/isPartOf'].some((schema) => schema['@id'] == 'http://www.semanticweb.org/attractiveness/Polirural')) {
  99. factorId = groupEntity['@id']
  100. return true
  101. }
  102. }
  103. return false
  104. }
  105. )
  106. .map((entity) => {
  107. return {
  108. name: entity['@id'].split('/').slice(-1).pop(),
  109. //FIXME: instead of [0] search for "@language"="en"
  110. description: entity['http://www.semanticweb.org/attractiveness#description'] ? entity['http://www.semanticweb.org/attractiveness#description'][0]['@value'] : '',
  111. factor: factorId ? factorId.split('/').slice(-1).pop() : ''
  112. }
  113. })
  114. }
  115. /**
  116. * Resolves with an array representing rows of CSV file
  117. * @param {string} inputFileName path to the CSV file with input data for clustering calculation
  118. */
  119. module.exports.loadClusteringInput = async function (inputFileName) {
  120. const clusteringData = [];
  121. /*
  122. * The parsed CSV array keeps the native csv-parser structure
  123. * for future easier serialization back to CSV file
  124. */
  125. return new Promise((resolve, reject) => {
  126. fs.createReadStream(inputFileName)
  127. .pipe(csv())
  128. .on('data', (row) => {
  129. row = [row[0], ...row.slice(2)]
  130. clusteringData.push(row);
  131. })
  132. .on('end', () => {
  133. resolve(clusteringData);
  134. })
  135. .on('error', reject);
  136. });
  137. }
  138. /**
  139. * Resolves once the modified CSV file is written to fs
  140. */
  141. module.exports.modifyClusteringData = async function ({datasets, data, params, idString, outputFileName}) {
  142. // regional ID must be copied to the output as well
  143. const allowedDatasets = [idString, ...params.datasets.map(ds => ds.id)]
  144. const factorMultipliers = data[0].map((dataset) => {
  145. if (dataset === idString) return 1
  146. if (!allowedDatasets.includes(dataset)) {
  147. return 0
  148. } else {
  149. return params.datasets.find(ds => ds.id === dataset).weight
  150. }
  151. })
  152. /* The actual modification logic resides here */
  153. const modifiedData = data.map((row, idx) => {
  154. return row.map((value, i) => {
  155. if (idx == 0) {
  156. /* These are the headers */
  157. /* Have to check for both allowed datasets and zero multiplications */
  158. return allowedDatasets.includes(value) && factorMultipliers[i] !== 0 ? value : null;
  159. } else if (isNaN(value)) {
  160. /* This is the NUTS ID record at the beginning of each line */
  161. return value;
  162. }
  163. return factorMultipliers[i] === 0 ? null : value*factorMultipliers[i];
  164. }).filter(val => val !== null);
  165. });
  166. //console.log(modifiedData);
  167. if (modifiedData[0].length <= 1) {
  168. throw new Error('All datasets turned off. No data to create clusters.');
  169. }
  170. return new Promise((resolve, reject) => {
  171. stringify(modifiedData, (err, output) => {
  172. if (err) return reject(err);
  173. fs.writeFile(outputFileName, output, (err) => {
  174. if (err) reject(err);
  175. else resolve();
  176. console.log('Data modification finished.');
  177. })
  178. })
  179. });
  180. }
  181. /**
  182. * Reads the out_file.csv created by R script and saves it into an object
  183. */
  184. module.exports.loadClusters = function (filePath, idString, dataLoadedCallback) {
  185. //console.log('Reading clustering data file processing started.');
  186. let clusters = [];
  187. let columns = undefined;
  188. fs.createReadStream(filePath)
  189. .pipe(csv())
  190. .on('data', (row) => {
  191. if (!columns) {
  192. columns = row;
  193. }
  194. else {
  195. let item = {};
  196. for (let i = 0; i < columns.length; i++) {
  197. const colName = columns[i].length > 0 ? columns[i].toLowerCase() : idString;
  198. item[colName] = row[i];
  199. }
  200. clusters.push(item);
  201. }
  202. })
  203. .on('end', () => {
  204. console.log('Cluster data file processing finished.');
  205. dataLoadedCallback(clusters);
  206. });
  207. }
  208. module.exports.getFactorIndex = function (region, factor) {
  209. //console.log('getFactorIndex');
  210. //console.log('region: ' + JSON.stringify(region, null, 4));
  211. //console.log('factor: ' + JSON.stringify(factor, null, 4));
  212. let sumValue = 0;
  213. let count = 0;
  214. factor.datasets.forEach(ds => {
  215. const dataset = ds.split('/').slice(-1).pop()
  216. //console.log('factor: ' + factor.factor);
  217. const value = region.values[dataset];
  218. if (value) {
  219. sumValue += value;
  220. count++;
  221. }
  222. });
  223. return { index: sumValue / count, sumValue: sumValue, sumWeight: count * factor.weight };
  224. }
  225. /* Unused */
  226. function getDatasetFactor(datasets, colName) {
  227. for (let i = 0; i < datasets.length; i++) {
  228. if (datasets[i].Name.toLowerCase() == colName)
  229. return datasets[i].Factor;
  230. }
  231. return undefined;
  232. }