const fs = require('fs');
// csv-parse / csv-stringify v4-style imports (in v5+ these live on the
// `.parse` / `.stringify` named exports instead).
const csv = require('csv-parse');
const stringify = require('csv-stringify');

/* Helper that loads the dataset names (the CSV header row) and stores them in a server-side object. */
module.exports.loadDatasets = async function (filePath) {
    let datasets;
    return new Promise((resolve, reject) => {
        const stream = fs.createReadStream(filePath).pipe(csv());
        stream
            .on('data', (row) => {
                if (!datasets) {
                    // Only the header row is needed; capture it and end the parser.
                    datasets = row;
                    stream.end();
                }
            })
            .on('end', () => {
                // Drop the first three (metadata) columns so only dataset names remain.
                datasets = datasets.slice(3);
                console.log('Datasets structure loaded.');
                resolve(datasets);
            })
            .on('error', reject);
    });
};

/*
 * Loads all the attractiveness data from CSV into a server-side object,
 * so the data does not have to be re-read for each request.
 */
module.exports.loadRuralData = async function (filePath) {
    const ruralData = [];
    let columns;
    return new Promise((resolve, reject) => {
        fs.createReadStream(filePath)
            .pipe(csv())
            .on('data', (row) => {
                if (!columns) {
                    columns = row;
                    return;
                }
                const item = { values: {} };
                for (let i = 0; i < columns.length; i++) {
                    const colName = columns[i].toLowerCase();
                    if (colName === 'nuts_id') // ID of the NUTS region
                        item.nuts = row[i];
                    // else if (colName === 'datasets') // empty datasets count
                    //     item.availableDS = datasets.length - row[i];
                    // else if (colName === 'quality')
                    //     item.quality = row[i];
                    else if (colName === 'lau2')
                        item.lau2 = row[i];
                    else if (colName === 'eurostat_code' || colName === 'name')
                        continue;
                    else
                        item.values[colName] = Number(row[i]);
                }
                ruralData.push(item);
            })
            .on('end', () => {
                console.log('Rural data file processing finished.');
                resolve(ruralData);
            })
            .on('error', reject);
    });
};

module.exports.loadOntology = async function (filePath) {
    return new Promise((resolve, reject) => {
        fs.readFile(filePath, (err, data) => {
            if (err) return reject(err);
            try {
                resolve(JSON.parse(data));
            } catch (parseErr) {
                reject(parseErr);
            }
        });
    });
};

/**
 * Resolves with an array representing the rows of a CSV file.
 * @param {string} inputFileName path to the CSV file with input data for the clustering calculation
 */
module.exports.loadClusteringInput = async function (inputFileName) {
    /*
     * The parsed CSV array keeps the native csv-parse structure
     * so it can easily be serialized back to a CSV file later.
     */
    const clusteringData = [];
    return new Promise((resolve, reject) => {
        fs.createReadStream(inputFileName)
            .pipe(csv())
            .on('data', (row) => {
                clusteringData.push(row);
            })
            .on('end', () => {
                resolve(clusteringData);
            })
            .on('error', reject);
    });
};

/**
 * Applies the factor weights to the clustering input data and resolves once
 * the modified CSV file has been written to the filesystem.
 */
module.exports.modifyClusteringData = async function ({ datasets, data, params, idString, outputFileName }) {
    // The regional ID column must be copied to the output as well.
    let allowedDatasets = [idString];
    for (const factor of params.factors) {
        allowedDatasets = [...allowedDatasets, ...factor.datasets];
    }

    // One multiplier per column of the input file (data[0] is the header row).
    const factorMultipliers = data[0].map((dataset) => {
        if (dataset === idString) return 1;
        const factor = datasets.find(ds => ds.Name === dataset);
        if (!factor) {
            /* An unknown factor effectively turns the dataset off. */
            console.log(`Undefined factor for dataset ${dataset}`);
            return 0;
        } else if (!allowedDatasets.includes(dataset)) {
            return 0;
        } else {
            return params.factors.find(f => f.factor === factor.Factor).weight;
        }
    });

    /* The actual modification logic resides here. */
    const modifiedData = data.map((row, idx) => {
        return row.map((value, i) => {
            if (idx === 0) {
                /* Header row: check both the allowed datasets and zero multipliers. */
                return allowedDatasets.includes(value) && factorMultipliers[i] !== 0 ? value : null;
            } else if (isNaN(value)) {
                /* The NUTS ID record at the beginning of each line. */
                return value;
            }
            return factorMultipliers[i] === 0 ? null : value * factorMultipliers[i];
        }).filter(val => val !== null);
    });

    if (modifiedData[0].length <= 1) {
        throw new Error('All datasets turned off. No data to create clusters.');
    }

    return new Promise((resolve, reject) => {
        stringify(modifiedData, (err, output) => {
            if (err) return reject(err);
            fs.writeFile(outputFileName, output, (writeErr) => {
                if (writeErr) return reject(writeErr);
                console.log('Data modification finished.');
                resolve();
            });
        });
    });
};

/**
 * Reads the out_file.csv created by the R script and saves it into an object.
 */
module.exports.loadClusters = function (filePath, idString, dataLoadedCallback) {
    const clusters = [];
    let columns;
    fs.createReadStream(filePath)
        .pipe(csv())
        .on('data', (row) => {
            if (!columns) {
                columns = row;
            } else {
                const item = {};
                for (let i = 0; i < columns.length; i++) {
                    // The ID column has an empty header in the R output, so fall back to idString.
                    const colName = columns[i].length > 0 ? columns[i].toLowerCase() : idString;
                    item[colName] = row[i];
                }
                clusters.push(item);
            }
        })
        .on('end', () => {
            console.log('Cluster data file processing finished.');
            dataLoadedCallback(clusters);
        });
};

/**
 * Computes the attractiveness index of a factor for a given region:
 * the average of the region's values over the factor's datasets.
 */
module.exports.getFactorIndex = function (region, factor) {
    let sumValue = 0;
    let count = 0;
    factor.datasets.forEach(ds => {
        // Dataset identifiers may be URIs; keep only the last path segment as the column name.
        const dataset = ds.split('/').pop();
        const value = region.values[dataset];
        if (value) { // missing (and zero) values are skipped
            sumValue += value;
            count++;
        }
    });
    return {
        index: sumValue / count,
        sumValue: sumValue,
        sumWeight: count * factor.weight
    };
};

/* Unused */
function getDatasetFactor(datasets, colName) {
    for (let i = 0; i < datasets.length; i++) {
        if (datasets[i].Name.toLowerCase() === colName)
            return datasets[i].Factor;
    }
    return undefined;
}
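
/*
 * Usage sketch (illustrative only, not part of the module's API): shows how
 * the loaders above might be wired together. The file paths, the dataset
 * metadata, and the shape of `params` below are assumptions made for this
 * example; in the real service they would come from the application's
 * configuration and input data.
 */
if (require.main === module) {
    (async () => {
        // Hypothetical clustering input produced elsewhere.
        const data = await module.exports.loadClusteringInput('./data/in_file.csv');

        // Hypothetical dataset metadata; modifyClusteringData expects objects
        // with `Name` and `Factor` properties.
        const datasets = [
            { Name: 'gdp_per_capita', Factor: 'economy' }
        ];

        // Each factor lists its datasets and carries a user-chosen weight.
        const params = {
            factors: [
                { factor: 'economy', datasets: ['gdp_per_capita'], weight: 1 }
            ]
        };

        await module.exports.modifyClusteringData({
            datasets,
            data,
            params,
            idString: 'NUTS_ID',
            outputFileName: './data/modified.csv'
        });
    })().catch(console.error);
}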