| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212 |
- const fs = require('fs');
- const csv = require('csv-parse');
- const stringify = require('csv-stringify');
/**
 * Helper method to load the dataset names from a CSV file so they can be
 * stored in a server object.
 *
 * Only the first parsed row (the header) is used; its first cell (the single
 * leading ID column) is dropped from the result.
 *
 * @param {string} filePath path to the CSV file
 * @returns {Promise<string[]>} resolves with the header cells after the first one;
 *   rejects on read/parse errors or when the file yields no row at all
 */
module.exports.loadDatasets = async function (filePath) {
  return new Promise((resolve, reject) => {
    let header;
    const source = fs.createReadStream(filePath);
    const parser = source.pipe(csv());

    // pipe() does not forward errors of the source stream (e.g. ENOENT),
    // so they have to be routed to the promise explicitly.
    source.on('error', reject);

    parser
      .on('data', (row) => {
        if (header) {
          // Rows that were already parsed from the same chunk are ignored.
          return;
        }
        header = row;
        // Nothing but the header is needed: detach and close the file first
        // so that no further chunk is written to the parser after end().
        source.unpipe(parser);
        source.destroy();
        parser.end();
      })
      .on('end', () => {
        if (!header) {
          reject(new Error(`No header row found in ${filePath}`));
          return;
        }
        console.log('Datasets structure loaded.');
        // Unify this number (only one ID column in the beginning).
        resolve(header.slice(1));
      })
      .on('error', reject);
  });
};
/**
 * Load all the attractiveness data from CSV into a server object.
 * The data don't need to be loaded for each request then.
 *
 * The first parsed row is taken as the column names (compared
 * case-insensitively). Every following row becomes one item:
 *   - `nuts_id`        -> `item.nuts`      (ID of the NUTS region, EU)
 *   - `lau2`           -> `item.lau2`      (ID of the municipality, CZ)
 *   - `district_code`  -> `item.district`  (ID of the district, Kenya+Uganda)
 *   - `eurostat_code` and `name` are skipped
 *   - any other column is stored in `item.values[<lower-cased name>]`
 *     converted with `Number()`
 *
 * @param {string} filePath path to the CSV file
 * @returns {Promise<Object[]>} resolves with the list of items; rejects on
 *   read or parse errors
 */
module.exports.loadRuralData = async function (filePath) {
  return new Promise((resolve, reject) => {
    const ruralData = [];
    let columns;

    const source = fs.createReadStream(filePath);
    // pipe() does not forward errors of the source stream (e.g. ENOENT),
    // so they have to be routed to the promise explicitly.
    source.on('error', reject);

    source
      .pipe(csv())
      .on('data', (row) => {
        if (!columns) {
          // Header row: normalise the names once instead of for every row.
          columns = row.map((name) => name.toLowerCase());
          return;
        }
        const item = { values: {} };
        columns.forEach((colName, i) => {
          switch (colName) {
            case 'nuts_id':
              item.nuts = row[i];
              break;
            case 'lau2':
              item.lau2 = row[i];
              break;
            case 'district_code':
              item.district = row[i];
              break;
            case 'eurostat_code':
            case 'name':
              break;
            default:
              item.values[colName] = Number(row[i]);
          }
        });
        ruralData.push(item);
      })
      .on('end', () => {
        console.log('Rural data file processing finished.');
        resolve(ruralData);
      })
      .on('error', reject);
  });
};
/**
 * Reads a file and parses its content as JSON.
 *
 * @param {string} filePath path to the JSON file
 * @returns {Promise<*>} resolves with the parsed value; rejects when the file
 *   cannot be read or its content is not valid JSON
 */
module.exports.loadOntology = async function (filePath) {
  return new Promise((resolve, reject) => {
    fs.readFile(filePath, (err, data) => {
      if (err) {
        // Stop here: there is no data to parse when the read failed.
        reject(err);
        return;
      }
      try {
        resolve(JSON.parse(data));
      } catch (parseErr) {
        // A throw inside this callback would escape the promise entirely.
        reject(parseErr);
      }
    });
  });
};
/**
 * Resolves with an array representing rows of CSV file
 *
 * The parsed CSV array keeps the native csv-parser structure (one array of
 * cells per row, header row included) for future easier serialization back
 * to CSV file.
 *
 * @param {string} inputFileName path to the CSV file with input data for clustering calculation
 * @returns {Promise<Array>} resolves with all parsed rows; rejects on read or
 *   parse errors
 */
module.exports.loadClusteringInput = async function (inputFileName) {
  return new Promise((resolve, reject) => {
    const clusteringData = [];

    const source = fs.createReadStream(inputFileName);
    // pipe() does not forward errors of the source stream (e.g. ENOENT),
    // so they have to be routed to the promise explicitly.
    source.on('error', reject);

    source
      .pipe(csv())
      .on('data', (row) => {
        clusteringData.push(row);
      })
      .on('end', () => {
        resolve(clusteringData);
      })
      .on('error', reject);
  });
};
/**
 * Resolves once the modified CSV file is written to fs
 *
 * Keeps the ID column and the columns of the datasets listed in
 * `params.datasets` whose weight is not 0, multiplies the numeric cells of
 * each kept dataset column by its weight and writes the result as CSV.
 *
 * @param {Object} options
 * @param {*} options.datasets currently not used by this function
 * @param {Array[]} options.data parsed CSV rows; `data[0]` is the header row
 * @param {Object} options.params request parameters; `params.datasets` is a
 *   list of `{id, weight}` objects
 * @param {string} options.idString header name of the regional ID column
 * @param {string} options.outputFileName path of the CSV file to write
 * @returns {Promise<void>} rejects when no dataset column is left, or when
 *   serialization or writing fails
 */
module.exports.modifyClusteringData = async function ({datasets, data, params, idString, outputFileName}) {
  // The first entry wins when an id is listed more than once.
  const weightById = new Map();
  for (const ds of params.datasets) {
    if (!weightById.has(ds.id)) {
      weightById.set(ds.id, ds.weight);
    }
  }

  const header = data[0];
  // Regional ID must be copied to the output as well, hence multiplier 1.
  const factorMultipliers = header.map((dataset) => {
    if (dataset === idString) return 1;
    return weightById.has(dataset) ? weightById.get(dataset) : 0;
  });

  /* The actual modification logic resides here */
  const modifiedData = data.map((row, rowIdx) => row
    .map((value, i) => {
      const multiplier = factorMultipliers[i];
      if (multiplier === 0) {
        // Disabled column: drop the cell in EVERY row (header and data,
        // numeric or not) so the rows stay aligned with the header.
        return null;
      }
      if (rowIdx === 0) {
        /* These are the headers */
        return value;
      }
      if (header[i] === idString || Number.isNaN(Number(value))) {
        // Region IDs and non-numeric cells are copied verbatim; this also
        // keeps numeric-looking IDs (e.g. with leading zeros) intact.
        return value;
      }
      return value * multiplier;
    })
    .filter((val) => val !== null));

  if (modifiedData[0].length <= 1) {
    throw new Error('All datasets turned off. No data to create clusters.');
  }

  return new Promise((resolve, reject) => {
    stringify(modifiedData, (err, output) => {
      if (err) {
        reject(err);
        return;
      }
      fs.writeFile(outputFileName, output, (writeErr) => {
        if (writeErr) {
          reject(writeErr);
          return;
        }
        // Only report success when the file was really written.
        console.log('Data modification finished.');
        resolve();
      });
    });
  });
};
/**
 * Reads the out_file.csv created by R script and saves it into an object
 *
 * The first parsed row supplies the column names: a non-empty name is
 * lower-cased, an empty name is replaced by `idString`. Every following row
 * becomes one object keyed by those names (cell values are kept as parsed).
 *
 * @param {string} filePath path to the CSV file
 * @param {string} idString key used for a column whose header cell is empty
 * @param {function(Object[]): void} dataLoadedCallback called with all rows
 *   once the file has been read completely
 * @param {function(Error): void} [errorCallback] called (at most once) when
 *   reading or parsing fails; when omitted the error is rethrown from the
 *   stream listener, i.e. it surfaces as an uncaught exception just like an
 *   unhandled stream 'error' event would
 */
module.exports.loadClusters = function (filePath, idString, dataLoadedCallback, errorCallback) {
  const clusters = [];
  let columns;
  let failed = false;

  const fail = (err) => {
    if (failed) return;
    failed = true;
    if (typeof errorCallback === 'function') {
      errorCallback(err);
      return;
    }
    throw err;
  };

  const source = fs.createReadStream(filePath);
  // pipe() does not forward errors of the source stream (e.g. ENOENT).
  source.on('error', fail);

  source
    .pipe(csv())
    .on('data', (row) => {
      if (!columns) {
        // Header row: resolve the final key of every column once.
        columns = row.map((name) => (name.length > 0 ? name.toLowerCase() : idString));
        return;
      }
      const item = {};
      columns.forEach((colName, i) => {
        item[colName] = row[i];
      });
      clusters.push(item);
    })
    .on('end', () => {
      if (failed) return;
      console.log('Cluster data file processing finished.');
      dataLoadedCallback(clusters);
    })
    .on('error', fail);
};
/**
 * Aggregates the values a region has for the datasets of one factor.
 *
 * Each entry of `factor.datasets` is reduced to the part after its last `/`
 * and looked up in `region.values`. Only truthy values are counted, so
 * missing values, `0` and `NaN` are left out of both the sum and the count.
 *
 * @param {Object} region object with a `values` map keyed by dataset name
 * @param {Object} factor object with `datasets` (array of strings) and `weight`
 * @returns {{index: number, sumValue: number, sumWeight: number}}
 *   `index` is `sumValue / count` (`NaN` when nothing was counted),
 *   `sumWeight` is `count * factor.weight`
 */
module.exports.getFactorIndex = function (region, factor) {
  const totals = factor.datasets.reduce((acc, datasetId) => {
    const key = datasetId.split('/').pop();
    const cell = region.values[key];
    if (!cell) {
      return acc;
    }
    return { sum: acc.sum + cell, hits: acc.hits + 1 };
  }, { sum: 0, hits: 0 });

  return {
    index: totals.sum / totals.hits,
    sumValue: totals.sum,
    sumWeight: totals.hits * factor.weight,
  };
};
/* Unused */
/**
 * Returns the `Factor` of the first entry whose lower-cased `Name` equals
 * `colName` (loose comparison), or `undefined` when no entry matches.
 *
 * @param {Object[]} datasets entries with `Name` and `Factor` properties
 * @param {*} colName column name to look for
 */
function getDatasetFactor(datasets, colName) {
  const match = datasets.find((entry) => entry.Name.toLowerCase() == colName);
  return match === undefined ? undefined : match.Factor;
}
|