// Source repository: fzadrazil/rural-attractivness-service
							const fs = require('fs');
const csv = require('csv-parse');
const stringify = require('csv-stringify');

/**
 * Helper method to load the dataset names from the header of a CSV file.
 *
 * The parser is created with `to_line: 1`, and only the first record it emits
 * is used. The first two cells of that record are skipped and the rest is returned.
 *
 * @param {string} filePath path to the CSV file
 * @returns {Promise<string[]>} resolves with the header cells from the third column onwards;
 *     rejects when the file cannot be read or parsed, or when it contains no record at all
 */
module.exports.loadDatasets = async function(filePath) {
    return new Promise((resolve, reject) => {
        let header

        fs.createReadStream(filePath)
            // pipe() does not forward errors of the source stream, so without this
            // listener a missing file would surface as an unhandled 'error' event
            .on('error', reject)
            .pipe(csv({to_line: 1}))
            .on('data', (row) => {
                if (!header) {
                    header = row
                }
            })
            .on('end', () => {
                if (!header) {
                    reject(new Error(`No header row found in ${filePath}`))
                    return
                }
                console.log('Datasets structure loaded.')
                resolve(header.slice(2)) //TODO: FIXME: unify this number (only one ID column in the beginning)
            })
            .on('error', reject)
    })
}

/**
 * Load all the attractiveness data from CSV into a server object.
 * The data don't need to be loaded for each request then.
 *
 * The first record is treated as the header. Every following record becomes
 * one item: identifier columns are copied as strings (`nuts_id` -> `nuts`,
 * `lau2` -> `lau2`, `district_code` -> `district`), the `eurostat_code` and
 * `name` columns are skipped, and every other column is stored in
 * `values[<lower-cased column name>]` converted with `Number()`.
 *
 * @param {string} filePath path to the CSV file
 * @returns {Promise<Array<{values: Object<string, number>, nuts?: string, lau2?: string, district?: string}>>}
 *     resolves with one item per data record; rejects when the file cannot be read or parsed
 */
module.exports.loadRuralData = async function (filePath) {
    return new Promise((resolve, reject) => {
        const ruralData = []
        let columns // lower-cased header cells, filled from the first record

        fs.createReadStream(filePath)
            // pipe() does not forward errors of the source stream, so the read
            // stream needs its own listener (e.g. for a missing file)
            .on('error', reject)
            .pipe(csv())
            .on('data', (row) => {
                if (!columns) {
                    // Lower-case the header once instead of for every cell of every row
                    columns = row.map((name) => name.toLowerCase())
                    return
                }
                const item = {
                    values: {}
                }
                columns.forEach((colName, i) => {
                    switch (colName) {
                        case 'nuts_id': // ID of the NUTS region (EU)
                            item.nuts = row[i]
                            break
                        case 'lau2': // ID of the municipality (CZ)
                            item.lau2 = row[i]
                            break
                        case 'district_code': // ID of the district (Kenya+Uganda)
                            item.district = row[i]
                            break
                        case 'eurostat_code':
                        case 'name':
                            break
                        default:
                            item.values[colName] = Number(row[i])
                    }
                })
                ruralData.push(item)
            })
            .on('end', () => {
                console.log('Rural data file processing finished.');
                resolve(ruralData);
            })
            .on('error', reject);
    })
}

/**
 * Reads a JSON file and resolves with its parsed content.
 *
 * @param {string} filePath path to the JSON file with the ontology
 * @returns {Promise<*>} resolves with the value produced by `JSON.parse`;
 *     rejects with the file-system error when the file cannot be read, or with
 *     a `SyntaxError` when the content is not valid JSON
 */
module.exports.loadOntology = async function(filePath) {
    // Awaiting inside the async function turns both read and parse failures into
    // rejections, instead of exceptions thrown from within a callback.
    const content = await fs.promises.readFile(filePath, 'utf8')
    return JSON.parse(content)
}

/**
 * Extracts metadata of the datasets described in the ontology.
 *
 * An entity is returned only if it
 *  - has the Dataset type,
 *  - has a coverage of Europe,
 *  - has a level of detail equal to NUTS3, and
 *  - is a dataset of at least one group (factor) that is part of the Polirural schema.
 * Entities lacking any of the inspected properties are skipped.
 *
 * @param {Array<Object>} ontology array of entities keyed by full IRIs
 *     (`@id`, `@type`, `http://www.semanticweb.org/attractiveness/...`)
 * @returns {Array<{name: string, description: string, factor: string}>}
 *     `name` and `factor` are the last path segments of the dataset's and of its
 *     own first matching Polirural factor's `@id`; `description` is the English
 *     description when one is tagged `@language: "en"`, otherwise the first
 *     description, otherwise an empty string
 */
module.exports.parseDatasetsMetadata = function(ontology) {
    const NS = 'http://www.semanticweb.org/attractiveness'

    const lastSegment = (iri) => iri.split('/').pop()

    // True when entity[property] is a list containing a reference to targetId
    const refersTo = (entity, property, targetId) => {
        const refs = entity[property]
        return Array.isArray(refs) && refs.some((ref) => ref['@id'] === targetId)
    }

    // Index entities by @id once (first occurrence wins) to avoid a linear
    // search of the whole ontology for every dataset group
    const entitiesById = new Map()
    for (const entity of ontology) {
        if (!entitiesById.has(entity['@id'])) {
            entitiesById.set(entity['@id'], entity)
        }
    }

    // @id of the first group of the entity that belongs to the Polirural schema
    const findPoliruralFactorId = (entity) => {
        const groups = entity[`${NS}/isDatasetOf`]
        if (!Array.isArray(groups)) return undefined
        for (const group of groups) {
            const groupEntity = entitiesById.get(group['@id'])
            if (groupEntity && refersTo(groupEntity, `${NS}/isPartOf`, `${NS}/Polirural`)) {
                return groupEntity['@id']
            }
        }
        return undefined
    }

    const pickDescription = (entity) => {
        const descriptions = entity[`${NS}#description`]
        if (!Array.isArray(descriptions) || descriptions.length === 0) return ''
        const chosen = descriptions.find((d) => d['@language'] === 'en') || descriptions[0]
        return chosen['@value']
    }

    const metadata = []
    for (const entity of ontology) {
        const types = entity['@type']
        //Only datasets ...
        if (!types || !types.includes(`${NS}/Dataset`)) continue
        //only with coverage of Europe ...
        if (!refersTo(entity, `${NS}/hasCoverage`, `${NS}/Europe`)) continue
        //only with a level of detail equal to NUTS3 regions ...
        if (!refersTo(entity, `${NS}/hasLoD`, `${NS}/NUTS3`)) continue
        //and only part of a factor, which belongs to the Polirural schema
        // The factor is resolved per entity; a variable shared between a filter
        // pass and a later map pass would hold only the last match.
        const factorId = findPoliruralFactorId(entity)
        if (factorId === undefined) continue

        metadata.push({
            name: lastSegment(entity['@id']),
            description: pickDescription(entity),
            factor: lastSegment(factorId)
        })
    }
    return metadata
}

/**
 * Resolves with an array representing rows of CSV file.
 *
 * Every record (including the first one) is kept as an array of cells, with
 * the second cell (index 1) removed.
 *
 * @param {string} inputFileName path to the CSV file with input data for clustering calculation
 * @returns {Promise<Array<Array<string>>>} resolves with the records in file order;
 *     rejects when the file cannot be read or parsed
 */
module.exports.loadClusteringInput = async function (inputFileName) {
    /*
     * The parsed CSV array keeps the native csv-parser structure
     * for future easier serialization back to CSV file
     */
    return new Promise((resolve, reject) => {
        const clusteringData = [];

        fs.createReadStream(inputFileName)
            // pipe() does not forward errors of the source stream, so the read
            // stream needs its own listener (e.g. for a missing file)
            .on('error', reject)
            .pipe(csv())
            .on('data', (row) => {
                const [id, , ...values] = row;
                clusteringData.push([id, ...values]);
            })
            .on('end', () => {
                resolve(clusteringData);
            })
            .on('error', reject);
    });
}

/**
 * Resolves once the modified CSV file is written to fs.
 *
 * The first row of `data` is the header. A column is kept when its header
 * equals `idString` or is the id of a dataset in `params.datasets` whose
 * weight is not 0; all other columns are removed from every row. Cells of the
 * kept dataset columns are multiplied by the dataset weight, cells of the ID
 * column are copied untouched.
 *
 * @param {Object} options
 * @param {*} options.datasets not used by this function
 * @param {Array<Array<string>>} options.data rows of the clustering input, header first
 * @param {{datasets: Array<{id: string, weight: number}>}} options.params selected datasets and their weights
 * @param {string} options.idString header of the column holding the regional ID
 * @param {string} options.outputFileName path of the CSV file to write
 * @returns {Promise<void>} rejects when `data` is empty, when no dataset column
 *     remains, or when serialising/writing the output fails
 */
module.exports.modifyClusteringData = async function ({datasets, data, params, idString, outputFileName}) {
    if (!Array.isArray(data) || data.length === 0) {
        throw new Error('No clustering input data to modify.');
    }
    const header = data[0]

    // Weight lookup by dataset id; the first entry wins when an id is repeated
    const weightById = new Map()
    for (const ds of params.datasets) {
        if (!weightById.has(ds.id)) {
            weightById.set(ds.id, ds.weight)
        }
    }

    // regional ID must be copied to the output as well
    const factorMultipliers = header.map((dataset) => {
        if (dataset === idString) return 1
        return weightById.has(dataset) ? weightById.get(dataset) : 0
    })

    // Deciding per column (not per cell) keeps every row aligned with the header
    const keptColumns = []
    factorMultipliers.forEach((multiplier, i) => {
        if (multiplier !== 0) keptColumns.push(i)
    })

    /* The actual modification logic resides here */
    const modifiedData = data.map((row, rowIdx) => {
        return keptColumns.map((i) => {
            const value = row[i]
            if (rowIdx === 0) {
                /* These are the headers */
                return value
            }
            if (header[i] === idString) {
                /* The regional ID is never multiplied, even when it looks numeric */
                return value
            }
            if (Number.isNaN(Number(value))) {
                /* Non-numeric cells cannot be weighted; pass them through */
                return value
            }
            return value * factorMultipliers[i]
        })
    });
    //console.log(modifiedData);
    if (modifiedData[0].length <= 1) {
        throw new Error('All datasets turned off. No data to create clusters.');
    }
    return new Promise((resolve, reject) => {
        stringify(modifiedData, (err, output) => {
            if (err) return reject(err);
            fs.writeFile(outputFileName, output, (err) => {
                if (err) return reject(err);
                console.log('Data modification finished.');
                resolve();
            })
        })
    });
}

/**
 * Reads the out_file.csv created by R script and saves it into an object.
 *
 * The first record is the header. Every following record becomes an object
 * keyed by the lower-cased header cells; a column with an empty header cell is
 * stored under `idString`. Cell values are kept as produced by the parser.
 *
 * @param {string} filePath path to the CSV file
 * @param {string} idString key used for a column whose header cell is empty
 * @param {function(Array<Object>): void} dataLoadedCallback called with all records once the file is read
 * @param {function(Error): void} [errorCallback] optional; called at most once when reading
 *     or parsing fails. When omitted no 'error' listeners are attached, i.e. a
 *     stream error is raised as an unhandled 'error' event.
 */
module.exports.loadClusters = function (filePath, idString, dataLoadedCallback, errorCallback) {
    //console.log('Reading clustering data file processing started.');
    const clusters = [];
    let keys; // object keys derived from the header record

    const source = fs.createReadStream(filePath);
    const parser = source.pipe(csv());

    if (typeof errorCallback === 'function') {
        let reported = false;
        const fail = (err) => {
            if (reported) return;
            reported = true;
            errorCallback(err);
        };
        // pipe() does not forward source errors, so both streams are observed
        source.on('error', fail);
        parser.on('error', fail);
    }

    parser
        .on('data', (row) => {
            if (!keys) {
                keys = row.map((name) => (name.length > 0 ? name.toLowerCase() : idString));
                return;
            }
            const item = {};
            keys.forEach((key, i) => {
                item[key] = row[i];
            });
            clusters.push(item);
        })
        .on('end', () => {
            console.log('Cluster data file processing finished.');
            dataLoadedCallback(clusters);
        });
}

/**
 * Aggregates the values a region has for the datasets of one factor.
 *
 * Each entry of `factor.datasets` is reduced to the text after its last '/'
 * and looked up in `region.values`. Only truthy values are counted, so
 * missing, `NaN` and `0` values are left out of both the sum and the count.
 *
 * @param {{values: Object}} region region item with a `values` map
 * @param {{datasets: Array<string>, weight: number}} factor factor definition
 * @returns {{index: number, sumValue: number, sumWeight: number}}
 *     `index` is the sum divided by the number of counted values (`NaN` when
 *     nothing was counted), `sumWeight` is that number times `factor.weight`
 */
module.exports.getFactorIndex = function (region, factor) {
    const totals = factor.datasets.reduce((acc, datasetIri) => {
        const key = datasetIri.slice(datasetIri.lastIndexOf('/') + 1);
        const value = region.values[key];
        if (!value) {
            return acc;
        }
        acc.sum += value;
        acc.used += 1;
        return acc;
    }, { sum: 0, used: 0 });

    return {
        index: totals.sum / totals.used,
        sumValue: totals.sum,
        sumWeight: totals.used * factor.weight
    };
}

/* Unused */
/**
 * Looks up the `Factor` of the first dataset whose lower-cased `Name` equals `colName`.
 *
 * @param {Array<{Name: string, Factor: *}>} datasets dataset descriptors to search
 * @param {string} colName column name to match against the lower-cased dataset names
 * @returns {*} the `Factor` of the first match, or `undefined` when nothing matches
 */
function getDatasetFactor(datasets, colName) {
    const match = datasets.find((dataset) => dataset.Name.toLowerCase() == colName);
    return match === undefined ? undefined : match.Factor;
}