5 gadi atpakaļ · 2e87c6b0ea
--- a/README.md
+++ b/README.md
@@ -25,6 +25,10 @@ Computes and returns attractivity data for all the NUTS regions based on the inc
 
				 POST https://publish.lesprojekt.cz/nodejs/scores
			
 
				 
			
 
				 
			
 
				+Computes and returns clusters for all the NUTS regions, based on their attractivity data, the incoming datasets and the factor weights
			
 
				+
			
 
				+POST https://publish.lesprojekt.cz/nodejs/clusters
			
 
				+
			
 
				 
			
 
				 ## Start the system service
			
 
				 sudo systemctl start fz-node-rural_attractiveness
			
--- a/index.js
+++ b/index.js
@@ -8,6 +8,8 @@ const app = express();
 
				 
			
 
				 const _datasetsFilePath = 'data/datasets.csv';
			
 
				 const _dataFilePath = 'data/data.csv';
			
 
				+const _clusteringInputFilePath = 'data/clustering/input_all.csv';
			
 
				+const _clusteringModifiedFilePath = 'data/clustering/input_modified.csv';
			
 
				 const _clustersFilePath = 'data/clustering/out_file.csv';
			
 
				 var _datasets = undefined;
			
 
				 var _ruralData = undefined;
			
@@ -31,6 +33,7 @@ app.get('/refresh', (req, res, next) => {
 
				         nutsData.loadRuralData(_dataFilePath, _datasets, function (rd) {
			
 
				             //console.log('Rural data loaded succesfully');
			
 
				             _ruralData = rd;
			
 
				+            res.send('Data refreshed');
			
 
				         });
			
 
				     });
			
 
				 });
			
@@ -132,6 +135,7 @@ app.post('/scores', (req, res, next) => {
 
				 app.get('/runR', (req, res, next) => {
			
 
				     //console.log(console);
			
 
				     console.log('calling R...')
			
 
				+    console.log(req)
			
 
				     R('./r/selected_data.r').call(
			
 
				         function(err, data) {
			
 
				             console.log('R done');
			
@@ -149,27 +153,39 @@ app.get('/runR', (req, res, next) => {
 
				 });
			
 
				 
			
 
				 /*
			
 
				-    Calls R script, loads the resulting CSV file and returns it
			
 
				+    Just informative response. POST with JSON data is required.
			
 
				 */
			
 
				 app.get('/clusters', (req, res, next) => {
			
 
				-    //console.log(console);
			
 
				-    console.log('calling R...')
			
 
				-    R('./r/selected_data.r').call(
			
 
				-        function(err, data) {
			
 
				-            console.log('R done');
			
 
				-            if (err) {
			
 
				-                console.log(err.toString('utf8'));
			
 
				-                data = { result: err.toString('utf8') };
			
 
				-            }
			
 
				-            else {
			
 
				-                console.log(data);
			
 
				-                nutsData.loadClusters(_clustersFilePath, function(clusterData) {
			
 
				-                    data = clusterData;
			
 
				-                    helpers.formatResponse({ response: data }, req, res);
			
 
				-                });
			
 
				-            }
			
 
				+    const data = { response: '/clusters method is only available under POST' }
			
 
				+    helpers.formatResponse(data, req, res);
			
 
				+});
			
 
				+
			
 
				+/*
				+    Modifies the input CSV file according to the posted factors (req.body),
				+    calls the R script, loads the resulting CSV file and returns it.
				+    Errors raised while preparing the data are passed to next() as a string.
				+*/
				+app.post('/clusters', async (req, res, next) => {
				+    try {
				+        if (!_datasets) {
				+            // loadDatasets() reports its result through a callback, so awaiting
				+            // the call itself does not guarantee the datasets are available.
				+            // Wrap the callback in a Promise and wait for that instead.
				+            _datasets = await new Promise((resolve) => {
				+                nutsData.loadDatasets(_datasetsFilePath, resolve);
				+            });
				         }
				-    );
				+        const clusteringData = await nutsData.loadClusteringInput(
				+            _clusteringInputFilePath
				+        );
				+        await nutsData.modifyClusteringData({
				+            datasets: _datasets,
				+            data: clusteringData,
				+            params: req.body,
				+            outputFileName: _clusteringModifiedFilePath
				+        });
				+        // The R script reads the file written above and produces the clusters file
				+        handleRCall(req, res);
				+    } catch (error) { // Catch errors in async functions
				+        next(error.toString());
				+    }
				 });
			
 
				 
			
 
				 // start the service on the port xxxx
			
@@ -216,3 +232,23 @@ function returnRegionScores(nuts, req, res) {
 
				         // NUTS region not found
			
 
				         res.status(404).send('NUTS region not found.');
			
 
				 }
			
 
				+
			
 
				+/**
				+ * Runs the clustering R script and answers the request with its result.
				+ *
				+ * On success the clusters file is loaded and sent as `{ response: clusters }`.
				+ * On failure the error text is logged and sent as `{ result: message }` with
				+ * HTTP status 500, so the client never waits for an answer that is not coming.
				+ *
				+ * @param {object} req Express request, forwarded to helpers.formatResponse
				+ * @param {object} res Express response used to send the answer
				+ */
				+function handleRCall(req, res) {
				+    R('./r/selected_data.r').call((err) => {
				+        if (err) {
				+            const message = err.toString('utf8');
				+            console.log(message);
				+            // NOTE(review): assumes helpers.formatResponse keeps a status set beforehand - confirm
				+            res.status(500);
				+            helpers.formatResponse({ result: message }, req, res);
				+            return;
				+        }
				+        // The value passed by the R call is not used; the clusters are read from the output file
				+        nutsData.loadClusters(_clustersFilePath, (clusterData) => {
				+            helpers.formatResponse({ response: clusterData }, req, res);
				+        });
				+    });
				+}
			
--- a/nuts-data.js
+++ b/nuts-data.js
@@ -1,5 +1,6 @@
 
				 const fs = require('fs');
			
 
				 const csv = require('csv-parse');
			
 
				+const stringify = require('csv-stringify');
			
 
				 
			
 
				 /* Helper method to load the datasets from CSV and store it in server object */
			
 
				 module.exports.loadDatasets = function(filePath, dataLoadedCallback) {
			
@@ -75,10 +76,87 @@ module.exports.loadRuralData = function (filePath, datasets, dataLoadedCallback)
 
				         });
			
 
				 }
			
 
				 
			
 
				-/* Reads the out_file.csv created by R script and saves it into an object
			
 
				+/**
				+ * Resolves with an array representing rows of CSV file.
				+ * Rows are pushed exactly as the CSV parser emits them, so they can be
				+ * serialized back to a CSV file later without any conversion.
				+ * Rejects when the file cannot be read or the CSV cannot be parsed.
				+ * @param {string} inputFileName path to the CSV file with input data for clustering calculation
				+ * @returns {Promise<Array>} parsed rows in file order
				+ */
				+module.exports.loadClusteringInput = async function (inputFileName) {
				+    const clusteringData = [];
				+
				+    return new Promise((resolve, reject) => {
				+        fs.createReadStream(inputFileName)
				+            // pipe() does not forward errors of the source stream; without this
				+            // handler a missing/unreadable file would raise an unhandled 'error'
				+            // event and the promise would never settle.
				+            .on('error', reject)
				+            .pipe(csv())
				+            .on('data', (row) => {
				+                clusteringData.push(row);
				+            })
				+            .on('end', () => {
				+                resolve(clusteringData);
				+            })
				+            .on('error', reject);
				+    });
				+}
			
 
				+
			
 
				+/**
				+ * Applies the requested factor weights to the clustering input and writes
				+ * the result as a CSV file. Resolves once the modified CSV file is written to fs.
				+ *
				+ * A column is kept only when it is NUTS_ID, or when its dataset is known,
				+ * listed in one of the requested factors and has a finite, non-zero weight.
				+ * Numeric values of kept columns are multiplied by the weight of the factor
				+ * the dataset belongs to; NUTS_ID and non-numeric values are copied as they are.
				+ *
				+ * @param {Object} options
				+ * @param {Array} options.datasets dataset descriptors; their `Name` and `Factor` properties are read
				+ * @param {Array<Array>} options.data rows of the input CSV, the first row being the header
				+ * @param {Object} options.params request parameters; `factors` is read as an array of
				+ *     objects with `factor`, `weight` and `datasets` properties
				+ * @param {string} options.outputFileName path of the CSV file to write
				+ * @throws {Error} when there is no input data, datasets are missing or no dataset column remains
				+ */
				+module.exports.modifyClusteringData = async function ({datasets, data, params, outputFileName}) {
				+    if (!Array.isArray(data) || data.length === 0) {
				+        throw new Error('Clustering input data is empty.');
				+    }
				+    if (!Array.isArray(datasets)) {
				+        throw new Error('Datasets are not loaded.');
				+    }
				+    const factors = (params && Array.isArray(params.factors)) ? params.factors : [];
				+    const allowedDatasets = new Set(['NUTS_ID']); // NUTS_ID must be copied to the output as well
				+    for (const factor of factors) {
				+        for (const name of factor.datasets || []) {
				+            allowedDatasets.add(name);
				+        }
				+    }
				+    const header = data[0];
				+    const factorMultipliers = header.map((dataset) => {
				+        if (dataset === 'NUTS_ID') return 1;
				+        const descriptor = datasets.find(ds => ds.Name === dataset);
				+        if (!descriptor) {
				+            /* If the factor is unknown for this dataset, it will effectively turn it off */
				+            console.log(`Undefined factor for dataset ${dataset}`);
				+            return 0;
				+        }
				+        if (!allowedDatasets.has(dataset)) return 0;
				+        const requested = factors.find(f => f.factor === descriptor.Factor);
				+        /* Number() so that a weight sent as the string "0" also turns the column off */
				+        const weight = requested ? Number(requested.weight) : NaN;
				+        if (!Number.isFinite(weight)) {
				+            /* Same policy as for unknown datasets: turn the column off instead of crashing */
				+            console.log(`Missing or invalid weight for factor ${descriptor.Factor}`);
				+            return 0;
				+        }
				+        return weight;
				+    });
				+    /*
				+     * Decide once which columns survive and use the same list for every row,
				+     * so the header and the data rows can never get out of alignment.
				+     */
				+    const keptColumns = [];
				+    factorMultipliers.forEach((multiplier, i) => {
				+        if (multiplier !== 0) keptColumns.push(i);
				+    });
				+    /* The actual modification logic resides here */
				+    const modifiedData = data.map((row, rowIdx) => {
				+        return keptColumns.map((col) => {
				+            const value = row[col];
				+            /* Headers, NUTS ID records and non-numeric values are copied untouched */
				+            if (rowIdx === 0 || header[col] === 'NUTS_ID' || Number.isNaN(Number(value))) {
				+                return value;
				+            }
				+            return value * factorMultipliers[col];
				+        });
				+    });
				+    if (modifiedData[0].length <= 1) {
				+        throw new Error('All datasets turned off. No data to create clusters.');
				+    }
				+    return new Promise((resolve, reject) => {
				+        stringify(modifiedData, (err, output) => {
				+            if (err) return reject(err);
				+            fs.writeFile(outputFileName, output, (writeErr) => {
				+                if (writeErr) reject(writeErr);
				+                else resolve();
				+            })
				+        })
				+    });
				+}
			
 
				+
			
 
				+/** 
			
 
				+ * Reads the out_file.csv created by R script and saves it into an object
			
 
				 */
			
 
				 module.exports.loadClusters = function (filePath, dataLoadedCallback) {
			
 
				-    console.log('Reading clustering data file processing started.');
			
 
				+    //console.log('Reading clustering data file processing started.');
			
 
				     let clusters = [];
			
 
				 
			
 
				     let columns = undefined;
			
@@ -126,7 +204,6 @@ module.exports.getFactorIndex = function (region, factor) {
 
				 }
			
 
				 
			
 
				 function getDataSetFactor(datasets, colName) {
			
 
				-
			
 
				     for (let i = 0; i < datasets.length; i++) {
			
 
				         if (datasets[i].Name.toLowerCase() == colName)
			
 
				             return datasets[i].Factor;
			
--- a/package-lock.json
+++ b/package-lock.json
@@ -1231,6 +1231,11 @@
 
				       "resolved": "https://registry.npmjs.org/csv-parse/-/csv-parse-4.8.5.tgz",
			
 
				       "integrity": "sha512-rpsLmlLWJZifmLzZEVGbZ9phWnJyi+cCbCGYr4vX2NaHFtgbmQPFk+WmMkmMkQXgsIUn6CgnK9cTuUAfFjoXbA=="
			
 
				     },
			
 
				+    "csv-stringify": {
			
 
				+      "version": "5.5.1",
			
 
				+      "resolved": "https://registry.npmjs.org/csv-stringify/-/csv-stringify-5.5.1.tgz",
			
 
				+      "integrity": "sha512-HM0/86Ks8OwFbaYLd495tqTs1NhscZL52dC4ieKYumy8+nawQYC0xZ63w1NqLf0M148T2YLYqowoImc1giPn0g=="
			
 
				+    },
			
 
				     "debug": {
			
 
				       "version": "2.6.9",
			
 
				       "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz",
			
--- a/package.json
+++ b/package.json
@@ -11,6 +11,7 @@
 
				   "dependencies": {
			
 
				     "cors": "^2.8.5",
			
 
				     "csv-parse": "^4.8.5",
			
 
				+    "csv-stringify": "^5.5.1",
			
 
				     "express": "^4.17.1",
			
 
				     "r-script": "0.0.4"
			
 
				   },
			
--- a/r/selected_data.r
+++ b/r/selected_data.r
@@ -2,10 +2,10 @@ setwd("./data/clustering/") # Nastavení pracovního adresáře (relativní k ro
 
				 
			
 
				 library(cluster)
			
 
				 
			
 
				-input <- read.csv(file = 'input_all.csv',header=TRUE,sep=",") # Načtení CSV souboru
			
 
				-head(input) # Výpis prvních šesti řádek CSV souboru
			
 
				-mydata <- input[, -1] # Úprava dat, která funguje, ale nevím proč... (ale jen pro data s více než jedním číselným sloupcem)
			
 
				-rownames(mydata) <- input[, 1]
			
 
				+input <- read.csv(file = 'input_modified.csv') # Načtení CSV souboru
			
 
				+#head(input) # Výpis prvních šesti řádek CSV souboru
			
 
				+mydata <- input[, -1, drop=F] # Zahození prvního sloupce dat (NUTS_ID). Při 2-sloupcové tabulce nesmí redukovat dimenzi na vektor, proto drop=F
			
 
				+rownames(mydata) <- input[, 1] # První sloupec dat jako název řádek
			
 
				 mydata <- scale(mydata) # Standardizace dat
			
 
				 km25 <- kmeans(mydata, 12, nstart=25) # 12 cluster solution, nstart = počet náhodných počátečních přiřazení, optimální je hodnota 25-50
			
 
				 km50hw <- kmeans(mydata, 12, nstart=50) # 12 cluster solution, nstart = počet náhodných počátečních přiřazení, optimální je hodnota 25-50
			
--- a/test.html
+++ b/test.html
@@ -6,7 +6,7 @@ function loadDoc() {
 
				   var xhttp = new XMLHttpRequest();
			
 
				   xhttp.onreadystatechange = function() {
			
 
				     if (this.readyState == 4 && this.status == 200) {
			
 
				-     document.getElementById("demo").innerHTML = this.responseText;
			
 
				+      document.getElementById("demo").innerHTML = this.responseText;
			
 
				     }
			
 
				   };
			
 
				   xhttp.open("GET", "https://publish.lesprojekt.cz/nodejs/datasets", true);