Pārlūkot izejas kodu

Merge branch 'r' of fzadrazil/rural-attractivness-service into master

František Zadražil 5 gadi atpakaļ
vecāks
revīzija
2e87c6b0ea
7 mainītis faili ar 149 papildinājumiem un 26 dzēšanām
  1. 4 0
      README.md
  2. 54 18
      index.js
  3. 80 3
      nuts-data.js
  4. 5 0
      package-lock.json
  5. 1 0
      package.json
  6. 4 4
      r/selected_data.r
  7. 1 1
      test.html

+ 4 - 0
README.md

@@ -25,6 +25,10 @@ Computes and returns attractivity data for all the NUTS regions based on the inc
 POST https://publish.lesprojekt.cz/nodejs/scores
 
 
+Computes and returns clusters based on the attractivity data for all the NUTS regions, the incoming datasets and the factor weights
+
+POST https://publish.lesprojekt.cz/nodejs/clusters
+
 
 ## Start the system service
 sudo systemctl start fz-node-rural_attractiveness

+ 54 - 18
index.js

@@ -8,6 +8,8 @@ const app = express();
 
 const _datasetsFilePath = 'data/datasets.csv';
 const _dataFilePath = 'data/data.csv';
+const _clusteringInputFilePath = 'data/clustering/input_all.csv';
+const _clusteringModifiedFilePath = 'data/clustering/input_modified.csv';
 const _clustersFilePath = 'data/clustering/out_file.csv';
 var _datasets = undefined;
 var _ruralData = undefined;
@@ -31,6 +33,7 @@ app.get('/refresh', (req, res, next) => {
         nutsData.loadRuralData(_dataFilePath, _datasets, function (rd) {
             //console.log('Rural data loaded succesfully');
             _ruralData = rd;
+            res.send('Data refreshed');
         });
     });
 });
@@ -132,6 +135,7 @@ app.post('/scores', (req, res, next) => {
 app.get('/runR', (req, res, next) => {
     //console.log(console);
     console.log('calling R...')
+    console.log(req)
     R('./r/selected_data.r').call(
         function(err, data) {
             console.log('R done');
@@ -149,27 +153,39 @@ app.get('/runR', (req, res, next) => {
 });
 
 /*
-    Calls R script, loads the resulting CSV file and returns it
+    Just informative response. POST with JSON data is required.
 */
 app.get('/clusters', (req, res, next) => {
-    //console.log(console);
-    console.log('calling R...')
-    R('./r/selected_data.r').call(
-        function(err, data) {
-            console.log('R done');
-            if (err) {
-                console.log(err.toString('utf8'));
-                data = { result: err.toString('utf8') };
-            }
-            else {
-                console.log(data);
-                nutsData.loadClusters(_clustersFilePath, function(clusterData) {
-                    data = clusterData;
-                    helpers.formatResponse({ response: data }, req, res);
-                });
-            }
+    const data = { response: '/clusters method is only available under POST' }
+    helpers.formatResponse(data, req, res);
+});
+
+/*
+    Modifies input CSV file, calls R script, loads the resulting CSV file and returns it
+*/
+app.post('/clusters', async (req, res, next) => {
+    try {
+        if (!_datasets) {
+            //TODO: promisify all functions to avoid callback hell and make this work properly
+            await nutsData.loadDatasets(_datasetsFilePath, function (ds) {
+                //console.log('Datasets loaded succesfully');
+                _datasets = ds;
+            })
         }
-    );
+        //console.log(req.body);
+        const clusteringData = await nutsData.loadClusteringInput(
+            _clusteringInputFilePath
+        );
+        await nutsData.modifyClusteringData({
+            datasets: _datasets,
+            data: clusteringData,
+            params: req.body,
+            outputFileName: _clusteringModifiedFilePath
+        });
+        handleRCall(req, res);
+    } catch (error) { // Catch errors in async functions
+        next(error.toString());
+    }
 });
 
 // start the service on the port xxxx
@@ -216,3 +232,23 @@ function returnRegionScores(nuts, req, res) {
         // NUTS region not found
         res.status(404).send('NUTS region not found.');
 }
+
+function handleRCall(req, res) {
+    //console.log('calling R...')
+    R('./r/selected_data.r').call(
+        function(err, data) {
+            //console.log('R done');
+            if (err) {
+                console.log(err.toString('utf8'));
+                data = { result: err.toString('utf8') };
+            }
+            else {
+                //console.log(data);
+                nutsData.loadClusters(_clustersFilePath, function(clusterData) {
+                    data = clusterData;
+                    helpers.formatResponse({ response: data }, req, res);
+                });
+            }
+        }
+    );
+}

+ 80 - 3
nuts-data.js

@@ -1,5 +1,6 @@
 const fs = require('fs');
 const csv = require('csv-parse');
+const stringify = require('csv-stringify');
 
 /* Helper method to load the datasets from CSV and store it in server object */
 module.exports.loadDatasets = function(filePath, dataLoadedCallback) {
@@ -75,10 +76,87 @@ module.exports.loadRuralData = function (filePath, datasets, dataLoadedCallback)
         });
 }
 
-/* Reads the out_file.csv created by R script and saves it into an object
+/**
+ * Resolves with an array representing rows of CSV file
+ * @param {string} inputFileName path to the CSV file with input data for clustering calculation
+ */
+module.exports.loadClusteringInput = async function (inputFileName) {
+    const clusteringData = [];
+
+    /*
+     * The parsed CSV array keeps the native csv-parser structure
+     * for future easier serialization back to CSV file
+     */
+    return new Promise((resolve, reject) => {
+        fs.createReadStream(inputFileName)
+            .pipe(csv())
+            .on('data', (row) => {
+                clusteringData.push(row);
+            })
+            .on('end', () => {
+                resolve(clusteringData);
+            })
+            .on('error', reject);
+    });
+}
+
+/**
+ * Resolves once the modified CSV file is written to fs
+ */
+module.exports.modifyClusteringData = async function ({datasets, data, params, outputFileName}) {
+    let allowedDatasets = ['NUTS_ID']; // NUTS_ID must be copied to the output as well
+    for (const factor of params.factors) {
+        allowedDatasets = [...allowedDatasets, ...factor.datasets];
+    }
+    const factorMultipliers = data[0].map((dataset) => {
+        if (dataset === 'NUTS_ID') return 1;
+        const factor = datasets.find(ds => ds.Name === dataset);
+        if (!factor) {
+            /* If the factor is unknown for this dataset, it will effectivelly turn it off */
+            console.log(`Undefined factor for dataset ${dataset}`);
+            return 0;
+        } else if (!allowedDatasets.includes(dataset)) {
+            return 0;
+        } else {
+            return params.factors.find(f => f.factor === factor.Factor).weight;
+        }
+    })
+    //console.log(factorMultipliers);
+    /* The actual modification logic resides here */
+    const modifiedData = data.map((row, idx) => {
+        return row.map((value, i) => {
+            if (idx == 0) {
+                /* These are the headers */
+                /* Have to check for both allowed datasets and zero multiplications */
+                return allowedDatasets.includes(value) && factorMultipliers[i] !== 0 ? value : null;
+            } else if (isNaN(value)) {
+                /* This is the NUTS ID record at the beginning of each line */
+                return value;
+            }
+            return factorMultipliers[i] === 0 ? null : value*factorMultipliers[i];
+        }).filter(val => val !== null);
+    });
+    //console.log(modifiedData);
+    if (modifiedData[0].length <= 1) {
+        throw new Error('All datasets turned off. No data to create clusters.');
+    }
+    return new Promise((resolve, reject) => {
+        stringify(modifiedData, (err, output) => {
+            if (err) return reject(err);
+            fs.writeFile(outputFileName, output, (err) => {
+                if (err) reject(err);
+                else resolve();
+                //console.log('Data modification finished.');
+            })
+        })
+    });
+}
+
+/** 
+ * Reads the out_file.csv created by R script and saves it into an object
 */
 module.exports.loadClusters = function (filePath, dataLoadedCallback) {
-    console.log('Reading clustering data file processing started.');
+    //console.log('Reading clustering data file processing started.');
     let clusters = [];
 
     let columns = undefined;
@@ -126,7 +204,6 @@ module.exports.getFactorIndex = function (region, factor) {
 }
 
 function getDataSetFactor(datasets, colName) {
-
     for (let i = 0; i < datasets.length; i++) {
         if (datasets[i].Name.toLowerCase() == colName)
             return datasets[i].Factor;

+ 5 - 0
package-lock.json

@@ -1231,6 +1231,11 @@
       "resolved": "https://registry.npmjs.org/csv-parse/-/csv-parse-4.8.5.tgz",
       "integrity": "sha512-rpsLmlLWJZifmLzZEVGbZ9phWnJyi+cCbCGYr4vX2NaHFtgbmQPFk+WmMkmMkQXgsIUn6CgnK9cTuUAfFjoXbA=="
     },
+    "csv-stringify": {
+      "version": "5.5.1",
+      "resolved": "https://registry.npmjs.org/csv-stringify/-/csv-stringify-5.5.1.tgz",
+      "integrity": "sha512-HM0/86Ks8OwFbaYLd495tqTs1NhscZL52dC4ieKYumy8+nawQYC0xZ63w1NqLf0M148T2YLYqowoImc1giPn0g=="
+    },
     "debug": {
       "version": "2.6.9",
       "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz",

+ 1 - 0
package.json

@@ -11,6 +11,7 @@
   "dependencies": {
     "cors": "^2.8.5",
     "csv-parse": "^4.8.5",
+    "csv-stringify": "^5.5.1",
     "express": "^4.17.1",
     "r-script": "0.0.4"
   },

+ 4 - 4
r/selected_data.r

@@ -2,10 +2,10 @@ setwd("./data/clustering/") # Nastavení pracovního adresáře (relativní k ro
 
 library(cluster)
 
-input <- read.csv(file = 'input_all.csv',header=TRUE,sep=",") # Načtení CSV souboru
-head(input) # Výpis prvních šesti řádek CSV souboru
-mydata <- input[, -1] # Úprava dat, která funguje, ale nevím proč... (ale jen pro data s více než jedním číselným sloupcem)
-rownames(mydata) <- input[, 1]
+input <- read.csv(file = 'input_modified.csv') # Načtení CSV souboru
+#head(input) # Výpis prvních šesti řádek CSV souboru
+mydata <- input[, -1, drop=F] # Zahození prvního sloupce dat (NUTS_ID). Při 2-sloupcové tabulce nesmí redukovat dimenzi na vektor, proto drop=F
+rownames(mydata) <- input[, 1] # První sloupec dat jako název řádek
 mydata <- scale(mydata) # Standardizace dat
 km25 <- kmeans(mydata, 12, nstart=25) # 12 cluster solution, nstart = počet náhodných počátečních přiřazení, optimální je hodnota 25-50
 km50hw <- kmeans(mydata, 12, nstart=50) # 12 cluster solution, nstart = počet náhodných počátečních přiřazení, optimální je hodnota 25-50

+ 1 - 1
test.html

@@ -6,7 +6,7 @@ function loadDoc() {
   var xhttp = new XMLHttpRequest();
   xhttp.onreadystatechange = function() {
     if (this.readyState == 4 && this.status == 200) {
-     document.getElementById("demo").innerHTML = this.responseText;
+      document.getElementById("demo").innerHTML = this.responseText;
     }
   };
   xhttp.open("GET", "https://publish.lesprojekt.cz/nodejs/datasets", true);