# fzadrazil / rural-attractivness-service
							# Set-up: working directory, library, requested cluster count and input data.
# Working directory is set relative to the project root; the CSV paths used
# by this script are resolved against it.
setwd("./data/clustering/")

library(cluster)

# 'input' is a list of parameters which comes from the NodeJS service; its
# first element is the requested number of clusters. It is coerced to an
# integer and validated here so that a malformed request fails with a clear
# message instead of an obscure error inside one of the clustering calls.
N <- suppressWarnings(as.integer(input[[1]]))
if (length(N) != 1L || is.na(N) || N < 1L) {
  stop("The number of clusters must be a single positive integer.",
       call. = FALSE)
}

# Load the CSV file. A separate name is used so that the service parameter
# list 'input' is not overwritten by the data frame.
raw_data <- read.csv(file = 'input_modified.csv')
# head(raw_data) # Print the first six rows of the CSV file

# Drop the first column (NUTS_ID). For a 2-column table the result must not
# be reduced to a vector, hence drop = FALSE.
mydata <- raw_data[, -1, drop = FALSE]
rownames(mydata) <- raw_data[, 1] # First column becomes the row names

# pam() only accepts k in 1..(n - 1), so a larger request cannot succeed.
if (N >= nrow(mydata)) {
  stop("The number of clusters must be smaller than the number of rows.",
       call. = FALSE)
}

mydata <- scale(mydata) # Standardise the data (centre and scale each column)

# kmeans() rejects NA/NaN values; report them explicitly. A constant column
# is one possible cause, because scaling it divides by a zero standard
# deviation.
if (anyNA(mydata)) {
  stop("The standardised data contain NA/NaN values ",
       "(missing values or a constant column in the input).",
       call. = FALSE)
}
# Partitioning methods, each producing an N-cluster solution on 'mydata'.
#
# k-means: 'nstart' is the number of random initial assignments that are
# tried; a value of 25-50 is considered optimal here. The first two runs use
# the default algorithm, the other two select Lloyd and MacQueen explicitly.
km25 <- kmeans(mydata, centers = N, nstart = 25)
km50hw <- kmeans(mydata, centers = N, nstart = 50)
km50l <- kmeans(mydata, centers = N, nstart = 50, algorithm = "Lloyd")
km50m <- kmeans(mydata, centers = N, nstart = 50, algorithm = "MacQueen")

# k-medoids (PAM) on the raw observations (not a dissimilarity object), once
# with the Euclidean and once with the Manhattan metric.
kme_eu <- pam(mydata, k = N, diss = FALSE, metric = "euclidean")
kme_mn <- pam(mydata, k = N, diss = FALSE, metric = "manhattan")

# Hierarchical agglomerative clustering, part 1: one tree per distance
# measure, each built with hclust()'s default linkage method.

# Euclidean distance matrix; also reused below for the linkage comparison.
d <- dist(mydata, method = "euclidean")
fit <- hclust(d)

# NOTE(review): no 'p' is passed, so dist() uses its default power for the
# Minkowski distance -- confirm this is not simply duplicating the Euclidean
# result above.
dmin <- dist(mydata, method = "minkowski")
fitmin <- hclust(dmin)

# NOTE(review): the binary distance only distinguishes zero from non-zero
# values, and 'mydata' has been standardised at this point -- confirm this
# measure is meaningful for the input.
dbin <- dist(mydata, method = "binary")
fitbin <- hclust(dbin)

dmax <- dist(mydata, method = "maximum")
fitmax <- hclust(dmax)

dcan <- dist(mydata, method = "canberra")
fitcan <- hclust(dcan)

dman <- dist(mydata, method = "manhattan")
fitman <- hclust(dman)

# Part 2: one tree per linkage method, all on the Euclidean distances.
# NOTE(review): the hclust() documentation demonstrates centroid linkage on
# *squared* Euclidean distances; here the unsquared matrix is passed to the
# centroid and median methods -- confirm that this is intended.
fitwd2 <- hclust(d, method = "ward.D2")
fitcom <- hclust(d, method = "complete")
fitsin <- hclust(d, method = "single")
fitcen <- hclust(d, method = "centroid")
fitmed <- hclust(d, method = "median")
fitmcq <- hclust(d, method = "mcquitty")

# Cut every agglomerative tree into N groups; each result is a vector of
# cluster numbers, one per observation.

# Trees that differ in the distance measure. 'fit' was built with hclust()'s
# default method (complete linkage), so 'haclust' is equal to 'haclustcom'.
haclust    <- cutree(tree = fit,    k = N)
haclustmin <- cutree(tree = fitmin, k = N)
haclustbin <- cutree(tree = fitbin, k = N)
haclustmax <- cutree(tree = fitmax, k = N)
haclustcan <- cutree(tree = fitcan, k = N)
haclustman <- cutree(tree = fitman, k = N)

# Trees that differ in the linkage method (all on Euclidean distances).
haclustwd2 <- cutree(tree = fitwd2, k = N)
haclustcom <- cutree(tree = fitcom, k = N)
haclustsin <- cutree(tree = fitsin, k = N)
haclustcen <- cutree(tree = fitcen, k = N)
haclustmed <- cutree(tree = fitmed, k = N)
haclustmcq <- cutree(tree = fitmcq, k = N)

# Hierarchical divisive clustering (DIANA) on the Euclidean distances.
# Note: from here on 'fit' holds the diana result and no longer the hclust
# tree assigned to the same name earlier in the script.
fit <- diana(d)

# cutree() is documented for trees as produced by hclust(), so the diana
# object is converted with as.hclust() first (the conversion the cluster
# package provides for this purpose) before it is cut into N groups.
hdclust <- cutree(as.hclust(fit), k = N)

# aggregate(mydata, by = list(km25$cluster), FUN = mean) # get cluster means

# Collect the cluster assignment of every method into one table, one column
# per method. The column expressions and their order determine the column
# names and layout of the output file, so they are kept exactly as they are.
outdata <- data.frame(
  km25$cluster,
  km50hw$cluster,
  km50l$cluster,
  km50m$cluster,
  kme_eu$cluster,
  kme_mn$cluster,
  haclust,
  haclustmin,
  haclustbin,
  haclustman,
  haclustmax,
  haclustcan,
  haclustwd2,
  haclustcom,
  haclustsin,
  haclustcen,
  haclustmed,
  haclustmcq,
  hdclust
)

# library(useful)
# plot(fit, data = mydata) # draw the plot

# Write the table to a file in the current working directory.
write.csv(outdata, 'out_file.csv')