summaryrefslogtreecommitdiff
path: root/analysis/kmeans.R
blob: 320ba8d066266e433bba0049ed5b6841a0396139 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
########## INFORMATION ##########
# This file studies k-means clustering of the per-seed simulation results
# (total energy and coverage). It currently covers a single
# (wireless, wakeupfor) pair: lora with a 180s wake-up.
#################################

library("tidyverse")
options(dplyr.summarise.inform = FALSE)
library("class")
library("rpart")
library("rpart.plot")
library("viridis")
library("MLmetrics")

## Simulation Parameters:
## simkey         {baseline,extended,hint,hintandextended}
## wireless       {lora,nbiot}
## wakeupfor      {60s,180s}
## seed           [1,200]
## node           on[0,12]
## isSender       {0,1}
## dataSize       {1MB}

## Metrics:
## energy         [0,+inf)
## nDataRcv       [0,+inf)

## Size of the experiment design: number of levels of each simulation parameter
nseed <- 200
nwakeupfor <- 2
nwireless <- 2
nsimkey <- 4
## One simulation per combination of the parameters above; must be 3200
nsimulations <- prod(nseed, nwakeupfor, nwireless, nsimkey)

## Load data
## distinct() removes exact duplicate rows: the experiment
## wireless=="lora", seed==1, wakeupfor==60, simkey=="baseline"
## is present 2 times in the CSV file.
data <- suppressMessages(read_csv("../CCGRID2022.csv")) %>%
  distinct()

## Collapse to one row per (simkey, wireless, wakeupfor, seed) by summing
## energy and nDataRcv over the rows of that group (presumably one row per
## node -- confirm against the CSV). The summed nDataRcv is named "coverage".
data_seed <- data %>%
  group_by(simkey, wireless, wakeupfor, seed) %>%
  summarize(
    energy = sum(energy),
    coverage = sum(nDataRcv)
  ) %>%
  ungroup()

## Format data
## Numeric codes for the categorical parameters, looked up by label below
wireless_map <- c("lora" = 1, "nbiot" = 2)
simkey_map <- c("baseline" = 1, "hint" = 2, "extended" = 3, "hintandextended" = 4)

## Discard the "hint" runs before recoding
data_seed <- data_seed %>%
  filter(simkey != "hint")

## Replace each label by its numeric code (name-based lookup in the maps)
data_seed[["wireless"]] <- wireless_map[data_seed[["wireless"]]]
data_seed[["simkey"]] <- simkey_map[data_seed[["simkey"]]]


## Lora 180s
## Keep the runs with wireless == 1 (the code given to "lora") and
## wakeupfor == 180, then drop the wireless, wakeupfor and seed columns and
## move simkey to the first position.
data_kmean <- data_seed %>%
  filter(wireless == 1, wakeupfor == 180) %>%
  select(-wireless, -wakeupfor, -seed) %>%
  relocate(simkey)

## kmeans() picks its initial centres at random (50 random starts here), so
## fix the RNG seed to get the same clusters and cluster numbering on every run.
set.seed(1)

## NOTE(review): every column of data_kmean is passed to kmeans(), so the
## numeric simkey code takes part in the distance together with the unscaled
## energy and coverage -- confirm this is intended.
m <- kmeans(data_kmean, centers = 3, nstart = 50, iter.max = 80)

## Attach the cluster assigned to each run as a factor column
data_kmean <- data_kmean %>%
  mutate(cluster = as.factor(m$cluster))

## plots
## Scatter plot of energy against coverage, one point per run,
## coloured by the k-means cluster it was assigned to
ggplot(
  data = data_kmean,
  mapping = aes(x = coverage, y = energy, colour = as.factor(cluster))
) +
  geom_point()