diff options
Diffstat (limited to 'analysis/kmeans.R')
| -rw-r--r-- | analysis/kmeans.R | 51 |
1 files changed, 51 insertions, 0 deletions
# analysis/kmeans.R
########## INFORMATION ##########
# Exploratory k-means clustering of the simulation results.
# Energy and coverage are first aggregated per simulation run
# (simkey, wireless, wakeupfor, seed); the runs of one (wireless, wakeupfor)
# pair (LoRa, 180 s) are then clustered with k-means and plotted.
##################################

library("tidyverse")
options(dplyr.summarise.inform = FALSE)
# NOTE(review): the five packages below are not used anywhere in this script;
# they are kept attached in case other code relies on them -- confirm.
library("class")
library("rpart")
library("rpart.plot")
library("viridis")
library("MLmetrics")

## Simulation Parameters:
## simkey {baseline,extended,hint,hintandextended}
## wireless {lora,nbiot}
## wakeupfor {60s,180s}
## seed [1,200]
## node on[0,12]
## isSender {0,1}
## dataSize {1MB}

## Metrics:
## energy [0,+inf)
## nDataRcv [0,+inf)

nseed <- 200
nwakeupfor <- 2
nwireless <- 2
nsimkey <- 4
nsimulations <- nseed * nwakeupfor * nwireless * nsimkey # Must be 3200

## Load data
# distinct() is needed: the experiment wireless == "lora", seed == 1,
# wakeupfor == 60, simkey == "baseline" is present 2 times in the CSV file.
data <- suppressMessages(read_csv("../CCGRID2022.csv")) %>%
  distinct()

# One row per simulation run: total energy and total received data (coverage).
data_seed <- data %>%
  group_by(simkey, wireless, wakeupfor, seed) %>%
  summarize(energy = sum(energy), coverage = sum(nDataRcv)) %>%
  ungroup()

## Format data
data_seed <- data_seed %>%
  filter(simkey != "hint")

# Recode the categorical columns as numbers. unname() drops the names that a
# named-vector lookup would otherwise leave attached to the columns.
wireless_map <- c("lora" = 1, "nbiot" = 2)
simkey_map <- c(
  "baseline" = 1,
  "hint" = 2,
  "extended" = 3,
  "hintandextended" = 4
)
data_seed$wireless <- unname(wireless_map[data_seed$wireless])
data_seed$simkey <- unname(simkey_map[data_seed$simkey])


## Lora 180s
data_kmean <- data_seed %>%
  filter(wireless == 1, wakeupfor == 180) %>%
  select(-wireless, -wakeupfor, -seed) %>%
  relocate(simkey)

# kmeans() picks its starting centers at random; fix the RNG seed so that the
# cluster numbering (and therefore the plot colours) is the same on every run.
set.seed(1)
# NOTE(review): simkey is part of the clustered columns and the columns are
# not scaled -- confirm this is intended.
m <- kmeans(data_kmean, centers = 3, nstart = 50, iter.max = 80)
data_kmean <- data_kmean %>%
  mutate(cluster = as.factor(m$cluster))

## plots
# cluster is already a factor, so it can be mapped to colour directly.
ggplot(data = data_kmean, aes(coverage, energy, color = cluster)) +
  geom_point()
