1 files changed, 51 insertions, 0 deletions
diff --git a/analysis/kmeans.R b/analysis/kmeans.R
new file mode 100644
index 0000000..320ba8d
--- /dev/null
+++ b/analysis/kmeans.R
@@ -0,0 +1,51 @@
+########## INFORMATION ##########
+# This file studies k-means clustering of the simulation results
+# Runs are aggregated per (simkey,wireless,wakeupfor,seed), then one (wireless,wakeupfor) pair is clustered on (simkey,energy,coverage)
+##################################
+
+library("tidyverse")
+options(dplyr.summarise.inform = FALSE)
+library("class")
+library("rpart")
+library("rpart.plot")
+library("viridis")
+library("MLmetrics")
+
+## Simulation Parameters:
+## simkey         {baseline,extended,hint,hintandextended}
+## wireless       {lora,nbiot}
+## wakeupfor      {60s,180s}
+## seed           [1,200]
+## node           on[0,12]
+## isSender       {0,1}
+## dataSize       {1MB}
+
+## Metrics:
+## energy         [0,+inf)
+## nDataRcv       [0,+inf)
+
+nseed <- 200 # seeds per configuration: [1,200]
+nwakeupfor <- 2 # {60s,180s}
+nwireless <- 2 # {lora,nbiot}
+nsimkey <- 4 # {baseline,extended,hint,hintandextended}
+nsimulations <- nseed * nwakeupfor * nwireless * nsimkey # Must be 3200
+
+## Load data; distinct() drops duplicated rows (the experiment wireless=="lora",seed==1,wakeupfor==60,simkey=="baseline" is present 2 times in the CSV file)
+data <- distinct(suppressMessages(read_csv("../CCGRID2022.csv")))
+data_seed <- data %>% group_by(simkey, wireless, wakeupfor, seed) %>% summarize(energy = sum(energy), coverage = sum(nDataRcv)) %>% ungroup() # one row per (simkey,wireless,wakeupfor,seed)
+
+## Format data: drop the "hint" runs, then recode wireless and simkey from names to numbers
+wireless_map <- c("lora" = 1, "nbiot" = 2)
+simkey_map <- c("baseline" = 1, "hint" = 2, "extended" = 3, "hintandextended" = 4)
+data_seed <- filter(data_seed, simkey != "hint")
+data_seed[["wireless"]] <- wireless_map[data_seed[["wireless"]]]
+data_seed[["simkey"]] <- simkey_map[data_seed[["simkey"]]]
+
+## Lora 180s: split these runs into 3 clusters using the columns (simkey,energy,coverage)
+data_kmean <- data_seed %>% filter(wireless == 1, wakeupfor == 180) %>% select(-wireless, -wakeupfor, -seed) %>% relocate(simkey)
+set.seed(1) # kmeans() draws its nstart=50 sets of initial centers at random; fixing the RNG makes clusters (and plot colours) reproducible
+m <- kmeans(data_kmean, centers = 3, nstart = 50, iter.max = 80) # NOTE(review): columns are not scaled and simkey is a numeric code -- confirm this is intended
+data_kmean <- data_kmean %>% mutate(cluster = as.factor(m$cluster))
+
+## Scatter plot of energy against coverage, one colour per k-means cluster
+ggplot(data_kmean, aes(x = coverage, y = energy, color = as.factor(cluster))) + geom_point()