summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLoic Guegan <manzerbredes@mailbox.org>2022-11-08 12:09:09 +0100
committerLoic Guegan <manzerbredes@mailbox.org>2022-11-08 12:09:09 +0100
commitb5a575c126e583feb973eff22c70e8a52a71b7f6 (patch)
treeddda4b76e351c43ae869c9451523575d100fcfa2
parent2f3c41d6a3ff84c430c78a15b5f96085362ad7a5 (diff)
Minor changes
-rw-r--r--analysis/figures/tree.pdfbin0 -> 9593 bytes
-rw-r--r--analysis/knn.R43
-rw-r--r--analysis/renv.lock18
3 files changed, 49 insertions, 12 deletions
diff --git a/analysis/figures/tree.pdf b/analysis/figures/tree.pdf
new file mode 100644
index 0000000..59b305a
--- /dev/null
+++ b/analysis/figures/tree.pdf
Binary files differ
diff --git a/analysis/knn.R b/analysis/knn.R
index 71771f1..3cfa2a9 100644
--- a/analysis/knn.R
+++ b/analysis/knn.R
@@ -1,5 +1,8 @@
library("tidyverse")
library("class")
+library("rpart")
+library("rpart.plot")
+library("viridis")
## Simulation Parameters:
## simkey {baseline,extended,hint,hintandextended}
@@ -32,18 +35,34 @@ data_seed=data%>%group_by(simkey,wireless,wakeupfor,seed)%>%summarize(energy=sum
ungroup()
-## Prepare data for knn
+## Prepare data for training
set.seed(1) # Reproducibility
-data_seed=data_seed%>%select(-efficiency,-seed)%>%mutate(wireless=as.numeric(as.factor(data_seed$wireless)))
-
-## Train
+data_seed=data_seed%>%select(-efficiency,-seed)%>%mutate(wireless=as.numeric(as.factor(data_seed$wireless)))#%>%filter(simkey!="hint")
train_set=data_seed%>%sample_frac(0.8) # 80% of the data
test_set=data_seed%>%anti_join(train_set) # 20% of the data
-classifier=knn(train=train_set%>%select(-simkey),test=test_set%>%select(-simkey),cl=train_set$simkey,k=10)
-
-## Analysis
-cont_table=table(classifier,test_set$simkey)
-accuracy=round((sum(diag(cont_table)/sum(rowSums(cont_table))))*100)
-prop_table=round(prop.table(cont_table),digits=2)
-print(prop_table)
-print(paste0("Overall KNN accuracy ",accuracy,"%"))
+
+## KNN training
+knn_predictions=knn(train=train_set%>%select(-simkey),test=test_set%>%select(-simkey),cl=train_set$simkey,k=10)
+## KNN analysis
+knn_cont_table=table(knn_predictions,test_set$simkey)
+knn_accuracy=round((sum(diag(knn_cont_table)/sum(rowSums(knn_cont_table))))*100)
+knn_prop_table=round(prop.table(knn_cont_table),digits=2)
+
+## Decision tree
+tree=rpart(
+ simkey ~ wireless + wakeupfor + energy + coverage,
+ data=train_set,
+ method="class",
+ minsplit=60,
+ minbucket=1)
+tree_predictions=predict(tree,newdata=test_set%>%select(-simkey),type="class")
+tree_cont_table=table(tree_predictions,test_set$simkey)
+tree_accuracy=round((sum(diag(tree_cont_table)/sum(rowSums(tree_cont_table))))*100)
+tree_prop_table=round(prop.table(tree_cont_table),digits=2)
+
+## Prints
+print(paste0("Accuracy: KNN=",knn_accuracy,"% CART=",tree_accuracy,"%"))
+pdf("figures/tree.pdf")
+tree_plot=rpart.plot(tree,box.palette=as.list(viridis::viridis(4,begin=0.48)))
+dev.off()
+## Notes: KNN accuracy jumps to 76% and CART accuracy to 80% without the hint policy
diff --git a/analysis/renv.lock b/analysis/renv.lock
index 698d4ef..c51eb94 100644
--- a/analysis/renv.lock
+++ b/analysis/renv.lock
@@ -928,6 +928,24 @@
"yaml"
]
},
+ "rpart": {
+ "Package": "rpart",
+ "Version": "4.1.19",
+ "Source": "Repository",
+ "Repository": "CRAN",
+ "Hash": "b3c892a81783376cc2204af0f5805a80",
+ "Requirements": []
+ },
+ "rpart.plot": {
+ "Package": "rpart.plot",
+ "Version": "3.1.1",
+ "Source": "Repository",
+ "Repository": "CRAN",
+ "Hash": "3281d3200bbcff53781d7d40d78f3590",
+ "Requirements": [
+ "rpart"
+ ]
+ },
"rstudioapi": {
"Package": "rstudioapi",
"Version": "0.14",