diff options
Diffstat (limited to 'analysis/knn.R')
| -rw-r--r-- | analysis/knn.R | 43 |
1 files changed, 31 insertions, 12 deletions
diff --git a/analysis/knn.R b/analysis/knn.R
index 71771f1..3cfa2a9 100644
--- a/analysis/knn.R
+++ b/analysis/knn.R
@@ -1,5 +1,8 @@
 library("tidyverse")
 library("class")
+library("rpart")
+library("rpart.plot")
+library("viridis")
 
 ## Simulation Parameters:
 ## simkey {baseline,extended,hint,hintandextended}
@@ -32,18 +35,34 @@ data_seed=data%>%group_by(simkey,wireless,wakeupfor,seed)%>%summarize(energy=sum
     ungroup()
 
 
-## Prepare data for knn
+## Prepare data for training
 set.seed(1) # Reproducibility
-data_seed=data_seed%>%select(-efficiency,-seed)%>%mutate(wireless=as.numeric(as.factor(data_seed$wireless)))
-
-## Train
+data_seed=data_seed%>%select(-efficiency,-seed)%>%mutate(wireless=as.numeric(as.factor(data_seed$wireless)))#%>%filter(simkey!="hint")
 train_set=data_seed%>%sample_frac(0.8) # 80% of the data
 test_set=data_seed%>%anti_join(train_set) # 20% of the data
-classifier=knn(train=train_set%>%select(-simkey),test=test_set%>%select(-simkey),cl=train_set$simkey,k=10)
-
-## Analysis
-cont_table=table(classifier,test_set$simkey)
-accuracy=round((sum(diag(cont_table)/sum(rowSums(cont_table))))*100)
-prop_table=round(prop.table(cont_table),digits=2)
-print(prop_table)
-print(paste0("Overall KNN accuracy ",accuracy,"%"))
+
+## KNN training
+knn_predictions=knn(train=train_set%>%select(-simkey),test=test_set%>%select(-simkey),cl=train_set$simkey,k=10)
+## KNN analysis
+knn_cont_table=table(knn_predictions,test_set$simkey)
+knn_accuracy=round((sum(diag(knn_cont_table)/sum(rowSums(knn_cont_table))))*100)
+knn_prop_table=round(prop.table(knn_cont_table),digits=2)
+
+## Decision tree
+tree=rpart(
+    simkey ~ wireless + wakeupfor + energy + coverage,
+    data=train_set,
+    method="class",
+    minsplit=60,
+    minbucket=1)
+tree_predictions=predict(tree,newdata=test_set%>%select(-simkey),type="class")
+tree_cont_table=table(tree_predictions,test_set$simkey)
+tree_accuracy=round((sum(diag(tree_cont_table)/sum(rowSums(tree_cont_table))))*100)
+tree_prop_table=round(prop.table(tree_cont_table),digits=2)
+
+## Prints
+print(paste0("Accuracy: KNN=",knn_accuracy,"% CART=",tree_accuracy,"%"))
+pdf("figures/tree.pdf")
+tree_plot=rpart.plot(tree,box.palette=as.list(viridis::viridis(4,begin=0.48)))
+dev.off()
+## Notes: KNN accuracy jumps to 76% and CART accuracy to 80% without the hint policy
