# Extract only the columns that are highly correlated with SalePrice and build a correlation matrix from them.
rm(list = ls())
library(tidyverse)
library(data.table)
library(scales)
library(VIM)
library(corrr)
library(corrplot)
library(scales)
# ---- Load data ----
# Directory and file names of the training and test CSV files.
path <- "C:/Users/************/R/house price/all/"
filename <- "train.csv"
filename2 <- "test.csv"

# Read both files with data.table::fread, treating the first row as the header.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
df_train <- fread(str_c(path, filename), header = TRUE)
df_test <- fread(str_c(path, filename2), header = TRUE)

# Keep the test-set Id values, then drop the Id column from both tables so it
# does not take part in the analysis below.
test_label <- df_test$Id
df_train$Id <- NULL
df_test$Id <- NULL

# Stack the training and test rows into one table. bind_rows() fills columns
# that are missing from one of the inputs with NA.
# NOTE(review): the variable name `all` shadows base::all as a value; calls of
# the form all(...) still resolve to the base function.
all <- dplyr::bind_rows(df_train, df_test)

# Plot the proportion and the combinations of missing values for every column.
VIM::aggr(all, labels = names(all), cex.axis = 1, prop = TRUE)
# ---- Distribution of SalePrice ----
# Histogram (30 bins) of SalePrice on a log10 x axis, using only the rows where
# SalePrice is present.
# The log10 transform is applied by the x scale instead of to the data: the
# bins are still computed on the log10 values, but the dollar-formatted axis
# labels now show actual prices rather than log10(price) rendered as dollars.
all %>%
  dplyr::filter(!is.na(SalePrice)) %>%
  ggplot(aes(x = SalePrice)) +
  geom_histogram(bins = 30, fill = "#009900", colour = "black") +
  scale_x_log10(labels = dollar)
# ---- Correlation among the numeric variables ----
# Keep only the numeric columns of the combined table and record their names.
numericVars <- all %>%
  dplyr::select(where(is.numeric))
numericVarsName <- names(numericVars)

# Correlation matrix of all numeric columns. With "pairwise.complete.obs" each
# pair of columns is correlated over the rows where both values are present.
cor_all <- cor(numericVars, use = "pairwise.complete.obs")

# Same matrix as a data frame, with the variable names moved from the row
# names into a regular column called "rowname".
cor_all2 <- cor_all %>%
  as.data.frame() %>%
  rownames_to_column(var = "rowname")

# Rank the variables by the absolute value of their correlation with SalePrice
# and keep the first 15 rows (fewer if there are fewer numeric columns).
# SalePrice itself is part of the ranking.
cor_sorted <- cor_all2 %>%
  dplyr::select(rowname, SalePrice) %>%
  mutate(corabs = abs(SalePrice)) %>%
  arrange(desc(corabs)) %>%
  slice_head(n = 15)

# Names of the selected variables, in ranked order.
CorHigh <- cor_sorted$rowname

# Mixed correlation plot restricted to the selected variables.
corrplot.mixed(cor_all[CorHigh, CorHigh], tl.col = "black", tl.pos = "lt")