とある技術者の徒然草

生産技術者の適当な日記(統計言語Rに関するメモがメイン)

【R言語】重回帰分析で交互作用を検討

重回帰分析で交互作用を検討

重回帰分析で交互作用を検討する場合は、主効果と交互作用項の間の多重共線性を抑えるため、
連続な説明変数の中心化(各変数から平均を引く)が必要。その時はscale関数でscale引数をFALSEにすると楽(center = TRUE, scale = FALSE で平均だけを引き、標準偏差では割らない)。

http://cogpsy.educ.kyoto-u.ac.jp/personal/Kusumi/datasem13/shinya2.pdf

#重回帰分析
library(tidyverse)
library(car)

# Download the housing data set (comma-separated, first row holds the header).
housing <- read.table(
  "http://www.jaredlander.com/data/housing.csv",
  sep = ",",
  header = TRUE
)

# Replace the original column headers, positionally, with shorter names.
name <- c(
  "Neighborhood", "Class", "Units", "YearBuilt", "SqFt", "Income",
  "IncomeperSqFt", "Expence", "ExpencePerSqFt", "NetIncome", "Value",
  "ValueperSqFt", "Boro"
)

names(housing) <- name

# Histogram of ValueperSqFt to inspect its distribution before modelling.
ggplot(housing, aes(x = ValueperSqFt)) +
  geom_histogram(colour = "black") +
  labs(x = "Value per Square Foot")

# Keep only ValueperSqFt (the response) and the five predictors used below;
# all other columns of `housing` are dropped.
housing2 <- housing %>%
  select(ValueperSqFt, Class, YearBuilt, Units, Expence, Income)

# Baseline model: main effects only, predictors not centered,
# no interaction terms.
housemodel2 <- lm(ValueperSqFt ~ (.), data = housing2)

summary(housemodel2)


# Center Units, Expence and Income with scale(): center = TRUE subtracts each
# column mean, scale = FALSE leaves the spread untouched. The result is a
# numeric matrix.
housing3 <- scale(
  select(housing2, Units, Expence, Income),
  center = TRUE,
  scale = FALSE
)

# Keep every remaining column unchanged. The centered columns are dropped by
# name rather than by "non-numeric" type: a type filter would also discard the
# numeric response ValueperSqFt and the numeric predictor YearBuilt, leaving
# the interaction model without its response variable.
housing4 <- select(housing2, -Units, -Expence, -Income)

# Combine the centered matrix with the untouched columns; because housing4 is
# a data frame, cbind() returns a data frame.
housing5 <- cbind(housing3, housing4)

# Interaction model fitted on housing5: (.)^2 expands to every main effect
# plus every two-way interaction among the columns of the data.
housemodel3 <- lm(ValueperSqFt ~ (.)^2, data = housing5)
summary(housemodel3)

# Check whether the interaction terms are worthwhile: anova() on the two fits
# tests the change in the residual sum of squares between them.
anova(housemodel2, housemodel3)
# Result reported by the author: the model with interaction terms showed a
# significantly higher coefficient of determination.