数据挖掘:关联规则挖掘实操
课本习题:探究学生成绩和学生特征的关联规则。
·
课本习题:探究学生成绩和学生特征的关联规则。
一.加载程序包
#0.加载程序包
library(arules)
library(arulesViz)
library(dplyr)
二.读入数据 处理数据
# 1. Load the CSV into a data frame. The first five columns (the student
#    attributes) are read as character and the last three (the scores) as
#    numeric; each attribute column is then converted to a factor.
StudentsPerformance <- read.csv(
  "machine experiment/data/StudentsPerformance.csv",
  colClasses = c(rep("character", 5), rep("numeric", 3))
)
StudentsPerformance <- mutate(
  StudentsPerformance,
  gender = as.factor(gender),
  race.ethnicity = as.factor(race.ethnicity),
  parental.level.of.education = as.factor(parental.level.of.education),
  lunch = as.factor(lunch),
  test.preparation.course = as.factor(test.preparation.course)
)
# 2. Bin each of the three scores into three bands and store them as factors:
#    below 60, from 60 up to (but not including) 85, and 85 or above.
#    For whole-number scores that is 0-59, 60-84 and 85-100.
#
# The band labels are written out explicitly as "(0,59]", "(59,84]" and
# "(84,100]" -- the labels cut() generates for these breaks by default -- so
# item names such as "math.score=(0,59]" stay the same.
# include.lowest = TRUE puts a score of exactly 0 into the lowest band; without
# it, 0 lies outside (0,59] and cut() returns NA for it. Scores outside
# [0, 100] still become NA.
# cut() already returns a factor, so no as.factor() wrapper is needed.
# Counts for the lowest band can differ from runs made without include.lowest
# if the data contains zero scores.
score_band <- function(score) {
  cut(
    score,
    breaks = c(0, 59, 84, 100),
    labels = c("(0,59]", "(59,84]", "(84,100]"),
    include.lowest = TRUE
  )
}

StudentsPerformance <- StudentsPerformance %>%
  mutate(
    math.score = score_band(math.score),
    reading.score = score_band(reading.score),
    writing.score = score_band(writing.score)
  )
三.关联分析 查看结果
数学成绩(0,59]
# 3. Thresholds used for the mining steps: minimum support 0.1 and minimum
#    confidence 0.5. Each subject is mined on its own, so build one data frame
#    per subject that keeps the student attributes plus only that subject's
#    score column (the other two score columns are dropped).
StudentsPerformance_math <- select(
  StudentsPerformance, -reading.score, -writing.score
)
StudentsPerformance_reading <- select(
  StudentsPerformance, -math.score, -writing.score
)
StudentsPerformance_writing <- select(
  StudentsPerformance, -reading.score, -math.score
)
# 4. Mine rules whose right-hand side is fixed to math.score=(0,59]
#    (math score below 60), using minimum support 0.1 and minimum
#    confidence 0.5, then print the first six rules ordered by lift.
#    The steps that follow inspect the non-redundant rules and draw a graph.
rules_math_below60 <- apriori(
  StudentsPerformance_math,
  parameter = list(support = 0.1, confidence = 0.5),
  appearance = list(rhs = c("math.score=(0,59]")),
  # FALSE rather than F: F is an ordinary variable that can be reassigned.
  control = list(verbose = FALSE)
)
inspect(head(rules_math_below60, by = "lift"))
lhs rhs support confidence coverage lift count
[1] {gender=female,
lunch=free/reduced} => {math.score=(0,59]} 0.106 0.5608 0.189 1.742 106
[2] {lunch=free/reduced,
test.preparation.course=none} => {math.score=(0,59]} 0.120 0.5357 0.224 1.664 120
# Drop redundant rules, then print the first six remaining rules by lift.
rules_math_below60[!is.redundant(rules_math_below60)] %>%
  head(by = "lift") %>%
  inspect()
lhs rhs support confidence coverage lift count
[1] {gender=female,
lunch=free/reduced} => {math.score=(0,59]} 0.106 0.5608 0.189 1.742 106
[2] {lunch=free/reduced,
test.preparation.course=none} => {math.score=(0,59]} 0.120 0.5357 0.224 1.664 120
# Keep only the non-redundant rules and draw the top rules by lift as a graph.
rules_math_below60_pruned <-
  rules_math_below60[!is.redundant(rules_math_below60)]
# plot() stops with "x contains 0 rules!" when given an empty rule set, so
# only draw the graph when at least one rule is left.
if (length(rules_math_below60_pruned) > 0) {
  plot(head(rules_math_below60_pruned, by = "lift"), method = "graph")
} else {
  message("No non-redundant rules for math.score=(0,59]; skipping the graph.")
}
数学成绩(59,84]
# 5. Mine rules whose right-hand side is fixed to math.score=(59,84]
# (math score of at least 60 and below 85), with support 0.1 and
# confidence 0.5, then print the first six rules ordered by lift.
> rules_math_60to85 <- apriori(StudentsPerformance_math,
+ parameter = list(supp=0.1,conf=0.5),
+ appearance = list(rhs=c('math.score=(59,84]')),
+ control =list(verbose=F))
+
> inspect(head(rules_math_60to85,by='lift'))
lhs rhs support confidence coverage lift count
[1] {race.ethnicity=group D,
lunch=standard} => {math.score=(59,84]} 0.110 0.6587 0.167 1.176 110
[2] {gender=male,
lunch=standard} => {math.score=(59,84]} 0.196 0.6203 0.316 1.108 196
[3] {race.ethnicity=group D} => {math.score=(59,84]} 0.162 0.6183 0.262 1.104 162
[4] {lunch=standard,
test.preparation.course=completed} => {math.score=(59,84]} 0.140 0.6167 0.227 1.101 140
[5] {gender=male,
lunch=standard,
test.preparation.course=none} => {math.score=(59,84]} 0.125 0.6158 0.203 1.100 125
[6] {lunch=standard} => {math.score=(59,84]} 0.393 0.6093 0.645 1.088 393
# Same listing after removing the rules flagged by is.redundant().
> inspect(head(rules_math_60to85[!is.redundant(rules_math_60to85)],by="lift"))
lhs rhs support confidence coverage lift count
[1] {race.ethnicity=group D,
lunch=standard} => {math.score=(59,84]} 0.110 0.6587 0.167 1.176 110
[2] {gender=male,
lunch=standard} => {math.score=(59,84]} 0.196 0.6203 0.316 1.108 196
[3] {race.ethnicity=group D} => {math.score=(59,84]} 0.162 0.6183 0.262 1.104 162
[4] {lunch=standard,
test.preparation.course=completed} => {math.score=(59,84]} 0.140 0.6167 0.227 1.101 140
[5] {lunch=standard} => {math.score=(59,84]} 0.393 0.6093 0.645 1.088 393
[6] {gender=male,
test.preparation.course=completed} => {math.score=(59,84]} 0.105 0.6034 0.174 1.078 105
# Keep the non-redundant rules and draw the top rules by lift as a graph.
> rules_math_60to85_pruned <- rules_math_60to85[!is.redundant(rules_math_60to85)]
> plot(head(rules_math_60to85_pruned,by="lift"),method = "graph")
数学成绩(84,100]
# 6. Same procedure for math.score=(84,100] (math score of 85 or more):
# mine with support 0.1 and confidence 0.5, list the first six rules by lift
# with and without redundant rules, then plot the non-redundant ones.
# NOTE(review): no rules are printed after either inspect() call in this
# transcript, and plot() stops with "x contains 0 rules!"; checking
# length(rules_math_above85_pruned) > 0 before plotting would avoid the error.
> rules_math_above85 <- apriori(StudentsPerformance_math,
+ parameter = list(supp=0.1,conf=0.5),
+ appearance = list(rhs=c('math.score=(84,100]')),
+ control =list(verbose=F))
> inspect(head(rules_math_above85,by='lift'))
> inspect(head(rules_math_above85[!is.redundant(rules_math_above85)],by="lift"))
> rules_math_above85_pruned <- rules_math_above85[!is.redundant(rules_math_above85)]
> plot(head(rules_math_above85_pruned,by="lift"),method = "graph")
Error in plot.rules(head(rules_math_above85_pruned, by = "lift"), method = "graph") :
x contains 0 rules!
阅读成绩(0,59]
# Mine rules whose right-hand side is fixed to reading.score=(0,59], with
# support 0.1 and confidence 0.5; list the first six rules by lift with and
# without redundant rules, then plot the non-redundant ones.
# NOTE(review): no rules are printed after either inspect() call in this
# transcript, and plot() stops with "x contains 0 rules!"; checking
# length(rules_reading_below60_pruned) > 0 before plotting would avoid the error.
> rules_reading_below60 <- apriori(StudentsPerformance_reading,
+ parameter = list(supp=0.1,conf=0.5),
+ appearance = list(rhs=c('reading.score=(0,59]')),
+ control =list(verbose=F))
> inspect(head(rules_reading_below60,by='lift'))
> inspect(head(rules_reading_below60[!is.redundant(rules_reading_below60)],by="lift"))
> rules_reading_below60_pruned <- rules_reading_below60[!is.redundant(rules_reading_below60)]
> plot(head(rules_reading_below60_pruned,by="lift"),method = "graph")
Error in plot.rules(head(rules_reading_below60_pruned, by = "lift"), method = "graph") :
x contains 0 rules!
阅读成绩(59,84]
# Mine rules whose right-hand side is fixed to reading.score=(59,84], with
# support 0.1 and confidence 0.5, then print the first six rules by lift.
> rules_reading_60to85 <- apriori(StudentsPerformance_reading,
+ parameter = list(supp=0.1,conf=0.5),
+ appearance = list(rhs=c('reading.score=(59,84]')),
+ control =list(verbose=F))
> inspect(head(rules_reading_60to85,by='lift'))
lhs rhs support confidence coverage lift count
[1] {race.ethnicity=group D,
lunch=standard} => {reading.score=(59,84]} 0.113 0.6766 0.167 1.135 113
[2] {gender=female,
race.ethnicity=group C} => {reading.score=(59,84]} 0.118 0.6556 0.180 1.100 118
[3] {gender=male,
test.preparation.course=completed} => {reading.score=(59,84]} 0.113 0.6494 0.174 1.090 113
[4] {race.ethnicity=group C,
lunch=standard} => {reading.score=(59,84]} 0.133 0.6488 0.205 1.089 133
[5] {gender=female,
lunch=standard,
test.preparation.course=none} => {reading.score=(59,84]} 0.139 0.6465 0.215 1.085 139
[6] {race.ethnicity=group C,
test.preparation.course=none} => {reading.score=(59,84]} 0.130 0.6436 0.202 1.080 130
# Same listing after removing the rules flagged by is.redundant().
> inspect(head(rules_reading_60to85[!is.redundant(rules_reading_60to85)],by="lift"))
lhs rhs support confidence coverage lift count
[1] {race.ethnicity=group D,
lunch=standard} => {reading.score=(59,84]} 0.113 0.6766 0.167 1.135 113
[2] {gender=female,
race.ethnicity=group C} => {reading.score=(59,84]} 0.118 0.6556 0.180 1.100 118
[3] {gender=male,
test.preparation.course=completed} => {reading.score=(59,84]} 0.113 0.6494 0.174 1.090 113
[4] {race.ethnicity=group C,
lunch=standard} => {reading.score=(59,84]} 0.133 0.6488 0.205 1.089 133
[5] {gender=female,
lunch=standard,
test.preparation.course=none} => {reading.score=(59,84]} 0.139 0.6465 0.215 1.085 139
[6] {race.ethnicity=group C,
test.preparation.course=none} => {reading.score=(59,84]} 0.130 0.6436 0.202 1.080 130
# Keep the non-redundant rules and draw the top rules by lift as a graph.
> rules_reading_60to85_pruned <- rules_reading_60to85[!is.redundant(rules_reading_60to85)]
> plot(head(rules_reading_60to85_pruned,by="lift"),method = "graph")
阅读成绩(84,100]
# Mine rules whose right-hand side is fixed to reading.score=(84,100], with
# support 0.1 and confidence 0.5; list the first six rules by lift with and
# without redundant rules, then plot the non-redundant ones.
# NOTE(review): no rules are printed after either inspect() call in this
# transcript, and plot() stops with "x contains 0 rules!"; checking
# length(rules_reading_above85_pruned) > 0 before plotting would avoid the error.
> rules_reading_above85 <- apriori(StudentsPerformance_reading,
+ parameter = list(supp=0.1,conf=0.5),
+ appearance = list(rhs=c('reading.score=(84,100]')),
+ control =list(verbose=F))
> inspect(head(rules_reading_above85,by='lift'))
> inspect(head(rules_reading_above85[!is.redundant(rules_reading_above85)],by="lift"))
> rules_reading_above85_pruned <- rules_reading_above85[!is.redundant(rules_reading_above85)]
> plot(head(rules_reading_above85_pruned,by="lift"),method = "graph")
Error in plot.rules(head(rules_reading_above85_pruned, by = "lift"), method = "graph") :
x contains 0 rules!
写作成绩(0,59]
# Mine rules whose right-hand side is fixed to writing.score=(0,59], with
# support 0.1 and confidence 0.5, then print the first six rules by lift.
> rules_writing_below60 <- apriori(StudentsPerformance_writing,
+ parameter = list(supp=0.1,conf=0.5),
+ appearance = list(rhs=c('writing.score=(0,59]')),
+ control =list(verbose=F))
+
> inspect(head(rules_writing_below60,by='lift'))
lhs rhs support confidence coverage lift count
[1] {lunch=free/reduced,
test.preparation.course=none} => {writing.score=(0,59]} 0.115 0.5134 0.224 1.827 115
# Same listing after removing the rules flagged by is.redundant().
> inspect(head(rules_writing_below60[!is.redundant(rules_writing_below60)],by="lift"))
lhs rhs support confidence coverage lift count
[1] {lunch=free/reduced,
test.preparation.course=none} => {writing.score=(0,59]} 0.115 0.5134 0.224 1.827 115
# Keep the non-redundant rules and draw the top rules by lift as a graph.
> rules_writing_below60_pruned <- rules_writing_below60[!is.redundant(rules_writing_below60)]
> plot(head(rules_writing_below60_pruned,by="lift"),method = "graph")
写作成绩(59,84]
# Mine rules whose right-hand side is fixed to writing.score=(59,84], with
# support 0.1 and confidence 0.5, then print the first six rules by lift.
> rules_writing_60to85 <- apriori(StudentsPerformance_writing,
+ parameter = list(supp=0.1,conf=0.5),
+ appearance = list(rhs=c('writing.score=(59,84]')),
+ control =list(verbose=F))
> inspect(head(rules_writing_60to85,by='lift'))
lhs rhs support confidence coverage lift count
[1] {race.ethnicity=group D,
lunch=standard} => {writing.score=(59,84]} 0.113 0.6766 0.167 1.159 113
[2] {gender=female,
lunch=standard,
test.preparation.course=none} => {writing.score=(59,84]} 0.144 0.6698 0.215 1.147 144
[3] {gender=female,
lunch=standard} => {writing.score=(59,84]} 0.213 0.6474 0.329 1.109 213
[4] {race.ethnicity=group D} => {writing.score=(59,84]} 0.168 0.6412 0.262 1.098 168
[5] {gender=male,
test.preparation.course=completed} => {writing.score=(59,84]} 0.110 0.6322 0.174 1.083 110
[6] {test.preparation.course=completed} => {writing.score=(59,84]} 0.224 0.6257 0.358 1.071 224
# Same listing after removing the rules flagged by is.redundant().
> inspect(head(rules_writing_60to85[!is.redundant(rules_writing_60to85)],by="lift"))
lhs rhs support confidence coverage lift count
[1] {race.ethnicity=group D,
lunch=standard} => {writing.score=(59,84]} 0.113 0.6766 0.167 1.159 113
[2] {gender=female,
lunch=standard,
test.preparation.course=none} => {writing.score=(59,84]} 0.144 0.6698 0.215 1.147 144
[3] {gender=female,
lunch=standard} => {writing.score=(59,84]} 0.213 0.6474 0.329 1.109 213
[4] {race.ethnicity=group D} => {writing.score=(59,84]} 0.168 0.6412 0.262 1.098 168
[5] {gender=male,
test.preparation.course=completed} => {writing.score=(59,84]} 0.110 0.6322 0.174 1.083 110
[6] {test.preparation.course=completed} => {writing.score=(59,84]} 0.224 0.6257 0.358 1.071 224
# Keep the non-redundant rules and draw the top rules by lift as a graph.
> rules_writing_60to85_pruned <- rules_writing_60to85[!is.redundant(rules_writing_60to85)]
> plot(head(rules_writing_60to85_pruned,by="lift"),method = "graph")
写作成绩(84,100]
# NOTE(review): the next two commands operate on the (59,84] rule set, not on
# the (84,100] band this section is about; they repeat the prune-and-plot step
# for rules_writing_60to85 and look like a copy-paste leftover.
> rules_writing_60to85_pruned <- rules_writing_60to85[!is.redundant(rules_writing_60to85)]
> plot(head(rules_writing_60to85_pruned,by="lift"),method = "graph")
# Mine rules whose right-hand side is fixed to writing.score=(84,100], with
# support 0.1 and confidence 0.5; list the first six rules by lift with and
# without redundant rules, then plot the non-redundant ones.
# NOTE(review): no rules are printed after either inspect() call in this
# transcript, and plot() stops with "x contains 0 rules!"; checking
# length(rules_writing_above85_pruned) > 0 before plotting would avoid the error.
> rules_writing_above85 <- apriori(StudentsPerformance_writing,
+ parameter = list(supp=0.1,conf=0.5),
+ appearance = list(rhs=c('writing.score=(84,100]')),
+ control =list(verbose=F))
> inspect(head(rules_writing_above85,by='lift'))
> inspect(head(rules_writing_above85[!is.redundant(rules_writing_above85)],by="lift"))
> rules_writing_above85_pruned <- rules_writing_above85[!is.redundant(rules_writing_above85)]
> plot(head(rules_writing_above85_pruned,by="lift"),method = "graph")
Error in plot.rules(head(rules_writing_above85_pruned, by = "lift"), method = "graph") :
x contains 0 rules!
更多推荐
所有评论(0)