数据挖掘：关联规则挖掘实操

课本习题：探究学生成绩和学生特征的关联规则。

clingboo

498人浏览 · 2023-03-04 15:22:46

clingboo · 2023-03-04 15:22:46 发布

课本习题：探究学生成绩和学生特征的关联规则。

一.加载程序包

#0.加载程序包

library(arules)
library(arulesViz)
library(dplyr)

二.读入数据处理数据

#1.读入数据生成R数据框 声明变量类型 关于学生类型的各变量转换为因子型变量
#as.factor()函数将其转换为因子类型

# Load the CSV: the first five columns are read as text, the last three as
# numbers.
StudentsPerformance <- read.csv(
  "machine experiment/data/StudentsPerformance.csv",
  colClasses = c(rep("character", 5), rep("numeric", 3))
)

# Turn every student-attribute column into a factor in a single step.
StudentsPerformance <- StudentsPerformance %>%
  mutate(across(
    c(gender, race.ethnicity, parental.level.of.education, lunch,
      test.preparation.course),
    as.factor
  ))

#2.将数学 阅读 写作每项成绩按照小于60 大于等于60且小于85 大于或等于85分划分为3组 转换为因子变量
#使用cut函数将成绩分为三个区间 breaks=c(0,59,84,100)对应(0,59] (59,84] (84,100]
#as.factor()转换为因子类型

# Bin each score into three grade bands: below 60, 60 to 84, and 85 or above.
# include.lowest = TRUE keeps a score of exactly 0 in the first band; without
# it cut() leaves the lower break open and turns such a score into NA.
# The labels are spelled out so the level names stay "(0,59]", "(59,84]" and
# "(84,100]", which the rhs item names passed to apriori() rely on.
# cut() already returns a factor, so no extra as.factor() is needed.
StudentsPerformance <- StudentsPerformance %>%
  mutate(across(
    c(math.score, reading.score, writing.score),
    ~ cut(.x,
          breaks = c(0, 59, 84, 100),
          labels = c("(0,59]", "(59,84]", "(84,100]"),
          include.lowest = TRUE)
  ))

三.关联分析查看结果

数学成绩(0,59]

#3.设最小支持度阈值min_sup=0.1 最小置信度阈值min_conf=0.5 学生特征与数学 阅读 写作每项成绩的关联规则
#挖掘每一项成绩与学生特征的关联规则的时候不需要另外两项成绩 先处理数据得到学生特征和单项成绩的数据框

# One data frame per subject: the student attributes plus that subject's score
# only (the other two score columns are dropped).
StudentsPerformance_math <- select(
  StudentsPerformance, -reading.score, -writing.score
)
StudentsPerformance_reading <- select(
  StudentsPerformance, -math.score, -writing.score
)
StudentsPerformance_writing <- select(
  StudentsPerformance, -reading.score, -math.score
)

#4.挖掘数学成绩小于60分跟什么有关 查看分析结果 查看提升值排行前六位的非冗余规则 绘制提升值大于1的无冗余规则的关联规则有向图

# Mine rules whose right-hand side is the lowest math-score band, with
# minimum support 0.1 and minimum confidence 0.5; progress output is silenced.
# Parameter names are written out in full and FALSE is used instead of F,
# which is an ordinary variable that can be reassigned.
rules_math_below60 <- apriori(
  StudentsPerformance_math,
  parameter = list(support = 0.1, confidence = 0.5),
  appearance = list(rhs = c("math.score=(0,59]")),
  control = list(verbose = FALSE)
)
                              
# Print the six rules with the highest lift.
inspect(head(rules_math_below60, n = 6L, by = "lift"))
    lhs                               rhs                 support confidence coverage  lift count
[1] {gender=female,                                                                              
     lunch=free/reduced}           => {math.score=(0,59]}   0.106     0.5608    0.189 1.742   106
[2] {lunch=free/reduced,                                                                         
     test.preparation.course=none} => {math.score=(0,59]}   0.120     0.5357    0.224 1.664   120
     
# Keep only the non-redundant rules, then print the six with the highest lift.
inspect(
  head(
    rules_math_below60[!is.redundant(rules_math_below60)],
    n = 6L,
    by = "lift"
  )
)
    lhs                               rhs                 support confidence coverage  lift count
[1] {gender=female,                                                                              
     lunch=free/reduced}           => {math.score=(0,59]}   0.106     0.5608    0.189 1.742   106
[2] {lunch=free/reduced,                                                                         
     test.preparation.course=none} => {math.score=(0,59]}   0.120     0.5357    0.224 1.664   120

# Store the non-redundant rules, then draw the six highest-lift ones as a
# graph.
rules_math_below60_pruned <-
  rules_math_below60[!is.redundant(rules_math_below60)]
plot(
  head(rules_math_below60_pruned, n = 6L, by = "lift"),
  method = "graph"
)

在这里插入图片描述

数学成绩(59,84]

#5.挖掘数学成绩大于等于60分小于85跟什么有关 查看分析结果 查看提升值排行前六位的非冗余规则 绘制提升值大于1的无冗余规则的关联规则有向图

> rules_math_60to85 <- apriori(StudentsPerformance_math,
+                               parameter = list(supp=0.1,conf=0.5),
+                               appearance = list(rhs=c('math.score=(59,84]')),
+                               control =list(verbose=F))
+ 
> inspect(head(rules_math_60to85,by='lift'))
    lhs                                    rhs                  support confidence coverage  lift count
[1] {race.ethnicity=group D,                                                                           
     lunch=standard}                    => {math.score=(59,84]}   0.110     0.6587    0.167 1.176   110
[2] {gender=male,                                                                                      
     lunch=standard}                    => {math.score=(59,84]}   0.196     0.6203    0.316 1.108   196
[3] {race.ethnicity=group D}            => {math.score=(59,84]}   0.162     0.6183    0.262 1.104   162
[4] {lunch=standard,                                                                                   
     test.preparation.course=completed} => {math.score=(59,84]}   0.140     0.6167    0.227 1.101   140
[5] {gender=male,                                                                                      
     lunch=standard,                                                                                   
     test.preparation.course=none}      => {math.score=(59,84]}   0.125     0.6158    0.203 1.100   125
[6] {lunch=standard}                    => {math.score=(59,84]}   0.393     0.6093    0.645 1.088   393

> inspect(head(rules_math_60to85[!is.redundant(rules_math_60to85)],by="lift"))
    lhs                                    rhs                  support confidence coverage  lift count
[1] {race.ethnicity=group D,                                                                           
     lunch=standard}                    => {math.score=(59,84]}   0.110     0.6587    0.167 1.176   110
[2] {gender=male,                                                                                      
     lunch=standard}                    => {math.score=(59,84]}   0.196     0.6203    0.316 1.108   196
[3] {race.ethnicity=group D}            => {math.score=(59,84]}   0.162     0.6183    0.262 1.104   162
[4] {lunch=standard,                                                                                   
     test.preparation.course=completed} => {math.score=(59,84]}   0.140     0.6167    0.227 1.101   140
[5] {lunch=standard}                    => {math.score=(59,84]}   0.393     0.6093    0.645 1.088   393
[6] {gender=male,                                                                                      
     test.preparation.course=completed} => {math.score=(59,84]}   0.105     0.6034    0.174 1.078   105
     
> rules_math_60to85_pruned <- rules_math_60to85[!is.redundant(rules_math_60to85)]
> plot(head(rules_math_60to85_pruned,by="lift"),method = "graph")

在这里插入图片描述
数学成绩(84,100]

#6.挖掘数学成绩大于等于85分跟什么有关 查看分析结果 查看提升值排行前六位的非冗余规则 绘制提升值大于1的无冗余规则的关联规则有向图
> rules_math_above85 <- apriori(StudentsPerformance_math,
+                               parameter = list(supp=0.1,conf=0.5),
+                               appearance = list(rhs=c('math.score=(84,100]')),
+                               control =list(verbose=F))
> inspect(head(rules_math_above85,by='lift'))
> inspect(head(rules_math_above85[!is.redundant(rules_math_above85)],by="lift"))
> rules_math_above85_pruned <- rules_math_above85[!is.redundant(rules_math_above85)]
> plot(head(rules_math_above85_pruned,by="lift"),method = "graph")
Error in plot.rules(head(rules_math_above85_pruned, by = "lift"), method = "graph") : 
  x contains 0 rules!

阅读成绩(0,59]

> rules_reading_below60 <- apriori(StudentsPerformance_reading,
+                               parameter = list(supp=0.1,conf=0.5),
+                               appearance = list(rhs=c('reading.score=(0,59]')),
+                               control =list(verbose=F))
> inspect(head(rules_reading_below60,by='lift'))
> inspect(head(rules_reading_below60[!is.redundant(rules_reading_below60)],by="lift"))
> rules_reading_below60_pruned <- rules_reading_below60[!is.redundant(rules_reading_below60)]
> plot(head(rules_reading_below60_pruned,by="lift"),method = "graph")
Error in plot.rules(head(rules_reading_below60_pruned, by = "lift"), method = "graph") : 
  x contains 0 rules!

阅读成绩(59,84]

> rules_reading_60to85 <- apriori(StudentsPerformance_reading,
+                              parameter = list(supp=0.1,conf=0.5),
+                              appearance = list(rhs=c('reading.score=(59,84]')),
+                              control =list(verbose=F))
> inspect(head(rules_reading_60to85,by='lift'))
    lhs                                    rhs                     support confidence coverage  lift count
[1] {race.ethnicity=group D,                                                                              
     lunch=standard}                    => {reading.score=(59,84]}   0.113     0.6766    0.167 1.135   113
[2] {gender=female,                                                                                       
     race.ethnicity=group C}            => {reading.score=(59,84]}   0.118     0.6556    0.180 1.100   118
[3] {gender=male,                                                                                         
     test.preparation.course=completed} => {reading.score=(59,84]}   0.113     0.6494    0.174 1.090   113
[4] {race.ethnicity=group C,                                                                              
     lunch=standard}                    => {reading.score=(59,84]}   0.133     0.6488    0.205 1.089   133
[5] {gender=female,                                                                                       
     lunch=standard,                                                                                      
     test.preparation.course=none}      => {reading.score=(59,84]}   0.139     0.6465    0.215 1.085   139
[6] {race.ethnicity=group C,                                                                              
     test.preparation.course=none}      => {reading.score=(59,84]}   0.130     0.6436    0.202 1.080   130
> inspect(head(rules_reading_60to85[!is.redundant(rules_reading_60to85)],by="lift"))
    lhs                                    rhs                     support confidence coverage  lift count
[1] {race.ethnicity=group D,                                                                              
     lunch=standard}                    => {reading.score=(59,84]}   0.113     0.6766    0.167 1.135   113
[2] {gender=female,                                                                                       
     race.ethnicity=group C}            => {reading.score=(59,84]}   0.118     0.6556    0.180 1.100   118
[3] {gender=male,                                                                                         
     test.preparation.course=completed} => {reading.score=(59,84]}   0.113     0.6494    0.174 1.090   113
[4] {race.ethnicity=group C,                                                                              
     lunch=standard}                    => {reading.score=(59,84]}   0.133     0.6488    0.205 1.089   133
[5] {gender=female,                                                                                       
     lunch=standard,                                                                                      
     test.preparation.course=none}      => {reading.score=(59,84]}   0.139     0.6465    0.215 1.085   139
[6] {race.ethnicity=group C,                                                                              
     test.preparation.course=none}      => {reading.score=(59,84]}   0.130     0.6436    0.202 1.080   130
> rules_reading_60to85_pruned <- rules_reading_60to85[!is.redundant(rules_reading_60to85)]
> plot(head(rules_reading_60to85_pruned,by="lift"),method = "graph")

在这里插入图片描述

阅读成绩(84,100]

> rules_reading_above85 <- apriori(StudentsPerformance_reading,
+                               parameter = list(supp=0.1,conf=0.5),
+                               appearance = list(rhs=c('reading.score=(84,100]')),
+                               control =list(verbose=F))
> inspect(head(rules_reading_above85,by='lift'))
> inspect(head(rules_reading_above85[!is.redundant(rules_reading_above85)],by="lift"))
> rules_reading_above85_pruned <- rules_reading_above85[!is.redundant(rules_reading_above85)]
> plot(head(rules_reading_above85_pruned,by="lift"),method = "graph")
Error in plot.rules(head(rules_reading_above85_pruned, by = "lift"), method = "graph") : 
  x contains 0 rules!

写作成绩(0,59]

> rules_writing_below60 <- apriori(StudentsPerformance_writing,
+                                  parameter = list(supp=0.1,conf=0.5),
+                                  appearance = list(rhs=c('writing.score=(0,59]')),
+                                  control =list(verbose=F))
+ 
> inspect(head(rules_writing_below60,by='lift'))
    lhs                               rhs                    support confidence coverage  lift count
[1] {lunch=free/reduced,                                                                            
     test.preparation.course=none} => {writing.score=(0,59]}   0.115     0.5134    0.224 1.827   115
     
> inspect(head(rules_writing_below60[!is.redundant(rules_writing_below60)],by="lift"))
    lhs                               rhs                    support confidence coverage  lift count
[1] {lunch=free/reduced,                                                                            
     test.preparation.course=none} => {writing.score=(0,59]}   0.115     0.5134    0.224 1.827   115
     
> rules_writing_below60_pruned <- rules_writing_below60[!is.redundant(rules_writing_below60)]
> plot(head(rules_writing_below60_pruned,by="lift"),method = "graph")

在这里插入图片描述

写作成绩(59,84]

> rules_writing_60to85 <- apriori(StudentsPerformance_writing,
+                                 parameter = list(supp=0.1,conf=0.5),
+                                 appearance = list(rhs=c('writing.score=(59,84]')),
+                                 control =list(verbose=F))

> inspect(head(rules_writing_60to85,by='lift'))
    lhs                                    rhs                     support confidence coverage  lift count
[1] {race.ethnicity=group D,                                                                              
     lunch=standard}                    => {writing.score=(59,84]}   0.113     0.6766    0.167 1.159   113
[2] {gender=female,                                                                                       
     lunch=standard,                                                                                      
     test.preparation.course=none}      => {writing.score=(59,84]}   0.144     0.6698    0.215 1.147   144
[3] {gender=female,                                                                                       
     lunch=standard}                    => {writing.score=(59,84]}   0.213     0.6474    0.329 1.109   213
[4] {race.ethnicity=group D}            => {writing.score=(59,84]}   0.168     0.6412    0.262 1.098   168
[5] {gender=male,                                                                                         
     test.preparation.course=completed} => {writing.score=(59,84]}   0.110     0.6322    0.174 1.083   110
[6] {test.preparation.course=completed} => {writing.score=(59,84]}   0.224     0.6257    0.358 1.071   224

> inspect(head(rules_writing_60to85[!is.redundant(rules_writing_60to85)],by="lift"))
    lhs                                    rhs                     support confidence coverage  lift count
[1] {race.ethnicity=group D,                                                                              
     lunch=standard}                    => {writing.score=(59,84]}   0.113     0.6766    0.167 1.159   113
[2] {gender=female,                                                                                       
     lunch=standard,                                                                                      
     test.preparation.course=none}      => {writing.score=(59,84]}   0.144     0.6698    0.215 1.147   144
[3] {gender=female,                                                                                       
     lunch=standard}                    => {writing.score=(59,84]}   0.213     0.6474    0.329 1.109   213
[4] {race.ethnicity=group D}            => {writing.score=(59,84]}   0.168     0.6412    0.262 1.098   168
[5] {gender=male,                                                                                         
     test.preparation.course=completed} => {writing.score=(59,84]}   0.110     0.6322    0.174 1.083   110
[6] {test.preparation.course=completed} => {writing.score=(59,84]}   0.224     0.6257    0.358 1.071   224

> rules_writing_60to85_pruned <- rules_writing_60to85[!is.redundant(rules_writing_60to85)]
> plot(head(rules_writing_60to85_pruned,by="lift"),method = "graph")

在这里插入图片描述

写作成绩(84,100]

> rules_writing_60to85_pruned <- rules_writing_60to85[!is.redundant(rules_writing_60to85)]
> plot(head(rules_writing_60to85_pruned,by="lift"),method = "graph")
> rules_writing_above85 <- apriori(StudentsPerformance_writing,
+                                  parameter = list(supp=0.1,conf=0.5),
+                                  appearance = list(rhs=c('writing.score=(84,100]')),
+                                  control =list(verbose=F))
> inspect(head(rules_writing_above85,by='lift'))
> inspect(head(rules_writing_above85[!is.redundant(rules_writing_above85)],by="lift"))
> rules_writing_above85_pruned <- rules_writing_above85[!is.redundant(rules_writing_above85)]
> plot(head(rules_writing_above85_pruned,by="lift"),method = "graph")
Error in plot.rules(head(rules_writing_above85_pruned, by = "lift"), method = "graph") : 
  x contains 0 rules!

永洪数据分析社区

永洪科技，致力于打造全球领先的数据技术厂商，具备从数据应用方案咨询、BI、AIGC智能分析、数字孪生、数据资产、数据治理、数据实施的端到端大数据价值服务能力。

更多推荐

java计算机毕业设计教师工作量统计系统基于SpringBoot的高校教师绩效测算与可视化平台教师教学任务与工作量智能汇总系统

永洪数据分析社区

BI是报表？BI是可视化？BI到底是什么？

永洪数据分析社区

AI智能体+BI可视化：1小时极速入门，市场部独力完成分析报告

商务蓝：适合正式报告活力橙：适合创意活动环保绿：适合可持续发展主题自主分析能力：不再依赖IT部门，活动当天即可产出分析结论智能数据处理：AI智能体自动完成80%的数据清洗和分析工作专业可视化：通过PowerBI制作媲美专业数据分析师的报告持续优化：建立可复用的分析模板，后续活动效率更高决策支持：基于数据快速调整营销策略，提升活动ROI现在就可以尝试部署你的第一个智能分析环境，体验从数据到决策的高速