# Install the arules package on first use, then attach it.
# requireNamespace() only checks availability (it does not attach the package),
# so library() runs unconditionally afterwards and stops with an error if the
# package is still unavailable instead of continuing silently.
if (!requireNamespace("arules", quietly = TRUE)) {
  install.packages("arules")
}
library(arules)
# Load the Groceries transactions data set
data("Groceries")

# Print a summary of the data set.
# Lines prefixed with "#>" are console output recorded from an earlier
# session; they are kept as comments so that the script remains valid R.
summary(Groceries)
#> transactions as itemMatrix in sparse format with
#>  9835 rows (elements/itemsets/transactions) and
#>  169 columns (items) and a density of 0.02609146
#>
#> most frequent items:
#>       whole milk other vegetables       rolls/buns             soda           yogurt          (Other)
#>             2513             1903             1809             1715             1372            34055
#>
#> element (itemset/transaction) length distribution:
#> sizes
#>    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15   16   17   18   19   20   21   22   23
#> 2159 1643 1299 1005  855  645  545  438  350  246  182  117   78   77   55   46   29   14   14    9   11    4    6
#>   24   26   27   28   29   32
#>    1    1    1    1    3    1
#>
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
#>   1.000   2.000   3.000   4.409   6.000  32.000
#>
#> includes extended item information - examples:
#>        labels  level2           level1
#> 1 frankfurter sausage meat and sausage
#> 2     sausage sausage meat and sausage
#> 3  liver loaf sausage meat and sausage
# Show the first 5 transactions.
# Lines prefixed with "#>" are console output recorded from an earlier
# session; they are kept as comments so that the script remains valid R.
inspect(Groceries[1:5])
#>     items
#> [1] {citrus fruit, semi-finished bread, margarine, ready soups}
#> [2] {tropical fruit, yogurt, coffee}
#> [3] {whole milk}
#> [4] {pip fruit, yogurt, cream cheese , meat spreads}
#> [5] {other vegetables, whole milk, condensed milk, long life bakery product}
# Show the full internal structure of the data set
str(Groceries)

# Mine association rules with a minimum support of 0.01 and a minimum
# confidence of 0.5 (parameter names spelled out instead of abbreviated)
rules <- apriori(
  Groceries,
  parameter = list(support = 0.01, confidence = 0.5)
)

# Print the size of the resulting rule set.
# The "#>" line is console output recorded from an earlier session.
rules
#> set of 15 rules

# Show the rules with the highest lift, at most 10 of them.
# head() is used instead of [1:10] so the call does not index past the end
# when fewer than 10 rules are found.
inspect(head(sort(rules, by = "lift"), n = 10L))