Python 之关联规则

关联规则
以下示例使用 mlxtend 的 Apriori 算法,计算不同商品之间的关联强度(支持度、置信度与提升度)。

import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
# Market-basket example: mine frequent itemsets from six toy transactions and
# keep only the association rules with high lift and confidence.

# Each entry in 'Basket' is the list of items bought in one transaction.
retail_shopping_basket = {
    'ID': [1, 2, 3, 4, 5, 6],
    'Basket': [
        ['Beer', 'Diaper', 'Pretzels', 'Chips', 'Aspirin'],
        ['Diaper', 'Beer', 'Chips', 'Lotion', 'Juice', 'BabyFood', 'Milk'],
        ['Soda', 'Chips', 'Milk'],
        ['Soup', 'Beer', 'Diaper', 'Milk', 'IceCream'],
        ['Soda', 'Coffee', 'Milk', 'Bread'],
        ['Beer', 'Chips'],
    ],
}
retail = pd.DataFrame(retail_shopping_basket)

# Pin the column order explicitly; dict key order was not guaranteed by the
# language before Python 3.7, so this keeps 'ID' first everywhere.
retail = retail[['ID', 'Basket']]

# One-hot encode the baskets: join every item list into a comma-separated
# string, then split it back into one 0/1 indicator column per distinct item.
retail_Basket = retail.Basket.str.join(',')
retail_Basket = retail_Basket.str.get_dummies(',')

# Alternative layout: to keep the transaction ID next to the indicators, use
# retail.drop(columns='Basket').join(retail_Basket) and drop 'ID' again
# before handing the frame to apriori.

# Itemsets that occur in at least half of the transactions. apriori is given
# a boolean view of the indicators, which is the input form mlxtend
# recommends; retail_Basket itself stays as 0/1 integers.
frequent_itemsets = apriori(
    retail_Basket.astype(bool), min_support=0.50, use_colnames=True
)

# Derive rules with lift >= 1. The result must be bound to `rules`: the
# filter below reads it, and leaving it unassigned raised a NameError.
rules = association_rules(frequent_itemsets, metric='lift', min_threshold=1)

# Keep only the strong rules: lift above 1.1 and confidence above 0.8.
rules = rules[(rules['lift'] > 1.1) & (rules['confidence'] > 0.8)]

# Bare expression: shows the table in a notebook/REPL, no effect in a script.
rules
Tags: