The Algorithms
About

Association

H
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori

# Load the raw grocery transactions; the "products" column stores each basket
# as one comma-separated string.
df = pd.read_csv("grocery_data.csv")

# Break every basket string into its list of item names.
data = [basket.split(',') for basket in df["products"]]
data
[['MILK', 'BREAD', 'BISCUIT'],
 ['BREAD', 'MILK', 'BISCUIT', 'CORNFLAKES'],
 ['BREAD', 'TEA', 'BOURNVITA'],
 ['JAM', 'MAGGI', 'BREAD', 'MILK'],
 ['MAGGI', 'TEA', 'BISCUIT'],
 ['BREAD', 'TEA', 'BOURNVITA'],
 ['MAGGI', 'TEA', 'CORNFLAKES'],
 ['MAGGI', 'BREAD', 'TEA', 'BISCUIT'],
 ['JAM', 'MAGGI', 'BREAD', 'TEA'],
 ['BREAD', 'MILK'],
 ['COFFEE', 'COCK', 'BISCUIT', 'CORNFLAKES'],
 ['COFFEE', 'COCK', 'BISCUIT', 'CORNFLAKES'],
 ['COFFEE', 'SUGER', 'BOURNVITA'],
 ['BREAD', 'COFFEE', 'COCK'],
 ['BREAD', 'SUGER', 'BISCUIT'],
 ['COFFEE', 'SUGER', 'CORNFLAKES'],
 ['BREAD', 'SUGER', 'BOURNVITA'],
 ['BREAD', 'COFFEE', 'SUGER'],
 ['BREAD', 'COFFEE', 'SUGER'],
 ['TEA', 'MILK', 'COFFEE', 'CORNFLAKES']]
# One-hot encode the baskets: one row per transaction, one column per item.
te = TransactionEncoder()
te.fit(data)

# Cast to integers so the table displays 0/1 flags.
# NOTE(review): recent mlxtend releases reportedly prefer boolean input for
# apriori -- confirm whether the integer cast is still wanted.
te_data = te.transform(data).astype("int")

# Rebuild `df` as the encoded table, labelled with the item names the encoder learned.
df = pd.DataFrame(data=te_data, columns=te.columns_)
df
# Optional export of the encoded table:
# df.to_csv("transformed_data.csv", encoding='utf-8', index=False)
BISCUIT BOURNVITA BREAD COCK COFFEE CORNFLAKES JAM MAGGI MILK SUGER TEA
0 1 0 1 0 0 0 0 0 1 0 0
1 1 0 1 0 0 1 0 0 1 0 0
2 0 1 1 0 0 0 0 0 0 0 1
3 0 0 1 0 0 0 1 1 1 0 0
4 1 0 0 0 0 0 0 1 0 0 1
5 0 1 1 0 0 0 0 0 0 0 1
6 0 0 0 0 0 1 0 1 0 0 1
7 1 0 1 0 0 0 0 1 0 0 1
8 0 0 1 0 0 0 1 1 0 0 1
9 0 0 1 0 0 0 0 0 1 0 0
10 1 0 0 1 1 1 0 0 0 0 0
11 1 0 0 1 1 1 0 0 0 0 0
12 0 1 0 0 1 0 0 0 0 1 0
13 0 0 1 1 1 0 0 0 0 0 0
14 1 0 1 0 0 0 0 0 0 1 0
15 0 0 0 0 1 1 0 0 0 1 0
16 0 1 1 0 0 0 0 0 0 1 0
17 0 0 1 0 1 0 0 0 0 1 0
18 0 0 1 0 1 0 0 0 0 1 0
19 0 0 0 0 1 1 0 0 1 0 1
# Mine frequent itemsets from the one-hot encoded table, keeping the item
# names (rather than column indices) in the resulting `itemsets` column.
apriori_data = apriori(df, min_support=0.01, use_colnames=True)

# Rows are left in the order apriori returns them. `DataFrame.sort_values`
# returns a new frame instead of sorting in place, so a bare call in the middle
# of a cell has no effect; to see a ranking, capture or display the result
# explicitly, e.g. `apriori_data.sort_values(by="support", ascending=False)`
# as the last expression of a cell.

# Record how many items each itemset contains so it can be filtered by size.
apriori_data['length'] = apriori_data['itemsets'].apply(len)

apriori_data
support itemsets length
0 0.35 (BISCUIT) 1
1 0.20 (BOURNVITA) 1
2 0.65 (BREAD) 1
3 0.15 (COCK) 1
4 0.40 (COFFEE) 1
5 0.30 (CORNFLAKES) 1
6 0.10 (JAM) 1
7 0.25 (MAGGI) 1
8 0.25 (MILK) 1
9 0.30 (SUGER) 1
10 0.35 (TEA) 1
11 0.20 (BISCUIT, BREAD) 2
12 0.10 (BISCUIT, COCK) 2
13 0.10 (BISCUIT, COFFEE) 2
14 0.15 (BISCUIT, CORNFLAKES) 2
15 0.10 (BISCUIT, MAGGI) 2
16 0.10 (MILK, BISCUIT) 2
17 0.05 (BISCUIT, SUGER) 2
18 0.10 (BISCUIT, TEA) 2
19 0.15 (BREAD, BOURNVITA) 2
20 0.05 (COFFEE, BOURNVITA) 2
21 0.10 (SUGER, BOURNVITA) 2
22 0.10 (TEA, BOURNVITA) 2
23 0.05 (COCK, BREAD) 2
24 0.15 (BREAD, COFFEE) 2
25 0.05 (CORNFLAKES, BREAD) 2
26 0.10 (JAM, BREAD) 2
27 0.15 (BREAD, MAGGI) 2
28 0.20 (MILK, BREAD) 2
29 0.20 (BREAD, SUGER) 2
... ... ... ...
53 0.10 (CORNFLAKES, BISCUIT, COCK) 3
54 0.10 (CORNFLAKES, BISCUIT, COFFEE) 3
55 0.05 (MILK, BISCUIT, CORNFLAKES) 3
56 0.10 (BISCUIT, TEA, MAGGI) 3
57 0.05 (SUGER, BREAD, BOURNVITA) 3
58 0.10 (TEA, BREAD, BOURNVITA) 3
59 0.05 (SUGER, COFFEE, BOURNVITA) 3
60 0.05 (COCK, BREAD, COFFEE) 3
61 0.10 (BREAD, COFFEE, SUGER) 3
62 0.05 (CORNFLAKES, MILK, BREAD) 3
63 0.10 (JAM, BREAD, MAGGI) 3
64 0.05 (MILK, BREAD, JAM) 3
65 0.05 (JAM, TEA, BREAD) 3
66 0.05 (MILK, BREAD, MAGGI) 3
67 0.10 (TEA, BREAD, MAGGI) 3
68 0.10 (CORNFLAKES, COCK, COFFEE) 3
69 0.05 (CORNFLAKES, MILK, COFFEE) 3
70 0.05 (CORNFLAKES, COFFEE, SUGER) 3
71 0.05 (CORNFLAKES, TEA, COFFEE) 3
72 0.05 (MILK, TEA, COFFEE) 3
73 0.05 (TEA, CORNFLAKES, MAGGI) 3
74 0.05 (MILK, TEA, CORNFLAKES) 3
75 0.05 (MAGGI, MILK, JAM) 3
76 0.05 (JAM, TEA, MAGGI) 3
77 0.05 (CORNFLAKES, MILK, BISCUIT, BREAD) 4
78 0.05 (TEA, BISCUIT, BREAD, MAGGI) 4
79 0.10 (CORNFLAKES, BISCUIT, COCK, COFFEE) 4
80 0.05 (MAGGI, MILK, BREAD, JAM) 4
81 0.05 (JAM, TEA, BREAD, MAGGI) 4
82 0.05 (CORNFLAKES, MILK, TEA, COFFEE) 4

83 rows × 3 columns

# Show only the two-item itemsets whose support is at least 0.05.
is_pair = apriori_data['length'] == 2
has_min_support = apriori_data['support'] >= 0.05
apriori_data[is_pair & has_min_support]
support itemsets length
11 0.20 (BISCUIT, BREAD) 2
12 0.10 (BISCUIT, COCK) 2
13 0.10 (BISCUIT, COFFEE) 2
14 0.15 (BISCUIT, CORNFLAKES) 2
15 0.10 (BISCUIT, MAGGI) 2
16 0.10 (MILK, BISCUIT) 2
17 0.05 (BISCUIT, SUGER) 2
18 0.10 (BISCUIT, TEA) 2
19 0.15 (BREAD, BOURNVITA) 2
20 0.05 (COFFEE, BOURNVITA) 2
21 0.10 (SUGER, BOURNVITA) 2
22 0.10 (TEA, BOURNVITA) 2
23 0.05 (COCK, BREAD) 2
24 0.15 (BREAD, COFFEE) 2
25 0.05 (CORNFLAKES, BREAD) 2
26 0.10 (JAM, BREAD) 2
27 0.15 (BREAD, MAGGI) 2
28 0.20 (MILK, BREAD) 2
29 0.20 (BREAD, SUGER) 2
30 0.20 (TEA, BREAD) 2
31 0.15 (COCK, COFFEE) 2
32 0.10 (CORNFLAKES, COCK) 2
33 0.20 (CORNFLAKES, COFFEE) 2
34 0.05 (MILK, COFFEE) 2
35 0.20 (COFFEE, SUGER) 2
36 0.05 (TEA, COFFEE) 2
37 0.05 (CORNFLAKES, MAGGI) 2
38 0.10 (MILK, CORNFLAKES) 2
39 0.05 (CORNFLAKES, SUGER) 2
40 0.10 (TEA, CORNFLAKES) 2
41 0.10 (JAM, MAGGI) 2
42 0.05 (MILK, JAM) 2
43 0.05 (JAM, TEA) 2
44 0.05 (MILK, MAGGI) 2
45 0.20 (TEA, MAGGI) 2
46 0.05 (MILK, TEA) 2