import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori
# Load the raw basket data; the "products" column holds one transaction per
# row, stored as a single comma-delimited string.
df = pd.read_csv("grocery_data.csv")
# Break every transaction string into its list of item names.
data = [basket.split(',') for basket in df["products"]]
data
[['MILK', 'BREAD', 'BISCUIT'], ['BREAD', 'MILK', 'BISCUIT', 'CORNFLAKES'], ['BREAD', 'TEA', 'BOURNVITA'], ['JAM', 'MAGGI', 'BREAD', 'MILK'], ['MAGGI', 'TEA', 'BISCUIT'], ['BREAD', 'TEA', 'BOURNVITA'], ['MAGGI', 'TEA', 'CORNFLAKES'], ['MAGGI', 'BREAD', 'TEA', 'BISCUIT'], ['JAM', 'MAGGI', 'BREAD', 'TEA'], ['BREAD', 'MILK'], ['COFFEE', 'COCK', 'BISCUIT', 'CORNFLAKES'], ['COFFEE', 'COCK', 'BISCUIT', 'CORNFLAKES'], ['COFFEE', 'SUGER', 'BOURNVITA'], ['BREAD', 'COFFEE', 'COCK'], ['BREAD', 'SUGER', 'BISCUIT'], ['COFFEE', 'SUGER', 'CORNFLAKES'], ['BREAD', 'SUGER', 'BOURNVITA'], ['BREAD', 'COFFEE', 'SUGER'], ['BREAD', 'COFFEE', 'SUGER'], ['TEA', 'MILK', 'COFFEE', 'CORNFLAKES']]
# One-hot encode the transactions: one row per basket, one column per distinct
# item found while fitting.
te = TransactionEncoder()
# Fit and transform in a single step, then cast the encoded array to integers
# so the table is displayed as 0/1.
te_data = te.fit_transform(data).astype("int")
# NOTE: `df` is rebound here; from this point on it refers to the encoded
# table, not the raw CSV contents.
df = pd.DataFrame(data=te_data, columns=te.columns_)
df
# Optional export of the encoded table:
# df.to_csv("transformed_data.csv", encoding='utf-8', index=False)
 | BISCUIT | BOURNVITA | BREAD | COCK | COFFEE | CORNFLAKES | JAM | MAGGI | MILK | SUGER | TEA |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
1 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 |
2 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
3 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 |
4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 |
5 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
6 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 1 |
7 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 |
8 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 |
9 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
10 | 1 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 |
11 | 1 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 |
12 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
13 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
14 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
15 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 0 |
16 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
17 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
18 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
19 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 0 | 1 |
# Mine frequent itemsets from the one-hot table. use_colnames=True makes the
# itemsets carry item names instead of column indices.
apriori_data = apriori(df, min_support=0.01, use_colnames=True)
# sort_values returns a new frame rather than sorting in place, so the result
# must be bound back to the name for the ordering to take effect. mergesort is
# stable, so itemsets with equal support keep their original relative order.
apriori_data = apriori_data.sort_values(
    by="support", ascending=False, kind="mergesort"
)
# Record how many items each itemset contains so results can be filtered by
# itemset size.
apriori_data['length'] = apriori_data['itemsets'].apply(len)
apriori_data
 | support | itemsets | length |
---|---|---|---|
0 | 0.35 | (BISCUIT) | 1 |
1 | 0.20 | (BOURNVITA) | 1 |
2 | 0.65 | (BREAD) | 1 |
3 | 0.15 | (COCK) | 1 |
4 | 0.40 | (COFFEE) | 1 |
5 | 0.30 | (CORNFLAKES) | 1 |
6 | 0.10 | (JAM) | 1 |
7 | 0.25 | (MAGGI) | 1 |
8 | 0.25 | (MILK) | 1 |
9 | 0.30 | (SUGER) | 1 |
10 | 0.35 | (TEA) | 1 |
11 | 0.20 | (BISCUIT, BREAD) | 2 |
12 | 0.10 | (BISCUIT, COCK) | 2 |
13 | 0.10 | (BISCUIT, COFFEE) | 2 |
14 | 0.15 | (BISCUIT, CORNFLAKES) | 2 |
15 | 0.10 | (BISCUIT, MAGGI) | 2 |
16 | 0.10 | (MILK, BISCUIT) | 2 |
17 | 0.05 | (BISCUIT, SUGER) | 2 |
18 | 0.10 | (BISCUIT, TEA) | 2 |
19 | 0.15 | (BREAD, BOURNVITA) | 2 |
20 | 0.05 | (COFFEE, BOURNVITA) | 2 |
21 | 0.10 | (SUGER, BOURNVITA) | 2 |
22 | 0.10 | (TEA, BOURNVITA) | 2 |
23 | 0.05 | (COCK, BREAD) | 2 |
24 | 0.15 | (BREAD, COFFEE) | 2 |
25 | 0.05 | (CORNFLAKES, BREAD) | 2 |
26 | 0.10 | (JAM, BREAD) | 2 |
27 | 0.15 | (BREAD, MAGGI) | 2 |
28 | 0.20 | (MILK, BREAD) | 2 |
29 | 0.20 | (BREAD, SUGER) | 2 |
... | ... | ... | ... |
53 | 0.10 | (CORNFLAKES, BISCUIT, COCK) | 3 |
54 | 0.10 | (CORNFLAKES, BISCUIT, COFFEE) | 3 |
55 | 0.05 | (MILK, BISCUIT, CORNFLAKES) | 3 |
56 | 0.10 | (BISCUIT, TEA, MAGGI) | 3 |
57 | 0.05 | (SUGER, BREAD, BOURNVITA) | 3 |
58 | 0.10 | (TEA, BREAD, BOURNVITA) | 3 |
59 | 0.05 | (SUGER, COFFEE, BOURNVITA) | 3 |
60 | 0.05 | (COCK, BREAD, COFFEE) | 3 |
61 | 0.10 | (BREAD, COFFEE, SUGER) | 3 |
62 | 0.05 | (CORNFLAKES, MILK, BREAD) | 3 |
63 | 0.10 | (JAM, BREAD, MAGGI) | 3 |
64 | 0.05 | (MILK, BREAD, JAM) | 3 |
65 | 0.05 | (JAM, TEA, BREAD) | 3 |
66 | 0.05 | (MILK, BREAD, MAGGI) | 3 |
67 | 0.10 | (TEA, BREAD, MAGGI) | 3 |
68 | 0.10 | (CORNFLAKES, COCK, COFFEE) | 3 |
69 | 0.05 | (CORNFLAKES, MILK, COFFEE) | 3 |
70 | 0.05 | (CORNFLAKES, COFFEE, SUGER) | 3 |
71 | 0.05 | (CORNFLAKES, TEA, COFFEE) | 3 |
72 | 0.05 | (MILK, TEA, COFFEE) | 3 |
73 | 0.05 | (TEA, CORNFLAKES, MAGGI) | 3 |
74 | 0.05 | (MILK, TEA, CORNFLAKES) | 3 |
75 | 0.05 | (MAGGI, MILK, JAM) | 3 |
76 | 0.05 | (JAM, TEA, MAGGI) | 3 |
77 | 0.05 | (CORNFLAKES, MILK, BISCUIT, BREAD) | 4 |
78 | 0.05 | (TEA, BISCUIT, BREAD, MAGGI) | 4 |
79 | 0.10 | (CORNFLAKES, BISCUIT, COCK, COFFEE) | 4 |
80 | 0.05 | (MAGGI, MILK, BREAD, JAM) | 4 |
81 | 0.05 | (JAM, TEA, BREAD, MAGGI) | 4 |
82 | 0.05 | (CORNFLAKES, MILK, TEA, COFFEE) | 4 |
83 rows × 3 columns
# Show only the two-item itemsets whose support is at least 0.05.
apriori_data.loc[apriori_data['length'].eq(2) & apriori_data['support'].ge(0.05)]
 | support | itemsets | length |
---|---|---|---|
11 | 0.20 | (BISCUIT, BREAD) | 2 |
12 | 0.10 | (BISCUIT, COCK) | 2 |
13 | 0.10 | (BISCUIT, COFFEE) | 2 |
14 | 0.15 | (BISCUIT, CORNFLAKES) | 2 |
15 | 0.10 | (BISCUIT, MAGGI) | 2 |
16 | 0.10 | (MILK, BISCUIT) | 2 |
17 | 0.05 | (BISCUIT, SUGER) | 2 |
18 | 0.10 | (BISCUIT, TEA) | 2 |
19 | 0.15 | (BREAD, BOURNVITA) | 2 |
20 | 0.05 | (COFFEE, BOURNVITA) | 2 |
21 | 0.10 | (SUGER, BOURNVITA) | 2 |
22 | 0.10 | (TEA, BOURNVITA) | 2 |
23 | 0.05 | (COCK, BREAD) | 2 |
24 | 0.15 | (BREAD, COFFEE) | 2 |
25 | 0.05 | (CORNFLAKES, BREAD) | 2 |
26 | 0.10 | (JAM, BREAD) | 2 |
27 | 0.15 | (BREAD, MAGGI) | 2 |
28 | 0.20 | (MILK, BREAD) | 2 |
29 | 0.20 | (BREAD, SUGER) | 2 |
30 | 0.20 | (TEA, BREAD) | 2 |
31 | 0.15 | (COCK, COFFEE) | 2 |
32 | 0.10 | (CORNFLAKES, COCK) | 2 |
33 | 0.20 | (CORNFLAKES, COFFEE) | 2 |
34 | 0.05 | (MILK, COFFEE) | 2 |
35 | 0.20 | (COFFEE, SUGER) | 2 |
36 | 0.05 | (TEA, COFFEE) | 2 |
37 | 0.05 | (CORNFLAKES, MAGGI) | 2 |
38 | 0.10 | (MILK, CORNFLAKES) | 2 |
39 | 0.05 | (CORNFLAKES, SUGER) | 2 |
40 | 0.10 | (TEA, CORNFLAKES) | 2 |
41 | 0.10 | (JAM, MAGGI) | 2 |
42 | 0.05 | (MILK, JAM) | 2 |
43 | 0.05 | (JAM, TEA) | 2 |
44 | 0.05 | (MILK, MAGGI) | 2 |
45 | 0.20 | (TEA, MAGGI) | 2 |
46 | 0.05 | (MILK, TEA) | 2 |