# Python中用K-均值聚类来探索顾客细分

Python

1234

import pandas as pd

# Load the first worksheet (index 0) of the workbook.
# `sheet_name` is the current keyword; the older `sheetname` spelling is
# rejected by recent pandas releases.
df_offers = pd.read_excel("./WineKMC.xlsx", sheet_name=0)

# Replace the spreadsheet headers with the short names the rest of the
# analysis refers to (offer_id, varietal, min_qty, discount, ...).
df_offers.columns = [
    "offer_id",
    "campaign",
    "varietal",
    "min_qty",
    "discount",
    "origin",
    "past_peak",
]
df_offers.head()

Python

1234

## K-均值快速入门

Python

12345678

# NOTE(review): df_transactions is not defined anywhere in the visible text.
# The code below needs it to carry customer_name, offer_id and n columns
# (they are used as the pivot's index, columns and values) -- confirm the
# loading step that precedes this snippet.

# Join the offers and transactions tables. With no explicit key, pd.merge
# joins on the columns the two frames have in common.
df = pd.merge(df_offers, df_transactions)

# Create a "pivot table" which gives the number of times each customer
# responded to a given offer: one row per customer, one column per offer.
matrix = df.pivot_table(index=['customer_name'], columns=['offer_id'], values='n')

# A little tidying up: fill NA values with 0 and make the index into a column.
matrix = matrix.fillna(0).reset_index()

# Save the list of 0/1 indicator columns (everything after customer_name);
# they are used again by the later clustering/projection steps.
x_cols = matrix.columns[1:]

Python

1234567891011

from sklearn.cluster import KMeans

cluster = KMeans(n_clusters=5)

# Cluster on exactly the 0/1 indicator columns saved in x_cols.
# The previous slice, matrix.columns[2:], skipped the first offer column, so
# k-means saw one feature fewer than the PCA that is fitted on x_cols; the
# centroids then cannot be passed through pca.transform(), which expects the
# same number of features it was fitted with.
matrix['cluster'] = cluster.fit_predict(matrix[x_cols])
matrix.cluster.value_counts()

# Output recorded in the original article. Cluster labels and sizes are not
# reproducible run-to-run because no random_state is passed to KMeans.
# 2    32
# 1    22
# 4    20
# 0    15
# 3    11
# dtype: int64

Python

123456789

from sklearn.decomposition import PCA

pca = PCA(n_components=2)

# Fit the 2-component PCA once on the indicator columns and reuse the
# projected coordinates for both axes, instead of refitting per column.
coords = pca.fit_transform(matrix[x_cols])
matrix['x'] = coords[:, 0]
matrix['y'] = coords[:, 1]
matrix = matrix.reset_index()

# Keep only what is needed to plot each customer and colour it by cluster.
customer_clusters = matrix[['customer_name', 'cluster', 'x', 'y']]
customer_clusters.head()

Python

12345678

# Attach each customer's cluster label and 2-D coordinates to the
# transactions, then bring in the offer attributes for every transaction.
df = pd.merge(df_transactions, customer_clusters)
df = pd.merge(df_offers, df)

# Wildcard import kept as in the original snippet: it supplies ggplot, aes,
# geom_point and ggtitle used below and by the later plotting snippet.
from ggplot import *

# Scatter plot of the PCA coordinates, coloured by cluster label.
(
    ggplot(df, aes(x='x', y='y', color='cluster'))
    + geom_point(size=75)
    + ggtitle("Customers Grouped by Cluster")
)

Python

12345678

# Project the k-means centroids with the already-fitted PCA so they share the
# x/y coordinates used for the customers.
cluster_centers = pca.transform(cluster.cluster_centers_)
cluster_centers = pd.DataFrame(cluster_centers, columns=['x', 'y'])

# Label each centroid row with its position (0 .. number of clusters - 1).
cluster_centers['cluster'] = range(len(cluster_centers))

# Same scatter plot as before, with the centroids drawn as larger points.
(
    ggplot(df, aes(x='x', y='y', color='cluster'))
    + geom_point(size=75)
    + geom_point(cluster_centers, size=500)
    + ggtitle("Customers Grouped by Cluster")
)

Python

12

# Flag the rows that belong to cluster 4, then count varietals separately
# for rows inside and outside that cluster.
# NOTE(review): cluster numbering comes from an unseeded KMeans fit, so
# "4" may denote a different group on another run -- confirm before reading
# too much into the result.
df['is_4'] = df.cluster == 4
df.groupby("is_4").varietal.value_counts()

Python

1

# Average minimum quantity and discount, split by the is_4 flag.
by_is_4 = df.groupby("is_4")
by_is_4[['min_qty', 'discount']].mean()

- INSEAD Analytics: Cluster Analysis and Segmentation Post
- Customer Segmentation at Bain & Company
- Customer Segmentation (Wikipedia)

27 篇文章37 人订阅

0 条评论

## 相关文章

3985

1750

### 【资源】机器学习和神经网络实践：书籍及博客推介

【新智元导读】前几天我们向大家推荐了自学成为 Data Scientist 在线课程系列，很多人纷纷收藏和分享。今天新智元再针对数学，为大家介绍几本书和相关资料...

3808

2167

### 周志华Deep Forest论文参与者答网友问，或6月1日开源

【新智元导读】新智元之前发布周志华老师的Deep Forest论文引起了广泛关注和讨论。本文作者Ji Feng正是该论文的参与者，他在知乎上对这篇论文的评论做...

35910

4204

2195

### 视频编码优化之道

5月20号，在LiveVideoStack音视频技术社区举办的WebRTCon 2018大会上，上海交通大学图像所宋利教授在WebRTC与Codec专题作为出品...

4912

3376

1122