The ever-changing mobile landscape is a challenging space to navigate. The share of mobile usage relative to desktop keeps increasing. Android holds about 53.2% of the smartphone market, while iOS holds 43%. To get more people to download your app, you need to make sure they can easily find it. Mobile app analytics is a great way to understand the existing strategy to drive growth and retention of future users.
With millions of apps available nowadays, the following data set has become key to identifying the top trending apps in the iOS App Store. This data set contains details of more than 7000 Apple iOS mobile applications. The data was extracted from the iTunes Search API at the Apple Inc. website. R and Linux web scraping tools were used for this study.
appleStore.csv
"id" : App ID
"track_name": App Name
"size_bytes": Size (in Bytes)
"currency": Currency Type
"price": Price amount
"rating_count_tot": User Rating counts (for all versions)
"rating_count_ver": User Rating counts (for current version)
"user_rating" : Average User Rating value (for all versions)
"user_rating_ver": Average User Rating value (for current version)
"ver" : Latest version code
"cont_rating": Content Rating
"prime_genre": Primary Genre
"sup_devices.num": Number of supporting devices
"ipadSc_urls.num": Number of screenshots shown for display
"lang.num": Number of supported languages
"vpp_lic": Vpp Device Based Licensing Enabled
appleStore_description.csv
id : App ID
track_name: Application name
size_bytes: Memory size (in Bytes)
app_desc: Application description
import pandas as pd
import numpy as np
import plotly_express as px
import plotly as py
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import random
import dash
import dash_html_components as html
import dash_core_components as dcc
Merge the two DataFrames mentioned above.
主要分析的是APP类别
# Random colour generation: each colour is "#" followed by six hex digits, e.g. "#12A4F6".
def random_color_generator(number_of_colors):
    """Return a list of random hex colour strings.

    Args:
        number_of_colors: How many colours to generate.

    Returns:
        A list with ``number_of_colors`` entries, each of the form
        ``"#XXXXXX"`` where every ``X`` is an uppercase hex digit. The list
        is empty when ``number_of_colors`` is zero or negative.
    """
    hex_digits = '0123456789ABCDEF'
    # One random.choice call per digit, drawn colour by colour, so a seeded
    # RNG yields a reproducible sequence.
    return [
        "#" + ''.join(random.choice(hex_digits) for _ in range(6))
        for _ in range(number_of_colors)
    ]
# Bar chart built from cnt_srs (index on x, values on y), labelled with `text`.
# NOTE(review): cnt_srs and text are defined outside this section — presumably
# per-genre counts and their percentage labels; confirm against the earlier cell.
trace = go.Bar(
    x=cnt_srs.index,
    y=cnt_srs.values,
    text=text,
    marker={
        "color": random_color_generator(100),
        # outline colour and width of each bar
        "line": {"color": 'rgb(8, 48, 107)', "width": 1.5},
    },
    opacity=0.7,  # transparency
)

# The figure's data must be given as a list of traces.
data = [trace]

# Layout settings.
layout = go.Layout(
    title='Prime genre',  # title of the whole figure
    margin={"l": 100},  # left margin
    xaxis={"title": 'Type of app'},  # axis titles
    yaxis={"title": 'Count of app'},
    width=900,  # figure width and height
    height=500,
)

fig = go.Figure(data=data, layout=layout)
# Draw each bar's text label outside the bar.
fig.update_traces(textposition="outside")
fig.show()
# Split the rows on the isNotFree flag: 0 -> free, 1 -> not free.
df_free = df.loc[df["isNotFree"].eq(0)]  # free
df_NotFree = df.loc[df["isNotFree"].eq(1)]  # not free
# Random colour generation: each colour is "#" followed by six hex digits, e.g. "#12A4F6".
# NOTE(review): same function as the earlier definition in this file; presumably
# repeated so this notebook cell can run on its own.
def random_color_generator(number_of_colors):
    """Return a list of random hex colour strings.

    Args:
        number_of_colors: How many colours to generate.

    Returns:
        A list with ``number_of_colors`` entries, each of the form
        ``"#XXXXXX"`` where every ``X`` is an uppercase hex digit. The list
        is empty when ``number_of_colors`` is zero or negative.
    """
    hex_digits = '0123456789ABCDEF'
    # One random.choice call per digit, drawn colour by colour, so a seeded
    # RNG yields a reproducible sequence.
    return [
        "#" + ''.join(random.choice(hex_digits) for _ in range(6))
        for _ in range(number_of_colors)
    ]
# Build a 2x2 subplot grid: two single bar charts on top, one grouped bar
# chart spanning the whole bottom row.
fig = make_subplots(
    rows=2,
    cols=2,
    vertical_spacing=0.25,  # gap between the upper and lower rows
    subplot_titles=("(1)Free", "(2)NotFree", "(3)Group Bar of Free & notFree"),  # one title per subplot
    specs=[
        [{"type": "xy"}, {"type": "xy"}],  # subplot type of each cell
        # second row: only the first cell holds a plot (spanning 2 columns); (2, 2) is empty
        [{"rowspan": 1, "colspan": 2}, None],
    ],
)

# Per-genre counts for the free and not-free subsets.
cnt_srs1 = df_free['prime_genre'].value_counts()
cnt_srs2 = df_NotFree['prime_genre'].value_counts()

# Each genre's share of its subset, formatted as a percentage label.
text1 = [f"{100 * (value / cnt_srs1.sum()):.2f}%" for value in cnt_srs1.values]
text2 = [f"{100 * (value / cnt_srs2.sum()):.2f}%" for value in cnt_srs2.values]

# Single bar charts (randomly coloured, outlined bars).
# 1 - free
trace1 = go.Bar(
    x=cnt_srs1.index,
    y=cnt_srs1.values,
    text=text1,
    marker={
        "color": random_color_generator(100),
        "line": {"color": 'rgb(8, 48, 107)', "width": 1.5},
    },
    opacity=0.7,
)
# 2 - not free
trace2 = go.Bar(
    x=cnt_srs2.index,
    y=cnt_srs2.values,
    text=text2,
    marker={
        "color": random_color_generator(100),
        "line": {"color": 'rgb(8, 48, 107)', "width": 1.5},
    },
    opacity=0.7,
)

# Grouped bar chart: the same two series drawn together with default colours.
# The traces are left unnamed; the legend is hidden in the layout below.
trace3 = go.Bar(x=cnt_srs1.index, y=cnt_srs1.values, text=text1, opacity=0.7)
trace4 = go.Bar(x=cnt_srs2.index, y=cnt_srs2.values, text=text2, opacity=0.7)

# Place the traces: singles in the top row, both grouped traces in cell (2, 1).
fig.add_trace(trace1, row=1, col=1)
fig.add_trace(trace2, row=1, col=2)
fig.add_trace(trace3, row=2, col=1)
fig.add_trace(trace4, row=2, col=1)

fig.update_layout(
    height=800,
    width=900,
    title_text="Free vs NotFree",  # title of the whole figure
    showlegend=False,  # hide the legend in the top-right corner
)
fig.show()
# Mean user rating per primary genre, sorted from highest to lowest.
cnt_srs = (
    df.groupby('prime_genre')['user_rating']
    .mean()
    .sort_values(ascending=False)
)

# One randomly coloured, outlined bar per genre.
trace = go.Bar(
    x=cnt_srs.index,   # genres
    y=cnt_srs.values,  # mean ratings
    marker={
        "color": random_color_generator(100),
        "line": {"color": 'rgb(8, 48, 107)', "width": 1.5},
    },
    opacity=0.7,
)
data = [trace]

layout = go.Layout(
    title='User rating depending on Prime genre',
    margin={"l": 100},
    xaxis={"title": 'Genre'},
    yaxis={"title": 'Mean User Rating'},
    width=800,
    height=500,
)

fig = go.Figure(data=data, layout=layout)
fig.show()
# How to make a heatmap.
# Working copy of df without the "id" column (used by the correlation heatmap later on).
df_temp = df.drop(columns="id")

# Minimal go.Heatmap example with hard-coded values: z is indexed [row (y)][column (x)].
fig = go.Figure(
    data=[
        go.Heatmap(
            z=[
                [1, None, 30, 50, 1],
                [20, 1, 60, 80, 30],
                [30, 60, 1, -10, 20],
            ],
            x=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday'],
            y=['Morning', 'Afternoon', 'Evening'],
            hoverongaps=False,
        )
    ]
)
fig.show()
# Heatmap of df_corr, using its column labels on both axes.
# NOTE(review): df_corr is defined outside this section — presumably a square
# correlation matrix; confirm against the cell that creates it.
fig = go.Figure(
    data=[
        go.Heatmap(
            z=df_corr.values,
            x=df_corr.columns.values,
            y=df_corr.columns.values,
            colorscale='gnbu',
        )
    ],
    layout=go.Layout(
        width=800,
        height=700,
        title='Pearson Correlation of float-type features',
        xaxis={"ticks": ''},  # no tick marks on either axis
        yaxis={"ticks": ''},
        margin={"l": 100},
    ),
)
fig.show()
# Pearson correlation heatmap of the numeric columns of df_temp.
# The matrix is computed once and reused for z, x and y. It is restricted to
# numeric/bool columns because DataFrame.corr() raises on non-numeric columns
# in pandas >= 2.0 instead of silently dropping them.
corr_matrix = df_temp.select_dtypes(include=["number", "bool"]).corr()

data = [
    go.Heatmap(
        z=corr_matrix.values,
        x=corr_matrix.columns.values,
        y=corr_matrix.columns.values,
        # Other built-in colour scales include 'deep', 'delta', 'dense', 'earth',
        # 'edge', 'electric', 'emrld', 'fall', 'geyser', 'gnbu', 'gray', 'greens'.
        colorscale='magma',
    )
]

layout = go.Layout(
    title='Pearson Correlation of float-type features',
    xaxis=dict(ticks=''),  # no tick marks on either axis
    yaxis=dict(ticks=''),
    width=800,
    height=700,
    margin=dict(l=100),
)

fig = go.Figure(data=data, layout=layout)
fig.show()