【Python 数据科学】数据可视化seaborn

一、分布 import pandas as pd; import numpy as np; import matplotlib.pyplot as plt; import seaborn as sns; %matplotlib inline; # 先给txt文件添加字段名，再导入文件，以空格为分隔符 sep='\s+'; colums=['user_id','order_dt','order_products...

Mercy92

1101人浏览 · 2019-07-30 11:16:12

Mercy92 · 2019-07-30 11:16:12 发布

文章目录

参考： http://seaborn.pydata.org/index.html

一、分布

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Column names are supplied explicitly through names=, then the txt file is
# loaded using runs of whitespace as the field separator.
colums = ['user_id', 'order_dt', 'order_products', 'order_amount']
# Raw string: '\s' inside a plain string literal is an invalid escape
# sequence and triggers a warning on recent Python versions.
df = pd.read_table('CDNOW_master.txt', names=colums, sep=r'\s+')
df.head()

	user_id	order_dt	order_products	order_amount
0	1	19970101	1	11.77
1	2	19970112	1	12.00
2	2	19970112	5	77.00
3	3	19970102	2	20.76
4	3	19970330	2	20.76

1 distplot 概率分布图

# Distribution plots -- 1. distplot (distribution histogram).
# With kde=False the y-axis shows raw counts; otherwise it is a proportion.
# NOTE(review): distplot is reported as deprecated in newer seaborn
# releases -- confirm against the installed version.
sns.distplot(a=df['order_amount'], color='m', kde=False)

<matplotlib.axes._subplots.AxesSubplot at 0x20dd1a714a8>

在这里插入图片描述

2 kdeplot 概率密度图

# Distribution plots -- 2. kdeplot (kernel density estimate of order_amount).
sns.kdeplot(df['order_amount'])

<matplotlib.axes._subplots.AxesSubplot at 0x20dd1f952e8>

在这里插入图片描述

# Group the rows of df by user_id and total the remaining columns per user.
grouped_user = df.groupby(by='user_id').agg('sum')
grouped_user.head()

	order_dt	order_products	order_amount
user_id
1	19970101	1	11.77
2	39940224	6	89.00
3	119833602	16	156.46
4	79882233	7	100.50
5	219686137	29	385.61

3、jointplot联合密度图

# Distribution plots -- 3. jointplot (joint distribution of two variables).
# kind='reg' draws the scatter with a fitted regression line.
# x and y are passed by keyword: newer seaborn releases no longer accept them
# positionally, while the keyword form also works on older releases.
sns.jointplot(x='order_products', y='order_amount', data=grouped_user, kind='reg')

<seaborn.axisgrid.JointGrid at 0x20dce090cf8>

在这里插入图片描述

# Parse order_dt into datetime values with a fixed format (this could also be
# done at load time via the reader's parse_dates / date_parser options).
df['order_dt'] = pd.to_datetime(df['order_dt'], format='%Y%m%d')

# Pivot table: one row per user with summed amount, latest order date and
# summed product count.
rfm_aggs = {'order_amount': 'sum', 'order_dt': 'max', 'order_products': 'sum'}
rfm = df.pivot_table(index='user_id', values=list(rfm_aggs), aggfunc=rfm_aggs)

# R: days between the latest order date in the table and each user's last order.
latest_order = rfm['order_dt'].max()
rfm['R'] = (latest_order - rfm['order_dt']) / np.timedelta64(1, 'D')

# Rename the summed columns to F and M.
rfm = rfm.rename(columns={'order_products': 'F', 'order_amount': 'M'})
rfm

	M	order_dt	F	R
user_id
1	11.77	1997-01-01	1	545.0
2	89.00	1997-01-12	6	534.0
3	156.46	1998-05-28	16	33.0
4	100.50	1997-12-12	7	200.0
...	...	...	...	...
23566	36.00	1997-03-25	2	462.0
23567	20.97	1997-03-25	1	462.0
23568	121.70	1997-04-22	6	434.0
23569	25.74	1997-03-25	2	462.0
23570	94.08	1997-03-26	5	461.0

23570 rows × 4 columns

# jointplot of R against F.
# kind='reg' draws the scatter with a fitted regression line.
# x and y are passed by keyword: newer seaborn releases no longer accept them
# positionally, while the keyword form also works on older releases.
sns.jointplot(x='R', y='F', data=rfm, kind='reg')

<seaborn.axisgrid.JointGrid at 0x20dd2afe1d0>

在这里插入图片描述

4、pairplot多变量图

# 4. pairplot (pairwise relationships between several variables).
# The hue option can split the plot by category; it is not used here.
rfm_metrics = rfm.loc[:, ['R', 'F', 'M']]
sns.pairplot(rfm_metrics)

<seaborn.axisgrid.PairGrid at 0x20dd2d287f0>

在这里插入图片描述

二、分类

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline


# Select the SimHei font so that the Chinese labels are rendered.
plt.rcParams.update({'font.sans-serif': ['SimHei']})
# The csv file is read with the gbk encoding.
df = pd.read_csv('cy.csv', encoding='gbk')

df.head()

	ID	城市	类型	店名	点评	人均	口味	环境	服务
0	1	北京	私房菜	梧桐宇私房菜	45.0	80.0	7.3	7.3	7.1
1	2	北京	私房菜	小东北私房菜	1.0	35.0	6.9	6.9	6.9
2	3	北京	私房菜	辣家私房菜	1.0	NaN	6.9	6.9	6.9
3	4	北京	私房菜	鸿福天原	NaN	NaN	NaN	NaN	NaN
4	5	北京	私房菜	虾而美(北京)餐饮管理有限公司	NaN	NaN	NaN	NaN	NaN

1、boxplots箱线图

# Keep only the rows for the two cities being compared.
two_cities = "(城市=='上海')|(城市=='北京')"
df2 = df.query(two_cities)

# Categorical plots -- 1. boxplot.
# data= names the source DataFrame; hue= draws one box per city for comparison.
_, box_ax = plt.subplots(figsize=(20, 5))
sns.boxplot(data=df2, x='类型', y='口味', hue='城市', ax=box_ax)

<matplotlib.axes._subplots.AxesSubplot at 0x1bef114d898>

在这里插入图片描述

2、violinplot提琴图

# Categorical plots -- 2. violinplot.
# Observation from the plot: ratings for Japanese food are fairly concentrated.
# palette picks the colour scheme; split=True joins the two hue groups into
# one violin so the comparison is easier to read.
_, violin_ax = plt.subplots(figsize=(20, 5))
sns.violinplot(
    data=df2,
    x='类型',
    y='口味',
    hue='城市',
    split=True,
    palette='muted',
    ax=violin_ax,
)

<matplotlib.axes._subplots.AxesSubplot at 0x1beef17d470>

在这里插入图片描述

3、factorplot因子图

# Categorical plots -- 3. factor plot (named catplot in seaborn >= 0.9).
# plt.figure(figsize=(20, 5)) has no effect here: this figure-level function
# sizes itself through height (formerly size) and aspect.
# kind='box' renders the plot as box plots; aspect=2 is the width/height ratio.
# factorplot/size were renamed to catplot/height and the old names were
# later removed, so the current names are used.
sns.catplot(x='类型', y='口味', hue='城市', data=df2, height=8, kind='box', aspect=2)

<seaborn.axisgrid.FacetGrid at 0x1bef1e2e6d8>

在这里插入图片描述

# kind='violin' renders violin plots; col='城市' draws one subplot per city;
# col_wrap=4 wraps the subplots so that each row holds at most 4 of them.
# factorplot/size were renamed to catplot/height and the old names were
# later removed, so the current names are used.
sns.catplot(x='类型', y='口味', data=df, height=8, kind='violin', aspect=2, col='城市', col_wrap=4)

<seaborn.axisgrid.FacetGrid at 0x1bef1ed66a0>

在这里插入图片描述

4、barplot柱形图

# Categorical plots -- 4. barplot, one bar per city (hue) within each type.
_, bar_ax = plt.subplots(figsize=(20, 5))
sns.barplot(data=df2, x='类型', y='口味', hue='城市', ax=bar_ax)

<matplotlib.axes._subplots.AxesSubplot at 0x1befcbd6198>

在这里插入图片描述

三、线性

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Select the SimHei font so that the Chinese labels are rendered.
plt.rcParams.update({'font.sans-serif': ['SimHei']})
# The csv file is read with the gbk encoding.
df = pd.read_csv('cy.csv', encoding='gbk')

# Keep only the rows for the two cities being compared.
two_cities = "(城市=='上海')|(城市=='北京')"
df2 = df.query(two_cities)

1.回归图

# Regression plot. order=2 fits a quadratic (curved) line.
# The data is df2 narrowed by a further query; row= lays the cities out as
# rows (col= would lay them out as columns instead).
under_2000_reviews = df2.query('点评<2000')
sns.lmplot(data=under_2000_reviews, x='环境', y='口味', row='城市', order=2)

<seaborn.axisgrid.FacetGrid at 0x19b89bf17b8>

在这里插入图片描述

2.热力图

# Mean taste score for every (city, type) pair: cities as rows, types as columns.
pt = df.pivot_table(values='口味', index='城市', columns='类型', aggfunc='mean')
# Heatmap of the pivot table; annot=True writes the value inside each cell.
_, heat_ax = plt.subplots(figsize=(20, 10))
sns.heatmap(pt, annot=True, ax=heat_ax)

<matplotlib.axes._subplots.AxesSubplot at 0x19b8a8a1278>

在这里插入图片描述

永洪数据分析社区

永洪科技，致力于打造全球领先的数据技术厂商，具备从数据应用方案咨询、BI、AIGC智能分析、数字孪生、数据资产、数据治理、数据实施的端到端大数据价值服务能力。

更多推荐

java计算机毕业设计教师工作量统计系统基于SpringBoot的高校教师绩效测算与可视化平台教师教学任务与工作量智能汇总系统

永洪数据分析社区

BI是报表？BI是可视化？BI到底是什么？

永洪数据分析社区

BackstopJS 与 Prettier 集成：配置文件格式化与团队规范统一

在当今的前端开发环境中，**BackstopJS** 作为一款强大的视觉回归测试工具，已经成为保障UI一致性的重要武器。然而，随着团队规模的扩大和项目的复杂度提升，如何确保所有开发人员遵循统一的配置规范成为了一个挑战。本文将为您详细介绍如何通过 Prettier 工具实现 BackstopJS 配置文件的自动格式化，从而提升团队协作效率和代码质量。💪## 为什么需要 BackstopJS 配