一、matplotlib基础

In [1]:
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline 
In [2]:
import pandas as pd
import numpy as np

常用的plt对象

image.png

In [3]:
# Everything in Python is an object, and so are the elements of a Matplotlib
# plot. This cell builds a figure with one subplot and inspects its two axes.
fig = plt.figure()
ax = fig.add_subplot(111)  # add one subplot: 1 row, 1 column, first slot
plt.show()

# Fetch the x-axis and y-axis objects that belong to the subplot.
xax, yax = ax.xaxis, ax.yaxis

# The `.figure` attribute of each axis refers to the figure it lives in,
# so both prints show the same figure.
for axis_obj in (xax, yax):
    print(axis_obj.figure)
Figure(432x288)
Figure(432x288)
In [5]:
# Named hex colour constants, grouped by hue family.
# The trailing comment on each line gives the colour name and its (R, G, B) triple.

# --- reds and warm tones ---
r_hex = '#dc2624'    # red        (220, 38, 36)
r1_hex = '#e87a59'   # red        (232, 122, 89)
o_hex = '#dc8018'    # orange     (220, 128, 24)
tn_hex = '#C89F91'   # tan        (200, 159, 145)

# --- teals and greens ---
dt_hex = '#2b4750'   # dark teal  (43, 71, 80)
tl_hex = '#45a0a2'   # teal       (69, 160, 162)
tl1_hex = '#7dcaa9'  # teal       (125, 202, 169)
g_hex = '#649E7D'    # green      (100, 158, 125)

# --- greys ---
g50_hex = '#6c6d6c'  # grey-50    (108, 109, 108)
bg_hex = '#4f6268'   # blue grey  (79, 98, 104)
g25_hex = '#c7cccf'  # grey-25    (199, 204, 207)

在figure中显示文本并自定义位置

In [13]:
# Place a piece of text at a chosen position inside a new figure.
# Arguments passed to plt.text below:
#   x, y   - horizontal and vertical coordinates of the text
#   s      - the string to display
#   ha, va - horizontal and vertical alignment
#   size   - font size
#   alpha  - opacity of the text (0.5 is half transparent)
plt.figure()
plt.text(
    x=0.5, y=0.5,
    s='Figure',
    ha='center', va='center',
    size=20,
    alpha=0.5,
)
Out[13]:
Text(0.5, 0.5, 'Figure')

先用 Image.open() 打开图片,再用 np.array() 将像素存入 ndarray,最后用 plt.imshow() 展示。

In [21]:
from PIL import Image  # image loading / processing module

plt.figure()

# Open the picture and convert its pixels into an ndarray.
im = np.array(Image.open('111.png'))

# Passing an empty list removes every tick from the x and y axes.
plt.xticks([])
plt.yticks([])

plt.imshow(im)
plt.show()

添加基本折线图

In [24]:
# Simplest possible line chart: one straight segment from (0, 0) to (1, 1).
plt.figure()
x_values, y_values = [0, 1], [0, 1]
plt.plot(x_values, y_values)
plt.show()

绘制子图

In [29]:
# plt.subplot(rows, columns, i) selects the i-th plot of a rows x columns grid.
# Once a subplot has been declared, every following pyplot call applies to
# that subplot only, until the next subplot is declared.
plt.figure()
for position in (1, 2):
    plt.subplot(2, 1, position)
    plt.xticks([])
    plt.yticks([])
    plt.text(x=0.5, y=0.5, s='subplot(2,1,%d)' % position,
             ha='center', va='center', size=20, alpha=0.5)
plt.show()
In [39]:
# Draw a 2x2 grid of subplots in a loop and label each one with the
# plt.subplot(...) call that would select it.
fig, axes = plt.subplots(nrows=2, ncols=2)

# enumerate() turns any iterable (list, tuple, string, ...) into a sequence of
# (index, item) pairs; it is typically used in for loops. axes.flat walks
# through every Axes of the grid one by one.
for i, ax in enumerate(axes.flat):
    ax.set(xticks=[], yticks=[])
    # enumerate() counts from 0 while subplot positions count from 1, hence
    # i + 1. The comma before the index keeps the label a valid call,
    # e.g. 'subplot(2,2,1)'.
    s = 'subplot(2,2,' + str(i + 1) + ')'
    ax.text(0.5, 0.5, s, size=20, alpha=0.5, ha='center', va='center')
In [45]:
# Create a 2x2 grid of subplots and show what the loop variables contain.
fig, axes = plt.subplots(nrows=2, ncols=2)

# enumerate() pairs every item of an iterable (list, tuple, string, ...) with
# its running index, which is the usual way to get both inside a for loop.
for i, ax in enumerate(axes.flat):
    # Index and Axes object each go on their own line.
    print(i, ax, sep='\n')
0
AxesSubplot(0.125,0.536818;0.352273x0.343182)
1
AxesSubplot(0.547727,0.536818;0.352273x0.343182)
2
AxesSubplot(0.125,0.125;0.352273x0.343182)
3
AxesSubplot(0.547727,0.125;0.352273x0.343182)

坐标系

坐标系比子图更通用,有两种生成方式

  • 用 gridspec 包加上 subplot()
  • 用 plt.axes()
In [72]:
## Irregular grid: axes that span several cells of a 3x3 GridSpec
import matplotlib.gridspec as gridspec

G = gridspec.GridSpec(3, 3)

# A GridSpec is sliced with the same syntax used to select part of a numpy
# matrix; each slice below describes the cells one axes will occupy.
cells = [
    G[0:1, :3],    # Axes 1
    G[1:2, 0:2],   # Axes 2
    G[1:3, 2:3],   # Axes 3
    G[2:3, 0:1],   # Axes 4
    G[2:3, 1:2],   # Axes 5
]

panels = []
for number, cell in enumerate(cells, start=1):
    panels.append(plt.subplot(cell))
    plt.xticks([])
    plt.yticks([])
    plt.text(x=0.5, y=0.5, s='Axes %d' % number,
             ha='center', va='center', size=20, alpha=0.5)

# Keep the individual axes available under their usual names.
ax1, ax2, ax3, ax4, ax5 = panels

plt.show()
In [71]:
# Picture in picture: a small axes drawn inside a large one.
#
# plt.axes([l, b, w, h]) creates an axes from the rectangle [l, b, w, h]:
#   l - horizontal distance from the left edge of the figure to the left edge of the axes
#   b - vertical distance from the bottom edge of the figure to the bottom edge of the axes
#   w - width of the axes
#   h - height of the axes
# The values are normalized distances, i.e. fractions of the figure size. For
# example, if the figure is 10 units tall and the bottom of the axes sits
# 2 units above the bottom of the figure, then b = 2/10 = 0.2.
#
# Note that these rectangle coordinates are not the same thing as the
# coordinates shown on the ticks of an axes.
#
# To hide the ticks of an axes, call plt.xticks([]), plt.yticks([]) right
# after creating it.
for rect, (text_x, text_y), caption, font_size in (
    ([0.1, 0.1, 0.8, 0.8], (0.6, 0.6), 'axes([0.1,0.1,0.8,0.8])', 20),
    ([0.2, 0.2, 0.3, 0.3], (0.5, 0.5), 'axes([0.2,0.2,0.3,0.3])', 10),
):
    plt.axes(rect)
    plt.text(x=text_x, y=text_y, s=caption,
             va='center', ha='center', size=font_size, alpha=0.5)

plt.show()

两种生成图的方式

In [75]:
# Approach 1: create the figure and its axes together in a single call.
s = 'xxx'
fig, ax = plt.subplots()
ax.set(yticks=[], xticks=[])  # no ticks on either axis
ax.text(0.5, 0.5, s,
        size=20, alpha=0.5,
        va='center', ha='center')
plt.show()
In [77]:
# Approach 2: create the figure first, then add the axes to it.
s = 'xxx'
fig = plt.figure()
ax = fig.add_subplot(111)  # 1 row, 1 column, first slot
ax.set(yticks=[], xticks=[])  # no ticks on either axis
ax.text(0.5, 0.5, s,
        size=20, alpha=0.5,
        va='center', ha='center')
plt.show()

坐标轴

In [96]:
# Customize the axis labels, tick labels and tick lines of a single axes.
fig, ax = plt.subplots()

# Axis labels are text objects, so their font size, colour and rotation can
# all be adjusted.
ax.set_xlabel('x label', size=20, color='red', rotation=30)
# The y-axis label keeps the default styling; it reads 'y label' to match
# the 'x label' used for the x axis.
ax.set_ylabel('y label')

# The tick labels of the x axis are text objects as well, so the same text
# properties can be changed on them (the y axis works the same way).
for label in ax.xaxis.get_ticklabels():
    label.set_color('blue')
    label.set_rotation(45)
    label.set_fontsize(20)

# The tick lines of the y axis are line objects with their own adjustable
# properties; the deliberately huge values make the effect easy to see.
for line in ax.yaxis.get_ticklines():
    line.set_color('green')
    line.set_markersize(500)
    line.set_markeredgewidth(30)

二、matplotlib实例绘制

字段含义如下:

SaleID - 销售样本ID

name - 汽车编码

regDate - 汽车注册时间

model - 车型编码

brand - 品牌

bodyType - 车身类型

fuelType - 燃油类型

gearbox - 变速箱

power - 汽车功率

kilometer - 汽车行驶公里

notRepairedDamage - 汽车有尚未修复的损坏

regionCode - 看车地区编码

seller - 销售方

offerType - 报价类型

creatDate - 广告发布时间

price - 汽车价格

'v_0', 'v_1', 'v_2', 'v_3', 'v_4', 'v_5', 'v_6', 'v_7', 'v_8', 'v_9', 'v_10', 'v_11', 'v_12', 'v_13', 'v_14' 【匿名特征,包含 v_0 至 v_14 共 15 个匿名特征】

数字全都脱敏处理,都为label encoding形式,即数字形式

In [114]:
# Load the training data of the Tianchi used-car price prediction task.
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt

# The path is written with forward slashes so that it resolves on Windows,
# Linux and macOS alike. A raw string with doubled backslashes would contain
# literal double backslashes and only be understood on Windows.
# The file is space-separated, hence sep=' '.
dftrain = pd.read_csv('./天池二手车交易交额预测/used_car_train_20200313.csv', sep=' ')
dftrain.head()
Out[114]:
SaleID name regDate model brand bodyType fuelType gearbox power kilometer ... v_5 v_6 v_7 v_8 v_9 v_10 v_11 v_12 v_13 v_14
0 0 736 20040402 30.0 6 1.0 0.0 0.0 60 12.5 ... 0.235676 0.101988 0.129549 0.022816 0.097462 -2.881803 2.804097 -2.420821 0.795292 0.914762
1 1 2262 20030301 40.0 1 2.0 0.0 0.0 0 15.0 ... 0.264777 0.121004 0.135731 0.026597 0.020582 -4.900482 2.096338 -1.030483 -1.722674 0.245522
2 2 14874 20040403 115.0 15 1.0 0.0 0.0 163 12.5 ... 0.251410 0.114912 0.165147 0.062173 0.027075 -4.846749 1.803559 1.565330 -0.832687 -0.229963
3 3 71865 19960908 109.0 10 0.0 0.0 1.0 193 15.0 ... 0.274293 0.110300 0.121964 0.033395 0.000000 -4.509599 1.285940 -0.501868 -2.438353 -0.478699
4 4 111080 20120103 110.0 5 1.0 0.0 0.0 68 5.0 ... 0.228036 0.073205 0.091880 0.078819 0.121534 -1.896240 0.910783 0.931110 2.834518 1.923482

5 rows × 31 columns

In [117]:
# Get a basic overview of the data: columns, non-null counts, dtypes and memory usage.
dftrain.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150000 entries, 0 to 149999
Data columns (total 31 columns):
SaleID               150000 non-null int64
name                 150000 non-null int64
regDate              150000 non-null int64
model                149999 non-null float64
brand                150000 non-null int64
bodyType             145494 non-null float64
fuelType             141320 non-null float64
gearbox              144019 non-null float64
power                150000 non-null int64
kilometer            150000 non-null float64
notRepairedDamage    150000 non-null object
regionCode           150000 non-null int64
seller               150000 non-null int64
offerType            150000 non-null int64
creatDate            150000 non-null int64
price                150000 non-null int64
v_0                  150000 non-null float64
v_1                  150000 non-null float64
v_2                  150000 non-null float64
v_3                  150000 non-null float64
v_4                  150000 non-null float64
v_5                  150000 non-null float64
v_6                  150000 non-null float64
v_7                  150000 non-null float64
v_8                  150000 non-null float64
v_9                  150000 non-null float64
v_10                 150000 non-null float64
v_11                 150000 non-null float64
v_12                 150000 non-null float64
v_13                 150000 non-null float64
v_14                 150000 non-null float64
dtypes: float64(20), int64(10), object(1)
memory usage: 35.5+ MB
In [124]:
# Summary statistics (count, mean, std, min, quartiles, max) rounded to two decimals.
dftrain.describe().round(2)
Out[124]:
SaleID name regDate model brand bodyType fuelType gearbox power kilometer ... v_5 v_6 v_7 v_8 v_9 v_10 v_11 v_12 v_13 v_14
count 150000.00 150000.00 150000.00 149999.00 150000.00 145494.00 141320.00 144019.00 150000.00 150000.00 ... 150000.00 150000.00 150000.00 150000.00 150000.00 150000.00 150000.00 150000.00 150000.00 150000.00
mean 74999.50 68349.17 20034170.51 47.13 8.05 1.79 0.38 0.22 119.32 12.60 ... 0.25 0.04 0.12 0.06 0.06 -0.00 0.01 0.00 0.00 -0.00
std 43301.41 61103.88 53649.88 49.54 7.86 1.76 0.55 0.42 177.17 3.92 ... 0.05 0.05 0.20 0.03 0.04 3.77 3.29 2.52 1.29 1.04
min 0.00 0.00 19910001.00 0.00 0.00 0.00 0.00 0.00 0.00 0.50 ... 0.00 0.00 0.00 0.00 0.00 -9.17 -5.56 -9.64 -4.15 -6.55
25% 37499.75 11156.00 19990912.00 10.00 1.00 0.00 0.00 0.00 75.00 12.50 ... 0.24 0.00 0.06 0.04 0.03 -3.72 -1.95 -1.87 -1.06 -0.44
50% 74999.50 51638.00 20030912.00 30.00 6.00 1.00 0.00 0.00 110.00 15.00 ... 0.26 0.00 0.10 0.06 0.06 1.62 -0.36 -0.13 -0.04 0.14
75% 112499.25 118841.25 20071109.00 66.00 13.00 3.00 1.00 0.00 150.00 15.00 ... 0.27 0.10 0.13 0.08 0.09 2.84 1.26 1.78 0.94 0.68
max 149999.00 196812.00 20151212.00 247.00 39.00 7.00 6.00 1.00 19312.00 15.00 ... 0.29 0.15 1.40 0.16 0.22 12.36 18.82 13.85 11.15 8.66

8 rows × 30 columns

In [125]:
# Pairwise correlation between the numeric columns of the training data.
# Non-numeric columns (notRepairedDamage has dtype object) are excluded
# explicitly: older pandas versions dropped them silently inside corr(),
# whereas recent versions raise an error when they are present.
dftrain.select_dtypes(include='number').corr()
Out[125]:
SaleID name regDate model brand bodyType fuelType gearbox power kilometer ... v_5 v_6 v_7 v_8 v_9 v_10 v_11 v_12 v_13 v_14
SaleID 1.000000 -0.002299 -0.001373 0.000659 -0.001754 -0.005163 -0.001584 0.001153 0.000873 -0.001115 ... -0.001380 -0.000397 0.001164 -0.000046 -0.000749 0.000983 0.001276 0.000249 0.000162 -0.000209
name -0.002299 1.000000 -0.037638 0.016080 0.040678 0.034532 0.014880 0.026104 0.000236 -0.007613 ... -0.057710 -0.638191 0.057199 0.235341 0.008455 0.576758 -0.258582 0.090897 0.007558 -0.011303
regDate -0.001373 -0.037638 1.000000 0.148780 0.033199 0.102864 0.276227 0.142631 0.097446 -0.492852 ... 0.020100 0.107282 -0.037048 0.761804 0.146139 -0.249871 -0.184640 0.704787 0.412246 0.177898
model 0.000659 0.016080 0.148780 1.000000 0.358765 0.207208 0.048767 0.019314 0.031245 -0.087445 ... -0.019701 0.015419 -0.100753 0.211149 0.423526 -0.058674 -0.079661 0.110157 0.400001 -0.513733
brand -0.001754 0.040678 0.033199 0.358765 1.000000 0.114116 -0.079566 0.012914 -0.022391 -0.096262 ... -0.069206 -0.031185 -0.050022 0.015200 0.352137 0.043222 0.007571 -0.070912 0.320933 -0.207713
bodyType -0.005163 0.034532 0.102864 0.207208 0.114116 1.000000 0.118717 0.099966 0.079632 -0.053212 ... 0.398335 -0.009374 -0.399415 0.220308 -0.051773 -0.082708 -0.293019 0.210039 -0.030675 -0.288897
fuelType -0.001584 0.014880 0.276227 0.048767 -0.079566 0.118717 1.000000 0.171692 0.059315 0.099446 ... 0.081457 0.001499 0.000487 0.279369 -0.128720 -0.075217 -0.137755 0.302333 -0.049669 -0.021656
gearbox 0.001153 0.026104 0.142631 0.019314 0.012914 0.099966 0.171692 1.000000 0.156706 -0.046807 ... 0.124329 0.002201 0.011876 0.238381 -0.258017 -0.080558 -0.145925 0.291374 -0.203841 -0.044204
power 0.000873 0.000236 0.097446 0.031245 -0.022391 0.079632 0.059315 0.156706 1.000000 -0.019631 ... 0.119727 0.025648 -0.060397 0.155956 -0.140203 -0.092717 -0.122107 0.161990 -0.103430 -0.023808
kilometer -0.001115 -0.007613 -0.492852 -0.087445 -0.096262 -0.053212 0.099446 -0.046807 -0.019631 1.000000 ... 0.049502 -0.024664 -0.017835 -0.407686 -0.149422 0.083358 0.066542 -0.370153 -0.285158 -0.120389
regionCode -0.001078 0.005603 -0.004783 0.010241 -0.002326 0.017380 -0.012507 -0.022640 0.004422 -0.010590 ... -0.002506 0.028838 0.000609 -0.001227 -0.016659 -0.024333 0.016807 0.002261 -0.001976 -0.009702
seller 0.000055 -0.001257 -0.001606 0.000984 0.000311 0.000309 -0.001822 -0.001420 -0.001739 0.001583 ... -0.000641 -0.002242 -0.000421 -0.005144 0.005572 0.003026 0.000182 -0.006003 0.000881 -0.003140
offerType NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
creatDate -0.001449 -0.005056 -0.001293 -0.004383 -0.001466 0.002508 -0.001081 -0.002538 0.001280 0.004718 ... 0.005612 0.010005 -0.004129 -0.005214 -0.003350 -0.010391 0.000596 -0.003077 -0.004312 0.005001
price -0.001043 0.002030 0.611959 0.136983 -0.043799 0.241303 0.200536 0.329075 0.219834 -0.440519 ... 0.164317 0.068970 -0.053024 0.685798 -0.206205 -0.246175 -0.275320 0.692823 -0.013993 0.035911
v_0 -0.001831 -0.125450 0.524815 0.091031 -0.101214 0.303539 0.279984 0.306703 0.215028 -0.225034 ... 0.726250 0.243783 -0.584363 0.514149 -0.186243 -0.582943 -0.667809 0.415711 -0.136938 -0.039809
v_1 -0.000390 -0.638309 0.103922 0.019805 -0.025579 -0.010096 -0.002663 -0.004298 0.023746 -0.022228 ... 0.109303 0.999415 -0.110806 -0.298966 -0.007698 -0.921904 0.370445 -0.087593 0.017349 0.002143
v_2 0.001237 -0.003404 0.142413 -0.037685 -0.036106 -0.172383 0.067672 0.054411 -0.031487 -0.110375 ... -0.921857 0.023877 0.973689 0.180285 -0.236164 0.274341 0.800915 0.535270 -0.055376 -0.013785
v_3 -0.000034 -0.047349 -0.796583 -0.215659 -0.000582 -0.250790 -0.293114 -0.254030 -0.185342 0.402502 ... -0.233412 -0.000747 0.191278 -0.933161 0.079292 0.247385 0.429777 -0.811301 -0.246052 -0.058561
v_4 -0.000494 0.006417 0.260285 0.426664 0.343755 -0.052972 -0.095064 -0.238739 -0.141013 -0.214861 ... -0.259739 -0.011275 -0.054241 0.051741 0.962928 0.071116 0.110660 -0.134611 0.934580 -0.178518
v_5 -0.001380 -0.057710 0.020100 -0.019701 -0.069206 0.398335 0.081457 0.124329 0.119727 0.049502 ... 1.000000 0.091229 -0.939385 0.010686 -0.050343 -0.440588 -0.845954 -0.258521 -0.162689 0.037804
v_6 -0.000397 -0.638191 0.107282 0.015419 -0.031185 -0.009374 0.001499 0.002201 0.025648 -0.024664 ... 0.091229 1.000000 -0.085410 -0.294956 -0.023057 -0.917056 0.386446 -0.070238 0.000758 -0.003322
v_7 0.001164 0.057199 -0.037048 -0.100753 -0.050022 -0.399415 0.000487 0.011876 -0.060397 -0.017835 ... -0.939385 -0.085410 1.000000 0.028695 -0.264091 0.410014 0.813175 0.385378 -0.154535 -0.020218
v_8 -0.000046 0.235341 0.761804 0.211149 0.015200 0.220308 0.279369 0.238381 0.155956 -0.407686 ... 0.010686 -0.294956 0.028695 1.000000 -0.063577 0.094497 -0.369353 0.882121 0.250423 0.030416
v_9 -0.000749 0.008455 0.146139 0.423526 0.352137 -0.051773 -0.128720 -0.258017 -0.140203 -0.149422 ... -0.050343 -0.023057 -0.264091 -0.063577 1.000000 0.026562 -0.056200 -0.313634 0.880545 -0.214151
v_10 0.000983 0.576758 -0.249871 -0.058674 0.043222 -0.082708 -0.075217 -0.080558 -0.092717 0.083358 ... -0.440588 -0.917056 0.410014 0.094497 0.026562 1.000000 0.006306 0.001289 -0.000580 0.002244
v_11 0.001276 -0.258582 -0.184640 -0.079661 0.007571 -0.293019 -0.137755 -0.145925 -0.122107 0.066542 ... -0.845954 0.386446 0.813175 -0.369353 -0.056200 0.006306 1.000000 0.006695 -0.001671 -0.001156
v_12 0.000249 0.090897 0.704787 0.110157 -0.070912 0.210039 0.302333 0.291374 0.161990 -0.370153 ... -0.258521 -0.070238 0.385378 0.882121 -0.313634 0.001289 0.006695 1.000000 0.001512 0.002045
v_13 0.000162 0.007558 0.412246 0.400001 0.320933 -0.030675 -0.049669 -0.203841 -0.103430 -0.285158 ... -0.162689 0.000758 -0.154535 0.250423 0.880545 -0.000580 -0.001671 0.001512 1.000000 0.001419
v_14 -0.000209 -0.011303 0.177898 -0.513733 -0.207713 -0.288897 -0.021656 -0.044204 -0.023808 -0.120389 ... 0.037804 -0.003322 -0.020218 0.030416 -0.214151 0.002244 -0.001156 0.002045 0.001419 1.000000

30 rows × 30 columns