Visualization


Matplotlib


In [1]:
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
import numpy as np

x = np.linspace(0,5,11)
y = x ** 2

functional method plot

In [3]:
plt.plot(x, y)
plt.xlabel('X label')
plt.ylabel('Y label')
plt.title('Title')
plt.show()

Multi plot

In [4]:
plt.subplot(1, 2, 1) # numOfRows, numOfCols, #cases
plt.plot(x, y, 'r')

plt.subplot(1, 2, 2)
plt.plot(y, x, 'b')
Out[4]:
[<matplotlib.lines.Line2D at 0x12062b810>]

OOP plot

In [5]:
fig = plt.figure()
axes = fig.add_axes([0.1, 0.1, 0.8, 0.8])
axes.plot(x, y)
axes.set_xlabel('X label')
axes.set_ylabel('Y label')
axes.set_title('Title')
Out[5]:
Text(0.5, 1.0, 'Title')
In [6]:
fig = plt.figure()
axes1 = fig.add_axes([0.1, 0.1, 0.8, 0.8])
axes2 = fig.add_axes([0.2, 0.5, 0.4, 0.3])

axes1.plot(x, y, 'r')
axes2.plot(y, x, 'b+')

axes2.set_title('Smaller')
axes1.set_title('Larger')
Out[6]:
Text(0.5, 1.0, 'Larger')
In [7]:
fig, axes = plt.subplots(1, 2)
plt.tight_layout() # correct the alignment

axes[0].plot(x, y)
axes[0].set_title('First')
axes[1].plot(y, x)
axes[1].set_title('Second')
Out[7]:
Text(0.5, 1.0, 'Second')

Figure size and DPI

In [8]:
fig = plt.figure(figsize=(6,2))
ax = fig.add_axes([0,0,1,1])
ax.plot(x, x**2, label='X squared')
ax.plot(x, x**3, label='X cubed')

ax.legend()
Out[8]:
<matplotlib.legend.Legend at 0x11d81e310>
In [9]:
fig.savefig('myfig.png', dpi=200)

Plot appearance

In [10]:
fig = plt.figure(figsize=(10, 4))
ax = fig.add_axes([0,0,1,1])
# color, linewidth, alpha channel, 
ax.plot(x, y, 'red', linewidth=3, alpha=0.5, linestyle='-.') #RGB hex code can be used (FF8C00)

#marker
ax.plot(y, x, 'b', linewidth=1, marker='o', markersize=15, markerfacecolor='yellow', 
        markeredgewidth=3, markeredgecolor='black')
Out[10]:
[<matplotlib.lines.Line2D at 0x124c2dc90>]

Plot range

In [11]:
fig, axes = plt.subplots(1, 2)
plt.tight_layout()

axes[0].plot(x, y)
axes[0].set_title('First')
axes[1].plot(x, y)
axes[1].set_title('Second')

# zoom in
axes[0].set_xlim([0, 3])
axes[0].set_ylim([0, 2])
Out[11]:
(0.0, 2.0)

Different plots

In [12]:
plt.scatter(x, y)
Out[12]:
<matplotlib.collections.PathCollection at 0x124dd40d0>
In [13]:
from random import sample
data = sample(range(1, 1000), 100)

plt.hist(data)
plt.show()
In [14]:
data = [np.random.normal(0, std, 100) for std in range(1, 4)]

plt.boxplot(data, vert=True, patch_artist=True)
plt.show()

Seaborn


In [35]:
import seaborn as sns

%matplotlib inline

tips = sns.load_dataset('tips') # built in example dataset

tips.head()
Out[35]:
total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
2 21.01 3.50 Male No Sun Dinner 3
3 23.68 3.31 Male No Sun Dinner 2
4 24.59 3.61 Female No Sun Dinner 4

Distribution plot

In [55]:
sns.set_style('darkgrid')
sns.distplot(tips['total_bill'])
Out[55]:
<matplotlib.axes._subplots.AxesSubplot at 0x136574190>
In [56]:
sns.set_style('darkgrid')
sns.distplot(tips['total_bill'], kde=False, bins=30)
Out[56]:
<matplotlib.axes._subplots.AxesSubplot at 0x136fcba10>

Joint plot

In [57]:
sns.set_style('darkgrid')
sns.jointplot(x='total_bill', y='tip', data=tips)
Out[57]:
<seaborn.axisgrid.JointGrid at 0x137092d90>
In [58]:
sns.set_style('darkgrid')
sns.jointplot(x='total_bill', y='tip', data=tips, kind='hex')
Out[58]:
<seaborn.axisgrid.JointGrid at 0x1370928d0>
In [59]:
sns.set_style('darkgrid')
sns.jointplot(x='total_bill', y='tip', data=tips, kind='reg')
Out[59]:
<seaborn.axisgrid.JointGrid at 0x135d99710>
In [60]:
sns.set_style('darkgrid')
sns.jointplot(x='total_bill', y='tip', data=tips, kind='kde')
Out[60]:
<seaborn.axisgrid.JointGrid at 0x13779f190>

Pair plot

In [61]:
sns.pairplot(tips) # plot all pairs in the dataset
Out[61]:
<seaborn.axisgrid.PairGrid at 0x124ecbdd0>
In [64]:
sns.pairplot(tips, hue='sex', palette='coolwarm') # hue attrib to categorichal data
Out[64]:
<seaborn.axisgrid.PairGrid at 0x138a64e50>

Categorical plot

In [65]:
# bar plot
sns.barplot(x='sex', y='total_bill', data=tips)
Out[65]:
<matplotlib.axes._subplots.AxesSubplot at 0x13907f050>
In [67]:
sns.barplot(x='sex', y='total_bill', data=tips, estimator=np.std)
Out[67]:
<matplotlib.axes._subplots.AxesSubplot at 0x13978c450>

Count plot

In [68]:
sns.countplot(x='sex', data=tips)
Out[68]:
<matplotlib.axes._subplots.AxesSubplot at 0x139e2c6d0>

Box plot

In [69]:
sns.boxplot(x='day', y='total_bill', data=tips)
Out[69]:
<matplotlib.axes._subplots.AxesSubplot at 0x1385e57d0>
In [71]:
sns.boxplot(x='day', y='total_bill', data=tips, hue='smoker')
Out[71]:
<matplotlib.axes._subplots.AxesSubplot at 0x13a9f0450>

Violin plot

In [76]:
sns.violinplot(x='day', y='total_bill', data=tips, hue='sex', split=True)
Out[76]:
<matplotlib.axes._subplots.AxesSubplot at 0x13a6bca90>

Strip plot

In [78]:
sns.stripplot(x='day', y='total_bill', data=tips, hue='sex')
Out[78]:
<matplotlib.axes._subplots.AxesSubplot at 0x13a66d9d0>

Swarm plot

In [79]:
sns.swarmplot(x='day', y='total_bill', data=tips, hue='sex')
Out[79]:
<matplotlib.axes._subplots.AxesSubplot at 0x13a8683d0>
In [81]:
sns.violinplot(x='day', y='total_bill', data=tips)
sns.swarmplot(x='day', y='total_bill', data=tips, hue='sex')
Out[81]:
<matplotlib.axes._subplots.AxesSubplot at 0x13b07c7d0>

Matrix plot

In [83]:
flights = sns.load_dataset('flights')
flights.head()
Out[83]:
year month passengers
0 1949 January 112
1 1949 February 118
2 1949 March 132
3 1949 April 129
4 1949 May 121
In [85]:
# correlation data
tc = tips.corr()
tc
Out[85]:
total_bill tip size
total_bill 1.000000 0.675734 0.598315
tip 0.675734 1.000000 0.489299
size 0.598315 0.489299 1.000000

Heat map

In [88]:
sns.heatmap(tc, annot=True, cmap='coolwarm')
Out[88]:
<matplotlib.axes._subplots.AxesSubplot at 0x13a2afa50>
In [90]:
f = flights.pivot_table(index='month', columns='year', values='passengers')
f
Out[90]:
year 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960
month
January 112 115 145 171 196 204 242 284 315 340 360 417
February 118 126 150 180 196 188 233 277 301 318 342 391
March 132 141 178 193 236 235 267 317 356 362 406 419
April 129 135 163 181 235 227 269 313 348 348 396 461
May 121 125 172 183 229 234 270 318 355 363 420 472
June 135 149 178 218 243 264 315 374 422 435 472 535
July 148 170 199 230 264 302 364 413 465 491 548 622
August 148 170 199 242 272 293 347 405 467 505 559 606
September 136 158 184 209 237 259 312 355 404 404 463 508
October 119 133 162 191 211 229 274 306 347 359 407 461
November 104 114 146 172 180 203 237 271 305 310 362 390
December 118 140 166 194 201 229 278 306 336 337 405 432
In [93]:
sns.heatmap(f, cmap='magma', linecolor='white', linewidths=1)
Out[93]:
<matplotlib.axes._subplots.AxesSubplot at 0x138a2b190>

Cluster map

In [99]:
# cluster similar month and year
sns.clustermap(f, cmap='coolwarm', linewidths=1, linecolor='black', standard_scale=1)
Out[99]:
<seaborn.matrix.ClusterGrid at 0x13d88f290>

Grids

In [100]:
iris = sns.load_dataset('iris')
iris.head()
Out[100]:
sepal_length sepal_width petal_length petal_width species
0 5.1 3.5 1.4 0.2 setosa
1 4.9 3.0 1.4 0.2 setosa
2 4.7 3.2 1.3 0.2 setosa
3 4.6 3.1 1.5 0.2 setosa
4 5.0 3.6 1.4 0.2 setosa
In [103]:
g = sns.PairGrid(iris)
g.map_diag(sns.distplot)
g.map_upper(plt.scatter)
g.map_lower(sns.kdeplot)
Out[103]:
<seaborn.axisgrid.PairGrid at 0x13f93a250>