Azad Rasul
SmartRS

SmartRS

14- Plotting in Python

Azad Rasul's photo
Azad Rasul

Published on Jun 28, 2021

7 min read

Subscribe to my newsletter and never miss my upcoming articles

In this tutorial, you will learn how to create a line plot, bar plot, pair plot, scatter plot, joint plot, pie chart, box plot, histogram, animated plot, and different types of catplot (categorical plot). We use matplotlib, seaborn, and other libraries.

First, download the data used in this tutorial: london_borough_profiles1.csv , myPub.csv , and sar_data.csv

Timeplot (time series)

Plotting one variable time variation:

# import libraries:
import pandas as pd
import matplotlib.pyplot as plt

# Load time series data hosted on GitHub; the 'date' column is parsed as dates and becomes the index.
df = pd.read_csv('https://raw.githubusercontent.com/selva86/datasets/master/a10.csv', parse_dates=['date'], index_col='date')
# define 'tplot' function
def tplot(df, x, y, title="", xlabel='Date', ylabel='Value', dpi=300):
    """Draw one series as a line plot with circular markers and show it.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame used to look up ``x`` / ``y`` when they are given as labels.
    x, y : array-like or str
        Values to plot. A string is resolved against ``df``: a column label
        selects that column, the index name selects the index. Any other
        value is handed to matplotlib unchanged.
    title, xlabel, ylabel : str
        Text for the plot title and the axis labels.
    dpi : int
        Resolution of the created figure.

    Raises
    ------
    KeyError
        If a string ``x`` or ``y`` is neither a column of ``df`` nor the
        name of its index.
    """
    def _resolve(values):
        # Only strings are treated as labels, so callers that pass
        # arrays/Series/indexes behave exactly as before.
        if not isinstance(values, str):
            return values
        if values in df.columns:
            return df[values]
        if values == df.index.name:
            return df.index
        raise KeyError(f"{values!r} is neither a column nor the index name of df")

    # Resolve before creating the figure so a bad label does not leave an
    # empty figure behind.
    x_values = _resolve(x)
    y_values = _resolve(y)

    plt.figure(figsize=(16, 5), dpi=dpi)
    plt.plot(x_values, y_values, marker='o', markerfacecolor='blue')
    plt.gca().set(title=title, xlabel=xlabel, ylabel=ylabel)
    plt.show()

# Plot the 'value' column against the date index.
tplot(df, x=df.index, y=df.value, title='Anti-diabetic sales in Australia from 1992 to 2008.')

image.png

Plotting two variables time series:

# import libraries
import numpy as np
import pandas as pd
import seaborn as sns

# Seeded generator so the example data is the same on every run
rs = np.random.RandomState(365)
# Two random walks: cumulative sums of 365 standard-normal steps per column
values = np.cumsum(rs.randn(365, 2), axis=0)
# One calendar day per row
dates = pd.date_range("1 1 2021", periods=365, freq="D")
# Smooth both series with a 7-row rolling mean
data = pd.DataFrame(values, index=dates, columns=["A", "B"]).rolling(7).mean()

# Wide-form frame: one line per column
sns.lineplot(data=data, linewidth=2.5, palette="tab10")

image.png

Swarmplot

A swarmplot is used to display the distribution of attributes.

Import libraries

import pandas as pd
import seaborn as sns

Import csv file of data

# Read the London borough profiles CSV from a local path (change it to where you saved the file).
df = pd.read_csv (r'D:\Python\Python_for_Researchers\london_borough_profiles1.csv', encoding='unicode_escape')
# Show the first rows of the frame.
df.head()

Create dataframe from some columns

# Keep only the six columns used in the steps that follow
df = df[[
    'In_Out',
    'Inner/_Outer_London',
    'Happiness_score_2011-14_(out_of_10)',
    'Anxiety_score_2011-14_(out_of_10)',
    'Employment_rate_(%)_(2015)',
    'People_aged_17+_with_diabetes_(%)',
]]

Clean the data by renaming some of the columns

# Shorten the verbose column names. The result is assigned back instead of
# renaming in place: `df` comes from a column selection, and in-place changes
# on such a frame can raise pandas' SettingWithCopyWarning.
df = df.rename(columns={
    'Inner/_Outer_London': 'in_out',
    'Happiness_score_2011-14_(out_of_10)': 'happiness',
    'Anxiety_score_2011-14_(out_of_10)': 'anxiety',
    'Employment_rate_(%)_(2015)': 'employment',
    'People_aged_17+_with_diabetes_(%)': 'diabetes',
})

Create some different data frames

# Put the columns in a fixed order
df = df.reindex(columns=['diabetes', 'In_Out', 'in_out', 'happiness', 'anxiety', 'employment'])
# Three column subsets used by the plots below
df1 = df.loc[:, ['diabetes', 'happiness', 'anxiety']]
df2 = df.loc[:, ['In_Out', 'employment', 'happiness', 'anxiety']]
df3 = df.loc[:, ['in_out', 'employment', 'happiness', 'anxiety']]

Create swarmplot using seaborn library

# swarmplot returns the Axes it drew on, so the x and y labels are set on it directly
sns.swarmplot(data=df1).set(xlabel='Indices', ylabel='Value')

image.png

Barplot

Presenting categorical data by bar chart or bar graph.

# barplot returns the Axes it drew on, so the x and y labels are set on it directly
sns.barplot(data=df1).set(xlabel='Indices', ylabel='Value')

image.png

Stacked Barplot

import matplotlib.pyplot as plt
# Group names, mean scores and standard deviations for the two series
labels = ['A', 'B', 'C', 'D', 'E']
men_av = [23, 25, 33, 30, 18]
women_av = [15, 22, 30, 10, 15]
std_m = [1, 2.5, 3, 1, 1.5]
std_w = [2, 4, 1.5, 2, 2.5]
width = 0.5       # the width of the bars: can also be len(x) sequence

fig, ba = plt.subplots()

ba.bar(labels, men_av, width, yerr=std_m, label='Men')
# bottom=men_av starts each women's bar where the men's bar ends; without it
# both series start at zero and overlap instead of stacking.
ba.bar(labels, women_av, width, yerr=std_w, bottom=men_av,
       label='Women')

ba.set_ylabel('Scores')
ba.set_title('Scores by group and gender')
ba.legend()

plt.show()

image.png

Pairplot

A pairplot is used to present the distribution of each variable and the relationships between variables.

# Grid of pairwise plots for df3, coloured by the 'in_out' column
sns.pairplot(df3, hue='in_out')

image.png

Scatterplot

Scatter plot shows the relationship between two variables.

# Employment against happiness, coloured by the 'in_out' column
sns.scatterplot(x='employment', y='happiness', hue='in_out', data=df3)
# Untitled legend at the bottom centre (same position as location code 8)
plt.legend(loc='lower center', title="")

image.png

3D Scatterplot

Import libraries and functions

import re, seaborn as sns, numpy as np, pandas as pd, random
from pylab import *
from matplotlib.pyplot import plot, show, draw, figure, cm
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.colors import ListedColormap

# White background style with the grid switched off
sns.set_style("whitegrid", {'axes.grid': False})
fig = plt.figure()
# Create the 3D axes through the figure so they are registered with it.
# Constructing Axes3D(fig) directly no longer attaches the axes to the figure
# in current Matplotlib releases, which leaves the figure empty.
ax = fig.add_subplot(111, projection='3d')

Create x, y, and z NumPy array data

# Coordinates of the ten points, one array per axis
X = np.asarray([0, 5, 10, 15, 20, 22, 26, 24, 14, 30])
Y = np.asarray([0, 3, 6, 9, 12, 22, 24, 26, 30, 20])
Z = np.asarray([3, 5, 11, 10, 12, 4, 5, 17, 10, 13])

Get colormap from seaborn

# 256 "husl" colours from seaborn, converted to hex and wrapped as a matplotlib colormap
cmap = ListedColormap(
    sns.color_palette("husl", 256).as_hex()
)
# Scatter the points; each marker is coloured by its X value
g = ax.scatter(X, Y, Z, c=X, cmap=cmap, marker='o', s=50, alpha=1)

Set x, y and z labels

# Label all three axes in one call
ax.set(xlabel='X Label', ylabel='Y Label', zlabel='Z Label')

Add a color bar which maps values to colors.

# Colour bar for the scatter, drawn at half height
fig.colorbar(g, aspect=5, shrink=0.5)
plt.show()

image.png

Scatter plot with varying marker colors and sizes

Import libraries and functions

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cbook as cbook
import pandas_datareader as pdr

Load data (^DJI stooq) from Pandas datareader

# Daily ^DJI quotes from Stooq
data = pdr.DataReader('^DJI', 'stooq')
# The Stooq reader returns rows newest-first. Sort chronologically so the slice
# below really is the most recent 365 rows and the differences run forward in time.
data = data.sort_index()[-365:]
# Relative change of the closing price from one row to the next
delta1 = np.diff(data.Close) / data.Close[:-1]
# Marker sizes from the volume, scaled by the first row's volume.
# .iloc[0] makes the positional lookup explicit on the date-indexed Series.
volume = (15 * data.Volume[:-2] / data.Volume.iloc[0])**2

Set color for 363 days from seaborn (color palette) library

# One colour per plotted point. The scatter pairs delta1[:-1] with delta1[1:],
# so there is one point fewer than there are entries in delta1.
colors = sns.color_palette("Set3", len(delta1) - 1)

Plotting to scatter plot:

fig, pl = plt.subplots()
# Each change plotted against the following one; marker size comes from the volume
pl.scatter(
    x=delta1[:-1],
    y=delta1[1:],
    s=volume,
    color=colors,
    alpha=0.5,
)

Set x, y labels and title:

# Axis labels, with size and colour given in the same call
pl.set_xlabel(r'Δi', fontsize=12, color='midnightblue')
pl.set_ylabel(r'Δi+1', fontsize=12, color='midnightblue')
pl.set_title('Scatter plot of ^DJI stooq with volume and price change')
pl.grid(True)

Set the x and y limits

# Same fixed range on both axes
pl.set_xlim(-0.025, 0.025)
pl.set_ylim(-0.025, 0.025)
fig.tight_layout()
plt.show()

image.png

Jointplot

Besides showing the relationship between the dependent variable (Y) and the independent variable (X), it displays the distributions of X and Y.

import seaborn as sns
import matplotlib.pyplot as plt
# Linear regression
x = (1, 3, 5, 2, 9, 11)
y = (2, 4, 6, 3, 8, 10)
# x and y are passed as vectors, so no `data` frame is given: the previously
# passed `df` held an unrelated dataset. Labels are set through the returned
# JointGrid, which targets the joint axes regardless of the current axes.
sns.jointplot(x=x, y=y, kind="reg").set_axis_labels(xlabel='X', ylabel='Y')

image.png

Piechart

# load library
import matplotlib.pyplot as plt

# Slice labels and slice sizes
names = ('A', 'B', 'C', 'D')
values = [5, 15, 30, 50]

# Draw the pie chart.
# labeldistance: the space between the labels and the center of the pie
plt.pie(
    values,
    labels=names,
    autopct='%1.1f%%',
    startangle=90,
    shadow=True,
    labeldistance=1.15,
)
plt.show()

image.png

Boxplot

# Read the SAR data CSV from a local path (change it to where you saved the file).
df = pd.read_csv (r'D:\Python\Python_for_Researchers\sar_data.csv', encoding='unicode_escape')
# Show the first rows of the frame.
df.head()

import seaborn as sns
# One box per column of df, using magenta and green
sns.boxplot(palette=["m", "g"], data=df)
# Trim the spines and move them 10 points away from the plot
sns.despine(trim=True, offset=10)
plt.gca().set(xlabel='Sensor', ylabel='Value')

image.png

Histogram

It represents the distribution of numerical data.

bio = [-2, 1, 2, 4, 2, 5, 5, 5, 6, 7, 9, 7, 5, 10, 15]  # create data
import seaborn as sns

sns.set_style('darkgrid')  # set grid style
# distplot is deprecated in seaborn. histplot with kde=True draws the histogram
# plus a density curve, and its y-axis shows counts, which matches the
# 'Frequency' label below.
his = sns.histplot(bio, kde=True)
his.set_xlabel('Value', fontsize=12)  # set x label
his.set_ylabel('Frequency', fontsize=12)  # set y label

image.png

Animated plot in Python

Import libraries

import pandas as pd
import matplotlib as mpl

Read the data

# Read the publications CSV from a local path (change it to where you saved the file).
data = pd.read_csv(r'd://myPub.csv')
# Check the first 5 rows
data.head(5)

Create and organize the plot:

# image resolution
dpi = 300

# Turn interactive plotting off (once is enough; it does not need repeating per figure)
plt.ioff()

# For each year, draw one scatter plot and save it as its own PNG frame:
for i in data.Year.unique():

    # initialize a figure
    fig = plt.figure(figsize=(680/dpi, 480/dpi), dpi=dpi)

    # Find the subset of the dataset for the current year
    subsetData = data[data.Year == i]

    # Build the scatterplot; marker size grows with the citation count
    plt.scatter(
        x=subsetData['Publication'],
        y=subsetData['Citation'],
        s=subsetData['Citation']*100,
        edgecolors="white", linewidth=2, color='midnightblue')

    # Add titles (main and on axis)
    plt.yscale('linear')
    plt.xlabel("Publication")
    plt.ylabel("Citation")
    plt.title("Azad Rasul's Publications and Citations during Year: "+str(i))
    # Fixed limits so every frame shares the same axes
    plt.ylim(-10, 150)  # y limitation
    plt.xlim(0, 6)  # x limitation

    # Save it & close the figure
    filename = '/Users/Azad/Desktop/test2/myPub'+str(i)+'.png'
    # NOTE(review): the figure is created at dpi=300 but saved at dpi=96 -- confirm this is intended
    plt.savefig(fname=filename, dpi=96)
    plt.close(fig)

After the PNG figures have been saved to your computer, you can combine them into an animated GIF online, for example on this website: gifmaker.me Animation_Plot_My_Publications.gif

Categorical data (catplot)

If the variables are “categorical” (divided into discrete groups), it may be advantageous to use catplot. We can change the plot type by setting the "kind" argument to violin, swarm, boxen, strip, box, point, bar or count.

Violin Catplot

Load the titanic dataset with the load_dataset function from the seaborn library.

# Load seaborn's titanic example dataset
titanic = sns.load_dataset("titanic")

g = sns.catplot(
    data=titanic,
    kind='violin',
    x='pclass',
    y="age",
    hue="alive",        # categorize and colour by the 'alive' column
    legend_out=False,   # False keeps the legend inside the plot
)
# Location code 1 is 'upper right'
plt.legend(title="Alive", loc='upper right')

image.png

Swarm Catplot

# Load data
titanic = sns.load_dataset("titanic")

g = sns.catplot(
    data=titanic,
    kind='swarm',
    x='pclass',
    y="age",
    hue="alive",
    legend_out=False,
)
# Axis limits, set separately for x and y
plt.xlim(-1, 3)
plt.ylim(0, 90)
# Location code 9 is 'upper center'
plt.legend(title="Alive", loc='upper center')

image.png

Boxen Catplot

# Load data
titanic = sns.load_dataset("titanic")
g = sns.catplot(
    data=titanic,
    kind='boxen',
    x='pclass',
    y="age",
    hue="alive",
    legend_out=False,
)
# Axis limits, set separately for x and y
plt.xlim(-1, 3)
plt.ylim(0, 90)
# Location code 9 is 'upper center'
plt.legend(title='Alive', loc='upper center')

image.png

Strip Catplot

# Load data
titanic = sns.load_dataset("titanic")
g = sns.catplot(
    data=titanic,
    kind='strip',
    x='pclass',
    y="age",
    hue="alive",
    legend_out=False,
)

# Axis limits, set separately for x and y
plt.xlim(-1, 3)
plt.ylim(0, 90)
# Location code 9 is 'upper center'
plt.legend(title='Alive', loc='upper center')

image.png

Box Catplot

# Load data
titanic = sns.load_dataset("titanic")
# Box plot of age per passenger class, split by the 'alive' column
g = sns.catplot(data=titanic, kind='box', x='pclass', y="age", hue="alive")

image.png

Point Catplot

# Load data
titanic = sns.load_dataset("titanic")
# Point plot of age per passenger class, split by the 'alive' column; legend kept inside the plot
g = sns.catplot(
    data=titanic, kind='point',
    x='pclass', y="age", hue="alive",
    legend_out=False,
)

image.png

Bar Catplot

# Load data
titanic = sns.load_dataset("titanic")
# Bar plot of age per passenger class, split by the 'alive' column; legend kept inside the plot
g = sns.catplot(
    data=titanic, kind='bar',
    x='pclass', y="age", hue="alive",
    legend_out=False,
)

image.png

Count Catplot

# Count of passengers per embarkation town, split by sex
sns.catplot(
    data=titanic,
    kind="count",
    x='embark_town',
    hue="sex",
    height=5,
    aspect=1.5,
    legend_out=False,
)
plt.xlabel("Embark town", size=14)
plt.ylabel("Count", size=14)
#plt.tight_layout()
# Location code 9 is 'upper center'
plt.legend(title="Gender", loc='upper center')

image.png

Did you find this article valuable?

Support Azad Rasul by becoming a sponsor. Any amount is appreciated!

Learn more about Hashnode Sponsors
 
Share this
Proudly part of