Data Visualization with Matplotlib in Python

import numpy as np
import pandas as pd

# we are using the inline backend (IPython/Jupyter only: get_ipython() is provided by the notebook session)
get_ipython().run_line_magic('matplotlib', 'inline') 
# if you also want to modify a graph after you have drawn it, don't use this command!

import matplotlib as mpl
import matplotlib.pyplot as plt

# use the ggplot-like style for every figure
mpl.style.use('ggplot')

# df is the name of my dataframe

# the index labels may need to be converted to integers before plotting
df.index = df.index.map(int)


# 0. TWO TYPES OF PLOTTING

# Type 1: plot, then call the pyplot (plt) functions
df.plot([...])
plt.title([...])
plt.ylabel([...])
[...]

# Type 2: more flexibility - keep the object returned by df.plot() and call its methods
ax = df.plot([...])
ax.set_title([...])
ax.set_ylabel([...])
ax.[...]


# 1. LINEGRAPH

# basic linegraph ('line' is the default kind)
df.plot(kind='line')

# let's change something: keep the returned axes and customise it
ax = df.plot.line()
ax.set_title('TITLE')
ax.set_ylabel('Y LABEL')
ax.set_xlabel('X LABEL')

# add an annotation - syntax: ax.text(x, y, label), with x and y in data coordinates
ax.text(2000, 6000, 'ANNOTATION')

# need this line to show the updates made to the figure
plt.show()


# 2. AREA PLOTS

# alpha:   transparency between 0 and 1, default value 0.5
# stacked: area plots are stacked by default, so turn it off explicitly
# figsize: pass a tuple (x, y) size
df.plot.area(alpha=0.25, stacked=False, figsize=(20, 10))



# 3. HISTOGRAMS

df.plot(kind='hist', figsize=(8, 5))


# help for choosing the bins: np.histogram returns a (counts, bin_edges) tuple,
# so unpack it and use only the edges as tick positions
count, bin_edges = np.histogram(df['column'], bins=5)   # divide into 5 bins and give the count

# plot with the same number of bins so the ticks can line up with the bars
# (they line up exactly when the plotted data spans the same range as df['column'])
df.plot(kind='hist', figsize=(8, 5), bins=5, xticks=bin_edges)


# 4. BAR CHARTS

# Vertical bars
df.plot.bar(figsize=(10, 6))



# Horizontal bars
df.plot.barh(figsize=(10, 6))



# 5. PIE CHARTS

# options shared by both pie charts below
pie_options = dict(
    figsize=(5, 6),
    autopct='%1.1f%%',   # show the value of each slice in that format
    startangle=90,       # start angle 90°
    shadow=True,         # add shadow
    labels=None,         # turn off labels
    pctdistance=1.12,    # distance between the center of each pie slice and the text
)

df['col1'].plot(kind='pie', **pie_options)

# we can choose the colors and give more importance to some slices with this:
colors_list = ['gold', 'yellowgreen', 'lightcoral', 'lightskyblue', 'lightgreen', 'pink']
explode_list = [0.1, 0, 0, 0, 0.1, 0.1]   # ratio for each slice

df['col1'].plot(
    kind='pie',
    colors=colors_list,     # add custom colors
    explode=explode_list,   # 'explode' 3 items
    **pie_options
)

# for adding a legend
plt.legend(labels=df.index, loc='upper right')

plt.show()



# 6. BOX PLOTS

# box plot of a single column
df['col1'].plot.box(figsize=(8, 6))


# horizontal box plots with some adjustments
df.plot.box(figsize=(10, 7), color='blue', vert=False)



# 7. SCATTERPLOTS

df.plot(kind='scatter', x='col1', y='col2', figsize=(10, 6), color='darkblue')


# we can add a regression line to see the trend.
x = df['col1']      # item on x-axis
y = df['col2']     # item on y-axis

# Degree of fitting polynomial. 1 = linear, 2 = quadratic, ...
# np.polyfit returns the coefficients from the highest power down,
# so for deg=1 they are (slope, intercept)
slope, intercept = np.polyfit(x, y, deg=1)

# plot the same columns that were used for the fit
df.plot(kind='scatter', x='col1', y='col2', figsize=(10, 6), color='darkblue')

# plot line of best fit
plt.plot(x, slope * x + intercept, color='red')

# xy is given in data coordinates: adapt it to the range of your own data
plt.annotate('y={0:.0f} x + {1:.0f}'.format(slope, intercept), xy=(2000, 150000))

plt.show()



# 8. BUBBLE PLOTS

# a bubble plot is a scatter plot whose marker sizes come from a third column
bubble_sizes = df['col3']   # pass weights

# alpha sets the transparency of the bubbles
df.plot(kind='scatter', x='col1', y='col2', figsize=(14, 8),
        alpha=0.5, color='green', s=bubble_sizes)


# ANNOTATION EXAMPLE

ax = df.plot.bar(figsize=(10, 6))
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_title('TITLE')

# Annotate arrow
arrow_style = {'arrowstyle': '->', 'connectionstyle': 'arc3', 'color': 'blue', 'lw': 2}
ax.annotate(
    '',                 # leave it blank for no text
    xy=(32, 70),        # place head of the arrow at that point
    xytext=(28, 20),    # place base of the arrow at that point
    xycoords='data',    # will use the coordinate system of the object being annotated
    arrowprops=arrow_style,
)

# Annotate text
ax.annotate(
    'TEXT ON THE ARROW',   # text to display
    xy=(28, 30),           # start the text at this point
    rotation=72.5,         # based on trial and error to match the arrow
    va='bottom',           # want the text to be vertically 'bottom' aligned
    ha='left',             # want the text to be horizontally 'left' aligned
)

plt.show()



# SUBPLOTS

# To have multiple plots in the same figure.

# create the figure, then one axes per plot
fig = plt.figure()
ax0 = fig.add_subplot(121)   # 1 row, 2 columns, first plot
ax1 = fig.add_subplot(122)   # 1 row, 2 columns, second plot

# Subplot 1: box plot drawn on the first axes
df.plot.box(color='blue', vert=False, figsize=(20, 6), ax=ax0)

# Subplot 2: line plot drawn on the second axes
df.plot.line(figsize=(20, 6), ax=ax1)

plt.show()




No comments:

Post a Comment