Master the fundamental building blocks of data visualization with Matplotlib
Imagine you're a data artist with three essential brushes: lines to show trends over time, dots to reveal relationships between variables, and bars to compare quantities. Each chart type is a different language for communicating insights!
In the real world, choosing the right visualization can mean the difference between confusion and clarity. A well-crafted chart can reveal patterns invisible in raw numbers, tell compelling stories, and drive data-informed decisions. Let's master these fundamental tools!
Explore different chart types and their parameters in real time:
Perfect for showing trends and changes over time
# Line plots: a single sine curve first, then sine and cosine drawn together.
import matplotlib.pyplot as plt
import numpy as np

# 100 evenly spaced samples on [0, 10].
x = np.linspace(0, 10, 100)
y = np.sin(x)

# Figure 1: one curve with a title, axis labels and a faint grid.
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(x, y)
ax.set_title('Simple Line Plot')
ax.set_xlabel('X axis')
ax.set_ylabel('Y axis')
ax.grid(True, alpha=0.3)
plt.show()

# Figure 2: two curves told apart by colour, dash pattern and legend entry.
y2 = np.cos(x)
for series, fmt, name in ((y, 'b-', 'sin(x)'), (y2, 'r--', 'cos(x)')):
    plt.plot(x, series, fmt, label=name, linewidth=2)
plt.legend()
plt.show()
Ideal for revealing relationships and patterns between variables
# Scatter plots: a plain point cloud first, then one with per-point colour
# and size.
import matplotlib.pyplot as plt
import numpy as np

# y follows 2*x plus normally distributed noise scaled by 0.5.
x = np.random.randn(100)
noise = np.random.randn(100) * 0.5
y = 2 * x + noise

# Figure 1: semi-transparent points so overlapping markers stay visible.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(x, y, alpha=0.6)
ax.set_title('Scatter Plot')
ax.set_xlabel('X values')
ax.set_ylabel('Y values')
ax.grid(True, alpha=0.3)
plt.show()

# Figure 2: random colour values mapped through 'viridis' and random marker
# sizes; the colorbar is the key for the colour values.
colors = np.random.rand(100)
sizes = np.random.randint(20, 200, 100)
points = plt.scatter(x, y, s=sizes, c=colors, cmap='viridis', alpha=0.6)
plt.colorbar(points, label='Color Scale')
plt.show()
Best for comparing quantities across categories
# Basic bar charts: vertical, horizontal and grouped.
import matplotlib.pyplot as plt
import numpy as np  # required by np.arange in the grouped-bar example below

categories = ['A', 'B', 'C', 'D', 'E']
values = [23, 45, 56, 78, 32]

# Vertical bar chart
plt.figure(figsize=(8, 6))
plt.bar(categories, values, color='skyblue')
plt.title('Bar Chart')
plt.xlabel('Categories')
plt.ylabel('Values')
plt.show()

# Horizontal bar chart
plt.barh(categories, values, color='coral')
plt.title('Horizontal Bar Chart')
plt.show()

# Grouped bars: the two series sit side by side, each shifted half a bar
# width away from the integer position of its category.
x = np.arange(len(categories))
width = 0.35
values2 = [30, 35, 40, 45, 50]
plt.bar(x - width / 2, values, width, label='Series 1')
plt.bar(x + width / 2, values2, width, label='Series 2')
# Replace the numeric tick positions with the category names.
plt.xticks(x, categories)
plt.legend()
plt.show()
Advanced techniques for professional line plots
# Customizing line plots: line styles, markers, a filled area, an annotation
# and a secondary y-axis, all on one figure.
import matplotlib.pyplot as plt
import numpy as np

x = np.linspace(0, 10, 100)
y1 = np.sin(x)
y2 = np.cos(x)
y3 = y1 * np.exp(-x / 10)  # sine scaled by a decaying exponential

fig, ax = plt.subplots(figsize=(10, 6))

# One curve per line style; the label names the style in the legend.
line_specs = (
    (y1, 'b-', 'Solid'),
    (y2, 'r--', 'Dashed'),
    (y3, 'g:', 'Dotted'),
)
for curve, fmt, style_name in line_specs:
    ax.plot(x, curve, fmt, linewidth=2, label=style_name)

# Mark every tenth sample of the sine curve with a blue dot.
every_tenth = slice(None, None, 10)
ax.plot(x[every_tenth], y1[every_tenth], 'bo', markersize=8)

# Shade the region between the zero line and the damped curve.
ax.fill_between(x, 0, y3, alpha=0.3, color='green')

# Point an arrow at the largest sample of the damped curve; the text is
# offset up and to the right of that point.
max_idx = np.argmax(y3)
peak_x, peak_y = x[max_idx], y3[max_idx]
ax.annotate(
    'Peak',
    xy=(peak_x, peak_y),
    xytext=(peak_x + 1, peak_y + 0.2),
    arrowprops={'arrowstyle': '->', 'color': 'black'},
)

# Titles, axis labels, dashed grid and legend for the primary axes.
ax.set_title('Advanced Line Plot Features', fontsize=16, fontweight='bold')
ax.set_xlabel('Time (seconds)', fontsize=12)
ax.set_ylabel('Amplitude', fontsize=12)
ax.grid(True, alpha=0.3, linestyle='--')
ax.legend(loc='upper right', frameon=True, shadow=True)

# Secondary y-axis sharing the same x-axis: the sine curve scaled by 10,
# with the axis label and tick labels coloured to match the line.
ax2 = ax.twinx()
ax2.plot(x, 10 * y1, 'orange', linewidth=1, alpha=0.5)
ax2.set_ylabel('Secondary Scale', color='orange')
ax2.tick_params(axis='y', labelcolor='orange')

plt.tight_layout()
plt.show()
Create publication-quality scatter plots with advanced features
# Advanced scatter plot: three normally distributed clusters, a fitted line
# for one cluster, a covariance ellipse for another, and a statistics box.
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.patches import Ellipse

# Generate data with clusters (seeded so the figure is reproducible).
np.random.seed(42)

# Three clusters of 50 points each, all with standard deviation 0.5.
cluster1_x = np.random.normal(2, 0.5, 50)
cluster1_y = np.random.normal(2, 0.5, 50)
cluster2_x = np.random.normal(5, 0.5, 50)
cluster2_y = np.random.normal(5, 0.5, 50)
cluster3_x = np.random.normal(3.5, 0.5, 50)
cluster3_y = np.random.normal(8, 0.5, 50)

# Pool every point so the statistics box reports values computed from the
# data actually plotted rather than hand-typed numbers.
all_x = np.concatenate([cluster1_x, cluster2_x, cluster3_x])
all_y = np.concatenate([cluster1_y, cluster2_y, cluster3_y])
n_points = all_x.size
correlation = np.corrcoef(all_x, all_y)[0, 1]

fig, ax = plt.subplots(figsize=(10, 8))

# Plot clusters with different colours and marker shapes.
ax.scatter(cluster1_x, cluster1_y, c='blue', marker='o',
           s=100, alpha=0.6, label='Cluster A', edgecolors='black')
ax.scatter(cluster2_x, cluster2_y, c='red', marker='^',
           s=100, alpha=0.6, label='Cluster B', edgecolors='black')
ax.scatter(cluster3_x, cluster3_y, c='green', marker='s',
           s=100, alpha=0.6, label='Cluster C', edgecolors='black')

# Degree-1 least-squares fit through cluster A, drawn over the cluster's
# own x range only.
z = np.polyfit(cluster1_x, cluster1_y, 1)
p = np.poly1d(z)
x_line = np.linspace(cluster1_x.min(), cluster1_x.max(), 100)
ax.plot(x_line, p(x_line), "b--", alpha=0.8, linewidth=2)

# Covariance ellipse for cluster B. eigh returns eigenvalues in ascending
# order, so column 0 of eigenvectors is the direction of least variance;
# `width` is measured along that direction and `height` along the other.
# Full axis lengths of 2*sqrt(eigenvalue) make each semi-axis one standard
# deviation long, i.e. this is the 1-sigma ellipse.
cov = np.cov(cluster2_x, cluster2_y)
eigenvalues, eigenvectors = np.linalg.eigh(cov)
vec_x, vec_y = eigenvectors[:, 0]
angle = np.degrees(np.arctan2(vec_y, vec_x))
width, height = 2 * np.sqrt(eigenvalues)
ellipse = Ellipse((np.mean(cluster2_x), np.mean(cluster2_y)),
                  width, height, angle=angle,
                  facecolor='none', edgecolor='red',
                  linewidth=2, linestyle='--', alpha=0.5)
ax.add_patch(ellipse)

# Styling
ax.set_title('Advanced Scatter Plot with Clusters', fontsize=16, fontweight='bold')
ax.set_xlabel('X Variable', fontsize=12)
ax.set_ylabel('Y Variable', fontsize=12)
ax.grid(True, alpha=0.3)
ax.legend(loc='upper left')

# Text box with statistics, positioned in axes-fraction coordinates so it
# stays in the top-left corner regardless of the data limits.
textstr = f'Total points: {n_points}\nCorrelation: {correlation:.2f}'
props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
ax.text(0.05, 0.95, textstr, transform=ax.transAxes, fontsize=10,
        verticalalignment='top', bbox=props)

plt.tight_layout()
plt.show()
Create stunning bar charts with advanced styling
# Advanced bar charts: grouped, stacked, horizontal with error bars, and
# hatched bars, laid out on one 2x2 figure.
import matplotlib.pyplot as plt
import numpy as np

# Data: one row per product, one column per quarter.
categories = ['Product A', 'Product B', 'Product C', 'Product D', 'Product E']
quarters = ['Q1', 'Q2', 'Q3', 'Q4']
values = np.array([[20, 35, 30, 35],
                   [25, 32, 34, 20],
                   [15, 20, 35, 30],
                   [30, 25, 30, 35],
                   [10, 15, 25, 30]])

fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# 1. Grouped bar chart: each quarter is shifted right by one bar width.
ax1 = axes[0, 0]
x = np.arange(len(categories))
width = 0.2
colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4']
for i, quarter in enumerate(quarters):
    ax1.bar(x + i * width, values[:, i], width,
            label=quarter, color=colors[i])
ax1.set_title('Grouped Bar Chart', fontweight='bold')
# The four bars are centred at x, x+w, x+2w, x+3w, so the middle of each
# group is x + 1.5*w.
ax1.set_xticks(x + width * 1.5)
ax1.set_xticklabels(categories, rotation=45, ha='right')
ax1.legend()
ax1.grid(axis='y', alpha=0.3)

# 2. Stacked bar chart: `bottom` accumulates the height already drawn for
# each product so the next quarter starts on top of the previous ones.
ax2 = axes[0, 1]
bottom = np.zeros(len(categories))
for i, quarter in enumerate(quarters):
    ax2.bar(categories, values[:, i], bottom=bottom,
            label=quarter, color=colors[i])
    bottom += values[:, i]
ax2.set_title('Stacked Bar Chart', fontweight='bold')
ax2.legend()
ax2.grid(axis='y', alpha=0.3)

# 3. Horizontal bar chart with error bars
ax3 = axes[1, 0]
y_pos = np.arange(len(categories))
performance = [30, 45, 56, 78, 32]
error = [5, 3, 7, 2, 4]
bars = ax3.barh(y_pos, performance, xerr=error,
                color='skyblue', ecolor='gray', capsize=5)
# Value labels start just past the right end of each error bar so the text
# does not sit on top of the error bar itself.
label_gap = 1
for bar, val, err in zip(bars, performance, error):
    ax3.text(val + err + label_gap, bar.get_y() + bar.get_height() / 2,
             f'{val}±{err}', va='center')
# Text is ignored by autoscaling, so extend the x-axis to leave room for
# the label of the longest bar.
ax3.set_xlim(right=max(val + err for val, err in zip(performance, error)) * 1.2)
ax3.set_yticks(y_pos)
ax3.set_yticklabels(categories)
ax3.set_title('Horizontal Bar with Error Bars', fontweight='bold')
ax3.set_xlabel('Performance Score')
ax3.grid(axis='x', alpha=0.3)

# 4. Bar chart with patterns: one hatch pattern per bar.
ax4 = axes[1, 1]
patterns = ['/', '\\', '|', '-', '+']
bars = ax4.bar(categories, performance)
for bar, pattern in zip(bars, patterns):
    bar.set_hatch(pattern)
    bar.set_edgecolor('black')
    bar.set_linewidth(1.5)
ax4.set_title('Bar Chart with Patterns', fontweight='bold')
ax4.set_ylabel('Values')
ax4.grid(axis='y', alpha=0.3)
# Color bars based on value: green above 50, orange above 30, red otherwise.
for bar, val in zip(bars, performance):
    if val > 50:
        bar.set_facecolor('#2ecc71')
    elif val > 30:
        bar.set_facecolor('#f39c12')
    else:
        bar.set_facecolor('#e74c3c')

plt.suptitle('Advanced Bar Chart Techniques', fontsize=16, fontweight='bold')
plt.tight_layout()
plt.show()
Matplotlib comes with various built-in styles to quickly change the look of your plots:
# Apply different styles
import matplotlib.pyplot as plt

# List available styles
print(plt.style.available)

# Use a style for every figure created from here on.
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8...'
# and newer releases no longer accept the bare 'seaborn' name, so use
# whichever spelling this installation provides.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)

# or apply a style only inside a `with` block
with plt.style.context('ggplot'):
    # Your plotting code here
    plt.plot([1, 2, 3], [4, 5, 6])
Let's create a comprehensive sales visualization combining all three chart types:
# Sales dashboard: six panels (line, bar, scatter, grouped bar, bar + target
# line, pie) built from synthetic 2024 data, plus a summary footer.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

# Generate sample sales data (seeded so the dashboard is reproducible).
np.random.seed(42)
dates = pd.date_range(start='2024-01-01', end='2024-12-31', freq='D')
n_days = len(dates)

# Sales data with trend and seasonality: a linear ramp from 100 to 150, a
# sine wave of amplitude 20 with a 365-day period, and normal noise.
trend = np.linspace(100, 150, n_days)
seasonal = 20 * np.sin(np.arange(n_days) * 2 * np.pi / 365)
noise = np.random.normal(0, 10, n_days)
daily_sales = trend + seasonal + noise

# Product categories
products = ['Electronics', 'Clothing', 'Food', 'Books', 'Sports']
product_sales = [45000, 32000, 28000, 18000, 22000]
product_growth = [15, -5, 8, 12, 20]  # YoY growth

# Regional data
regions = ['North', 'South', 'East', 'West']
regional_q1 = [25000, 22000, 28000, 20000]
regional_q2 = [27000, 24000, 30000, 23000]
regional_q3 = [30000, 26000, 32000, 25000]
regional_q4 = [32000, 28000, 35000, 27000]

# Customer satisfaction vs sales correlation: sales rise linearly with
# satisfaction, plus normal noise.
n_stores = 50
satisfaction = np.random.uniform(3, 5, n_stores)
store_sales = 20000 + 15000 * satisfaction + np.random.normal(0, 5000, n_stores)

# Create comprehensive dashboard
fig = plt.figure(figsize=(16, 10))
fig.suptitle('2024 Sales Performance Dashboard', fontsize=20, fontweight='bold')

# 1. Line plot - Daily sales trend
ax1 = plt.subplot(2, 3, 1)
ax1.plot(dates, daily_sales, color='#2E86AB', linewidth=1, alpha=0.7)
# Add 30-day moving average (NaN, hence not drawn, for the first 29 days).
window = 30
ma = pd.Series(daily_sales).rolling(window=window).mean()
ax1.plot(dates, ma, color='#A23B72', linewidth=2, label=f'{window}-day MA')
# Highlight max and min
max_idx = np.argmax(daily_sales)
min_idx = np.argmin(daily_sales)
ax1.plot(dates[max_idx], daily_sales[max_idx], 'go', markersize=10, label='Peak')
ax1.plot(dates[min_idx], daily_sales[min_idx], 'ro', markersize=10, label='Low')
ax1.set_title('Daily Sales Trend', fontweight='bold')
ax1.set_xlabel('Date')
ax1.set_ylabel('Sales ($K)')
ax1.legend()
ax1.grid(True, alpha=0.3)
ax1.tick_params(axis='x', rotation=45)

# 2. Bar chart - Product performance (green = growth, red = decline)
ax2 = plt.subplot(2, 3, 2)
colors = ['#2ecc71' if g > 0 else '#e74c3c' for g in product_growth]
bars = ax2.bar(products, product_sales, color=colors, edgecolor='black', linewidth=1)
# Add value labels centred above each bar.
for bar, val, growth in zip(bars, product_sales, product_growth):
    height = bar.get_height()
    ax2.text(bar.get_x() + bar.get_width() / 2., height,
             f'${val/1000:.0f}K\n({growth:+d}%)',
             ha='center', va='bottom', fontsize=9)
ax2.set_title('Product Sales & Growth', fontweight='bold')
ax2.set_ylabel('Sales ($)')
# Extra headroom so the two-line labels fit above the tallest bar.
ax2.set_ylim(0, max(product_sales) * 1.2)
ax2.grid(axis='y', alpha=0.3)

# 3. Scatter plot - Satisfaction vs Sales
ax3 = plt.subplot(2, 3, 3)
scatter = ax3.scatter(satisfaction, store_sales,
                      c=store_sales, s=100,
                      cmap='viridis', alpha=0.6,
                      edgecolors='black', linewidth=1)
# Add trend line. For a straight-line fit R² equals the squared Pearson
# correlation, so report the value computed from the data.
z = np.polyfit(satisfaction, store_sales, 1)
p = np.poly1d(z)
r_squared = np.corrcoef(satisfaction, store_sales)[0, 1] ** 2
trend_x = np.linspace(3, 5, 100)
ax3.plot(trend_x, p(trend_x),
         'r--', linewidth=2, alpha=0.8, label=f'Trend: R²={r_squared:.2f}')
ax3.set_title('Customer Satisfaction vs Store Sales', fontweight='bold')
ax3.set_xlabel('Customer Satisfaction (1-5)')
ax3.set_ylabel('Store Sales ($)')
ax3.legend()
ax3.grid(True, alpha=0.3)
plt.colorbar(scatter, ax=ax3, label='Sales ($)')

# 4. Grouped bar chart - Regional quarterly performance
ax4 = plt.subplot(2, 3, 4)
x = np.arange(len(regions))
width = 0.2
# Offsets of -1.5w .. +1.5w centre the four bars on each region's tick.
ax4.bar(x - 1.5 * width, regional_q1, width, label='Q1', color='#FF6B6B')
ax4.bar(x - 0.5 * width, regional_q2, width, label='Q2', color='#4ECDC4')
ax4.bar(x + 0.5 * width, regional_q3, width, label='Q3', color='#45B7D1')
ax4.bar(x + 1.5 * width, regional_q4, width, label='Q4', color='#96CEB4')
ax4.set_title('Regional Quarterly Sales', fontweight='bold')
ax4.set_xlabel('Region')
ax4.set_ylabel('Sales ($)')
ax4.set_xticks(x)
ax4.set_xticklabels(regions)
ax4.legend()
ax4.grid(axis='y', alpha=0.3)

# 5. Combined line and bar - Monthly comparison
ax5 = plt.subplot(2, 3, 5)
# resample() needs a DatetimeIndex, so index the daily values by date.
# 'MS' (month start) bins give exactly one total per calendar month.
monthly_sales = pd.Series(daily_sales, index=dates).resample('MS').sum()
monthly_target = [4200] * 12
ax5_bar = ax5.bar(range(12), monthly_sales, color='skyblue',
                  alpha=0.7, label='Actual')
ax5.plot(range(12), monthly_target, 'r--', linewidth=2,
         marker='o', label='Target')
# Color bars based on target achievement
for bar, actual, target in zip(ax5_bar, monthly_sales, monthly_target):
    if actual >= target:
        bar.set_facecolor('#2ecc71')
    else:
        bar.set_facecolor('#e74c3c')
ax5.set_title('Monthly Sales vs Target', fontweight='bold')
ax5.set_xlabel('Month')
ax5.set_ylabel('Sales ($K)')
ax5.set_xticks(range(12))
ax5.set_xticklabels(['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                     'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])
ax5.legend()
ax5.grid(axis='y', alpha=0.3)

# 6. Pie chart bonus - Market share (the largest slice is pulled out).
ax6 = plt.subplot(2, 3, 6)
explode = [0.05 if s == max(product_sales) else 0 for s in product_sales]
colors_pie = plt.cm.Set3(np.linspace(0, 1, len(products)))
wedges, texts, autotexts = ax6.pie(product_sales, labels=products,
                                   autopct='%1.1f%%', explode=explode,
                                   colors=colors_pie, startangle=90)
for autotext in autotexts:
    autotext.set_color('white')
    autotext.set_weight('bold')
ax6.set_title('Product Market Share', fontweight='bold')

# Add summary statistics as a footer in figure coordinates.
total_sales = sum(product_sales)
avg_satisfaction = np.mean(satisfaction)
total_stores = len(satisfaction)
fig.text(0.02, 0.02,
         f'Total Annual Sales: ${total_sales:,.0f} | ' +
         f'Avg Customer Satisfaction: {avg_satisfaction:.2f}/5.0 | ' +
         f'Active Stores: {total_stores}',
         fontsize=12, ha='left',
         bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

# Reserve a strip at the bottom for the footer and at the top for the
# figure title so the subplots do not overlap either of them.
plt.tight_layout(rect=[0, 0.06, 1, 0.95])
plt.show()