Dashboard Creation with Streamlit - Python Data Science Path

Build Data Apps in Pure Python! 🚀

Streamlit turns data scripts into shareable web apps in minutes. All in pure Python. No front-end experience required. It's the fastest way to build and share data applications - perfect for machine learning demos, data exploration tools, and interactive reports!

Why Streamlit?

Streamlit revolutionizes how data scientists create web applications:

🐍 Pure Python: No HTML, CSS, or JavaScript needed
⚡ Instant Updates: See changes as you save your script
🔄 Reactive: The script reruns and the UI updates whenever a widget value changes
📊 Data-First: Built specifically for data science workflows
🎨 Beautiful by Default: Professional looking apps with zero styling
☁️ Easy Deployment: Free hosting on Streamlit Community Cloud
🔌 Integration: Works with all major Python libraries

Installation and Setup

# Install Streamlit
# pip install streamlit

# Optional: Install additional components
# pip install streamlit-aggrid  # For advanced data tables
# pip install streamlit-folium  # For maps
# pip install streamlit-plotly-events  # For Plotly interactions

# Create your first app (app.py)
import streamlit as st

st.title("My First Streamlit App")
st.write("Hello, World! 👋")

# Run the app
# streamlit run app.py

# Check Streamlit version
import streamlit as st
print(f"Streamlit version: {st.__version__}")

Basic Components

Text and Markdown

import streamlit as st

# Title and headers
st.title("Dashboard Title")
st.header("This is a header")
st.subheader("This is a subheader")

# Text
st.text("Fixed width text")
st.write("This supports _markdown_ **formatting**")

# Markdown
st.markdown("""
### Features
- Bullet points
- **Bold text**
- *Italic text*
- [Links](https://streamlit.io)
- `code snippets`
""")

# LaTeX
st.latex(r'''
     e^{i\pi} + 1 = 0
     ''')

# Code
code = '''def hello():
    print("Hello, Streamlit!")'''
st.code(code, language='python')

# Success, info, warning, error messages
st.success("Success message")
st.info("Information message")
st.warning("Warning message")
st.error("Error message")
st.exception(Exception("This is an exception"))

Data Display

import streamlit as st
import pandas as pd
import numpy as np

# Create sample data
df = pd.DataFrame(
    np.random.randn(10, 5),
    columns=['A', 'B', 'C', 'D', 'E']
)

# Display DataFrame
st.dataframe(df)  # Interactive table
st.table(df)  # Static table

# Styled DataFrame
st.dataframe(df.style.highlight_max(axis=0))

# Metrics
col1, col2, col3 = st.columns(3)
col1.metric("Temperature", "70 °F", "1.2 °F")
col2.metric("Wind", "9 mph", "-8%")
col3.metric("Humidity", "86%", "4%")

# JSON
json_data = {"name": "John", "age": 30, "city": "New York"}
st.json(json_data)

Interactive Widgets

Input Widgets

import streamlit as st
import datetime

# Button
if st.button('Click me'):
    st.write('Button clicked!')

# Checkbox
agree = st.checkbox('I agree to the terms')
if agree:
    st.write('Thank you for agreeing!')

# Radio buttons
genre = st.radio(
    "What's your favorite movie genre?",
    ('Comedy', 'Drama', 'Documentary')
)
st.write(f'You selected: {genre}')

# Selectbox
option = st.selectbox(
    'How would you like to be contacted?',
    ('Email', 'Home phone', 'Mobile phone')
)

# Multiselect
options = st.multiselect(
    'What are your favorite colors?',
    ['Green', 'Yellow', 'Red', 'Blue'],
    ['Yellow', 'Red']
)

# Slider
age = st.slider('How old are you?', 0, 130, 25)
st.write(f"I'm {age} years old")

# Range slider
values = st.slider(
    'Select a range of values',
    0.0, 100.0, (25.0, 75.0)
)

# Text input
title = st.text_input('Movie title', 'Life of Brian')
st.write('The current movie title is', title)

# Text area
txt = st.text_area('Text to analyze', '''
    It was the best of times, it was the worst of times...
    ''')

# Number input
number = st.number_input('Insert a number', value=0, step=1)

# Date input
d = st.date_input("When's your birthday", datetime.date(2000, 1, 1))

# Time input
t = st.time_input('Set an alarm for', datetime.time(8, 45))

# File uploader
# pandas parses the uploaded CSV; it is imported here because this snippet's
# import section only brings in streamlit and datetime.
import pandas as pd

uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
if uploaded_file is not None:
    data = pd.read_csv(uploaded_file)
    st.write(data)

# Color picker
color = st.color_picker('Pick A Color', '#00f900')
st.write('The current color is', color)

Layouts and Containers

Organizing Your App

import streamlit as st

# Sidebar
with st.sidebar:
    st.title("Sidebar Title")
    option = st.selectbox("Choose option", ["A", "B", "C"])
    
# Alternative sidebar syntax
st.sidebar.title("Navigation")
page = st.sidebar.radio("Go to", ["Home", "Data", "Model"])

# Columns
col1, col2, col3 = st.columns(3)

with col1:
    st.header("Column 1")
    st.write("Content for column 1")

with col2:
    st.header("Column 2")
    st.write("Content for column 2")

with col3:
    st.header("Column 3")
    st.write("Content for column 3")

# Columns with different widths
col1, col2 = st.columns([2, 1])  # 2:1 ratio

# Sample data for the chart and table examples below, so this snippet runs on
# its own (a plain list is accepted by both st.line_chart and st.dataframe)
data = [3, 1, 4, 1, 5, 9, 2, 6]

# Tabs
tab1, tab2, tab3 = st.tabs(["Chart", "Data", "Settings"])

with tab1:
    st.header("Chart")
    st.line_chart(data)

with tab2:
    st.header("Data")
    st.dataframe(data)

with tab3:
    st.header("Settings")
    st.write("Configuration options here")

# Expander
with st.expander("See explanation"):
    st.write("""
        The chart above shows some numbers I picked for you.
        I rolled actual dice for these, so they're *guaranteed* to be random.
    """)
    st.image("https://static.streamlit.io/examples/dice.jpg")

# Container
with st.container():
    st.write("This is inside the container")
    # You can add any elements here

# Empty placeholder
placeholder = st.empty()
# Later update it
placeholder.text("Hello")
# Replace with something else
placeholder.line_chart(data)

Data Visualization

Built-in Charts

import streamlit as st
import pandas as pd
import numpy as np

# Generate sample data
chart_data = pd.DataFrame(
    np.random.randn(20, 3),
    columns=['a', 'b', 'c']
)

# Line chart
st.line_chart(chart_data)

# Area chart
st.area_chart(chart_data)

# Bar chart
st.bar_chart(chart_data)

# Scatter plot (using Altair behind the scenes)
scatter_data = pd.DataFrame({
    'x': np.random.randn(100),
    'y': np.random.randn(100),
    'c': np.random.choice(['A', 'B', 'C'], 100)
})

st.scatter_chart(
    scatter_data,
    x='x',
    y='y',
    color='c'
)

# Map
map_data = pd.DataFrame(
    np.random.randn(100, 2) / [50, 50] + [37.76, -122.4],
    columns=['lat', 'lon']
)
st.map(map_data)

Integration with Plotting Libraries

import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import altair as alt

# Matplotlib
fig, ax = plt.subplots()
ax.plot([1, 2, 3, 4], [1, 4, 2, 3])
ax.set_title("Matplotlib Plot")
st.pyplot(fig)

# Plotly
df = px.data.iris()
fig = px.scatter(df, x="sepal_width", y="sepal_length", color="species")
st.plotly_chart(fig, use_container_width=True)

# Altair
chart = alt.Chart(df).mark_circle().encode(
    x='sepal_width',
    y='sepal_length',
    color='species',
    tooltip=['sepal_width', 'sepal_length', 'species']
).interactive()
st.altair_chart(chart, use_container_width=True)

# Bokeh
from bokeh.plotting import figure

p = figure(
    title='Simple line example',
    x_axis_label='x',
    y_axis_label='y'
)
p.line([1, 2, 3, 4, 5], [6, 7, 2, 4, 5], line_width=2)
st.bokeh_chart(p, use_container_width=True)

# Seaborn (via matplotlib)
import seaborn as sns
fig, ax = plt.subplots()
sns.heatmap(df.select_dtypes(include=np.number).corr(), ax=ax)
st.pyplot(fig)

Session State and Caching

Managing State

import streamlit as st

# Initialize session state
if 'counter' not in st.session_state:
    st.session_state.counter = 0

# Increment counter button
if st.button('Increment'):
    st.session_state.counter += 1

st.write(f'Counter: {st.session_state.counter}')

# Callback functions with session state
def increment_counter():
    """Add one to the ``counter`` entry in ``st.session_state``."""
    st.session_state.counter += 1

st.button('Increment with callback', on_click=increment_counter)

# Form with session state
with st.form("my_form"):
    st.write("Inside the form")
    slider_val = st.slider("Form slider", 0, 10, 5)
    checkbox_val = st.checkbox("Form checkbox")
    
    # Every form must have a submit button
    submitted = st.form_submit_button("Submit")
    if submitted:
        st.session_state.form_slider = slider_val
        st.session_state.form_checkbox = checkbox_val
        st.write("Form submitted!")

# Display form values from session state
if 'form_slider' in st.session_state:
    st.write(f"Slider value: {st.session_state.form_slider}")
    st.write(f"Checkbox value: {st.session_state.form_checkbox}")

Performance Optimization with Caching

import streamlit as st
import pandas as pd
import time

# Cache data loading
@st.cache_data  # New caching decorator (replaces st.cache)
def load_data():
    """Read ``large_dataset.csv`` into a DataFrame after a simulated delay."""
    time.sleep(2)  # Stand-in for an expensive load
    frame = pd.read_csv("large_dataset.csv")
    return frame

# Cache resource initialization (models, database connections)
@st.cache_resource
def init_model():
    """Create and return a ``RandomForestClassifier`` built with default arguments.

    The classifier is not fitted here; the "Load pretrained weights" step
    below is a placeholder comment only.
    """
    # Load ML model (this runs only once)
    # Imported inside the function, so the import happens when the function runs
    from sklearn.ensemble import RandomForestClassifier
    model = RandomForestClassifier()
    # Load pretrained weights...
    return model

# Cache with parameters
@st.cache_data
def expensive_computation(param1, param2):
    """Return ``param1 ** 2 + param2 ** 2`` after a simulated two-second delay.

    Wrapped in ``st.cache_data``, which caches the result for each unique
    combination of parameters.
    """
    sum_of_squares = pow(param1, 2) + pow(param2, 2)
    time.sleep(2)  # Stand-in for slow work
    return sum_of_squares

# Clear cache
# The button promises to clear *all* caches, so reset both the data cache and
# the resource cache (models, connections).
if st.button("Clear all cache"):
    st.cache_data.clear()
    st.cache_resource.clear()
    st.success("Cache cleared!")

# Use cached functions
data = load_data()
st.write(f"Loaded {len(data)} rows")

result = expensive_computation(5, 10)
st.write(f"Result: {result}")

# TTL (Time To Live) for cache
@st.cache_data(ttl=3600)  # Cache for 1 hour
def get_live_data():
    """Return whatever ``fetch_from_api()`` returns, cached with ``ttl=3600``."""
    # Fetch data from API
    # NOTE: fetch_from_api is not defined in this snippet — it is a placeholder
    # for your own data-fetching function.
    return fetch_from_api()

# Show spinner while computing
with st.spinner('Loading data...'):
    data = load_data()
st.success('Data loaded!')

Building a Complete Dashboard

# app.py - Complete Sales Dashboard
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime, timedelta

# Page configuration
st.set_page_config(
    page_title="Sales Dashboard",
    page_icon="📊",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Custom CSS
# unsafe_allow_html=True is only useful when the markdown actually contains
# HTML: the style block defines a class and the title below applies it.
st.markdown("""
<style>
.main-header {
    font-size: 2.5rem;
    font-weight: 700;
    text-align: center;
    margin-bottom: 1rem;
}
</style>
""", unsafe_allow_html=True)

# Title
st.markdown('<h1 class="main-header">📊 Sales Analytics Dashboard</h1>',
            unsafe_allow_html=True)

# Generate sample data
@st.cache_data
def generate_data():
    """Build a synthetic daily sales DataFrame for 2023-01-01 through 2024-12-31.

    Seeds NumPy's global random generator with 42, then creates one row per
    day with the columns ``date``, ``sales``, ``customers``, ``region``,
    ``product``, ``satisfaction`` and ``profit``.

    Returns:
        The generated DataFrame.
    """
    # Seeds the global NumPy RNG; the values below depend on the order of the draws
    np.random.seed(42)
    dates = pd.date_range(start='2023-01-01', end='2024-12-31', freq='D')
    n = len(dates)
    
    data = pd.DataFrame({
        'date': dates,
        # Normal noise around 10000 plus a sine term (period 365 rows, amplitude 3000)
        'sales': np.random.normal(10000, 2000, n) + np.sin(np.arange(n) * 2 * np.pi / 365) * 3000,
        'customers': np.random.poisson(100, n),
        'region': np.random.choice(['North', 'South', 'East', 'West'], n),
        'product': np.random.choice(['Product A', 'Product B', 'Product C'], n),
        'satisfaction': np.random.uniform(3, 5, n)
    })
    # Profit is sales scaled by a per-row factor drawn uniformly from [0.1, 0.3)
    data['profit'] = data['sales'] * np.random.uniform(0.1, 0.3, n)
    return data

# Load data
df = generate_data()

# Sidebar filters
st.sidebar.header("Filters")

# Date range filter
date_range = st.sidebar.date_input(
    "Select Date Range",
    value=(df['date'].min(), df['date'].max()),
    min_value=df['date'].min(),
    max_value=df['date'].max()
)

# Region filter
regions = st.sidebar.multiselect(
    "Select Regions",
    options=df['region'].unique(),
    default=df['region'].unique()
)

# Product filter
products = st.sidebar.multiselect(
    "Select Products",
    options=df['product'].unique(),
    default=df['product'].unique()
)

# A range date_input only holds two dates once the user has picked both ends;
# mid-selection it holds fewer, so wait instead of indexing past the end.
if not isinstance(date_range, (tuple, list)) or len(date_range) != 2:
    st.sidebar.info("Select both a start and an end date.")
    st.stop()
start_date, end_date = date_range

# Filter data
filtered_df = df[
    (df['date'] >= pd.Timestamp(start_date)) &
    (df['date'] <= pd.Timestamp(end_date)) &
    (df['region'].isin(regions)) &
    (df['product'].isin(products))
]

# The idxmax-based insights later in this script raise on an empty frame,
# so stop here with a message when the filters exclude every row.
if filtered_df.empty:
    st.warning("No data matches the selected filters.")
    st.stop()

# Key metrics
# NOTE: every delta below is drawn from np.random on each script run. They are
# placeholders for demonstration, not a real period-over-period change.
col1, col2, col3, col4 = st.columns(4)

with col1:
    # Sum of the sales column over the filtered rows
    total_sales = filtered_df['sales'].sum()
    st.metric(
        label="Total Sales",
        value=f"${total_sales:,.0f}",
        delta=f"{np.random.uniform(-10, 10):.1f}%"
    )

with col2:
    # Sum of the profit column over the filtered rows
    total_profit = filtered_df['profit'].sum()
    st.metric(
        label="Total Profit",
        value=f"${total_profit:,.0f}",
        delta=f"{np.random.uniform(-10, 10):.1f}%"
    )

with col3:
    # Mean of the satisfaction column over the filtered rows
    avg_satisfaction = filtered_df['satisfaction'].mean()
    st.metric(
        label="Avg Satisfaction",
        value=f"{avg_satisfaction:.2f} ⭐",
        delta=f"{np.random.uniform(-0.5, 0.5):.2f}"
    )

with col4:
    # Sum of the customers column over the filtered rows
    total_customers = filtered_df['customers'].sum()
    st.metric(
        label="Total Customers",
        value=f"{total_customers:,}",
        delta=f"{np.random.uniform(-5, 5):.1f}%"
    )

# Create two columns for charts
col1, col2 = st.columns(2)

with col1:
    # Sales trend
    st.subheader("Sales Trend Over Time")
    daily_sales = filtered_df.groupby('date')['sales'].sum().reset_index()
    
    fig_trend = px.line(
        daily_sales,
        x='date',
        y='sales',
        title="Daily Sales"
    )
    fig_trend.update_layout(showlegend=False)
    st.plotly_chart(fig_trend, use_container_width=True)
    
    # Sales by region
    st.subheader("Sales by Region")
    region_sales = filtered_df.groupby('region')['sales'].sum().reset_index()
    
    fig_region = px.pie(
        region_sales,
        values='sales',
        names='region',
        hole=0.4
    )
    st.plotly_chart(fig_region, use_container_width=True)

with col2:
    # Product performance
    st.subheader("Product Performance")
    product_metrics = filtered_df.groupby('product').agg({
        'sales': 'sum',
        'profit': 'sum',
        'customers': 'sum'
    }).reset_index()
    
    fig_product = px.bar(
        product_metrics,
        x='product',
        y=['sales', 'profit'],
        title="Sales and Profit by Product",
        barmode='group'
    )
    st.plotly_chart(fig_product, use_container_width=True)
    
    # Customer satisfaction distribution
    st.subheader("Customer Satisfaction Distribution")
    fig_satisfaction = px.histogram(
        filtered_df,
        x='satisfaction',
        nbins=20,
        title="Satisfaction Score Distribution"
    )
    st.plotly_chart(fig_satisfaction, use_container_width=True)

# Detailed data table
with st.expander("View Detailed Data"):
    st.subheader("Raw Data")
    st.dataframe(
        filtered_df.style.format({
            'sales': '${:,.0f}',
            'profit': '${:,.0f}',
            'satisfaction': '{:.2f}'
        }),
        use_container_width=True
    )
    
    # Download button
    csv = filtered_df.to_csv(index=False)
    st.download_button(
        label="Download data as CSV",
        data=csv,
        file_name='sales_data.csv',
        mime='text/csv'
    )

# Insights section
st.header("📈 Key Insights")

col1, col2, col3 = st.columns(3)

with col1:
    best_region = filtered_df.groupby('region')['sales'].sum().idxmax()
    st.info(f"**Best Performing Region:** {best_region}")

with col2:
    best_product = filtered_df.groupby('product')['profit'].sum().idxmax()
    st.info(f"**Most Profitable Product:** {best_product}")

with col3:
    peak_day = filtered_df.groupby('date')['sales'].sum().idxmax()
    st.info(f"**Peak Sales Day:** {peak_day.strftime('%Y-%m-%d')}")

# Footer
st.markdown("---")
st.markdown("Dashboard created with Streamlit • Data updated in real-time")

Advanced Features

File Upload and Processing

import streamlit as st
import pandas as pd
import io

st.title("Data Upload and Processing")

# Multiple file types
uploaded_file = st.file_uploader(
    "Choose a file",
    type=['csv', 'xlsx', 'json', 'txt'],
    accept_multiple_files=False
)

# Parsed table from the upload, if it was tabular. Stays None otherwise so the
# download section below can tell whether there is anything to export.
df = None

if uploaded_file is not None:
    # Get file details
    file_details = {
        "filename": uploaded_file.name,
        "filetype": uploaded_file.type,
        "filesize": uploaded_file.size
    }
    st.write(file_details)

    # Process based on file type. Dispatch on the file extension (which the
    # uploader's `type` list already restricts) rather than the
    # browser-reported MIME type, which can differ between platforms.
    file_name = uploaded_file.name.lower()

    if file_name.endswith(".csv"):
        df = pd.read_csv(uploaded_file)
        st.dataframe(df)

        # Data profiling
        st.subheader("Data Profile")
        st.write(f"Shape: {df.shape}")
        st.write("Data types:")
        st.write(df.dtypes)
        st.write("Summary statistics:")
        st.write(df.describe())

    elif file_name.endswith(".xlsx"):
        df = pd.read_excel(uploaded_file)
        st.dataframe(df)

    elif file_name.endswith(".json"):
        data = pd.read_json(uploaded_file)
        st.json(data.to_dict())

    else:
        # Read as text
        stringio = io.StringIO(uploaded_file.getvalue().decode("utf-8"))
        string_data = stringio.read()
        st.text(string_data)

# Download processed data
# Only offer processing once a table has been loaded; otherwise there is no
# DataFrame to copy and the button would fail with a NameError.
if df is None:
    st.info("Upload a CSV or Excel file to enable processing.")
elif st.button("Process and Download"):
    # Process data
    processed_data = df.copy()  # Your processing here

    # Convert to CSV
    csv = processed_data.to_csv(index=False)

    # Download button
    st.download_button(
        label="Download processed data",
        data=csv,
        file_name='processed_data.csv',
        mime='text/csv'
    )

Real-time Updates

import streamlit as st
import time
import random

st.title("Real-time Dashboard")

# Placeholder for dynamic content
placeholder = st.empty()

# Live updates: redraw the placeholder repeatedly within a single script run
# (this loop does not call rerun; it sleeps for about 10 seconds in total)
if st.button("Start Real-time Updates"):
    for i in range(10):
        # Each pass replaces the placeholder's previous contents
        with placeholder.container():
            # Generate new data
            value = random.random()
            
            # Display metrics
            col1, col2, col3 = st.columns(3)
            col1.metric("Metric 1", f"{value:.2f}")
            col2.metric("Metric 2", f"{value*100:.0f}")
            col3.metric("Metric 3", f"{value*1000:.0f}")
            
            # Update chart
            st.line_chart([random.random() for _ in range(20)])
            
        time.sleep(1)
        
# Alternative: Using st.rerun() for auto-refresh
# (st.experimental_rerun() is deprecated; st.rerun() is its replacement)
auto_refresh = st.checkbox("Auto-refresh every 5 seconds")
if auto_refresh:
    # Wait, then restart the script from the top; the checkbox keeps its
    # state across reruns, so this repeats until it is unticked.
    time.sleep(5)
    st.rerun()

Multi-page Applications

# Create a 'pages' directory with Python files:
# pages/
#   1_📊_Dashboard.py
#   2_📈_Analytics.py
#   3_⚙️_Settings.py

# Main app.py
import streamlit as st

st.set_page_config(
    page_title="Multi-Page App",
    page_icon="🎯",
)

st.title("Main Page")
st.sidebar.success("Select a page above.")

st.markdown("""
    ## Welcome to the Multi-Page App!
    
    **👈 Select a page from the sidebar**
    
    ### Pages:
    - Dashboard: View key metrics
    - Analytics: Detailed analysis
    - Settings: Configure the app
""")

# pages/1_📊_Dashboard.py
import streamlit as st

st.set_page_config(page_title="Dashboard", page_icon="📊")
st.title("Dashboard Page")
st.write("This is the dashboard page")

# Navigation between pages programmatically
if st.button("Go to Analytics"):
    st.switch_page("pages/2_📈_Analytics.py")

Deployment

Streamlit Cloud Deployment

# 1. Create requirements.txt
streamlit
pandas
numpy
plotly
scikit-learn

# 2. Create .streamlit/config.toml (optional)
[theme]
primaryColor = "#FF6B6B"
backgroundColor = "#FFFFFF"
secondaryBackgroundColor = "#F0F2F6"
textColor = "#262730"
font = "sans serif"

# 3. Push to GitHub

# 4. Deploy on Streamlit Cloud:
# - Go to share.streamlit.io
# - Connect GitHub account
# - Select repository and branch
# - Click Deploy!

# 5. Environment variables (secrets.toml)
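# .streamlit/secrets.toml (local development only - keep this file out of version control)
# For production, enter the same values in the app's Secrets settings on the Streamlit Cloud dashboard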
[database]
host = "localhost"
port = 3306
username = "user"
password = "pass"

# Access in app:
import streamlit as st
db_host = st.secrets["database"]["host"]

Docker Deployment

# Dockerfile
FROM python:3.9-slim

WORKDIR /app

COPY requirements.txt .
RUN pip install -r requirements.txt

COPY . .

EXPOSE 8501

CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]

# Build and run
# docker build -t streamlit-app .
# docker run -p 8501:8501 streamlit-app

Best Practices

graph TD
    A[Streamlit App Design] --> B[Performance]
    A --> C[User Experience]
    A --> D[Code Organization]
    B --> E[Use Caching]
    B --> F[Optimize Data Loading]
    B --> G[Minimize Reruns]
    C --> H[Clear Layout]
    C --> I[Responsive Design]
    C --> J[Loading States]
    D --> K[Modular Functions]
    D --> L[Separate Pages]
    D --> M[Configuration Files]

Common Patterns

Authentication

import streamlit as st
import hashlib

def make_hashes(password):
    """Return the hex-encoded SHA-256 digest of ``password``.

    NOTE: a single unsalted SHA-256 pass is fine for a demo but is not
    suitable for storing real passwords; use a salted, deliberately slow
    key-derivation function (e.g. ``hashlib.scrypt`` or
    ``hashlib.pbkdf2_hmac``) in production.
    """
    return hashlib.sha256(password.encode("utf-8")).hexdigest()

def check_hashes(password, hashed_text):
    """Check ``password`` against a stored hash.

    Args:
        password: The plain-text password to verify.
        hashed_text: The stored hex digest to compare against.

    Returns:
        ``hashed_text`` when the password's digest matches it, otherwise
        ``False``.
    """
    # Local import: this snippet's import section does not include hmac
    import hmac

    # Anything that is not a string can never equal a hex digest
    if not isinstance(hashed_text, str):
        return False

    candidate = make_hashes(password)
    # Constant-time comparison avoids leaking how many leading characters
    # matched; comparing bytes also accepts non-ASCII stored values.
    if hmac.compare_digest(candidate.encode("utf-8"), hashed_text.encode("utf-8")):
        return hashed_text
    return False

# User database (in production, use a real database)
users = {"admin": make_hashes("admin123"),
         "user": make_hashes("user123")}

def login():
    """Render a sidebar login form and record a successful login.

    Returns:
        True when the Login button was pressed with valid credentials (after
        setting ``logged_in`` and ``username`` in ``st.session_state``);
        False otherwise, including when the button has not been pressed.
    """
    st.sidebar.header("Login")

    username = st.sidebar.text_input("Username")
    password = st.sidebar.text_input("Password", type='password')

    # Nothing to validate until the user submits the form
    if not st.sidebar.button("Login"):
        return False

    if username in users and check_hashes(password, users[username]):
        st.session_state['logged_in'] = True
        st.session_state['username'] = username
        st.success(f"Logged in as {username}")
        return True

    st.error("Incorrect username/password")
    return False

def main_app():
    """Show the logged-in view: a welcome message and a Logout button."""
    st.write(f"Welcome {st.session_state['username']}!")
    # Your app content here

    if st.button("Logout"):
        st.session_state['logged_in'] = False
        # Rerun immediately so the login form replaces this view.
        # st.rerun() replaces the deprecated st.experimental_rerun().
        st.rerun()

# App logic
if 'logged_in' not in st.session_state:
    st.session_state['logged_in'] = False

if not st.session_state['logged_in']:
    login()
else:
    main_app()

Database Connection

import streamlit as st
import sqlite3
import pandas as pd

# Create connection
@st.cache_resource
def init_connection():
    """Open and return a sqlite3 connection to ``database.db``."""
    # check_same_thread=False lifts sqlite3's same-thread restriction on the connection.
    # NOTE(review): presumably needed because the cached connection is reused
    # from other threads — confirm.
    return sqlite3.connect('database.db', check_same_thread=False)

conn = init_connection()

# Query data
@st.cache_data(ttl=600)
def run_query(query):
    """Run ``query`` on the module-level ``conn`` and return the ``pd.read_sql`` result.

    The query string is passed to ``pd.read_sql`` unchanged.
    """
    # NOTE(review): for sqlite3, `with conn:` presumably scopes a transaction
    # (commit/rollback) and does not close the connection — confirm against
    # the sqlite3 documentation.
    with conn:
        return pd.read_sql(query, conn)

# Display data
df = run_query("SELECT * FROM sales LIMIT 100")
st.dataframe(df)

# Insert data
if st.button("Add Record"):
    cursor = conn.cursor()
    cursor.execute("INSERT INTO sales (date, amount) VALUES (?, ?)",
                   ('2024-01-01', 1000))
    conn.commit()
    st.success("Record added!")
    st.cache_data.clear()  # Clear cache to show new data

Streamlit vs Other Frameworks

Feature	Streamlit	Dash	Panel	Gradio
Ease of Use	✅ Excellent	⚠️ Moderate	⚠️ Moderate	✅ Excellent
Pythonic	✅ Very	⚠️ Moderate	✅ Very	✅ Very
Customization	⚠️ Limited	✅ High	✅ High	⚠️ Limited
ML Focus	✅ Good	⚠️ Basic	⚠️ Basic	✅ Excellent
Deployment	✅ Easy	⚠️ Moderate	⚠️ Moderate	✅ Easy

Practice Exercises

Exercise 1: Interactive Data Explorer

Create a data exploration app with:

File upload for CSV/Excel
Automatic data profiling
Interactive filters for each column
Dynamic visualizations based on selected columns
Export filtered data

Exercise 2: ML Model Dashboard

Build a machine learning dashboard:

Train/test split slider
Model selection dropdown
Hyperparameter tuning controls
Real-time model training
Performance metrics display
Prediction interface

Exercise 3: Real-time Monitoring App

Create a monitoring dashboard with:

Live data simulation
Auto-refresh every 5 seconds
Alert thresholds configuration
Historical data view
Export reports

Key Takeaways

🐍 Streamlit enables data apps in pure Python - no web development needed
⚡ Rapid prototyping - go from script to app in minutes
🔄 Automatic reactivity - the script reruns and the UI updates whenever a widget value changes
📊 Native support for all major data science libraries
☁️ Easy deployment with free hosting on Streamlit Community Cloud
🎨 Professional appearance with zero CSS/HTML knowledge
🔧 Session state and caching for complex applications