Scatterplots

When to use a scatterplot

  • To show the relationship between two or more variables.

Style tips

  • When encoding more than two variables:

    • Use color to encode nominal variables. These are discrete categories.

    • Use size to encode quantitative variables. These are numeric values measured on a continuous scale.

import altair as alt
from vega_datasets import data

source = data.cars()

# Fields shown on hover: three columns by name, plus the fuel-economy
# column with a human-readable label.
hover_fields = [
    'Name',
    'Origin',
    'Horsepower',
    alt.Tooltip('Miles_per_Gallon', title="Miles per gallon"),
]

# Position carries Horsepower against Miles_per_Gallon; colour carries the
# third variable, the Origin category.
cars_scatter = alt.Chart(source).mark_circle(size=60).encode(
    x=alt.X('Horsepower'),
    y=alt.Y('Miles_per_Gallon', title="Miles per gallon"),
    color=alt.Color('Origin'),
    tooltip=hover_fields,
)

# Kept as the final bare expression, as in the original, so the titled chart
# is the value of this snippet.
cars_scatter.properties(
    title="When encoding categories as a third variable use colors"
)
import altair as alt
from vega_datasets import data

source = data.iris()

# Scatterplot of sepal length against sepal width. Species is encoded as
# colour and petal width as mark size, illustrating size as the channel for
# a quantitative third variable.
alt.Chart(source).mark_circle().encode(
    # zero=False: do not force the axis scale to include zero.
    x=alt.X('sepalLength', title="Sepal length (cm)", scale=alt.Scale(zero=False)),
    y=alt.Y('sepalWidth', title="Sepal width (cm)", scale=alt.Scale(zero=False, padding=1)),
    color=alt.Color('species', title="Species"),
    # The legend title labels the petalWidth field ("Petal", not "Pedal").
    size=alt.Size('petalWidth', title="Petal width (cm)"),
    tooltip=[
        alt.Tooltip("species", title="Species"),
        alt.Tooltip("sepalLength", title="Sepal Length (cm)"),
        alt.Tooltip("sepalWidth", title="Sepal Width (cm)"),
        alt.Tooltip("petalWidth", title="Petal Width (cm)"),
    ],
).properties(
    title="When encoding quantities as a third variable use size"
)