OiO.lk Community platform!

Oio.lk is an excellent forum for developers, providing a wide range of resources, discussions, and support for those in the developer community. Join oio.lk today to connect with like-minded professionals, share insights, and stay updated on the latest trends and technologies in the development field.
  You need to log in or register to access the solved answers to this problem.
  • You have reached the maximum number of guest views allowed
  • Please register below to remove this limitation

Altair chart legend for subset of data

  • Thread starter Thread starter Don
  • Start date Start date
D

Don

Guest
As an exercise for learning more advanced altair, I'm trying to generate a simplified version of this chart: https://climatereanalyzer.org/clim/t2_daily/?dm_id=world.

To simplify, I'm using gray for all years prior to 2023 and then red and black for 2023 and 2024, respectively. I'd like to have a legend that is either just for 2023 & 2024 or is "1940-2022", "2023", "2024".

Right now I'm focused on getting a compact legend that reflects either subset of years, but I'd take any advice on how to improve the code / approach.

Code:
import pandas as pd
import altair as alt

# Function to fetch and prepare the data
def fetch_and_prep_data():
    """Download the daily temperature JSON and return it as a long DataFrame.

    Each element of the downloaded JSON list is read as one series with a
    ``name`` (used as the year label) and a ``data`` list of values.

    Returns:
        A DataFrame with the columns ``Year`` (the series name), ``Day``
        (1-based position of the value within its series) and
        ``Temperature`` (NaN where the source value is ``None``).  Series
        whose name is longer than four characters are dropped.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    # The snippet used ``requests`` without importing it anywhere, which
    # raises NameError on the first call; import it where it is needed.
    import requests

    url = "https://climatereanalyzer.org/clim/t2_daily/json/era5_world_t2_day.json"
    # A timeout keeps a stalled connection from hanging the script forever,
    # and raise_for_status() surfaces HTTP errors instead of a JSON parse
    # failure on an error page.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    data = response.json()

    all_temperatures = []
    for year_data in data:
        year = year_data['name']
        # Missing readings arrive as None; store them as NaN so the column
        # stays numeric.
        temperatures = [
            temp if temp is not None else float('nan')
            for temp in year_data['data']
        ]
        all_temperatures.append(pd.DataFrame({
            'Year': [year] * len(temperatures),
            'Day': list(range(1, len(temperatures) + 1)),
            'Temperature': temperatures,
        }))

    df_at = pd.concat(all_temperatures)

    # Drop all rows where Year is more than 4 characters long
    df_at = df_at[df_at['Year'].str.len() <= 4]

    return df_at

# Function to create the last day in month labels
def get_last_day_in_month_labels():
    """Map the day-of-year of each month end in 2023 to its month abbreviation.

    Returns:
        A dict such as ``{31: 'Jan', 59: 'Feb', ..., 365: 'Dec'}``, ordered
        from January to December.
    """
    year_days = pd.date_range(start='2023-01-01', end='2023-12-31', freq='D')
    month_ends = year_days[year_days.is_month_end]
    return dict(zip(month_ends.day_of_year, month_ends.strftime('%b')))

# Functions to determine opacity, color, and stroke width
def determine_opacity(year):
    """Return the line opacity for a year label.

    Years before 2023 get 0.01; 2023 and later get 1.0, as do labels that
    ``int()`` rejects with ValueError.
    """
    try:
        is_background_year = int(year) < 2023
    except ValueError:
        return 1.0
    if is_background_year:
        return 0.01
    return 1.0

def determine_color(year):
    """Return the line color for a year label.

    2023 is drawn in red and 2024 in black; every other integer year is
    gray.  Labels that ``int()`` rejects with ValueError are black.
    """
    try:
        parsed_year = int(year)
    except ValueError:
        return 'black'
    highlighted = {2023: 'red', 2024: 'black'}
    return highlighted.get(parsed_year, 'gray')

def determine_strokewidth(year):
    """Return the stroke width for a year label.

    Years before 2023 get width 1; 2023 and later get width 4, as do labels
    that ``int()`` rejects with ValueError.
    """
    try:
        return 1 if int(year) < 2023 else 4
    except ValueError:
        return 4

# Applying the functions to the 'Year' column
# Fetch and prepare the data
df_at = fetch_and_prep_data()
df_all = df_at.copy()
df_all['Opacity'] = df_all['Year'].apply(determine_opacity)
df_all['Color'] = df_all['Year'].apply(determine_color)
df_all['Width'] = df_all['Year'].apply(determine_strokewidth)

# Ensure 'Day' is correctly interpreted as a quantitative variable
df_all['Day'] = pd.to_numeric(df_all['Day'], errors='coerce')

# Filter the data to ensure 'Day' values are within the desired range
df_filtered = df_all[df_all['Day'] <= 365]

# Create last day in month labels
last_day_in_month_labels = get_last_day_in_month_labels()

# Extract the keys and values for tick marks and labels
tick_values = list(last_day_in_month_labels.keys())
tick_labels = list(last_day_in_month_labels.values())

# Plotting the main data using Altair with the existing Color and Opacity columns
line_chart = alt.Chart(df_filtered).mark_line().encode(
    x=alt.X(
        'Day:Q',
        title='Month',
        scale=alt.Scale(domain=(0, 365), clamp=True),
        axis=alt.Axis(
            labels=True,
            tickCount=12,
            values=tick_values,
            labelExpr=f"datum.value == {tick_values[0]} ? '{tick_labels[0]}' : " +
                      " : ".join([f"datum.value == {tick} ? '{label}'" for tick, label in zip(tick_values[1:], tick_labels[1:])]) +
                      " : ''",
            labelOffset= -30  # Shift the x-axis labels to the left by 30 units
        )
    ),
    y=alt.Y(
        'Temperature:Q',
        title='Temperature (C)',
        scale=alt.Scale(domain=(11, 18), clamp=True),
    ),
    color=alt.Color('Color:N', legend=None, scale=None),  # Use the "Color" column for line colors
    opacity=alt.Opacity('Opacity:Q', legend=None),  # Use the "Opacity" column
    detail=alt.Detail('Year:N'),  # Add detail encoding for Year, otherwise you get vertical lines
    strokeWidth=alt.StrokeWidth('Width:N'), legend=None)  # Use the "Width" column
).properties(
    width=800,
    height=600
)

line_chart```
<p>As an exercise for learning more advanced altair, I'm trying to generate a simplified version of this chart: <a href="https://climatereanalyzer.org/clim/t2_daily/?dm_id=world" rel="nofollow noreferrer">https://climatereanalyzer.org/clim/t2_daily/?dm_id=world</a>.</p>
<p>To simplify, I'm using gray for all years prior to 2023 and then red and black for 2023 and 2024, respectively. I'd like to have a legend that is either just for 2023 & 2024 or is "1940-2022", "2023", "2024".</p>
<p>Right now I'm focused on getting a compact legend that reflects either subset of years, but I'd take any advice on how to improve the code / approach.</p>
<pre><code>import pandas as pd
import altair as alt

# Function to fetch and prepare the data
def fetch_and_prep_data():
    """Download the daily temperature JSON and return it as a long DataFrame.

    Returns:
        A DataFrame with the columns ``Year`` (the series name), ``Day``
        (1-based position of the value within its series) and
        ``Temperature`` (NaN where the source value is ``None``).  Series
        whose name is longer than four characters are dropped.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    # ``requests`` was used without being imported anywhere in the snippet.
    import requests

    url = "https://climatereanalyzer.org/clim/t2_daily/json/era5_world_t2_day.json"
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    data = response.json()

    all_temperatures = []

    for year_data in data:
        year = year_data['name']
        temperatures = year_data['data']
        # Missing readings arrive as None; store them as NaN instead.
        temperatures = [temp if temp is not None else float('nan') for temp in temperatures]
        days = list(range(1, len(temperatures) + 1))

        df = pd.DataFrame({
            'Year': [year] * len(temperatures),
            'Day': days,
            'Temperature': temperatures
        })

        all_temperatures.append(df)

    df_at = pd.concat(all_temperatures)

    # Drop all rows where Year is more than 4 characters long
    df_at = df_at[df_at['Year'].str.len() <= 4]

    return df_at

# Function to create the last day in month labels
def get_last_day_in_month_labels():
    """Return ``{day_of_year: month_abbreviation}`` for each month end of 2023."""
    dates = pd.date_range(start='2023-01-01', end='2023-12-31', freq='D')
    last_days = dates[dates.is_month_end]
    labels = {
        day_of_year: month_abbr
        for day_of_year, month_abbr in zip(last_days.day_of_year, last_days.strftime('%b'))
    }
    return labels

# Functions to determine opacity, color, and stroke width
def determine_opacity(year):
    """Return 0.01 for years before 2023, otherwise 1.0.

    Labels that ``int()`` rejects with ValueError also get 1.0.
    """
    try:
        year_int = int(year)
        return 0.01 if year_int < 2023 else 1.0
    except ValueError:
        return 1.0

def determine_color(year):
    """Return 'red' for 2023, 'black' for 2024 and 'gray' for other years.

    Labels that ``int()`` rejects with ValueError are 'black'.
    """
    color = 'gray'
    try:
        year_int = int(year)
        if year_int < 2023:
            color = 'gray'
        elif year_int == 2023:
            color = 'red'
        elif year_int == 2024:
            color = 'black'
    except ValueError:
        color = 'black'
    return color

def determine_strokewidth(year):
    """Return width 1 for years before 2023, otherwise 4.

    Labels that ``int()`` rejects with ValueError also get 4.
    """
    width = 1
    try:
        year_int = int(year)
        if year_int < 2023:
            width = 1
        else:
            width = 4
    except ValueError:
        width = 4
    return width

# Applying the functions to the 'Year' column
# Fetch and prepare the data
df_at = fetch_and_prep_data()
df_all = df_at.copy()
df_all['Opacity'] = df_all['Year'].apply(determine_opacity)
df_all['Color'] = df_all['Year'].apply(determine_color)
df_all['Width'] = df_all['Year'].apply(determine_strokewidth)

# Ensure 'Day' is correctly interpreted as a quantitative variable
df_all['Day'] = pd.to_numeric(df_all['Day'], errors='coerce')

# Filter the data to ensure 'Day' values are within the desired range
df_filtered = df_all[df_all['Day'] <= 365]

# Create last day in month labels
last_day_in_month_labels = get_last_day_in_month_labels()

# Extract the keys and values for tick marks and labels
tick_values = list(last_day_in_month_labels.keys())
tick_labels = list(last_day_in_month_labels.values())

# Plotting the main data using Altair with the existing Color and Opacity columns
line_chart = alt.Chart(df_filtered).mark_line().encode(
    x=alt.X(
        'Day:Q',
        title='Month',
        scale=alt.Scale(domain=(0, 365), clamp=True),
        axis=alt.Axis(
            labels=True,
            tickCount=12,
            values=tick_values,
            labelExpr=f"datum.value == {tick_values[0]} ? '{tick_labels[0]}' : " +
                      " : ".join([f"datum.value == {tick} ? '{label}'" for tick, label in zip(tick_values[1:], tick_labels[1:])]) +
                      " : ''",
            labelOffset=-30  # Shift the x-axis labels to the left by 30 units
        )
    ),
    y=alt.Y(
        'Temperature:Q',
        title='Temperature (C)',
        scale=alt.Scale(domain=(11, 18), clamp=True),
    ),
    color=alt.Color('Color:N', legend=None, scale=None),  # Use the "Color" column for line colors
    opacity=alt.Opacity('Opacity:Q', legend=None),  # Use the "Opacity" column
    detail=alt.Detail('Year:N'),  # Add detail encoding for Year, otherwise you get vertical lines
    # legend=None must be an argument of StrokeWidth(...); the stray
    # parenthesis previously closed .encode() too early.
    strokeWidth=alt.StrokeWidth('Width:N', legend=None)  # Use the "Width" column
).properties(
    width=800,
    height=600
)

line_chart
</code></pre>
 

Latest posts

Top