In [1]:
Copied!
# Uncomment to install climdata in Google Colab or other environments
# !pip install climdata matplotlib seaborn
# Uncomment to install climdata in Google Colab or other environments
# !pip install climdata matplotlib seaborn
Multi-Dataset Comparison: CMIP, NEX-GDDP, MSWX, and HYRAS¶
This notebook demonstrates how to extract and compare climate data from multiple sources:
- CMIP6: 3 different climate models (ACCESS-CM2, MIROC6, MPI-ESM1-2-LR)
- NEX-GDDP: NASA's downscaled CMIP6 dataset
- MSWX: Multi-Source Weather (MSWX) dataset
- HYRAS: High-resolution gridded dataset for Germany
Location: Berlin, Germany (52.5°N, 13.4°E)
Variables: Maximum temperature (tasmax) and precipitation (pr)
Time Period: Historical period for comparison
1️⃣ Imports and Setup¶
In [1]:
Copied!
from climdata import ClimData
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from datetime import datetime
# Set plotting style
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")
# Berlin coordinates
LAT_BERLIN = 52.5
LON_BERLIN = 13.4
from climdata import ClimData
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from datetime import datetime
# Set plotting style
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")
# Berlin coordinates
LAT_BERLIN = 52.5
LON_BERLIN = 13.4
2️⃣ Define Time Range and Variables¶
In [2]:
Copied!
# Time range for comparison (adjust as needed)
START_DATE = "2010-01-01"
END_DATE = "2014-12-31"
# Variables to compare
VARIABLES = ["tasmax", "pr"]
# CMIP6 models to compare
CMIP_MODELS = ["ACCESS-CM2", "MIROC6", "MPI-ESM1-2-LR"]
print(f"Comparing data for Berlin ({LAT_BERLIN}°N, {LON_BERLIN}°E)")
print(f"Time period: {START_DATE} to {END_DATE}")
print(f"Variables: {', '.join(VARIABLES)}")
print(f"CMIP6 Models: {', '.join(CMIP_MODELS)}")
# Time range for comparison (adjust as needed)
START_DATE = "2010-01-01"
END_DATE = "2014-12-31"
# Variables to compare
VARIABLES = ["tasmax", "pr"]
# CMIP6 models to compare
CMIP_MODELS = ["ACCESS-CM2", "MIROC6", "MPI-ESM1-2-LR"]
print(f"Comparing data for Berlin ({LAT_BERLIN}°N, {LON_BERLIN}°E)")
print(f"Time period: {START_DATE} to {END_DATE}")
print(f"Variables: {', '.join(VARIABLES)}")
print(f"CMIP6 Models: {', '.join(CMIP_MODELS)}")
Comparing data for Berlin (52.5°N, 13.4°E) Time period: 2010-01-01 to 2014-12-31 Variables: tasmax, pr CMIP6 Models: ACCESS-CM2, MIROC6, MPI-ESM1-2-LR
3️⃣ Extract Data from CMIP6 Models¶
We'll extract data from three different CMIP6 models using the historical experiment.
In [3]:
Copied!
# Dictionary to store all datasets
datasets = {}
dataframes = {}
# Extract data from each CMIP6 model
for model in CMIP_MODELS:
print(f"Extracting CMIP6 - {model}")
overrides = [
"dataset=cmip",
f"lat={LAT_BERLIN}",
f"lon={LON_BERLIN}",
f"time_range.start_date={START_DATE}",
f"time_range.end_date={END_DATE}",
f"variables={VARIABLES}",
"experiment_id=historical",
f"source_id={model}",
]
extractor = ClimData(overrides=overrides)
ds = extractor.extract()
df = extractor.to_dataframe(ds)
# Store results
datasets[f"CMIP6_{model}"] = ds
dataframes[f"CMIP6_{model}"] = df
# Dictionary to store all datasets
datasets = {}
dataframes = {}
# Extract data from each CMIP6 model
for model in CMIP_MODELS:
print(f"Extracting CMIP6 - {model}")
overrides = [
"dataset=cmip",
f"lat={LAT_BERLIN}",
f"lon={LON_BERLIN}",
f"time_range.start_date={START_DATE}",
f"time_range.end_date={END_DATE}",
f"variables={VARIABLES}",
"experiment_id=historical",
f"source_id={model}",
]
extractor = ClimData(overrides=overrides)
ds = extractor.extract()
df = extractor.to_dataframe(ds)
# Store results
datasets[f"CMIP6_{model}"] = ds
dataframes[f"CMIP6_{model}"] = df
Extracting CMIP6 - ACCESS-CM2
/home/muduchuru/miniforge3/envs/sdba/lib/python3.10/site-packages/intake_esm/core.py:475: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
cat = self.__class__({'esmcat': self.esmcat.dict(), 'df': esmcat_results})
/home/muduchuru/miniforge3/envs/sdba/lib/python3.10/site-packages/intake_esm/core.py:475: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
cat = self.__class__({'esmcat': self.esmcat.dict(), 'df': esmcat_results})
Extracting CMIP6 - MIROC6
/home/muduchuru/miniforge3/envs/sdba/lib/python3.10/site-packages/intake_esm/core.py:475: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
cat = self.__class__({'esmcat': self.esmcat.dict(), 'df': esmcat_results})
/home/muduchuru/miniforge3/envs/sdba/lib/python3.10/site-packages/intake_esm/core.py:475: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
cat = self.__class__({'esmcat': self.esmcat.dict(), 'df': esmcat_results})
Extracting CMIP6 - MPI-ESM1-2-LR
/home/muduchuru/miniforge3/envs/sdba/lib/python3.10/site-packages/intake_esm/core.py:475: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
cat = self.__class__({'esmcat': self.esmcat.dict(), 'df': esmcat_results})
/home/muduchuru/miniforge3/envs/sdba/lib/python3.10/site-packages/intake_esm/core.py:475: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
cat = self.__class__({'esmcat': self.esmcat.dict(), 'df': esmcat_results})
<frozen importlib._bootstrap>:241: RuntimeWarning: numpy.ndarray size changed, may indicate binary incompatibility. Expected 16 from C header, got 96 from PyObject
4️⃣ Extract Data from NEX-GDDP¶
NEX-GDDP provides downscaled and bias-corrected CMIP6 projections.
In [6]:
Copied!
# Extract NEX-GDDP data for the same models
for model in CMIP_MODELS:
print(f"Extracting NEX-GDDP - {model}")
overrides = [
"dataset=nexgddp",
f"lat={LAT_BERLIN}",
f"lon={LON_BERLIN}",
f"time_range.start_date={START_DATE}",
f"time_range.end_date={END_DATE}",
f"variables={VARIABLES}",
"data_dir=/beegfs/muduchuru/data",
"experiment_id=historical",
f"source_id={model}",
]
extractor = ClimData(overrides=overrides)
ds = extractor.extract()
df = extractor.to_dataframe(ds)
# Store results
datasets[f"NEXGDDP_{model}"] = ds
dataframes[f"NEXGDDP_{model}"] = df
# Extract NEX-GDDP data for the same models
for model in CMIP_MODELS:
print(f"Extracting NEX-GDDP - {model}")
overrides = [
"dataset=nexgddp",
f"lat={LAT_BERLIN}",
f"lon={LON_BERLIN}",
f"time_range.start_date={START_DATE}",
f"time_range.end_date={END_DATE}",
f"variables={VARIABLES}",
"data_dir=/beegfs/muduchuru/data",
"experiment_id=historical",
f"source_id={model}",
]
extractor = ClimData(overrides=overrides)
ds = extractor.extract()
df = extractor.to_dataframe(ds)
# Store results
datasets[f"NEXGDDP_{model}"] = ds
dataframes[f"NEXGDDP_{model}"] = df
Extracting NEX-GDDP - ACCESS-CM2 🔍 Auto-discovering metadata for ACCESS-CM2/historical... Checking available realizations... ✓ Found: r1i1p1f1 (grid: gn) ✓ Discovered grid_label: gn 🔍 Downloading NEX-GDDP-CMIP6 data from NASA THREDDS... Model: ACCESS-CM2, Experiment: historical 📥 Fetching tasmax (Daily Maximum Near-Surface Air Temperature)...
Downloading tasmax: 100%|██████████| 5/5 [01:44<00:00, 20.83s/it]
📥 Fetching pr (Precipitation)...
Downloading pr: 100%|██████████| 5/5 [01:32<00:00, 18.50s/it]
✅ Downloaded 10 files 📂 Loading 10 NEX-GDDP files... Loading tasmax from 5 file(s)... Loading pr from 5 file(s)... ✅ Loaded dataset with 2 variables Extracting NEX-GDDP - MIROC6 🔍 Auto-discovering metadata for MIROC6/historical... Checking available realizations... ✓ Found: r1i1p1f1 (grid: gn) ✓ Discovered grid_label: gn 🔍 Downloading NEX-GDDP-CMIP6 data from NASA THREDDS... Model: MIROC6, Experiment: historical 📥 Fetching tasmax (Daily Maximum Near-Surface Air Temperature)...
Downloading tasmax: 100%|██████████| 5/5 [02:22<00:00, 28.53s/it]
📥 Fetching pr (Precipitation)...
Downloading pr: 100%|██████████| 5/5 [01:46<00:00, 21.33s/it]
✅ Downloaded 10 files 📂 Loading 10 NEX-GDDP files... Loading tasmax from 5 file(s)... Loading pr from 5 file(s)... ✅ Loaded dataset with 2 variables Extracting NEX-GDDP - MPI-ESM1-2-LR 🔍 Auto-discovering metadata for MPI-ESM1-2-LR/historical... Checking available realizations... ✓ Found: r1i1p1f1 (grid: gn) ✓ Discovered grid_label: gn 🔍 Downloading NEX-GDDP-CMIP6 data from NASA THREDDS... Model: MPI-ESM1-2-LR, Experiment: historical 📥 Fetching tasmax (Daily Maximum Near-Surface Air Temperature)...
Downloading tasmax: 100%|██████████| 5/5 [02:04<00:00, 24.88s/it]
📥 Fetching pr (Precipitation)...
Downloading pr: 100%|██████████| 5/5 [02:14<00:00, 26.96s/it]
✅ Downloaded 10 files 📂 Loading 10 NEX-GDDP files... Loading tasmax from 5 file(s)... Loading pr from 5 file(s)... ✅ Loaded dataset with 2 variables
5️⃣ Extract Data from MSWX¶
MSWX provides merged satellite and reanalysis data.
In [7]:
Copied!
print(f"Extracting MSWX")
mswx_overrides = [
"dataset=mswx",
f"lat={LAT_BERLIN}",
f"lon={LON_BERLIN}",
f"time_range.start_date={START_DATE}",
f"time_range.end_date={END_DATE}",
f"variables={VARIABLES}",
"data_dir=/beegfs/muduchuru/data",
# Uncomment if you need Google service account for MSWX
"dsinfo.mswx.params.google_service_account=./.climdata_conf/service.json",
]
extractor_mswx = ClimData(overrides=mswx_overrides)
ds_mswx = extractor_mswx.extract()
df_mswx = extractor_mswx.to_dataframe(ds_mswx)
datasets["MSWX"] = ds_mswx
dataframes["MSWX"] = df_mswx
print(f"✅ Successfully extracted MSWX")
print(f" Shape: {ds_mswx.dims}")
print(f"Extracting MSWX")
mswx_overrides = [
"dataset=mswx",
f"lat={LAT_BERLIN}",
f"lon={LON_BERLIN}",
f"time_range.start_date={START_DATE}",
f"time_range.end_date={END_DATE}",
f"variables={VARIABLES}",
"data_dir=/beegfs/muduchuru/data",
# Uncomment if you need Google service account for MSWX
"dsinfo.mswx.params.google_service_account=./.climdata_conf/service.json",
]
extractor_mswx = ClimData(overrides=mswx_overrides)
ds_mswx = extractor_mswx.extract()
df_mswx = extractor_mswx.to_dataframe(ds_mswx)
datasets["MSWX"] = ds_mswx
dataframes["MSWX"] = df_mswx
print(f"✅ Successfully extracted MSWX")
print(f" Shape: {ds_mswx.dims}")
Extracting MSWX
✅ All 1826 tasmax files already exist locally.
✅ All 1826 pr files already exist locally.
✅ Successfully extracted MSWX
Shape: FrozenMappingWarningOnValuesAccess({'time': 1826})
6️⃣ Extract Data from HYRAS¶
HYRAS provides high-resolution gridded data specifically for Germany.
In [8]:
Copied!
print(f"Extracting HYRAS")
hyras_overrides = [
"dataset=hyras",
f"lat={LAT_BERLIN}",
f"lon={LON_BERLIN}",
f"time_range.start_date={START_DATE}",
f"time_range.end_date={END_DATE}",
f"variables={VARIABLES}",
"data_dir=/beegfs/muduchuru/data",
]
extractor_hyras = ClimData(overrides=hyras_overrides)
ds_hyras = extractor_hyras.extract()
df_hyras = extractor_hyras.to_dataframe(ds_hyras)
datasets["HYRAS"] = ds_hyras
dataframes["HYRAS"] = df_hyras
print(f"Extracting HYRAS")
hyras_overrides = [
"dataset=hyras",
f"lat={LAT_BERLIN}",
f"lon={LON_BERLIN}",
f"time_range.start_date={START_DATE}",
f"time_range.end_date={END_DATE}",
f"variables={VARIABLES}",
"data_dir=/beegfs/muduchuru/data",
]
extractor_hyras = ClimData(overrides=hyras_overrides)
ds_hyras = extractor_hyras.extract()
df_hyras = extractor_hyras.to_dataframe(ds_hyras)
datasets["HYRAS"] = ds_hyras
dataframes["HYRAS"] = df_hyras
Extracting HYRAS ⬇️ Checking: https://opendata.dwd.de/climate_environment/CDC/grids_germany/daily/hyras_de/air_temperature_max/tasmax_hyras_1_2010_v6-0_de.nc ✔️ Exists locally: /beegfs/muduchuru/data/hyras/TASMAX/tasmax_hyras_1_2010_v6-0_de.nc ⬇️ Checking: https://opendata.dwd.de/climate_environment/CDC/grids_germany/daily/hyras_de/air_temperature_max/tasmax_hyras_1_2011_v6-0_de.nc ✔️ Exists locally: /beegfs/muduchuru/data/hyras/TASMAX/tasmax_hyras_1_2011_v6-0_de.nc ⬇️ Checking: https://opendata.dwd.de/climate_environment/CDC/grids_germany/daily/hyras_de/air_temperature_max/tasmax_hyras_1_2012_v6-0_de.nc ✔️ Exists locally: /beegfs/muduchuru/data/hyras/TASMAX/tasmax_hyras_1_2012_v6-0_de.nc ⬇️ Checking: https://opendata.dwd.de/climate_environment/CDC/grids_germany/daily/hyras_de/air_temperature_max/tasmax_hyras_1_2013_v6-0_de.nc ✔️ Exists locally: /beegfs/muduchuru/data/hyras/TASMAX/tasmax_hyras_1_2013_v6-0_de.nc ⬇️ Checking: https://opendata.dwd.de/climate_environment/CDC/grids_germany/daily/hyras_de/air_temperature_max/tasmax_hyras_1_2014_v6-0_de.nc ✔️ Exists locally: /beegfs/muduchuru/data/hyras/TASMAX/tasmax_hyras_1_2014_v6-0_de.nc ⬇️ Checking: https://opendata.dwd.de/climate_environment/CDC/grids_germany/daily/hyras_de/precipitation/pr_hyras_1_2010_v6-0_de.nc ✔️ Exists locally: /beegfs/muduchuru/data/hyras/PR/pr_hyras_1_2010_v6-0_de.nc ⬇️ Checking: https://opendata.dwd.de/climate_environment/CDC/grids_germany/daily/hyras_de/precipitation/pr_hyras_1_2011_v6-0_de.nc ✔️ Exists locally: /beegfs/muduchuru/data/hyras/PR/pr_hyras_1_2011_v6-0_de.nc ⬇️ Checking: https://opendata.dwd.de/climate_environment/CDC/grids_germany/daily/hyras_de/precipitation/pr_hyras_1_2012_v6-0_de.nc ✔️ Exists locally: /beegfs/muduchuru/data/hyras/PR/pr_hyras_1_2012_v6-0_de.nc ⬇️ Checking: https://opendata.dwd.de/climate_environment/CDC/grids_germany/daily/hyras_de/precipitation/pr_hyras_1_2013_v6-0_de.nc ✔️ Exists locally: /beegfs/muduchuru/data/hyras/PR/pr_hyras_1_2013_v6-0_de.nc ⬇️ Checking: https://opendata.dwd.de/climate_environment/CDC/grids_germany/daily/hyras_de/precipitation/pr_hyras_1_2014_v6-0_de.nc ✔️ Exists locally: /beegfs/muduchuru/data/hyras/PR/pr_hyras_1_2014_v6-0_de.nc
Prepare Data for Comparison¶
Combine all dataframes and prepare for visualization.
In [9]:
Copied!
# Add source column to each dataframe and combine
combined_dfs = []
for source_name, df in dataframes.items():
df_copy = df.copy()
df_copy['source'] = source_name
combined_dfs.append(df_copy)
# Combine all dataframes
df_all = pd.concat(combined_dfs, ignore_index=True)
# Ensure time is datetime
if 'time' in df_all.columns:
df_all['time'] = pd.to_datetime(df_all['time'])
print(f"Combined dataset shape: {df_all.shape}")
print(f"\nDataset sources: {df_all['source'].unique().tolist()}")
print(f"Variables: {df_all['variable'].unique().tolist() if 'variable' in df_all.columns else 'N/A'}")
df_all.head()
# Add source column to each dataframe and combine
combined_dfs = []
for source_name, df in dataframes.items():
df_copy = df.copy()
df_copy['source'] = source_name
combined_dfs.append(df_copy)
# Combine all dataframes
df_all = pd.concat(combined_dfs, ignore_index=True)
# Ensure time is datetime
if 'time' in df_all.columns:
df_all['time'] = pd.to_datetime(df_all['time'])
print(f"Combined dataset shape: {df_all.shape}")
print(f"\nDataset sources: {df_all['source'].unique().tolist()}")
print(f"Variables: {df_all['variable'].unique().tolist() if 'variable' in df_all.columns else 'N/A'}")
df_all.head()
Combined dataset shape: (29216, 8) Dataset sources: ['CMIP6_ACCESS-CM2', 'CMIP6_MIROC6', 'CMIP6_MPI-ESM1-2-LR', 'NEXGDDP_ACCESS-CM2', 'NEXGDDP_MIROC6', 'NEXGDDP_MPI-ESM1-2-LR', 'MSWX', 'HYRAS'] Variables: ['tasmax', 'pr']
Out[9]:
| time | lat | lon | variable | value | units | source_id | source | |
|---|---|---|---|---|---|---|---|---|
| 0 | 2010-01-01 | 53.125 | 14.0625 | tasmax | 6.929352 | °C | ACCESS-CM2 | CMIP6_ACCESS-CM2 |
| 1 | 2010-01-02 | 53.125 | 14.0625 | tasmax | 4.574371 | °C | ACCESS-CM2 | CMIP6_ACCESS-CM2 |
| 2 | 2010-01-03 | 53.125 | 14.0625 | tasmax | 11.116364 | °C | ACCESS-CM2 | CMIP6_ACCESS-CM2 |
| 3 | 2010-01-04 | 53.125 | 14.0625 | tasmax | 6.485016 | °C | ACCESS-CM2 | CMIP6_ACCESS-CM2 |
| 4 | 2010-01-05 | 53.125 | 14.0625 | tasmax | 4.042877 | °C | ACCESS-CM2 | CMIP6_ACCESS-CM2 |
Visualize Maximum Temperature (tasmax) Comparison¶
In [12]:
Copied!
# Filter for tasmax
df_tasmax = df_all[df_all['variable'] == 'tasmax'].copy()
# Define colors for datasets and markers for models
dataset_colors = {
'MSWX': 'blue',
'HYRAS': 'black',
'CMIP6': '#E74C3C', # Red-orange for CMIP6
'NEXGDDP': '#27AE60' # Green for NEX-GDDP
}
model_markers = {
'ACCESS-CM2': 'o',
'MIROC6': 's',
'MPI-ESM1-2-LR': '^'
}
# Create color and marker mapping
color_map = {}
marker_map = {}
for source in df_tasmax['source'].unique():
if source == 'MSWX':
color_map[source] = dataset_colors['MSWX']
marker_map[source] = 'o'
elif source == 'HYRAS':
color_map[source] = dataset_colors['HYRAS']
marker_map[source] = 'o'
elif source.startswith('CMIP6_'):
model = source.replace('CMIP6_', '')
color_map[source] = dataset_colors['CMIP6']
marker_map[source] = model_markers[model]
elif source.startswith('NEXGDDP_'):
model = source.replace('NEXGDDP_', '')
color_map[source] = dataset_colors['NEXGDDP']
marker_map[source] = model_markers[model]
# Create figure with subplots
fig, axes = plt.subplots(2, 1, figsize=(16, 10))
# Plot 1: Time series
for source in df_tasmax['source'].unique():
data = df_tasmax[df_tasmax['source'] == source]
is_obs = source in ['MSWX', 'HYRAS']
linestyle = '-' if is_obs else '--'
linewidth = 2 if is_obs else 1.5
alpha = 0.9 if is_obs else 0.7
axes[0].plot(data['time'], data['value'],
color=color_map[source], alpha=alpha,
linewidth=linewidth, linestyle=linestyle)
axes[0].set_xlabel('Time', fontsize=14)
axes[0].set_ylabel('Maximum Temperature (°C)', fontsize=14)
axes[0].set_title('Maximum Temperature Comparison - Berlin', fontsize=16, fontweight='bold')
axes[0].tick_params(axis='both', labelsize=12)
# Create custom legend for datasets (colors)
from matplotlib.lines import Line2D
dataset_handles = [
Line2D([0], [0], color=dataset_colors['MSWX'], linewidth=2, linestyle='-', label='MSWX'),
Line2D([0], [0], color=dataset_colors['HYRAS'], linewidth=2, linestyle='-', label='HYRAS'),
Line2D([0], [0], color=dataset_colors['CMIP6'], linewidth=1.5, linestyle='--', label='CMIP6'),
Line2D([0], [0], color=dataset_colors['NEXGDDP'], linewidth=1.5, linestyle='--', label='NEX-GDDP')
]
legend1 = axes[0].legend(handles=dataset_handles, loc='upper left', fontsize=11, title='Dataset', title_fontsize=12, framealpha=0.9)
axes[0].add_artist(legend1)
# Create custom legend for models (markers)
model_handles = [
Line2D([0], [0], color='gray', marker='o', linestyle='None', markersize=6, label='ACCESS-CM2'),
Line2D([0], [0], color='gray', marker='s', linestyle='None', markersize=6, label='MIROC6'),
Line2D([0], [0], color='gray', marker='^', linestyle='None', markersize=6, label='MPI-ESM1-2-LR')
]
axes[0].legend(handles=model_handles, loc='upper right', fontsize=11, title='Model', title_fontsize=12, framealpha=0.9)
axes[0].grid(True, alpha=0.3)
# Plot 2: Monthly averages
df_tasmax['year_month'] = df_tasmax['time'].dt.to_period('M')
monthly_avg = df_tasmax.groupby(['source', 'year_month'])['value'].mean().reset_index()
monthly_avg['year_month'] = monthly_avg['year_month'].dt.to_timestamp()
for source in monthly_avg['source'].unique():
data = monthly_avg[monthly_avg['source'] == source]
is_obs = source in ['MSWX', 'HYRAS']
linestyle = '-' if is_obs else '--'
linewidth = 2.5 if is_obs else 2
markersize = 7 if is_obs else 6
alpha = 0.9 if is_obs else 0.7
axes[1].plot(data['year_month'], data['value'],
color=color_map[source], alpha=alpha,
linewidth=linewidth, linestyle=linestyle,
marker=marker_map[source], markersize=markersize)
axes[1].set_xlabel('Month', fontsize=14)
axes[1].set_ylabel('Monthly Average Temperature (°C)', fontsize=14)
axes[1].set_title('Monthly Average Maximum Temperature', fontsize=16, fontweight='bold')
axes[1].tick_params(axis='both', labelsize=12)
# Create custom legend for datasets (colors)
legend1 = axes[1].legend(handles=dataset_handles, loc='upper left', fontsize=11, title='Dataset', title_fontsize=12, framealpha=0.9)
axes[1].add_artist(legend1)
# Create custom legend for models (markers)
axes[1].legend(handles=model_handles, loc='upper right', fontsize=11, title='Model', title_fontsize=12, framealpha=0.9)
axes[1].grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
# Filter for tasmax
df_tasmax = df_all[df_all['variable'] == 'tasmax'].copy()
# Define colors for datasets and markers for models
dataset_colors = {
'MSWX': 'blue',
'HYRAS': 'black',
'CMIP6': '#E74C3C', # Red-orange for CMIP6
'NEXGDDP': '#27AE60' # Green for NEX-GDDP
}
model_markers = {
'ACCESS-CM2': 'o',
'MIROC6': 's',
'MPI-ESM1-2-LR': '^'
}
# Create color and marker mapping
color_map = {}
marker_map = {}
for source in df_tasmax['source'].unique():
if source == 'MSWX':
color_map[source] = dataset_colors['MSWX']
marker_map[source] = 'o'
elif source == 'HYRAS':
color_map[source] = dataset_colors['HYRAS']
marker_map[source] = 'o'
elif source.startswith('CMIP6_'):
model = source.replace('CMIP6_', '')
color_map[source] = dataset_colors['CMIP6']
marker_map[source] = model_markers[model]
elif source.startswith('NEXGDDP_'):
model = source.replace('NEXGDDP_', '')
color_map[source] = dataset_colors['NEXGDDP']
marker_map[source] = model_markers[model]
# Create figure with subplots
fig, axes = plt.subplots(2, 1, figsize=(16, 10))
# Plot 1: Time series
for source in df_tasmax['source'].unique():
data = df_tasmax[df_tasmax['source'] == source]
is_obs = source in ['MSWX', 'HYRAS']
linestyle = '-' if is_obs else '--'
linewidth = 2 if is_obs else 1.5
alpha = 0.9 if is_obs else 0.7
axes[0].plot(data['time'], data['value'],
color=color_map[source], alpha=alpha,
linewidth=linewidth, linestyle=linestyle)
axes[0].set_xlabel('Time', fontsize=14)
axes[0].set_ylabel('Maximum Temperature (°C)', fontsize=14)
axes[0].set_title('Maximum Temperature Comparison - Berlin', fontsize=16, fontweight='bold')
axes[0].tick_params(axis='both', labelsize=12)
# Create custom legend for datasets (colors)
from matplotlib.lines import Line2D
dataset_handles = [
Line2D([0], [0], color=dataset_colors['MSWX'], linewidth=2, linestyle='-', label='MSWX'),
Line2D([0], [0], color=dataset_colors['HYRAS'], linewidth=2, linestyle='-', label='HYRAS'),
Line2D([0], [0], color=dataset_colors['CMIP6'], linewidth=1.5, linestyle='--', label='CMIP6'),
Line2D([0], [0], color=dataset_colors['NEXGDDP'], linewidth=1.5, linestyle='--', label='NEX-GDDP')
]
legend1 = axes[0].legend(handles=dataset_handles, loc='upper left', fontsize=11, title='Dataset', title_fontsize=12, framealpha=0.9)
axes[0].add_artist(legend1)
# Create custom legend for models (markers)
model_handles = [
Line2D([0], [0], color='gray', marker='o', linestyle='None', markersize=6, label='ACCESS-CM2'),
Line2D([0], [0], color='gray', marker='s', linestyle='None', markersize=6, label='MIROC6'),
Line2D([0], [0], color='gray', marker='^', linestyle='None', markersize=6, label='MPI-ESM1-2-LR')
]
axes[0].legend(handles=model_handles, loc='upper right', fontsize=11, title='Model', title_fontsize=12, framealpha=0.9)
axes[0].grid(True, alpha=0.3)
# Plot 2: Monthly averages
df_tasmax['year_month'] = df_tasmax['time'].dt.to_period('M')
monthly_avg = df_tasmax.groupby(['source', 'year_month'])['value'].mean().reset_index()
monthly_avg['year_month'] = monthly_avg['year_month'].dt.to_timestamp()
for source in monthly_avg['source'].unique():
data = monthly_avg[monthly_avg['source'] == source]
is_obs = source in ['MSWX', 'HYRAS']
linestyle = '-' if is_obs else '--'
linewidth = 2.5 if is_obs else 2
markersize = 7 if is_obs else 6
alpha = 0.9 if is_obs else 0.7
axes[1].plot(data['year_month'], data['value'],
color=color_map[source], alpha=alpha,
linewidth=linewidth, linestyle=linestyle,
marker=marker_map[source], markersize=markersize)
axes[1].set_xlabel('Month', fontsize=14)
axes[1].set_ylabel('Monthly Average Temperature (°C)', fontsize=14)
axes[1].set_title('Monthly Average Maximum Temperature', fontsize=16, fontweight='bold')
axes[1].tick_params(axis='both', labelsize=12)
# Create custom legend for datasets (colors)
legend1 = axes[1].legend(handles=dataset_handles, loc='upper left', fontsize=11, title='Dataset', title_fontsize=12, framealpha=0.9)
axes[1].add_artist(legend1)
# Create custom legend for models (markers)
axes[1].legend(handles=model_handles, loc='upper right', fontsize=11, title='Model', title_fontsize=12, framealpha=0.9)
axes[1].grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
🔟 Visualize Precipitation (pr) Comparison¶
In [13]:
Copied!
# Filter for precipitation
df_pr = df_all[df_all['variable'] == 'pr'].copy()
# Use same color and marker mapping
color_map_pr = {}
marker_map_pr = {}
for source in df_pr['source'].unique():
if source == 'MSWX':
color_map_pr[source] = dataset_colors['MSWX']
marker_map_pr[source] = 'o'
elif source == 'HYRAS':
color_map_pr[source] = dataset_colors['HYRAS']
marker_map_pr[source] = 'o'
elif source.startswith('CMIP6_'):
model = source.replace('CMIP6_', '')
color_map_pr[source] = dataset_colors['CMIP6']
marker_map_pr[source] = model_markers[model]
elif source.startswith('NEXGDDP_'):
model = source.replace('NEXGDDP_', '')
color_map_pr[source] = dataset_colors['NEXGDDP']
marker_map_pr[source] = model_markers[model]
# Create figure with subplots
fig, axes = plt.subplots(2, 1, figsize=(16, 10))
# Plot 1: Time series
for source in df_pr['source'].unique():
data = df_pr[df_pr['source'] == source]
is_obs = source in ['MSWX', 'HYRAS']
linestyle = '-' if is_obs else '--'
linewidth = 2 if is_obs else 1.5
alpha = 0.9 if is_obs else 0.7
axes[0].plot(data['time'], data['value'],
color=color_map_pr[source], alpha=alpha,
linewidth=linewidth, linestyle=linestyle)
axes[0].set_xlabel('Time', fontsize=14)
axes[0].set_ylabel('Precipitation (mm/day)', fontsize=14)
axes[0].set_title('Precipitation Comparison - Berlin', fontsize=16, fontweight='bold')
axes[0].tick_params(axis='both', labelsize=12)
# Create custom legend for datasets (colors)
from matplotlib.lines import Line2D
dataset_handles = [
Line2D([0], [0], color=dataset_colors['MSWX'], linewidth=2, linestyle='-', label='MSWX'),
Line2D([0], [0], color=dataset_colors['HYRAS'], linewidth=2, linestyle='-', label='HYRAS'),
Line2D([0], [0], color=dataset_colors['CMIP6'], linewidth=1.5, linestyle='--', label='CMIP6'),
Line2D([0], [0], color=dataset_colors['NEXGDDP'], linewidth=1.5, linestyle='--', label='NEX-GDDP')
]
legend1 = axes[0].legend(handles=dataset_handles, loc='upper left', fontsize=11, title='Dataset', title_fontsize=12, framealpha=0.9)
axes[0].add_artist(legend1)
# Create custom legend for models (markers)
model_handles = [
Line2D([0], [0], color='gray', marker='o', linestyle='None', markersize=6, label='ACCESS-CM2'),
Line2D([0], [0], color='gray', marker='s', linestyle='None', markersize=6, label='MIROC6'),
Line2D([0], [0], color='gray', marker='^', linestyle='None', markersize=6, label='MPI-ESM1-2-LR')
]
axes[0].legend(handles=model_handles, loc='upper right', fontsize=11, title='Model', title_fontsize=12, framealpha=0.9)
axes[0].grid(True, alpha=0.3)
# Plot 2: Monthly totals
df_pr['year_month'] = df_pr['time'].dt.to_period('M')
monthly_total = df_pr.groupby(['source', 'year_month'])['value'].sum().reset_index()
monthly_total['year_month'] = monthly_total['year_month'].dt.to_timestamp()
for source in monthly_total['source'].unique():
data = monthly_total[monthly_total['source'] == source]
is_obs = source in ['MSWX', 'HYRAS']
linestyle = '-' if is_obs else '--'
linewidth = 2.5 if is_obs else 2
markersize = 7 if is_obs else 6
alpha = 0.9 if is_obs else 0.7
axes[1].plot(data['year_month'], data['value'],
color=color_map_pr[source], alpha=alpha,
linewidth=linewidth, linestyle=linestyle,
marker=marker_map_pr[source], markersize=markersize)
axes[1].set_xlabel('Month', fontsize=14)
axes[1].set_ylabel('Monthly Total Precipitation (mm)', fontsize=14)
axes[1].set_title('Monthly Total Precipitation', fontsize=16, fontweight='bold')
axes[1].tick_params(axis='both', labelsize=12)
# Create custom legend for datasets (colors)
legend1 = axes[1].legend(handles=dataset_handles, loc='upper left', fontsize=11, title='Dataset', title_fontsize=12, framealpha=0.9)
axes[1].add_artist(legend1)
# Create custom legend for models (markers)
axes[1].legend(handles=model_handles, loc='upper right', fontsize=11, title='Model', title_fontsize=12, framealpha=0.9)
axes[1].grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
# Filter for precipitation
df_pr = df_all[df_all['variable'] == 'pr'].copy()
# Use same color and marker mapping
color_map_pr = {}
marker_map_pr = {}
for source in df_pr['source'].unique():
if source == 'MSWX':
color_map_pr[source] = dataset_colors['MSWX']
marker_map_pr[source] = 'o'
elif source == 'HYRAS':
color_map_pr[source] = dataset_colors['HYRAS']
marker_map_pr[source] = 'o'
elif source.startswith('CMIP6_'):
model = source.replace('CMIP6_', '')
color_map_pr[source] = dataset_colors['CMIP6']
marker_map_pr[source] = model_markers[model]
elif source.startswith('NEXGDDP_'):
model = source.replace('NEXGDDP_', '')
color_map_pr[source] = dataset_colors['NEXGDDP']
marker_map_pr[source] = model_markers[model]
# Create figure with subplots
fig, axes = plt.subplots(2, 1, figsize=(16, 10))
# Plot 1: Time series
for source in df_pr['source'].unique():
data = df_pr[df_pr['source'] == source]
is_obs = source in ['MSWX', 'HYRAS']
linestyle = '-' if is_obs else '--'
linewidth = 2 if is_obs else 1.5
alpha = 0.9 if is_obs else 0.7
axes[0].plot(data['time'], data['value'],
color=color_map_pr[source], alpha=alpha,
linewidth=linewidth, linestyle=linestyle)
axes[0].set_xlabel('Time', fontsize=14)
axes[0].set_ylabel('Precipitation (mm/day)', fontsize=14)
axes[0].set_title('Precipitation Comparison - Berlin', fontsize=16, fontweight='bold')
axes[0].tick_params(axis='both', labelsize=12)
# Create custom legend for datasets (colors)
from matplotlib.lines import Line2D
dataset_handles = [
Line2D([0], [0], color=dataset_colors['MSWX'], linewidth=2, linestyle='-', label='MSWX'),
Line2D([0], [0], color=dataset_colors['HYRAS'], linewidth=2, linestyle='-', label='HYRAS'),
Line2D([0], [0], color=dataset_colors['CMIP6'], linewidth=1.5, linestyle='--', label='CMIP6'),
Line2D([0], [0], color=dataset_colors['NEXGDDP'], linewidth=1.5, linestyle='--', label='NEX-GDDP')
]
legend1 = axes[0].legend(handles=dataset_handles, loc='upper left', fontsize=11, title='Dataset', title_fontsize=12, framealpha=0.9)
axes[0].add_artist(legend1)
# Create custom legend for models (markers)
model_handles = [
Line2D([0], [0], color='gray', marker='o', linestyle='None', markersize=6, label='ACCESS-CM2'),
Line2D([0], [0], color='gray', marker='s', linestyle='None', markersize=6, label='MIROC6'),
Line2D([0], [0], color='gray', marker='^', linestyle='None', markersize=6, label='MPI-ESM1-2-LR')
]
axes[0].legend(handles=model_handles, loc='upper right', fontsize=11, title='Model', title_fontsize=12, framealpha=0.9)
axes[0].grid(True, alpha=0.3)
# Plot 2: Monthly totals
df_pr['year_month'] = df_pr['time'].dt.to_period('M')
monthly_total = df_pr.groupby(['source', 'year_month'])['value'].sum().reset_index()
monthly_total['year_month'] = monthly_total['year_month'].dt.to_timestamp()
for source in monthly_total['source'].unique():
data = monthly_total[monthly_total['source'] == source]
is_obs = source in ['MSWX', 'HYRAS']
linestyle = '-' if is_obs else '--'
linewidth = 2.5 if is_obs else 2
markersize = 7 if is_obs else 6
alpha = 0.9 if is_obs else 0.7
axes[1].plot(data['year_month'], data['value'],
color=color_map_pr[source], alpha=alpha,
linewidth=linewidth, linestyle=linestyle,
marker=marker_map_pr[source], markersize=markersize)
axes[1].set_xlabel('Month', fontsize=14)
axes[1].set_ylabel('Monthly Total Precipitation (mm)', fontsize=14)
axes[1].set_title('Monthly Total Precipitation', fontsize=16, fontweight='bold')
axes[1].tick_params(axis='both', labelsize=12)
# Create custom legend for datasets (colors)
legend1 = axes[1].legend(handles=dataset_handles, loc='upper left', fontsize=11, title='Dataset', title_fontsize=12, framealpha=0.9)
axes[1].add_artist(legend1)
# Create custom legend for models (markers)
axes[1].legend(handles=model_handles, loc='upper right', fontsize=11, title='Model', title_fontsize=12, framealpha=0.9)
axes[1].grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
Box Plots for Distribution Comparison¶
In [20]:
Copied!
fig, axes = plt.subplots(1, 2, figsize=(18, 7))
# Prepare data for boxplots with model tracking
def prepare_boxplot_data_with_labels(df):
"""Organize data by dataset type and model for grouped boxplot"""
data_list = []
labels_list = []
colors_list = []
positions_list = []
pos = 1
for source in sorted(df['source'].unique()):
if source == 'MSWX':
data_list.append(df[df['source'] == source]['value'].values)
labels_list.append('MSWX')
colors_list.append(dataset_colors['MSWX'])
positions_list.append(pos)
pos += 2 # Gap after MSWX
elif source == 'HYRAS':
data_list.append(df[df['source'] == source]['value'].values)
labels_list.append('HYRAS')
colors_list.append(dataset_colors['HYRAS'])
positions_list.append(pos)
pos += 2 # Gap after HYRAS
# CMIP6 models
for model in CMIP_MODELS:
source = f'CMIP6_{model}'
if source in df['source'].unique():
data_list.append(df[df['source'] == source]['value'].values)
labels_list.append(model)
colors_list.append(dataset_colors['CMIP6'])
positions_list.append(pos)
pos += 0.8 # Closer spacing within group
pos += 1.2 # Gap after CMIP6 group
# NEX-GDDP models
for model in CMIP_MODELS:
source = f'NEXGDDP_{model}'
if source in df['source'].unique():
data_list.append(df[df['source'] == source]['value'].values)
labels_list.append(model)
colors_list.append(dataset_colors['NEXGDDP'])
positions_list.append(pos)
pos += 0.8 # Closer spacing within group
return data_list, labels_list, colors_list, positions_list
# Prepare data
tasmax_data, tasmax_labels, tasmax_colors, tasmax_positions = prepare_boxplot_data_with_labels(df_tasmax)
# Filter out pr values < 0.01 mm/day
df_pr_filtered = df_pr[df_pr['value'] >= 0.01].copy()
pr_data, pr_labels, pr_colors, pr_positions = prepare_boxplot_data_with_labels(df_pr_filtered)
# Boxplot for tasmax
bp = axes[0].boxplot(tasmax_data,
positions=tasmax_positions,
widths=0.5,
showfliers=False,
patch_artist=True,
medianprops=dict(color='black', linewidth=2))
# Color the boxes and add statistics
for i, (patch, color, data, label) in enumerate(zip(bp['boxes'], tasmax_colors, tasmax_data, tasmax_labels)):
patch.set_facecolor(color)
patch.set_alpha(0.7)
# Calculate median and std dev
median = np.median(data)
std = np.std(data)
# Add model name on the box (rotated 90 degrees)
# Calculate middle of the box (between 25th and 75th percentile)
q1 = np.percentile(data, 25)
q3 = np.percentile(data, 75)
y_mid = (q1 + q3) / 2
axes[0].text(tasmax_positions[i], y_mid, label,
ha='center', va='center', fontsize=10, fontweight='bold',
color='white', rotation=90)
# Add text annotation below the box
axes[0].text(tasmax_positions[i], axes[0].get_ylim()[0] - (axes[0].get_ylim()[1] - axes[0].get_ylim()[0]) * 0.08,
f'{median:.1f}\n(±{std:.1f})',
ha='center', va='top', fontsize=9, color='black')
axes[0].set_title('Maximum Temperature Distribution by Dataset', fontsize=16, fontweight='bold')
axes[0].set_xlabel('Dataset / Model', fontsize=14)
axes[0].set_ylabel('Temperature (°C)', fontsize=14)
axes[0].tick_params(axis='both', labelsize=11)
axes[0].set_xticks(tasmax_positions)
axes[0].set_xticklabels([''] * len(tasmax_labels)) # Hide x-axis labels since names are on boxes
axes[0].grid(True, alpha=0.3, axis='y')
# Add dataset group labels
axes[0].text(tasmax_positions[0], axes[0].get_ylim()[0] - (axes[0].get_ylim()[1] - axes[0].get_ylim()[0]) * 0.18,
'MSWX', ha='center', fontsize=12, fontweight='bold', color=dataset_colors['MSWX'])
axes[0].text(tasmax_positions[1], axes[0].get_ylim()[0] - (axes[0].get_ylim()[1] - axes[0].get_ylim()[0]) * 0.18,
'HYRAS', ha='center', fontsize=12, fontweight='bold', color=dataset_colors['HYRAS'])
axes[0].text(np.mean(tasmax_positions[2:5]), axes[0].get_ylim()[0] - (axes[0].get_ylim()[1] - axes[0].get_ylim()[0]) * 0.18,
'CMIP6', ha='center', fontsize=12, fontweight='bold', color=dataset_colors['CMIP6'])
axes[0].text(np.mean(tasmax_positions[5:8]), axes[0].get_ylim()[0] - (axes[0].get_ylim()[1] - axes[0].get_ylim()[0]) * 0.18,
'NEX-GDDP', ha='center', fontsize=12, fontweight='bold', color=dataset_colors['NEXGDDP'])
# Boxplot for pr
bp = axes[1].boxplot(pr_data,
positions=pr_positions,
widths=0.5,
showfliers=False,
patch_artist=True,
medianprops=dict(color='black', linewidth=2))
# Color the boxes and add statistics
for i, (patch, color, data, label) in enumerate(zip(bp['boxes'], pr_colors, pr_data, pr_labels)):
patch.set_facecolor(color)
patch.set_alpha(0.7)
# Calculate median and std dev
median = np.median(data)
# Calculate middle of the box (between 25th and 75th percentile)
q1 = np.percentile(data, 25)
q3 = np.percentile(data, 75)
y_mid = (q1 + q3) / 2
axes[1].text(pr_positions[i], y_mid, label,
ha='center', va='center', fontsize=10, fontweight='bold',
color='white', rotation=90)
# Add text annotation below the box
axes[1].text(pr_positions[i], axes[1].get_ylim()[0] - (axes[1].get_ylim()[1] - axes[1].get_ylim()[0]) * 0.08,
f'{median:.1f}\n(±{std:.1f})',
ha='center', va='top', fontsize=9, color='black')
axes[1].set_title('Precipitation Distribution by Dataset', fontsize=16, fontweight='bold')
axes[1].set_xlabel('Dataset / Model', fontsize=14)
axes[1].set_ylabel('Precipitation (mm/day)', fontsize=14)
axes[1].tick_params(axis='both', labelsize=11)
axes[1].set_xticks(pr_positions)
axes[1].set_xticklabels([''] * len(pr_labels)) # Hide x-axis labels since names are on boxes
axes[1].grid(True, alpha=0.3, axis='y')
# Add dataset group labels
axes[1].text(pr_positions[0], axes[1].get_ylim()[0] - (axes[1].get_ylim()[1] - axes[1].get_ylim()[0]) * 0.18,
'MSWX', ha='center', fontsize=12, fontweight='bold', color=dataset_colors['MSWX'])
axes[1].text(pr_positions[1], axes[1].get_ylim()[0] - (axes[1].get_ylim()[1] - axes[1].get_ylim()[0]) * 0.18,
'HYRAS', ha='center', fontsize=12, fontweight='bold', color=dataset_colors['HYRAS'])
axes[1].text(np.mean(pr_positions[2:5]), axes[1].get_ylim()[0] - (axes[1].get_ylim()[1] - axes[1].get_ylim()[0]) * 0.18,
'CMIP6', ha='center', fontsize=12, fontweight='bold', color=dataset_colors['CMIP6'])
axes[1].text(np.mean(pr_positions[5:8]), axes[1].get_ylim()[0] - (axes[1].get_ylim()[1] - axes[1].get_ylim()[0]) * 0.18,
'NEX-GDDP', ha='center', fontsize=12, fontweight='bold', color=dataset_colors['NEXGDDP'])
plt.show()
plt.tight_layout()
plt.show()
fig, axes = plt.subplots(1, 2, figsize=(18, 7))
# Prepare data for boxplots with model tracking
def prepare_boxplot_data_with_labels(df):
"""Organize data by dataset type and model for grouped boxplot"""
data_list = []
labels_list = []
colors_list = []
positions_list = []
pos = 1
for source in sorted(df['source'].unique()):
if source == 'MSWX':
data_list.append(df[df['source'] == source]['value'].values)
labels_list.append('MSWX')
colors_list.append(dataset_colors['MSWX'])
positions_list.append(pos)
pos += 2 # Gap after MSWX
elif source == 'HYRAS':
data_list.append(df[df['source'] == source]['value'].values)
labels_list.append('HYRAS')
colors_list.append(dataset_colors['HYRAS'])
positions_list.append(pos)
pos += 2 # Gap after HYRAS
# CMIP6 models
for model in CMIP_MODELS:
source = f'CMIP6_{model}'
if source in df['source'].unique():
data_list.append(df[df['source'] == source]['value'].values)
labels_list.append(model)
colors_list.append(dataset_colors['CMIP6'])
positions_list.append(pos)
pos += 0.8 # Closer spacing within group
pos += 1.2 # Gap after CMIP6 group
# NEX-GDDP models
for model in CMIP_MODELS:
source = f'NEXGDDP_{model}'
if source in df['source'].unique():
data_list.append(df[df['source'] == source]['value'].values)
labels_list.append(model)
colors_list.append(dataset_colors['NEXGDDP'])
positions_list.append(pos)
pos += 0.8 # Closer spacing within group
return data_list, labels_list, colors_list, positions_list
# Prepare data
tasmax_data, tasmax_labels, tasmax_colors, tasmax_positions = prepare_boxplot_data_with_labels(df_tasmax)
# Filter out pr values < 0.01 mm/day
df_pr_filtered = df_pr[df_pr['value'] >= 0.01].copy()
pr_data, pr_labels, pr_colors, pr_positions = prepare_boxplot_data_with_labels(df_pr_filtered)
# Boxplot for tasmax
bp = axes[0].boxplot(tasmax_data,
positions=tasmax_positions,
widths=0.5,
showfliers=False,
patch_artist=True,
medianprops=dict(color='black', linewidth=2))
# Color the boxes and add statistics
for i, (patch, color, data, label) in enumerate(zip(bp['boxes'], tasmax_colors, tasmax_data, tasmax_labels)):
patch.set_facecolor(color)
patch.set_alpha(0.7)
# Calculate median and std dev
median = np.median(data)
std = np.std(data)
# Add model name on the box (rotated 90 degrees)
# Calculate middle of the box (between 25th and 75th percentile)
q1 = np.percentile(data, 25)
q3 = np.percentile(data, 75)
y_mid = (q1 + q3) / 2
axes[0].text(tasmax_positions[i], y_mid, label,
ha='center', va='center', fontsize=10, fontweight='bold',
color='white', rotation=90)
# Add text annotation below the box
axes[0].text(tasmax_positions[i], axes[0].get_ylim()[0] - (axes[0].get_ylim()[1] - axes[0].get_ylim()[0]) * 0.08,
f'{median:.1f}\n(±{std:.1f})',
ha='center', va='top', fontsize=9, color='black')
axes[0].set_title('Maximum Temperature Distribution by Dataset', fontsize=16, fontweight='bold')
axes[0].set_xlabel('Dataset / Model', fontsize=14)
axes[0].set_ylabel('Temperature (°C)', fontsize=14)
axes[0].tick_params(axis='both', labelsize=11)
axes[0].set_xticks(tasmax_positions)
axes[0].set_xticklabels([''] * len(tasmax_labels)) # Hide x-axis labels since names are on boxes
axes[0].grid(True, alpha=0.3, axis='y')
# Add dataset group labels
axes[0].text(tasmax_positions[0], axes[0].get_ylim()[0] - (axes[0].get_ylim()[1] - axes[0].get_ylim()[0]) * 0.18,
'MSWX', ha='center', fontsize=12, fontweight='bold', color=dataset_colors['MSWX'])
axes[0].text(tasmax_positions[1], axes[0].get_ylim()[0] - (axes[0].get_ylim()[1] - axes[0].get_ylim()[0]) * 0.18,
'HYRAS', ha='center', fontsize=12, fontweight='bold', color=dataset_colors['HYRAS'])
axes[0].text(np.mean(tasmax_positions[2:5]), axes[0].get_ylim()[0] - (axes[0].get_ylim()[1] - axes[0].get_ylim()[0]) * 0.18,
'CMIP6', ha='center', fontsize=12, fontweight='bold', color=dataset_colors['CMIP6'])
axes[0].text(np.mean(tasmax_positions[5:8]), axes[0].get_ylim()[0] - (axes[0].get_ylim()[1] - axes[0].get_ylim()[0]) * 0.18,
'NEX-GDDP', ha='center', fontsize=12, fontweight='bold', color=dataset_colors['NEXGDDP'])
# Boxplot for pr
bp = axes[1].boxplot(pr_data,
positions=pr_positions,
widths=0.5,
showfliers=False,
patch_artist=True,
medianprops=dict(color='black', linewidth=2))
# Color the boxes and add statistics
for i, (patch, color, data, label) in enumerate(zip(bp['boxes'], pr_colors, pr_data, pr_labels)):
patch.set_facecolor(color)
patch.set_alpha(0.7)
# Calculate median and std dev
median = np.median(data)
# Calculate middle of the box (between 25th and 75th percentile)
q1 = np.percentile(data, 25)
q3 = np.percentile(data, 75)
y_mid = (q1 + q3) / 2
axes[1].text(pr_positions[i], y_mid, label,
ha='center', va='center', fontsize=10, fontweight='bold',
color='white', rotation=90)
# Add text annotation below the box
axes[1].text(pr_positions[i], axes[1].get_ylim()[0] - (axes[1].get_ylim()[1] - axes[1].get_ylim()[0]) * 0.08,
f'{median:.1f}\n(±{std:.1f})',
ha='center', va='top', fontsize=9, color='black')
axes[1].set_title('Precipitation Distribution by Dataset', fontsize=16, fontweight='bold')
axes[1].set_xlabel('Dataset / Model', fontsize=14)
axes[1].set_ylabel('Precipitation (mm/day)', fontsize=14)
axes[1].tick_params(axis='both', labelsize=11)
axes[1].set_xticks(pr_positions)
axes[1].set_xticklabels([''] * len(pr_labels)) # Hide x-axis labels since names are on boxes
axes[1].grid(True, alpha=0.3, axis='y')
# Add dataset group labels
axes[1].text(pr_positions[0], axes[1].get_ylim()[0] - (axes[1].get_ylim()[1] - axes[1].get_ylim()[0]) * 0.18,
'MSWX', ha='center', fontsize=12, fontweight='bold', color=dataset_colors['MSWX'])
axes[1].text(pr_positions[1], axes[1].get_ylim()[0] - (axes[1].get_ylim()[1] - axes[1].get_ylim()[0]) * 0.18,
'HYRAS', ha='center', fontsize=12, fontweight='bold', color=dataset_colors['HYRAS'])
axes[1].text(np.mean(pr_positions[2:5]), axes[1].get_ylim()[0] - (axes[1].get_ylim()[1] - axes[1].get_ylim()[0]) * 0.18,
'CMIP6', ha='center', fontsize=12, fontweight='bold', color=dataset_colors['CMIP6'])
axes[1].text(np.mean(pr_positions[5:8]), axes[1].get_ylim()[0] - (axes[1].get_ylim()[1] - axes[1].get_ylim()[0]) * 0.18,
'NEX-GDDP', ha='center', fontsize=12, fontweight='bold', color=dataset_colors['NEXGDDP'])
plt.show()
plt.tight_layout()
plt.show()
<Figure size 640x480 with 0 Axes>