sprint-econtai/pipeline/generators/sequential_coherence_cdf.py
Félix Dorn 65dc648797 wip
2025-07-15 00:34:54 +02:00

54 lines
2.1 KiB
Python

from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
from ..utils import LIME, style_plot
def plot_sequential_coherence_cdf(output_dir: Path, df: pd.DataFrame, **kwargs):
    """Plot empirical CDFs of the sequential-coherence estimates and save them as a PNG.

    Three cumulative-distribution step curves are drawn on a shared log-scaled
    x-axis, one per column of ``df``: ``lb_estimate_in_minutes``,
    ``ub_estimate_in_minutes`` and ``estimate_midpoint``.

    This is a generator function: nothing is computed or written until the
    returned generator is iterated.

    Args:
        output_dir: Directory the image is written into. It is not created
            here, so it must already exist.
        df: Frame providing the three estimate columns named above.
        **kwargs: Accepted and ignored by this function.

    Yields:
        The path of the written file, ``output_dir / "sequential_coherence_cdf.png"``.

    Raises:
        KeyError: If one of the three expected columns is missing from ``df``.
    """
    style_plot()
    output_path = output_dir / "sequential_coherence_cdf.png"

    def cdf(series):
        """Return ``(sorted values, cumulative percentage)`` for one column."""
        s = series.sort_values().reset_index(drop=True)
        # After the reset the index is 0..n-1, so (index + 1) / n is the share
        # of rows at or below each sorted value.
        # NOTE(review): NaN entries sort last (pandas default) and still count
        # in len(s), so a column with NaNs yields a curve whose finite part
        # stops below 100% -- confirm this is intended.
        return s.values, ((s.index + 1) / len(s)) * 100

    # Compute all curves before creating the figure, so that a missing column
    # fails without leaving a figure open.
    x_lb, y_lb = cdf(df['lb_estimate_in_minutes'])
    x_ub, y_ub = cdf(df['ub_estimate_in_minutes'])
    x_mid, y_mid = cdf(df['estimate_midpoint'])

    fig, ax = plt.subplots(figsize=(12, 7))
    try:
        # where='post' holds each cumulative value until the next sorted
        # observation, which gives the usual right-continuous ECDF staircase.
        ax.step(x_lb, y_lb, where='post', color=LIME['300'], linewidth=1.8,
                linestyle='--', zorder=2, label='Lower bound estimate')
        ax.step(x_ub, y_ub, where='post', color=LIME['900'], linewidth=1.8,
                linestyle=':', zorder=3, label='Upper bound estimate')
        ax.step(x_mid, y_mid, where='post', color=LIME['600'], linewidth=2.2,
                zorder=4, label='Mid-point')

        # --- Styling and annotations ---
        ax.set_xscale('log')
        ax.set_ylim(0, 100)
        ax.yaxis.set_major_formatter(mtick.PercentFormatter(decimals=0))

        ax.set_title("% of Tasks With Sequential Coherence ≤ X")
        ax.set_xlabel("Sequential Coherence (X)")
        ax.set_ylabel("Cumulative Percentage of Tasks")

        # Tick positions are expressed in minutes (60 -> '1 hr', 1440 -> '1 day')
        # and replace the default power-of-ten log ticks with readable durations.
        ticks = [1, 5, 10, 30, 60, 120, 240, 480, 1440, 2880, 10080, 43200,
                 129600, 259200, 525600]
        ticklabels = ['1 min', '5 min', '10 min', '30 min', '1 hr', '2 hr',
                      '4 hr', '8 hr', '1 day', '2 days', '1 wk', '30 days',
                      '90 days', '180 days', '1 yr']
        ax.set_xticks(ticks)
        ax.set_xticklabels(ticklabels, rotation=45, ha='right')

        ax.legend(loc='lower right')

        # --- Save ---
        # Use the explicit figure rather than pyplot's implicit "current figure".
        fig.tight_layout()
        fig.savefig(output_path, bbox_inches='tight')
    finally:
        # Always release the figure, even if plotting or saving failed;
        # otherwise it stays registered with pyplot and leaks.
        plt.close(fig)

    yield output_path