from pathlib import Path
from typing import Iterator

import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import pandas as pd

from ..utils import LIME, style_plot

# X-axis tick positions paired with their human-readable labels. Positions are
# in minutes (1 -> '1 min', 60 -> '1 hr', 1440 -> '1 day', 525600 -> '1 yr').
# Kept as pairs so a position and its label cannot drift out of sync.
_X_TICKS = (
    (1, '1 min'),
    (5, '5 min'),
    (10, '10 min'),
    (30, '30 min'),
    (60, '1 hr'),
    (120, '2 hr'),
    (240, '4 hr'),
    (480, '8 hr'),
    (1440, '1 day'),
    (2880, '2 days'),
    (10080, '1 wk'),
    (43200, '30 days'),
    (129600, '90 days'),
    (259200, '180 days'),
    (525600, '1 yr'),
)


def _empirical_cdf(series: pd.Series):
    """Return ``(sorted_values, cumulative_percent)`` for *series*.

    The values are sorted ascending and the i-th sorted value (0-based) is
    paired with ``(i + 1) / len(series) * 100``.

    NOTE(review): missing values are not dropped here, so they are counted in
    the denominator — confirm that this is the intended treatment.
    """
    ordered = series.sort_values().reset_index(drop=True)
    cumulative_pct = ((ordered.index + 1) / len(ordered)) * 100
    return ordered.values, cumulative_pct


def plot_sequential_coherence_cdf(
    output_dir: Path, df: pd.DataFrame, **kwargs
) -> Iterator[Path]:
    """Plot the CDFs of the sequential-coherence estimates and save a PNG.

    Three step curves are drawn on a log-scaled x-axis from the columns
    ``lb_estimate_in_minutes``, ``ub_estimate_in_minutes`` and
    ``estimate_midpoint`` of *df*. The figure is written to
    ``output_dir / "sequential_coherence_cdf.png"``.

    This is a generator function: no work is done until the returned
    generator is iterated, at which point the plot is saved and the output
    path is yielded once.

    Args:
        output_dir: Directory the PNG is written into.
        df: Frame containing the three estimate columns named above.
            (The tick labels assume the values are in minutes; the midpoint
            column is presumably in the same unit — verify against caller.)
        **kwargs: Accepted for call-site compatibility and ignored.

    Yields:
        The path of the saved image.
    """
    style_plot()
    output_path = output_dir / "sequential_coherence_cdf.png"

    # Compute the CDF data for the lower, upper, and midpoint estimates.
    x_lb, y_lb = _empirical_cdf(df['lb_estimate_in_minutes'])
    x_ub, y_ub = _empirical_cdf(df['ub_estimate_in_minutes'])
    x_mid, y_mid = _empirical_cdf(df['estimate_midpoint'])

    fig, ax = plt.subplots(figsize=(12, 7))
    try:
        # Draw the CDFs as step plots; the midpoint gets the highest zorder so
        # it is rendered on top of the two bound curves.
        ax.step(
            x_lb, y_lb, where='post', color=LIME['300'], linewidth=1.8,
            linestyle='--', zorder=2, label='Lower bound estimate',
        )
        ax.step(
            x_ub, y_ub, where='post', color=LIME['900'], linewidth=1.8,
            linestyle=':', zorder=3, label='Upper bound estimate',
        )
        ax.step(
            x_mid, y_mid, where='post', color=LIME['600'], linewidth=2.2,
            zorder=4, label='Mid-point',
        )

        # --- Styling and annotations ---
        ax.set_xscale('log')
        ax.set_ylim(0, 100)
        ax.yaxis.set_major_formatter(mtick.PercentFormatter(decimals=0))

        ax.set_title("% of Tasks With Sequential Coherence ≤ X")
        ax.set_xlabel("Sequential Coherence (X)")
        ax.set_ylabel("Cumulative Percentage of Tasks")

        # Replace the default log ticks with human-readable durations.
        ax.set_xticks([position for position, _ in _X_TICKS])
        ax.set_xticklabels(
            [label for _, label in _X_TICKS], rotation=45, ha='right'
        )

        ax.legend(loc='lower right')

        # --- Save ---
        # Operate on this figure explicitly rather than on pyplot's implicit
        # "current figure".
        fig.tight_layout()
        fig.savefig(output_path, bbox_inches='tight')
    finally:
        # Always release the figure, even when plotting or saving fails, so
        # it does not stay registered with pyplot.
        plt.close(fig)

    yield output_path