{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "143b0bee",
   "metadata": {},
   "source": [
    "# 13 — Profiling and Benchmarking Baseline\n",
    "\n",
    "Phase 2 baseline measurements for the evaluation pipeline functions.\n",
    "Sweeps: N (maps), H (pixel side), T (thresholds), B (ℓ-bands).\n",
    "Pre-optimisation timings on the local machine (CPU, single core).\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "92c995a7",
   "metadata": {},
   "source": [
    "## 1  Setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2d1cb0e7",
   "metadata": {},
   "outputs": [],
   "source": [
    "import cProfile, pstats, tracemalloc, timeit, os, sys\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from scipy.stats import linregress\n",
    "\n",
    "sys.path.insert(0, os.path.join(os.getcwd(), '..', '..'))\n",
    "\n",
    "from foregrounds_diffusion.flatmaps import (\n",
    "    get_lpf_hpf, make_gaussian_realisation, map2cl,\n",
    ")\n",
    "from foregrounds_diffusion.moments import compute_cross_moments, compute_summed_moments\n",
    "from foregrounds_diffusion.morphology import compute_minkowski_tensors\n",
    "from foregrounds_diffusion.peak_counts import compute_peak_minima_counts\n",
    "\n",
    "%matplotlib inline\n",
    "plt.rcParams.update({'figure.dpi': 100, 'font.size': 10})\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d42d9098",
   "metadata": {},
   "outputs": [],
   "source": [
    "def profile_fn(fn, *args, n_repeat=3, **kwargs):\n",
    "    \"\"\"Return median wall-clock time and peak memory for one call of fn.\"\"\"\"\n",
    "    times = timeit.repeat(lambda: fn(*args, **kwargs), number=1, repeat=n_repeat)\n",
    "    tracemalloc.start()\n",
    "    fn(*args, **kwargs)\n",
    "    _, peak = tracemalloc.get_traced_memory()\n",
    "    tracemalloc.stop()\n",
    "    return {\n",
    "        'time_median_s': float(sorted(times)[n_repeat // 2]),\n",
    "        'time_min_s':    float(min(times)),\n",
    "        'peak_mem_mb':   peak / 1024 ** 2,\n",
    "    }\n",
    "\n",
    "def sweep(fn, dim_values, make_args, n_repeat=3, label='N', verbose=True):\n",
    "    \"\"\"Sweep a single input dimension; return (times, peak_mems) arrays.\"\"\"\"\n",
    "    times, mems = [], []\n",
    "    for val in dim_values:\n",
    "        args, kwargs = make_args(val)\n",
    "        r = profile_fn(fn, *args, n_repeat=n_repeat, **kwargs)\n",
    "        times.append(r['time_median_s'])\n",
    "        mems.append(r['peak_mem_mb'])\n",
    "        if verbose:\n",
    "            print(f'  {label}={val:>5}  {r[\"time_median_s\"]:.4f}s  {r[\"peak_mem_mb\"]:.1f} MB')\n",
    "    return np.array(times), np.array(mems)\n",
    "\n",
    "def fit_slope(xs, ys):\n",
    "    \"\"\"Fit log-log power-law slope.\"\"\"\"\n",
    "    xs, ys = np.array(xs, float), np.array(ys, float)\n",
    "    mask = ys > 0\n",
    "    if mask.sum() < 2:\n",
    "        return float('nan')\n",
    "    slope, *_ = linregress(np.log(xs[mask]), np.log(ys[mask]))\n",
    "    return slope\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "15c9e726",
   "metadata": {},
   "outputs": [],
   "source": [
    "_EL = np.arange(1, 5000)\n",
    "_CL = 1e-5 * _EL.astype(float) ** (-2)\n",
    "\n",
    "def make_maps(N, H):\n",
    "    np.random.seed(42)\n",
    "    params = [H, H, 1.40625, 1.40625]\n",
    "    return (\n",
    "        np.array([make_gaussian_realisation(params, _EL, _CL) for _ in range(N)]),\n",
    "        params,\n",
    "    )\n",
    "\n",
    "def make_bp_filters(params, B):\n",
    "    edges = np.linspace(200, 7000, B + 1)\n",
    "    return [get_lpf_hpf(params, (edges[i], edges[i+1]), filter_type=2) for i in range(B)]\n",
    "\n",
    "N_VALUES = [1, 5, 10, 50, 100]\n",
    "H_VALUES = [32, 64, 128, 256]\n",
    "T_VALUES = [5, 10, 25, 50, 100]\n",
    "B_VALUES = [2, 4, 8, 16]\n",
    "\n",
    "H_FIXED, T_FIXED, B_FIXED, N_FIXED = 64, 25, 4, 10\n",
    "flatskymapparams = [H_FIXED, H_FIXED, 1.40625, 1.40625]\n",
    "thresholds = np.linspace(-2, 2, T_FIXED)\n",
    "bp_fixed = make_bp_filters(flatskymapparams, B_FIXED)\n",
    "thresh_peaks  = np.linspace(-1, 5, 20)\n",
    "thresh_minima = np.linspace(-5, 1, 20)\n",
    "\n",
    "maps_cache = {N: make_maps(N, H_FIXED)[0] for N in N_VALUES}\n",
    "maps10, _ = make_maps(N_FIXED, H_FIXED)\n",
    "print('Fixtures ready.')\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0aefd2f0",
   "metadata": {},
   "source": [
    "## 2  Baseline measurements (pre-optimisation)\n",
    "### 2a  `compute_minkowski_tensors`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "44fd16e0",
   "metadata": {},
   "outputs": [],
   "source": [
    "print('N-scaling (H=64, T=25)')\n",
    "mink_N_times, mink_N_mems = sweep(\n",
    "    compute_minkowski_tensors, N_VALUES,\n",
    "    lambda N: ((maps_cache[N], lambda x: x, thresholds), {}), label='N')\n",
    "\n",
    "print()\n",
    "print('H-scaling (N=10, T=25)')\n",
    "mink_H_times, mink_H_mems = [], []\n",
    "for H in H_VALUES:\n",
    "    maps, params = make_maps(N_FIXED, H)\n",
    "    r = profile_fn(compute_minkowski_tensors, maps, lambda x: x, np.linspace(-2,2,T_FIXED))\n",
    "    mink_H_times.append(r['time_median_s']); mink_H_mems.append(r['peak_mem_mb'])\n",
    "    print(f'  H={H:>4}  {r[\"time_median_s\"]:.4f}s  {r[\"peak_mem_mb\"]:.1f} MB')\n",
    "mink_H_times, mink_H_mems = np.array(mink_H_times), np.array(mink_H_mems)\n",
    "\n",
    "print()\n",
    "print('T-scaling (N=10, H=64)')\n",
    "mink_T_times, mink_T_mems = sweep(\n",
    "    compute_minkowski_tensors, T_VALUES,\n",
    "    lambda T: ((maps10, lambda x: x, np.linspace(-2,2,T)), {}), label='T')\n",
    "\n",
    "print(f'\\nN-slope={fit_slope(N_VALUES,mink_N_times):.2f}  '\n",
    "      f'H-slope={fit_slope(H_VALUES,mink_H_times):.2f}  '\n",
    "      f'T-slope={fit_slope(T_VALUES,mink_T_times):.2f}')\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d74ab000",
   "metadata": {},
   "source": [
    "### 2b  `compute_cross_moments`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e71e2056",
   "metadata": {},
   "outputs": [],
   "source": [
    "print('N-scaling (H=64, B=4)')\n",
    "cross_N_times, cross_N_mems = sweep(\n",
    "    compute_cross_moments, N_VALUES,\n",
    "    lambda N: ((maps_cache[N], maps_cache[N], bp_fixed), {}), label='N')\n",
    "\n",
    "print()\n",
    "print('B-scaling (N=10, H=64)')\n",
    "cross_B_times, cross_B_mems = sweep(\n",
    "    compute_cross_moments, B_VALUES,\n",
    "    lambda B: ((maps10, maps10, make_bp_filters(flatskymapparams, B)), {}), label='B')\n",
    "\n",
    "print(f'\\nN-slope={fit_slope(N_VALUES,cross_N_times):.2f}  '\n",
    "      f'B-slope={fit_slope(B_VALUES,cross_B_times):.2f}')\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e204aaee",
   "metadata": {},
   "source": [
    "### 2c  `map2cl` (per-map loop)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ce63e4ea",
   "metadata": {},
   "outputs": [],
   "source": [
    "print('N-scaling (H=64)')\n",
    "map2cl_N_times, map2cl_N_mems = [], []\n",
    "for N in N_VALUES:\n",
    "    maps = maps_cache[N]\n",
    "    r = profile_fn(lambda maps=maps: [map2cl(flatskymapparams, m) for m in maps])\n",
    "    map2cl_N_times.append(r['time_median_s']); map2cl_N_mems.append(r['peak_mem_mb'])\n",
    "    print(f'  N={N:>5}  {r[\"time_median_s\"]:.4f}s  {r[\"peak_mem_mb\"]:.1f} MB')\n",
    "map2cl_N_times, map2cl_N_mems = np.array(map2cl_N_times), np.array(map2cl_N_mems)\n",
    "\n",
    "print()\n",
    "print('H-scaling (N=10)')\n",
    "map2cl_H_times, map2cl_H_mems = [], []\n",
    "for H in H_VALUES:\n",
    "    maps, params = make_maps(N_FIXED, H)\n",
    "    r = profile_fn(lambda maps=maps, p=params: [map2cl(p, m) for m in maps])\n",
    "    map2cl_H_times.append(r['time_median_s']); map2cl_H_mems.append(r['peak_mem_mb'])\n",
    "    print(f'  H={H:>4}  {r[\"time_median_s\"]:.4f}s  {r[\"peak_mem_mb\"]:.1f} MB')\n",
    "map2cl_H_times, map2cl_H_mems = np.array(map2cl_H_times), np.array(map2cl_H_mems)\n",
    "\n",
    "print(f'\\nN-slope={fit_slope(N_VALUES,map2cl_N_times):.2f}  '\n",
    "      f'H-slope={fit_slope(H_VALUES,map2cl_H_times):.2f}')\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5601711b",
   "metadata": {},
   "source": [
    "### 2d  `compute_peak_minima_counts`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4a8674b1",
   "metadata": {},
   "outputs": [],
   "source": [
    "print('N-scaling (H=64)')\n",
    "peak_N_times, peak_N_mems = sweep(\n",
    "    compute_peak_minima_counts, N_VALUES,\n",
    "    lambda N: ((maps_cache[N], thresh_peaks, thresh_minima), {}), label='N')\n",
    "\n",
    "print(f'\\nN-slope={fit_slope(N_VALUES,peak_N_times):.2f}')\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e98c1ad4",
   "metadata": {},
   "source": [
    "## 3  Figures (pre-optimisation)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ddf44de3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Figure 1 — Wall-clock time vs N (log-log), one panel per function\n",
    "fig, ax = plt.subplots(figsize=(7, 4))\n",
    "for label, times in [\n",
    "    ('compute_minkowski_tensors', mink_N_times),\n",
    "    ('compute_cross_moments',     cross_N_times),\n",
    "    ('map2cl (loop)',             map2cl_N_times),\n",
    "    ('compute_peak_minima_counts', peak_N_times),\n",
    "]:\n",
    "    ax.loglog(N_VALUES, times, marker='o', label=label)\n",
    "# Reference: ideal linear\n",
    "ax.loglog(N_VALUES, np.array(N_VALUES, float) / N_VALUES[0] * mink_N_times[0],\n",
    "          'k--', lw=0.8, label='O(N) reference')\n",
    "ax.set_xlabel('N (number of maps)')\n",
    "ax.set_ylabel('Wall-clock time (s)')\n",
    "ax.set_title('Figure 1 — Time vs N (H=64, T=25, B=4)')\n",
    "ax.legend(fontsize=8)\n",
    "fig.tight_layout()\n",
    "os.makedirs('../../plots/benchmarks', exist_ok=True)\n",
    "fig.savefig('../../plots/benchmarks/fig1_time_vs_N.png', dpi=120)\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "66dcbfca",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Figure 2 — Wall-clock time vs map size H×W (log-log)\n",
    "fig, ax = plt.subplots(figsize=(6, 4))\n",
    "ax.loglog(H_VALUES, mink_H_times,  marker='s', label='compute_minkowski_tensors')\n",
    "ax.loglog(H_VALUES, map2cl_H_times, marker='^', label='map2cl (N=10 loop)')\n",
    "# O(H²) and O(H² log H) references\n",
    "h = np.array(H_VALUES, float)\n",
    "ax.loglog(h, h**2 / h[0]**2 * mink_H_times[0], 'k--', lw=0.8, label='O(H²) ref')\n",
    "ax.set_xlabel('Map side length H (pixels)')\n",
    "ax.set_ylabel('Wall-clock time (s)')\n",
    "ax.set_title('Figure 2 — Time vs H (N=10)')\n",
    "ax.legend(fontsize=8)\n",
    "fig.tight_layout()\n",
    "fig.savefig('../../plots/benchmarks/fig2_time_vs_H.png', dpi=120)\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c504a802",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Figure 3 — Peak memory vs N\n",
    "fig, ax = plt.subplots(figsize=(7, 4))\n",
    "for label, mems in [\n",
    "    ('compute_minkowski_tensors', mink_N_mems),\n",
    "    ('compute_cross_moments',     cross_N_mems),\n",
    "    ('map2cl',                    map2cl_N_mems),\n",
    "]:\n",
    "    ax.plot(N_VALUES, mems, marker='o', label=label)\n",
    "ax.set_xlabel('N (number of maps)')\n",
    "ax.set_ylabel('Peak memory (MB)')\n",
    "ax.set_title('Figure 3 — Peak memory vs N (H=64)')\n",
    "ax.legend(fontsize=8)\n",
    "fig.tight_layout()\n",
    "fig.savefig('../../plots/benchmarks/fig3_mem_vs_N.png', dpi=120)\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f2aac32b",
   "metadata": {},
   "source": [
    "## 7  Scaling law summary table (pre-optimisation)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "20bca39a",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "rows = [\n",
    "    ('compute_minkowski_tensors',\n",
    "     fit_slope(N_VALUES, mink_N_times), fit_slope(H_VALUES, mink_H_times),\n",
    "     fit_slope(T_VALUES, mink_T_times), '—',\n",
    "     mink_N_times[-1], '—'),\n",
    "    ('compute_cross_moments',\n",
    "     fit_slope(N_VALUES, cross_N_times), '—', '—',\n",
    "     fit_slope(B_VALUES, cross_B_times),\n",
    "     cross_N_times[-1], '—'),\n",
    "    ('map2cl (loop)',\n",
    "     fit_slope(N_VALUES, map2cl_N_times), fit_slope(H_VALUES, map2cl_H_times),\n",
    "     '—', '—',\n",
    "     map2cl_N_times[-1], '—'),\n",
    "    ('compute_peak_minima_counts',\n",
    "     fit_slope(N_VALUES, peak_N_times), '—', '—', '—',\n",
    "     peak_N_times[-1], '—'),\n",
    "]\n",
    "\n",
    "df = pd.DataFrame(rows, columns=[\n",
    "    'Function', 'N-slope', 'H-slope', 'T-slope', 'B-slope',\n",
    "    'Time @N=100,H=64 (s)', 'Post-opt time (s)'\n",
    "])\n",
    "df = df.set_index('Function')\n",
    "print(df.to_string())\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "89f6566f",
   "metadata": {},
   "source": [
    "## 4  Optimisations applied\n",
    "*To be filled in after §2.6 optimisations are implemented.*"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a8d3c223",
   "metadata": {},
   "source": [
    "## 5  Post-optimisation measurements\n",
    "*To be filled in after §2.6 optimisations are implemented.*"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "86c4a357",
   "metadata": {},
   "source": [
    "## 6  Before/after comparison figures\n",
    "*To be filled in after §2.6 optimisations are implemented.*"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "09710307",
   "metadata": {},
   "source": [
    "## 8  Parallel scaling (§3.2)\n",
    "\n",
    "Strong-scaling benchmark: fixed total work (N=50 maps, H=64²), varying worker count.\n",
    "All functions now accept `n_jobs` — see §3.2 implementation notes."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1bf79d08",
   "metadata": {},
   "outputs": [],
   "source": [
    "from multiprocessing import cpu_count\n",
    "from foregrounds_diffusion.moments import compute_summed_moments, compute_cross_moments\n",
    "from foregrounds_diffusion.peak_counts import compute_peak_minima_counts\n",
    "from foregrounds_diffusion.morphology import compute_minkowski_tensors\n",
    "\n",
    "N_PARALLEL = 50\n",
    "MAX_WORKERS = min(cpu_count(), 8)\n",
    "N_WORKERS = [w for w in [1, 2, 4, 8] if w <= MAX_WORKERS]\n",
    "print(f'Logical CPUs available: {cpu_count()}, sweeping: {N_WORKERS}')\n",
    "\n",
    "maps50, _ = make_maps(N_PARALLEL, H_FIXED)\n",
    "thresholds_p = np.linspace(-2, 4, 15)\n",
    "thresholds_m = np.linspace(-4, 2, 15)\n",
    "bp4 = make_bp_filters(flatskymapparams, B_FIXED)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ad3c5f4c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Strong scaling — time vs n_workers for three functions\n",
    "scale_fns = {\n",
    "    'compute_minkowski_tensors': lambda nj: compute_minkowski_tensors(\n",
    "        maps50, lambda x: x, thresholds, n_jobs=nj),\n",
    "    'compute_cross_moments': lambda nj: compute_cross_moments(\n",
    "        maps50, maps50, bp4, n_jobs=nj),\n",
    "    'compute_peak_minima_counts': lambda nj: compute_peak_minima_counts(\n",
    "        maps50, thresholds_p, thresholds_m,\n",
    "        smoothing_scales_arcmin=(2.5,), n_jobs=nj),\n",
    "}\n",
    "\n",
    "scale_times = {fn: [] for fn in scale_fns}\n",
    "for fn_name, fn in scale_fns.items():\n",
    "    print(f'\\n{fn_name}')\n",
    "    for nj in N_WORKERS:\n",
    "        r = profile_fn(fn, nj)\n",
    "        scale_times[fn_name].append(r['time_median_s'])\n",
    "        eff = scale_times[fn_name][0] / (nj * r['time_median_s']) * 100\n",
    "        print(f'  n_jobs={nj:2d}  {r[\"time_median_s\"]:.3f}s  eff={eff:.0f}%')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "52c9c2bd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Figure 11 — Strong scaling: wall-clock time vs n_workers\n",
    "fig, ax = plt.subplots(figsize=(6, 4))\n",
    "\n",
    "colors_scale = [plt.rcParams['axes.prop_cycle'].by_key()['color'][i] for i in range(3)]\n",
    "for (fn_name, times), color in zip(scale_times.items(), colors_scale):\n",
    "    t1 = times[0]\n",
    "    ax.plot(N_WORKERS, times, 'o-', color=color,\n",
    "            label=fn_name.replace('_', '\\\\_') if False else fn_name)\n",
    "\n",
    "# Ideal linear speedup reference (dashed)\n",
    "t1_ref = max(t[0] for t in scale_times.values())\n",
    "ideal = [t1_ref / nj for nj in N_WORKERS]\n",
    "ax.plot(N_WORKERS, ideal, 'k--', lw=0.8, label='Ideal (linear)')\n",
    "\n",
    "ax.set_xlabel('Number of workers (n_jobs)')\n",
    "ax.set_ylabel('Wall-clock time (s)')\n",
    "ax.set_title(f'Strong scaling — N={N_PARALLEL} maps, H={H_FIXED}²')\n",
    "ax.legend(fontsize=7)\n",
    "ax.set_xticks(N_WORKERS)\n",
    "plt.tight_layout()\n",
    "plt.savefig('plots/benchmarks/fig11_strong_scaling.pdf')\n",
    "plt.savefig('plots/benchmarks/fig11_strong_scaling.png', dpi=150)\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "79b4c443",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Strong-scaling summary table\n",
    "rows = []\n",
    "for fn_name, times in scale_times.items():\n",
    "    t1 = times[0]\n",
    "    row = {'Function': fn_name, 'Serial (s)': f'{t1:.3f}'}\n",
    "    for nj, t in zip(N_WORKERS[1:], times[1:]):\n",
    "        eff = t1 / (nj * t) * 100\n",
    "        row[f'{nj} workers'] = f'{t:.3f}s ({eff:.0f}%)'\n",
    "    rows.append(row)\n",
    "\n",
    "import pandas as pd\n",
    "pd.set_option('display.max_colwidth', 40)\n",
    "pd.DataFrame(rows).set_index('Function')\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.11.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}