{ "cells": [ { "cell_type": "markdown", "id": "143b0bee", "metadata": {}, "source": [ "# 13 — Profiling and Benchmarking Baseline\n", "\n", "Phase 2 baseline measurements for the evaluation pipeline functions.\n", "Sweeps: N (maps), H (pixel side), T (thresholds), B (ℓ-bands).\n", "Pre-optimisation timings on the local machine (CPU, single core).\n" ] }, { "cell_type": "markdown", "id": "92c995a7", "metadata": {}, "source": [ "## 1 Setup" ] }, { "cell_type": "code", "execution_count": null, "id": "2d1cb0e7", "metadata": {}, "outputs": [], "source": [ "import cProfile, pstats, tracemalloc, timeit, os, sys\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "from scipy.stats import linregress\n", "\n", "sys.path.insert(0, os.path.join(os.getcwd(), '..', '..'))\n", "\n", "from foregrounds_diffusion.flatmaps import (\n", " get_lpf_hpf, make_gaussian_realisation, map2cl,\n", ")\n", "from foregrounds_diffusion.moments import compute_cross_moments, compute_summed_moments\n", "from foregrounds_diffusion.morphology import compute_minkowski_tensors\n", "from foregrounds_diffusion.peak_counts import compute_peak_minima_counts\n", "\n", "%matplotlib inline\n", "plt.rcParams.update({'figure.dpi': 100, 'font.size': 10})\n" ] }, { "cell_type": "code", "execution_count": null, "id": "d42d9098", "metadata": {}, "outputs": [], "source": [ "def profile_fn(fn, *args, n_repeat=3, **kwargs):\n", " \"\"\"Return median wall-clock time and peak memory for one call of fn.\"\"\"\"\n", " times = timeit.repeat(lambda: fn(*args, **kwargs), number=1, repeat=n_repeat)\n", " tracemalloc.start()\n", " fn(*args, **kwargs)\n", " _, peak = tracemalloc.get_traced_memory()\n", " tracemalloc.stop()\n", " return {\n", " 'time_median_s': float(sorted(times)[n_repeat // 2]),\n", " 'time_min_s': float(min(times)),\n", " 'peak_mem_mb': peak / 1024 ** 2,\n", " }\n", "\n", "def sweep(fn, dim_values, make_args, n_repeat=3, label='N', verbose=True):\n", " \"\"\"Sweep a single input dimension; return (times, peak_mems) arrays.\"\"\"\"\n", " times, mems = [], []\n", " for val in dim_values:\n", " args, kwargs = make_args(val)\n", " r = profile_fn(fn, *args, n_repeat=n_repeat, **kwargs)\n", " times.append(r['time_median_s'])\n", " mems.append(r['peak_mem_mb'])\n", " if verbose:\n", " print(f' {label}={val:>5} {r[\"time_median_s\"]:.4f}s {r[\"peak_mem_mb\"]:.1f} MB')\n", " return np.array(times), np.array(mems)\n", "\n", "def fit_slope(xs, ys):\n", " \"\"\"Fit log-log power-law slope.\"\"\"\"\n", " xs, ys = np.array(xs, float), np.array(ys, float)\n", " mask = ys > 0\n", " if mask.sum() < 2:\n", " return float('nan')\n", " slope, *_ = linregress(np.log(xs[mask]), np.log(ys[mask]))\n", " return slope\n" ] }, { "cell_type": "code", "execution_count": null, "id": "15c9e726", "metadata": {}, "outputs": [], "source": [ "_EL = np.arange(1, 5000)\n", "_CL = 1e-5 * _EL.astype(float) ** (-2)\n", "\n", "def make_maps(N, H):\n", " np.random.seed(42)\n", " params = [H, H, 1.40625, 1.40625]\n", " return (\n", " np.array([make_gaussian_realisation(params, _EL, _CL) for _ in range(N)]),\n", " params,\n", " )\n", "\n", "def make_bp_filters(params, B):\n", " edges = np.linspace(200, 7000, B + 1)\n", " return [get_lpf_hpf(params, (edges[i], edges[i+1]), filter_type=2) for i in range(B)]\n", "\n", "N_VALUES = [1, 5, 10, 50, 100]\n", "H_VALUES = [32, 64, 128, 256]\n", "T_VALUES = [5, 10, 25, 50, 100]\n", "B_VALUES = [2, 4, 8, 16]\n", "\n", "H_FIXED, T_FIXED, B_FIXED, N_FIXED = 64, 25, 4, 10\n", "flatskymapparams = [H_FIXED, H_FIXED, 1.40625, 1.40625]\n", "thresholds = np.linspace(-2, 2, T_FIXED)\n", "bp_fixed = make_bp_filters(flatskymapparams, B_FIXED)\n", "thresh_peaks = np.linspace(-1, 5, 20)\n", "thresh_minima = np.linspace(-5, 1, 20)\n", "\n", "maps_cache = {N: make_maps(N, H_FIXED)[0] for N in N_VALUES}\n", "maps10, _ = make_maps(N_FIXED, H_FIXED)\n", "print('Fixtures ready.')\n" ] }, { "cell_type": "markdown", "id": "0aefd2f0", "metadata": {}, "source": [ "## 2 Baseline measurements (pre-optimisation)\n", "### 2a `compute_minkowski_tensors`" ] }, { "cell_type": "code", "execution_count": null, "id": "44fd16e0", "metadata": {}, "outputs": [], "source": [ "print('N-scaling (H=64, T=25)')\n", "mink_N_times, mink_N_mems = sweep(\n", " compute_minkowski_tensors, N_VALUES,\n", " lambda N: ((maps_cache[N], lambda x: x, thresholds), {}), label='N')\n", "\n", "print()\n", "print('H-scaling (N=10, T=25)')\n", "mink_H_times, mink_H_mems = [], []\n", "for H in H_VALUES:\n", " maps, params = make_maps(N_FIXED, H)\n", " r = profile_fn(compute_minkowski_tensors, maps, lambda x: x, np.linspace(-2,2,T_FIXED))\n", " mink_H_times.append(r['time_median_s']); mink_H_mems.append(r['peak_mem_mb'])\n", " print(f' H={H:>4} {r[\"time_median_s\"]:.4f}s {r[\"peak_mem_mb\"]:.1f} MB')\n", "mink_H_times, mink_H_mems = np.array(mink_H_times), np.array(mink_H_mems)\n", "\n", "print()\n", "print('T-scaling (N=10, H=64)')\n", "mink_T_times, mink_T_mems = sweep(\n", " compute_minkowski_tensors, T_VALUES,\n", " lambda T: ((maps10, lambda x: x, np.linspace(-2,2,T)), {}), label='T')\n", "\n", "print(f'\\nN-slope={fit_slope(N_VALUES,mink_N_times):.2f} '\n", " f'H-slope={fit_slope(H_VALUES,mink_H_times):.2f} '\n", " f'T-slope={fit_slope(T_VALUES,mink_T_times):.2f}')\n" ] }, { "cell_type": "markdown", "id": "d74ab000", "metadata": {}, "source": [ "### 2b `compute_cross_moments`" ] }, { "cell_type": "code", "execution_count": null, "id": "e71e2056", "metadata": {}, "outputs": [], "source": [ "print('N-scaling (H=64, B=4)')\n", "cross_N_times, cross_N_mems = sweep(\n", " compute_cross_moments, N_VALUES,\n", " lambda N: ((maps_cache[N], maps_cache[N], bp_fixed), {}), label='N')\n", "\n", "print()\n", "print('B-scaling (N=10, H=64)')\n", "cross_B_times, cross_B_mems = sweep(\n", " compute_cross_moments, B_VALUES,\n", " lambda B: ((maps10, maps10, make_bp_filters(flatskymapparams, B)), {}), label='B')\n", "\n", "print(f'\\nN-slope={fit_slope(N_VALUES,cross_N_times):.2f} '\n", " f'B-slope={fit_slope(B_VALUES,cross_B_times):.2f}')\n" ] }, { "cell_type": "markdown", "id": "e204aaee", "metadata": {}, "source": [ "### 2c `map2cl` (per-map loop)" ] }, { "cell_type": "code", "execution_count": null, "id": "ce63e4ea", "metadata": {}, "outputs": [], "source": [ "print('N-scaling (H=64)')\n", "map2cl_N_times, map2cl_N_mems = [], []\n", "for N in N_VALUES:\n", " maps = maps_cache[N]\n", " r = profile_fn(lambda maps=maps: [map2cl(flatskymapparams, m) for m in maps])\n", " map2cl_N_times.append(r['time_median_s']); map2cl_N_mems.append(r['peak_mem_mb'])\n", " print(f' N={N:>5} {r[\"time_median_s\"]:.4f}s {r[\"peak_mem_mb\"]:.1f} MB')\n", "map2cl_N_times, map2cl_N_mems = np.array(map2cl_N_times), np.array(map2cl_N_mems)\n", "\n", "print()\n", "print('H-scaling (N=10)')\n", "map2cl_H_times, map2cl_H_mems = [], []\n", "for H in H_VALUES:\n", " maps, params = make_maps(N_FIXED, H)\n", " r = profile_fn(lambda maps=maps, p=params: [map2cl(p, m) for m in maps])\n", " map2cl_H_times.append(r['time_median_s']); map2cl_H_mems.append(r['peak_mem_mb'])\n", " print(f' H={H:>4} {r[\"time_median_s\"]:.4f}s {r[\"peak_mem_mb\"]:.1f} MB')\n", "map2cl_H_times, map2cl_H_mems = np.array(map2cl_H_times), np.array(map2cl_H_mems)\n", "\n", "print(f'\\nN-slope={fit_slope(N_VALUES,map2cl_N_times):.2f} '\n", " f'H-slope={fit_slope(H_VALUES,map2cl_H_times):.2f}')\n" ] }, { "cell_type": "markdown", "id": "5601711b", "metadata": {}, "source": [ "### 2d `compute_peak_minima_counts`" ] }, { "cell_type": "code", "execution_count": null, "id": "4a8674b1", "metadata": {}, "outputs": [], "source": [ "print('N-scaling (H=64)')\n", "peak_N_times, peak_N_mems = sweep(\n", " compute_peak_minima_counts, N_VALUES,\n", " lambda N: ((maps_cache[N], thresh_peaks, thresh_minima), {}), label='N')\n", "\n", "print(f'\\nN-slope={fit_slope(N_VALUES,peak_N_times):.2f}')\n" ] }, { "cell_type": "markdown", "id": "e98c1ad4", "metadata": {}, "source": [ "## 3 Figures (pre-optimisation)" ] }, { "cell_type": "code", "execution_count": null, "id": "ddf44de3", "metadata": {}, "outputs": [], "source": [ "# Figure 1 — Wall-clock time vs N (log-log), one panel per function\n", "fig, ax = plt.subplots(figsize=(7, 4))\n", "for label, times in [\n", " ('compute_minkowski_tensors', mink_N_times),\n", " ('compute_cross_moments', cross_N_times),\n", " ('map2cl (loop)', map2cl_N_times),\n", " ('compute_peak_minima_counts', peak_N_times),\n", "]:\n", " ax.loglog(N_VALUES, times, marker='o', label=label)\n", "# Reference: ideal linear\n", "ax.loglog(N_VALUES, np.array(N_VALUES, float) / N_VALUES[0] * mink_N_times[0],\n", " 'k--', lw=0.8, label='O(N) reference')\n", "ax.set_xlabel('N (number of maps)')\n", "ax.set_ylabel('Wall-clock time (s)')\n", "ax.set_title('Figure 1 — Time vs N (H=64, T=25, B=4)')\n", "ax.legend(fontsize=8)\n", "fig.tight_layout()\n", "os.makedirs('../../plots/benchmarks', exist_ok=True)\n", "fig.savefig('../../plots/benchmarks/fig1_time_vs_N.png', dpi=120)\n", "plt.show()\n" ] }, { "cell_type": "code", "execution_count": null, "id": "66dcbfca", "metadata": {}, "outputs": [], "source": [ "# Figure 2 — Wall-clock time vs map size H×W (log-log)\n", "fig, ax = plt.subplots(figsize=(6, 4))\n", "ax.loglog(H_VALUES, mink_H_times, marker='s', label='compute_minkowski_tensors')\n", "ax.loglog(H_VALUES, map2cl_H_times, marker='^', label='map2cl (N=10 loop)')\n", "# O(H²) and O(H² log H) references\n", "h = np.array(H_VALUES, float)\n", "ax.loglog(h, h**2 / h[0]**2 * mink_H_times[0], 'k--', lw=0.8, label='O(H²) ref')\n", "ax.set_xlabel('Map side length H (pixels)')\n", "ax.set_ylabel('Wall-clock time (s)')\n", "ax.set_title('Figure 2 — Time vs H (N=10)')\n", "ax.legend(fontsize=8)\n", "fig.tight_layout()\n", "fig.savefig('../../plots/benchmarks/fig2_time_vs_H.png', dpi=120)\n", "plt.show()\n" ] }, { "cell_type": "code", "execution_count": null, "id": "c504a802", "metadata": {}, "outputs": [], "source": [ "# Figure 3 — Peak memory vs N\n", "fig, ax = plt.subplots(figsize=(7, 4))\n", "for label, mems in [\n", " ('compute_minkowski_tensors', mink_N_mems),\n", " ('compute_cross_moments', cross_N_mems),\n", " ('map2cl', map2cl_N_mems),\n", "]:\n", " ax.plot(N_VALUES, mems, marker='o', label=label)\n", "ax.set_xlabel('N (number of maps)')\n", "ax.set_ylabel('Peak memory (MB)')\n", "ax.set_title('Figure 3 — Peak memory vs N (H=64)')\n", "ax.legend(fontsize=8)\n", "fig.tight_layout()\n", "fig.savefig('../../plots/benchmarks/fig3_mem_vs_N.png', dpi=120)\n", "plt.show()\n" ] }, { "cell_type": "markdown", "id": "f2aac32b", "metadata": {}, "source": [ "## 7 Scaling law summary table (pre-optimisation)" ] }, { "cell_type": "code", "execution_count": null, "id": "20bca39a", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "\n", "rows = [\n", " ('compute_minkowski_tensors',\n", " fit_slope(N_VALUES, mink_N_times), fit_slope(H_VALUES, mink_H_times),\n", " fit_slope(T_VALUES, mink_T_times), '—',\n", " mink_N_times[-1], '—'),\n", " ('compute_cross_moments',\n", " fit_slope(N_VALUES, cross_N_times), '—', '—',\n", " fit_slope(B_VALUES, cross_B_times),\n", " cross_N_times[-1], '—'),\n", " ('map2cl (loop)',\n", " fit_slope(N_VALUES, map2cl_N_times), fit_slope(H_VALUES, map2cl_H_times),\n", " '—', '—',\n", " map2cl_N_times[-1], '—'),\n", " ('compute_peak_minima_counts',\n", " fit_slope(N_VALUES, peak_N_times), '—', '—', '—',\n", " peak_N_times[-1], '—'),\n", "]\n", "\n", "df = pd.DataFrame(rows, columns=[\n", " 'Function', 'N-slope', 'H-slope', 'T-slope', 'B-slope',\n", " 'Time @N=100,H=64 (s)', 'Post-opt time (s)'\n", "])\n", "df = df.set_index('Function')\n", "print(df.to_string())\n" ] }, { "cell_type": "markdown", "id": "89f6566f", "metadata": {}, "source": [ "## 4 Optimisations applied\n", "*To be filled in after §2.6 optimisations are implemented.*" ] }, { "cell_type": "markdown", "id": "a8d3c223", "metadata": {}, "source": [ "## 5 Post-optimisation measurements\n", "*To be filled in after §2.6 optimisations are implemented.*" ] }, { "cell_type": "markdown", "id": "86c4a357", "metadata": {}, "source": [ "## 6 Before/after comparison figures\n", "*To be filled in after §2.6 optimisations are implemented.*" ] }, { "cell_type": "markdown", "id": "09710307", "metadata": {}, "source": [ "## 8 Parallel scaling (§3.2)\n", "\n", "Strong-scaling benchmark: fixed total work (N=50 maps, H=64²), varying worker count.\n", "All functions now accept `n_jobs` — see §3.2 implementation notes." ] }, { "cell_type": "code", "execution_count": null, "id": "1bf79d08", "metadata": {}, "outputs": [], "source": [ "from multiprocessing import cpu_count\n", "from foregrounds_diffusion.moments import compute_summed_moments, compute_cross_moments\n", "from foregrounds_diffusion.peak_counts import compute_peak_minima_counts\n", "from foregrounds_diffusion.morphology import compute_minkowski_tensors\n", "\n", "N_PARALLEL = 50\n", "MAX_WORKERS = min(cpu_count(), 8)\n", "N_WORKERS = [w for w in [1, 2, 4, 8] if w <= MAX_WORKERS]\n", "print(f'Logical CPUs available: {cpu_count()}, sweeping: {N_WORKERS}')\n", "\n", "maps50, _ = make_maps(N_PARALLEL, H_FIXED)\n", "thresholds_p = np.linspace(-2, 4, 15)\n", "thresholds_m = np.linspace(-4, 2, 15)\n", "bp4 = make_bp_filters(flatskymapparams, B_FIXED)\n" ] }, { "cell_type": "code", "execution_count": null, "id": "ad3c5f4c", "metadata": {}, "outputs": [], "source": [ "# Strong scaling — time vs n_workers for three functions\n", "scale_fns = {\n", " 'compute_minkowski_tensors': lambda nj: compute_minkowski_tensors(\n", " maps50, lambda x: x, thresholds, n_jobs=nj),\n", " 'compute_cross_moments': lambda nj: compute_cross_moments(\n", " maps50, maps50, bp4, n_jobs=nj),\n", " 'compute_peak_minima_counts': lambda nj: compute_peak_minima_counts(\n", " maps50, thresholds_p, thresholds_m,\n", " smoothing_scales_arcmin=(2.5,), n_jobs=nj),\n", "}\n", "\n", "scale_times = {fn: [] for fn in scale_fns}\n", "for fn_name, fn in scale_fns.items():\n", " print(f'\\n{fn_name}')\n", " for nj in N_WORKERS:\n", " r = profile_fn(fn, nj)\n", " scale_times[fn_name].append(r['time_median_s'])\n", " eff = scale_times[fn_name][0] / (nj * r['time_median_s']) * 100\n", " print(f' n_jobs={nj:2d} {r[\"time_median_s\"]:.3f}s eff={eff:.0f}%')\n" ] }, { "cell_type": "code", "execution_count": null, "id": "52c9c2bd", "metadata": {}, "outputs": [], "source": [ "# Figure 11 — Strong scaling: wall-clock time vs n_workers\n", "fig, ax = plt.subplots(figsize=(6, 4))\n", "\n", "colors_scale = [plt.rcParams['axes.prop_cycle'].by_key()['color'][i] for i in range(3)]\n", "for (fn_name, times), color in zip(scale_times.items(), colors_scale):\n", " t1 = times[0]\n", " ax.plot(N_WORKERS, times, 'o-', color=color,\n", " label=fn_name.replace('_', '\\\\_') if False else fn_name)\n", "\n", "# Ideal linear speedup reference (dashed)\n", "t1_ref = max(t[0] for t in scale_times.values())\n", "ideal = [t1_ref / nj for nj in N_WORKERS]\n", "ax.plot(N_WORKERS, ideal, 'k--', lw=0.8, label='Ideal (linear)')\n", "\n", "ax.set_xlabel('Number of workers (n_jobs)')\n", "ax.set_ylabel('Wall-clock time (s)')\n", "ax.set_title(f'Strong scaling — N={N_PARALLEL} maps, H={H_FIXED}²')\n", "ax.legend(fontsize=7)\n", "ax.set_xticks(N_WORKERS)\n", "plt.tight_layout()\n", "plt.savefig('plots/benchmarks/fig11_strong_scaling.pdf')\n", "plt.savefig('plots/benchmarks/fig11_strong_scaling.png', dpi=150)\n", "plt.show()\n" ] }, { "cell_type": "code", "execution_count": null, "id": "79b4c443", "metadata": {}, "outputs": [], "source": [ "# Strong-scaling summary table\n", "rows = []\n", "for fn_name, times in scale_times.items():\n", " t1 = times[0]\n", " row = {'Function': fn_name, 'Serial (s)': f'{t1:.3f}'}\n", " for nj, t in zip(N_WORKERS[1:], times[1:]):\n", " eff = t1 / (nj * t) * 100\n", " row[f'{nj} workers'] = f'{t:.3f}s ({eff:.0f}%)'\n", " rows.append(row)\n", "\n", "import pandas as pd\n", "pd.set_option('display.max_colwidth', 40)\n", "pd.DataFrame(rows).set_index('Function')\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "name": "python", "version": "3.11.11" } }, "nbformat": 4, "nbformat_minor": 5 }