DEGRACE-pilot: load halo catalogues and derived statistics¶
Walkthrough for loading a halo catalogue and its derived halo
statistics (halo mass function, halo–halo 2PCF, pairwise velocity
moments) for a single (gravity, redshift, imodel, ibox) tuple from
the GLAM DEGRACE-pilot output tree.
The on-disk layout for one realisation:
<OUTPUT_ROOT>/gravity_<gravity>/z_<z:.2f>/imodel_<imodel>/box_<ibox>/
halo.hdf5 # reformatted halo catalogue
hmf.hdf5 # halo mass function
xi_hh.hdf5 # halo–halo 2PCF (per mass-bin pair)
velocity_moments.hdf5 # pairwise velocity moments
where OUTPUT_ROOT = halocat.config.OUTPUT_ROOT. Missing files are
produced on demand by the HMFLoader / XiHHLoader classes
(load-or-measure) and by halocat.pipeline.run_single.
import os
import h5py
import numpy as np
import matplotlib.pyplot as plt
from halocat import config as C
from halocat.io import read_halo_hdf5
from halocat import HMFLoader, XiHHLoader
# Select a single realisation; change these four knobs to inspect a
# different point of the (gravity, redshift, imodel, ibox) sub-grid.
GRAVITY = "LCDM"  # one of C.GRAVITIES = ['LCDM', 'fRn1']
REDSHIFT = 0.25  # must be a key of C.Z_SNAP_MAP
IMODEL = 1  # 1..64
IBOX = 1  # 1..5

# Resolve the on-disk directory for this realisation and show what it holds.
out_dir = C.get_output_dir(GRAVITY, REDSHIFT, IMODEL, IBOX)
print(out_dir)
entries = sorted(os.listdir(out_dir))
print("contents:", entries)
/cosma8/data/dp203/dc-ruan1/degrace_pilot/gravity_LCDM/z_0.25/imodel_1/box_1 contents: ['halo.hdf5', 'hmf.hdf5', 'velocity_moments.hdf5', 'xi_hh.hdf5']
1. Halo catalogue — halo.hdf5¶
halocat.io.read_halo_hdf5 returns a dict[str, np.ndarray] keyed by
the columns in halocat.config.CATALOGUE_COLUMNS (positions in
Mpc/h, velocities in km/s, masses in M⊙/h).
# Read only the root attribute block of the reformatted halo catalogue
# (box size, gravity model, realisation indices, redshift, snapshot number).
halo_path = os.path.join(out_dir, "halo.hdf5")
with h5py.File(halo_path, "r") as h5:
    halo_attrs = dict(h5.attrs.items())
halo_attrs
{'box_size': np.float64(1024.0),
'gravity': 'LCDM',
'ibox': np.int64(1),
'imodel': np.int64(1),
'redshift': np.float64(0.25),
'snapnum': np.int64(137)}
# Full catalogue load: dict[str, np.ndarray], one entry per column.
data = read_halo_hdf5(halo_path)
# Halo count taken from the mass column (all columns share this length).
n_halo = len(data[C.MASS_COLUMN])
print(f"loaded {n_halo:,} haloes, {len(data)} columns")
print("columns:", list(data.keys()))
loaded 1,010,208 haloes, 24 columns columns: ['2KEp_m1', 'Cvir', 'DistinctSub', 'Lambda', 'Major_x', 'Major_y', 'Major_z', 'Mbound', 'Mtot', 'Nhalo', 'Nparticles', 'RadRMS_k', 'Rvir', 'Vcirc', 'Vrms', 'Xoff', 'b_a', 'c_a', 'vx', 'vy', 'vz', 'x', 'y', 'z']
# Compact min/max/mean table for the columns most used downstream.
key_cols = ["x", "y", "z", "vx", "vy", "vz", C.MASS_COLUMN, "Rvir"]
width = max(map(len, key_cols))
header = f"{'column'.ljust(width)} {'shape':>10} {'min':>12} {'max':>12} {'mean':>12}"
print(header)
for col in key_cols:
    arr = data[col]
    row = (f"{col.ljust(width)} {str(arr.shape):>10} "
           f"{arr.min():12.4g} {arr.max():12.4g} {arr.mean():12.4g}")
    print(row)
column shape min max mean x (1010208,) 0 1024 512.7 y (1010208,) 0.001 1024 526.9 z (1010208,) 0.0007 1024 499.6 vx (1010208,) -2130 2644 2.364 vy (1010208,) -2836 3116 -6.845 vz (1010208,) -2780 3654 1.38 Mtot (1010208,) 6.086e+11 7.281e+15 6.944e+12 Rvir (1010208,) 378.6 4763 461.5
# Example mass cut — keep haloes with log10(Mtot) > 13.5 — then assemble
# (N, 3) position and velocity arrays from the per-axis columns.
log10M = np.log10(data[C.MASS_COLUMN])
sel = log10M > 13.5
print(f"selected {sel.sum():,} / {n_halo:,} "
      f"({100 * sel.mean():.2f}%)")

positions = np.stack([data[axis][sel] for axis in ("x", "y", "z")], axis=-1)
velocities = np.stack([data[axis][sel] for axis in ("vx", "vy", "vz")], axis=-1)
speed_med = np.median(np.linalg.norm(velocities, axis=1))
print(f"positions shape={positions.shape}, box=[0, {C.BOX_SIZE:g}] Mpc/h")
print(f"velocities shape={velocities.shape}, "
      f"|v|_med={speed_med:.1f} km/s")
selected 32,543 / 1,010,208 (3.22%) positions shape=(32543, 3), box=[0, 1024] Mpc/h velocities shape=(32543, 3), |v|_med=747.9 km/s
2. Halo mass function — hmf.hdf5¶
HMFLoader is load-or-measure: if hmf.hdf5 is missing it calls
halocat.pipeline.run_single(..., do_hmf=True) to produce it,
auto-reformatting the source .DAT if halo.hdf5 is also missing.
Pass overwrite=True to force re-measurement.
# Load-or-measure the halo mass function for the chosen realisation.
hmf_loader = HMFLoader()
hmf = hmf_loader.get(GRAVITY, REDSHIFT, IMODEL, IBOX)

centres = hmf.log10M_bin_centre
print("dndlog10M.shape :", hmf.dndlog10M.shape)
print("log10M centres : "
      f"{centres[0]:.3f} … {centres[-1]:.3f}")
print("box_size :", hmf.box_size, "Mpc/h")
print("total counts :", int(hmf.counts.sum()))
dndlog10M.shape : (30,) log10M centres : 12.550 … 15.450 box_size : 1024.0 Mpc/h total counts : 396948
# dn/dlog10M versus mass on log-log axes.
fig, ax = plt.subplots(figsize=(5.0, 4.0))
M = 10 ** hmf.log10M_bin_centre
ax.loglog(M, hmf.dndlog10M, "o-", lw=1, ms=4)
ax.set_xlabel(r"$M_\mathrm{tot}\;[M_\odot/h]$")
ax.set_ylabel(r"$\mathrm{d}n/\mathrm{d}\log_{10}M\;[(\mathrm{Mpc}/h)^{-3}]$")
ax.set_title(f"HMF — {GRAVITY} z={REDSHIFT:g} m{IMODEL} b{IBOX}")
fig.tight_layout()
3. Halo–halo 2PCF — xi_hh.hdf5¶
XiHHLoader returns an XiHHRecord with xi.shape == (n_pairs, n_r)
where pairs are the auto + cross combinations of the mass bins
defined in scripts/config_xi_hh.yaml. Bin centres r are the
arithmetic mean of r_edges (never pycorr.sepavg).
# Load-or-measure the halo-halo 2PCF record for this realisation.
xi_loader = XiHHLoader()
xi = xi_loader.get(GRAVITY, REDSHIFT, IMODEL, IBOX)

print("pairs :", xi.pairs)
print("xi.shape :", xi.xi.shape, " r.shape:", xi.r.shape)
print("mass_bins (log10) :\n", xi.mass_bins)
r_lo, r_hi = xi.r_edges[0], xi.r_edges[-1]
print("r range (Mpc/h) : "
      f"{r_lo:.3g} … {r_hi:.3g}")
pairs : ['M0_M0', 'M0_M1', 'M0_M2', 'M0_M3', 'M0_M4', 'M0_M5', 'M0_M6', 'M0_M7', 'M0_M8', 'M0_M9', 'M0_M10', 'M0_M11', 'M0_M12', 'M0_M13', 'M0_M14', 'M0_M15', 'M0_M16', 'M0_M17', 'M0_M18', 'M0_M19', 'M0_M20', 'M1_M1', 'M1_M2', 'M1_M3', 'M1_M4', 'M1_M5', 'M1_M6', 'M1_M7', 'M1_M8', 'M1_M9', 'M1_M10', 'M1_M11', 'M1_M12', 'M1_M13', 'M1_M14', 'M1_M15', 'M1_M16', 'M1_M17', 'M1_M18', 'M1_M19', 'M1_M20', 'M2_M2', 'M2_M3', 'M2_M4', 'M2_M5', 'M2_M6', 'M2_M7', 'M2_M8', 'M2_M9', 'M2_M10', 'M2_M11', 'M2_M12', 'M2_M13', 'M2_M14', 'M2_M15', 'M2_M16', 'M2_M17', 'M2_M18', 'M2_M19', 'M2_M20', 'M3_M3', 'M3_M4', 'M3_M5', 'M3_M6', 'M3_M7', 'M3_M8', 'M3_M9', 'M3_M10', 'M3_M11', 'M3_M12', 'M3_M13', 'M3_M14', 'M3_M15', 'M3_M16', 'M3_M17', 'M3_M18', 'M3_M19', 'M3_M20', 'M4_M4', 'M4_M5', 'M4_M6', 'M4_M7', 'M4_M8', 'M4_M9', 'M4_M10', 'M4_M11', 'M4_M12', 'M4_M13', 'M4_M14', 'M4_M15', 'M4_M16', 'M4_M17', 'M4_M18', 'M4_M19', 'M4_M20', 'M5_M5', 'M5_M6', 'M5_M7', 'M5_M8', 'M5_M9', 'M5_M10', 'M5_M11', 'M5_M12', 'M5_M13', 'M5_M14', 'M5_M15', 'M5_M16', 'M5_M17', 'M5_M18', 'M5_M19', 'M5_M20', 'M6_M6', 'M6_M7', 'M6_M8', 'M6_M9', 'M6_M10', 'M6_M11', 'M6_M12', 'M6_M13', 'M6_M14', 'M6_M15', 'M6_M16', 'M6_M17', 'M6_M18', 'M6_M19', 'M6_M20', 'M7_M7', 'M7_M8', 'M7_M9', 'M7_M10', 'M7_M11', 'M7_M12', 'M7_M13', 'M7_M14', 'M7_M15', 'M7_M16', 'M7_M17', 'M7_M18', 'M7_M19', 'M7_M20', 'M8_M8', 'M8_M9', 'M8_M10', 'M8_M11', 'M8_M12', 'M8_M13', 'M8_M14', 'M8_M15', 'M8_M16', 'M8_M17', 'M8_M18', 'M8_M19', 'M8_M20', 'M9_M9', 'M9_M10', 'M9_M11', 'M9_M12', 'M9_M13', 'M9_M14', 'M9_M15', 'M9_M16', 'M9_M17', 'M9_M18', 'M9_M19', 'M9_M20', 'M10_M10', 'M10_M11', 'M10_M12', 'M10_M13', 'M10_M14', 'M10_M15', 'M10_M16', 'M10_M17', 'M10_M18', 'M10_M19', 'M10_M20', 'M11_M11', 'M11_M12', 'M11_M13', 'M11_M14', 'M11_M15', 'M11_M16', 'M11_M17', 'M11_M18', 'M11_M19', 'M11_M20', 'M12_M12', 'M12_M13', 'M12_M14', 'M12_M15', 'M12_M16', 'M12_M17', 'M12_M18', 'M12_M19', 'M12_M20', 'M13_M13', 'M13_M14', 'M13_M15', 'M13_M16', 'M13_M17', 'M13_M18', 
'M13_M19', 'M13_M20', 'M14_M14', 'M14_M15', 'M14_M16', 'M14_M17', 'M14_M18', 'M14_M19', 'M14_M20', 'M15_M15', 'M15_M16', 'M15_M17', 'M15_M18', 'M15_M19', 'M15_M20', 'M16_M16', 'M16_M17', 'M16_M18', 'M16_M19', 'M16_M20', 'M17_M17', 'M17_M18', 'M17_M19', 'M17_M20', 'M18_M18', 'M18_M19', 'M18_M20', 'M19_M19', 'M19_M20', 'M20_M20'] xi.shape : (231, 30) r.shape: (231, 30) mass_bins (log10) : [[12.4 12.5] [12.5 12.6] [12.6 12.7] [12.7 12.8] [12.8 12.9] [12.9 13. ] [13. 13.1] [13.1 13.2] [13.2 13.3] [13.3 13.4] [13.4 13.5] [13.5 13.6] [13.6 13.7] [13.7 13.8] [13.8 13.9] [13.9 14. ] [14. 14.1] [14.1 14.2] [14.2 14.3] [14.3 14.4] [14.4 14.5]] r range (Mpc/h) : 0.3 … 120
# r^2 * xi(r) for the auto pairs only (same mass bin on both sides).
fig, ax = plt.subplots(figsize=(5.0, 4.0))
for idx, (bin_a, bin_b) in enumerate(xi.pair_indices):
    if bin_a != bin_b:
        continue  # cross pairs are skipped here
    lo, hi = xi.mass_bins[bin_a]
    ax.plot(xi.r[idx], xi.r[idx] ** 2 * xi.xi[idx],
            label=f"{xi.pairs[idx]} log10M=[{lo:.2f}, {hi:.2f})")
ax.set_xlabel(r"$r\;[\mathrm{Mpc}/h]$")
ax.set_ylabel(r"$r^{2}\,\xi_{hh}(r)$")
ax.set_xscale("log")
ax.set_title(f"xi_hh autos — {GRAVITY} z={REDSHIFT:g} m{IMODEL} b{IBOX}")
ax.legend(fontsize=8)
ax.grid(which="both", alpha=0.3)
fig.tight_layout()
4. Pairwise velocity moments — velocity_moments.hdf5¶
Velocity moments are written by halocat.velocity.write_velocity_moments
as one HDF5 group per mass-bin pair (M{i}_M{j} with i ≤ j). There
is no dedicated loader class — read directly with h5py. The moment
datasets per group are: npairs, m10 (mean radial pairwise
velocity), and central moments c20, c02, c12, c30, c40, c04, c22.
# Pairwise velocity moments have no loader class — read the HDF5 directly.
vel_path = os.path.join(out_dir, "velocity_moments.hdf5")
have_vel = os.path.exists(vel_path)
print("velocity_moments.hdf5:", "found" if have_vel else "missing")


def _pair_sort_key(name):
    """Map a group name like 'M3_M12' to (3, 12) for numeric ordering."""
    return tuple(int(part[1:]) for part in name.split("_"))


if have_vel:
    with h5py.File(vel_path, "r") as h5:
        r_edges_vel = h5["r_edges"][...]
        mass_bins_vel = h5["mass_bins"][...]
        # moment_keys may be stored as bytes; normalise to str.
        moment_keys = [m.decode() if isinstance(m, bytes) else m
                       for m in h5.attrs["moment_keys"]]
        # Mass-bin-pair groups are named M{i}_M{j}; sort them numerically.
        groups = sorted(
            (name for name in h5.keys()
             if name.startswith("M") and "_M" in name),
            key=_pair_sort_key,
        )
        autos = [g for g in groups
                 if g.split("_")[0] == g.split("_")[1]]
    print(f"{len(groups)} groups ({len(autos)} autos)")
    print("moment keys:", moment_keys)
    print("r_edges:", r_edges_vel.shape,
          f"({r_edges_vel[0]:.2f} … {r_edges_vel[-1]:.2f}) Mpc/h")
    print("mass_bins:\n", mass_bins_vel)
velocity_moments.hdf5: found 10 groups (4 autos) moment keys: ['npairs', 'm10', 'c20', 'c02', 'c12', 'c30', 'c40', 'c04', 'c22'] r_edges: (61,) (0.00 … 150.00) Mpc/h mass_bins: [[12.5 13. ] [13. 13.5] [13.5 14. ] [14. 14.5]]
# Mean radial pairwise velocity m10(r), auto mass-bin pairs only.
if have_vel:
    r_centres_vel = 0.5 * (r_edges_vel[:-1] + r_edges_vel[1:])
    fig, ax = plt.subplots(figsize=(5.0, 4.0))
    with h5py.File(vel_path, "r") as h5:
        for name in autos:
            group = h5[name]
            lo, hi = group.attrs["log10M1"]
            ax.plot(r_centres_vel, group["m10"][...],
                    label=f"{name} log10M=[{lo:.2f}, {hi:.2f})")
    ax.axhline(0, color="k", lw=0.5)
    ax.set_xlabel(r"$r\;[\mathrm{Mpc}/h]$")
    ax.set_ylabel(r"$\langle v_r\rangle\;[\mathrm{km/s}]$")
    ax.set_title(f"pairwise mean radial velocity — "
                 f"{GRAVITY} z={REDSHIFT:g} m{IMODEL} b{IBOX}")
    ax.legend(fontsize=8)
    ax.grid(alpha=0.3)
    fig.tight_layout()
5. Stack across iboxes¶
Both loaders provide get_grid(...) for loading a sub-grid of
realisations and stacking them into one array. Below: the HMF for
(GRAVITY, IMODEL, REDSHIFT) averaged over ibox=1..5.
# Stack the HMF over ibox=1..5 for this (gravity, redshift, imodel) point.
grid = hmf_loader.get_grid(
    gravities=[GRAVITY],
    redshifts=[REDSHIFT],
    imodels=[IMODEL],
    iboxes=[1, 2, 3, 4, 5],
    skip_missing=True,
)
print("dndlog10M.shape :", grid["dndlog10M"].shape,
      " (G, Z, M, B, K)")
print("present mask :", grid["present"][0, 0, 0])

# Collapse the ibox axis (axis=3) with nan-aware stats, then squeeze the
# singleton gravity/redshift/imodel axes down to shape (K,).
stacked = grid["dndlog10M"]
mean_dndlog10M = np.nanmean(stacked, axis=3).squeeze()
std_dndlog10M = np.nanstd(stacked, axis=3).squeeze()

fig, ax = plt.subplots(figsize=(5.0, 4.0))
M = 10 ** grid["log10M_bin_centre"]
ax.loglog(M, mean_dndlog10M, "o-", lw=1, ms=4, label="mean over iboxes")
# Clip the lower band at a tiny positive value so the log-scale axis is happy.
band_lo = np.clip(mean_dndlog10M - std_dndlog10M, 1e-30, None)
band_hi = mean_dndlog10M + std_dndlog10M
ax.fill_between(M, band_lo, band_hi,
                alpha=0.2, label=r"$\pm 1\sigma$ across iboxes")
ax.set_xlabel(r"$M_\mathrm{tot}\;[M_\odot/h]$")
ax.set_ylabel(r"$\mathrm{d}n/\mathrm{d}\log_{10}M\;[(\mathrm{Mpc}/h)^{-3}]$")
ax.set_title(f"HMF mean over iboxes — {GRAVITY} z={REDSHIFT:g} m{IMODEL}")
ax.legend()
ax.grid(which="both", alpha=0.3)
fig.tight_layout()
dndlog10M.shape : (1, 1, 1, 5, 30) (G, Z, M, B, K) present mask : [ True True True True True]