Skip to content

halocat.tpcf

tpcf

Halo–halo two-point correlation function in periodic boxes (pycorr).

measure_xi_hh

measure_xi_hh(data: dict, box_size: float, mass_bins: list[tuple[float, float]], r_edges: ndarray, mass_key: str = 'Mtot') -> dict

Compute halo–halo ξ(r) for every auto and cross mass-bin pair.

Parameters:

Name Type Description Default
data dict[str, ndarray]

Halo catalogue with positions "x", "y", "z" (Mpc/h) and the mass column named by mass_key.

required
box_size float

Periodic box side length (Mpc/h).

required
mass_bins list of tuple of float

Finite-width log10(M / [Msun/h]) bins (lo, hi). The output contains one entry per (i, j) with i <= j (autos plus upper-triangular crosses).

required
r_edges ndarray

Shape (K + 1,) separation bin edges (Mpc/h).

required
mass_key str

Mass column name in data. Defaults to "Mtot".

'Mtot'

Returns:

Name Type Description
out dict

Per-pair entries keyed by "M{i}_M{j}" (i <= j). Each value is a dict with:

  • "r" : (K,) arithmetic centres of r_edges
  • "xi" : (K,) correlation values; NaN for empty bins
  • "log10M1", "log10M2" : (2,) [lo, hi] edges
  • "n1", "n2" : halo counts in each side

Plus a top-level "_meta" dict carrying r_edges, box_size, and mass_bins for :func:write_xi_hh.

Notes

r is always the arithmetic mean of r_edges — never pycorr.sepavg, which returns NaN for empty bins, varies between realisations, and would therefore break sub-grid stacking.

Source code in halocat/tpcf.py
def measure_xi_hh(data: dict, box_size: float,
                  mass_bins: list[tuple[float, float]],
                  r_edges: np.ndarray,
                  mass_key: str = "Mtot") -> dict:
    """Measure the halo–halo ξ(r) for all auto and cross mass-bin pairs.

    Parameters
    ----------
    data : dict[str, numpy.ndarray]
        Halo catalogue holding the positions ``"x"``, ``"y"``, ``"z"``
        (Mpc/h) and a mass column named by ``mass_key``.
    box_size : float
        Side length of the periodic box (Mpc/h).
    mass_bins : list of tuple of float
        Finite-width log10(M / [Msun/h]) bin edges ``(lo, hi)``. One
        result is produced for every pair ``(i, j)`` with ``i <= j``,
        i.e. all autos plus the upper-triangular crosses.
    r_edges : numpy.ndarray
        Separation bin edges of shape ``(K + 1,)`` (Mpc/h).
    mass_key : str, optional
        Name of the mass column in ``data``; ``"Mtot"`` by default.

    Returns
    -------
    out : dict
        One entry per pair, keyed ``"M{i}_M{j}"`` (``i <= j``), each a
        dict containing:

        - ``"r"`` : ``(K,)`` arithmetic centres of ``r_edges``
        - ``"xi"`` : ``(K,)`` correlation values; ``NaN`` for empty bins
        - ``"log10M1"``, ``"log10M2"`` : ``(2,)`` ``[lo, hi]`` edges
        - ``"n1"``, ``"n2"`` : halo counts on each side of the pair

        A top-level ``"_meta"`` dict additionally carries ``r_edges``,
        ``box_size``, and ``mass_bins`` for :func:`write_xi_hh`.

    Notes
    -----
    ``r`` is deliberately the arithmetic mean of ``r_edges`` rather than
    ``pycorr.sepavg``: the latter returns NaN for empty bins and varies
    between realisations, which would break sub-grid stacking.
    """
    r_edges = np.asarray(r_edges, dtype=np.float64)
    # Arithmetic bin centres by design — pycorr.sepavg yields NaN for
    # empty bins and fluctuates realisation-to-realisation, which would
    # break stacking across the sub-grid.
    centres = 0.5 * (r_edges[1:] + r_edges[:-1])

    # Build one (3, N_bin) position array per mass bin up front so each
    # selection is evaluated exactly once.
    positions = []
    for lo, hi in mass_bins:
        mask = _select_mass_bin(data, lo, hi, mass_key=mass_key)
        positions.append(
            np.array([data["x"][mask], data["y"][mask], data["z"][mask]])
        )

    out: dict = {}
    for ia, pos_a in enumerate(positions):
        for ib in range(ia, len(positions)):
            pos_b = positions[ib]
            # Autos pass None as the second catalogue so the helper
            # computes an auto-correlation instead of a cross.
            xi = _compute_xi_one(
                pos_a, None if ia == ib else pos_b,
                r_edges, box_size,
            )
            out[f"M{ia}_M{ib}"] = {
                "r": centres.copy(),
                "xi": np.asarray(xi),
                "log10M1": np.array(mass_bins[ia], dtype=np.float64),
                "log10M2": np.array(mass_bins[ib], dtype=np.float64),
                "n1": int(pos_a.shape[1]),
                "n2": int(pos_b.shape[1]),
            }

    out["_meta"] = {
        "r_edges": r_edges,
        "box_size": float(box_size),
        "mass_bins": np.array(mass_bins, dtype=np.float64),
    }
    return out

write_xi_hh

write_xi_hh(xi_dict: dict, outpath: str, attrs: dict | None = None) -> None

Write the dict produced by :func:measure_xi_hh to an HDF5 file.

Parameters:

Name Type Description Default
xi_dict dict

Output of :func:measure_xi_hh. Must contain the per-pair entries plus the "_meta" block with r_edges / mass_bins / box_size.

required
outpath str

Destination path. Parent directories are created if needed.

required
attrs dict

Extra top-level HDF5 file attributes (e.g. gravity, redshift, imodel, ibox).

None
Source code in halocat/tpcf.py
def write_xi_hh(xi_dict: dict, outpath: str, attrs: dict | None = None) -> None:
    """Write the dict produced by :func:`measure_xi_hh` to an HDF5 file.

    Parameters
    ----------
    xi_dict : dict
        Output of :func:`measure_xi_hh`. Must contain the per-pair
        entries plus the ``"_meta"`` block with ``r_edges`` /
        ``mass_bins`` / ``box_size``.
    outpath : str
        Destination path. Parent directories are created if needed; a
        bare filename (no directory component) writes to the current
        working directory.
    attrs : dict, optional
        Extra top-level HDF5 file attributes (e.g. ``gravity``,
        ``redshift``, ``imodel``, ``ibox``).
    """
    # os.path.dirname("") is returned for a bare filename and
    # os.makedirs("") raises FileNotFoundError, so only create the
    # parent when there actually is one.
    parent = os.path.dirname(outpath)
    if parent:
        os.makedirs(parent, exist_ok=True)
    meta = xi_dict.get("_meta", {})
    with h5py.File(outpath, "w") as f:
        # Shared axis/bin definitions live at the file root.
        if "r_edges" in meta:
            f.create_dataset("r_edges", data=meta["r_edges"])
        if "mass_bins" in meta:
            f.create_dataset("mass_bins", data=meta["mass_bins"])
        f.attrs["box_size"] = meta.get("box_size", 0.0)
        if attrs:
            for k, v in attrs.items():
                f.attrs[k] = v
        # One HDF5 group per mass-bin pair; "_meta" was already
        # flattened into the root above.
        for key, val in xi_dict.items():
            if key == "_meta":
                continue
            g = f.create_group(key)
            g.create_dataset("r", data=val["r"])
            g.create_dataset("xi", data=val["xi"])
            g.attrs["log10M1"] = val["log10M1"]
            g.attrs["log10M2"] = val["log10M2"]
            g.attrs["n1"] = val["n1"]
            g.attrs["n2"] = val["n2"]