Merge pull request #137 from renecotyfanboy/data-upgrade
Data upgrade
renecotyfanboy authored Mar 19, 2024
2 parents 03b713d + 2b5d00b commit 0bcf86f
Showing 23 changed files with 457 additions and 131 deletions.
1 change: 1 addition & 0 deletions .dockerignore
@@ -2,6 +2,7 @@
*.npz
*.arf
*.rmf
*.lock

data
data_to_test
2 changes: 2 additions & 0 deletions .github/workflows/test-and-coverage.yml
@@ -25,6 +25,8 @@ jobs:

steps:
- uses: actions/checkout@v4
with:
submodules: 'recursive'
- name: Set up Python 3.10
uses: actions/setup-python@v4
with:
3 changes: 3 additions & 0 deletions .gitmodules
@@ -0,0 +1,3 @@
[submodule "tests/data"]
path = tests/data
url = https://github.com/HEACIT/curated-test-data
Binary file modified docs/examples/statics/subtract_background_with_errors.png
8 changes: 8 additions & 0 deletions docs/references/data.md
@@ -34,3 +34,11 @@
show_root_heading: false
show_root_toc_entry: false
heading_level: 3

## OGIP data parsers

::: jaxspec.data.ogip
options:
show_root_heading: false
show_root_toc_entry: false
heading_level: 3
3 changes: 2 additions & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "jaxspec"
version = "0.0.2"
version = "0.0.3"
description = "jaxspec is a bayesian spectral fitting library for X-ray astronomy."
authors = ["sdupourque <sdupourque@irap.omp.eu>"]
license = "MIT"
@@ -26,6 +26,7 @@ jaxopt = "^0.8.1"
tinygp = "^0.3.0"
seaborn = "^0.13.1"
mkdocstrings = "^0.24.0"
sparse = "^0.15.1"


[tool.poetry.group.docs.dependencies]
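
For context (not part of the diff): the new sparse dependency is the pydata/sparse package that backs the COO-formatted response matrices used in the changes below. A minimal sketch of the COO/CSR round trip those changes rely on, with toy values:

import numpy as np
import sparse

# A toy redistribution matrix: mostly zeros, as a real RMF is.
dense = np.zeros((4, 3))
dense[0, 0] = 0.7
dense[1, 0] = 0.3
dense[2, 2] = 1.0

coo = sparse.COO.from_numpy(dense)            # pydata/sparse COO array
csr = coo.to_scipy_sparse().tocsr()           # scipy CSR gives fast row/column slicing
reduced = csr[np.array([0, 1])][:, np.array([0, 2])]  # keep selected rows, then columns
back = sparse.COO.from_scipy_sparse(reduced)  # back to COO for storage inside xarray

print(back.todense())
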
2 changes: 1 addition & 1 deletion src/jaxspec/analysis/results.py
@@ -343,7 +343,7 @@ def plot_ppc(self, percentile: Tuple[int, int] = (14, 86)) -> plt.Figure:
folding_model.out_energies,
y_observed=folding_model.folded_background.data,
y_samples=bkg_count,
denominator=denominator / folding_model.backratio.data,
denominator=denominator * folding_model.folded_backratio.data,
color=(0.26787604, 0.60085972, 0.63302651),
percentile=percentile,
)
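
One way to read this change (an illustration with made-up numbers, not part of the diff): the posterior-predictive background is now rescaled with the per-bin folded backratio, i.e. counts measured in the background region are scaled to the source extraction region by BACKSCAL_src / BACKSCAL_bkg.

import numpy as np

# Hypothetical per-bin BACKSCAL values, for illustration only.
src_backscal = np.array([0.02, 0.02, 0.04])
bkg_backscal = np.array([0.10, 0.10, 0.10])
backratio = src_backscal / bkg_backscal       # [0.2, 0.2, 0.4]

bkg_region_counts = np.array([50.0, 40.0, 30.0])
expected_in_src_region = bkg_region_counts * backratio
print(expected_in_src_region)                 # [10.  8. 12.]
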
3 changes: 3 additions & 0 deletions src/jaxspec/data/__init__.py
@@ -5,3 +5,6 @@
import astropy.units as u

u.add_enabled_aliases({"counts": u.count})
u.add_enabled_aliases({"channel": u.dimensionless_unscaled})
# Arbitrary units are found in .rsp files; let's hope they are compatible with what we would expect as the rmf x arf
# u.add_enabled_aliases({"au": u.dimensionless_unscaled})
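
For context (not part of the diff): add_enabled_aliases is a standard astropy hook; with the aliases above, unit strings commonly found in OGIP headers parse cleanly. A minimal sketch:

import astropy.units as u

u.add_enabled_aliases({"counts": u.count})
u.add_enabled_aliases({"channel": u.dimensionless_unscaled})

print(u.Unit("counts"))   # resolves to astropy's 'count' unit
print(u.Unit("channel"))  # resolves to a dimensionless unit
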
23 changes: 14 additions & 9 deletions src/jaxspec/data/instrument.py
@@ -1,6 +1,7 @@
import os
import numpy as np
import xarray as xr
from matplotlib import colors
from .ogip import DataARF, DataRMF


@@ -64,21 +65,26 @@ def from_matrix(cls, redistribution_matrix, spectral_response, e_min_unfolded, e
)

@classmethod
def from_ogip_file(cls, arf_file: str | os.PathLike, rmf_file: str | os.PathLike, **kwargs):
def from_ogip_file(cls, rmf_path: str | os.PathLike, arf_path: str | os.PathLike = None):
"""
Load the data from OGIP files.
Parameters:
arf_file: The ARF file path.
rmf_file: The RMF file path.
rmf_path: The RMF file path.
arf_path: The ARF file path.
exposure: The exposure time in second.
grouping: The grouping matrix.
"""

arf = DataARF.from_file(arf_file)
rmf = DataRMF.from_file(rmf_file)
rmf = DataRMF.from_file(rmf_path)

return cls.from_matrix(rmf.matrix, arf.specresp, rmf.energ_lo, rmf.energ_hi, rmf.e_min, rmf.e_max)
if arf_path is not None:
specresp = DataARF.from_file(arf_path).specresp

else:
specresp = np.ones(rmf.energ_lo.shape)

return cls.from_matrix(rmf.sparse_matrix, specresp, rmf.energ_lo, rmf.energ_hi, rmf.e_min, rmf.e_max)

def plot_redistribution(self, **kwargs):
"""
@@ -95,9 +101,8 @@ def plot_redistribution(self, **kwargs):
y="e_max_channel",
xscale="log",
yscale="log",
cmap=cmr.cosmic,
vmin=0,
vmax=0.075,
cmap=cmr.ember_r,
norm=colors.LogNorm(vmin=1e-6, vmax=1),
add_labels=True,
**kwargs,
)
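
A usage sketch of the revised constructor (file paths are placeholders, not from the diff): the ARF is now optional, and when it is omitted the effective area defaults to ones over the RMF energy grid.

from jaxspec.data.instrument import Instrument

# RMF and ARF
instrument = Instrument.from_ogip_file("obs.rmf", arf_path="obs.arf")

# RMF only: specresp falls back to np.ones over the RMF energy bins
instrument_rmf_only = Instrument.from_ogip_file("obs.rmf")
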
64 changes: 46 additions & 18 deletions src/jaxspec/data/obsconf.py
@@ -1,9 +1,14 @@
import numpy as np
import xarray as xr
import sparse
from .instrument import Instrument
from .observation import Observation


def densify_xarray(xarray):
return xr.DataArray(xarray.data.todense(), dims=xarray.dims, coords=xarray.coords, attrs=xarray.attrs, name=xarray.name)


class ObsConfiguration(xr.Dataset):
"""
Class to store the data of a folding model, which is the link between the unfolded and folded spectra.
@@ -51,20 +56,34 @@ def out_energies(self):

out_energies = np.stack(
(
np.asarray(self.coords["e_min_folded"], dtype=np.float64),
np.asarray(self.coords["e_max_folded"], dtype=np.float64),
np.asarray(self.coords["e_min_folded"].data.todense(), dtype=np.float64),
np.asarray(self.coords["e_max_folded"].data.todense(), dtype=np.float64),
)
)

return out_energies

@classmethod
def from_pha_file(cls, pha_file, low_energy: float = 1e-20, high_energy: float = 1e20):
def from_pha_file(
cls, pha_path, rmf_path=None, arf_path=None, bkg_path=None, low_energy: float = 1e-20, high_energy: float = 1e20
):
from .util import data_loader

pha, arf, rmf, bkg, metadata = data_loader(pha_file)
pha, arf, rmf, bkg, metadata = data_loader(pha_path, arf_path=arf_path, rmf_path=rmf_path, bkg_path=bkg_path)

instrument = Instrument.from_matrix(rmf.matrix, arf.specresp, rmf.energ_lo, rmf.energ_hi, rmf.e_min, rmf.e_max)
instrument = Instrument.from_matrix(
rmf.sparse_matrix,
arf.specresp if arf is not None else np.ones_like(rmf.energ_lo),
rmf.energ_lo,
rmf.energ_hi,
rmf.e_min,
rmf.e_max,
)

if bkg is not None:
backratio = np.where(bkg.backscal > 0.0, pha.backscal / np.where(bkg.backscal > 0, bkg.backscal, 1.0), 0.0)
else:
backratio = np.ones_like(pha.counts)

observation = Observation.from_matrix(
pha.counts,
@@ -73,7 +92,7 @@ def from_pha_file(cls, pha_file, low_energy: float = 1e-20, high_energy: float =
pha.quality,
pha.exposure,
background=bkg.counts if bkg is not None else None,
backratio=pha.backscal / bkg.backscal if bkg is not None else 1.0,
backratio=backratio,
attributes=metadata,
)

@@ -83,33 +102,42 @@ def from_pha_file(cls, pha_file, low_energy: float = 1e-20, high_energy: float =
def from_instrument(
cls, instrument: Instrument, observation: Observation, low_energy: float = 1e-20, high_energy: float = 1e20
):
grouping = observation.grouping.copy()
# Exclude the bins flagged with bad quality
quality_filter = observation.quality == 0
grouping = observation.grouping * quality_filter

# Computing the lower and upper energies of the bins after grouping
# This is just a trick to compute it without 10 lines of code
e_min = (xr.where(grouping > 0, grouping, np.nan) * instrument.coords["e_min_channel"]).min(
skipna=True, dim="instrument_channel"
)

e_max = (xr.where(grouping > 0, grouping, np.nan) * instrument.coords["e_max_channel"]).max(
skipna=True, dim="instrument_channel"
)

transfer_matrix = grouping @ (instrument.redistribution * instrument.area * observation.exposure)
transfer_matrix = transfer_matrix.assign_coords({"e_min_folded": e_min, "e_max_folded": e_max})

# Exclude the bins flagged with bad quality
quality_filter = observation.quality == 0
grouping[:, ~quality_filter] = 0
# Exclude bins out of the considered energy range, and bins without contribution from the RMF
row_idx = densify_xarray(((e_min > low_energy) & (e_max < high_energy)) * (grouping.sum(dim="instrument_channel") > 0))

col_idx = densify_xarray(
(instrument.coords["e_min_unfolded"] > 0) * (instrument.redistribution.sum(dim="instrument_channel") > 0)
)

# The transfer matrix is converted locally to csr format to allow FAST slicing
transfer_matrix_scipy = transfer_matrix.data.to_scipy_sparse().tocsr()
transfer_matrix_reduced = transfer_matrix_scipy[row_idx.data][:, col_idx.data]
transfer_matrix_reduced = sparse.COO.from_scipy_sparse(transfer_matrix_reduced)

row_idx = xr.ones_like(e_min, dtype=bool)
row_idx *= (e_min > low_energy) & (e_max < high_energy) # Strict exclusion as in XSPEC
row_idx *= grouping.sum(dim="instrument_channel") > 0 # Exclude channels with no contribution
# A dummy zero matrix is put so that the slicing in xarray is fast
transfer_matrix.data = sparse.zeros_like(transfer_matrix.data)
transfer_matrix = transfer_matrix[row_idx][:, col_idx]

col_idx = xr.ones_like(instrument.area, dtype=bool)
col_idx *= instrument.coords["e_min_unfolded"] > 0.0 # Exclude channels with 0. as lower energy bound
col_idx *= instrument.redistribution.sum(dim="instrument_channel") > 0 # Exclude channels with no contribution
# The reduced transfer matrix is put back in the xarray
transfer_matrix.data = transfer_matrix_reduced

transfer_matrix = transfer_matrix.where(row_idx & col_idx, drop=True)
folded_counts = observation.folded_counts.copy().where(row_idx, drop=True)

if observation.folded_background is not None:
Expand All @@ -123,7 +151,7 @@ def from_instrument(
"transfer_matrix": transfer_matrix,
"area": instrument.area.copy().where(col_idx, drop=True),
"exposure": observation.exposure,
"backratio": observation.backratio,
"folded_backratio": observation.folded_backratio.copy().where(row_idx, drop=True),
"folded_counts": folded_counts,
"folded_background": folded_background,
}
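
A usage sketch of the extended signature (paths are placeholders, not from the diff): the RMF, ARF and background files can now be passed explicitly, presumably falling back to whatever data_loader resolves from the PHA header when they are omitted.

from jaxspec.data.obsconf import ObsConfiguration

obsconf = ObsConfiguration.from_pha_file(
    "obs.pha",
    rmf_path="obs.rmf",
    arf_path="obs.arf",
    bkg_path="obs_bkg.pha",
    low_energy=0.5,
    high_energy=10.0,
)
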
33 changes: 21 additions & 12 deletions src/jaxspec/data/observation.py
@@ -43,39 +43,48 @@ def from_matrix(
attributes = {}

if background is None:
background = np.zeros_like(counts, dtype=int)
background = np.zeros_like(counts, dtype=np.int64)

data_dict = {
"counts": (["instrument_channel"], np.array(counts, dtype=np.int64), {"description": "Counts", "unit": "photons"}),
"counts": (["instrument_channel"], np.asarray(counts, dtype=np.int64), {"description": "Counts", "unit": "photons"}),
"folded_counts": (
["folded_channel"],
np.array(grouping @ counts, dtype=int),
np.asarray(np.ma.filled(grouping @ counts), dtype=np.int64),
{"description": "Folded counts, after grouping", "unit": "photons"},
),
"grouping": (
["folded_channel", "instrument_channel"],
np.array(grouping, dtype=bool),
grouping,
{"description": "Grouping matrix."},
),
"quality": (["instrument_channel"], np.array(quality, dtype=int), {"description": "Quality flag."}),
"quality": (["instrument_channel"], np.asarray(quality, dtype=np.int64), {"description": "Quality flag."}),
"exposure": ([], float(exposure), {"description": "Total exposure", "unit": "s"}),
"backratio": ([], float(backratio), {"description": "Background scaling (SRC_BACKSCAL/BKG_BACKSCAL)"}),
"backratio": (
["instrument_channel"],
np.asarray(backratio, dtype=float),
{"description": "Background scaling (SRC_BACKSCAL/BKG_BACKSCAL)"},
),
"folded_backratio": (
["folded_channel"],
np.asarray(np.ma.filled(grouping @ backratio), dtype=float),
{"description": "Background scaling after grouping"},
),
"background": (
["instrument_channel"],
np.array(background, dtype=int),
np.asarray(background, dtype=np.int64),
{"description": "Background counts", "unit": "photons"},
),
"folded_background": (
["folded_channel"],
np.array(grouping @ background, dtype=int),
np.asarray(np.ma.filled(grouping @ background), dtype=np.int64),
{"description": "Background counts", "unit": "photons"},
),
}

return cls(
data_dict,
coords={
"channel": (["instrument_channel"], np.array(channel, dtype=np.int64), {"description": "Channel number"}),
"channel": (["instrument_channel"], np.asarray(channel, dtype=np.int64), {"description": "Channel number"}),
"grouped_channel": (
["folded_channel"],
np.arange(len(grouping @ counts), dtype=np.int64),
@@ -92,9 +101,9 @@ def from_pha_file(cls, pha_file: str | os.PathLike, **kwargs):
pha, arf, rmf, bkg, metadata = data_loader(pha_file)

if bkg is not None:
backratio = (pha.backscal * pha.exposure * pha.areascal) / (bkg.backscal * bkg.exposure * bkg.areascal)
backratio = np.nan_to_num((pha.backscal * pha.exposure * pha.areascal) / (bkg.backscal * bkg.exposure * bkg.areascal))
else:
backratio = 1.0
backratio = np.ones_like(pha.counts)

return cls.from_matrix(
pha.counts,
@@ -133,7 +142,7 @@ def plot_grouping(self):
ax = fig.add_subplot(gs[1, 0])
ax_histx = fig.add_subplot(gs[0, 0], sharex=ax)
ax_histy = fig.add_subplot(gs[1, 1], sharey=ax)
sns.heatmap(self.grouping.T, ax=ax, cbar=False)
sns.heatmap(self.grouping.data.todense().T, ax=ax, cbar=False)
ax_histx.step(np.arange(len(self.folded_counts)), self.folded_counts, where="post")
ax_histy.step(self.counts, np.arange(len(self.counts)), where="post")

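
For context (toy numbers, not part of the diff): the background scaling is now stored per instrument channel and folded through the grouping matrix, just like the counts.

import numpy as np

# Toy grouping: 4 instrument channels folded into 2 output bins.
grouping = np.array([[1, 1, 0, 0],
                     [0, 0, 1, 1]])

backratio = np.array([0.2, 0.2, 0.4, 0.4])   # one value per instrument channel
background = np.array([5, 3, 2, 6])

print(grouping @ backratio)    # [0.4 0.8]  -> folded_backratio
print(grouping @ background)   # [8 8]      -> folded_background
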