"""This submodule contains a dataclass used to generate core common components required
by models. It is used as input to the
:class:`~virtual_ecosystem.core.base_model.BaseModel`, allowing single instances of
these components to be cascaded down to individual model subclass instances via the
``__init__`` method of the base model..
""" # noqa: D205
from __future__ import annotations
from collections.abc import Iterable
from dataclasses import InitVar, dataclass, field
import numpy as np
import pint
from numpy.typing import NDArray
from xarray import DataArray
from virtual_ecosystem.core.exceptions import ConfigurationError
from virtual_ecosystem.core.grid import Grid
from virtual_ecosystem.core.logger import LOGGER
from virtual_ecosystem.core.model_config import (
CoreConfiguration,
CoreConstants,
LayersConfiguration,
TimingConfiguration,
)
[docs]
@dataclass
class CoreComponents:
"""Core model components.
This dataclass takes a validated model configuration and uses it to generate a set
of core model attributes, populated via the ``__init__`` method of
:class:`~virtual_ecosystem.core.base_model.BaseModel` and hence inherited by the
specific model subclasses.
"""
grid: Grid = field(init=False)
"""A grid structure for the simulation."""
layer_structure: LayerStructure = field(init=False)
"""The vertical layer structure for the simulation."""
model_timing: ModelTiming = field(init=False)
"""The model timing details for the simulation."""
core_constants: CoreConstants = field(init=False)
"""The core constant definitions for the simulation."""
config: InitVar[CoreConfiguration]
"""A validated model configuration."""
[docs]
def __post_init__(self, config: CoreConfiguration) -> None:
"""Populate the core components from the config."""
self.grid = Grid.from_config(config=config.grid)
self.core_constants = config.constants
self.layer_structure = LayerStructure(
config=config.layers,
n_cells=self.grid.n_cells,
max_depth_of_microbial_activity=self.core_constants.max_depth_of_microbial_activity,
)
self.model_timing = ModelTiming(config=config.timing)
[docs]
@dataclass
class ModelTiming:
"""Model timing details.
This data class defines the timing of a Virtual Ecosystem simulation from the
{class}`~virtual_ecosystem.core.model_config.TimingConfiguration` section of a
validated model configuration.
The end time is calculated from the previously extracted timing information. This
end time will always be the largest whole multiple of the update interval that
exceeds or equal the configured ``run_length``.
"""
start_time: np.datetime64 = field(init=False)
"""The start time of the simulation."""
end_time: np.datetime64 = field(init=False)
"""The calculated end time of the simulation."""
reconciled_run_length: np.timedelta64 = field(init=False)
"""The difference between start and calculated end time."""
run_length: np.timedelta64 = field(init=False)
"""The configured run length."""
update_datestamps: NDArray[np.datetime64] = field(init=False)
"""The date of the start of each update interval.""" # TODO: temp fix from #1257
update_interval: np.timedelta64 = field(init=False)
"""The configured update interval."""
run_length_quantity: pint.Quantity = field(init=False)
"""The configured run length."""
update_interval_quantity: pint.Quantity = field(init=False)
"""The configured update interval."""
update_interval_seconds: float = field(init=False)
"""The configured update interval in seconds."""
n_updates: int = field(init=False)
"""The total number of model updates in the configured run."""
updates_per_year: np.float64 = field(init=False)
"""The number of updates per year based on update_interval."""
config: InitVar[TimingConfiguration]
"""A validated model configuration."""
[docs]
def __post_init__(self, config: TimingConfiguration) -> None:
"""Populate the ``ModelTiming`` instance.
This method populates the ``ModelTiming`` attributes from the provided
:class:`~virtual_ecosystem.core.model_config.TimingConfiguration` instance.
Args:
config: A TimingConfiguration instance.
"""
# Convert configuration into datetime64 and timedelta64
self.start_time = np.datetime64(config.start_date)
self.update_interval = np.timedelta64(int(config.update_interval_seconds), "s")
self.run_length = np.timedelta64(int(config.run_length_seconds), "s")
self.update_interval_quantity = pint.Quantity(config.update_interval)
self.run_length_quantity = pint.Quantity(config.run_length)
# Calculate when the simulation should stop as the first number of update
# intervals to exceed the requested run length and calculate the actual run
# length
self.end_time = (
self.start_time
+ np.ceil(self.run_length / self.update_interval) * self.update_interval
)
self.reconciled_run_length = self.end_time - self.start_time
self.n_updates = int((self.end_time - self.start_time) / self.update_interval)
# Calculate the approximate dates of these updates
# TODO: This is a temporary fix to provide actual data to the data science team
# alongside the time index: see
# https://github.com/ImperialCollegeLondon/virtual_ecosystem/discussions/1246
self.update_datestamps = np.arange(
self.start_time, self.end_time, self.update_interval
).astype("datetime64[D]")
# Calculate the number of updates in one year
# TODO - this is not calendar aware - variable length months and leap years.
seconds_per_year = np.timedelta64(31536000, "s")
self.updates_per_year = seconds_per_year / self.update_interval
# Calculate the total number of seconds in the update interval
self.update_interval_seconds = float(
self.update_interval / np.timedelta64(1, "s")
)
# Log the completed timing creation.
LOGGER.info(
"Timing details built from model configuration: " # noqa: UP032
"start - {}, end - {}, run length - {}".format(
self.start_time, self.end_time, self.reconciled_run_length
)
)
[docs]
@dataclass
class LayerStructure:
"""Vertical layer structure of the Virtual Ecosystem.
This class defines the structure of the vertical dimension of a simulation using the
Virtual Ecosystem. The vertical dimension is divided into a series of layers,
ordered from above the canopy to the bottom of the soil, that perform different
roles in the simulation. The configuration of the layer structure is defined in the
:class:`virtual_ecosystem.core.model_config.LayersConfiguration` class.
The layer structure is shown below, along with the default configured height values
in metres relative to ground level.
.. csv-table::
:header: "Index", "Role", "Description", "Set by", "Default"
:widths: 5, 10, 30, 30, 10
0, "above", "Above canopy conditions", "``above_ground_canopy_offset``", "+2 m"
1, "canopy", "Height of first canopy layer", "``PlantsModel``", "--"
"...", "canopy", "Height of other canopy layers", "``PlantsModel``", "--"
10, "canopy", "Height of the last canopy layer ", "``PlantsModel``", "--"
11, "surface", "Near surface conditions", ``surface_layer_height``, "0.1 m"
12, "topsoil", "Top soil layer depth", ``soil_layers``, "-0.25 m"
13, "subsoil", "First subsoil layer depth", ``soil_layers``, "-1.00 m"
.. role:: python(code)
:language: python
**Additional Roles**:
The following additional roles and attributes are also defined when the instance
is created and are constant through the runtime of the model.
1. The ``active_soil`` role indicates soil layers that fall even partially above
the configured `max_depth_of_microbial_activity`. The `soil_layer_thickness`
attribute provides the thickness of each soil layer - including both top- and
sub-soil layers - and the `soil_layer_active_thickness` records the thickness
of biologically active soil within each layer. Note that the ``soil_layers``
provides the sequence of depths of soil horizons relative to the surface and
these values provide the thickness of individual layers: the default
``soil_layers`` values of ``[-0.25, -1.00]`` give thickness values of
``[0.25, 0.75]``.
2. The ``all_soil`` role is the combination of the ``topsoil`` and ``subsoil``
layers.
3. The ``atmosphere`` role is the combination of ``above``, ``canopy`` and
``surface`` layers.
**Dynamic roles**:
The following roles are set when the instance is initialised but are can be
updated during the model run using the :meth:`.set_filled_canopy`
method.
1. The ``filled_canopy`` role indicates canopy layers that contain any canopy
across all of the grid cells. No grid cell contains actual canopy in any of
the canopy layers below the filled canopy layers. This is initialised to show
no filled canopy layers.
2, The ``filled_atmosphere`` role includes the above canopy layer, all filled
canopy layer indices and the surface layer.
3. The ``flux_layers`` role includes the filled canopy layers, understorey, and
the topsoil layer.
In addition, the :attr:`.lowest_canopy_filled` attribute provides an array
giving the vertical index of the lowest filled canopy layer in each grid cell.
It contains ``np.nan`` when there is no canopy in a grid cell and is
initialised as an array of ``np.nan`` values.
**Getting layer indices**:
The :attr:`._role_indices_bool` and :attr:`._role_indices_int` attributes
contain dictionaries keyed by role name of the boolean or integer indices of the
different defined roles. However, all of the role indices should be accessed
using the specific instance properties e.g. :attr:`.index_above`.
Note that the standard index properties like :attr:`.index_above` will return an
array index, which extracts a two dimensional slice of the vertical structure.
It is sometimes more convenient to extract a 1 dimensional slice across cells,
dropping the vertical dimension. This only makes sense for the role layers that
are by definition a single layer thick (``above``, ``surface`` and ``topsoil``),
and for these three layers, additional properties (e.g.
:attr:`.index_above_scalar`) are defined that will return a scalar index that
extracts a one-dimensional slice.
**Methods overview**:
* :meth:`.from_template`: this returns an empty DataArray with
the standard vertical layer structure and grid cell dimensions used across the
Virtual Ecosystem models.
* :meth:`.set_filled_canopy`: this method is used to update the
``filled_canopy`` role indices, the related ``filled_atmosphere`` and
``flux_layers`` roles, and the :attr:`.lowest_canopy_filled` attribute.
Raises:
ConfigurationError: If the configuration elements are incorrect for defining
the layer structure.
"""
config: InitVar[LayersConfiguration]
"""A configuration object instance."""
# These two init arguments could also be accessed directly from the config, but
# this allows for the core components flow from Grid and CoreConstants to validate
# these values rather than doing it internally.
n_cells: InitVar[int]
"""The number of grid cells in the simulation."""
max_depth_of_microbial_activity: float
"""The maximum soil depth of significant microbial activity."""
# Attributes populated by __post_init__
n_canopy_layers: int = field(init=False)
"""The maximum number of canopy layers."""
soil_layer_depths: NDArray[np.floating] = field(init=False)
"""A list of the depths of soil layer boundaries."""
n_soil_layers: int = field(init=False)
"""The number of soil layers."""
above_canopy_height_offset: float = field(init=False)
"""The height above the canopy of the provided reference climate variables."""
surface_layer_height: float = field(init=False)
"""The height above ground used to represent surface conditions."""
_n_cells: int = field(init=False)
"""Private record of the number of grid cells in simulation."""
layer_roles: NDArray[np.str_] = field(init=False)
"""An array of vertical layer role names from top to bottom."""
n_layers: int = field(init=False)
"""The total number of vertical layers in the model."""
layer_indices: NDArray[np.int_] = field(init=False)
"""An array of the integer indices of the vertical layers in the model."""
_role_indices_bool: dict[str, NDArray[np.bool_]] = field(
init=False, default_factory=lambda: {}
)
"""A dictionary of boolean layer role indices within the vertical structure."""
_role_indices_int: dict[str, NDArray[np.int_]] = field(
init=False, default_factory=lambda: {}
)
"""A dictionary of integer layer role indices within the vertical structure."""
_role_indices_scalar: dict[str, int] = field(init=False, default_factory=lambda: {})
"""A dictionary of scalar role indices within the vertical structure for single
layer roles."""
lowest_canopy_filled: NDArray[np.int_] = field(init=False)
"""An integer index showing the lowest filled canopy layer for each grid cell"""
n_canopy_layers_filled: int = field(init=False)
"""The current number of filled canopy layers across grid cells"""
soil_layer_thickness: NDArray[np.floating] = field(init=False)
"""Thickness of each soil layer (m)"""
soil_layer_active_thickness: NDArray[np.floating] = field(init=False)
"""Thickness of the microbially active soil in each soil layer (m)"""
_array_template: DataArray = field(init=False)
"""A private data array template. Access copies using get_template."""
[docs]
def __post_init__(self, config: LayersConfiguration, n_cells: int) -> None:
"""Populate the ``LayerStructure`` instance.
This method populates the ``LayerStructure`` attributes from the dataclass init
arguments.
Args:
config: A Config instance.
n_cells: The number of grid cells in the simulation.
"""
# Store the number of grid cells privately
self._n_cells = n_cells
# Validates the configuration inputs and sets the layer structure attributes
self._initialise_layers(config)
# Now populate the initial role indices and create the layer data template
self._populate_role_indices()
# Set the layer structure DataArray template
self._set_layer_data_array_template()
LOGGER.info("Layer structure built from model configuration")
def _initialise_layers(self, config: LayersConfiguration):
"""Layer structure attribute initialisation.
Args:
config: A Config instance.
"""
# Extract validated configuration values
self.n_canopy_layers = config.canopy_layers
self.soil_layer_depths = np.array(config.soil_layers)
self.n_soil_layers = self.soil_layer_depths.size
self.above_canopy_height_offset = config.above_canopy_height_offset
self.surface_layer_height = config.surface_layer_height
# Set the layer role sequence
self.layer_roles: NDArray[np.str_] = np.array(
["above"]
+ ["canopy"] * self.n_canopy_layers
+ ["surface"]
+ ["topsoil"]
+ ["subsoil"] * (self.n_soil_layers - 1)
)
# Record the number of layers and layer indices
self.n_layers = len(self.layer_roles)
self.layer_indices = np.arange(0, self.n_layers)
# Default values for lowest canopy filled and n filled canopy
self.lowest_canopy_filled = np.repeat(np.nan, self._n_cells)
self.n_canopy_layers_filled = 0
# Check that the maximum depth of the last layer is greater than the max depth
# of microbial activity.
if self.soil_layer_depths[-1] > -self.max_depth_of_microbial_activity:
to_raise = ConfigurationError(
"Maximum depth of soil layers is less than the maximum depth "
"of microbial activity"
)
LOGGER.error(to_raise)
raise to_raise
# Set up soil layer thickness and the thickness of microbially active soil
soil_layer_boundaries = np.array([0, *self.soil_layer_depths])
self.soil_layer_thickness = -np.diff(soil_layer_boundaries)
self.soil_layer_active_thickness = np.clip(
np.minimum(
self.soil_layer_thickness,
(soil_layer_boundaries + self.max_depth_of_microbial_activity)[:-1],
),
a_min=0,
a_max=np.inf,
)
def _populate_role_indices(self):
"""Populate the initial values for the layer role indices."""
# The five core role names
for layer_role in ("above", "canopy", "surface", "topsoil", "subsoil"):
self._set_base_index(layer_role, self.layer_roles == layer_role)
# Add the `all_soil` and `atmospheric` indices
self._set_base_index(
"all_soil",
np.logical_or(
self._role_indices_bool["topsoil"], self._role_indices_bool["subsoil"]
),
)
self._set_base_index("atmosphere", ~self._role_indices_bool["all_soil"])
self._set_base_index(
"active_soil",
np.concatenate(
[
np.repeat(False, self.n_canopy_layers + 2),
self.soil_layer_active_thickness > 0,
]
),
)
# Set the default filled canopy indices to an empty canopy
self._set_base_index("filled_canopy", np.repeat(False, self.n_layers))
# Set two additional widely used indices
self._set_base_index(
"filled_atmosphere",
np.logical_or.reduce(
(
self._role_indices_bool["above"],
self._role_indices_bool["filled_canopy"],
self._role_indices_bool["surface"],
)
),
)
self._set_base_index(
"flux_layers",
np.logical_or(
self._role_indices_bool["filled_canopy"],
np.logical_or(
self._role_indices_bool["surface"],
self._role_indices_bool["topsoil"],
),
),
)
# Set the scalar indices - using item here as a deliberate trap for accidental
# definition of these layers as being more than a single layer.
self._role_indices_scalar["above"] = self._role_indices_int["above"].item()
self._role_indices_scalar["surface"] = self._role_indices_int["surface"].item()
self._role_indices_scalar["topsoil"] = self._role_indices_int["topsoil"].item()
def _set_layer_data_array_template(self):
"""Sets the template data array with the simulation vertical structure.
This data array structure is widely used across the Virtual Ecosystem and this
method sets up a template that can be copied via the
:meth:`LayerStructure.from_template`
method. The private attribute itself should not be accessed directly to avoid
accidental modification of the template.
"""
# PERFORMANCE - does deepcopy of a store template provide any real benefit over
# from_template creating it when called.
self._array_template = DataArray(
np.full((self.n_layers, self._n_cells), np.nan),
dims=("layers", "cell_id"),
coords={
"layers": self.layer_indices,
"layer_roles": ("layers", self.layer_roles),
"cell_id": np.arange(self._n_cells),
},
)
def _set_base_index(self, name: str, bool_values: NDArray[np.bool_]) -> None:
"""Helper method to populate the boolean and integer indices for base roles.
Args:
name: the name of the base role
bool_values: the boolean representation of the index data.
"""
self._role_indices_bool[name] = bool_values
self._role_indices_int[name] = np.nonzero(bool_values)[0]
[docs]
def set_filled_canopy(self, canopy_heights: NDArray[np.floating]) -> None:
"""Set the dynamic canopy indices and attributes.
The layer structure includes a fixed number of canopy layers but these layers
are not all necessarily occupied. This method takes an array of canopy heights
across the grid cells of the simulation and populates the "filled_canopy"
indices, which are the canopy layers that contain at least one filled canopy
layer. It also populates the "lowest_canopy_filled" attribute.
Args:
canopy_heights: A n_canopy_layers by n_grid_cells array of canopy layer
heights.
"""
if canopy_heights.shape != (self.n_canopy_layers, self._n_cells):
to_raise = ValueError("canopy_heights array has wrong dimensions.")
LOGGER.error(to_raise)
raise to_raise
# Update the filled canopy index
canopy_present = ~np.isnan(canopy_heights)
filled_canopy_bool = np.repeat(False, self.n_layers)
filled_canopy_bool[1 : (self.n_canopy_layers + 1)] = np.any(
canopy_present, axis=1
)
self._set_base_index("filled_canopy", filled_canopy_bool)
# Set the lowest filled attribute and number of layers
lowest_filled = np.nansum(canopy_present, axis=0)
self.lowest_canopy_filled = np.where(lowest_filled > 0, lowest_filled, np.nan)
self.n_canopy_layers_filled = np.sum(filled_canopy_bool)
# Update indices that rely on filled canopy
self._set_base_index(
"filled_atmosphere",
np.logical_or.reduce(
(
self._role_indices_bool["above"],
self._role_indices_bool["filled_canopy"],
self._role_indices_bool["surface"],
)
),
)
self._set_base_index(
"flux_layers",
np.logical_or(
self._role_indices_bool["filled_canopy"],
np.logical_or(
self._role_indices_bool["surface"],
self._role_indices_bool["topsoil"],
),
),
)
[docs]
def from_template(self, array_name: str | None = None) -> DataArray:
"""Get a DataArray with the simulation vertical structure.
This method returns two dimensional :class:`xarray.DataArray` with coordinates
set to match the layer roles and number of grid cells for the current
simulation. The array is filled with ``np.nan`` values and the array name is set
if a name is provided.
Args:
array_name: An optional variable name to assign to the returned data array.
"""
# Note that copy defaults to a deep copy, which is what is needed.
template_copy = self._array_template.copy()
if array_name:
template_copy.name = array_name
return template_copy
@property
def index_above(self) -> NDArray[np.bool_]:
"""Layer indices for the above layer."""
return self._role_indices_bool["above"]
@property
def index_canopy(self) -> NDArray[np.bool_]:
"""Layer indices for the above canopy layers."""
return self._role_indices_bool["canopy"]
@property
def index_surface(self) -> NDArray[np.bool_]:
"""Layer indices for the surface layer."""
return self._role_indices_bool["surface"]
@property
def index_topsoil(self) -> NDArray[np.bool_]:
"""Layer indices for the topsoil layer."""
return self._role_indices_bool["topsoil"]
@property
def index_subsoil(self) -> NDArray[np.bool_]:
"""Layer indices for the subsoil layers."""
return self._role_indices_bool["subsoil"]
@property
def index_all_soil(self) -> NDArray[np.bool_]:
"""Layer indices for all soil layers."""
return self._role_indices_bool["all_soil"]
@property
def index_atmosphere(self) -> NDArray[np.bool_]:
"""Layer indices for all atmospheric layers."""
return self._role_indices_bool["atmosphere"]
@property
def index_active_soil(self) -> NDArray[np.bool_]:
"""Layer indices for microbially active soil layers."""
return self._role_indices_bool["active_soil"]
@property
def index_filled_canopy(self) -> NDArray[np.bool_]:
"""Layer indices for the filled canopy layers."""
return self._role_indices_bool["filled_canopy"]
@property
def index_filled_atmosphere(self) -> NDArray[np.bool_]:
"""Layer indices for the filled atmospheric layers."""
return self._role_indices_bool["filled_atmosphere"]
@property
def index_flux_layers(self) -> NDArray[np.bool_]:
"""Layer indices for the flux layers."""
return self._role_indices_bool["flux_layers"]
@property
def index_above_scalar(self) -> int:
"""Layer indices for the above canopy layer."""
return self._role_indices_scalar["above"]
@property
def index_topsoil_scalar(self) -> int:
"""Layer indices for the topsoil layer."""
return self._role_indices_scalar["topsoil"]
@property
def index_surface_scalar(self) -> int:
"""Layer indices for the surface layer."""
return self._role_indices_scalar["surface"]
[docs]
class DisturbanceTiming:
"""Implement the timing for disturbance models."""
def __init__(
self,
model_timing: ModelTiming,
run_at: int | tuple[int, ...] = (),
run_every: tuple[int, ...] = (),
) -> None:
"""Constructor for the DisturbanceTiming class.
At least 'run_at' or 'run_every' need to be provided. 'run_at' takes precedence.
Args:
model_timing: The timing for the models.
run_at: Either a single integer or a tuple of integers indicating the time
indices when the disturbance is to run.
run_every: A tuple of integers indicating (start), or (start, step), or
(start, step, stop), from where a list of integers indicating the time
indices when the disturbance is to run can be constructed. If not
provided, 'step' defaults to 1 and 'stop' defaults to the last time
index. 'start' must always be provided.
"""
if run_at != ():
self._run_at = sorted(run_at) if isinstance(run_at, Iterable) else [run_at]
elif run_every != ():
match len(run_every):
case 1:
start = run_every[0]
step = 1
stop = model_timing.n_updates
case 2:
start, step = run_every
stop = model_timing.n_updates
case 3:
start, step, stop = run_every
case _:
raise ValueError(
"Invalid disturbance timing: 'run_every' must have 1, 2 or 3 "
f"elements. {len(run_every)} found."
)
self._run_at = list(range(start, stop, step))
else:
raise ValueError(
"Invalid disturbance timing: either 'run_at' or 'run_every' must be "
"provided."
)
if self._run_at[0] < 0 or self._run_at[-1] >= model_timing.n_updates:
raise ValueError(
"Invalid disturbance timing: 'run_at' values must be between 0 and"
f" {model_timing.n_updates - 1}"
)
[docs]
def check_run(self, time_index) -> bool:
"""Check if the disturbance needs to be run.
Args:
time_index: The index of the time to check.
Return:
True if the disturbance must be run, False otherwise.
"""
return True if time_index in self._run_at else False