Source code for virtual_ecosystem.entry_points

"""The :mod:`~virtual_ecosystem.entry_points`  module defines the command line entry
points to the virtual_ecosystem package. At the moment, a single entry point is
defined: `ve_run`, which simply configures and runs a Virtual Ecosystem simulation
based on a set of configuration files.
"""  # noqa D210, D415

import argparse
import sys
import textwrap
import tomllib
from collections.abc import Sequence
from pathlib import Path
from shutil import copytree, ignore_patterns
from tomllib import TOMLDecodeError
from typing import Any

import virtual_ecosystem as ve
from virtual_ecosystem import example_data_path
from virtual_ecosystem.core.config_builder import merge_configuration_dicts
from virtual_ecosystem.core.exceptions import ConfigurationError
from virtual_ecosystem.core.logger import LOGGER
from virtual_ecosystem.main import Progress, ve_run



[docs]
def _parse_config_string(config_string: str) -> dict[str, Any]:
    """Parse a single configuration string into a dictionary.

    Args:
        config_string: A string containing a TOML formatted configuration setting, for
            example: "hydrology.initial_soil_moisture=0.3"

    Raises:
        ConfigurationError: If the command-line parameters are not valid TOML
    """
    try:
        return tomllib.loads(config_string)
    except TOMLDecodeError:
        to_raise = ConfigurationError(
            f"Invalid format for command-line configuration setting: {config_string}"
        )
        LOGGER.critical(to_raise)
        raise to_raise




[docs]
def _parse_command_line_config(config_strings: Sequence[str]) -> dict[str, Any]:
    """Parse command-line configuration settings.

    This function takes a list of strings containing configuration settings passed to
    the ``ve_run_cli`` entry points using the ``--config`` option. Each string should be
    parseable TOML (e.g. ``plants.constants.value=0.4``) and the function builds a
    partial configuration dictionary from the input strings.

    Args:
        config_strings: A list of strings containing configuration settings. An empty
            sequence gives an empty configuration dictionary.

    Returns:
        A partial configuration dictionary containing parsed settings.

    Raises:
        ConfigurationError: Invalid format for parameters or conflicting values supplied
    """

    config_dict: dict[str, Any] = {}

    # Conflicts are tracked across every merge. Checking only the value left over from
    # the final merge would miss conflicts reported earlier in the sequence and would
    # fail on an empty sequence, where no merge ever runs.
    conflicts_found = False

    for param_str in config_strings:
        param_dict = _parse_config_string(param_str)
        config_dict, conflicts = merge_configuration_dicts(config_dict, param_dict)
        if conflicts:
            conflicts_found = True

    if conflicts_found:
        to_raise = ConfigurationError(
            "Conflicting values supplied for command-line arguments"
        )
        LOGGER.critical(to_raise)
        raise to_raise

    return config_dict




[docs]
def _parse_cli_paths(cli_paths: Sequence[str]) -> dict[str, Path]:
    """Parse command-line data input path substitutions.

    This function takes a list of strings containing path substitutions to
    the ``ve_run_cli`` entry points using the ``-p`` option. Each string should provide
    a file marker that can be referred to in a configuration file and a data path that
    should be used for that marker.

    Args:
        cli_paths: A list of strings, each using the form ``marker=path``.

    Returns:
        A dictionary of markers and paths. If a marker is repeated, the last path
        provided for it is used.

    Raises:
        ValueError: If an input does not contain an equals sign or if the path does
            not point to an existing file.
    """

    cli_path_dict: dict[str, Path] = {}

    for path_data in cli_paths:
        # Split on the first equals sign only (allowing further '=' in path names).
        # An empty separator means the input contained no equals sign at all.
        marker, separator, file = path_data.partition("=")
        if not separator:
            raise ValueError(
                "Incorrect syntax in command line path input: should use "
                "'marker=path' values."
            )

        # Check the file exists: is_file() is also False for paths that do not exist.
        file_path = Path(file)
        if not file_path.is_file():
            raise ValueError(
                f"Command line path input does not point to existing file: {file}"
            )

        cli_path_dict[marker] = file_path

    return cli_path_dict




[docs]
def install_example_directory(install_dir: Path) -> int:
    """Copy the packaged example directory into a chosen location.

    The example data and configuration files shipped inside the package are copied to
    a new ``ve_example`` directory within ``install_dir``. Users can then explore the
    simulation directory structure and files without touching the originals inside the
    package tree. Entries with names matching ``__*`` are not copied.

    Nothing is copied if ``install_dir`` is not an existing directory or if it already
    contains a ``ve_example`` entry: a message is written to stderr instead.

    Args:
        install_dir: the installation path.

    Returns:
        An integer indicating success (0) or failure (1).
    """
    target = install_dir / "ve_example"

    # Identify the reason, if any, that the installation cannot go ahead.
    if not install_dir.is_dir():
        problem = "--install-example path is not a valid directory.\n"
    elif target.exists():
        problem = f"VE example directory already present in: {install_dir} \n"
    else:
        problem = None

    if problem is not None:
        sys.stderr.write(problem)
        return 1

    copytree(example_data_path, target, ignore=ignore_patterns("__*"))
    print(f"Example directory created at:\n{target}")

    return 0




[docs]
def ve_run_cli(args_list: list[str] | None = None) -> int:
    """Configure and run a Virtual Ecosystem simulation.

    This program sets up and runs a Virtual Ecosystem simulation. The program expects
    to be provided with paths to TOML formatted configuration files for the simulation.
    The configuration is modular: a directory path can be used to add all TOML
    configuration files in the directory, or individual file paths can be used to select
    specific combinations of configuration files. These are combined and validated and
    then used to initialise and run the model.

    As an alternative to providing configuration paths, the `--install-example` option
    allows users to provide a location where a simple example set of datasets and
    configuration files provided with the Virtual Ecosystem package can be installed.
    This option will create a `ve_example` directory in the location, and users can
    examine the input files and run the simulation from that directory:

    `ve_run /provided/install/path/ve_example`

    The output directory for simulation results is typically set in the configuration
    files, but can be overwritten using the `--outpath` option. A log file path can be
    provided for logging output. If this is not provided then the log will be written to
    the console, but the logging is typically verbose and it is usually better to
    redirect the log to a file.

    When logging is redirected to a file, a short progress report is written to stdout.
    By default, the command reports: the start and end of the simulation and log
    location; the completion of simulation stages; and a progress bar over the time
    steps of the model. The `--quiet` command can be used to incrementally mute this
    output: `-q` will remove the progress bar, `-qq` just prints the start and stop and
    `-qqq` mutes the report entirely.

    The `--config` option can be used to override configuration settings provided in the
    file or to add additional settings. This is typically used to run a set of parallel
    simulations that vary configuration settings of interest around a central
    configuration setup, without the need to write a specific configuration file for
    each permutation.

    The `--data-path` option can be used to dynamically set the location of data paths
    in the configuration. A file path in the config can be set as a path marker, which
    must be a string starting with a "$", for example "$CLIMATE_DATA". This option can
    then be used to substitute different files into that marker for different runs:
    `--data-path CLIMATE_DATA=/path/to/file.nc`.

    The `--validate-config-only` flag can be used to only run the configuration
    validation part of the model setup and then exit before running any models.

    The resolved complete configuration will then be written to a single consolidated
    config file in the output path with a default name of `compiled_configuration.toml`.
    This can be disabled by setting the
    `core.data_output_options.save_compiled_configuration` option to false. Note that
    the merged configuration automatically converts all file paths within the merged
    configurations to absolute file paths - this ties the merged configuration to the
    file system where the run is executed.


    Args:
        args_list: This is a developer and testing facing argument that is used to
            simulate command line arguments, allowing this function to be called
            directly. For example, ``ve_run --install-example /usr/abc`` can be
            replicated by calling ``ve_run_cli(['--install-example', '/usr/abc/'])``.

    Returns:
        An integer indicating success (0) or failure (1)
    """

    # If no arguments list is provided, use the arguments from the command line
    if args_list is None:
        args_list = sys.argv[1:]

    # Build the command line description from this function's docstring, which is
    # missing (None) when Python runs in -OO mode. The summary line carries no
    # indentation, so only the remaining lines are dedented: dedenting the whole
    # docstring would find no common margin and leave the body indented. Everything
    # from the "Args:" section onwards describes the developer-facing ``args_list``
    # argument and is left out of the command line help.
    doc_string = ve_run_cli.__doc__
    if doc_string is not None:
        summary, _, body = doc_string.partition("\n")
        full_doc = f"{summary.strip()}\n{textwrap.dedent(body)}"
        desc = full_doc.partition("\nArgs:")[0].rstrip()
    else:
        desc = "Python in -OO mode: no docs"

    # The raw formatter keeps the paragraph structure of the docstring in the help
    fmt = argparse.RawDescriptionHelpFormatter
    parser = argparse.ArgumentParser(description=desc, formatter_class=fmt)

    parser.add_argument(
        "--version",
        action="version",
        version=f"%(prog)s {ve.__version__}",
    )

    parser.add_argument("cfg_paths", type=str, help="Paths to config files", nargs="*")

    parser.add_argument(
        "--install-example",
        type=Path,
        help="Install the Virtual Ecosystem example data to the given location",
        dest="install_example",
    )

    parser.add_argument(
        "-o", "--outpath", type=str, help="Path for output files", dest="outpath"
    )
    parser.add_argument(
        "-c",
        "--config",
        type=str,
        action="append",
        help="Override configuration settings",
        dest="cli_config",
        default=[],
    )

    parser.add_argument(
        "--validate-config-only",
        action="store_true",
        help="Exit after validating configuration",
        dest="validate_only",
    )

    parser.add_argument(
        "-p",
        "--data-path",
        type=str,
        action="append",
        help="Set data paths used for input data",
        dest="cli_paths",
        default=[],
    )

    parser.add_argument(
        "--logfile",
        type=Path,
        help="A file path to use for logging a Virtual Ecosystem simulation",
        default=None,
    )

    parser.add_argument(
        "-q",
        "--quiet",
        action="count",
        help="Quieten the default progress reporting",
        default=0,
    )

    args = parser.parse_args(args=args_list)

    # Cannot use both install example and paths
    if args.cfg_paths and args.install_example:
        sys.stderr.write(
            "--install-example cannot be used in combination with cfg_paths.\n"
        )
        return 1

    # Install the example directory to the provided location if requested and then
    # exit, passing on the success or failure code from the installation.
    if args.install_example:
        return install_example_directory(args.install_example)

    # If the output path is provided on the command line, add it to the list of command
    # line modifications of the configuration.
    # NOTE: The quoting style here is important. The text here needs to be parsable
    #       TOML and needs to support literal strings for pathnames on Windows (rather
    #       than trying to interpret backslashes as escape characters). In TOML, literal
    #       strings are written using single quotes.
    if args.outpath:
        args.cli_config.append(f"core.data_output_options.out_path='{args.outpath}'")

    # Parse any configuration settings passed using the --config option, which now
    # include any output path set using --outpath
    cli_config = _parse_command_line_config(args.cli_config) if args.cli_config else {}

    # Parse any input data file path substitutions passed using the --data-path option
    cli_paths = _parse_cli_paths(args.cli_paths) if args.cli_paths else {}

    # Figure out the progress reporting level - the default is 3 (3 - 0) and each
    # repeat of `-q` decreases the level by one, down to a minimum of 0 (3 - 3) with
    # `-qqq`. Any further repeats of `-q` have no additional effect.
    progress = Progress(3 - min(3, args.quiet))

    # Run the virtual ecosystem run function
    ve_run(
        cfg_paths=args.cfg_paths,
        cli_config=cli_config,
        cli_paths=cli_paths,
        validate_only=args.validate_only,
        logfile=args.logfile,
        progress=progress,
    )

    return 0