"""Generated: validation needed.
Description:
Central catalogue of allowed config values for refactored VmaxBuilder API.
"""
from __future__ import annotations
from dataclasses import dataclass
from typing import Any
from VmaxBuilder.config.enums import (
DiagnosticSeverity,
KcatLevel,
LoadResolutionMode,
PrimaryOutputFormat,
ProteinSourceMode,
ReactionNotation,
StageName,
ValidationMode,
)
[docs]
@dataclass(slots=True, frozen=True)
class OptionSpec:
    """Generated: validation needed.

    Description:
        Immutable, slotted record holding the metadata of a single entry in
        the option catalogue.

    Args:
        name (str): Canonical option path.
        allowed_values (tuple[Any, ...] | None): Values accepted for the
            option. May be None; presumably that marks an option whose values
            are not enumerated -- confirm against the consuming validator.
        description (str): Human-readable guidance for the option.
        validation_mode (ValidationMode): Default validation mode for the
            option. Falls back to ``ValidationMode.STRICT`` when omitted.
    """

    name: str
    allowed_values: tuple[Any, ...] | None
    description: str
    validation_mode: ValidationMode = ValidationMode.STRICT
# Validation-related options. The mapping is derived from the specs
# themselves, so every key is exactly the spec's canonical ``name``.
VALIDATION_OPTION_SPECS: dict[str, OptionSpec] = {
    spec.name: spec
    for spec in (
        OptionSpec(
            "validation.mode",
            allowed_values=tuple(
                mode.value for mode in (ValidationMode.STRICT, ValidationMode.LENIENT)
            ),
            description="Global validation default for config and stage inputs.",
        ),
        OptionSpec(
            "validation.field_mode",
            allowed_values=tuple(
                mode.value for mode in (ValidationMode.STRICT, ValidationMode.LENIENT)
            ),
            description="Per-field validation override.",
        ),
        OptionSpec(
            "validation.halt_severity",
            # Every member of the severity enum is accepted, in declaration order.
            allowed_values=tuple(level.value for level in DiagnosticSeverity),
            description="Minimum severity that stops downstream stage execution.",
        ),
    )
}
# Stage-selection options, keyed by each spec's canonical ``name``.
STAGE_OPTION_SPECS: dict[str, OptionSpec] = {
    spec.name: spec
    for spec in (
        OptionSpec(
            "stage.name",
            # Accept every member of the StageName enum, in declaration order.
            allowed_values=tuple(member.value for member in StageName),
            description="Top-level orchestrator stage name.",
        ),
    )
}
# Protein-stage options, keyed by each spec's canonical ``name``.
PROTEIN_OPTION_SPECS: dict[str, OptionSpec] = {
    spec.name: spec
    for spec in (
        OptionSpec(
            "protein.source_mode",
            # Accept every member of the ProteinSourceMode enum.
            allowed_values=tuple(member.value for member in ProteinSourceMode),
            description="Protein abundance source strategy.",
        ),
    )
}
# Model-stage options, keyed by each spec's canonical ``name``.
MODEL_OPTION_SPECS: dict[str, OptionSpec] = {
    spec.name: spec
    for spec in (
        OptionSpec(
            "model.reaction_notation",
            # Accept every member of the ReactionNotation enum.
            allowed_values=tuple(member.value for member in ReactionNotation),
            description="Reaction identifier notation expected by model-stage processors.",
        ),
        OptionSpec(
            "model.id_type",
            allowed_values=("ensembl", "entrez_gene_id", "symbol", "uniprot"),
            description="Identifier provider used by model-stage and downstream mapping logic.",
        ),
        OptionSpec(
            "model.level",
            allowed_values=("gene", "transcript"),
            description="Gene or transcript granularity for model identifiers.",
        ),
    )
}
# Expression-stage options, keyed by each spec's canonical ``name``.
EXPRESSION_OPTION_SPECS: dict[str, OptionSpec] = {
    spec.name: spec
    for spec in (
        OptionSpec(
            "expression.transcript_aggregation_policy",
            allowed_values=("sum",),
            description="Transcript-to-gene aggregation policy used by expression preprocessing.",
        ),
        OptionSpec(
            "expression.data_type",
            allowed_values=("TPM", "FPKM", "raw_counts"),
            description="Type of expression quantification provided in input.",
        ),
        OptionSpec(
            "expression.thresholding",
            allowed_values=(True, False),
            description=(
                "Whether to apply a data-type-specific thresholding to expression values."
            ),
        ),
        OptionSpec(
            "expression.id_type",
            allowed_values=("ensembl", "entrez_gene_id", "symbol"),
            description="Identifier provider for expression features.",
        ),
        OptionSpec(
            "expression.level",
            allowed_values=("gene", "transcript"),
            description="Gene or transcript granularity for expression identifiers.",
        ),
        OptionSpec(
            "expression.sample_type_map",
            # No enumerated value set is recorded for this option.
            allowed_values=None,
            description=(
                "Mapping from expression sample columns to PTR tissue/sample columns "
                "used in expression+PTR multiplication."
            ),
        ),
        OptionSpec(
            "expression.id_translation_provider",
            allowed_values=("auto", "mygene"),
            description="Provider used for expression identifier translation lookups.",
        ),
    )
}
# PTR-stage options, keyed by each spec's canonical ``name``.
PTR_OPTION_SPECS: dict[str, OptionSpec] = {
    spec.name: spec
    for spec in (
        OptionSpec(
            "ptr.pretransformed_type",
            allowed_values=("linear", "log10", "log2", "ln"),
            description="Log-scale applied to raw PTR input before linear conversion.",
        ),
        OptionSpec(
            "ptr.partial_missing_use_weighted",
            allowed_values=(True, False),
            description="Apply weighted per-sample scaling during within-sample PTR imputation.",
        ),
        OptionSpec(
            "ptr.partial_missing_weighted_statistic",
            allowed_values=("median", "mean", "mode", "max", "min"),
            description="Column statistic used for weighted within-sample PTR imputation.",
        ),
        OptionSpec(
            "ptr.partial_missing_imputation_statistic",
            allowed_values=("median", "mean", "mode", "max", "min"),
            description="Row statistic used for within-sample PTR imputation.",
        ),
        OptionSpec(
            "ptr.unobserved_gene_imputation_strategy",
            allowed_values=("sample_after_imputation", "sample_before_imputation"),
            description="Strategy to fill genes present in expression but absent from PTR.",
        ),
        OptionSpec(
            "ptr.unobserved_gene_imputation_statistic",
            allowed_values=("median", "mean", "mode", "max", "min"),
            description="Per-sample statistic used when imputing unobserved genes.",
        ),
        OptionSpec(
            "ptr.use_special_groups_for_unobserved_imputation",
            allowed_values=(True, False),
            description=(
                "Enable independent unobserved-gene imputation per configured special groups."
            ),
        ),
    )
}
# Output/loading options, keyed by each spec's canonical ``name``.
LOADING_OPTION_SPECS: dict[str, OptionSpec] = {
    spec.name: spec
    for spec in (
        OptionSpec(
            "loading.create_dynamically_named_results",
            allowed_values=(True, False),
            description=(
                "Whether to create a child output directory named from configured input paths."
            ),
        ),
        OptionSpec(
            "loading.results_dir_name",
            allowed_values=("VmaxResults",),
            description="Legacy output folder label retained for backward compatibility.",
        ),
        OptionSpec(
            "loading.primary_output_format",
            # Accept every member of the PrimaryOutputFormat enum.
            allowed_values=tuple(fmt.value for fmt in PrimaryOutputFormat),
            description="Primary file format used when saving output tables.",
        ),
        OptionSpec(
            "loading.write_additional_csv",
            allowed_values=(True, False),
            description="Whether additional csv output copies are written.",
        ),
    )
}
# Vmax-stage options (currently the kcat level), keyed by each spec's ``name``.
VMAX_OPTION_SPECS: dict[str, OptionSpec] = {
    spec.name: spec
    for spec in (
        OptionSpec(
            "kcat.level",
            # Accept every member of the KcatLevel enum.
            allowed_values=tuple(member.value for member in KcatLevel),
            description="Canonical kcat level exposed by a predictor or converter.",
        ),
    )
}
# Path-resolution options, keyed by each spec's canonical ``name``.
RESOLUTION_OPTION_SPECS: dict[str, OptionSpec] = {
    spec.name: spec
    for spec in (
        OptionSpec(
            "load.resolution_mode",
            # Accept every member of the LoadResolutionMode enum.
            allowed_values=tuple(member.value for member in LoadResolutionMode),
            description="Path resolution policy used when loading inputs and outputs.",
        ),
    )
}
# Flat lookup table that merges every per-area catalogue into one mapping.
# Catalogues are merged left to right, so if two of them ever shared a key
# the later one in this chain would win.
OPTION_SPECS: dict[str, OptionSpec] = (
    VALIDATION_OPTION_SPECS
    | STAGE_OPTION_SPECS
    | PROTEIN_OPTION_SPECS
    | MODEL_OPTION_SPECS
    | EXPRESSION_OPTION_SPECS
    | PTR_OPTION_SPECS
    | LOADING_OPTION_SPECS
    | VMAX_OPTION_SPECS
    | RESOLUTION_OPTION_SPECS
)
[docs]
def get_allowed_values(option_name: str) -> tuple[Any, ...] | None:
    """Generated: validation needed.

    Description:
        Look up the allowed values recorded for one option in ``OPTION_SPECS``.

    Args:
        option_name (str): Canonical option path to inspect.

    Returns:
        tuple[Any, ...] | None: The ``allowed_values`` of the matching spec,
            or None when ``option_name`` is not in the catalogue. A known
            option whose spec stores None also yields None, so the two cases
            cannot be told apart from the return value alone.
    """
    try:
        spec = OPTION_SPECS[option_name]
    except KeyError:
        # Unknown option: report "no allowed values" instead of raising.
        return None
    return spec.allowed_values