Source code for VmaxBuilder.utils.extra_utils

import re
from typing import TYPE_CHECKING, Any, Dict, Optional, Union, cast

if TYPE_CHECKING:
    from cobra import Model, Reaction


def _metabolite_has_same_identifiers(met1: Any, met2: Any) -> bool:
    """Generated: validation needed.

    Description:
        Compare metabolite identifiers after stripping compartment suffixes.

    Args:
        met1 (Any): First metabolite-like object.
        met2 (Any): Second metabolite-like object.

    Returns:
        bool: True when identifiers match after compartment removal.
    """
    met_1_id = met1.id
    met_1_compartment = met1.compartment
    if met_1_id.endswith(f"[{met_1_compartment}]"):
        met_1_id = met_1_id[: -len(f"[{met_1_compartment}]")]
    elif met_1_id.endswith(f"_{met_1_compartment}"):
        met_1_id = met_1_id[: -len(f"_{met_1_compartment}")]
    elif met_1_id.endswith(met_1_compartment):
        met_1_id = met_1_id[: -len(met_1_compartment)]
    met_2_id = met2.id
    met_2_compartment = met2.compartment
    if met_1_id.endswith(f"[{met_1_compartment}]"):
        met_1_id = met_1_id[: -len(f"[{met_1_compartment}]")]
    elif met_2_id.endswith(f"_{met_2_compartment}"):
        met_2_id = met_2_id[: -len(f"_{met_2_compartment}")]
    elif met_2_id.endswith(met_2_compartment):
        met_2_id = met_2_id[: -len(met_2_compartment)]

    return met_1_id == met_2_id


def _metabolite_has_same_names(met1: Any, met2: Any) -> bool:
    """Generated: validation needed.

    Description:
        Compare metabolite names after trimming whitespace.

    Args:
        met1 (Any): First metabolite-like object.
        met2 (Any): Second metabolite-like object.

    Returns:
        bool: True when names match exactly after strip.
    """
    met_1_name = met1.name.strip()
    met_2_name = met2.name.strip()
    return met_1_name == met_2_name


def _metabolite_has_same_formula(
    met1: Any,
    met2: Any,
    ignore_h_plus: bool = False,
) -> bool:
    """Generated: validation needed.

    Description:
        Compare metabolite formulas, optionally ignoring hydrogen fragments.

    Args:
        met1 (Any): First metabolite-like object.
        met2 (Any): Second metabolite-like object.
        ignore_h_plus (bool): When True, compare formulas after removing hydrogen fragments.

    Returns:
        bool: True when formulas match under selected comparison mode.
    """
    met_1_formula = met1.formula.strip() if met1.formula is not None else None
    met_2_formula = met2.formula.strip() if met2.formula is not None else None
    if ignore_h_plus:
        if met_1_formula is None or met_2_formula is None:
            return False
        # remove all H+ and h+ from the formula before comparing
        met_1_formula = (
            met_1_formula.replace("H+", "")
            .replace("h+", "")
            .replace("H", "")
            .replace("h", "")
        )
        met_2_formula = (
            met_2_formula.replace("H+", "")
            .replace("h+", "")
            .replace("H", "")
            .replace("h", "")
        )
        return met_1_formula == met_2_formula
    return met_1_formula == met_2_formula


def _metabolite_has_same_charge(
    met1: Any,
    met2: Any,
    ignore_h_plus: bool = False,
) -> bool:
    """Generated: validation needed.

    Description:
        Compare metabolite charges, optionally adjusting for hydrogen fragments.

    Args:
        met1 (Any): First metabolite-like object.
        met2 (Any): Second metabolite-like object.
        ignore_h_plus (bool): When True, subtract hydrogen fragments from charge.

    Returns:
        bool: True when charges match under selected comparison mode.
    """
    if ignore_h_plus:
        if met1.charge is None or met2.charge is None:
            return False
        # count the number of H+ and h+ in the formula and adjust the charge accordingly
        h_plus_count1 = met1.formula.lower().count("h+") + met1.formula.lower().count("h")
        h_plus_count2 = met2.formula.lower().count("h+") + met2.formula.lower().count("h")
        adjusted_charge1 = met1.charge - h_plus_count1
        adjusted_charge2 = met2.charge - h_plus_count2
        adjusted_charge1 = int(adjusted_charge1)
        adjusted_charge2 = int(adjusted_charge2)
        return adjusted_charge1 == adjusted_charge2
    met_1_charge = int(met1.charge) if met1.charge is not None else None
    met_2_charge = int(met2.charge) if met2.charge is not None else None
    return met_1_charge == met_2_charge


def extract_compartment(old_id: Any) -> str:
    """Extract the compartment suffix from a metabolite identifier.

    The identifier is tested against three suffix spellings, in this order,
    and the first match wins:

    1. underscore form, e.g. ``"MAM20065_c"`` or ``"MAM20065_cyt"``
    2. letters directly after digits, e.g. ``"MAM20065n"`` or ``"MAM20065cg"``
    3. bracket form, e.g. ``"MAM20065[c]"`` or ``"MAM20065[cyt]"``

    Args:
        old_id (Any): Identifier to inspect. Non-string values yield ``""``.

    Returns:
        str: The compartment letters, or ``""`` when no pattern matches.
    """
    if not isinstance(old_id, str):
        return ""

    suffix_patterns = (
        r"_(?P<comp>[A-Za-z]+)$",
        r"^[A-Za-z]*\d+(?P<comp>[A-Za-z]+)$",
        r"\[(?P<comp>[A-Za-z]+)]$",
    )
    for pattern in suffix_patterns:
        found = re.search(pattern, old_id)
        if found is not None:
            return found.group("comp")
    return ""
def remove_compartment(old_id: Any) -> Any:
    """Strip the compartment suffix from a ``MAM<digits>`` identifier.

    Only identifiers of the form ``MAM`` followed by digits are rewritten.
    Recognised suffixes are ``_comp`` / bare ``comp`` (``"MAM20065_c"``,
    ``"MAM20065cyt"``) and ``[comp]`` (``"MAM20065[c]"``). Anything else is
    returned as given.

    Args:
        old_id (Any): Identifier to normalise. Non-string values are
            returned unchanged.

    Returns:
        Any: Identifier without compartment suffix when a pattern matches,
        otherwise the input value.
    """
    if not isinstance(old_id, str):
        return old_id

    suffix_patterns = (
        # underscore or bare-letter form: "MAM20065_c", "MAM20065cyt"
        r"^(MAM\d+)(?:_[A-Za-z]+|[A-Za-z]+)$",
        # bracket form: "MAM20065[c]", "MAM20065[cyt]"
        r"^(MAM\d+)\[([A-Za-z]+)]$",
    )
    stripped = old_id
    for pattern in suffix_patterns:
        stripped = re.sub(pattern, r"\1", stripped)
    return stripped
def convert_camel_case_to_snake_case(name: str) -> str:
    """Convert a camelCase string to snake_case.

    The first character is lower-cased. Every later upper-case character is
    replaced by an underscore followed by its lower-case form, so runs of
    capitals are split letter by letter (``"ABC"`` becomes ``"a_b_c"``).

    Args:
        name (str): Input name. Falsy values (e.g. ``""``) are returned as is.

    Returns:
        str: Snake case version of ``name``.
    """
    if not name:
        return name

    head, tail = name[0], name[1:]
    converted_tail = (
        f"_{char.lower()}" if char.isupper() else char for char in tail
    )
    return head.lower() + "".join(converted_tail)
def convert_model_to_cobra_model(
    model: Union["Model", Dict[str, Any]], make_copy: Optional[bool] = False
) -> "Model":
    """Placeholder for converting a model-like object to a COBRApy Model.

    The conversion is not implemented; calling this function always raises.

    Args:
        model (Model | Dict[str, Any]): Source model or raw dictionary (unused).
        make_copy (Optional[bool]): Reserved for future copy semantics (unused).

    Raises:
        NotImplementedError: Always.
    """
    message = (
        "This function is not implemented in the VmaxBuilder package. "
        "Please use the appropriate method from the VmaxBuilder package to convert models."
    )
    raise NotImplementedError(message)  # todo
def is_effectively_integer(value: Any) -> bool:
    """Check whether a value represents a whole number.

    Args:
        value (Any): Candidate value; numbers and numeric strings are accepted.

    Returns:
        bool: True for ``int`` instances and for values whose ``float``
        conversion has no fractional part. False when the conversion fails
        or yields a non-finite / fractional number.
    """
    # Ints are whole by definition. Short-circuiting also avoids the
    # OverflowError that float() raises for ints beyond float range.
    if isinstance(value, int):
        return True
    try:
        return float(value).is_integer()
    except (ValueError, TypeError, OverflowError):
        return False
def check_if_string_or_integer(column: Any) -> bool:
    """Check whether a column has at least two letter- or digit-bearing cells.

    Every cell is converted to ``str`` first. A cell counts towards the
    "string" tally when it contains any ASCII letter and towards the
    "integer" tally when it contains any ASCII digit.

    Args:
        column (Any): Column-like object supporting ``astype(str)`` and the
            ``.str.contains`` accessor (presumably a pandas Series -- confirm
            against callers).

    Returns:
        bool: True when either tally reaches 2.
    """
    as_text = column.astype(str)
    cells_with_letters = as_text.str.contains("[a-zA-Z]").sum()
    cells_with_digits = as_text.str.contains("[0-9]").sum()
    return cells_with_letters >= 2 or cells_with_digits >= 2
def _deduplicate_preserve_order(values: list[str]) -> list[str]: """Generated: validation needed. Description: Deduplicate strings while preserving first-seen order. Args: values (list[str]): Input string list. Returns: list[str]: Ordered unique string list. """ return list(dict.fromkeys(values))
[docs] def resolve_gene_or_reaction_group_members( model: Any | None, identifiers: list[str], expression_gene_ids: set[str] | None = None, ) -> list[str]: """Generated: validation needed. Description: Expand a mixed list of gene IDs and reaction IDs into gene IDs. Reaction IDs contribute all associated gene IDs. Gene IDs are passed through unchanged. Args: model (Any | None): Cobra-like model with ``reactions`` collection. identifiers (list[str]): Gene or reaction identifiers. expression_gene_ids (set[str] | None): Optional filter restricting returned genes to those present in expression data. Returns: list[str]: Ordered unique gene IDs. """ if model is None: resolved_gene_ids = identifiers else: reaction_lookup: dict[str, Any] = { str(reaction.id): reaction for reaction in getattr(model, "reactions", []) } resolved_gene_ids = [] for identifier in identifiers: reaction = reaction_lookup.get(identifier) if reaction is None: resolved_gene_ids.append(identifier) continue resolved_gene_ids.extend(str(gene.id) for gene in getattr(reaction, "genes", [])) if expression_gene_ids is not None: resolved_gene_ids = [ gene_id for gene_id in resolved_gene_ids if gene_id in expression_gene_ids ] return _deduplicate_preserve_order(resolved_gene_ids)
[docs] def get_transport_reaction_gene_ids( model: Any, expression_gene_ids: set[str] | None = None, ) -> list[str]: """Generated: validation needed. Description: Return genes associated exclusively with transport reactions. Transport reactions are defined as reactions spanning more than one compartment and carrying a non-empty gene-reaction rule. Args: model (Any): Cobra-like model with ``reactions`` and ``genes``. expression_gene_ids (set[str] | None): Optional filter restricting returned genes to those present in expression data. Returns: list[str]: Ordered unique transport-associated gene IDs. """ transport_reactions = [ reaction for reaction in getattr(model, "reactions", []) if len(getattr(reaction, "compartments", ())) > 1 and str(getattr(reaction, "gene_reaction_rule", "")) != "" ] non_transport_gene_ids = { str(gene.id) for reaction in getattr(model, "reactions", []) if reaction not in transport_reactions and str(getattr(reaction, "gene_reaction_rule", "")) != "" for gene in getattr(reaction, "genes", []) } transport_gene_ids = [ str(gene.id) for gene in getattr(model, "genes", []) if str(gene.id) not in non_transport_gene_ids ] if expression_gene_ids is not None: transport_gene_ids = [ gene_id for gene_id in transport_gene_ids if gene_id in expression_gene_ids ] return _deduplicate_preserve_order(transport_gene_ids)
def compare_dicts(
    dict1: Dict[str, Any],
    dict2: Dict[str, Any],
) -> None:
    """Print a human-readable diff of two dictionaries to stdout.

    When the dictionaries are equal a single confirmation line is printed.
    Otherwise the report lists keys present in only one dictionary and, for
    every shared key whose values differ, both values.

    Args:
        dict1 (Dict[str, Any]): First dictionary.
        dict2 (Dict[str, Any]): Second dictionary.
    """
    if dict1 == dict2:
        print("Dictionaries are equal.")
        return

    print("Dictionaries differ:")
    left_keys = set(dict1.keys())
    right_keys = set(dict2.keys())

    left_only = left_keys - right_keys
    if left_only:
        print(" Keys only in dict1:", left_only)

    right_only = right_keys - left_keys
    if right_only:
        print(" Keys only in dict2:", right_only)

    for key in left_keys & right_keys:
        if dict1[key] == dict2[key]:
            continue
        print(f" Different value for key '{key}':")
        print(f" dict1: {dict1[key]}")
        print(f" dict2: {dict2[key]}")
def create_task_specific_model_for_diagnostics(  # noqa: C901
    irreversible_cobra_model: "Model",
    task_information: Dict[str, Any],
    unbounded_lower_bound_value: float | int = 1,
    unbounded_upper_bound_value: float | int = 1000,
    make_copy: bool = False,
) -> "tuple[Model, list[Reaction]]":
    """Add temporary exchange reactions for task-specific diagnostics.

    For every task metabolite a reaction named ``temporary_exchange_<id>`` is
    added to the model:

    * input-only metabolites get a reaction with coefficient ``+1``;
    * output-only metabolites get a reaction with coefficient ``-1``;
    * metabolites listed as both input and output get two reactions,
      ``..._f`` (coefficient ``+1``, upper bound taken from the input side)
      and ``..._r`` (coefficient ``-1``, upper bound taken from the output
      side), both with lower bound 0. The task's lower bounds are not used
      for these.

    Bounds given as the string ``"unbounded"`` are replaced by the fallback
    values.

    Args:
        irreversible_cobra_model (Model): Model to modify.
        task_information (Dict[str, Any]): Task metadata. Reads the keys
            ``input_metabolites``, ``input_metabolite_lower_bounds``,
            ``input_metabolite_upper_bounds`` and the matching ``output_*``
            keys; bound lists are indexed in parallel with the metabolite
            lists.
        unbounded_lower_bound_value (float | int): Replacement for an
            ``"unbounded"`` lower bound.
        unbounded_upper_bound_value (float | int): Replacement for an
            ``"unbounded"`` upper bound.
        make_copy (bool): Copy model before mutation when True.

    Returns:
        tuple[Model, list[Reaction]]: The modified model (the copy when
        ``make_copy`` is True) and the reactions added to it, in the order
        they were added.
    """
    if make_copy:
        irreversible_cobra_model = irreversible_cobra_model.copy()

    # Imported here because the module imports cobra only under TYPE_CHECKING.
    from cobra import Reaction

    input_metabolites = task_information["input_metabolites"]
    output_metabolites = task_information["output_metabolites"]
    shared_metabolites = set(input_metabolites).intersection(output_metabolites)
    added_reactions = []

    def _resolve_bound(bound: Any, fallback: float | int) -> Any:
        # Substitute the numeric fallback for the "unbounded" marker.
        return fallback if bound == "unbounded" else bound

    def _add_exchange(
        reaction_id: str,
        metabolite_id: str,
        coefficient: int,
        lower_bound: Any,
        upper_bound: Any,
    ) -> None:
        # Build one temporary exchange reaction and register it on the model.
        new_reaction = Reaction(reaction_id)
        new_reaction.name = reaction_id
        new_reaction.subsystem = "temporary_exchange"
        cast(Any, new_reaction).EC_number = ""
        # Lower bound is assigned before the upper bound, as before.
        new_reaction.lower_bound = lower_bound
        new_reaction.upper_bound = upper_bound
        metabolite_object = irreversible_cobra_model.metabolites.get_by_id(metabolite_id)
        new_reaction.add_metabolites({metabolite_object: coefficient})
        irreversible_cobra_model.add_reactions([new_reaction])
        added_reactions.append(new_reaction)

    # One-sided metabolites: inputs first (+1), then outputs (-1).
    for side, coefficient in (("input", 1), ("output", -1)):
        for idx, metabolite in enumerate(task_information[f"{side}_metabolites"]):
            if metabolite in shared_metabolites:
                continue  # handled below with a forward/reverse pair
            lower_bound = _resolve_bound(
                task_information[f"{side}_metabolite_lower_bounds"][idx],
                unbounded_lower_bound_value,
            )
            upper_bound = _resolve_bound(
                task_information[f"{side}_metabolite_upper_bounds"][idx],
                unbounded_upper_bound_value,
            )
            _add_exchange(
                f"temporary_exchange_{metabolite}",
                metabolite,
                coefficient,
                lower_bound,
                upper_bound,
            )

    # Metabolites that are both input and output. Walk them in input-list
    # order (each once) rather than iterating the set, so the order of the
    # added reactions is the same on every run.
    ordered_shared = dict.fromkeys(
        metabolite for metabolite in input_metabolites if metabolite in shared_metabolites
    )
    for metabolite in ordered_shared:
        input_upper_bound = _resolve_bound(
            task_information["input_metabolite_upper_bounds"][
                input_metabolites.index(metabolite)
            ],
            unbounded_upper_bound_value,
        )
        output_upper_bound = _resolve_bound(
            task_information["output_metabolite_upper_bounds"][
                output_metabolites.index(metabolite)
            ],
            unbounded_upper_bound_value,
        )
        _add_exchange(
            f"temporary_exchange_{metabolite}_f", metabolite, 1, 0, input_upper_bound
        )
        _add_exchange(
            f"temporary_exchange_{metabolite}_r", metabolite, -1, 0, output_upper_bound
        )

    return irreversible_cobra_model, added_reactions