Module `pychnosz.core`

Core thermodynamic calculation functions for CHNOSZ.

Sub-modules

pychnosz.core.animation
pychnosz.core.balance: Reaction balancing utilities …
pychnosz.core.equilibrium: Chemical equilibrium solver for CHNOSZ …
pychnosz.core.speciation: Chemical speciation calculation engine for CHNOSZ …
pychnosz.core.unicurve: CHNOSZ unicurve() function - Calculate univariant curves for geothermometry/geobarometry …

Functions

Expand source code

def affinity(messages: bool = True, basis: Optional[pd.DataFrame] = None,
             species: Optional[pd.DataFrame] = None, iprotein: Optional[Union[int, List[int], np.ndarray]] = None,
             loga_protein: Union[float, List[float]] = 0.0, **kwargs) -> Dict[str, Any]:
    """
    Calculate affinities of formation reactions.

    This function calculates chemical affinities for the formation reactions of
    species of interest from user-selected basis species. The affinities are
    calculated as A/2.303RT where A is the chemical affinity.

    Parameters
    ----------
    messages : bool, default True
        Whether to print informational messages
    basis : pd.DataFrame, optional
        Basis species definition to use (if not using global basis)
    species : pd.DataFrame, optional
        Species definition to use (if not using global species)
    iprotein : int, list of int, or array, optional
        Build proteins from residues (row numbers in thermo().protein)
    loga_protein : float or list of float, default 0.0
        Activity of proteins (log scale)
    **kwargs : dict
        Variable arguments defining calculation conditions:
        - Basis species names (e.g., CO2=[-60, 20, 5]): Variable basis species activities
        - T : float or list, Temperature in °C
        - P : float or list, Pressure in bar
        - property : str, Property to calculate ("A", "logK", "G", etc.)
        - exceed_Ttr : bool, Allow extrapolation beyond transition temperatures
        - exceed_rhomin : bool, Allow calculations below minimum water density
        - return_buffer : bool, Return buffer activities
        - balance : str, Balance method for protein buffers

    Returns
    -------
    dict
        Dictionary containing:
        - fun : str, Function name ("affinity")
        - args : dict, Arguments used in calculation
        - sout : dict, Subcrt calculation results
        - property : str, Property calculated
        - basis : pd.DataFrame, Basis species definition
        - species : pd.DataFrame, Species of interest definition
        - T : float or array, Temperature(s) in Kelvin
        - P : float or array, Pressure(s) in bar
        - vars : list, Variable names
        - vals : dict, Variable values
        - values : dict, Calculated affinity values by species

    Examples
    --------
    >>> import pychnosz
    >>> pychnosz.reset()
    >>> pychnosz.basis(["CO2", "H2O", "NH3", "H2S", "H+", "O2"])
    >>> pychnosz.species(["glycine", "tyrosine", "serine", "methionine"])
    >>> result = pychnosz.affinity(CO2=[-60, 20, 5], T=350, P=2000)
    >>> print(result['values'][1566])  # Glycine affinities

    >>> # With proteins
    >>> import pandas as pd
    >>> aa = pd.read_csv("POLG.csv")
    >>> iprotein = pychnosz.add_protein(aa)
    >>> pychnosz.basis("CHNOSe")
    >>> a = pychnosz.affinity(iprotein=iprotein, pH=[2, 14], Eh=[-1, 1])

    Notes
    -----
    This implementation maintains complete fidelity to R CHNOSZ affinity():
    - Identical argument processing including dynamic basis species parameters
    - Same variable expansion and multi-dimensional calculations
    - Exact energy() function behavior for property calculations
    - Identical output structure and formatting
    - Support for protein calculations via iprotein parameter
    """

    # Get thermo object for protein handling
    thermo_obj = thermo()

    # Handle iprotein parameter
    ires = None
    original_species = None
    if iprotein is not None:
        # Convert to array
        if isinstance(iprotein, (int, np.integer)):
            iprotein = np.array([iprotein])
        elif isinstance(iprotein, list):
            iprotein = np.array(iprotein)

        # Check all proteins are available
        if np.any(np.isnan(iprotein)):
            raise AffinityError("`iprotein` has some NA values")
        if thermo_obj.protein is None or not np.all(iprotein < len(thermo_obj.protein)):
            raise AffinityError("some value(s) of `iprotein` are not rownumbers of thermo().protein")

        # Add protein residues to the species list
        # Amino acids in 3-letter code
        aminoacids_3 = ["Ala", "Cys", "Asp", "Glu", "Phe", "Gly", "His", "Ile", "Lys", "Leu",
                        "Met", "Asn", "Pro", "Gln", "Arg", "Ser", "Thr", "Val", "Trp", "Tyr"]

        # Use _RESIDUE notation (matches R CHNOSZ affinity.R line 84)
        resnames_residue = ["H2O_RESIDUE"] + [f"{aa}_RESIDUE" for aa in aminoacids_3]

        # Save original species
        from .species import species as species_func
        original_species = get_species() if is_species_defined() else None

        # Add residue species with activity 0 (all in "aq" state)
        species_func(resnames_residue, state="aq", add=True, messages=messages)

        # Get indices of residues in species list
        species_df_temp = get_species()
        ires = []
        for name in resnames_residue:
            idx = np.where(species_df_temp['name'] == name)[0]
            if len(idx) > 0:
                ires.append(idx[0])
        ires = np.array(ires)

    # Check if basis and species are defined (use provided or global)
    if basis is None:
        if not is_basis_defined():
            raise AffinityError("basis species are not defined")
        basis_df = get_basis()
    else:
        basis_df = basis

    if species is None:
        if not is_species_defined():
            raise AffinityError("species are not defined")
        species_df = get_species()
    else:
        species_df = species

    # Process arguments
    args_orig = dict(kwargs)

    # Handle argument recall (if first argument is previous affinity result)
    if len(args_orig) > 0:
        first_key = list(args_orig.keys())[0]
        first_value = args_orig[first_key]
        if (isinstance(first_value, dict) and
            first_value.get('fun') == 'affinity'):
            # Update arguments from previous result
            aargs = first_value.get('args', {})
            # Update with new arguments (skip the first one)
            new_args = dict(list(args_orig.items())[1:])
            aargs.update(new_args)
            return affinity(**aargs)

    # Process energy arguments
    args = energy_args(args_orig, messages, basis_df=basis_df)

    # Get property to calculate
    property_name = args.get('what', 'A')

    # Get thermo data
    thermo_obj = thermo()
    # basis_df and species_df are already set above

    # Determine if we need specific property calculation
    if property_name and property_name != 'A':
        # Calculate specific property using energy function
        energy_result = energy(
            what=property_name,
            vars=args['vars'],
            vals=args['vals'],
            lims=args['lims'],
            T=args['T'],
            P=args['P'],
            IS=args.get('IS', 0),
            exceed_Ttr=kwargs.get('exceed_Ttr', True),
            exceed_rhomin=kwargs.get('exceed_rhomin', False),
            basis_df=basis_df,
            species_df=species_df,
            messages=messages
        )
        affinity_values = energy_result['a']
        energy_sout = energy_result['sout']
    else:
        # Calculate affinities (A/2.303RT)
        energy_result = energy(
            what='A',
            vars=args['vars'],
            vals=args['vals'],
            lims=args['lims'],
            T=args['T'],
            P=args['P'],
            IS=args.get('IS', 0),
            exceed_Ttr=kwargs.get('exceed_Ttr', True),
            exceed_rhomin=kwargs.get('exceed_rhomin', False),
            basis_df=basis_df,
            species_df=species_df,
            messages=messages
        )
        affinity_values = energy_result['a']
        energy_sout = energy_result['sout']

    # Handle protein affinity calculations if iprotein was provided
    if iprotein is not None and ires is not None:
        # Calculate protein affinities from residue affinities using group additivity
        # Normalize loga_protein to match number of proteins
        if isinstance(loga_protein, (int, float)):
            loga_protein_arr = np.full(len(iprotein), loga_protein)
        else:
            loga_protein_arr = np.array(loga_protein)
            if len(loga_protein_arr) < len(iprotein):
                loga_protein_arr = np.resize(loga_protein_arr, len(iprotein))

        # Calculate affinity for each protein
        protein_affinities = {}

        for ip, iprot in enumerate(iprotein):
            # Get protein amino acid composition from thermo().protein
            # Columns 4:24 contain chains and amino acid counts (0-indexed: columns 4-23)
            protein_row = thermo_obj.protein.iloc[iprot]
            aa_counts = protein_row.iloc[4:24].values.astype(float)

            # Calculate protein affinity by summing residue affinities weighted by composition
            # affinity_values keys are ispecies indices
            # Get the ispecies for each residue
            species_df_current = get_species()
            residue_ispecies = species_df_current.iloc[ires]['ispecies'].values

            # Initialize protein affinity with same shape as residue affinities
            first_residue_key = residue_ispecies[0]
            if first_residue_key in affinity_values:
                template_affinity = affinity_values[first_residue_key]
                protein_affinity = np.zeros_like(template_affinity)

                # Sum up contributions from all residues
                for i, res_ispecies in enumerate(residue_ispecies):
                    if res_ispecies in affinity_values:
                        residue_contrib = affinity_values[res_ispecies] * aa_counts[i]
                        protein_affinity = protein_affinity + residue_contrib

                # Subtract protein activity
                protein_affinity = protein_affinity - loga_protein_arr[ip]

                # Use negative index to denote protein (matches R CHNOSZ convention)
                protein_key = -(iprot + 1)  # Negative of (row number + 1)
                protein_affinities[protein_key] = protein_affinity

        # Add ionization affinity if H+ is in basis (matching R CHNOSZ behavior)
        if 'H+' in basis_df.index:
            if messages:
                print("affinity: ionizing proteins ...")

            # Get protein amino acid compositions
            from ..biomolecules.proteins import pinfo
            from ..biomolecules.ionize_aa import ionize_aa

            # Get aa compositions for these proteins
            aa = pinfo(iprotein)

            # Determine pH values from vars/vals or basis
            # Check if H+ is a variable
            if 'H+' in args['vars']:
                # H+ is a variable - get pH from vals
                iHplus = args['vars'].index('H+')
                pH_vals = -np.array(args['vals'][iHplus])  # pH = -log(a_H+)
            else:
                # H+ is constant - get from basis
                pH_val = -basis_df.loc['H+', 'logact']  # pH = -log(a_H+)
                pH_vals = np.array([pH_val])

            # Get T values (already processed earlier)
            T_vals = args['T']
            if isinstance(T_vals, (int, float)):
                T_celsius = T_vals - 273.15
            else:
                T_celsius = T_vals - 273.15

            # Get P values
            P_vals = args['P']

            # Calculate ionization affinity
            # ionize_aa expects arrays, so ensure T, P, pH are properly shaped
            # For grid calculations, we need to expand T, P, pH into a grid matching the affinity grid
            if len(args['vars']) >= 2:
                # Multi-dimensional case - create grid
                # Figure out which vars are T, P, H+
                var_names = args['vars']
                has_T_var = 'T' in var_names
                has_P_var = 'P' in var_names
                has_Hplus_var = 'H+' in var_names

                # Build T, P, pH grids matching the affinity calculation grid
                if has_T_var and has_Hplus_var:
                    # Both T and pH vary - create meshgrid
                    T_grid, pH_grid = np.meshgrid(T_celsius, pH_vals, indexing='ij')
                    T_flat = T_grid.flatten()
                    pH_flat = pH_grid.flatten()
                    if isinstance(P_vals, str):
                        P_flat = np.array([P_vals] * len(T_flat))
                    else:
                        P_flat = np.full(len(T_flat), P_vals if isinstance(P_vals, (int, float)) else P_vals[0])
                elif has_T_var:
                    # Only T varies
                    T_flat = T_celsius if isinstance(T_celsius, np.ndarray) else np.array([T_celsius])
                    pH_flat = np.full(len(T_flat), pH_vals[0])
                    P_flat = np.array([P_vals] * len(T_flat)) if isinstance(P_vals, str) else np.full(len(T_flat), P_vals if isinstance(P_vals, (int, float)) else P_vals[0])
                elif has_Hplus_var:
                    # Only pH varies
                    pH_flat = pH_vals
                    T_flat = np.full(len(pH_flat), T_celsius if isinstance(T_celsius, (int, float)) else T_celsius[0])
                    P_flat = np.array([P_vals] * len(pH_flat)) if isinstance(P_vals, str) else np.full(len(pH_flat), P_vals if isinstance(P_vals, (int, float)) else P_vals[0])
                else:
                    # No T or pH variables
                    T_flat = np.array([T_celsius if isinstance(T_celsius, (int, float)) else T_celsius[0]])
                    pH_flat = pH_vals
                    P_flat = np.array([P_vals] if isinstance(P_vals, str) else [P_vals if isinstance(P_vals, (int, float)) else P_vals[0]])
            else:
                # Single or no variable case
                T_flat = np.array([T_celsius if isinstance(T_celsius, (int, float)) else T_celsius[0]])
                pH_flat = pH_vals if isinstance(pH_vals, np.ndarray) else np.array([pH_vals[0] if hasattr(pH_vals, '__getitem__') else pH_vals])
                P_flat = np.array([P_vals] if isinstance(P_vals, str) else [P_vals if isinstance(P_vals, (int, float)) else P_vals[0]])

            # Call ionize_aa to get ionization affinity
            ionization_result = ionize_aa(aa, property="A", T=T_flat, P=P_flat, pH=pH_flat)

            # Add ionization affinity to formation affinity for each protein
            for ip, iprot in enumerate(iprotein):
                protein_key = -(iprot + 1)
                ionization_affinity = ionization_result.iloc[:, ip].values

                # Reshape to match formation affinity dimensions if needed
                formation_affinity = protein_affinities[protein_key]
                if isinstance(formation_affinity, np.ndarray):
                    if formation_affinity.shape != ionization_affinity.shape:
                        # Reshape ionization affinity to match formation affinity
                        ionization_affinity = ionization_affinity.reshape(formation_affinity.shape)

                # Add ionization to formation affinity
                protein_affinities[protein_key] = formation_affinity + ionization_affinity

        # Replace affinity_values with protein affinities
        affinity_values = protein_affinities

        # Calculate stoichiometric coefficients for proteins using matrix multiplication
        # This matches R CHNOSZ: protbasis <- t(t((resspecies[ires, 1:nrow(thermo$basis)])) %*% t((thermo$protein[iprotein, 5:25])))
        # IMPORTANT: Get the species list BEFORE deletion
        species_df_with_residues = get_species()

        # Extract basis species coefficients from residue species (rows = residues, cols = basis species)
        # ires contains indices of residues in the species list
        # We need the columns corresponding to basis species
        basis_cols = list(basis_df.index)  # e.g., ['CO2', 'H2O', 'NH3', 'H2S', 'e-', 'H+']

        # Create residue coefficient matrix (n_residues x n_basis)
        # resspecies[ires, 1:nrow(thermo$basis)] in R
        res_coeffs = species_df_with_residues.iloc[ires][basis_cols].values.astype(float)

        # Get amino acid composition matrix (n_proteins x n_residues)
        # thermo$protein[iprotein, 5:25] in R (columns 5-25 contain chains and 20 amino acids)
        # In Python (0-indexed): columns 4:24 contain chains and 20 amino acids
        aa_composition = []
        for iprot in iprotein:
            protein_row = thermo_obj.protein.iloc[iprot]
            # Columns 4:24 contain: chains, Ala, Cys, Asp, Glu, Phe, Gly, His, Ile, Lys, Leu,
            #                       Met, Asn, Pro, Gln, Arg, Ser, Thr, Val, Trp, Tyr
            aa_counts = protein_row.iloc[4:24].values.astype(float)
            aa_composition.append(aa_counts)
        aa_composition = np.array(aa_composition)  # Shape: (n_proteins, 21)

        # Matrix multiplication: (n_proteins x 21) @ (21 x n_basis) = (n_proteins x n_basis)
        # Note: res_coeffs has shape (21, n_basis) - first row is H2O, next 20 are amino acids
        # R code: t(t(resspecies) %*% t(protein)) means: (n_basis x n_residues) @ (n_residues x n_proteins) = (n_basis x n_proteins)
        # Then transpose to get (n_proteins x n_basis)
        # In Python: (n_proteins x n_residues) @ (n_residues x n_basis) = (n_proteins x n_basis)
        protein_coeffs = aa_composition @ res_coeffs  # Shape: (n_proteins, n_basis)

        # Delete residue species from species list now that we have the coefficients
        from .species import species as species_func
        species_func(ires.tolist(), delete=True, messages=False)

        if original_species is not None:
            # Restore original species (but we've already calculated, so just update species_df)
            pass

        # Create DataFrame for proteins with basis species coefficients
        species_data = {}

        # Add basis species columns
        for j, basis_sp in enumerate(basis_cols):
            species_data[basis_sp] = protein_coeffs[:, j]

        # Add metadata columns
        protein_names = []
        protein_ispecies = []

        for iprot in iprotein:
            prot_row = thermo_obj.protein.iloc[iprot]
            # Escape underscores for LaTeX compatibility in diagram labels
            protein_name = f"{prot_row['protein']}_{prot_row['organism']}"
            # Replace underscores with escaped version for matplotlib/LaTeX
            protein_name_escaped = protein_name.replace('_', r'\_')
            protein_names.append(protein_name_escaped)
            protein_ispecies.append(-(iprot + 1))  # Negative index

        species_data['ispecies'] = protein_ispecies
        species_data['logact'] = loga_protein_arr[:len(iprotein)]
        species_data['state'] = ['aq'] * len(iprotein)
        species_data['name'] = protein_names

        species_df = pd.DataFrame(species_data)

    # Process temperature and pressure for output
    T_out = args['T']
    P_out = args['P']
    vars_list = args['vars']
    vals_dict = {}

    # Convert variable names and values for output
    # Important: Keep vars_list with actual basis species names (H+, e-) for internal use
    # but create display versions in vals_dict with user-friendly names (pH, pe, Eh)
    vars_list_display = vars_list.copy()
    for i, var in enumerate(vars_list):
        # Handle pH, pe, Eh conversions for output
        if var == 'H+' and 'pH' in args_orig:
            vars_list_display[i] = 'pH'
            vals_dict['pH'] = [-val for val in args['vals'][i]]
        elif var == 'e-' and 'pe' in args_orig:
            vars_list_display[i] = 'pe'
            vals_dict['pe'] = [-val for val in args['vals'][i]]
        elif var == 'e-' and 'Eh' in args_orig:
            vars_list_display[i] = 'Eh'
            # Convert from log(a_e-) back to Eh using temperature-dependent formula
            # log(a_e-) = -pe, so pe = -log(a_e-)
            # Eh = pe * (ln(10) * R * T) / F = -log(a_e-) * T / 5039.76
            T_kelvin = args['T'] if isinstance(args['T'], (int, float)) else args['T'][0] if hasattr(args['T'], '__len__') else 298.15
            conversion_factor = T_kelvin / 5039.76  # volts per pe unit
            vals_dict['Eh'] = [-val * conversion_factor for val in args['vals'][i]]
        else:
            vals_dict[var] = args['vals'][i]

    # Keep vars_list as-is (with basis species names) for internal calculations
    # vars_list_display will be used for output only

    # Check if T or P are variables
    if 'T' in vars_list:
        T_out = []  # Variable T
        # Convert back to Celsius for output
        T_vals = vals_dict['T']
        vals_dict['T'] = [T - 273.15 for T in T_vals]
    else:
        # Convert to Kelvin for output (matching R)
        T_out = args['T']

    if 'P' in vars_list:
        P_out = []  # Variable P
    else:
        P_out = args['P']

    # Build output dictionary matching R CHNOSZ structure
    result = {
        'fun': 'affinity',
        'args': {
            **args_orig,
            'property': property_name,
            'exceed_Ttr': kwargs.get('exceed_Ttr', False),
            'exceed_rhomin': kwargs.get('exceed_rhomin', False),
            'return_buffer': kwargs.get('return_buffer', False),
            'balance': kwargs.get('balance', 'PBB')
        },
        'sout': energy_sout,
        'property': property_name,
        'basis': basis_df,
        'species': species_df,
        'T': T_out,
        'P': P_out,
        'vars': vars_list_display,  # Use display version with 'Eh', 'pH', 'pe' for output
        'vals': vals_dict,
        'values': affinity_values
    }

    return result

Calculate affinities of formation reactions.

This function calculates chemical affinities for the formation reactions of species of interest from user-selected basis species. The affinities are calculated as A/2.303RT where A is the chemical affinity.

Parameters

messages : bool, default True: Whether to print informational messages
basis : pd.DataFrame, optional: Basis species definition to use (if not using global basis)
species : pd.DataFrame, optional: Species definition to use (if not using global species)
iprotein : int, list of int, or array, optional: Build proteins from residues (row numbers in thermo().protein)
loga_protein : float or list of float, default 0.0: Activity of proteins (log scale)
**kwargs : dict: Variable arguments defining calculation conditions: - Basis species names (e.g., CO2=[-60, 20, 5]): Variable basis species activities - T : float or list, Temperature in °C - P : float or list, Pressure in bar - property : str, Property to calculate ("A", "logK", "G", etc.) - exceed_Ttr : bool, Allow extrapolation beyond transition temperatures - exceed_rhomin : bool, Allow calculations below minimum water density - return_buffer : bool, Return buffer activities - balance : str, Balance method for protein buffers

Returns

dict: Dictionary containing: - fun : str, Function name ("affinity") - args : dict, Arguments used in calculation - sout : dict, Subcrt calculation results - property : str, Property calculated - basis : pd.DataFrame, Basis species definition - species : pd.DataFrame, Species of interest definition - T : float or array, Temperature(s) in Kelvin - P : float or array, Pressure(s) in bar - vars : list, Variable names - vals : dict, Variable values - values : dict, Calculated affinity values by species

Examples

>>> import pychnosz
>>> pychnosz.reset()
>>> pychnosz.basis(["CO2", "H2O", "NH3", "H2S", "H+", "O2"])
>>> pychnosz.species(["glycine", "tyrosine", "serine", "methionine"])
>>> result = pychnosz.affinity(CO2=[-60, 20, 5], T=350, P=2000)
>>> print(result['values'][1566])  # Glycine affinities

>>> # With proteins
>>> import pandas as pd
>>> aa = pd.read_csv("POLG.csv")
>>> iprotein = pychnosz.add_protein(aa)
>>> pychnosz.basis("CHNOSe")
>>> a = pychnosz.affinity(iprotein=iprotein, pH=[2, 14], Eh=[-1, 1])

Notes

This implementation maintains complete fidelity to R CHNOSZ affinity(): - Identical argument processing including dynamic basis species parameters - Same variable expansion and multi-dimensional calculations - Exact energy() function behavior for property calculations - Identical output structure and formatting - Support for protein calculations via iprotein parameter

Expand source code

def basis(species: Optional[Union[str, int, List[Union[str, int]]]] = None,
          state: Optional[Union[str, List[str]]] = None,
          logact: Optional[Union[float, List[float]]] = None,
          delete: bool = False,
          add: bool = False,
          messages: bool = True,
          global_state: bool = True) -> Optional[pd.DataFrame]:
    """
    Set up the basis species of a thermodynamic system.

    Parameters
    ----------
    species : str, int, list, or None
        Species name(s), formula(s), or index(es), or preset keyword.
        If None, returns current basis definition.
    state : str, list of str, or None
        Physical state(s) for the species
    logact : float, list of float, or None
        Log activities for the basis species
    delete : bool, default False
        If True, delete the basis definition
    add : bool, default False
        If True, add to existing basis instead of replacing
    messages : bool, default True
        If True, print informational messages about species lookup
        If False, suppress all output (equivalent to R's suppressMessages())
    global_state : bool, default True
        If True, store basis definition in global thermo().basis (default behavior)
        If False, return basis definition without storing globally (local state)

    Returns
    -------
    pd.DataFrame or None
        Basis species definition DataFrame, or None if deleted

    Examples
    --------
    >>> # Set up a simple basis
    >>> basis(["H2O", "CO2", "NH3"], logact=[0, -3, -4])

    >>> # Use a preset basis
    >>> basis("CHNOS")

    >>> # Add species to existing basis
    >>> basis("Fe2O3", add=True)

    >>> # Delete basis
    >>> basis(delete=True)

    >>> # Suppress messages
    >>> basis("CHNOS", messages=False)
    """
    thermo_obj = thermo()
    
    # Get current basis
    old_basis = thermo_obj.basis
    
    # Delete basis if requested
    if delete or species == "":
        thermo_obj.basis = None
        thermo_obj.species = None
        return old_basis
    
    # Return current basis if no species specified
    if species is None:
        return old_basis
    
    # Handle empty species list
    if isinstance(species, list) and len(species) == 0:
        raise ValueError("species argument is empty")
    
    # Check for preset keywords
    if isinstance(species, str) and species in _get_preset_basis_keywords():
        return preset_basis(species, messages=messages, global_state=global_state)

    # Ensure species names are unique
    if isinstance(species, list):
        if len(set([str(s) for s in species])) != len(species):
            raise ValueError("species names are not unique")

    # Process arguments
    species, state, logact = _process_basis_arguments(species, state, logact)

    # Handle special transformations
    species, logact = _handle_special_species(species, logact)

    # Check if we're modifying existing basis species
    if (old_basis is not None and not add and
        _all_species_in_basis(species, old_basis)):
        if state is not None or logact is not None:
            return mod_basis(species, state, logact, messages=messages)

    # Create new basis definition or add to existing
    if logact is None:
        logact = [0.0] * len(species)

    # Get species indices
    ispecies = _get_species_indices(species, state, messages=messages)
    
    # Handle adding to existing basis
    if add and old_basis is not None:
        # Check for duplicates
        existing_indices = old_basis['ispecies'].tolist()
        for i, idx in enumerate(ispecies):
            if idx in existing_indices:
                sp_name = species[i] if isinstance(species[i], str) else str(species[i])
                raise BasisError(f"Species {sp_name} is already in the basis definition")
        
        # Append to existing basis
        ispecies = existing_indices + ispecies
        logact = old_basis['logact'].tolist() + logact
    
    # Create new basis
    new_basis = put_basis(ispecies, logact, global_state=global_state)

    # Only update global species list if using global state
    if global_state:
        # Handle species list when adding
        if add and thermo_obj.species is not None:
            _update_species_for_added_basis(old_basis, new_basis)
        else:
            # Clear species since basis changed
            from .species import species as species_func
            species_func(delete=True)

    return new_basis

Set up the basis species of a thermodynamic system.

Parameters

species : str, int, list, or None: Species name(s), formula(s), or index(es), or preset keyword. If None, returns current basis definition.
state : str, list of str, or None: Physical state(s) for the species
logact : float, list of float, or None: Log activities for the basis species
delete : bool, default False: If True, delete the basis definition
add : bool, default False: If True, add to existing basis instead of replacing
messages : bool, default True: If True, print informational messages about species lookup If False, suppress all output (equivalent to R's suppressMessages())
global_state : bool, default True: If True, store basis definition in global thermo().basis (default behavior) If False, return basis definition without storing globally (local state)

Returns

pd.DataFrame or None: Basis species definition DataFrame, or None if deleted

Examples

>>> # Set up a simple basis
>>> basis(["H2O", "CO2", "NH3"], logact=[0, -3, -4])

>>> # Use a preset basis
>>> basis("CHNOS")

>>> # Add species to existing basis
>>> basis("Fe2O3", add=True)

>>> # Delete basis
>>> basis(delete=True)

>>> # Suppress messages
>>> basis("CHNOS", messages=False)

Expand source code

def diagram(eout: Dict[str, Any],
            type: str = "auto",
            alpha: bool = False,
            balance: Optional[Union[str, float, List[float]]] = None,
            names: Optional[List[str]] = None,
            format_names: bool = True,
            xlab: Optional[str] = None,
            ylab: Optional[str] = None,
            xlim: Optional[List[float]] = None,
            ylim: Optional[List[float]] = None,
            col: Optional[Union[str, List[str]]] = None,
            col_names: Optional[Union[str, List[str]]] = None,
            lty: Optional[Union[str, int, List]] = None,
            lwd: Union[float, List[float]] = 1,
            cex: Union[float, List[float]] = 1.0,
            main: Optional[str] = None,
            fill: Optional[str] = None,
            fill_NA: str = "0.8",
            limit_water: Optional[bool] = None,
            plot_it: bool = True,
            add_to: Optional[Dict[str, Any]] = None,
            contour_method: Optional[Union[str, List[str]]] = "edge",
            messages: bool = True,
            interactive: bool = False,
            annotation: Optional[str] = None,
            annotation_coords: List[float] = [0, 0],
            width: int = 600,
            height: int = 520,
            save_as: Optional[str] = None,
            save_format: Optional[str] = None,
            save_scale: float = 1,
            normalize: Union[bool, List[bool]] = False,
            as_residue: bool = False,
            **kwargs) -> Dict[str, Any]:
    """
    Plot equilibrium chemical activity and predominance diagrams.

    This function creates plots from the output of affinity() or equilibrate().
    For 1D diagrams, it produces line plots showing how affinity or activity
    varies with a single variable. For 2D diagrams, it creates predominance
    field diagrams.

    Parameters
    ----------
    eout : dict
        Output from affinity() or equilibrate()
    type : str, default "auto"
        Type of diagram:
        - "auto" (default): Plot affinity values (A/2.303RT)
        - "loga.equil": Plot equilibrium activities from equilibrate()
        - "saturation": Draw affinity=0 contour lines (mineral saturation)
        - Basis species name (e.g., "O2", "H2O", "CO2"): Plot equilibrium
          log activity/fugacity of the specified basis species where affinity=0
          for each formed species. Useful for Eh-pH diagrams and showing
          oxygen/water fugacities at equilibrium.
    alpha : bool or str, default False
        Plot degree of formation instead of activities?
        If "balance", scale by balancing coefficients
    balance : str, float, or list of float, optional
        Balancing coefficients or method for balancing reactions
    names : list of str, optional
        Custom names for species (for labels)
    format_names : bool, default True
        Apply formatting to chemical formulas?
    xlab : str, optional
        Custom x-axis label
    ylab : str, optional
        Custom y-axis label
    xlim : list of float, optional
        X-axis limits [min, max]
    ylim : list of float, optional
        Y-axis limits [min, max]
    col : str or list of str, optional
        Line colors for 1-D plots and boundary lines in 2-D plots (matplotlib color specs)
    col_names : str or list of str, optional
        Text colors for field labels in 2-D plots (matplotlib color specs)
    lty : str, int, or list, optional
        Line styles (matplotlib linestyle specs)
    lwd : float or list of float, default 1
        Line widths for 1-D plots and boundary lines in 2-D predominance
        diagrams. Set to 0 to disable borders in 2-D diagrams. If fill is
        None and lwd > 0, uses white fill with black borders (R CHNOSZ default).
    cex : float or list of float, default 1.0
        Character expansion factor for text labels. Values > 1 make text larger,
        values < 1 make text smaller. Can be a single value or a list (one per species).
        Used for contour labels in type="saturation" plots.
    main : str, optional
        Plot title
    fill : str, optional
        Color palette for 2-D predominance diagrams. Can be any matplotlib
        colormap name (e.g., 'viridis', 'plasma', 'terrain', 'rainbow',
        'Set1', 'tab10', 'Pastel1'). If None, uses discrete colors from
        the default color cycle. Ignored for 1-D diagrams.
    fill_NA : str, default "0.8"
        Color for regions outside water stability limits (water instability regions).
        Matplotlib color specification (e.g., "0.8" for gray, "#CCCCCC").
        Set to "transparent" to disable shading. Default "0.8" matches R's "gray80".
    limit_water : bool, optional
        Whether to show water stability limits as shaded regions (default True for
        2-D diagrams). If True, also clips the diagram to the water stability region.
        Set to False to disable water stability shading.
    plot_it : bool, default True
        Display the plot?
    add_to : dict, optional
        A diagram result dictionary from a previous diagram() call. When provided,
        this plot will be AUTOMATICALLY COPIED and the new diagram will be added to
        the copy. This preserves the original plot while creating a modified version.
        The axes object is extracted from add_to['ax'].

        This parameter eliminates the need for a separate 'add' boolean - when
        add_to is provided, the function automatically operates in "add" mode.

        Example workflow:
        >>> plot_a = diagram(affinity1, fill='terrain')  # Create base plot
        >>> plot_a1 = diagram(affinity2, add_to=plot_a, col='blue')  # Copy and add
        >>> plot_a2 = diagram(affinity3, add_to=plot_a, col='red')   # Copy and add again
        >>> # plot_a remains unchanged, plot_a1 and plot_a2 are independent modifications
    contour_method : str or list of str, optional
        Method for labeling contour lines. Default "edge" labels at plot edges.
        Can be a single value (applied to all species) or a list (one per species).
        Set to None, NA, or "" to disable labels (only for type="saturation").
        In R CHNOSZ, different methods like "edge", "flattest", "simple" control
        label placement; in Python, this mainly controls whether labels are shown.
    interactive : bool, default False
        Create an interactive plot using Plotly instead of matplotlib?
        If True, calls diagram_interactive() with the appropriate parameters.
    annotation : str, optional
        For interactive plots only. Annotation text to add to the plot.
    annotation_coords : list of float, default [0, 0]
        For interactive plots only. Coordinates of annotation, where [0, 0] is
        bottom left and [1, 1] is top right.
    width : int, default 600
        For interactive plots only. Width of the plot in pixels.
    height : int, default 520
        For interactive plots only. Height of the plot in pixels.
    save_as : str, optional
        For interactive plots only. Provide a filename to save this figure.
        Filetype is determined by `save_format`.
    save_format : str, optional
        For interactive plots only. Desired format of saved or downloaded figure.
        Can be 'png', 'jpg', 'jpeg', 'webp', 'svg', 'pdf', 'eps', 'json', or 'html'.
        If 'html', an interactive plot will be saved.
    save_scale : float, default 1
        For interactive plots only. Multiply title/legend/axis/canvas sizes by
        this factor when saving the figure.
    **kwargs
        Additional arguments passed to matplotlib plotting functions

    Returns
    -------
    dict
        Dictionary containing:
        - plotvar : str, Variable that was plotted
        - plotvals : dict, Values that were plotted
        - names : list, Names used for labels
        - predominant : array or NA, Predominance matrix (for 2D)
        - balance : str or list, Balancing coefficients used
        - n.balance : list, Numerical balancing coefficients
        - ax : matplotlib.axes.Axes, The axes object used for plotting (if plot_it=True)
        - fig : matplotlib.figure.Figure, The figure object used for plotting (if plot_it=True)
        - All original eout contents

    Examples
    --------
    >>> import pychnosz
    >>> pychnosz.basis(["Fe2O3", "CO2", "H2O", "NH3", "H2S", "oxygen", "H+"],
    ...              [0, -3, 0, -4, -7, -80, -7])
    >>> pychnosz.species(["pyrite", "goethite"])
    >>> a = pychnosz.affinity(H2S=[-60, 20, 5], T=25, P=1)
    >>> d = diagram(a)

    Notes
    -----
    This implementation is based on R CHNOSZ diagram() function but adapted
    for Python's matplotlib plotting instead of R's base graphics. The key
    differences from diagram_from_WORM.py are:
    - Works directly with Python dict output from affinity() (no rpy2)
    - Uses matplotlib for 1D plots by default
    - Can optionally use plotly if requested
    """

    # Handle add_to parameter: automatically copy the provided plot
    # This extracts the axes object and creates an independent copy
    # When add_to is provided, we're in "add" mode
    ax = None
    add = add_to is not None
    plot_was_provided = add

    if add_to is not None:
        # Make a deep copy of the provided plot to preserve the original
        plot_copy = copy_plot(add_to)
        # Extract the axes from the copied plot
        if 'ax' in plot_copy:
            ax = plot_copy['ax']
        else:
            raise ValueError("The 'add_to' parameter must contain an 'ax' key (a diagram result dictionary)")

    # If interactive mode is requested, delegate to diagram_interactive
    if interactive:
        df, fig = diagram_interactive(
            eout=eout,
            type=type,
            main=main,
            borders=lwd,
            names=names,
            format_names=format_names,
            annotation=annotation,
            annotation_coords=annotation_coords,
            balance=balance,
            xlab=xlab,
            ylab=ylab,
            fill=fill,
            width=width,
            height=height,
            alpha=alpha,
            plot_it=plot_it,
            add=add,
            ax=ax,
            col=col,
            lty=lty,
            lwd=lwd,
            cex=cex,
            contour_method=contour_method,
            save_as=save_as,
            save_format=save_format,
            save_scale=save_scale,
            messages=messages
        )
        # Return in a format compatible with diagram's normal output
        # diagram_interactive returns (df, fig), wrap in a dict for consistency
        # Include eout data so water_lines() can access vars, vals, basis, etc.
        result = {
            **eout,  # Include all original eout data
            'df': df,
            'fig': fig,
            'ax': fig  # For compatibility, store fig in ax key for add=True workflow
        }
        return result

    # Check that eout is valid
    efun = eout.get('fun', '')
    if efun not in ['affinity', 'equilibrate', 'solubility']:
        raise ValueError("'eout' is not the output from affinity(), equilibrate(), or solubility()")

    # Determine if eout is from affinity() (as opposed to equilibrate())
    # Check for both Python naming (loga_equil) and R naming (loga.equil)
    eout_is_aout = 'loga_equil' not in eout and 'loga.equil' not in eout

    # Check if type is a basis species name
    plot_loga_basis = False
    if type not in ["auto", "saturation", "loga.equil", "loga_equil", "loga.balance", "loga_balance"]:
        # Check if type matches a basis species name
        if 'basis' in eout:
            basis_species = list(eout['basis'].index) if hasattr(eout['basis'], 'index') else []
            if type in basis_species:
                plot_loga_basis = True
                if alpha:
                    raise ValueError("equilibrium activities of basis species not available with alpha = TRUE")

    # Handle type="saturation" - requires affinity output
    if type == "saturation":
        if not eout_is_aout:
            raise ValueError("type='saturation' requires output from affinity(), not equilibrate()")
        # Set eout_is_aout flag
        eout_is_aout = True

    # Get number of dimensions
    # Handle both dict (affinity) and list (equilibrate) values structures
    if isinstance(eout['values'], dict):
        first_values = list(eout['values'].values())[0]
    elif isinstance(eout['values'], list):
        first_values = eout['values'][0]
    else:
        first_values = eout['values']

    if hasattr(first_values, 'shape'):
        nd = len(first_values.shape)
    elif hasattr(first_values, '__len__'):
        nd = 1
    else:
        nd = 0  # Single value

    # For affinity output, get balancing coefficients
    if eout_is_aout and type == "auto":
        n_balance, balance = _get_balance(eout, balance, messages)
    elif eout_is_aout and type == "saturation":
        # For saturation diagrams, use n_balance = 1 for all species (don't normalize by stoichiometry)
        if isinstance(eout['values'], dict):
            n_balance = [1] * len(eout['values'])
        elif isinstance(eout['values'], list):
            n_balance = [1] * len(eout['values'])
        else:
            n_balance = [1]
        if balance is None:
            balance = 1
    else:
        # For equilibrate output, use n_balance from equilibrate if available
        if 'n_balance' in eout:
            n_balance = eout['n_balance']
            balance = eout.get('balance', 1)
        else:
            if isinstance(eout['values'], dict):
                n_balance = [1] * len(eout['values'])
            elif isinstance(eout['values'], list):
                n_balance = [1] * len(eout['values'])
            else:
                n_balance = [1]
            if balance is None:
                balance = 1

    # Determine what to plot
    plotvals = {}
    plotvar = eout.get('property', 'A')

    # Calculate equilibrium log activity/fugacity of basis species
    if plot_loga_basis:
        # Find the index of the basis species
        basis_df = eout['basis']
        ibasis = list(basis_df.index).index(type)

        # Get the logarithm of activity used in the affinity calculation
        logact = basis_df.iloc[ibasis]['logact']

        # Check if logact is numeric
        try:
            loga_basis = float(logact)
        except (ValueError, TypeError):
            raise ValueError(f"the logarithm of activity for basis species {type} is not numeric - was a buffer selected?")

        # Get the reaction coefficients for this basis species
        # eout['species'] is a DataFrame with basis species as columns
        nu_basis = eout['species'].iloc[:, ibasis].values

        # Calculate the logarithm of activity where affinity = 0
        # loga_equilibrium = loga_basis - affinity / nu_basis
        plotvals = {}
        for i, (sp_idx, affinity_vals) in enumerate(eout['values'].items()):
            plotvals[sp_idx] = loga_basis - affinity_vals / nu_basis[i]

        plotvar = type

        # Set n_balance (not used for basis species plots, but needed for compatibility)
        n_balance = [1] * len(plotvals)
        if balance is None:
            balance = 1
    elif eout_is_aout:
        # Plot affinity values divided by balancing coefficients
        # DEBUG: Check balance application
        if False:  # Set to True for debugging
            print(f"\nDEBUG: Applying balance to affinity values")
            print(f"  n_balance: {n_balance}")

        # Handle dict-based values (from affinity)
        if isinstance(eout['values'], dict):
            for i, (species_idx, values) in enumerate(eout['values'].items()):
                if False:  # Set to True for debugging
                    print(f"  Species {i} (ispecies {species_idx}): values/n_balance[{i}]={n_balance[i]}")
                plotvals[species_idx] = values / n_balance[i]
        # Handle list-based values
        elif isinstance(eout['values'], list):
            for i, values in enumerate(eout['values']):
                species_idx = eout['species']['ispecies'].iloc[i]
                plotvals[species_idx] = values / n_balance[i]

        if plotvar == 'A':
            plotvar = 'A/(2.303RT)'
            if nd == 1:
                if messages:
                    print(f"diagram: plotting {plotvar} / n.balance")
    else:
        # Plot equilibrated activities
        # Check for both Python naming (loga_equil) and R naming (loga.equil)
        loga_equil_key = 'loga_equil' if 'loga_equil' in eout else 'loga.equil'
        loga_equil_list = eout[loga_equil_key]

        # For equilibrate output, keep plotvals as a dict with INTEGER indices as keys
        # This preserves the 1:1 correspondence with the species list, including duplicates
        # Do NOT use ispecies as keys because duplicates would overwrite each other
        if isinstance(loga_equil_list, list):
            for i, loga_val in enumerate(loga_equil_list):
                plotvals[i] = loga_val  # Use integer index, not ispecies
        else:
            # Already a dict
            plotvals = loga_equil_list

        plotvar = 'loga.equil'

    # Handle alpha (degree of formation)
    if alpha:
        # Convert to activities (remove logarithms)
        # Use numpy arrays for proper element-wise operations
        act_vals = {}
        for k, v in plotvals.items():
            if isinstance(v, np.ndarray):
                act_vals[k] = 10**v
            else:
                act_vals[k] = np.power(10, v)

        # Scale by balance if requested
        if alpha == "balance":
            species_keys = list(act_vals.keys())
            for i, k in enumerate(species_keys):
                act_vals[k] = act_vals[k] * n_balance[i]

        # Calculate sum of activities (element-wise for arrays)
        # Get the first value to determine shape
        first_val = list(act_vals.values())[0]
        if isinstance(first_val, np.ndarray):
            # Multi-dimensional case
            sum_act = np.zeros_like(first_val)
            for v in act_vals.values():
                sum_act = sum_act + v
        else:
            # Single value case
            sum_act = sum(act_vals.values())

        # Calculate alpha (fraction) - element-wise division
        plotvals = {k: v / sum_act for k, v in act_vals.items()}
        plotvar = "alpha"

    # Get species information for labels
    species_df = eout['species']
    if names is None:
        names = species_df['name'].tolist()

    # Format chemical names if requested
    if format_names and not alpha:
        names = [_format_chemname(name) for name in names]

    # Prepare for plotting
    if nd == 0:
        # 0-D: Bar plot (not implemented yet)
        raise NotImplementedError("0-D bar plots not yet implemented")

    elif nd == 1:
        # 1-D: Line plot
        result = _plot_1d(eout, plotvals, plotvar, names, n_balance, balance,
                       xlab, ylab, xlim, ylim, col, lty, lwd, main, add, plot_it, ax, width, height, plot_was_provided, **kwargs)

    elif nd == 2:
        # 2-D: Predominance diagram or saturation lines
        # Pass lty and cex through kwargs for saturation plots
        result = _plot_2d(eout, plotvals, plotvar, names, n_balance, balance,
                       xlab, ylab, xlim, ylim, col, col_names, fill, fill_NA, limit_water, lwd, main, add, plot_it, ax,
                       type, contour_method, messages, width, height, plot_was_provided, lty=lty, cex=cex, **kwargs)

    else:
        raise ValueError(f"Cannot create diagram with {nd} dimensions")

    # Handle Jupyter display behavior
    # When plot_it=True, we want the figure to display
    # When plot_it=False, we want to suppress display and close the figure
    if not plot_it and result is not None and 'fig' in result:
        # Close the figure to prevent auto-display in Jupyter
        # The figure is still in the result dict, so users can access it via result['fig']
        # but it won't be displayed automatically
        plt.close(result['fig'])
    elif plot_it and result is not None and 'fig' in result:
        # Try to use IPython display if available (for Jupyter notebooks)
        try:
            from IPython.display import display
            display(result['fig'])
        except (ImportError, NameError):
            # Not in IPython/Jupyter, regular matplotlib display
            pass

    return result

Plot equilibrium chemical activity and predominance diagrams.

This function creates plots from the output of affinity() or equilibrate(). For 1D diagrams, it produces line plots showing how affinity or activity varies with a single variable. For 2D diagrams, it creates predominance field diagrams.

Parameters

eout : dict

Output from affinity() or equilibrate()

type : str, default "auto"

Type of diagram: - "auto" (default): Plot affinity values (A/2.303RT) - "loga.equil": Plot equilibrium activities from equilibrate() - "saturation": Draw affinity=0 contour lines (mineral saturation) - Basis species name (e.g., "O2", "H2O", "CO2"): Plot equilibrium log activity/fugacity of the specified basis species where affinity=0 for each formed species. Useful for Eh-pH diagrams and showing oxygen/water fugacities at equilibrium.

alpha : bool or str, default False

Plot degree of formation instead of activities? If "balance", scale by balancing coefficients

balance : str, float, or list of float, optional

Balancing coefficients or method for balancing reactions

names : list of str, optional

Custom names for species (for labels)

format_names : bool, default True

Apply formatting to chemical formulas?

xlab : str, optional

Custom x-axis label

ylab : str, optional

Custom y-axis label

xlim : list of float, optional

X-axis limits [min, max]

ylim : list of float, optional

Y-axis limits [min, max]

col : str or list of str, optional

Line colors for 1-D plots and boundary lines in 2-D plots (matplotlib color specs)

col_names : str or list of str, optional

Text colors for field labels in 2-D plots (matplotlib color specs)

lty : str, int, or list, optional

Line styles (matplotlib linestyle specs)

lwd : float or list of float, default 1

Line widths for 1-D plots and boundary lines in 2-D predominance diagrams. Set to 0 to disable borders in 2-D diagrams. If fill is None and lwd > 0, uses white fill with black borders (R CHNOSZ default).

cex : float or list of float, default 1.0

Character expansion factor for text labels. Values > 1 make text larger, values < 1 make text smaller. Can be a single value or a list (one per species). Used for contour labels in type="saturation" plots.

main : str, optional

Plot title

fill : str, optional

Color palette for 2-D predominance diagrams. Can be any matplotlib colormap name (e.g., 'viridis', 'plasma', 'terrain', 'rainbow', 'Set1', 'tab10', 'Pastel1'). If None, uses discrete colors from the default color cycle. Ignored for 1-D diagrams.

fill_NA : str, default "0.8"

Color for regions outside water stability limits (water instability regions). Matplotlib color specification (e.g., "0.8" for gray, "#CCCCCC"). Set to "transparent" to disable shading. Default "0.8" matches R's "gray80".

limit_water : bool, optional

Whether to show water stability limits as shaded regions (default True for 2-D diagrams). If True, also clips the diagram to the water stability region. Set to False to disable water stability shading.

plot_it : bool, default True

Display the plot?

add_to : dict, optional

A diagram result dictionary from a previous diagram() call. When provided, this plot will be AUTOMATICALLY COPIED and the new diagram will be added to the copy. This preserves the original plot while creating a modified version. The axes object is extracted from add_to['ax'].

This parameter eliminates the need for a separate 'add' boolean - when add_to is provided, the function automatically operates in "add" mode.

Example workflow:

plot_a = diagram(affinity1, fill='terrain') # Create base plot plot_a1 = diagram(affinity2, add_to=plot_a, col='blue') # Copy and add plot_a2 = diagram(affinity3, add_to=plot_a, col='red') # Copy and add again

plot_a remains unchanged, plot_a1 and plot_a2 are independent modifications

contour_method : str or list of str, optional

Method for labeling contour lines. Default "edge" labels at plot edges. Can be a single value (applied to all species) or a list (one per species). Set to None, NA, or "" to disable labels (only for type="saturation"). In R CHNOSZ, different methods like "edge", "flattest", "simple" control label placement; in Python, this mainly controls whether labels are shown.

interactive : bool, default False

Create an interactive plot using Plotly instead of matplotlib? If True, calls diagram_interactive() with the appropriate parameters.

annotation : str, optional

For interactive plots only. Annotation text to add to the plot.

annotation_coords : list of float, default [0, 0]

For interactive plots only. Coordinates of annotation, where [0, 0] is bottom left and [1, 1] is top right.

width : int, default 600

For interactive plots only. Width of the plot in pixels.

height : int, default 520

For interactive plots only. Height of the plot in pixels.

save_as : str, optional

For interactive plots only. Provide a filename to save this figure. Filetype is determined by save_format.

save_format : str, optional

For interactive plots only. Desired format of saved or downloaded figure. Can be 'png', 'jpg', 'jpeg', 'webp', 'svg', 'pdf', 'eps', 'json', or 'html'. If 'html', an interactive plot will be saved.

save_scale : float, default 1

For interactive plots only. Multiply title/legend/axis/canvas sizes by this factor when saving the figure.

**kwargs

Additional arguments passed to matplotlib plotting functions

Returns

dict: Dictionary containing: - plotvar : str, Variable that was plotted - plotvals : dict, Values that were plotted - names : list, Names used for labels - predominant : array or NA, Predominance matrix (for 2D) - balance : str or list, Balancing coefficients used - n.balance : list, Numerical balancing coefficients - ax : matplotlib.axes.Axes, The axes object used for plotting (if plot_it=True) - fig : matplotlib.figure.Figure, The figure object used for plotting (if plot_it=True) - All original eout contents

Examples

>>> import pychnosz
>>> pychnosz.basis(["Fe2O3", "CO2", "H2O", "NH3", "H2S", "oxygen", "H+"],
...              [0, -3, 0, -4, -7, -80, -7])
>>> pychnosz.species(["pyrite", "goethite"])
>>> a = pychnosz.affinity(H2S=[-60, 20, 5], T=25, P=1)
>>> d = diagram(a)

Notes

This implementation is based on R CHNOSZ diagram() function but adapted for Python's matplotlib plotting instead of R's base graphics. The key differences from diagram_from_WORM.py are: - Works directly with Python dict output from affinity() (no rpy2) - Uses matplotlib for 1D plots by default - Can optionally use plotly if requested

Expand source code

def equilibrate(aout: Dict[str, Any],
                balance: Optional[Union[str, int, List[float]]] = None,
                loga_balance: Optional[Union[float, List[float]]] = None,
                ispecies: Optional[Union[List[int], List[bool]]] = None,
                normalize: Union[bool, List[bool]] = False,
                as_residue: bool = False,
                method: Optional[Union[str, List[str]]] = None,
                tol: float = np.finfo(float).eps ** 0.25,
                messages: bool = True) -> Dict[str, Any]:
    """
    Calculate equilibrium activities of species from affinities.

    This function calculates the equilibrium activities of species in
    (metastable) equilibrium from the affinities of their formation reactions
    from basis species at given activities.

    Parameters
    ----------
    aout : dict
        Output from affinity() containing chemical affinities
    balance : str, int, or list of float, optional
        Balancing method:
        - None: Autoselect using which_balance()
        - str: Name of basis species to balance on
        - "length": Balance on protein length (for proteins)
        - "volume": Balance on standard-state volume
        - 1: Balance on one mole of species (formula units)
        - list: User-defined balancing coefficients
    loga_balance : float or list of float, optional
        Logarithm of total activity of the balancing basis species
        If None, calculated from species initial activities and n.balance
    ispecies : list of int or list of bool, optional
        Indices or boolean mask of species to include in equilibration
        Default: all species except those with state "cr" (crystalline)
    normalize : bool or list of bool, default False
        Normalize formulas by balancing coefficients?
    as_residue : bool, default False
        Use residue basis for proteins?
    method : str or list of str, optional
        Equilibration method:
        - "boltzmann": Boltzmann distribution (for n.balance = 1)
        - "reaction": Reaction-based equilibration (general method)
        If None, chooses "boltzmann" if all n.balance == 1, else "reaction"
    tol : float, default np.finfo(float).eps**0.25
        Tolerance for root-finding in reaction method
    messages : bool, default True
        Whether to print informational messages

    Returns
    -------
    dict
        Dictionary containing all aout contents plus:
        - balance : str or list, Balancing description
        - m_balance : list, Molar formula divisors
        - n_balance : list, Balancing coefficients
        - loga_balance : float or array, Log activity of balanced quantity
        - Astar : list of arrays, Normalized affinities
        - loga_equil : list of arrays, Equilibrium log activities

    Examples
    --------
    >>> import pychnosz
    >>> pychnosz.basis("CHNOS")
    >>> pychnosz.basis("NH3", -2)
    >>> pychnosz.species(["alanine", "glycine", "serine"])
    >>> a = pychnosz.affinity(NH3=[-80, 60], T=55, P=2000)
    >>> e = pychnosz.equilibrate(a, balance="CO2")

    Notes
    -----
    This is a 1:1 replica of the R CHNOSZ equilibrate() function.
    - Handles both Boltzmann and reaction-based equilibration
    - Supports normalization and residue basis for proteins
    - Properly handles crystalline species via predominance diagrams
    - Implements identical balancing logic to R version
    """

    # Handle mosaic output (not implemented yet, but keep structure)
    if aout.get('fun') == 'mosaic':
        raise NotImplementedError("mosaic equilibration not yet implemented")

    # Number of possible species
    # affinity() returns values as a dict with ispecies as keys
    if isinstance(aout['values'], dict):
        # Convert dict to list ordered by species dataframe
        values_list = []
        for i in range(len(aout['species'])):
            species_idx = aout['species']['ispecies'].iloc[i]
            if species_idx in aout['values']:
                values_list.append(aout['values'][species_idx])
            else:
                # Species not in values dict - use NaN array
                values_list.append(np.array([np.nan]))
        aout['values'] = values_list

    nspecies = len(aout['values'])

    # Get the balancing coefficients
    bout = _balance(aout, balance, messages)
    n_balance_orig = bout['n_balance'].copy()
    n_balance = bout['n_balance'].copy()
    balance = bout['balance']

    # If solids (cr) species are present, find them on a predominance diagram
    iscr = [('cr' in str(state)) for state in aout['species']['state']]
    ncr = sum(iscr)

    # Set default ispecies to exclude cr species (matching R default)
    if ispecies is None:
        ispecies = [not is_cr for is_cr in iscr]

    if ncr > 0:
        # Import diagram here to avoid circular imports
        from .diagram import diagram
        dout = diagram(aout, balance=balance, normalize=normalize,
                      as_residue=as_residue, plot_it=False, limit_water=False, messages=messages)

    if ncr == nspecies:
        # We get here if there are only solids
        m_balance = None
        Astar = None
        loga_equil = []
        for i in range(len(aout['values'])):
            la = np.array(aout['values'][i], copy=True)
            la[:] = np.nan
            loga_equil.append(la)
    else:
        # We get here if there are any aqueous species
        # Take selected species in 'ispecies'
        if len(ispecies) == 0:
            raise ValueError("the length of ispecies is zero")

        # Convert boolean to indices if needed
        if isinstance(ispecies, list) and len(ispecies) > 0:
            if isinstance(ispecies[0], bool):
                ispecies = [i for i, x in enumerate(ispecies) if x]

        # Take out species that have NA affinities
        ina = [all(np.isnan(np.array(x).flatten())) for x in aout['values']]
        ispecies = [i for i in ispecies if not ina[i]]

        if len(ispecies) == 0:
            raise ValueError("all species have NA affinities")

        if ispecies != list(range(nspecies)):
            if messages:
                print(f"equilibrate: using {len(ispecies)} of {nspecies} species")
            aout_species_df = aout['species']
            aout['species'] = aout_species_df.iloc[ispecies].reset_index(drop=True)
            aout['values'] = [aout['values'][i] for i in ispecies]
            n_balance = [n_balance[i] for i in ispecies]

        # Number of species that are left
        nspecies = len(aout['values'])

        # Say what the balancing coefficients are
        if len(n_balance) < 100:
            if messages:
                print(f"equilibrate: n.balance is {', '.join(map(str, n_balance))}")

        # Logarithm of total activity of the balancing basis species
        if loga_balance is None:
            # Sum up the activities, then take absolute value
            # in case n.balance is negative
            logact = np.array([aout['species']['logact'].iloc[i] for i in range(len(aout['species']))])
            sumact = abs(sum(10**logact * n_balance))
            loga_balance = np.log10(sumact)

        # Make loga.balance the same length as the values of affinity
        if isinstance(loga_balance, (int, float)):
            loga_balance = float(loga_balance)
        else:
            loga_balance = np.array(loga_balance).flatten()

        nvalues = len(np.array(aout['values'][0]).flatten())

        if isinstance(loga_balance, float) or len(np.atleast_1d(loga_balance)) == 1:
            # We have a constant loga.balance
            if isinstance(loga_balance, np.ndarray):
                loga_balance = float(loga_balance[0])
            if messages:
                print(f"equilibrate: loga.balance is {loga_balance}")
            loga_balance = np.full(nvalues, loga_balance)
        else:
            # We are using a variable loga.balance (supplied by the user)
            if len(loga_balance) != nvalues:
                raise ValueError(f"length of loga.balance ({len(loga_balance)}) doesn't match "
                               f"the affinity values ({nvalues})")
            if messages:
                print(f"equilibrate: loga.balance has same length as affinity values ({len(loga_balance)})")

        # Normalize the molar formula by the balance coefficients
        m_balance = n_balance.copy()
        isprotein = ['_' in str(name) for name in aout['species']['name']]

        # Handle normalize parameter
        if isinstance(normalize, bool):
            normalize = [normalize] * nspecies
        elif not isinstance(normalize, list):
            normalize = list(normalize)

        if any(normalize) or as_residue:
            if any(n < 0 for n in n_balance):
                raise ValueError("one or more negative balancing coefficients prohibit using normalized molar formulas")

            for i in range(nspecies):
                if normalize[i] or as_residue:
                    n_balance[i] = 1

            if as_residue:
                if messages:
                    print("equilibrate: using 'as.residue' for molar formulas")
            else:
                if messages:
                    print("equilibrate: using 'normalize' for molar formulas")

            # Set the formula divisor (m.balance) to 1 for species whose formulas are *not* normalized
            m_balance = [m_balance[i] if (normalize[i] or as_residue) else 1
                        for i in range(nspecies)]
        else:
            m_balance = [1] * nspecies

        # Astar: the affinities/2.303RT of formation reactions with
        # formed species in their standard-state activities
        Astar = []
        for i in range(nspecies):
            # 'starve' the affinity of the activity of the species,
            # and normalize the value by the molar ratio
            logact_i = aout['species']['logact'].iloc[i]
            astar_i = (np.array(aout['values'][i]) + logact_i) / m_balance[i]
            Astar.append(astar_i)

        # Choose a method and compute the equilibrium activities of species
        if method is None:
            if all(n == 1 for n in n_balance):
                method = ["boltzmann"]
            else:
                method = ["reaction"]
        elif isinstance(method, str):
            method = [method]

        if messages:
            print(f"equilibrate: using {method[0]} method")

        if method[0] == "boltzmann":
            loga_equil = equil_boltzmann(Astar, n_balance, loga_balance)
        elif method[0] == "reaction":
            loga_equil = equil_reaction(Astar, n_balance, loga_balance, tol)
        else:
            raise ValueError(f"unknown method: {method[0]}")

        # If we normalized the formulas, get back to activities of species
        if any(normalize) and not as_residue:
            loga_equil = [loga_equil[i] - np.log10(m_balance[i])
                         for i in range(nspecies)]

    # Process cr species
    if ncr > 0:
        # cr species were excluded from equilibrium calculation,
        # so get values back to original lengths
        norig = len(dout['values'])
        n_balance = n_balance_orig

        # Ensure ispecies is in index form (not boolean)
        # When ncr == nspecies, ispecies was never converted from boolean to indices
        if isinstance(ispecies, list) and len(ispecies) > 0:
            if isinstance(ispecies[0], bool):
                ispecies = [i for i, x in enumerate(ispecies) if x]

        # Match indices back to original
        imatch = [None] * norig
        for j, orig_idx in enumerate(range(norig)):
            if orig_idx in ispecies:
                imatch[orig_idx] = ispecies.index(orig_idx)

        # Handle None values (when ncr == nspecies, these are set to None)
        # In R, indexing NULL returns NULL, so we need to check for None in Python
        if m_balance is not None:
            m_balance = [m_balance[imatch[i]] if imatch[i] is not None else None
                        for i in range(norig)]
        if Astar is not None:
            Astar = [Astar[imatch[i]] if imatch[i] is not None else None
                    for i in range(norig)]

        # Get a template from first loga_equil to determine shape
        loga_equil1 = loga_equil[0]
        loga_equil_orig = [None] * norig

        for i in range(norig):
            if imatch[i] is not None:
                loga_equil_orig[i] = loga_equil[imatch[i]]

        # Replace None loga_equil with -999 for cr-only species (will be set to 0 where predominant)
        # Use np.full with shape, not full_like, to avoid inheriting NaN values
        ina = [i for i in range(norig) if imatch[i] is None]
        for i in ina:
            loga_equil_orig[i] = np.full(loga_equil1.shape, -999.0)
        loga_equil = loga_equil_orig
        aout['species'] = dout['species']
        aout['values'] = dout['values']

        # Find the grid points where any cr species is predominant
        icr = [i for i in range(len(dout['species']))
               if 'cr' in str(dout['species']['state'].iloc[i])]

        # predominant uses 1-based R indexing (1, 2, 3, ...), convert to 0-based for Python
        predominant = dout['predominant']
        iscr_mask = np.zeros_like(predominant, dtype=bool)
        for icr_idx in icr:
            # Compare with icr_idx + 1 because predominant is 1-based
            iscr_mask |= (predominant == icr_idx + 1)

        # At those grid points, make the aqueous species' activities practically zero
        for i in range(norig):
            if i not in icr:
                loga_equil[i] = np.array(loga_equil[i], copy=True)
                loga_equil[i][iscr_mask] = -999

        # At the grid points where cr species predominate, set their loga_equil to 0 (standard state)
        for i in icr:
            # Compare with i + 1 because predominant is 1-based
            ispredom = (predominant == i + 1)
            loga_equil[i] = np.array(loga_equil[i], copy=True)
            # Set to standard state activity (logact, typically 0) where predominant
            loga_equil[i][ispredom] = dout['species']['logact'].iloc[i]

    # Put together the output
    out = aout.copy()
    out['fun'] = 'equilibrate'  # Mark this as equilibrate output
    out['balance'] = balance
    out['m_balance'] = m_balance
    out['n_balance'] = n_balance
    out['loga_balance'] = loga_balance
    out['Astar'] = Astar
    out['loga_equil'] = loga_equil

    return out

Calculate equilibrium activities of species from affinities.

This function calculates the equilibrium activities of species in (metastable) equilibrium from the affinities of their formation reactions from basis species at given activities.

Parameters

aout : dict: Output from affinity() containing chemical affinities
balance : str, int, or list of float, optional: Balancing method: - None: Autoselect using which_balance() - str: Name of basis species to balance on - "length": Balance on protein length (for proteins) - "volume": Balance on standard-state volume - 1: Balance on one mole of species (formula units) - list: User-defined balancing coefficients
loga_balance : float or list of float, optional: Logarithm of total activity of the balancing basis species If None, calculated from species initial activities and n.balance
ispecies : list of int or list of bool, optional: Indices or boolean mask of species to include in equilibration Default: all species except those with state "cr" (crystalline)
normalize : bool or list of bool, default False: Normalize formulas by balancing coefficients?
as_residue : bool, default False: Use residue basis for proteins?
method : str or list of str, optional: Equilibration method: - "boltzmann": Boltzmann distribution (for n.balance = 1) - "reaction": Reaction-based equilibration (general method) If None, chooses "boltzmann" if all n.balance == 1, else "reaction"
tol : float, default np.finfo(float).eps**0.25: Tolerance for root-finding in reaction method
messages : bool, default True: Whether to print informational messages

Returns

dict: Dictionary containing all aout contents plus: - balance : str or list, Balancing description - m_balance : list, Molar formula divisors - n_balance : list, Balancing coefficients - loga_balance : float or array, Log activity of balanced quantity - Astar : list of arrays, Normalized affinities - loga_equil : list of arrays, Equilibrium log activities

Examples

>>> import pychnosz
>>> pychnosz.basis("CHNOS")
>>> pychnosz.basis("NH3", -2)
>>> pychnosz.species(["alanine", "glycine", "serine"])
>>> a = pychnosz.affinity(NH3=[-80, 60], T=55, P=2000)
>>> e = pychnosz.equilibrate(a, balance="CO2")

Notes

This is a 1:1 replica of the R CHNOSZ equilibrate() function. - Handles both Boltzmann and reaction-based equilibration - Supports normalization and residue basis for proteins - Properly handles crystalline species via predominance diagrams - Implements identical balancing logic to R version

def find_species(name: str, state: str | None = None, messages: bool = True) ‑> int

Expand source code

def find_species(name: str, state: Optional[str] = None, messages: bool = True) -> int:
    """
    Find a single species index by name.

    Parameters
    ----------
    name : str
        Species name, formula, or abbreviation
    state : str, optional
        Physical state
    messages : bool, default True
        If True, print informational messages

    Returns
    -------
    int
        Species index (1-based)

    Raises
    ------
    ValueError
        If species not found or multiple matches
    """
    result = info(name, state, messages=messages)

    if pd.isna(result):
        raise ValueError(f"Species '{name}' not found")

    if isinstance(result, list):
        if len(result) > 1:
            raise ValueError(f"Multiple matches found for '{name}'")
        result = result[0]

    return int(result)

Find a single species index by name.

Parameters

name : str: Species name, formula, or abbreviation
state : str, optional: Physical state
messages : bool, default True: If True, print informational messages

Returns

int: Species index (1-based)

Raises

ValueError: If species not found or multiple matches

def get_basis() ‑> pandas.core.frame.DataFrame | None

Expand source code

def get_basis() -> Optional[pd.DataFrame]:
    """
    Get current basis definition.
    
    Returns
    -------
    pd.DataFrame or None
        Current basis definition
    """
    return thermo().basis

Get current basis definition.

Returns

pd.DataFrame or None: Current basis definition

def get_species() ‑> pandas.core.frame.DataFrame | None

Expand source code

def get_species() -> Optional[pd.DataFrame]:
    """
    Get current species definition.
    
    Returns
    -------
    pd.DataFrame or None
        Current species definition
    """
    return thermo().species

Get current species definition.

Returns

pd.DataFrame or None: Current species definition

def get_species_data(species: str | int, state: str | None = None, messages: bool = True) ‑> pandas.core.frame.DataFrame

Expand source code

def get_species_data(species: Union[str, int], state: Optional[str] = None, messages: bool = True) -> pd.DataFrame:
    """
    Get complete thermodynamic data for a species.

    Parameters
    ----------
    species : str or int
        Species name/formula or index
    state : str, optional
        Physical state
    messages : bool, default True
        Display messages?

    Returns
    -------
    pd.DataFrame
        Species thermodynamic data
    """
    if isinstance(species, str):
        species = find_species(species, state)

    return info(species, messages=messages)

Get complete thermodynamic data for a species.

Parameters

species : str or int: Species name/formula or index
state : str, optional: Physical state
messages : bool, default True: Display messages?

Returns

pd.DataFrame: Species thermodynamic data

Expand source code

def info(species: Optional[Union[str, int, List[Union[str, int]], pd.Series]] = None,
         state: Optional[Union[str, List[str]]] = None,
         check_it: bool = True,
         messages: bool = True) -> Union[pd.DataFrame, int, List[int], None]:
    """
    Search for species in the thermodynamic database.

    Parameters
    ----------
    species : str, int, list of str/int, pd.Series, or None
        Species name, formula, abbreviation, or OBIGT index.
        Can also be a pandas Series (e.g., from retrieve()).
        If None, returns summary information about the database.
    state : str, list of str, or None
        Physical state(s) to match ('aq', 'cr', 'gas', 'liq')
    check_it : bool, default True
        Whether to perform consistency checks on thermodynamic data
    messages : bool, default True
        Whether to print informational messages

    Returns
    -------
    pd.DataFrame, int, list of int, or None
        - If species is None: prints database summary, returns None
        - If species is numeric: returns DataFrame with species data
        - If species is string: returns species index(es) or NA if not found

    Examples
    --------
    >>> # Get database summary
    >>> info()

    >>> # Find species index
    >>> info("H2O")

    >>> # Get species data by index
    >>> info(1)

    >>> # Search with specific state
    >>> info("CO2", "aq")

    >>> # Use output from retrieve()
    >>> zn_species = retrieve("Zn", ["O", "H"], state="aq")
    >>> info(zn_species)
    """
    thermo_obj = thermo()

    # Initialize database if needed
    if not thermo_obj.is_initialized():
        thermo_obj.reset()

    # Return database summary if no species specified
    if species is None:
        return _print_database_summary(thermo_obj, messages)

    # Handle pandas Series (e.g., from retrieve())
    if isinstance(species, pd.Series):
        # Extract the integer indices from the Series values
        indices = species.values.tolist()
        return _info_numeric(indices, thermo_obj, check_it, messages)

    # Handle numeric species indices
    if isinstance(species, (int, list)) and all(isinstance(s, int) for s in (species if isinstance(species, list) else [species])):
        return _info_numeric(species, thermo_obj, check_it, messages)

    # Handle string species names/formulas
    if isinstance(species, (str, list)):
        return _info_character(species, state, thermo_obj, messages)

    raise ValueError(f"Invalid species type: {type(species)}")

Search for species in the thermodynamic database.

Parameters

species : str, int, list of str/int, pd.Series, or None: Species name, formula, abbreviation, or OBIGT index. Can also be a pandas Series (e.g., from retrieve()). If None, returns summary information about the database.
state : str, list of str, or None: Physical state(s) to match ('aq', 'cr', 'gas', 'liq')
check_it : bool, default True: Whether to perform consistency checks on thermodynamic data
messages : bool, default True: Whether to print informational messages

Returns

pd.DataFrame, int, list of int, or None

If species is None: prints database summary, returns None
If species is numeric: returns DataFrame with species data
If species is string: returns species index(es) or NA if not found

Examples

>>> # Get database summary
>>> info()

>>> # Find species index
>>> info("H2O")

>>> # Get species data by index
>>> info(1)

>>> # Search with specific state
>>> info("CO2", "aq")

>>> # Use output from retrieve()
>>> zn_species = retrieve("Zn", ["O", "H"], state="aq")
>>> info(zn_species)

def is_basis_defined() ‑> bool

Expand source code

def is_basis_defined() -> bool:
    """
    Check if basis is currently defined.
    
    Returns
    -------
    bool
        True if basis is defined
    """
    return thermo().basis is not None

Check if basis is currently defined.

Returns

bool: True if basis is defined

def is_species_defined() ‑> bool

Expand source code

def is_species_defined() -> bool:
    """
    Check if species are currently defined.
    
    Returns
    -------
    bool
        True if species are defined
    """
    return thermo().species is not None

Check if species are currently defined.

Returns

bool: True if species are defined

def list_species(pattern: str | None = None, state: str | None = None) ‑> pandas.core.frame.DataFrame

Expand source code

def list_species(pattern: Optional[str] = None, state: Optional[str] = None) -> pd.DataFrame:
    """
    List species matching criteria.
    
    Parameters
    ----------
    pattern : str, optional
        Pattern to match in species names
    state : str, optional
        Physical state to filter by
        
    Returns
    -------
    pd.DataFrame
        Matching species information
    """
    thermo_obj = thermo()
    if not thermo_obj.is_initialized():
        thermo_obj.reset()
    
    obigt = thermo_obj.obigt.copy()
    
    # Filter by state
    if state is not None:
        obigt = obigt[obigt['state'] == state]
    
    # Filter by pattern
    if pattern is not None:
        mask = obigt['name'].str.contains(pattern, case=False, na=False)
        obigt = obigt[mask]
    
    # Return relevant columns
    columns = ['name', 'formula', 'state', 'ref1', 'model']
    return obigt[columns].reset_index(drop=True)

List species matching criteria.

Parameters

pattern : str, optional: Pattern to match in species names
state : str, optional: Physical state to filter by

Returns

pd.DataFrame: Matching species information

def n_species() ‑> int

Expand source code

def n_species() -> int:
    """
    Get number of defined species.
    
    Returns
    -------
    int
        Number of defined species
    """
    species_df = get_species()
    return len(species_df) if species_df is not None else 0

Get number of defined species.

Returns

int: Number of defined species

def preset_basis(key: str | None = None, messages: bool = True, global_state: bool = True) ‑> List[str] | pandas.core.frame.DataFrame

Expand source code

def preset_basis(key: Optional[str] = None, messages: bool = True, global_state: bool = True) -> Union[List[str], pd.DataFrame]:
    """
    Load a preset basis definition by keyword.

    Parameters
    ----------
    key : str or None
        Preset keyword. If None, returns available keywords.
    messages : bool, default True
        If True, print informational messages
    global_state : bool, default True
        If True, store in global thermo().basis (default)
        If False, return without storing globally

    Returns
    -------
    list of str or pd.DataFrame
        Available keywords or basis definition

    Examples
    --------
    >>> # List available presets
    >>> preset_basis()

    >>> # Load CHNOS basis
    >>> preset_basis("CHNOS")
    """
    keywords = _get_preset_basis_keywords()
    
    if key is None:
        return keywords
    
    if key not in keywords:
        raise ValueError(f"{key} is not a keyword for preset basis species")

    # Clear existing basis only if using global state
    if global_state:
        basis(delete=True)
    
    # Define preset species
    species_map = {
        "CHNOS": ["CO2", "H2O", "NH3", "H2S", "oxygen"],
        "CHNOS+": ["CO2", "H2O", "NH3", "H2S", "oxygen", "H+"],
        "CHNOSe": ["CO2", "H2O", "NH3", "H2S", "e-", "H+"],
        "CHNOPS+": ["CO2", "H2O", "NH3", "H3PO4", "H2S", "oxygen", "H+"],
        "CHNOPSe": ["CO2", "H2O", "NH3", "H3PO4", "H2S", "e-", "H+"],
        "MgCHNOPS+": ["Mg+2", "CO2", "H2O", "NH3", "H3PO4", "H2S", "oxygen", "H+"],
        "MgCHNOPSe": ["Mg+2", "CO2", "H2O", "NH3", "H3PO4", "H2S", "e-", "H+"],
        "FeCHNOS": ["Fe2O3", "CO2", "H2O", "NH3", "H2S", "oxygen"],
        "FeCHNOS+": ["Fe2O3", "CO2", "H2O", "NH3", "H2S", "oxygen", "H+"],
        "QEC4": ["glutamine", "glutamic acid", "cysteine", "H2O", "oxygen"],
        "QEC": ["glutamine", "glutamic acid", "cysteine", "H2O", "oxygen"],
        "QEC+": ["glutamine", "glutamic acid", "cysteine", "H2O", "oxygen", "H+"],
        "QCa": ["glutamine", "cysteine", "acetic acid", "H2O", "oxygen"],
        "QCa+": ["glutamine", "cysteine", "acetic acid", "H2O", "oxygen", "H+"]
    }
    
    species = species_map[key]
    logact = _preset_logact(species)

    # Special case for QEC4
    if key == "QEC4":
        logact[:3] = [-4.0] * 3

    return basis(species, logact=logact, messages=messages, global_state=global_state)

Load a preset basis definition by keyword.

Parameters

key : str or None: Preset keyword. If None, returns available keywords.
messages : bool, default True: If True, print informational messages
global_state : bool, default True: If True, store in global thermo().basis (default) If False, return without storing globally

Returns

list of str or pd.DataFrame: Available keywords or basis definition

Examples

>>> # List available presets
>>> preset_basis()

>>> # Load CHNOS basis
>>> preset_basis("CHNOS")

Expand source code

def retrieve(elements: Optional[Union[str, List[str], Tuple[str]]] = None,
            ligands: Optional[Union[str, List[str], Tuple[str]]] = None,
            state: Optional[Union[str, List[str], Tuple[str]]] = None,
            T: Optional[Union[float, List[float]]] = None,
            P: Union[str, float, List[float]] = "Psat",
            add_charge: bool = True,
            hide_groups: bool = True,
            messages: bool = True) -> pd.Series:
    """
    Retrieve species containing specified elements.

    Parameters
    ----------
    elements : str, list of str, or tuple of str, optional
        Elements in a chemical system. If `elements` is a string, retrieve
        species containing that element.

        E.g., `retrieve("Au")` will return all species containing Au.

        If `elements` is a list, retrieve species that have all of the elements
        in the list.

        E.g., `retrieve(["Au", "Cl"])` will return all species that have both
        Au and Cl.

        If `elements` is a tuple, retrieve species relevant to the system,
        including charged species.

        E.g., `retrieve(("Au", "Cl"))` will return species that have Au
        and/or Cl, including charged species, but no other elements.

    ligands : str, list of str, or tuple of str, optional
        Elements present in any ligands. This affects the species search:
        - If ligands is a state ('cr', 'liq', 'gas', 'aq'), use that as the state filter
        - Otherwise, include elements in the system defined by ligands

    state : str, list of str, or tuple of str, optional
        Filter the result on these state(s) ('aq', 'cr', 'gas', 'liq').

    T : float or list of float, optional
        Temperature (K) for filtering species with non-NA Gibbs energy.

    P : str, float, or list of float, default "Psat"
        Pressure for Gibbs energy calculation. Default is "Psat" (saturation).

    add_charge : bool, default True
        For chemical systems (tuple input), automatically include charge (Z).

    hide_groups : bool, default True
        Exclude group species (names in brackets like [CH2]).

    messages : bool, default True
        Print informational messages. If False, suppress messages about
        updating the stoichiometric matrix and other information.

    Returns
    -------
    pd.Series
        Series of species indices (1-based) with chemical formulas as index.
        This behaves like R's named vector - you can access by name or position.
        Names are chemical formulas (or 'e-' for electrons).
        Values are species indices that match the criteria.

    Examples
    --------
    >>> # All species containing Au
    >>> retrieve("Au")

    >>> # All species that have both Au and Cl
    >>> retrieve(["Au", "Cl"])

    >>> # Au-Cl system: species with Au and/or Cl, including charged species
    >>> retrieve(("Au", "Cl"))

    >>> # All Au-bearing species in the Au-Cl system
    >>> retrieve("Au", ("Cl",))

    >>> # All uncharged Au-bearing species in the Au-Cl system
    >>> retrieve("Au", ("Cl",), add_charge=False)

    >>> # Minerals in the system SiO2-MgO-CaO-CO2
    >>> retrieve(("Si", "Mg", "Ca", "C", "O"), state="cr")

    Notes
    -----
    This function uses 1-based indexing to match R CHNOSZ conventions.
    The returned indices are labels that can be used with .loc[], not positions.
    """
    # Empty argument handling
    if elements is None:
        return pd.Series([], dtype=int)

    thermo_obj = thermo()

    # Initialize database if needed
    if not thermo_obj.is_initialized():
        thermo_obj.reset()

    ## Stoichiometric matrix
    # Get stoichiometric matrix from thermo object
    stoich = _get_or_update_stoich(thermo_obj, messages=messages)

    ## Generate error for missing element(s)
    allelements = []
    if elements is not None:
        if isinstance(elements, (list, tuple)):
            allelements.extend(elements)
        else:
            allelements.append(elements)
    if ligands is not None:
        if isinstance(ligands, (list, tuple)):
            allelements.extend(ligands)
        else:
            allelements.append(ligands)

    not_present = [elem for elem in allelements if elem not in stoich.columns and elem != "all"]
    if not_present:
        if len(not_present) == 1:
            raise ValueError(f'"{not_present[0]}" is not an element that is present in any species in the database')
        else:
            raise ValueError(f'"{", ".join(not_present)}" are not elements that are present in any species in the database')

    ## Handle 'ligands' argument
    if ligands is not None:
        # If 'ligands' is cr, liq, gas, or aq, use that as the state
        if ligands in ['cr', 'liq', 'gas', 'aq']:
            state = ligands
            ispecies = retrieve(elements, add_charge=add_charge, messages=messages)
        else:
            # Include the element in the system defined by the ligands list
            # Convert ligands to tuple if it's a string or list
            if isinstance(ligands, str):
                ligands_tuple = (ligands,)
            elif isinstance(ligands, list):
                ligands_tuple = tuple(ligands)
            else:
                ligands_tuple = ligands

            # Combine elements with ligands
            if isinstance(elements, str):
                combined = (elements,) + ligands_tuple
            elif isinstance(elements, list):
                combined = tuple(elements) + ligands_tuple
            else:
                combined = elements + ligands_tuple

            # Call retrieve() for each argument and take the intersection
            r1 = retrieve(elements, add_charge=add_charge, messages=messages)
            r2 = retrieve(combined, add_charge=add_charge, messages=messages)
            ispecies = np.intersect1d(r1, r2)
    else:
        ## Species identification
        ispecies_list = []

        # Determine if elements is a tuple (chemical system)
        is_system = isinstance(elements, tuple)

        # Convert single string to list for iteration
        if isinstance(elements, str):
            elements_iter = [elements]
        else:
            elements_iter = list(elements)

        # Automatically add charge to a system
        if add_charge and is_system and "Z" not in elements_iter:
            elements_iter.append("Z")

        # Proceed element-by-element
        for element in elements_iter:
            if element == "all":
                ispecies_list.append(np.array(thermo_obj.obigt.index.tolist()))
            else:
                # Identify the species that have the element
                has_element = (stoich[element] != 0)
                ispecies_list.append(np.array(stoich.index[has_element].tolist()))

        # Now we have a list of ispecies (one array for each element)
        # What we do next depends on whether the argument is a tuple or not
        if is_system:
            # For a chemical system, all species are included that do not contain any other elements
            ispecies = np.unique(np.concatenate(ispecies_list))

            # Get columns not in elements
            other_columns = [col for col in stoich.columns if col not in elements_iter]

            if other_columns:
                # Check which species have other elements
                otherstoich = stoich.loc[ispecies, other_columns]
                iother = (otherstoich != 0).any(axis=1)
                ispecies = ispecies[~iother.values]
        else:
            # Get species that have all the elements; the species must be present in each array
            # This is the intersection of all arrays
            ispecies = ispecies_list[0]
            for arr in ispecies_list[1:]:
                ispecies = np.intersect1d(ispecies, arr)

    # Exclude groups
    if hide_groups:
        obigt = thermo_obj.obigt
        names = obigt.loc[ispecies, 'name'].values
        is_group = np.array([bool(re.match(r'^\[.*\]$', str(name))) for name in names])
        ispecies = ispecies[~is_group]

    # Filter on state
    if state is not None:
        obigt = thermo_obj.obigt

        # Ensure state is a list
        if isinstance(state, str):
            state_list = [state]
        elif isinstance(state, tuple):
            state_list = list(state)
        else:
            state_list = state

        species_states = obigt.loc[ispecies, 'state'].values
        istate = np.array([s in state_list for s in species_states])
        ispecies = ispecies[istate]

    # Require non-NA Delta G0 at specific temperature
    if T is not None:
        from .subcrt import subcrt
        # Suppress warnings and (optionally) messages
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            try:
                result = subcrt(ispecies.tolist(), T=T, P=P, messages=False, show=False)
                if result is not None and 'out' in result:
                    G_values = []
                    for species_out in result['out']:
                        if isinstance(species_out, dict) and 'G' in species_out:
                            G = species_out['G']
                            if isinstance(G, (list, np.ndarray)):
                                G_values.append(G[0] if len(G) > 0 else np.nan)
                            else:
                                G_values.append(G)
                        else:
                            G_values.append(np.nan)

                    # Filter out species with NA G values
                    has_G = np.array([not pd.isna(g) for g in G_values])
                    ispecies = ispecies[has_G]
            except:
                # If subcrt fails, keep all species
                pass

    # Create a pandas Series with formula names (R-style named vector)
    obigt = thermo_obj.obigt
    formulas = obigt.loc[ispecies, 'formula'].values

    # Use e- instead of (Z-1) for electron
    formulas = np.array([f if f != '(Z-1)' else 'e-' for f in formulas])

    # Return empty Series if nothing found
    if len(ispecies) == 0:
        return pd.Series([], dtype=int)

    # Create a pandas Series with formulas as index (R-style named vector)
    # This allows both named access (result["Au"]) and positional access (result[0])
    result = pd.Series(ispecies, index=formulas)

    return result

Retrieve species containing specified elements.

Parameters

elements : str, list of str, or tuple of str, optional

Elements in a chemical system. If elements is a string, retrieve species containing that element.

E.g., retrieve("Au") will return all species containing Au.

If elements is a list, retrieve species that have all of the elements in the list.

E.g., retrieve(["Au", "Cl"]) will return all species that have both Au and Cl.

If elements is a tuple, retrieve species relevant to the system, including charged species.

E.g., retrieve(("Au", "Cl")) will return species that have Au and/or Cl, including charged species, but no other elements.

ligands : str, list of str, or tuple of str, optional

Elements present in any ligands. This affects the species search: - If ligands is a state ('cr', 'liq', 'gas', 'aq'), use that as the state filter - Otherwise, include elements in the system defined by ligands

state : str, list of str, or tuple of str, optional

Filter the result on these state(s) ('aq', 'cr', 'gas', 'liq').

T : float or list of float, optional

Temperature (K) for filtering species with non-NA Gibbs energy.

P : str, float, or list of float, default "Psat"

Pressure for Gibbs energy calculation. Default is "Psat" (saturation).

add_charge : bool, default True

For chemical systems (tuple input), automatically include charge (Z).

hide_groups : bool, default True

Exclude group species (names in brackets like [CH2]).

messages : bool, default True

Print informational messages. If False, suppress messages about updating the stoichiometric matrix and other information.

Returns

pd.Series: Series of species indices (1-based) with chemical formulas as index. This behaves like R's named vector - you can access by name or position. Names are chemical formulas (or 'e-' for electrons). Values are species indices that match the criteria.

Examples

>>> # All species containing Au
>>> retrieve("Au")

>>> # All species that have both Au and Cl
>>> retrieve(["Au", "Cl"])

>>> # Au-Cl system: species with Au and/or Cl, including charged species
>>> retrieve(("Au", "Cl"))

>>> # All Au-bearing species in the Au-Cl system
>>> retrieve("Au", ("Cl",))

>>> # All uncharged Au-bearing species in the Au-Cl system
>>> retrieve("Au", ("Cl",), add_charge=False)

>>> # Minerals in the system SiO2-MgO-CaO-CO2
>>> retrieve(("Si", "Mg", "Ca", "C", "O"), state="cr")

Notes

This function uses 1-based indexing to match R CHNOSZ conventions. The returned indices are labels that can be used with .loc[], not positions.

Expand source code

def species(species: Optional[Union[str, int, List[Union[str, int]], pd.Series]] = None,
            state: Optional[Union[str, List[str]]] = None,
            delete: bool = False,
            add: bool = False,
            index_return: bool = False,
            global_state: bool = True,
            basis: Optional[pd.DataFrame] = None,
            messages: bool = True) -> Optional[Union[pd.DataFrame, List[int]]]:
    """
    Define species of interest for thermodynamic calculations.

    Parameters
    ----------
    species : str, int, list, pd.Series, or None
        Species name(s), formula(s), or index(es).
        Can also be a pandas Series (e.g., from retrieve()).
        If None, returns current species definition.
    state : str, list of str, or None
        Physical state(s) for the species
    delete : bool, default False
        If True, delete species (all if species is None)
    add : bool, default False
        If True, add to existing species instead of replacing
    index_return : bool, default False
        If True, return species indices instead of DataFrame
    global_state : bool, default True
        If True, store species in global thermo().species (default behavior)
        If False, return species definition without storing globally (local state)
    basis : pd.DataFrame, optional
        Basis species definition to use (if not using global basis)
        Required when global_state=False and basis is not defined globally
    messages : bool, default True
        If True, print informational messages

    Returns
    -------
    pd.DataFrame, list of int, or None
        Species definition DataFrame or indices, or None if deleted

    Examples
    --------
    >>> # Define species of interest
    >>> species(["CO2", "HCO3-", "CO3-2"])

    >>> # Add more species
    >>> species(["CH4", "C2H4"], add=True)

    >>> # Delete specific species
    >>> species(["CO2"], delete=True)

    >>> # Delete all species
    >>> species(delete=True)

    >>> # Use output from retrieve()
    >>> zn_species = retrieve("Zn", ["O", "H"], state="aq")
    >>> species(zn_species)
    """
    thermo_obj = thermo()

    # Handle pandas Series (e.g., from retrieve())
    if isinstance(species, pd.Series):
        # Extract the integer indices from the Series values
        species = species.values.tolist()

    # Handle NA species
    if species is pd.NA or species is np.nan:
        raise SpeciesError("'species' is NA")
    
    # Handle deletion
    if delete:
        return _delete_species(species, thermo_obj)
    
    # Return current species if no arguments
    if species is None and state is None:
        if index_return:
            if thermo_obj.species is not None:
                return list(range(1, len(thermo_obj.species) + 1))
            else:
                return []
        return thermo_obj.species
    
    # Use all species indices if species is None but state is given
    if species is None and thermo_obj.species is not None:
        species = list(range(1, len(thermo_obj.species) + 1))
    
    # Process state argument
    state = _process_state_argument(state)
    
    # Make species and state same length
    species, state = _match_argument_lengths(species, state)
    
    # Handle numeric state (treat as logact)
    logact = None
    if state is not None and len(state) > 0:
        if isinstance(state[0], (int, float)):
            logact = [float(s) for s in state]
            state = None
        elif _can_be_numeric(state[0]):
            logact = [float(s) for s in state]
            state = None
    
    # Handle species-state combinations for proteins
    if state is not None:
        species, state = _handle_protein_naming(species, state, thermo_obj)
    
    # Process species argument
    iOBIGT = None
    if isinstance(species[0], str):
        # Check if species are in current definition
        if thermo_obj.species is not None:
            existing_indices = _match_existing_species(species, thermo_obj.species)
            if all(idx is not None for idx in existing_indices) and logact is not None:
                # Update activities of existing species
                # Update activities of existing species directly
                species_indices = [i+1 for i in existing_indices]  # Convert to 1-based
                return _update_existing_species(species_indices, None, logact, index_return, thermo_obj)
        
        # Look up species in database
        iOBIGT = _lookup_species_indices(species, state, messages)
        
    else:
        # Handle numeric species
        if thermo_obj.species is not None:
            max_current = len(thermo_obj.species)
            if all(isinstance(s, int) and s <= max_current for s in species):
                # Referring to existing species
                return _update_existing_species(species, state, logact, index_return, thermo_obj)
        
        # Referring to OBIGT indices
        iOBIGT = species
    
    # Create or modify species definition
    if iOBIGT is not None:
        return _create_species_definition(iOBIGT, state, logact, add, index_return, thermo_obj, global_state, basis)
    else:
        return _update_existing_species(species, state, logact, index_return, thermo_obj)

Define species of interest for thermodynamic calculations.

Parameters

species : str, int, list, pd.Series, or None: Species name(s), formula(s), or index(es). Can also be a pandas Series (e.g., from retrieve()). If None, returns current species definition.
state : str, list of str, or None: Physical state(s) for the species
delete : bool, default False: If True, delete species (all if species is None)
add : bool, default False: If True, add to existing species instead of replacing
index_return : bool, default False: If True, return species indices instead of DataFrame
global_state : bool, default True: If True, store species in global thermo().species (default behavior) If False, return species definition without storing globally (local state)
basis : pd.DataFrame, optional: Basis species definition to use (if not using global basis) Required when global_state=False and basis is not defined globally
messages : bool, default True: If True, print informational messages

Returns

pd.DataFrame, list of int, or None: Species definition DataFrame or indices, or None if deleted

Examples

>>> # Define species of interest
>>> species(["CO2", "HCO3-", "CO3-2"])

>>> # Add more species
>>> species(["CH4", "C2H4"], add=True)

>>> # Delete specific species
>>> species(["CO2"], delete=True)

>>> # Delete all species
>>> species(delete=True)

>>> # Use output from retrieve()
>>> zn_species = retrieve("Zn", ["O", "H"], state="aq")
>>> species(zn_species)

Expand source code

def subcrt(species: Union[str, List[str], int, List[int]],
           coeff: Union[int, float, List[Union[int, float]], None] = 1,
           state: Optional[Union[str, List[str]]] = None,
           property: List[str] = ["logK", "G", "H", "S", "V", "Cp"],
           T: Union[float, List[float], np.ndarray] = np.concatenate([[273.16], 273.15 + np.arange(25, 351, 25)]),
           P: Union[float, List[float], np.ndarray, str] = "Psat",
           grid: Optional[str] = None,
           convert: bool = True,
           exceed_Ttr: bool = True,
           exceed_rhomin: bool = False,
           logact: Optional[List[float]] = None,
           autobalance: bool = True,
           use_polymorphs: bool = True,
           IS: Union[float, List[float]] = 0,
           messages: bool = True,
           show: bool = True,
           basis: Optional[pd.DataFrame] = None,
           _recursion_count: int = 0) -> SubcrtResult:
    """
    Calculate standard molal thermodynamic properties of species and reactions.
    
    This function reproduces the behavior of R CHNOSZ subcrt() exactly, including
    all argument handling, validation, calculations, and output formatting.
    
    Parameters
    ----------
    species : str, list of str, int, or list of int
        Species names, formulas, or indices in thermodynamic database
    coeff : int, float, list, or None
        Stoichiometric coefficients for reaction calculation
        If 1 (default), calculate individual species properties
        If list, calculate reaction with given coefficients
    state : str, list of str, or None
        Physical states ("aq", "cr", "gas", "liq") for species
    property : list of str
        Properties to calculate: "logK", "G", "H", "S", "V", "Cp", "rho", "kT", "E"
    T : float, list, or ndarray
        Temperature(s) in K (default: 273.16, then 298.15 to 623.15 by 25 K)
    P : float, list, ndarray, or "Psat"
        Pressure(s) in bar or "Psat" for saturation pressure
    grid : str or None
        Grid calculation mode: "T", "P", "IS", or None
    convert : bool
        Convert temperature/pressure units (default: True)
    exceed_Ttr : bool
        Allow calculations beyond transition temperatures (default: False)
    exceed_rhomin : bool
        Allow calculations below minimum water density (default: False)
    logact : list of float or None
        Activity coefficients (log10 scale)
    autobalance : bool
        Automatically balance reactions using basis species (default: True)
    use_polymorphs : bool
        Include polymorphic phases for minerals (default: True)
    IS : float or list of float
        Ionic strength for activity corrections (default: 0)
    messages : bool, default True
        Whether to print informational messages
    show : bool, default True
        Whether to display result tables in Jupyter notebooks (default: True)
        Set to False when calling subcrt() from other functions
    basis : pd.DataFrame, optional
        Basis species definition to use for autobalancing (if not using global basis)

    Returns
    -------
    SubcrtResult
        Object containing:
        - species: DataFrame with species information
        - out: DataFrame with calculated thermodynamic properties
        - reaction: DataFrame with reaction stoichiometry (if reaction)
        - warnings: List of warning messages
        
    Examples
    --------
    >>> import pychnosz
    >>> pychnosz.reset()
    >>> 
    >>> # Single species properties
    >>> result = subcrt("H2O", T=25, P=1)
    >>> print(result.out[["G", "H", "S", "Cp"]])
    >>> 
    >>> # Reaction calculation
    >>> result = subcrt(["H2O", "H+", "OH-"], [-1, 1, 1], T=25, P=1)
    >>> print(f"Water dissociation ΔG° = {result.out.G[0]:.3f} kJ/mol")
    >>> 
    >>> # Temperature array
    >>> result = subcrt("quartz", T=[25, 100, 200], P=1)
    >>> print(result.out[["T", "G", "H", "S"]])
    
    Notes
    -----
    This implementation maintains complete fidelity to R CHNOSZ subcrt():
    - Identical argument processing and validation
    - Same species lookup and polymorphic handling
    - Exact HKF and CGL equation-of-state calculations
    - Same reaction balancing and autobalance logic
    - Identical output structure and formatting
    - Same warning and error messages
    """
    
    result = SubcrtResult()

    # Prevent infinite recursion in auto-balancing
    if _recursion_count > 5:
        result.warnings.append("Maximum recursion depth reached in auto-balancing")
        return result

    try:
        # === Phase 1: Argument Processing and Validation ===
        # (Exactly matching R subcrt.R lines 21-77)
        
        # Handle argument reordering if states are second argument
        if coeff != 1 and isinstance(coeff, (list, str)) and isinstance(coeff[0] if isinstance(coeff, list) else coeff, str):
            # States were passed as second argument - reorder
            if state is not None:
                if isinstance(state, (int, float)) or (isinstance(state, list) and all(isinstance(x, (int, float)) for x in state)):
                    # Third argument is coefficients
                    new_coeff = state
                    new_state = coeff
                    return subcrt(species, new_coeff, new_state, property, T, P, grid,
                                convert, exceed_Ttr, exceed_rhomin, logact, autobalance, use_polymorphs, IS,
                                messages, show, basis, _recursion_count)
                else:
                    raise ValueError("If both coeff and state are given, one should be numeric coefficients")
            else:
                # Only states provided, no coefficients
                new_state = coeff
                return subcrt(species, 1, new_state, property, T, P, grid,
                            convert, exceed_Ttr, exceed_rhomin, logact, autobalance, use_polymorphs, IS,
                            messages, show, basis, _recursion_count)
        
        # Determine if this is a reaction calculation
        do_reaction = (coeff != 1 and coeff is not None and 
                      (isinstance(coeff, list) or isinstance(coeff, (int, float)) and coeff != 1))
        
        # Convert inputs to consistent formats
        species = [species] if isinstance(species, (str, int)) else list(species)
        if state is not None:
            state = [state] if isinstance(state, str) else list(state)
            # Make species and state same length
            if len(state) > len(species):
                species = species * (len(state) // len(species) + 1)
                species = species[:len(state)]
            elif len(species) > len(state):
                state = state * (len(species) // len(state) + 1)
                state = state[:len(species)]
        
        if do_reaction:
            if isinstance(coeff, (int, float)):
                coeff = [coeff]
            coeff = list(coeff)
        
        # Validate properties
        allowed_properties = ["rho", "logK", "G", "H", "S", "Cp", "V", "kT", "E"]
        if isinstance(property, str):
            property = [property]
        
        invalid_props = [p for p in property if p not in allowed_properties]
        if invalid_props:
            if len(invalid_props) == 1:
                raise ValueError(f"invalid property name: {invalid_props[0]}")
            else:
                raise ValueError(f"invalid property names: {', '.join(invalid_props)}")
        
        # Length checking
        if do_reaction and len(species) != len(coeff):
            raise ValueError("the length of 'coeff' must equal the number of species")
        
        if logact is not None and len(logact) != len(species):
            raise ValueError("the length of 'logact' must equal the number of species")
        
        # Unit conversion
        T_array = np.atleast_1d(np.asarray(T, dtype=float))
        # Convert temperature to Kelvin if convert=True (matching R CHNOSZ behavior)
        # R: if(convert) T <- envert(T, "K") - converts Celsius input to Kelvin
        # Default parameter is [273.16, 298.15, 323.15, ..., 623.15] which is already in K, so only convert user input
        default_T = np.concatenate([[273.16], 273.15 + np.arange(25, 351, 25)])
        if convert and not np.array_equal(T_array, default_T[:len(T_array)]):
            # User provided temperature, assume Celsius and convert to Kelvin
            T_array = T_array + 273.15

        # Handle T=273.15K (0°C) exactly - R CHNOSZ uses 273.16K (0.01°C) instead
        # This avoids numerical issues at the freezing point
        T_array = np.where(np.abs(T_array - 273.15) < 1e-10, 273.16, T_array)
        
        if isinstance(P, str) and P == "Psat":
            P_array = "Psat"
        else:
            P_array = np.atleast_1d(np.asarray(P, dtype=float))
            # P is assumed to be in bar (R CHNOSZ standard)
        
        # Warning for high temperatures with Psat
        # Check if P is "Psat" (compare to the original P, not P_array which may be converted)
        if isinstance(P, str) and P == "Psat" and np.any(T_array > 647.067):
            n_over = np.sum(T_array > 647.067)
            vtext = "value" if n_over == 1 else "values"
            result.warnings.append(f"P = 'Psat' undefined for T > Tcritical ({n_over} T {vtext})")
        
        # === Phase 2: Grid Processing ===
        # Handle grid calculations (T-P arrays)
        if grid is not None:
            if grid == "T":
                # Grid over temperature
                new_T = []
                for temp in T_array:
                    if isinstance(P_array, str):
                        new_T.extend([temp] * 1)
                    else:
                        new_T.extend([temp] * len(P_array))
                if isinstance(P_array, str):
                    new_P = P_array
                else:
                    new_P = list(P_array) * len(T_array)
                T_array = np.array(new_T)
                P_array = new_P
            elif grid == "P":
                # Grid over pressure
                if not isinstance(P_array, str):
                    new_P = []
                    for press in P_array:
                        new_P.extend([press] * len(T_array))
                    new_T = list(T_array) * len(P_array)
                    T_array = np.array(new_T)
                    P_array = np.array(new_P)
            elif grid == "IS":
                # Grid over ionic strength
                IS_array = np.atleast_1d(np.asarray(IS))
                original_len = max(len(T_array), len(P_array) if not isinstance(P_array, str) else 1)
                new_IS = []
                for ionic_str in IS_array:
                    new_IS.extend([ionic_str] * original_len)
                T_array = np.tile(T_array, len(IS_array))
                if isinstance(P_array, str):
                    P_array = P_array
                else:
                    P_array = np.tile(P_array, len(IS_array))
                IS = new_IS
        else:
            # Ensure T and P are same length
            if isinstance(P_array, str):
                # P = "Psat", keep T as is
                pass
            else:
                max_len = max(len(T_array), len(P_array))
                if len(T_array) < max_len:
                    T_array = np.resize(T_array, max_len)
                if len(P_array) < max_len:
                    P_array = np.resize(P_array, max_len)
        
        # === Phase 3: Species Lookup and Validation ===
        result.species, result.reaction, iphases, isaq, isH2O, iscgl, polymorph_species, ispecies = _process_species(
            species, state, coeff, do_reaction, use_polymorphs, messages=messages)
        
        # === Phase 4: Generate Output Message ===
        if (len(species) > 1 or convert) and messages:
            _print_subcrt_message(species, T_array, P_array, isaq.any() or isH2O.any(), messages)
        
        # === Phase 5: Reaction Balance Check ===
        if do_reaction and autobalance:
            # Use original ispecies and coeff for balance check (before polymorph expansion)
            # This matches R CHNOSZ behavior where balance check happens before polymorph expansion
            rebalanced_result = _check_reaction_balance(result, species, coeff, state, property,
                                                      T_array, P_array, grid, convert, logact,
                                                      exceed_Ttr, exceed_rhomin, IS, ispecies, _recursion_count, basis, T, P, messages, show)
            if rebalanced_result is not None:  # If reaction was rebalanced, return the result
                return rebalanced_result
        
        # === Phase 6: Property Calculations ===
        result.out, calc_warnings = _calculate_properties(property, iphases, isaq, isH2O, iscgl,
                                         T_array, P_array, exceed_rhomin, exceed_Ttr, IS, logact, do_reaction)
        # Add calculation warnings to result
        result.warnings.extend(calc_warnings)
        
        # === Phase 6.5: Polymorph Selection ===
        if use_polymorphs:
            # Select stable polymorphs based on minimum Gibbs energy
            # Apply to both individual species AND reactions (matching R CHNOSZ behavior)
            thermo_sys = thermo()
            if do_reaction:
                # For reactions, also update coefficients and rebuild reaction DataFrame
                result.out, updated_coeff, updated_iphases = _select_stable_polymorphs(result.out, iphases, polymorph_species, ispecies, thermo_sys, result.reaction['coeff'].tolist(), messages)
                # Rebuild reaction DataFrame with updated species list
                reaction_data = []
                for i, iph in enumerate(updated_iphases):
                    row = thermo_sys.obigt.loc[iph]
                    model = row.get('model', 'unknown')
                    if model == "H2O":
                        water_model = thermo_sys.get_option('water', 'SUPCRT92')
                        model = f"water.{water_model}"
                    reaction_data.append({
                        'coeff': updated_coeff[i],
                        'name': row['name'],
                        'formula': row['formula'],
                        'state': row['state'],
                        'ispecies': iph,
                        'model': model
                    })
                result.reaction = pd.DataFrame(reaction_data)
            else:
                # For individual species, no coefficient update needed
                result.out, _ = _select_stable_polymorphs(result.out, iphases, polymorph_species, ispecies, thermo_sys, None, messages)
            
            # For single species (non-reaction), convert back to DataFrame format
            if not do_reaction and isinstance(result.out, dict) and 'species_data' in result.out and len(result.out['species_data']) == 1:
                result.out = result.out['species_data'][0]
        
        # === Phase 7: Reaction Property Summation ===
        if do_reaction:
            result.out = _sum_reaction_properties(result.out, result.reaction['coeff'])
        
        # === Phase 8: Unit Conversion (convert=True) ===
        if convert:
            # Apply R CHNOSZ compatible conversion
            # This matches the observed behavior where convert=TRUE gives different results
            # than just multiplying by 4.184
            result.out = _apply_r_chnosz_conversion(result.out, do_reaction)
            
            # Recalculate logK after unit conversion to ensure consistency
            if do_reaction and 'logK' in property and 'G' in result.out.columns:
                if not result.out['G'].isna().all():
                    R = 8.314462618  # J/(mol·K) - CODATA 2018 value
                    T_array = np.atleast_1d(T_array)
                    result.out['logK'] = -result.out['G'] / (np.log(10) * R * T_array)

        # Display tables in Jupyter notebooks if show=True
        if show:
            _display_subcrt_result(result)

        # Print warnings (matching R CHNOSZ behavior - lines 621-624)
        if result.warnings and messages:
            for warn in result.warnings:
                warnings.warn(warn)

        return result
        
    except Exception as e:
        result.warnings.append(f"subcrt error: {str(e)}")
        raise

Calculate standard molal thermodynamic properties of species and reactions.

This function reproduces the behavior of R CHNOSZ subcrt() exactly, including all argument handling, validation, calculations, and output formatting.

Parameters

species : str, list of str, int, or list of int: Species names, formulas, or indices in thermodynamic database
coeff : int, float, list, or None: Stoichiometric coefficients for reaction calculation If 1 (default), calculate individual species properties If list, calculate reaction with given coefficients
state : str, list of str, or None: Physical states ("aq", "cr", "gas", "liq") for species
property : list of str: Properties to calculate: "logK", "G", "H", "S", "V", "Cp", "rho", "kT", "E"
T : float, list, or ndarray: Temperature(s) in K (default: 273.16, then 298.15 to 623.15 by 25 K)
P : float, list, ndarray, or "Psat": Pressure(s) in bar or "Psat" for saturation pressure
grid : str or None: Grid calculation mode: "T", "P", "IS", or None
convert : bool: Convert temperature/pressure units (default: True)
exceed_Ttr : bool: Allow calculations beyond transition temperatures (default: False)
exceed_rhomin : bool: Allow calculations below minimum water density (default: False)
logact : list of float or None: Activity coefficients (log10 scale)
autobalance : bool: Automatically balance reactions using basis species (default: True)
use_polymorphs : bool: Include polymorphic phases for minerals (default: True)
IS : float or list of float: Ionic strength for activity corrections (default: 0)
messages : bool, default True: Whether to print informational messages
show : bool, default True: Whether to display result tables in Jupyter notebooks (default: True) Set to False when calling subcrt() from other functions
basis : pd.DataFrame, optional: Basis species definition to use for autobalancing (if not using global basis)

Returns

SubcrtResult: Object containing: - species: DataFrame with species information - out: DataFrame with calculated thermodynamic properties - reaction: DataFrame with reaction stoichiometry (if reaction) - warnings: List of warning messages

Examples

>>> import pychnosz
>>> pychnosz.reset()
>>> 
>>> # Single species properties
>>> result = subcrt("H2O", T=25, P=1)
>>> print(result.out[["G", "H", "S", "Cp"]])
>>> 
>>> # Reaction calculation
>>> result = subcrt(["H2O", "H+", "OH-"], [-1, 1, 1], T=25, P=1)
>>> print(f"Water dissociation ΔG° = {result.out.G[0]:.3f} kJ/mol")
>>> 
>>> # Temperature array
>>> result = subcrt("quartz", T=[25, 100, 200], P=1)
>>> print(result.out[["T", "G", "H", "S"]])

Notes

This implementation maintains complete fidelity to R CHNOSZ subcrt(): - Identical argument processing and validation - Same species lookup and polymorphic handling - Exact HKF and CGL equation-of-state calculations - Same reaction balancing and autobalance logic - Identical output structure and formatting - Same warning and error messages

def thermo(*args, messages=True, **kwargs)

Expand source code

def thermo(*args, messages=True, **kwargs):
    """
    Access or modify the thermodynamic system data object.

    This function provides a convenient interface to get or set parts of the
    thermodynamic system, similar to R's par() function for graphics parameters.

    Parameters
    ----------
    *args : str or list of str
        Names of attributes to retrieve (e.g., "element", "opt$ideal.H")
        For nested access, use "$" notation (e.g., "opt$E.units")
        Special values:
        - "WORM": Load the WORM thermodynamic database (Python-exclusive feature)
    messages : bool, default True
        Whether to print informational messages during operations
    **kwargs : any
        Named arguments to set attributes (e.g., element=new_df, opt={'E.units': 'cal'})
        For nested attributes, use "$" in the name (e.g., **{"opt$ideal.H": False})

    Returns
    -------
    various
        - If no arguments: returns the ThermoSystem object
        - If single unnamed argument: returns the requested value
        - If multiple unnamed arguments: returns list of requested values
        - If named arguments: returns original values before modification

    Examples
    --------
    >>> import pychnosz
    >>> # Get the entire thermo object
    >>> ts = pychnosz.thermo()

    >>> # Get a specific attribute
    >>> elem = pychnosz.thermo("element")

    >>> # Get nested attribute
    >>> e_units = pychnosz.thermo("opt$E.units")

    >>> # Get multiple attributes
    >>> elem, buf = pychnosz.thermo("element", "buffer")

    >>> # Set an attribute
    >>> old_elem = pychnosz.thermo(element=new_element_df)

    >>> # Set nested attribute
    >>> old_units = pychnosz.thermo(**{"opt$ideal.H": False})

    >>> # Load WORM database (Python-exclusive feature)
    >>> pychnosz.thermo("WORM")

    >>> # Suppress messages
    >>> pychnosz.thermo("WORM", messages=False)

    Notes
    -----
    This function mimics the behavior of R CHNOSZ thermo() function,
    providing flexible access to the thermodynamic data object.

    The "WORM" special argument is a Python-exclusive feature that loads
    the Water-Organic-Rock-Microbe thermodynamic database from the
    WORM-db GitHub repository.
    """
    # Get the global thermo system
    thermo_sys = get_thermo_system()

    # If no arguments, return the entire object
    if len(args) == 0 and len(kwargs) == 0:
        return thermo_sys

    # Handle character vectors passed as args (like R's c("basis", "species"))
    # If all args are strings or lists of strings, flatten them
    flat_args = []
    for arg in args:
        if isinstance(arg, (list, tuple)) and all(isinstance(x, str) for x in arg):
            flat_args.extend(arg)
        else:
            flat_args.append(arg)
    args = flat_args

    # Prepare return values list
    return_values = []

    # Ensure system is initialized if needed (before accessing any properties)
    # This prevents auto-initialization from using hardcoded messages=True
    if not thermo_sys.is_initialized() and len(args) > 0:
        thermo_sys.reset(messages=messages)

    # Process unnamed arguments (getters)
    for arg in args:
        if not isinstance(arg, str):
            raise TypeError(f"Unnamed arguments must be strings, got {type(arg)}")

        # Special handling for "WORM" - load WORM database
        if arg.upper() == "WORM":
            from ..data.worm import load_WORM
            success = load_WORM(keep_default=False, messages=messages)
            return_values.append(success)
            continue

        # Parse the argument to get slots (handle nested access with $)
        slots = arg.split('$')

        # Get the value from thermo_sys
        value = thermo_sys
        for slot in slots:
            # Handle OBIGT case-insensitively (R uses uppercase, Python uses lowercase)
            slot_lower = slot.lower()
            if hasattr(value, slot_lower):
                value = getattr(value, slot_lower)
            elif hasattr(value, slot):
                value = getattr(value, slot)
            elif isinstance(value, dict) and slot in value:
                value = value[slot]
            else:
                raise AttributeError(f"Attribute '{arg}' not found in thermo object")

        return_values.append(value)

    # Process named arguments (setters)
    setter_returns = {}

    # Ensure system is initialized if needed (before setting any properties)
    if not thermo_sys.is_initialized() and len(kwargs) > 0:
        thermo_sys.reset(messages=messages)

    for key, new_value in kwargs.items():
        # Parse the key to get slots
        slots = key.split('$')

        # Get the original value before modification
        orig_value = thermo_sys
        for slot in slots:
            # Handle case-insensitive attribute access (for OBIGT, etc.)
            slot_lower = slot.lower()
            if hasattr(orig_value, slot_lower):
                orig_value = getattr(orig_value, slot_lower)
            elif hasattr(orig_value, slot):
                orig_value = getattr(orig_value, slot)
            elif isinstance(orig_value, dict) and slot in orig_value:
                orig_value = orig_value[slot]
            else:
                raise AttributeError(f"Attribute '{key}' not found in thermo object")

        setter_returns[key] = orig_value

        # Set the new value
        if len(slots) == 1:
            # Direct attribute
            # Special handling for OBIGT - normalize index and handle refs
            if slots[0].upper() == 'OBIGT':
                # Handle OBIGT replacement with proper index normalization
                _set_obigt_data(thermo_sys, new_value)
            else:
                # Use lowercase version if it exists (Python convention)
                slot_lower = slots[0].lower()
                if hasattr(thermo_sys, slot_lower):
                    setattr(thermo_sys, slot_lower, new_value)
                else:
                    setattr(thermo_sys, slots[0], new_value)
        elif len(slots) == 2:
            # Nested attribute (e.g., opt$ideal.H)
            parent = getattr(thermo_sys, slots[0])
            if isinstance(parent, dict):
                parent[slots[1]] = new_value
            else:
                setattr(parent, slots[1], new_value)
        else:
            # Deeper nesting (if needed)
            current = thermo_sys
            for i, slot in enumerate(slots[:-1]):
                if hasattr(current, slot):
                    current = getattr(current, slot)
                elif isinstance(current, dict) and slot in current:
                    current = current[slot]

            # Set the final value
            final_slot = slots[-1]
            if isinstance(current, dict):
                current[final_slot] = new_value
            else:
                setattr(current, final_slot, new_value)

    # Determine return value based on R's behavior
    if len(kwargs) > 0:
        # If we had setters, return the original values as a named dict
        # In R, setters always return a named list
        if len(args) == 0:
            # Only setters - return dict (named list in R)
            return setter_returns
        else:
            # Mix of getters and setters - return all original values
            combined = {}
            for i, arg in enumerate(args):
                combined[arg] = return_values[i]
            combined.update(setter_returns)
            return combined
    else:
        # Only getters
        # Single unnamed argument returns the value directly
        if len(return_values) == 1:
            return return_values[0]
        return return_values

Access or modify the thermodynamic system data object.

This function provides a convenient interface to get or set parts of the thermodynamic system, similar to R's par() function for graphics parameters.

Parameters

*args : str or list of str: Names of attributes to retrieve (e.g., "element", "opt$ideal.H") For nested access, use "$" notation (e.g., "opt$E.units") Special values: - "WORM": Load the WORM thermodynamic database (Python-exclusive feature)
messages : bool, default True: Whether to print informational messages during operations
**kwargs : any: Named arguments to set attributes (e.g., element=new_df, opt={'E.units': 'cal'}) For nested attributes, use "$" in the name (e.g., **{"opt$ideal.H": False})

Returns

various

If no arguments: returns the ThermoSystem object
If single unnamed argument: returns the requested value
If multiple unnamed arguments: returns list of requested values
If named arguments: returns original values before modification

Examples

>>> import pychnosz
>>> # Get the entire thermo object
>>> ts = pychnosz.thermo()

>>> # Get a specific attribute
>>> elem = pychnosz.thermo("element")

>>> # Get nested attribute
>>> e_units = pychnosz.thermo("opt$E.units")

>>> # Get multiple attributes
>>> elem, buf = pychnosz.thermo("element", "buffer")

>>> # Set an attribute
>>> old_elem = pychnosz.thermo(element=new_element_df)

>>> # Set nested attribute
>>> old_units = pychnosz.thermo(**{"opt$ideal.H": False})

>>> # Load WORM database (Python-exclusive feature)
>>> pychnosz.thermo("WORM")

>>> # Suppress messages
>>> pychnosz.thermo("WORM", messages=False)

Notes

This function mimics the behavior of R CHNOSZ thermo() function, providing flexible access to the thermodynamic data object.

The "WORM" special argument is a Python-exclusive feature that loads the Water-Organic-Rock-Microbe thermodynamic database from the WORM-db GitHub repository.

Classes

class BasisError (*args, **kwargs)

Expand source code

class BasisError(Exception):
    """Exception raised for basis-related errors."""
    pass

Exception raised for basis-related errors.

Ancestors

builtins.Exception
builtins.BaseException

class SpeciesError (*args, **kwargs)

Expand source code

class SpeciesError(Exception):
    """Exception raised for species-related errors."""
    pass

Exception raised for species-related errors.

Ancestors

builtins.Exception
builtins.BaseException

class ThermoSystem

Expand source code

class ThermoSystem:
    """
    Global thermodynamic system manager for CHNOSZ.
    
    This class manages the thermodynamic database, basis species, 
    formed species, and calculation options - essentially serving
    as the global state container for all CHNOSZ calculations.
    """
    
    def __init__(self):
        """Initialize the thermodynamic system."""
        self._data_loader = DataLoader()
        self._obigt_db = None
        self._initialized = False
        
        # Core data containers (similar to R thermo object)
        self.opt: Dict[str, Any] = {}
        self.element: Optional[pd.DataFrame] = None
        self.obigt: Optional[pd.DataFrame] = None
        self.refs: Optional[pd.DataFrame] = None
        self.Berman: Optional[pd.DataFrame] = None
        self.buffer: Optional[pd.DataFrame] = None
        self.protein: Optional[pd.DataFrame] = None
        self.groups: Optional[pd.DataFrame] = None
        self.stoich: Optional[np.ndarray] = None
        self.stoich_formulas: Optional[np.ndarray] = None
        self.bdot_acirc: Optional[Dict[str, float]] = None
        self.formula_ox: Optional[pd.DataFrame] = None
        
        # System state
        self.basis: Optional[pd.DataFrame] = None
        self.species: Optional[pd.DataFrame] = None
        
        # Options and parameters
        self.opar: Dict[str, Any] = {}
        
    def reset(self, messages: bool = True) -> None:
        """
        Initialize/reset the thermodynamic system.

        This is equivalent to reset() in the R version, loading all
        the thermodynamic data and initializing the system.

        Parameters
        ----------
        messages : bool, default True
            Whether to print informational messages
        """
        try:
            # Load core data files
            self._load_options(messages)
            self._load_element_data(messages)
            self._load_berman_data(messages)
            self._load_buffer_data(messages)
            self._load_protein_data(messages)
            self._load_stoich_data(messages)
            self._load_bdot_data(messages)
            self._load_refs_data(messages)

            # Initialize OBIGT database
            self._obigt_db = OBIGTDatabase()
            self.obigt = self._obigt_db.get_combined_data()

            # Reset system state
            self.basis = None
            self.species = None
            self.opar = {}

            self._initialized = True
            if messages:
                print('reset: thermodynamic system initialized')

        except Exception as e:
            raise RuntimeError(f"Failed to initialize thermodynamic system: {e}")
    
    def _load_options(self, messages: bool = True) -> None:
        """Load default thermodynamic options."""
        try:
            opt_file = self._data_loader.get_data_path() / "thermo" / "opt.csv"
            if opt_file.exists():
                df = pd.read_csv(opt_file)
                # Convert to dictionary format (first row contains values)
                self.opt = dict(zip(df.columns, df.iloc[0]))
            else:
                # Default options if file not found
                self.opt = {
                    'E.units': 'J',
                    'T.units': 'C',
                    'P.units': 'bar',
                    'state': 'aq',
                    'water': 'SUPCRT92',
                    'G.tol': 100,
                    'Cp.tol': 1,
                    'V.tol': 1,
                    'varP': False,
                    'IAPWS.sat': 'liquid',
                    'paramin': 1000,
                    'ideal.H': True,
                    'ideal.e': True,
                    'nonideal': 'Bdot',
                    'Setchenow': 'bgamma0',
                    'Berman': np.nan,
                    'maxcores': 2,
                    'ionize.aa': True
                }
        except Exception as e:
            if messages:
                print(f"Warning: Could not load options: {e}")
            # Fallback to hardcoded defaults with critical unit options
            self.opt = {
                'E.units': 'J',
                'T.units': 'C',
                'P.units': 'bar',
                'state': 'aq',
                'water': 'SUPCRT92',
                'G.tol': 100,
                'Cp.tol': 1,
                'V.tol': 1,
                'varP': False,
                'IAPWS.sat': 'liquid',
                'paramin': 1000,
                'ideal.H': True,
                'ideal.e': True,
                'nonideal': 'Bdot',
                'Setchenow': 'bgamma0',
                'Berman': np.nan,
                'maxcores': 2,
                'ionize.aa': True
            }
    
    def _load_element_data(self, messages: bool = True) -> None:
        """Load element properties data."""
        try:
            self.element = self._data_loader.load_elements()
        except Exception as e:
            if messages:
                print(f"Warning: Could not load element data: {e}")
            self.element = None
    
    def _load_berman_data(self, messages: bool = True) -> None:
        """Load Berman mineral parameters from CSV files."""
        try:
            # Get path to Berman directory
            berman_path = self._data_loader.data_path / "Berman"

            if not berman_path.exists():
                if messages:
                    print(f"Warning: Berman directory not found: {berman_path}")
                self.Berman = None
                return

            # Find all CSV files in the directory
            csv_files = list(berman_path.glob("*.csv"))

            if not csv_files:
                if messages:
                    print(f"Warning: No CSV files found in {berman_path}")
                self.Berman = None
                return
            
            # Extract year from filename and sort in reverse chronological order (youngest first)
            # Following R logic: files <- rev(files[order(sapply(strsplit(files, "_"), "[", 2))])
            def extract_year(filepath):
                filename = filepath.name
                parts = filename.split('_')
                if len(parts) >= 2:
                    year_part = parts[1].replace('.csv', '')
                    try:
                        return int(year_part)
                    except ValueError:
                        return 0
                return 0
            
            # Sort files by year (youngest first)
            sorted_files = sorted(csv_files, key=extract_year, reverse=True)
            
            # Read parameters from each file
            berman_dfs = []
            for file_path in sorted_files:
                try:
                    df = pd.read_csv(file_path)
                    berman_dfs.append(df)
                except Exception as e:
                    print(f"Warning: Could not read Berman file {file_path}: {e}")
            
            # Combine all data frames (equivalent to do.call(rbind, Berman))
            if berman_dfs:
                self.Berman = pd.concat(berman_dfs, ignore_index=True)
                # Ensure all numeric columns are properly typed
                numeric_cols = ['GfPrTr', 'HfPrTr', 'SPrTr', 'VPrTr', 'k0', 'k1', 'k2', 'k3', 'k4', 'k5', 'k6',
                               'v1', 'v2', 'v3', 'v4', 'Tlambda', 'Tref', 'dTdP', 'l1', 'l2', 'DtH', 'Tmax', 'Tmin',
                               'd0', 'd1', 'd2', 'd3', 'd4', 'Vad']
                for col in numeric_cols:
                    if col in self.Berman.columns:
                        self.Berman[col] = pd.to_numeric(self.Berman[col], errors='coerce')
            else:
                self.Berman = None
                
        except Exception as e:
            if messages:
                print(f"Warning: Could not load Berman data: {e}")
            self.Berman = None

    def _load_buffer_data(self, messages: bool = True) -> None:
        """Load buffer definitions."""
        try:
            self.buffer = self._data_loader.load_buffers()
        except Exception as e:
            if messages:
                print(f"Warning: Could not load buffer data: {e}")
            self.buffer = None

    def _load_protein_data(self, messages: bool = True) -> None:
        """Load protein composition data.""" 
        try:
            self.protein = self._data_loader.load_proteins()
        except Exception as e:
            if messages:
                print(f"Warning: Could not load protein data: {e}")
            self.protein = None

    def _load_stoich_data(self, messages: bool = True) -> None:
        """Load stoichiometric matrix data."""
        try:
            stoich_df = self._data_loader.load_stoich()
            if stoich_df is not None:
                # Extract formulas and convert to matrix
                self.stoich_formulas = stoich_df.iloc[:, 0].values
                self.stoich = stoich_df.iloc[:, 1:].values
            else:
                self.stoich_formulas = None
                self.stoich = None
        except Exception as e:
            if messages:
                print(f"Warning: Could not load stoichiometric data: {e}")
            self.stoich_formulas = None
            self.stoich = None

    def _load_bdot_data(self, messages: bool = True) -> None:
        """Load B-dot activity coefficient parameters."""
        try:
            bdot_file = self._data_loader.get_data_path() / "thermo" / "Bdot_acirc.csv"
            if bdot_file.exists():
                df = pd.read_csv(bdot_file)
                if len(df.columns) >= 2:
                    self.bdot_acirc = dict(zip(df.iloc[:, 0], df.iloc[:, 1]))
                else:
                    self.bdot_acirc = {}
            else:
                self.bdot_acirc = {}
        except Exception as e:
            if messages:
                print(f"Warning: Could not load B-dot data: {e}")
            self.bdot_acirc = {}

    def _load_refs_data(self, messages: bool = True) -> None:
        """Load references data."""
        try:
            self.refs = self._data_loader.load_refs()
        except Exception as e:
            if messages:
                print(f"Warning: Could not load refs data: {e}")
            self.refs = None
    
    def is_initialized(self) -> bool:
        """Check if the thermodynamic system is initialized."""
        return self._initialized
    
    def get_obigt_db(self) -> OBIGTDatabase:
        """Get the OBIGT database instance."""
        if not self._initialized:
            self.reset()
        return self._obigt_db
    
    def get_option(self, key: str, default: Any = None) -> Any:
        """Get a thermodynamic option value."""
        return self.opt.get(key, default)
    
    def set_option(self, key: str, value: Any) -> None:
        """Set a thermodynamic option value."""
        self.opt[key] = value
    
    def info(self) -> Dict[str, Any]:
        """Get information about the current thermodynamic system."""
        if not self._initialized:
            return {"status": "Not initialized"}
        
        info = {
            "status": "Initialized",
            "obigt_species": len(self.obigt) if self.obigt is not None else 0,
            "elements": len(self.element) if self.element is not None else 0,
            "berman_minerals": len(self.Berman) if self.Berman is not None else 0,
            "buffers": len(self.buffer) if self.buffer is not None else 0,
            "proteins": len(self.protein) if self.protein is not None else 0,
            "stoich_species": len(self.stoich_formulas) if self.stoich_formulas is not None else 0,
            "basis_species": len(self.basis) if self.basis is not None else 0,
            "formed_species": len(self.species) if self.species is not None else 0,
            "current_options": dict(self.opt)
        }
        return info
    
    def __repr__(self) -> str:
        """String representation of the thermodynamic system."""
        if not self._initialized:
            return "ThermoSystem(uninitialized)"

        info = self.info()
        return (f"ThermoSystem("
                f"obigt={info['obigt_species']} species, "
                f"basis={info['basis_species']}, "
                f"formed={info['formed_species']})")

    # R-style uppercase property aliases for compatibility
    @property
    def OBIGT(self):
        """Alias for obigt (R compatibility)."""
        # Auto-initialize if needed AND obigt is None (matches R behavior)
        if self.obigt is None and not self._initialized:
            self.reset(messages=True)
        return self.obigt

    @OBIGT.setter
    def OBIGT(self, value):
        """Setter for OBIGT (R compatibility)."""
        _set_obigt_data(self, value)

Global thermodynamic system manager for CHNOSZ.

This class manages the thermodynamic database, basis species, formed species, and calculation options - essentially serving as the global state container for all CHNOSZ calculations.

Initialize the thermodynamic system.

Instance variables

prop OBIGT

Expand source code

@property
def OBIGT(self):
    """Alias for obigt (R compatibility)."""
    # Auto-initialize if needed AND obigt is None (matches R behavior)
    if self.obigt is None and not self._initialized:
        self.reset(messages=True)
    return self.obigt

Alias for obigt (R compatibility).

Methods

def get_obigt_db(self) ‑> OBIGTDatabase

Expand source code

def get_obigt_db(self) -> OBIGTDatabase:
    """Get the OBIGT database instance."""
    if not self._initialized:
        self.reset()
    return self._obigt_db

Get the OBIGT database instance.

def get_option(self, key: str, default: Any = None) ‑> Any

Expand source code

def get_option(self, key: str, default: Any = None) -> Any:
    """Get a thermodynamic option value."""
    return self.opt.get(key, default)

Get a thermodynamic option value.

def info(self) ‑> Dict[str, Any]

Expand source code

def info(self) -> Dict[str, Any]:
    """Get information about the current thermodynamic system."""
    if not self._initialized:
        return {"status": "Not initialized"}
    
    info = {
        "status": "Initialized",
        "obigt_species": len(self.obigt) if self.obigt is not None else 0,
        "elements": len(self.element) if self.element is not None else 0,
        "berman_minerals": len(self.Berman) if self.Berman is not None else 0,
        "buffers": len(self.buffer) if self.buffer is not None else 0,
        "proteins": len(self.protein) if self.protein is not None else 0,
        "stoich_species": len(self.stoich_formulas) if self.stoich_formulas is not None else 0,
        "basis_species": len(self.basis) if self.basis is not None else 0,
        "formed_species": len(self.species) if self.species is not None else 0,
        "current_options": dict(self.opt)
    }
    return info

Get information about the current thermodynamic system.

def is_initialized(self) ‑> bool

Expand source code

def is_initialized(self) -> bool:
    """Check if the thermodynamic system is initialized."""
    return self._initialized

Check if the thermodynamic system is initialized.

def reset(self, messages: bool = True) ‑> None

Expand source code

def reset(self, messages: bool = True) -> None:
    """
    Initialize/reset the thermodynamic system.

    This is equivalent to reset() in the R version, loading all
    the thermodynamic data and initializing the system.

    Parameters
    ----------
    messages : bool, default True
        Whether to print informational messages
    """
    try:
        # Load core data files
        self._load_options(messages)
        self._load_element_data(messages)
        self._load_berman_data(messages)
        self._load_buffer_data(messages)
        self._load_protein_data(messages)
        self._load_stoich_data(messages)
        self._load_bdot_data(messages)
        self._load_refs_data(messages)

        # Initialize OBIGT database
        self._obigt_db = OBIGTDatabase()
        self.obigt = self._obigt_db.get_combined_data()

        # Reset system state
        self.basis = None
        self.species = None
        self.opar = {}

        self._initialized = True
        if messages:
            print('reset: thermodynamic system initialized')

    except Exception as e:
        raise RuntimeError(f"Failed to initialize thermodynamic system: {e}")

Initialize/reset the thermodynamic system.

This is equivalent to reset() in the R version, loading all the thermodynamic data and initializing the system.

Parameters

messages : bool, default True: Whether to print informational messages

def set_option(self, key: str, value: Any) ‑> None

Expand source code

def set_option(self, key: str, value: Any) -> None:
    """Set a thermodynamic option value."""
    self.opt[key] = value

Set a thermodynamic option value.