Module pychnosz.utils
Utility functions for CHNOSZ calculations.
Sub-modules
pychnosz.utils.expression-
Expression utilities for formatted labels and text …
pychnosz.utils.formula-
Chemical formula parsing and manipulation utilities …
pychnosz.utils.formula_ox-
Formula oxidation state utilities for CHNOSZ …
pychnosz.utils.units-
Unit conversion utilities for CHNOSZ …
Functions
def ZC(formula: str | int | List[str | int]) ‑> float | List[float]-
Expand source code
def ZC(formula: Union[str, int, List[Union[str, int]]]) -> Union[float, List[float]]: """ Calculate average oxidation state of carbon in chemical formulas. Parameters ---------- formula : str, int, or list Chemical formula(s) or species index(es) Returns ------- float or list of float Average oxidation state(s) of carbon """ # Get elemental compositions compositions = makeup(formula, count_zero=False) if not isinstance(compositions, list): compositions = [compositions] results = [] # Nominal charges of elements known_elements = ['H', 'N', 'O', 'S', 'Z'] charges = [-1, 3, 2, 2, 1] for comp in compositions: if comp is None or 'C' not in comp: results.append(np.nan) continue # Calculate total charge from known elements total_charge = 0 unknown_elements = [] for element, count in comp.items(): if element == 'C': continue elif element in known_elements: idx = known_elements.index(element) total_charge += count * charges[idx] else: unknown_elements.append(element) if unknown_elements: warnings.warn(f"element(s) {' '.join(unknown_elements)} not in " f"{' '.join(known_elements)} so not included in ZC calculation") # Calculate carbon oxidation state n_carbon = comp['C'] zc = total_charge / n_carbon results.append(zc) if len(results) == 1: return results[0] else: return resultsCalculate average oxidation state of carbon in chemical formulas.
Parameters
formula:str, int,orlist- Chemical formula(s) or species index(es)
Returns
floatorlistoffloat- Average oxidation state(s) of carbon
def as_chemical_formula(makeup_dict: Dict[str, float] | pandas.core.frame.DataFrame,
drop_zero: bool = True) ‑> str | List[str]-
Expand source code
def as_chemical_formula(makeup_dict: Union[Dict[str, float], pd.DataFrame], drop_zero: bool = True) -> Union[str, List[str]]: """ Convert elemental makeup to chemical formula string(s). Parameters ---------- makeup_dict : dict or DataFrame Elemental composition(s) drop_zero : bool Whether to exclude zero coefficients Returns ------- str or list of str Chemical formula string(s) """ if isinstance(makeup_dict, pd.DataFrame): # Handle matrix of compositions results = [] for i in range(len(makeup_dict)): row_dict = makeup_dict.iloc[i].to_dict() formula = _dict_to_formula(row_dict, drop_zero) results.append(formula) return results else: # Handle single composition return _dict_to_formula(makeup_dict, drop_zero)Convert elemental makeup to chemical formula string(s).
Parameters
makeup_dict:dictorDataFrame- Elemental composition(s)
drop_zero:bool- Whether to exclude zero coefficients
Returns
strorlistofstr- Chemical formula string(s)
def calculate_ghs(formula: str,
G: float = nan,
H: float = nan,
S: float = nan,
T: float = 298.15,
E_units: str = 'J') ‑> Dict[str, float]-
Expand source code
def calculate_ghs(formula: str, G: float = np.nan, H: float = np.nan, S: float = np.nan, T: float = 298.15, E_units: str = "J") -> Dict[str, float]: """ Calculate missing G, H, or S from the other two values. Parameters ---------- formula : str Chemical formula G : float Gibbs energy of formation H : float Enthalpy of formation S : float Standard entropy T : float Temperature in K E_units : str Energy units ("J" or "cal") Returns ------- dict Dictionary with G, H, S values """ # Calculate elemental entropy Se = entropy(formula) if E_units == "cal": Se = Se / 4.184 # Convert J to cal # Calculate missing value if pd.isna(G): G = H - T * (S - Se) elif pd.isna(H): H = G + T * (S - Se) elif pd.isna(S): S = (H - G) / T + Se return {"G": G, "H": H, "S": S}Calculate missing G, H, or S from the other two values.
Parameters
formula:str- Chemical formula
G:float- Gibbs energy of formation
H:float- Enthalpy of formation
S:float- Standard entropy
T:float- Temperature in K
E_units:str- Energy units ("J" or "cal")
Returns
dict- Dictionary with G, H, S values
def entropy(formula: str | int | List[str | int]) ‑> float | List[float]-
Expand source code
def entropy(formula: Union[str, int, List[Union[str, int]]]) -> Union[float, List[float]]: """ Calculate standard molal entropy of elements in chemical formulas. Parameters ---------- formula : str, int, or list Chemical formula(s) or species index(es) Returns ------- float or list of float Standard entropy(ies) in J/(mol*K) """ thermo_obj = thermo() if thermo_obj.element is None: raise RuntimeError("Element data not available") # Convert to stoichiometric matrix compositions = makeup(formula, count_zero=False) if not isinstance(compositions, list): compositions = [compositions] entropies = [] for comp in compositions: if comp is None: entropies.append(np.nan) continue total_entropy = 0.0 has_na = False for element, count in comp.items(): # Look up element entropy element_data = thermo_obj.element[thermo_obj.element['element'] == element] if len(element_data) == 0: warnings.warn(f"Element {element} not available in thermo().element") has_na = True continue element_s = element_data.iloc[0]['s'] element_n = element_data.iloc[0]['n'] if pd.isna(element_s) or pd.isna(element_n): has_na = True continue # Entropy per atom entropy_per_atom = element_s / element_n total_entropy += count * entropy_per_atom if has_na and total_entropy == 0: entropies.append(np.nan) else: # Convert to Joules (assuming input is in cal) entropies.append(total_entropy * 4.184) # cal to J conversion if len(entropies) == 1: return entropies[0] else: return entropiesCalculate standard molal entropy of elements in chemical formulas.
Parameters
formula:str, int,orlist- Chemical formula(s) or species index(es)
Returns
floatorlistoffloat- Standard entropy(ies) in J/(mol*K)
def get_formula(formula: str | int | List[str | int]) ‑> str | List[str]-
Expand source code
def get_formula(formula: Union[str, int, List[Union[str, int]]]) -> Union[str, List[str]]: """ Get chemical formulas for species indices or return formula strings. Parameters ---------- formula : str, int, or list Chemical formula(s) or species index(es) Returns ------- str or list of str Chemical formula(s) """ # Handle single values if not isinstance(formula, list): formula = [formula] single_result = True else: single_result = False results = [] thermo_obj = thermo() for f in formula: if isinstance(f, str): # Already a formula results.append(f) elif isinstance(f, int): # Species index - look up formula if thermo_obj.obigt is not None: # Use .loc for label-based indexing (species indices are 1-based labels) if f in thermo_obj.obigt.index: formula_str = thermo_obj.obigt.loc[f, 'formula'] results.append(formula_str) else: raise FormulaError(f"Species index {f} not found in OBIGT database") else: raise FormulaError("Thermodynamic database not initialized") else: # Try to convert to string results.append(str(f)) if single_result: return results[0] else: return resultsGet chemical formulas for species indices or return formula strings.
Parameters
formula:str, int,orlist- Chemical formula(s) or species index(es)
Returns
strorlistofstr- Chemical formula(s)
def i2A(formula: str | List[str] | Dict[str, float]) ‑> numpy.ndarray-
Expand source code
def i2A(formula: Union[str, List[str], Dict[str, float]]) -> np.ndarray: """ Convert formula(s) to stoichiometric matrix. Parameters ---------- formula : str, list, or dict Chemical formula(s) or composition Returns ------- np.ndarray Stoichiometric matrix with elements as columns """ if isinstance(formula, np.ndarray): return formula elif isinstance(formula, dict) and all(isinstance(k, str) for k in formula.keys()): # Single composition dictionary return np.array([[formula.get(k, 0) for k in sorted(formula.keys())]]) # Get compositions with zero padding compositions = makeup(formula, count_zero=True) if not isinstance(compositions, list): compositions = [compositions] # Get all elements all_elements = set() for comp in compositions: if comp is not None: all_elements.update(comp.keys()) all_elements = sorted(list(all_elements)) # Build matrix matrix = np.zeros((len(compositions), len(all_elements))) for i, comp in enumerate(compositions): if comp is not None: for j, element in enumerate(all_elements): matrix[i, j] = comp.get(element, 0) return matrixConvert formula(s) to stoichiometric matrix.
Parameters
formula:str, list,ordict- Chemical formula(s) or composition
Returns
np.ndarray- Stoichiometric matrix with elements as columns
def makeup(formula: str | int | List[str | int],
multiplier: float | List[float] = 1.0,
sum_formulas: bool = False,
count_zero: bool = False) ‑> Dict[str, float] | List[Dict[str, float]]-
Expand source code
def makeup(formula: Union[str, int, List[Union[str, int]]], multiplier: Union[float, List[float]] = 1.0, sum_formulas: bool = False, count_zero: bool = False) -> Union[Dict[str, float], List[Dict[str, float]]]: """ Return elemental makeup (counts) of chemical formula(s). Handles formulas with parenthetical subformulas, suffixed formulas, charges, and fractional coefficients. Parameters ---------- formula : str, int, or list Chemical formula(s) or species index(es) multiplier : float or list of float Multiplier(s) to apply to formula coefficients sum_formulas : bool If True, return sum of all formulas count_zero : bool If True, include zero counts for all elements appearing in any formula Returns ------- dict or list of dict Elemental composition(s) as {element: count} dictionaries Examples -------- >>> makeup("H2O") {'H': 2, 'O': 1} >>> makeup("Ca(OH)2") {'Ca': 1, 'O': 2, 'H': 2} >>> makeup(["H2O", "CO2"]) [{'H': 2, 'O': 1}, {'C': 1, 'O': 2}] """ # Handle matrix input if isinstance(formula, np.ndarray) and formula.ndim == 2: return [makeup(formula[i, :]) for i in range(formula.shape[0])] # Handle named numeric objects (return unchanged) if isinstance(formula, dict) and all(isinstance(k, str) for k in formula.keys()): return formula # Handle list of named objects if isinstance(formula, list) and len(formula) > 0: if isinstance(formula[0], dict) and all(isinstance(k, str) for k in formula[0].keys()): return formula # Prepare multiplier if not isinstance(multiplier, list): multiplier = [multiplier] # Handle multiple formulas if isinstance(formula, list): if len(multiplier) != 1 and len(multiplier) != len(formula): raise ValueError("multiplier does not have length = 1 or length = number of formulas") if len(multiplier) == 1: multiplier = multiplier * len(formula) # Get formulas for any species indices formula = get_formula(formula) results = [] for i, f in enumerate(formula): result = makeup(f, multiplier[i]) results.append(result) # Handle sum_formulas option if sum_formulas: all_elements = set() for result in results: if result is not None: all_elements.update(result.keys()) summed = {} for element in all_elements: summed[element] = sum(result.get(element, 0) for result in results if result is not None) return summed # Handle count_zero option elif count_zero: # Get all elements appearing in any formula all_elements = set() for result in results: if result is not None: all_elements.update(result.keys()) # Add zero counts for missing elements complete_results = [] for result in results: if result is None: complete_result = {element: np.nan for element in all_elements} else: complete_result = {element: result.get(element, 0) for element in all_elements} complete_results.append(complete_result) return complete_results return results # Handle single formula if isinstance(formula, int): # Get formula from species index thermo_obj = thermo() if thermo_obj.obigt is not None: # Use .loc for label-based indexing (species indices are 1-based labels) if formula in thermo_obj.obigt.index: formula = thermo_obj.obigt.loc[formula, 'formula'] else: raise FormulaError(f"Species index {formula} not found in OBIGT database") else: raise FormulaError("Thermodynamic database not initialized") if formula is None or pd.isna(formula): return None # Parse single formula try: result = _parse_formula(str(formula)) # Apply multiplier if multiplier[0] != 1.0: result = {element: count * multiplier[0] for element, count in result.items()} # Validate elements _validate_elements(result) return result except Exception as e: raise FormulaError(f"Error parsing formula '{formula}': {e}")Return elemental makeup (counts) of chemical formula(s).
Handles formulas with parenthetical subformulas, suffixed formulas, charges, and fractional coefficients.
Parameters
formula:str, int,orlist- Chemical formula(s) or species index(es)
multiplier:floatorlistoffloat- Multiplier(s) to apply to formula coefficients
sum_formulas:bool- If True, return sum of all formulas
count_zero:bool- If True, include zero counts for all elements appearing in any formula
Returns
dictorlistofdict- Elemental composition(s) as {element: count} dictionaries
Examples
>>> makeup("H2O") {'H': 2, 'O': 1}>>> makeup("Ca(OH)2") {'Ca': 1, 'O': 2, 'H': 2}>>> makeup(["H2O", "CO2"]) [{'H': 2, 'O': 1}, {'C': 1, 'O': 2}] def mass(formula: str | int | List[str | int]) ‑> float | List[float]-
Expand source code
def mass(formula: Union[str, int, List[Union[str, int]]]) -> Union[float, List[float]]: """ Calculate molecular mass of chemical formula(s). Parameters ---------- formula : str, int, or list Chemical formula(s) or species index(es) Returns ------- float or list of float Molecular mass(es) in g/mol """ thermo_obj = thermo() if thermo_obj.element is None: raise RuntimeError("Element data not available") # Convert to stoichiometric matrix compositions = makeup(formula, count_zero=False) if not isinstance(compositions, list): compositions = [compositions] masses = [] for comp in compositions: if comp is None: masses.append(np.nan) continue total_mass = 0.0 for element, count in comp.items(): if element == 'Z': continue # Charge has no mass # Look up element mass element_data = thermo_obj.element[thermo_obj.element['element'] == element] if len(element_data) == 0: raise FormulaError(f"Element {element} not found in element database") element_mass = element_data.iloc[0]['mass'] total_mass += count * element_mass masses.append(total_mass) if len(masses) == 1: return masses[0] else: return massesCalculate molecular mass of chemical formula(s).
Parameters
formula:str, int,orlist- Chemical formula(s) or species index(es)
Returns
floatorlistoffloat- Molecular mass(es) in g/mol
def reset(messages: bool = True)-
Expand source code
def reset(messages: bool = True): """ Initialize or reset the CHNOSZ thermodynamic system. This function initializes the global thermodynamic system by loading all thermodynamic data files, setting up the OBIGT database, and preparing the system for calculations. This is equivalent to the reset() function in the R version of CHNOSZ. Parameters ---------- messages : bool, default True Whether to print informational messages Examples -------- >>> import pychnosz >>> pychnosz.reset() # Initialize the system reset: thermodynamic system initialized """ thermo_system = get_thermo_system() thermo_system.reset(messages=messages)Initialize or reset the CHNOSZ thermodynamic system.
This function initializes the global thermodynamic system by loading all thermodynamic data files, setting up the OBIGT database, and preparing the system for calculations.
This is equivalent to the reset() function in the R version of CHNOSZ.
Parameters
messages:bool, defaultTrue- Whether to print informational messages
Examples
>>> import pychnosz >>> pychnosz.reset() # Initialize the system reset: thermodynamic system initialized def species_basis(species: List[int] | numpy.ndarray,
makeup_matrix: numpy.ndarray | None = None,
basis_df: pandas.core.frame.DataFrame | None = None) ‑> numpy.ndarray-
Expand source code
def species_basis(species: Union[List[int], np.ndarray], makeup_matrix: Optional[np.ndarray] = None, basis_df: Optional[pd.DataFrame] = None) -> np.ndarray: """ Calculate coefficients for formation reactions from basis species. Parameters ---------- species : list of int or array Species indices in thermo().obigt makeup_matrix : array, optional Pre-calculated makeup matrix basis_df : pd.DataFrame, optional Basis definition to use (if not using global basis) Returns ------- np.ndarray Formation reaction coefficients matrix """ from ..core.basis import basis_elements, get_basis # Follow R CHNOSZ species.basis algorithm exactly from ..core.thermo import thermo # Get basis dataframe if basis_df is None: basis_df = get_basis() if basis_df is None: raise RuntimeError("Basis species not defined") # Get basis element names basis_element_names = [col for col in basis_df.columns if col not in ['ispecies', 'logact', 'state']] # Calculate basis elements matrix from basis_df element_cols = [col for col in basis_df.columns if col not in ['ispecies', 'logact', 'state']] bmat = basis_df[element_cols].values.T # basis_elements() already returns transposed matrix (equivalent to R tbmat) tbmat = bmat # Get thermo object for species lookup thermo_obj = thermo() # Initialize result matrix n_species = len(species) n_basis = len(basis_element_names) formation_coeffs = np.zeros((n_species, n_basis)) # Process each species individually (following R apply logic) for i, sp_idx in enumerate(species): # Get species makeup (equivalent to R mkp <- as.matrix(sapply(makeup(species), c))) formula = thermo_obj.obigt.iloc[sp_idx - 1]['formula'] sp_makeup = makeup([formula], count_zero=True)[0] # Convert makeup to array ordered by elements present in species sp_elements = list(sp_makeup.keys()) sp_values = np.array(list(sp_makeup.values())) # Find positions of species elements in basis elements (R ielem <- match) # All species elements must be in basis missing_elements = [] for elem in sp_elements: if elem not in basis_element_names: missing_elements.append(elem) if missing_elements: raise RuntimeError(f"element(s) not in the basis: {' '.join(missing_elements)}") # Find positions of basis elements in species elements (R jelem <- match) jelem = [] for elem in basis_element_names: try: jelem.append(sp_elements.index(elem)) except ValueError: jelem.append(None) # NA in R # Reorder species matrix to match basis elements (R mkp <- mkp[jelem, , drop = FALSE]) sp_makeup_ordered = np.zeros(len(basis_element_names)) for j, pos in enumerate(jelem): if pos is not None: sp_makeup_ordered[j] = sp_values[pos] # else remains 0 (equivalent to R mkp[ina, ] <- 0) # Solve linear system: tbmat @ coeffs = sp_makeup_ordered # This is equivalent to R solve(tbmat, x) try: coeffs = np.linalg.solve(tbmat, sp_makeup_ordered) except np.linalg.LinAlgError: raise RuntimeError(f"Singular basis matrix for species {sp_idx}") # Apply R zapsmall equivalent (digits=7) coeffs = np.around(coeffs, decimals=7) # Clean up very small numbers coeffs[np.abs(coeffs) < 1e-7] = 0 formation_coeffs[i, :] = coeffs return formation_coeffsCalculate coefficients for formation reactions from basis species.
Parameters
species:listofintorarray- Species indices in thermo().obigt
makeup_matrix:array, optional- Pre-calculated makeup matrix
basis_df:pd.DataFrame, optional- Basis definition to use (if not using global basis)
Returns
np.ndarray- Formation reaction coefficients matrix
Classes
class FormulaError (*args, **kwargs)-
Expand source code
class FormulaError(Exception): """Exception raised for formula parsing errors.""" passException raised for formula parsing errors.
Ancestors
- builtins.Exception
- builtins.BaseException