Module `AqOrg.AqOrg`

Expand source code

from IPython.display import SVG
from rdkit import Chem
from rdkit.Chem import rdDepictor, Draw
from rdkit.Chem.Draw import rdMolDraw2D
import pandas as pd
import math
import sigfig
import pubchempy as pcp
import os
from chemparse import parse_formula
import pkg_resources
from datetime import datetime
from WORMutils import Error_Handler, find_HKF

def find_sigfigs(x):
    
    '''
    Get the number of significant digits in a string representing a number up to
    eight digits long.

    Parameters
    ----------
    x : str
        A string denoting a number. This can include scientific notation.
    
    Parameters
    ----------
    int
        The number of significant digits.
    
    Examples
    --------
    >>> find_sigfigs("5.220")
    4
    
    This also takes into account scientific notation.
    
    >>> find_sigfigs("1.23e+3")
    3
    
    Insignificant zeros are ignored.
    
    >>> find_sigfigs("4000")
    1
    
    A decimal point denotes that zeros are significant.
    
    >>> find_sigfigs("4000.")
    4
    '''
    
    x = str(x)
    
    # change all the 'E' to 'e'
    x = x.lower()
    if ('-' == x[0]):
        x = x[1:]
    if ('e' in x):
        # return the length of the numbers before the 'e'
        myStr = x.split('e')
        return len(myStr[0]) - 1  # to compenstate for the decimal point
    else:
        # put it in e format and return the result of that
        ### NOTE: because of the 8 below, it may do crazy things when it parses 9 sigfigs
        n = ('%.*e' % (8, float(x))).split('e')
        # remove and count the number of removed user added zeroes. (these are sig figs)
        if '.' in x:
            s = x.replace('.', '')
            #number of zeroes to add back in
            l = len(s) - len(s.rstrip('0'))
            #strip off the python added zeroes and add back in the ones the user added
            n[0] = n[0].rstrip('0') + ''.join(['0' for num in range(l)])
        else:
            #the user had no trailing zeroes so just strip them all
            n[0] = n[0].rstrip('0')
        #pass it back to the beginning to be parsed
    return find_sigfigs('e'.join(n))


class Estimate():
    
    """
    Estimate thermodynamic properties of an aqueous organic molecule.
    
    Parameters
    ----------
    name : str
        Name of the aqueous organic molecule that will have its thermodynamic
        properties estimated.
                       
    show : bool, default True
        Show a diagram of the molecule?
    
    group_data : str, optional
        Name of a CSV containing custom group contribution data.
    
    test : bool, default False
        Perform a simple group matching test instead of estimating properties?

    state : str, default "aq"
        Can be "aq" or "gas". Estimate the properties of an aqueous molecule or
        an ideal gas?

    save : bool, default False
        Save molecular structure figures as PNG and SVG?
    
    **kwargs : numeric or str, optional
        Known standard state partial molal thermodynamic properties at 298.15 K
        and 1 bar. These will not be estimated, but instead will be used to
        estimate other properties and parameters. Valid **kwargs include:
        
        - Gh : Gibbs free energy change of hydration, kJ/mol.
        - Hh : Enthalpy change of hydration, kJ/mol.
        - Sh : Entropy change of hydration, J/mol/K.
        - Cph : Heat capacity change of hydration, J/mol/K.
        - V : Volume change of hydration, cm3/mol.
        - Gh_err : Error associated with Gh (default 0 kJ/mol).
        - Hh_err : Error associated with Hh (default 0 kJ/mol).
        - Sh_err : Error associated with Sh (default 0 J/mol/K).
        - Cph_err : Error associated with Cph (default 0 J/mol/K).
        - V_err : error associated with V (default 0 cm3/mol).
        - Gig : Ideal gas Gibbs free energy of formation, kJ/mol.
        - Hig : Ideal gas enthalpy of formation, kJ/mol.
        - Sig : Ideal gas entropy, J/mol/K.
        - Cpig : Ideal gas isobaric heat capacity, J/mol/K.
        - Gaq : Aqueous Gibbs free energy of formation, kJ/mol.
        - Haq : Aqueous enthalpy of formation, kJ/mol.
        - Saq : Aqueous entropy, J/mol/K.
        - Cpaq : Aqueous isobaric heat capacity, J/mol/K.
    
    Attributes
    ----------
    pcp_compound : pcp.get_compounds()
        PubChemPy compound object.
        
    smiles : str
        Canonical SMILES string.
        
    formula : str
        Molecular formula.
        
    formula_dict : dict
        Dictionary of element abundance in the molecular formula.
        
    element_data : pd.DataFrame()
        Table of element data adapted from Jeff Dick's CHNOSZ package for R.
        
    Selements : numeric
        Sum of the contributions of the entropies of the elements according to
        Cox, J. D., Wagman, D. D., & Medvedev, V. A. (1989). CODATA key values
        for thermodynamics. Chem/Mats-Sci/E.
        
    note : str
        Notes and warnings associated with the estimation.
        
    charge : numeric
        The charge of the molecule.
        
    OBIGT : pd.DataFrame()
        Table of estimated thermodynamic properties and parameters. The format
        is styled after Jeff Dick's OBIGT thermodynamic table in the CHNOSZ
        package (see https://chnosz.net/manual/thermo.html).

    hide_traceback : bool, default True
        Hide traceback message when encountering errors handled by this function?
        When True, error messages handled by this class will be short and to
        the point.
    
    """
    
    def __init__(self, name, show=True, group_data=None,
                       test=False, state='aq', save=False, hide_traceback=True,
                       **kwargs):
                       # E_units="J" # not implemented... tricky because groups
                                     # are in both kJ and J units.

        self.err_handler = Error_Handler(clean=hide_traceback)
        
        self.name = name
        
        # valid kwargs
        self.Gh = None
        self.Hh = None
        self.Sh = None
        self.Cph = None
        self.V = None
        self.Gh_err = 0
        self.Hh_err = 0
        self.Sh_err = 0
        self.Cph_err = 0
        self.V_err = 0
        self.Gig = None
        self.Hig = None
        self.Sig = None
        self.Cpig_a = None
        self.Cpig_b = None
        self.Cpig_c = None
        self.Cpig_d = None
        self.Cpig = None
        self.Gaq = None
        self.Haq = None
        self.Saq = None
        self.Cpaq = None

        for key, value in kwargs.items():
            self.__setattr__(key, value)
        
        # load group contribution data
        if group_data == None:
            if state == "aq":
                group_data = pkg_resources.resource_stream(__name__, "data/group_contribution_data.csv")
            elif state == "gas":
                group_data=pkg_resources.resource_stream(__name__, 'data/joback_groups.csv')
            else:
                self.err_handler.raise_exception("State is unrecognized. Must be either 'aq' or 'gas'.")
        self.__load_group_data(group_data)
        
        # look up compound on PubChem
        self.pcp_compound = pcp.get_compounds(self.name, "name")
        if len(self.pcp_compound) == 0:
            self.err_handler.raise_exception("Could not find '" + self.name + "' in PubChem's online database.")
        self.smiles = self.pcp_compound[0].canonical_smiles
        self.formula = self.pcp_compound[0].molecular_formula
        self.formula_dict = parse_formula(self.formula)
        
        if "-" in self.formula_dict.keys() or "+" in self.formula_dict.keys():
            self.err_handler.raise_exception(self.name + " cannot be estimated because it has a net charge.")

        if show:
            self.__display_molecule(save=save)

        if test:
            print(self.__test_group_match())            
        else:
            # load properties of the elements
            # Cox, J. D., Wagman, D. D., and Medvedev, V. A., CODATA Key Values
            # for Thermodynamics, Hemisphere Publishing Corp., New York, 1989.
            # Compiled into a CSV by Jeffrey Dick for CHNOSZ
            element_data = pd.read_csv(pkg_resources.resource_stream(__name__, 'data/element.csv'), index_col="element")
            self.element_data = element_data.loc[element_data['source'] == "CWM89"]
            
            self.Selements = self.__entropy()
            self.note = ""
            self.charge = 0 # !
            
            if state == 'aq':
                self.OBIGT = self.__estimate()
            elif state == 'gas':
                self.__estimate_joback()

    def __load_group_data(self, db_filename):
        self.group_data = pd.read_csv(db_filename, dtype=str)
        self.group_data['elem'] = self.group_data['elem'].fillna('')
        self.pattern_dict = pd.Series(self.group_data["elem"].values,
                                      index=self.group_data["smarts"]).to_dict()
        self.group_data = self.group_data.set_index("smarts")

        
    def __set_groups(self):
        
        self.group_matches = pd.DataFrame(self.__match_groups(), index=[self.name])

        # remove columns with no matches
        self.group_matches = self.group_matches.loc[:, (self.group_matches.sum(axis=0) != 0)]
        
        # get a list of relevent groups
        self.groups = [grp for grp in self.group_matches.columns if grp != "formula"]
        

    def __entropy(self, unit="J/mol/K"):
        
        """
        Calculate the standard molal entropy of elements in a molecule.
        """

        entropies = [(self.element_data.loc[elem, "s"]/self.element_data.loc[elem, "n"])*self.formula_dict[elem] for elem in list(self.formula_dict.keys())]
        if unit == "J/mol/K":
            unit_conv = 4.184
        elif unit == "cal/mol/K":
            unit_conv = 1
        else:
            print("Warning in entropy: specified unit", unit,
                  "is not recognized. Returning entropy in J/mol/K")
            unit_conv = 4.184
            
        return sum(entropies)*unit_conv

    
    def __dict_to_formula(self, formula_dict):
        
        """
        Convert a formula dictionary into a formula string.
        Example:
        ```dict_to_formula(parse_formula("CO3-2"))```
        """
        
        formula_string = ""
        for key in formula_dict.keys():
            if abs(formula_dict[key]) == 1:
                v = ""
            else:
                v = formula_dict[key]
                if (v).is_integer():
                    v = int(v)

            formula_string = formula_string + str(key) + str(v)
        return formula_string

    
    def __match_groups(self, show=False, save=False):
        patterns = self.pattern_dict.keys()
        mol = Chem.MolFromSmiles(self.smiles)

        match_dict = dict(zip(patterns, [0]*len(patterns))) # initialize match_dict
        for pattern in patterns:
            if pattern != "Yo": # never match material point
                try:
                    match_dict[pattern] = len(mol.GetSubstructMatches(Chem.MolFromSmarts(pattern)))
                except:
                    print("Warning in match_groups(): problem",
                          "identifying SMARTS group", pattern,
                          ". Skipping this group.")

        ### check that total formula of groups matches that of the molecule
        
        # create a dictionary of element matches
        total_formula_dict = {}
        for match in match_dict.keys():
            this_match = parse_formula(self.pattern_dict[match])
            for element in this_match.keys():
                this_match[element] *= match_dict[match]
                if element in total_formula_dict:
                    total_formula_dict[element] += this_match[element]
                else:
                    total_formula_dict[str(element)] = 0
                    total_formula_dict[element] += this_match[element]
        
        # remove keys of elements with a value of 0 (e.g. "H":0.0)
        for key in list(total_formula_dict.keys()):
            if total_formula_dict[key] == 0.0:
                total_formula_dict.pop(key, None)
        
        # retrieve individual charges that contribute to net charge
        atomic_info = self.pcp_compound[0].record["atoms"]
        chargedict = {}
        if "charge" in atomic_info.keys():
            all_charges = [chargedict.get("value", 0) for chargedict in atomic_info["charge"]]
            pos_charge = sum([charge for charge in all_charges if charge > 0])
            neg_charge = abs(sum([charge for charge in all_charges if charge < 0]))
            if pos_charge > 0:
                chargedict['+']=float(pos_charge)
            if neg_charge > 0:
                chargedict['-']=float(neg_charge)
        else:
            chargedict = {}

        # perform the comparison
        test_dict = parse_formula(self.pcp_compound[0].molecular_formula)
        test_dict.update(chargedict)
        if total_formula_dict != test_dict:
            mssg = "The formula of " + self.name + \
                " does not equal the the elemental composition of the " + \
                "matched groups. This could be because the database " + \
                "is missing representative groups.\nFormula of " + \
                self.name + ":\n"
            pcp_dict = parse_formula(self.pcp_compound[0].molecular_formula)
            pcp_dict.update(chargedict)
            mssg = mssg + str(pcp_dict) + "\nTotal formula of group matches:\n" + \
                str(total_formula_dict)
            mssg = mssg + "\nIncomplete group matches:\n" + \
                str({k:v for k,v in zip(match_dict.keys(), match_dict.values()) if v!= 0})
            self.err_handler.raise_exception(mssg)
        
        # add molecular formula to match dictionary
        match_dict["formula"] = self.__dict_to_formula(total_formula_dict)
        
        return match_dict

    
    def __display_molecule(self, show=True, save=False):
        mol_smiles = Chem.MolFromSmiles(self.smiles)
        
        mc = Chem.Mol(mol_smiles.ToBinary())
        molSize=(450, 150)
        
        if not mc.GetNumConformers():
            #Compute 2D coordinates
            rdDepictor.Compute2DCoords(mc)
        # init the drawer with the size
        drawer = rdMolDraw2D.MolDraw2DSVG(molSize[0],molSize[1])
        #draw the molcule
        drawer.DrawMolecule(mc)
        drawer.FinishDrawing()
        # get the SVG string
        svg = drawer.GetDrawingText()

        if show:
            # fix the svg string and display it
            display(SVG(svg.replace('svg:','')))

        if save:
            os.makedirs("mol_svg", exist_ok=True)
            os.makedirs("mol_png", exist_ok=True)
            #Draw.MolToFile( mol, "mol_svg/"+self.name+".svg" )
            Draw.MolToFile(mol_smiles, "mol_png/"+self.name+".png")


    def __test_group_match(self):
        match_dict = self.__match_groups()
        return {key:value for key,value in zip(match_dict.keys(), match_dict.values()) if value !=0}

    
    def __est_joback(self):
        
        # values to be added to final estimate of each property
        joback_props = {"Gig":53.88, "Hig":68.29, # kJ/mol
                        "Cpig_a":-37.93, "Cpig_b":0.210, # j/mol/K
                        "Cpig_c":-3.91*10**-4, "Cpig_d":2.06*10**-7} # j/mol/K
        
        for prop in joback_props.keys():
            mol_prop = 0
            error_groups = []

            for group in self.groups:

                try:
                    contains_group = self.group_matches.loc[self.name, group][0] != 0
                except:
                    contains_group = self.group_matches.loc[self.name, group] != 0

                # if this molecule contains this group...
                if contains_group:
                    try:
                        # add number of groups multiplied by its contribution
                        mol_prop += self.group_matches.loc[self.name, group] * float(self.group_data.loc[group, prop])
                    except:
                        error_groups.append(group)
                        
                if len(error_groups) == 0:
                    self.__setattr__(prop, mol_prop+joback_props[prop])
                else:
                    self.err_handler.raise_exception("" + self.name + " encountered errors with group(s): "
                        ""+str(error_groups) + ". Are these groups assigned "
                        "ideal gas properties in the Joback data file?")
            
        # calculate Cpig
        T=298.15
        self.Cpig = self.Cpig_a + self.Cpig_b*T + self.Cpig_c*T**2 +\
                    self.Cpig_d*T**3
        
        # calculate Sig
        self.Sig = ((self.Gig - self.Hig)/-298.15)*1000 + self.Selements
    
    
    def __est_calcs(self):

        props = ["Gh", "Hh", "Sh", "Cph", "V"]
        
        for prop in props:
            if self.__getattribute__(prop) == None:
                err_str = prop + "_err"

                # derive Sh, entropy of hydration, in J/mol K
                if prop == "Sh":
                    # Entropy calculated from S = (G-H)/(-Tref)
                    mol_prop = (float(self.Gh) - float(self.Hh))/(-298.15)
                    mol_prop = mol_prop*1000 # convert kJ/molK to J/molK

                    # propagate error from Gh and Hh to estimate Sh error.
                    # equation used: Sh_err = Sh*sqrt((Gh_err/Gh)**2 + (Hh_err/Hh)**2)
                    Gh_err_float = float(self.Gh_err)/float(self.Gh)
                    Hh_err_float = float(self.Hh_err)/float(self.Hh)
                    mol_err = abs(mol_prop)*math.sqrt(Gh_err_float**2 + Hh_err_float**2)

                    # check whether Gh or Hh as the fewest sigfigs
                    sf = min([find_sigfigs(self.Gh), find_sigfigs(self.Hh)])

                    # round Sh to this number of sigfigs
                    mol_prop = sigfig.round(str(mol_prop), sigfigs=sf)

                    # check how many decimal places Sh has after sigfig rounding
                    if "." in mol_prop:
                        this_split = mol_prop.split(".")
                        n_dec = len(this_split[len(this_split)-1])
                    else:
                        n_dec = 0

                    # assign Sh and Sh_err
                    #self.__setattr__(prop, mol_prop) # for trailing zeros, but must store Sh as str.
                    #self.__setattr__(err_str, format(mol_err, '.'+str(n_dec)+'f')) # for trailing zeros, but must store Sh_err as str.
                    self.__setattr__(prop, float(mol_prop))
                    self.__setattr__(err_str, round(float(mol_err), n_dec))


                    continue

                # For all properties except for Sh:
                # initialize variables and lists
                mol_prop = 0
                mol_err = 999
                prop_errs = []
                n_dec = 999
                error_groups = []

                for group in self.groups:

                    try:
                        contains_group = self.group_matches.loc[self.name, group][0] != 0
                    except:
                        contains_group = self.group_matches.loc[self.name, group] != 0

                    # if this molecule contains this group...
                    if contains_group:

                        try:
                            # add number of groups multiplied by its contribution
                            mol_prop += self.group_matches.loc[self.name, group] * float(self.group_data.loc[group, prop])

                            # round property to smallest number of decimal places
                            if "." in self.group_data.loc[group, prop]:
                                this_split = self.group_data.loc[group, prop].split(".")
                                n_dec_group = len(this_split[len(this_split)-1])
                            else:
                                n_dec_group = 0

                            if n_dec_group < n_dec:
                                n_dec = n_dec_group

                            # handle group std errors
                            try:
                                float(self.group_data.loc[group, err_str]) # assert that this group's error is numeric
                                prop_errs.append(self.group_data.loc[group, err_str]) # append error
                            except:
                                # if group's error is non-numeric, pass
                                pass

                        except:
                            error_groups.append(group)

                if len(error_groups) == 0:

                    # add Y0
                    mol_prop += float(self.group_data.loc["Yo", prop])

                    # propagate error of summed groups: sqrt(a**2 + b**2 + ...)
                    mol_err = round(math.sqrt(sum([float(err)**2 for err in prop_errs])), n_dec)

#                     # format output as string (preserves trailing zeros)
#                     mol_prop = format(mol_prop, '.'+str(n_dec)+'f')
#                     mol_err = format(mol_err, '.'+str(n_dec)+'f')

                    self.__setattr__(prop, mol_prop)
                    self.__setattr__(err_str, mol_err)

                else:
                    msg = self.name + " encountered errors with group(s): " +\
                        str(error_groups) + ". Are these groups assigned "+\
                        "hydration properties in the data file?"
                    self.err_handler.raise_exception(msg)
        
        ig_gas_error = False
        if self.Gig != None and self.Hig != None and self.Sig != None and self.Cpig != None:
            # no ideal gas estimation needed.
            # TODO: Modify if statement to allow calculating remainder if
            # two out of three are provided for: Gig, Hig, Sig
            pass
        else:
            # Joback estimation of the Gibbs free energy of formation of the
            # ideal gas (Joule-based).
            try:
                J_estimate = Joback(self.name)
                
                if self.Gig == None:
                    self.Gig = J_estimate["Gig"]
                if self.Hig == None:
                    self.Hig = J_estimate["Hig"]
                if self.Sig == None:
                    self.Sig = ((float(self.Gig) - float(self.Hig))/-298.15)*1000 + self.Selements
                if self.Cpig == None:
                    self.Cpig = J_estimate["Cpig"]
            except:
                ig_gas_error = True

        if ig_gas_error:
            self.err_handler.raise_exception("The properties of aqueous "+self.name+" could not be "
                "estimated because its ideal gas properties could not be "
                "estimated with the Joback method.")
                
        # estimate the Gibbs free energy of formation of the aqueous molecule by summing
        # its ideal gas and hydration properties.
        # TODO: if ideal gas properties are NaN, ensure aqueous properties are too.
        # TODO: determine estimation error of ideal gas, then propagate with hydration errors.
        # TODO: propagate errors into HKF parameter estimations.
        
        try:
            if self.Gaq == None:
                self.Gaq = float(self.Gig) + float(self.Gh)
        except:
            self.Gaq = float("NaN")

        try:
            if self.Haq == None:
                self.Haq = float(self.Hig) + float(self.Hh)
        except:
            self.Haq = float("NaN")

        try:
            if self.Saq == None:
                self.Saq = ((float(self.Gaq) - float(self.Haq))/-298.15)*1000 + self.Selements
        except:
            self.Saq = float("NaN")
        try:
            if self.Cpaq == None:
                self.Cpaq = self.Cpig + float(self.Cph)
        except:
            self.Cpaq = float("NaN") 

        # calculate HKF parameters
        try:
            # find_HKF requires calories
            hkf_dict, eq = find_HKF(Gh=float(self.Gh)*1000/4.184,
                                    V=float(self.V),
                                    Cp=float(self.Cpaq)/4.184,
                                    Gf=float(self.Gaq)*1000/4.184,
                                    Hf=float(self.Haq)*1000/4.184,
                                    Saq=float(self.Saq)/4.184,
                                    Z=float(self.charge))

            properties_to_convert = ["G", "H", "S", "Cp", "a1", "a2", "a3", "a4", "c1", "c2", "omega"]
            for k,v in zip(hkf_dict.keys(), hkf_dict.values()):
                if k in properties_to_convert:
                    hkf_dict[k] = v*4.184
                else:
                    hkf_dict[k] = v

            for param in ["a1", "a2", "a3", "a4", "c1", "c2", "omega"]:
                self.__setattr__(param, hkf_dict[param])

        except:
            print("Could not calculate HKF parameters for", self.name)
            pass

    # convert dataframe into an OBIGT table with an option to write to a csv file.
    def __convert_to_OBIGT(self):


        df_prop = {'name':[self.name],
                   'abbrv':[self.formula],
                   'formula':[self.formula],
                   'state':['aq'],
                   'ref1':['AqOrg'],
                   'ref2':['GrpAdd'],
                   'date':[datetime.now().strftime("%d/%m/%Y %H:%M:%S")],
                   'E_units':['J'],
                   'G':[float(self.Gaq)*1000],
                   'H':[float(self.Haq)*1000],
                   'S':[float(self.Saq)],
                   'Cp':[float(self.Cpaq)],
                   'V':[float(self.V)]}
        try:
            # if HKF parameters could be estimated
            df_hkf = {'a1.a':[float(self.a1)],
                      'a2.b':[float(self.a2)],
                      'a3.c':[float(self.a3)],
                      'a4.d':[float(self.a4)],
                      'c1.e':[float(self.c1)],
                      'c2.f':[float(self.c2)],
                      'omega.lambda':[float(self.omega)],
                      'z.T':[self.charge]}
        except:
            df_hkf = {'a1.a':[float("NaN")],
                      'a2.b':[float("NaN")],
                      'a3.c':[float("NaN")],
                      'a4.d':[float("NaN")],
                      'c1.e':[float("NaN")],
                      'c2.f':[float("NaN")],
                      'omega.lambda':[float("NaN")],
                      'z.T':[self.charge]}

        df_prop.update(df_hkf)
        
        df = pd.DataFrame(df_prop)
        
        return df

    def __estimate(self):
        self.__set_groups()
        self.__est_calcs()
        return self.__convert_to_OBIGT()

    def __estimate_joback(self):
        self.__set_groups()
        self.__est_joback()

def Joback(name):
    
    """
    Estimate standard state ideal gas properties of a molecule using the Joback
    method. (Joback K. G., Reid R. C., "Estimation of Pure-Component Properties
    from Group-Contributions", Chem. Eng. Commun., 57, 233–243, 1987.)
    
    Parameters
    ----------
    name : str
        Name of the molecule for which to estimate ideal gas properties.
        
    Returns
    ----------
    dict
        A dictionary containing standard state ideal gas properties estimated
        with the Joback method:
        
        - Gig : Ideal gas Gibbs free energy of formation, kJ/mol.
        - Hig : Ideal gas enthalpy of formation, kJ/mol.
        - Sig : Ideal gas entropy, J/mol/K.
        - Cpig : Ideal gas isobaric heat capacity, J/mol/K.
    """
    
    ig_est = Estimate(name, state='gas', show=False,
                      group_data=pkg_resources.resource_stream(__name__, 'data/joback_groups.csv'), index_col="groups")
    
    return {'Gig':ig_est.Gig, 'Hig':ig_est.Hig,
            'Sig':ig_est.Sig, 'Cpig':ig_est.Cpig}

Functions

def Joback(name)

Estimate standard state ideal gas properties of a molecule using the Joback method. (Joback K. G., Reid R. C., "Estimation of Pure-Component Properties from Group-Contributions", Chem. Eng. Commun., 57, 233–243, 1987.)

Parameters

name : str: Name of the molecule for which to estimate ideal gas properties.

Returns

dict

A dictionary containing standard state ideal gas properties estimated with the Joback method:

Gig : Ideal gas Gibbs free energy of formation, kJ/mol.
Hig : Ideal gas enthalpy of formation, kJ/mol.
Sig : Ideal gas entropy, J/mol/K.
Cpig : Ideal gas isobaric heat capacity, J/mol/K.

Expand source code

def Joback(name):
    
    """
    Estimate standard state ideal gas properties of a molecule using the Joback
    method. (Joback K. G., Reid R. C., "Estimation of Pure-Component Properties
    from Group-Contributions", Chem. Eng. Commun., 57, 233–243, 1987.)
    
    Parameters
    ----------
    name : str
        Name of the molecule for which to estimate ideal gas properties.
        
    Returns
    ----------
    dict
        A dictionary containing standard state ideal gas properties estimated
        with the Joback method:
        
        - Gig : Ideal gas Gibbs free energy of formation, kJ/mol.
        - Hig : Ideal gas enthalpy of formation, kJ/mol.
        - Sig : Ideal gas entropy, J/mol/K.
        - Cpig : Ideal gas isobaric heat capacity, J/mol/K.
    """
    
    ig_est = Estimate(name, state='gas', show=False,
                      group_data=pkg_resources.resource_stream(__name__, 'data/joback_groups.csv'), index_col="groups")
    
    return {'Gig':ig_est.Gig, 'Hig':ig_est.Hig,
            'Sig':ig_est.Sig, 'Cpig':ig_est.Cpig}

def find_sigfigs(x)

Get the number of significant digits in a string representing a number up to eight digits long.

Parameters

x : str: A string denoting a number. This can include scientific notation.

Parameters

int: The number of significant digits.

Examples

>>> find_sigfigs("5.220")
4

This also takes into account scientific notation.

>>> find_sigfigs("1.23e+3")
3

Insignificant zeros are ignored.

>>> find_sigfigs("4000")
1

A decimal point denotes that zeros are significant.

>>> find_sigfigs("4000.")
4

Expand source code

def find_sigfigs(x):
    
    '''
    Get the number of significant digits in a string representing a number up to
    eight digits long.

    Parameters
    ----------
    x : str
        A string denoting a number. This can include scientific notation.
    
    Parameters
    ----------
    int
        The number of significant digits.
    
    Examples
    --------
    >>> find_sigfigs("5.220")
    4
    
    This also takes into account scientific notation.
    
    >>> find_sigfigs("1.23e+3")
    3
    
    Insignificant zeros are ignored.
    
    >>> find_sigfigs("4000")
    1
    
    A decimal point denotes that zeros are significant.
    
    >>> find_sigfigs("4000.")
    4
    '''
    
    x = str(x)
    
    # change all the 'E' to 'e'
    x = x.lower()
    if ('-' == x[0]):
        x = x[1:]
    if ('e' in x):
        # return the length of the numbers before the 'e'
        myStr = x.split('e')
        return len(myStr[0]) - 1  # to compenstate for the decimal point
    else:
        # put it in e format and return the result of that
        ### NOTE: because of the 8 below, it may do crazy things when it parses 9 sigfigs
        n = ('%.*e' % (8, float(x))).split('e')
        # remove and count the number of removed user added zeroes. (these are sig figs)
        if '.' in x:
            s = x.replace('.', '')
            #number of zeroes to add back in
            l = len(s) - len(s.rstrip('0'))
            #strip off the python added zeroes and add back in the ones the user added
            n[0] = n[0].rstrip('0') + ''.join(['0' for num in range(l)])
        else:
            #the user had no trailing zeroes so just strip them all
            n[0] = n[0].rstrip('0')
        #pass it back to the beginning to be parsed
    return find_sigfigs('e'.join(n))

Classes

class Estimate (name, show=True, group_data=None, test=False, state='aq', save=False, hide_traceback=True, **kwargs)

Estimate thermodynamic properties of an aqueous organic molecule.

Parameters

name : str

Name of the aqueous organic molecule that will have its thermodynamic properties estimated.

show : bool, default True

Show a diagram of the molecule?

group_data : str, optional

Name of a CSV containing custom group contribution data.

test : bool, default False

Perform a simple group matching test instead of estimating properties?

state : str, default "aq"

Can be "aq" or "gas". Estimate the properties of an aqueous molecule or an ideal gas?

save : bool, default False

Save molecular structure figures as PNG and SVG?

**kwargs : numeric or str, optional

Known standard state partial molal thermodynamic properties at 298.15 K and 1 bar. These will not be estimated, but instead will be used to estimate other properties and parameters. Valid **kwargs include:

Gh : Gibbs free energy change of hydration, kJ/mol.
Hh : Enthalpy change of hydration, kJ/mol.
Sh : Entropy change of hydration, J/mol/K.
Cph : Heat capacity change of hydration, J/mol/K.
V : Volume change of hydration, cm3/mol.
Gh_err : Error associated with Gh (default 0 kJ/mol).
Hh_err : Error associated with Hh (default 0 kJ/mol).
Sh_err : Error associated with Sh (default 0 J/mol/K).
Cph_err : Error associated with Cph (default 0 J/mol/K).
V_err : error associated with V (default 0 cm3/mol).
Gig : Ideal gas Gibbs free energy of formation, kJ/mol.
Hig : Ideal gas enthalpy of formation, kJ/mol.
Sig : Ideal gas entropy, J/mol/K.
Cpig : Ideal gas isobaric heat capacity, J/mol/K.
Gaq : Aqueous Gibbs free energy of formation, kJ/mol.
Haq : Aqueous enthalpy of formation, kJ/mol.
Saq : Aqueous entropy, J/mol/K.
Cpaq : Aqueous isobaric heat capacity, J/mol/K.

Attributes

pcp_compound : pcp.get_compounds(): PubChemPy compound object.
smiles : str: Canonical SMILES string.
formula : str: Molecular formula.
formula_dict : dict: Dictionary of element abundance in the molecular formula.
element_data : pd.DataFrame(): Table of element data adapted from Jeff Dick's CHNOSZ package for R.
Selements : numeric: Sum of the contributions of the entropies of the elements according to Cox, J. D., Wagman, D. D., & Medvedev, V. A. (1989). CODATA key values for thermodynamics. Chem/Mats-Sci/E.
note : str: Notes and warnings associated with the estimation.
charge : numeric: The charge of the molecule.
OBIGT : pd.DataFrame(): Table of estimated thermodynamic properties and parameters. The format is styled after Jeff Dick's OBIGT thermodynamic table in the CHNOSZ package (see https://chnosz.net/manual/thermo.html).
hide_traceback : bool, default True: Hide traceback message when encountering errors handled by this function? When True, error messages handled by this class will be short and to the point.

Expand source code

class Estimate():
    
    """
    Estimate thermodynamic properties of an aqueous organic molecule.
    
    Parameters
    ----------
    name : str
        Name of the aqueous organic molecule that will have its thermodynamic
        properties estimated.
                       
    show : bool, default True
        Show a diagram of the molecule?
    
    group_data : str, optional
        Name of a CSV containing custom group contribution data.
    
    test : bool, default False
        Perform a simple group matching test instead of estimating properties?

    state : str, default "aq"
        Can be "aq" or "gas". Estimate the properties of an aqueous molecule or
        an ideal gas?

    save : bool, default False
        Save molecular structure figures as PNG and SVG?
    
    **kwargs : numeric or str, optional
        Known standard state partial molal thermodynamic properties at 298.15 K
        and 1 bar. These will not be estimated, but instead will be used to
        estimate other properties and parameters. Valid **kwargs include:
        
        - Gh : Gibbs free energy change of hydration, kJ/mol.
        - Hh : Enthalpy change of hydration, kJ/mol.
        - Sh : Entropy change of hydration, J/mol/K.
        - Cph : Heat capacity change of hydration, J/mol/K.
        - V : Volume change of hydration, cm3/mol.
        - Gh_err : Error associated with Gh (default 0 kJ/mol).
        - Hh_err : Error associated with Hh (default 0 kJ/mol).
        - Sh_err : Error associated with Sh (default 0 J/mol/K).
        - Cph_err : Error associated with Cph (default 0 J/mol/K).
        - V_err : error associated with V (default 0 cm3/mol).
        - Gig : Ideal gas Gibbs free energy of formation, kJ/mol.
        - Hig : Ideal gas enthalpy of formation, kJ/mol.
        - Sig : Ideal gas entropy, J/mol/K.
        - Cpig : Ideal gas isobaric heat capacity, J/mol/K.
        - Gaq : Aqueous Gibbs free energy of formation, kJ/mol.
        - Haq : Aqueous enthalpy of formation, kJ/mol.
        - Saq : Aqueous entropy, J/mol/K.
        - Cpaq : Aqueous isobaric heat capacity, J/mol/K.
    
    Attributes
    ----------
    pcp_compound : pcp.get_compounds()
        PubChemPy compound object.
        
    smiles : str
        Canonical SMILES string.
        
    formula : str
        Molecular formula.
        
    formula_dict : dict
        Dictionary of element abundance in the molecular formula.
        
    element_data : pd.DataFrame()
        Table of element data adapted from Jeff Dick's CHNOSZ package for R.
        
    Selements : numeric
        Sum of the contributions of the entropies of the elements according to
        Cox, J. D., Wagman, D. D., & Medvedev, V. A. (1989). CODATA key values
        for thermodynamics. Chem/Mats-Sci/E.
        
    note : str
        Notes and warnings associated with the estimation.
        
    charge : numeric
        The charge of the molecule.
        
    OBIGT : pd.DataFrame()
        Table of estimated thermodynamic properties and parameters. The format
        is styled after Jeff Dick's OBIGT thermodynamic table in the CHNOSZ
        package (see https://chnosz.net/manual/thermo.html).

    hide_traceback : bool, default True
        Hide traceback message when encountering errors handled by this function?
        When True, error messages handled by this class will be short and to
        the point.
    
    """
    
    def __init__(self, name, show=True, group_data=None,
                       test=False, state='aq', save=False, hide_traceback=True,
                       **kwargs):
                       # E_units="J" # not implemented... tricky because groups
                                     # are in both kJ and J units.

        self.err_handler = Error_Handler(clean=hide_traceback)
        
        self.name = name
        
        # valid kwargs
        self.Gh = None
        self.Hh = None
        self.Sh = None
        self.Cph = None
        self.V = None
        self.Gh_err = 0
        self.Hh_err = 0
        self.Sh_err = 0
        self.Cph_err = 0
        self.V_err = 0
        self.Gig = None
        self.Hig = None
        self.Sig = None
        self.Cpig_a = None
        self.Cpig_b = None
        self.Cpig_c = None
        self.Cpig_d = None
        self.Cpig = None
        self.Gaq = None
        self.Haq = None
        self.Saq = None
        self.Cpaq = None

        for key, value in kwargs.items():
            self.__setattr__(key, value)
        
        # load group contribution data
        if group_data == None:
            if state == "aq":
                group_data = pkg_resources.resource_stream(__name__, "data/group_contribution_data.csv")
            elif state == "gas":
                group_data=pkg_resources.resource_stream(__name__, 'data/joback_groups.csv')
            else:
                self.err_handler.raise_exception("State is unrecognized. Must be either 'aq' or 'gas'.")
        self.__load_group_data(group_data)
        
        # look up compound on PubChem
        self.pcp_compound = pcp.get_compounds(self.name, "name")
        if len(self.pcp_compound) == 0:
            self.err_handler.raise_exception("Could not find '" + self.name + "' in PubChem's online database.")
        self.smiles = self.pcp_compound[0].canonical_smiles
        self.formula = self.pcp_compound[0].molecular_formula
        self.formula_dict = parse_formula(self.formula)
        
        if "-" in self.formula_dict.keys() or "+" in self.formula_dict.keys():
            self.err_handler.raise_exception(self.name + " cannot be estimated because it has a net charge.")

        if show:
            self.__display_molecule(save=save)

        if test:
            print(self.__test_group_match())            
        else:
            # load properties of the elements
            # Cox, J. D., Wagman, D. D., and Medvedev, V. A., CODATA Key Values
            # for Thermodynamics, Hemisphere Publishing Corp., New York, 1989.
            # Compiled into a CSV by Jeffrey Dick for CHNOSZ
            element_data = pd.read_csv(pkg_resources.resource_stream(__name__, 'data/element.csv'), index_col="element")
            self.element_data = element_data.loc[element_data['source'] == "CWM89"]
            
            self.Selements = self.__entropy()
            self.note = ""
            self.charge = 0 # !
            
            if state == 'aq':
                self.OBIGT = self.__estimate()
            elif state == 'gas':
                self.__estimate_joback()

    def __load_group_data(self, db_filename):
        self.group_data = pd.read_csv(db_filename, dtype=str)
        self.group_data['elem'] = self.group_data['elem'].fillna('')
        self.pattern_dict = pd.Series(self.group_data["elem"].values,
                                      index=self.group_data["smarts"]).to_dict()
        self.group_data = self.group_data.set_index("smarts")

        
    def __set_groups(self):
        
        self.group_matches = pd.DataFrame(self.__match_groups(), index=[self.name])

        # remove columns with no matches
        self.group_matches = self.group_matches.loc[:, (self.group_matches.sum(axis=0) != 0)]
        
        # get a list of relevent groups
        self.groups = [grp for grp in self.group_matches.columns if grp != "formula"]
        

    def __entropy(self, unit="J/mol/K"):
        
        """
        Calculate the standard molal entropy of elements in a molecule.
        """

        entropies = [(self.element_data.loc[elem, "s"]/self.element_data.loc[elem, "n"])*self.formula_dict[elem] for elem in list(self.formula_dict.keys())]
        if unit == "J/mol/K":
            unit_conv = 4.184
        elif unit == "cal/mol/K":
            unit_conv = 1
        else:
            print("Warning in entropy: specified unit", unit,
                  "is not recognized. Returning entropy in J/mol/K")
            unit_conv = 4.184
            
        return sum(entropies)*unit_conv

    
    def __dict_to_formula(self, formula_dict):
        
        """
        Convert a formula dictionary into a formula string.
        Example:
        ```dict_to_formula(parse_formula("CO3-2"))```
        """
        
        formula_string = ""
        for key in formula_dict.keys():
            if abs(formula_dict[key]) == 1:
                v = ""
            else:
                v = formula_dict[key]
                if (v).is_integer():
                    v = int(v)

            formula_string = formula_string + str(key) + str(v)
        return formula_string

    
    def __match_groups(self, show=False, save=False):
        patterns = self.pattern_dict.keys()
        mol = Chem.MolFromSmiles(self.smiles)

        match_dict = dict(zip(patterns, [0]*len(patterns))) # initialize match_dict
        for pattern in patterns:
            if pattern != "Yo": # never match material point
                try:
                    match_dict[pattern] = len(mol.GetSubstructMatches(Chem.MolFromSmarts(pattern)))
                except:
                    print("Warning in match_groups(): problem",
                          "identifying SMARTS group", pattern,
                          ". Skipping this group.")

        ### check that total formula of groups matches that of the molecule
        
        # create a dictionary of element matches
        total_formula_dict = {}
        for match in match_dict.keys():
            this_match = parse_formula(self.pattern_dict[match])
            for element in this_match.keys():
                this_match[element] *= match_dict[match]
                if element in total_formula_dict:
                    total_formula_dict[element] += this_match[element]
                else:
                    total_formula_dict[str(element)] = 0
                    total_formula_dict[element] += this_match[element]
        
        # remove keys of elements with a value of 0 (e.g. "H":0.0)
        for key in list(total_formula_dict.keys()):
            if total_formula_dict[key] == 0.0:
                total_formula_dict.pop(key, None)
        
        # retrieve individual charges that contribute to net charge
        atomic_info = self.pcp_compound[0].record["atoms"]
        chargedict = {}
        if "charge" in atomic_info.keys():
            all_charges = [chargedict.get("value", 0) for chargedict in atomic_info["charge"]]
            pos_charge = sum([charge for charge in all_charges if charge > 0])
            neg_charge = abs(sum([charge for charge in all_charges if charge < 0]))
            if pos_charge > 0:
                chargedict['+']=float(pos_charge)
            if neg_charge > 0:
                chargedict['-']=float(neg_charge)
        else:
            chargedict = {}

        # perform the comparison
        test_dict = parse_formula(self.pcp_compound[0].molecular_formula)
        test_dict.update(chargedict)
        if total_formula_dict != test_dict:
            mssg = "The formula of " + self.name + \
                " does not equal the the elemental composition of the " + \
                "matched groups. This could be because the database " + \
                "is missing representative groups.\nFormula of " + \
                self.name + ":\n"
            pcp_dict = parse_formula(self.pcp_compound[0].molecular_formula)
            pcp_dict.update(chargedict)
            mssg = mssg + str(pcp_dict) + "\nTotal formula of group matches:\n" + \
                str(total_formula_dict)
            mssg = mssg + "\nIncomplete group matches:\n" + \
                str({k:v for k,v in zip(match_dict.keys(), match_dict.values()) if v!= 0})
            self.err_handler.raise_exception(mssg)
        
        # add molecular formula to match dictionary
        match_dict["formula"] = self.__dict_to_formula(total_formula_dict)
        
        return match_dict

    
    def __display_molecule(self, show=True, save=False):
        mol_smiles = Chem.MolFromSmiles(self.smiles)
        
        mc = Chem.Mol(mol_smiles.ToBinary())
        molSize=(450, 150)
        
        if not mc.GetNumConformers():
            #Compute 2D coordinates
            rdDepictor.Compute2DCoords(mc)
        # init the drawer with the size
        drawer = rdMolDraw2D.MolDraw2DSVG(molSize[0],molSize[1])
        #draw the molcule
        drawer.DrawMolecule(mc)
        drawer.FinishDrawing()
        # get the SVG string
        svg = drawer.GetDrawingText()

        if show:
            # fix the svg string and display it
            display(SVG(svg.replace('svg:','')))

        if save:
            os.makedirs("mol_svg", exist_ok=True)
            os.makedirs("mol_png", exist_ok=True)
            #Draw.MolToFile( mol, "mol_svg/"+self.name+".svg" )
            Draw.MolToFile(mol_smiles, "mol_png/"+self.name+".png")


    def __test_group_match(self):
        match_dict = self.__match_groups()
        return {key:value for key,value in zip(match_dict.keys(), match_dict.values()) if value !=0}

    
    def __est_joback(self):
        
        # values to be added to final estimate of each property
        joback_props = {"Gig":53.88, "Hig":68.29, # kJ/mol
                        "Cpig_a":-37.93, "Cpig_b":0.210, # j/mol/K
                        "Cpig_c":-3.91*10**-4, "Cpig_d":2.06*10**-7} # j/mol/K
        
        for prop in joback_props.keys():
            mol_prop = 0
            error_groups = []

            for group in self.groups:

                try:
                    contains_group = self.group_matches.loc[self.name, group][0] != 0
                except:
                    contains_group = self.group_matches.loc[self.name, group] != 0

                # if this molecule contains this group...
                if contains_group:
                    try:
                        # add number of groups multiplied by its contribution
                        mol_prop += self.group_matches.loc[self.name, group] * float(self.group_data.loc[group, prop])
                    except:
                        error_groups.append(group)
                        
                if len(error_groups) == 0:
                    self.__setattr__(prop, mol_prop+joback_props[prop])
                else:
                    self.err_handler.raise_exception("" + self.name + " encountered errors with group(s): "
                        ""+str(error_groups) + ". Are these groups assigned "
                        "ideal gas properties in the Joback data file?")
            
        # calculate Cpig
        T=298.15
        self.Cpig = self.Cpig_a + self.Cpig_b*T + self.Cpig_c*T**2 +\
                    self.Cpig_d*T**3
        
        # calculate Sig
        self.Sig = ((self.Gig - self.Hig)/-298.15)*1000 + self.Selements
    
    
    def __est_calcs(self):

        props = ["Gh", "Hh", "Sh", "Cph", "V"]
        
        for prop in props:
            if self.__getattribute__(prop) == None:
                err_str = prop + "_err"

                # derive Sh, entropy of hydration, in J/mol K
                if prop == "Sh":
                    # Entropy calculated from S = (G-H)/(-Tref)
                    mol_prop = (float(self.Gh) - float(self.Hh))/(-298.15)
                    mol_prop = mol_prop*1000 # convert kJ/molK to J/molK

                    # propagate error from Gh and Hh to estimate Sh error.
                    # equation used: Sh_err = Sh*sqrt((Gh_err/Gh)**2 + (Hh_err/Hh)**2)
                    Gh_err_float = float(self.Gh_err)/float(self.Gh)
                    Hh_err_float = float(self.Hh_err)/float(self.Hh)
                    mol_err = abs(mol_prop)*math.sqrt(Gh_err_float**2 + Hh_err_float**2)

                    # check whether Gh or Hh as the fewest sigfigs
                    sf = min([find_sigfigs(self.Gh), find_sigfigs(self.Hh)])

                    # round Sh to this number of sigfigs
                    mol_prop = sigfig.round(str(mol_prop), sigfigs=sf)

                    # check how many decimal places Sh has after sigfig rounding
                    if "." in mol_prop:
                        this_split = mol_prop.split(".")
                        n_dec = len(this_split[len(this_split)-1])
                    else:
                        n_dec = 0

                    # assign Sh and Sh_err
                    #self.__setattr__(prop, mol_prop) # for trailing zeros, but must store Sh as str.
                    #self.__setattr__(err_str, format(mol_err, '.'+str(n_dec)+'f')) # for trailing zeros, but must store Sh_err as str.
                    self.__setattr__(prop, float(mol_prop))
                    self.__setattr__(err_str, round(float(mol_err), n_dec))


                    continue

                # For all properties except for Sh:
                # initialize variables and lists
                mol_prop = 0
                mol_err = 999
                prop_errs = []
                n_dec = 999
                error_groups = []

                for group in self.groups:

                    try:
                        contains_group = self.group_matches.loc[self.name, group][0] != 0
                    except:
                        contains_group = self.group_matches.loc[self.name, group] != 0

                    # if this molecule contains this group...
                    if contains_group:

                        try:
                            # add number of groups multiplied by its contribution
                            mol_prop += self.group_matches.loc[self.name, group] * float(self.group_data.loc[group, prop])

                            # round property to smallest number of decimal places
                            if "." in self.group_data.loc[group, prop]:
                                this_split = self.group_data.loc[group, prop].split(".")
                                n_dec_group = len(this_split[len(this_split)-1])
                            else:
                                n_dec_group = 0

                            if n_dec_group < n_dec:
                                n_dec = n_dec_group

                            # handle group std errors
                            try:
                                float(self.group_data.loc[group, err_str]) # assert that this group's error is numeric
                                prop_errs.append(self.group_data.loc[group, err_str]) # append error
                            except:
                                # if group's error is non-numeric, pass
                                pass

                        except:
                            error_groups.append(group)

                if len(error_groups) == 0:

                    # add Y0
                    mol_prop += float(self.group_data.loc["Yo", prop])

                    # propagate error of summed groups: sqrt(a**2 + b**2 + ...)
                    mol_err = round(math.sqrt(sum([float(err)**2 for err in prop_errs])), n_dec)

#                     # format output as string (preserves trailing zeros)
#                     mol_prop = format(mol_prop, '.'+str(n_dec)+'f')
#                     mol_err = format(mol_err, '.'+str(n_dec)+'f')

                    self.__setattr__(prop, mol_prop)
                    self.__setattr__(err_str, mol_err)

                else:
                    msg = self.name + " encountered errors with group(s): " +\
                        str(error_groups) + ". Are these groups assigned "+\
                        "hydration properties in the data file?"
                    self.err_handler.raise_exception(msg)
        
        ig_gas_error = False
        if self.Gig != None and self.Hig != None and self.Sig != None and self.Cpig != None:
            # no ideal gas estimation needed.
            # TODO: Modify if statement to allow calculating remainder if
            # two out of three are provided for: Gig, Hig, Sig
            pass
        else:
            # Joback estimation of the Gibbs free energy of formation of the
            # ideal gas (Joule-based).
            try:
                J_estimate = Joback(self.name)
                
                if self.Gig == None:
                    self.Gig = J_estimate["Gig"]
                if self.Hig == None:
                    self.Hig = J_estimate["Hig"]
                if self.Sig == None:
                    self.Sig = ((float(self.Gig) - float(self.Hig))/-298.15)*1000 + self.Selements
                if self.Cpig == None:
                    self.Cpig = J_estimate["Cpig"]
            except:
                ig_gas_error = True

        if ig_gas_error:
            self.err_handler.raise_exception("The properties of aqueous "+self.name+" could not be "
                "estimated because its ideal gas properties could not be "
                "estimated with the Joback method.")
                
        # estimate the Gibbs free energy of formation of the aqueous molecule by summing
        # its ideal gas and hydration properties.
        # TODO: if ideal gas properties are NaN, ensure aqueous properties are too.
        # TODO: determine estimation error of ideal gas, then propagate with hydration errors.
        # TODO: propagate errors into HKF parameter estimations.
        
        try:
            if self.Gaq == None:
                self.Gaq = float(self.Gig) + float(self.Gh)
        except:
            self.Gaq = float("NaN")

        try:
            if self.Haq == None:
                self.Haq = float(self.Hig) + float(self.Hh)
        except:
            self.Haq = float("NaN")

        try:
            if self.Saq == None:
                self.Saq = ((float(self.Gaq) - float(self.Haq))/-298.15)*1000 + self.Selements
        except:
            self.Saq = float("NaN")
        try:
            if self.Cpaq == None:
                self.Cpaq = self.Cpig + float(self.Cph)
        except:
            self.Cpaq = float("NaN") 

        # calculate HKF parameters
        try:
            # find_HKF requires calories
            hkf_dict, eq = find_HKF(Gh=float(self.Gh)*1000/4.184,
                                    V=float(self.V),
                                    Cp=float(self.Cpaq)/4.184,
                                    Gf=float(self.Gaq)*1000/4.184,
                                    Hf=float(self.Haq)*1000/4.184,
                                    Saq=float(self.Saq)/4.184,
                                    Z=float(self.charge))

            properties_to_convert = ["G", "H", "S", "Cp", "a1", "a2", "a3", "a4", "c1", "c2", "omega"]
            for k,v in zip(hkf_dict.keys(), hkf_dict.values()):
                if k in properties_to_convert:
                    hkf_dict[k] = v*4.184
                else:
                    hkf_dict[k] = v

            for param in ["a1", "a2", "a3", "a4", "c1", "c2", "omega"]:
                self.__setattr__(param, hkf_dict[param])

        except:
            print("Could not calculate HKF parameters for", self.name)
            pass

    # convert dataframe into an OBIGT table with an option to write to a csv file.
    def __convert_to_OBIGT(self):


        df_prop = {'name':[self.name],
                   'abbrv':[self.formula],
                   'formula':[self.formula],
                   'state':['aq'],
                   'ref1':['AqOrg'],
                   'ref2':['GrpAdd'],
                   'date':[datetime.now().strftime("%d/%m/%Y %H:%M:%S")],
                   'E_units':['J'],
                   'G':[float(self.Gaq)*1000],
                   'H':[float(self.Haq)*1000],
                   'S':[float(self.Saq)],
                   'Cp':[float(self.Cpaq)],
                   'V':[float(self.V)]}
        try:
            # if HKF parameters could be estimated
            df_hkf = {'a1.a':[float(self.a1)],
                      'a2.b':[float(self.a2)],
                      'a3.c':[float(self.a3)],
                      'a4.d':[float(self.a4)],
                      'c1.e':[float(self.c1)],
                      'c2.f':[float(self.c2)],
                      'omega.lambda':[float(self.omega)],
                      'z.T':[self.charge]}
        except:
            df_hkf = {'a1.a':[float("NaN")],
                      'a2.b':[float("NaN")],
                      'a3.c':[float("NaN")],
                      'a4.d':[float("NaN")],
                      'c1.e':[float("NaN")],
                      'c2.f':[float("NaN")],
                      'omega.lambda':[float("NaN")],
                      'z.T':[self.charge]}

        df_prop.update(df_hkf)
        
        df = pd.DataFrame(df_prop)
        
        return df

    def __estimate(self):
        self.__set_groups()
        self.__est_calcs()
        return self.__convert_to_OBIGT()

    def __estimate_joback(self):
        self.__set_groups()
        self.__est_joback()