######################
# Molecular Solubility Prediction Web App
# (streamlit/part7/solubility-app.py)
#
# Streamlit script: reads SMILES strings from the sidebar, computes four
# RDKit descriptors per molecule and feeds them to a pickled regression
# model to predict LogS.
#
# Files opened with relative paths (must be in the working directory):
#   - solubility-logo.jpg   (banner image)
#   - solubility_model.pkl  (pre-built model)
######################

######################
# Import libraries
######################
import numpy as np
import pandas as pd
import streamlit as st
import pickle
from PIL import Image
from rdkit import Chem
from rdkit.Chem import Descriptors

######################
# Custom function
######################
# Column order of the descriptor table; generate() fills rows in this order.
DESCRIPTOR_COLUMNS = ["MolLogP", "MolWt", "NumRotatableBonds", "AromaticProportion"]


## Calculate molecular descriptors
def AromaticProportion(m):
    """Return the fraction of heavy atoms in molecule *m* that are aromatic.

    Args:
        m: An RDKit molecule.

    Returns:
        Number of aromatic atoms divided by the heavy-atom count, or 0.0
        when the molecule has no heavy atoms (avoids a ZeroDivisionError
        for empty or hydrogen-only molecules).
    """
    heavy_atoms = Descriptors.HeavyAtomCount(m)
    if heavy_atoms == 0:
        return 0.0
    aromatic_atoms = sum(
        1 for i in range(m.GetNumAtoms()) if m.GetAtomWithIdx(i).GetIsAromatic()
    )
    return aromatic_atoms / heavy_atoms


def generate(smiles, verbose=False):
    """Compute the descriptor table for an iterable of SMILES strings.

    Args:
        smiles: Iterable of SMILES strings, one molecule each.
        verbose: Unused; kept so existing callers keep working.

    Returns:
        A float DataFrame with one row per input molecule (same order) and
        the columns MolLogP, MolWt, NumRotatableBonds, AromaticProportion.
        An empty input yields an empty DataFrame with those columns.

    Raises:
        ValueError: If a SMILES string cannot be parsed by RDKit.
    """
    rows = []
    for smi in smiles:
        mol = Chem.MolFromSmiles(smi)
        if mol is None:
            # Fail here with the offending input instead of deep inside a
            # descriptor call.
            raise ValueError(f"Could not parse SMILES string: {smi!r}")
        rows.append([
            Descriptors.MolLogP(mol),
            Descriptors.MolWt(mol),
            Descriptors.NumRotatableBonds(mol),
            AromaticProportion(mol),
        ])

    # Building the frame once from a list of rows keeps the data 2-D even
    # for a single molecule, so callers no longer need a dummy first entry.
    # dtype=float keeps every column float64 (the rotatable-bond count is
    # an int).
    return pd.DataFrame(rows, columns=DESCRIPTOR_COLUMNS, dtype=float)


######################
# Page Title
######################

image = Image.open('solubility-logo.jpg')

st.image(image, use_column_width=True)

st.write("""
# Molecular Solubility Prediction Web App

This app predicts the **Solubility (LogS)** values of molecules!

Data obtained from the John S. Delaney. [ESOL:  Estimating Aqueous Solubility Directly from Molecular Structure](https://pubs.acs.org/doi/10.1021/ci034243x). ***J. Chem. Inf. Comput. Sci.*** 2004, 44, 3, 1000-1005.
***
""")


######################
# Input molecules (Side Panel)
######################

st.sidebar.header('User Input Features')

## Read SMILES input
SMILES_input = "NCCCC\nCCC\nCN"

SMILES = st.sidebar.text_area("SMILES input", SMILES_input)
# One SMILES per line; drop blank lines and stray whitespace (e.g. a
# trailing newline or '\r') so they are not parsed as molecules.
SMILES = [line.strip() for line in SMILES.splitlines() if line.strip()]

st.header('Input SMILES')
st.write(SMILES)

if SMILES:
    ## Calculate molecular descriptors
    st.header('Computed molecular descriptors')
    X = generate(SMILES)
    # Show rows numbered from 1, matching how the table was labelled before.
    st.write(X.rename(index=lambda i: i + 1))

    ######################
    # Pre-built model
    ######################

    # Reads in saved model.
    # NOTE: pickle can execute arbitrary code on load; only ever point this
    # at a model file you trust.
    with open('solubility_model.pkl', 'rb') as model_file:
        load_model = pickle.load(model_file)

    # Apply model to make predictions
    prediction = load_model.predict(X)

    st.header('Predicted LogS values')
    st.write(prediction)
else:
    st.warning('Enter at least one SMILES string in the sidebar.')