From f879cc4422aeefc1e6436bfbe71e4a7a3ed2f317 Mon Sep 17 00:00:00 2001 From: Chanin Nantasenamat <51851491+dataprofessor@users.noreply.github.com> Date: Sat, 2 May 2020 00:03:21 +0700 Subject: [PATCH] Add files via upload --- python/CDD_ML_Part_1_bioactivity_data.ipynb | 1999 +++++++++++++------ 1 file changed, 1379 insertions(+), 620 deletions(-) diff --git a/python/CDD_ML_Part_1_bioactivity_data.ipynb b/python/CDD_ML_Part_1_bioactivity_data.ipynb index c16a9d6..67b418c 100644 --- a/python/CDD_ML_Part_1_bioactivity_data.ipynb +++ b/python/CDD_ML_Part_1_bioactivity_data.ipynb @@ -69,10 +69,10 @@ "metadata": { "id": "cJGExHQBfLh7", "colab_type": "code", - "outputId": "3ea7812f-d7ec-40ab-8542-e65d4d0e39f8", + "outputId": "783c9cb5-c5d4-4545-a9d3-6c2a2f2b0e53", "colab": { "base_uri": "https://localhost:8080/", - "height": 181 + "height": 349 } }, "source": [ @@ -83,14 +83,24 @@ { "output_type": "stream", "text": [ - "Requirement already satisfied: chembl_webresource_client in /usr/local/lib/python3.6/dist-packages (0.10.1)\n", - "Requirement already satisfied: easydict in /usr/local/lib/python3.6/dist-packages (from chembl_webresource_client) (1.9)\n", + "Collecting chembl_webresource_client\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/74/c4/6526156c7e2f164a0fc061aae20d383f0b6b1e79957510a64382e676e2dc/chembl-webresource-client-0.10.1.tar.gz (53kB)\n", + "\r\u001b[K |██████▏ | 10kB 18.6MB/s eta 0:00:01\r\u001b[K |████████████▎ | 20kB 6.7MB/s eta 0:00:01\r\u001b[K |██████████████████▍ | 30kB 7.8MB/s eta 0:00:01\r\u001b[K |████████████████████████▌ | 40kB 8.4MB/s eta 0:00:01\r\u001b[K |██████████████████████████████▊ | 51kB 7.2MB/s eta 0:00:01\r\u001b[K |████████████████████████████████| 61kB 4.6MB/s \n", + "\u001b[?25hRequirement already satisfied: urllib3 in /usr/local/lib/python3.6/dist-packages (from chembl_webresource_client) (1.24.3)\n", "Requirement already satisfied: requests>=2.18.4 in /usr/local/lib/python3.6/dist-packages (from chembl_webresource_client) (2.21.0)\n", - "Requirement already satisfied: requests-cache>=0.4.7 in /usr/local/lib/python3.6/dist-packages (from chembl_webresource_client) (0.5.2)\n", - "Requirement already satisfied: urllib3 in /usr/local/lib/python3.6/dist-packages (from chembl_webresource_client) (1.24.3)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests>=2.18.4->chembl_webresource_client) (2019.11.28)\n", + "Collecting requests-cache>=0.4.7\n", + " Downloading https://files.pythonhosted.org/packages/7f/55/9b1c40eb83c16d8fc79c5f6c2ffade04208b080670fbfc35e0a5effb5a92/requests_cache-0.5.2-py2.py3-none-any.whl\n", + "Requirement already satisfied: easydict in /usr/local/lib/python3.6/dist-packages (from chembl_webresource_client) (1.9)\n", "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests>=2.18.4->chembl_webresource_client) (3.0.4)\n", - "Requirement already satisfied: idna<2.9,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests>=2.18.4->chembl_webresource_client) (2.8)\n" + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests>=2.18.4->chembl_webresource_client) (2020.4.5.1)\n", + "Requirement already satisfied: idna<2.9,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests>=2.18.4->chembl_webresource_client) (2.8)\n", + "Building wheels for collected packages: chembl-webresource-client\n", + " Building wheel for chembl-webresource-client (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for chembl-webresource-client: filename=chembl_webresource_client-0.10.1-cp36-none-any.whl size=57153 sha256=c1e2f3e1514ff0b430f04c4dfe2d1bf8fc00e424b9883d7caae9ac0f6d94c36a\n", + " Stored in directory: /root/.cache/pip/wheels/81/8e/3b/4ec9940a01673307821600bfac28b17971caf84ff2b64653cb\n", + "Successfully built chembl-webresource-client\n", + "Installing collected packages: requests-cache, chembl-webresource-client\n", + "Successfully installed chembl-webresource-client-0.10.1 requests-cache-0.5.2\n" ], "name": "stdout" } @@ -146,16 +156,16 @@ "metadata": { "id": "Vxtp79so4ZjF", "colab_type": "code", - "outputId": "b27a6a74-5a2e-45f4-da6a-2f27836a69be", + "outputId": "e90dde45-1c0d-4fd9-f693-cb6e6032e2cd", "colab": { "base_uri": "https://localhost:8080/", - "height": 507 + "height": 145 } }, "source": [ "# Target search for coronavirus\n", "target = new_client.target\n", - "target_query = target.search('coronavirus')\n", + "target_query = target.search('aromatase')\n", "targets = pd.DataFrame.from_dict(target_query)\n", "targets" ], @@ -197,109 +207,44 @@ "
\n", "133 rows × 43 columns
\n", "" ], "text/plain": [ - " activity_comment activity_id ... upper_value value\n", - "0 None 1480935 ... None 7.2\n", - "1 None 1480936 ... None 9.4\n", - "2 None 1481061 ... None 13.5\n", - "3 None 1481065 ... None 13.11\n", - "4 None 1481066 ... None 2.0\n", - ".. ... ... ... ... ...\n", - "128 None 12041507 ... None 10.6\n", - "129 None 12041508 ... None 10.1\n", - "130 None 12041509 ... None 11.5\n", - "131 None 12041510 ... None 10.7\n", - "132 None 12041511 ... None 78.9\n", + " activity_comment activity_id ... upper_value value\n", + "0 None 1480935 ... None 7.2\n", + "1 None 1480936 ... None 9.4\n", + "2 None 1481061 ... None 13.5\n", "\n", - "[133 rows x 43 columns]" + "[3 rows x 43 columns]" ] }, "metadata": { "tags": [] }, - "execution_count": 36 + "execution_count": 8 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "oNtBv36dYhxy", + "colab_type": "code", + "outputId": "db6a7832-55eb-484c-b56c-98cdcd5944dd", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + } + }, + "source": [ + "df.standard_type.unique()" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array(['IC50'], dtype=object)" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 11 } ] }, @@ -1058,10 +656,10 @@ "metadata": { "id": "6RBX658q65A5", "colab_type": "code", - "outputId": "8211e0e0-2e3b-4538-fcff-cc584def8283", + "outputId": "04a014cd-9f34-4a8f-e45f-50b380d9d41b", "colab": { "base_uri": "https://localhost:8080/", - "height": 35 + "height": 124 } }, "source": [ @@ -1073,6 +671,10 @@ { "output_type": "stream", "text": [ + "Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly\n", + "\n", + "Enter your authorization code:\n", + "··········\n", "Mounted at /content/gdrive/\n" ], "name": "stdout" @@ -1094,25 +696,13 @@ "metadata": { "id": "tew-UtUWIS__", "colab_type": "code", - "outputId": "7ed3c80f-c7fa-4168-dfe4-2695e60ff08f", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } + "colab": {} }, "source": [ - "! mkdir \"/content/gdrive/My Drive/Colab Notebooks/data\"" + "! mkdir \"/content/gdrive/My Drive/Colab Notebooks/data2\"" ], "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "mkdir: cannot create directory ‘/content/gdrive/My Drive/Colab Notebooks/data’: File exists\n" - ], - "name": "stdout" - } - ] + "outputs": [] }, { "cell_type": "code", @@ -1132,21 +722,22 @@ "metadata": { "id": "iRIr1QiEJtuw", "colab_type": "code", - "outputId": "a919e653-e47b-40bb-b229-3fde27ebf202", + "outputId": "e400f4d9-3ce7-4822-8837-33eb2499c1c1", "colab": { "base_uri": "https://localhost:8080/", - "height": 35 + "height": 52 } }, "source": [ - "! ls \"/content/gdrive/My Drive/Colab Notebooks/data\"" + "! ls -l \"/content/gdrive/My Drive/Colab Notebooks/data\"" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ - "bioactivity_data.csv\n" + "total 69\n", + "-rw------- 1 root root 70010 Apr 29 17:10 bioactivity_data.csv\n" ], "name": "stdout" } @@ -1167,13 +758,25 @@ "metadata": { "id": "FO3cZC5vnCht", "colab_type": "code", - "colab": {} + "outputId": "f5e07f1f-7a24-4d8e-ca52-e5e36e4daea1", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + } }, "source": [ - "!ls" + "! ls" ], "execution_count": 0, - "outputs": [] + "outputs": [ + { + "output_type": "stream", + "text": [ + "bioactivity_data.csv gdrive sample_data\n" + ], + "name": "stdout" + } + ] }, { "cell_type": "markdown", @@ -1190,13 +793,34 @@ "metadata": { "id": "jwEJjx5b5gAn", "colab_type": "code", - "colab": {} + "outputId": "69dce8c6-565d-4537-952e-b01da9f2fd83", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 211 + } }, "source": [ "! head bioactivity_data.csv" ], "execution_count": 0, - "outputs": [] + "outputs": [ + { + "output_type": "stream", + "text": [ + "activity_comment,activity_id,activity_properties,assay_chembl_id,assay_description,assay_type,bao_endpoint,bao_format,bao_label,canonical_smiles,data_validity_comment,data_validity_description,document_chembl_id,document_journal,document_year,ligand_efficiency,molecule_chembl_id,molecule_pref_name,parent_molecule_chembl_id,pchembl_value,potential_duplicate,qudt_units,record_id,relation,src_id,standard_flag,standard_relation,standard_text_value,standard_type,standard_units,standard_upper_value,standard_value,target_chembl_id,target_organism,target_pref_name,target_tax_id,text_value,toid,type,units,uo_units,upper_value,value\n", + ",1480935,[],CHEMBL829584,In vitro inhibitory concentration against SARS coronavirus main protease (SARS CoV 3C-like protease),B,BAO_0000190,BAO_0000357,single protein format,Cc1noc(C)c1CN1C(=O)C(=O)c2cc(C#N)ccc21,,,CHEMBL1139624,Bioorg. Med. Chem. Lett.,2005,\"{'bei': '18.28', 'le': '0.33', 'lle': '3.25', 'sei': '5.90'}\",CHEMBL187579,,CHEMBL187579,5.14,False,http://www.openphacts.org/units/Nanomolar,384103,=,1,True,=,,IC50,nM,,7200.0,CHEMBL3927,SARS coronavirus,SARS coronavirus 3C-like proteinase,227859,,,IC50,uM,UO_0000065,,7.2\n", + ",1480936,[],CHEMBL829584,In vitro inhibitory concentration against SARS coronavirus main protease (SARS CoV 3C-like protease),B,BAO_0000190,BAO_0000357,single protein format,O=C1C(=O)N(Cc2ccc(F)cc2Cl)c2ccc(I)cc21,,,CHEMBL1139624,Bioorg. Med. Chem. Lett.,2005,\"{'bei': '12.10', 'le': '0.33', 'lle': '1.22', 'sei': '13.45'}\",CHEMBL188487,,CHEMBL188487,5.03,False,http://www.openphacts.org/units/Nanomolar,383984,=,1,True,=,,IC50,nM,,9400.0,CHEMBL3927,SARS coronavirus,SARS coronavirus 3C-like proteinase,227859,,,IC50,uM,UO_0000065,,9.4\n", + ",1481061,[],CHEMBL830868,In vitro inhibitory concentration against SARS coronavirus main protease (SARS CoV 3C-like protease) at 20 uM,B,BAO_0000190,BAO_0000357,single protein format,O=C1C(=O)N(CC2COc3ccccc3O2)c2ccc(I)cc21,,,CHEMBL1139624,Bioorg. Med. Chem. Lett.,2005,\"{'bei': '11.56', 'le': '0.29', 'lle': '2.21', 'sei': '8.72'}\",CHEMBL185698,,CHEMBL185698,4.87,False,http://www.openphacts.org/units/Nanomolar,384106,=,1,True,=,,IC50,nM,,13500.0,CHEMBL3927,SARS coronavirus,SARS coronavirus 3C-like proteinase,227859,,,IC50,uM,UO_0000065,,13.5\n", + ",1481065,[],CHEMBL829584,In vitro inhibitory concentration against SARS coronavirus main protease (SARS CoV 3C-like protease),B,BAO_0000190,BAO_0000357,single protein format,O=C1C(=O)N(Cc2cc3ccccc3s2)c2ccccc21,,,CHEMBL1139624,Bioorg. Med. Chem. Lett.,2005,\"{'bei': '16.64', 'le': '0.32', 'lle': '1.25', 'sei': '13.06'}\",CHEMBL426082,,CHEMBL426082,4.88,False,http://www.openphacts.org/units/Nanomolar,384075,=,1,True,=,,IC50,nM,,13110.0,CHEMBL3927,SARS coronavirus,SARS coronavirus 3C-like proteinase,227859,,,IC50,uM,UO_0000065,,13.11\n", + ",1481066,[],CHEMBL829584,In vitro inhibitory concentration against SARS coronavirus main protease (SARS CoV 3C-like protease),B,BAO_0000190,BAO_0000357,single protein format,O=C1C(=O)N(Cc2cc3ccccc3s2)c2c1cccc2[N+](=O)[O-],,,CHEMBL1139624,Bioorg. Med. Chem. Lett.,2005,\"{'bei': '16.84', 'le': '0.32', 'lle': '2.16', 'sei': '7.08'}\",CHEMBL187717,,CHEMBL187717,5.70,False,http://www.openphacts.org/units/Nanomolar,384234,=,1,True,=,,IC50,nM,,2000.0,CHEMBL3927,SARS coronavirus,SARS coronavirus 3C-like proteinase,227859,,,IC50,uM,UO_0000065,,2.0\n", + ",1481068,[],CHEMBL828143,In vitro inhibitory concentration SARS coronavirus main protease (SARS CoV 3C-like protease) ,B,BAO_0000190,BAO_0000357,single protein format,O=C1C(=O)N(Cc2cc3ccccc3s2)c2c(Br)cccc21,,,CHEMBL1139624,Bioorg. Med. Chem. Lett.,2005,\"{'bei': '16.14', 'le': '0.37', 'lle': '1.62', 'sei': '16.07'}\",CHEMBL365134,,CHEMBL365134,6.01,False,http://www.openphacts.org/units/Nanomolar,384081,=,1,True,=,,IC50,nM,,980.0,CHEMBL3927,SARS coronavirus,SARS coronavirus 3C-like proteinase,227859,,,IC50,uM,UO_0000065,,0.98\n", + ",1481088,[],CHEMBL829584,In vitro inhibitory concentration against SARS coronavirus main protease (SARS CoV 3C-like protease),B,BAO_0000190,BAO_0000357,single protein format,O=C1C(=O)N(Cc2cc3ccccc3s2)c2ccc(F)cc21,,,CHEMBL1139624,Bioorg. Med. Chem. Lett.,2005,\"{'bei': '17.08', 'le': '0.33', 'lle': '1.55', 'sei': '14.22'}\",CHEMBL187598,,CHEMBL187598,5.32,False,http://www.openphacts.org/units/Nanomolar,384303,=,1,True,=,,IC50,nM,,4820.0,CHEMBL3927,SARS coronavirus,SARS coronavirus 3C-like proteinase,227859,,,IC50,uM,UO_0000065,,4.82\n", + ",1481089,[],CHEMBL829584,In vitro inhibitory concentration against SARS coronavirus main protease (SARS CoV 3C-like protease),B,BAO_0000190,BAO_0000357,single protein format,O=C1C(=O)N(Cc2cc3ccccc3s2)c2ccc(I)cc21,,,CHEMBL1139624,Bioorg. Med. Chem. Lett.,2005,\"{'bei': '14.36', 'le': '0.37', 'lle': '1.78', 'sei': '16.11'}\",CHEMBL190743,,CHEMBL190743,6.02,False,http://www.openphacts.org/units/Nanomolar,384329,=,1,True,=,,IC50,nM,,950.0,CHEMBL3927,SARS coronavirus,SARS coronavirus 3C-like proteinase,227859,,,IC50,uM,UO_0000065,,0.95\n", + ",1481093,[],CHEMBL829584,In vitro inhibitory concentration against SARS coronavirus main protease (SARS CoV 3C-like protease),B,BAO_0000190,BAO_0000357,single protein format,O=C1C(=O)N(Cc2cc3ccccc3s2)c2cccc(Cl)c21,,,CHEMBL1139624,Bioorg. Med. Chem. Lett.,2005,\"{'bei': '15.10', 'le': '0.31', 'lle': '0.67', 'sei': '13.24'}\",CHEMBL365469,,CHEMBL365469,4.95,False,http://www.openphacts.org/units/Nanomolar,384283,=,1,True,=,,IC50,nM,,11200.0,CHEMBL3927,SARS coronavirus,SARS coronavirus 3C-like proteinase,227859,,,IC50,uM,UO_0000065,,11.2\n" + ], + "name": "stdout" + } + ] }, { "cell_type": "markdown", @@ -1214,161 +838,1307 @@ "metadata": { "id": "hkVOdk6ZR396", "colab_type": "code", - "colab": {} + "outputId": "fc08d57e-f832-4cb0-90f2-dc7394b0209d", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 782 + } }, "source": [ "df2 = df[df.standard_value.notna()]\n", "df2" ], "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Y-qNsUlmjS25", - "colab_type": "text" - }, - "source": [ - "Apparently, for this dataset there is no missing data. But we can use the above code cell for bioactivity data of other target protein." - ] - }, - { - "cell_type": "markdown", - "metadata": { + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "\n", + " | activity_comment | \n", + "activity_id | \n", + "activity_properties | \n", + "assay_chembl_id | \n", + "assay_description | \n", + "assay_type | \n", + "bao_endpoint | \n", + "bao_format | \n", + "bao_label | \n", + "canonical_smiles | \n", + "data_validity_comment | \n", + "data_validity_description | \n", + "document_chembl_id | \n", + "document_journal | \n", + "document_year | \n", + "ligand_efficiency | \n", + "molecule_chembl_id | \n", + "molecule_pref_name | \n", + "parent_molecule_chembl_id | \n", + "pchembl_value | \n", + "potential_duplicate | \n", + "qudt_units | \n", + "record_id | \n", + "relation | \n", + "src_id | \n", + "standard_flag | \n", + "standard_relation | \n", + "standard_text_value | \n", + "standard_type | \n", + "standard_units | \n", + "standard_upper_value | \n", + "standard_value | \n", + "target_chembl_id | \n", + "target_organism | \n", + "target_pref_name | \n", + "target_tax_id | \n", + "text_value | \n", + "toid | \n", + "type | \n", + "units | \n", + "uo_units | \n", + "upper_value | \n", + "value | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "None | \n", + "1480935 | \n", + "[] | \n", + "CHEMBL829584 | \n", + "In vitro inhibitory concentration against SARS... | \n", + "B | \n", + "BAO_0000190 | \n", + "BAO_0000357 | \n", + "single protein format | \n", + "Cc1noc(C)c1CN1C(=O)C(=O)c2cc(C#N)ccc21 | \n", + "None | \n", + "None | \n", + "CHEMBL1139624 | \n", + "Bioorg. Med. Chem. Lett. | \n", + "2005 | \n", + "{'bei': '18.28', 'le': '0.33', 'lle': '3.25', ... | \n", + "CHEMBL187579 | \n", + "None | \n", + "CHEMBL187579 | \n", + "5.14 | \n", + "False | \n", + "http://www.openphacts.org/units/Nanomolar | \n", + "384103 | \n", + "= | \n", + "1 | \n", + "True | \n", + "= | \n", + "None | \n", + "IC50 | \n", + "nM | \n", + "None | \n", + "7200.0 | \n", + "CHEMBL3927 | \n", + "SARS coronavirus | \n", + "SARS coronavirus 3C-like proteinase | \n", + "227859 | \n", + "None | \n", + "None | \n", + "IC50 | \n", + "uM | \n", + "UO_0000065 | \n", + "None | \n", + "7.2 | \n", + "
1 | \n", + "None | \n", + "1480936 | \n", + "[] | \n", + "CHEMBL829584 | \n", + "In vitro inhibitory concentration against SARS... | \n", + "B | \n", + "BAO_0000190 | \n", + "BAO_0000357 | \n", + "single protein format | \n", + "O=C1C(=O)N(Cc2ccc(F)cc2Cl)c2ccc(I)cc21 | \n", + "None | \n", + "None | \n", + "CHEMBL1139624 | \n", + "Bioorg. Med. Chem. Lett. | \n", + "2005 | \n", + "{'bei': '12.10', 'le': '0.33', 'lle': '1.22', ... | \n", + "CHEMBL188487 | \n", + "None | \n", + "CHEMBL188487 | \n", + "5.03 | \n", + "False | \n", + "http://www.openphacts.org/units/Nanomolar | \n", + "383984 | \n", + "= | \n", + "1 | \n", + "True | \n", + "= | \n", + "None | \n", + "IC50 | \n", + "nM | \n", + "None | \n", + "9400.0 | \n", + "CHEMBL3927 | \n", + "SARS coronavirus | \n", + "SARS coronavirus 3C-like proteinase | \n", + "227859 | \n", + "None | \n", + "None | \n", + "IC50 | \n", + "uM | \n", + "UO_0000065 | \n", + "None | \n", + "9.4 | \n", + "
2 | \n", + "None | \n", + "1481061 | \n", + "[] | \n", + "CHEMBL830868 | \n", + "In vitro inhibitory concentration against SARS... | \n", + "B | \n", + "BAO_0000190 | \n", + "BAO_0000357 | \n", + "single protein format | \n", + "O=C1C(=O)N(CC2COc3ccccc3O2)c2ccc(I)cc21 | \n", + "None | \n", + "None | \n", + "CHEMBL1139624 | \n", + "Bioorg. Med. Chem. Lett. | \n", + "2005 | \n", + "{'bei': '11.56', 'le': '0.29', 'lle': '2.21', ... | \n", + "CHEMBL185698 | \n", + "None | \n", + "CHEMBL185698 | \n", + "4.87 | \n", + "False | \n", + "http://www.openphacts.org/units/Nanomolar | \n", + "384106 | \n", + "= | \n", + "1 | \n", + "True | \n", + "= | \n", + "None | \n", + "IC50 | \n", + "nM | \n", + "None | \n", + "13500.0 | \n", + "CHEMBL3927 | \n", + "SARS coronavirus | \n", + "SARS coronavirus 3C-like proteinase | \n", + "227859 | \n", + "None | \n", + "None | \n", + "IC50 | \n", + "uM | \n", + "UO_0000065 | \n", + "None | \n", + "13.5 | \n", + "
3 | \n", + "None | \n", + "1481065 | \n", + "[] | \n", + "CHEMBL829584 | \n", + "In vitro inhibitory concentration against SARS... | \n", + "B | \n", + "BAO_0000190 | \n", + "BAO_0000357 | \n", + "single protein format | \n", + "O=C1C(=O)N(Cc2cc3ccccc3s2)c2ccccc21 | \n", + "None | \n", + "None | \n", + "CHEMBL1139624 | \n", + "Bioorg. Med. Chem. Lett. | \n", + "2005 | \n", + "{'bei': '16.64', 'le': '0.32', 'lle': '1.25', ... | \n", + "CHEMBL426082 | \n", + "None | \n", + "CHEMBL426082 | \n", + "4.88 | \n", + "False | \n", + "http://www.openphacts.org/units/Nanomolar | \n", + "384075 | \n", + "= | \n", + "1 | \n", + "True | \n", + "= | \n", + "None | \n", + "IC50 | \n", + "nM | \n", + "None | \n", + "13110.0 | \n", + "CHEMBL3927 | \n", + "SARS coronavirus | \n", + "SARS coronavirus 3C-like proteinase | \n", + "227859 | \n", + "None | \n", + "None | \n", + "IC50 | \n", + "uM | \n", + "UO_0000065 | \n", + "None | \n", + "13.11 | \n", + "
4 | \n", + "None | \n", + "1481066 | \n", + "[] | \n", + "CHEMBL829584 | \n", + "In vitro inhibitory concentration against SARS... | \n", + "B | \n", + "BAO_0000190 | \n", + "BAO_0000357 | \n", + "single protein format | \n", + "O=C1C(=O)N(Cc2cc3ccccc3s2)c2c1cccc2[N+](=O)[O-] | \n", + "None | \n", + "None | \n", + "CHEMBL1139624 | \n", + "Bioorg. Med. Chem. Lett. | \n", + "2005 | \n", + "{'bei': '16.84', 'le': '0.32', 'lle': '2.16', ... | \n", + "CHEMBL187717 | \n", + "None | \n", + "CHEMBL187717 | \n", + "5.70 | \n", + "False | \n", + "http://www.openphacts.org/units/Nanomolar | \n", + "384234 | \n", + "= | \n", + "1 | \n", + "True | \n", + "= | \n", + "None | \n", + "IC50 | \n", + "nM | \n", + "None | \n", + "2000.0 | \n", + "CHEMBL3927 | \n", + "SARS coronavirus | \n", + "SARS coronavirus 3C-like proteinase | \n", + "227859 | \n", + "None | \n", + "None | \n", + "IC50 | \n", + "uM | \n", + "UO_0000065 | \n", + "None | \n", + "2.0 | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
128 | \n", + "None | \n", + "12041507 | \n", + "[] | \n", + "CHEMBL2150313 | \n", + "Inhibition of SARS-CoV PLpro expressed in Esch... | \n", + "B | \n", + "BAO_0000190 | \n", + "BAO_0000019 | \n", + "assay format | \n", + "COC(=O)[C@@]1(C)CCCc2c1ccc1c2C(=O)C(=O)c2c(C)c... | \n", + "None | \n", + "None | \n", + "CHEMBL2146458 | \n", + "Bioorg. Med. Chem. | \n", + "2012 | \n", + "{'bei': '14.70', 'le': '0.27', 'lle': '1.57', ... | \n", + "CHEMBL2146517 | \n", + "METHYL TANSHINONATE | \n", + "CHEMBL2146517 | \n", + "4.97 | \n", + "False | \n", + "http://www.openphacts.org/units/Nanomolar | \n", + "1727226 | \n", + "= | \n", + "1 | \n", + "True | \n", + "= | \n", + "None | \n", + "IC50 | \n", + "nM | \n", + "None | \n", + "10600.0 | \n", + "CHEMBL3927 | \n", + "SARS coronavirus | \n", + "SARS coronavirus 3C-like proteinase | \n", + "227859 | \n", + "None | \n", + "None | \n", + "IC50 | \n", + "uM | \n", + "UO_0000065 | \n", + "None | \n", + "10.6 | \n", + "
129 | \n", + "None | \n", + "12041508 | \n", + "[] | \n", + "CHEMBL2150313 | \n", + "Inhibition of SARS-CoV PLpro expressed in Esch... | \n", + "B | \n", + "BAO_0000190 | \n", + "BAO_0000019 | \n", + "assay format | \n", + "C[C@H]1COC2=C1C(=O)C(=O)c1c2ccc2c1CCCC2(C)C | \n", + "None | \n", + "None | \n", + "CHEMBL2146458 | \n", + "Bioorg. Med. Chem. | \n", + "2012 | \n", + "{'bei': '16.86', 'le': '0.31', 'lle': '1.56', ... | \n", + "CHEMBL187460 | \n", + "CRYPTOTANSHINONE | \n", + "CHEMBL187460 | \n", + "5.00 | \n", + "False | \n", + "http://www.openphacts.org/units/Nanomolar | \n", + "1727227 | \n", + "= | \n", + "1 | \n", + "True | \n", + "= | \n", + "None | \n", + "IC50 | \n", + "nM | \n", + "None | \n", + "10100.0 | \n", + "CHEMBL3927 | \n", + "SARS coronavirus | \n", + "SARS coronavirus 3C-like proteinase | \n", + "227859 | \n", + "None | \n", + "None | \n", + "IC50 | \n", + "uM | \n", + "UO_0000065 | \n", + "None | \n", + "10.1 | \n", + "
130 | \n", + "None | \n", + "12041509 | \n", + "[] | \n", + "CHEMBL2150313 | \n", + "Inhibition of SARS-CoV PLpro expressed in Esch... | \n", + "B | \n", + "BAO_0000190 | \n", + "BAO_0000019 | \n", + "assay format | \n", + "Cc1coc2c1C(=O)C(=O)c1c-2ccc2c(C)cccc12 | \n", + "None | \n", + "None | \n", + "CHEMBL2146458 | \n", + "Bioorg. Med. Chem. | \n", + "2012 | \n", + "{'bei': '17.88', 'le': '0.32', 'lle': '0.84', ... | \n", + "CHEMBL363535 | \n", + "TANSHINONE I | \n", + "CHEMBL363535 | \n", + "4.94 | \n", + "False | \n", + "http://www.openphacts.org/units/Nanomolar | \n", + "1727228 | \n", + "= | \n", + "1 | \n", + "True | \n", + "= | \n", + "None | \n", + "IC50 | \n", + "nM | \n", + "None | \n", + "11500.0 | \n", + "CHEMBL3927 | \n", + "SARS coronavirus | \n", + "SARS coronavirus 3C-like proteinase | \n", + "227859 | \n", + "None | \n", + "None | \n", + "IC50 | \n", + "uM | \n", + "UO_0000065 | \n", + "None | \n", + "11.5 | \n", + "
131 | \n", + "None | \n", + "12041510 | \n", + "[] | \n", + "CHEMBL2150313 | \n", + "Inhibition of SARS-CoV PLpro expressed in Esch... | \n", + "B | \n", + "BAO_0000190 | \n", + "BAO_0000019 | \n", + "assay format | \n", + "Cc1cccc2c3c(ccc12)C1=C(C(=O)C3=O)[C@@H](C)CO1 | \n", + "None | \n", + "None | \n", + "CHEMBL2146458 | \n", + "Bioorg. Med. Chem. | \n", + "2012 | \n", + "{'bei': '17.86', 'le': '0.32', 'lle': '1.68', ... | \n", + "CHEMBL227075 | \n", + "DIHYDROTANSHINONE I | \n", + "CHEMBL227075 | \n", + "4.97 | \n", + "False | \n", + "http://www.openphacts.org/units/Nanomolar | \n", + "1727229 | \n", + "= | \n", + "1 | \n", + "True | \n", + "= | \n", + "None | \n", + "IC50 | \n", + "nM | \n", + "None | \n", + "10700.0 | \n", + "CHEMBL3927 | \n", + "SARS coronavirus | \n", + "SARS coronavirus 3C-like proteinase | \n", + "227859 | \n", + "None | \n", + "None | \n", + "IC50 | \n", + "uM | \n", + "UO_0000065 | \n", + "None | \n", + "10.7 | \n", + "
132 | \n", + "None | \n", + "12041511 | \n", + "[] | \n", + "CHEMBL2150313 | \n", + "Inhibition of SARS-CoV PLpro expressed in Esch... | \n", + "B | \n", + "BAO_0000190 | \n", + "BAO_0000019 | \n", + "assay format | \n", + "CC(C)C1=Cc2ccc3c(c2C(=O)C1=O)CCCC3(C)C | \n", + "None | \n", + "None | \n", + "CHEMBL2146458 | \n", + "Bioorg. Med. Chem. | \n", + "2012 | \n", + "{'bei': '14.53', 'le': '0.27', 'lle': '-0.01',... | \n", + "CHEMBL45830 | \n", + "MILTIRONE | \n", + "CHEMBL45830 | \n", + "4.10 | \n", + "False | \n", + "http://www.openphacts.org/units/Nanomolar | \n", + "1727230 | \n", + "= | \n", + "1 | \n", + "True | \n", + "= | \n", + "None | \n", + "IC50 | \n", + "nM | \n", + "None | \n", + "78900.0 | \n", + "CHEMBL3927 | \n", + "SARS coronavirus | \n", + "SARS coronavirus 3C-like proteinase | \n", + "227859 | \n", + "None | \n", + "None | \n", + "IC50 | \n", + "uM | \n", + "UO_0000065 | \n", + "None | \n", + "78.9 | \n", + "
133 rows × 43 columns
\n", + "\n", + " | molecule_chembl_id | \n", + "canonical_smiles | \n", + "bioactivity_class | \n", + "standard_value | \n", + "
---|---|---|---|---|
0 | \n", + "CHEMBL187579 | \n", + "Cc1noc(C)c1CN1C(=O)C(=O)c2cc(C#N)ccc21 | \n", + "intermediate | \n", + "7200.0 | \n", + "
1 | \n", + "CHEMBL188487 | \n", + "O=C1C(=O)N(Cc2ccc(F)cc2Cl)c2ccc(I)cc21 | \n", + "intermediate | \n", + "9400.0 | \n", + "
2 | \n", + "CHEMBL185698 | \n", + "O=C1C(=O)N(CC2COc3ccccc3O2)c2ccc(I)cc21 | \n", + "inactive | \n", + "13500.0 | \n", + "
3 | \n", + "CHEMBL426082 | \n", + "O=C1C(=O)N(Cc2cc3ccccc3s2)c2ccccc21 | \n", + "inactive | \n", + "13110.0 | \n", + "
4 | \n", + "CHEMBL187717 | \n", + "O=C1C(=O)N(Cc2cc3ccccc3s2)c2c1cccc2[N+](=O)[O-] | \n", + "intermediate | \n", + "2000.0 | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
128 | \n", + "CHEMBL2146517 | \n", + "COC(=O)[C@@]1(C)CCCc2c1ccc1c2C(=O)C(=O)c2c(C)c... | \n", + "inactive | \n", + "10600.0 | \n", + "
129 | \n", + "CHEMBL187460 | \n", + "C[C@H]1COC2=C1C(=O)C(=O)c1c2ccc2c1CCCC2(C)C | \n", + "inactive | \n", + "10100.0 | \n", + "
130 | \n", + "CHEMBL363535 | \n", + "Cc1coc2c1C(=O)C(=O)c1c-2ccc2c(C)cccc12 | \n", + "inactive | \n", + "11500.0 | \n", + "
131 | \n", + "CHEMBL227075 | \n", + "Cc1cccc2c3c(ccc12)C1=C(C(=O)C3=O)[C@@H](C)CO1 | \n", + "inactive | \n", + "10700.0 | \n", + "
132 | \n", + "CHEMBL45830 | \n", + "CC(C)C1=Cc2ccc3c(c2C(=O)C1=O)CCCC3(C)C | \n", + "inactive | \n", + "78900.0 | \n", + "
133 rows × 4 columns
\n", + "\n", + " | molecule_chembl_id | \n", + "canonical_smiles | \n", + "standard_value | \n", + "
---|---|---|---|
0 | \n", + "CHEMBL187579 | \n", + "Cc1noc(C)c1CN1C(=O)C(=O)c2cc(C#N)ccc21 | \n", + "7200.0 | \n", + "
1 | \n", + "CHEMBL188487 | \n", + "O=C1C(=O)N(Cc2ccc(F)cc2Cl)c2ccc(I)cc21 | \n", + "9400.0 | \n", + "
2 | \n", + "CHEMBL185698 | \n", + "O=C1C(=O)N(CC2COc3ccccc3O2)c2ccc(I)cc21 | \n", + "13500.0 | \n", + "
3 | \n", + "CHEMBL426082 | \n", + "O=C1C(=O)N(Cc2cc3ccccc3s2)c2ccccc21 | \n", + "13110.0 | \n", + "
4 | \n", + "CHEMBL187717 | \n", + "O=C1C(=O)N(Cc2cc3ccccc3s2)c2c1cccc2[N+](=O)[O-] | \n", + "2000.0 | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "
128 | \n", + "CHEMBL2146517 | \n", + "COC(=O)[C@@]1(C)CCCc2c1ccc1c2C(=O)C(=O)c2c(C)c... | \n", + "10600.0 | \n", + "
129 | \n", + "CHEMBL187460 | \n", + "C[C@H]1COC2=C1C(=O)C(=O)c1c2ccc2c1CCCC2(C)C | \n", + "10100.0 | \n", + "
130 | \n", + "CHEMBL363535 | \n", + "Cc1coc2c1C(=O)C(=O)c1c-2ccc2c(C)cccc12 | \n", + "11500.0 | \n", + "
131 | \n", + "CHEMBL227075 | \n", + "Cc1cccc2c3c(ccc12)C1=C(C(=O)C3=O)[C@@H](C)CO1 | \n", + "10700.0 | \n", + "
132 | \n", + "CHEMBL45830 | \n", + "CC(C)C1=Cc2ccc3c(c2C(=O)C1=O)CCCC3(C)C | \n", + "78900.0 | \n", + "
133 rows × 3 columns
\n", + "\n", + " | molecule_chembl_id | \n", + "canonical_smiles | \n", + "bioactivity_class | \n", + "standard_value | \n", + "0 | \n", + "
---|---|---|---|---|---|
0 | \n", + "CHEMBL187579 | \n", + "Cc1noc(C)c1CN1C(=O)C(=O)c2cc(C#N)ccc21 | \n", + "intermediate | \n", + "7200.0 | \n", + "intermediate | \n", + "
1 | \n", + "CHEMBL188487 | \n", + "O=C1C(=O)N(Cc2ccc(F)cc2Cl)c2ccc(I)cc21 | \n", + "intermediate | \n", + "9400.0 | \n", + "intermediate | \n", + "
2 | \n", + "CHEMBL185698 | \n", + "O=C1C(=O)N(CC2COc3ccccc3O2)c2ccc(I)cc21 | \n", + "inactive | \n", + "13500.0 | \n", + "inactive | \n", + "
3 | \n", + "CHEMBL426082 | \n", + "O=C1C(=O)N(Cc2cc3ccccc3s2)c2ccccc21 | \n", + "inactive | \n", + "13110.0 | \n", + "inactive | \n", + "
4 | \n", + "CHEMBL187717 | \n", + "O=C1C(=O)N(Cc2cc3ccccc3s2)c2c1cccc2[N+](=O)[O-] | \n", + "intermediate | \n", + "2000.0 | \n", + "intermediate | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
128 | \n", + "CHEMBL2146517 | \n", + "COC(=O)[C@@]1(C)CCCc2c1ccc1c2C(=O)C(=O)c2c(C)c... | \n", + "inactive | \n", + "10600.0 | \n", + "inactive | \n", + "
129 | \n", + "CHEMBL187460 | \n", + "C[C@H]1COC2=C1C(=O)C(=O)c1c2ccc2c1CCCC2(C)C | \n", + "inactive | \n", + "10100.0 | \n", + "inactive | \n", + "
130 | \n", + "CHEMBL363535 | \n", + "Cc1coc2c1C(=O)C(=O)c1c-2ccc2c(C)cccc12 | \n", + "inactive | \n", + "11500.0 | \n", + "inactive | \n", + "
131 | \n", + "CHEMBL227075 | \n", + "Cc1cccc2c3c(ccc12)C1=C(C(=O)C3=O)[C@@H](C)CO1 | \n", + "inactive | \n", + "10700.0 | \n", + "inactive | \n", + "
132 | \n", + "CHEMBL45830 | \n", + "CC(C)C1=Cc2ccc3c(c2C(=O)C1=O)CCCC3(C)C | \n", + "inactive | \n", + "78900.0 | \n", + "inactive | \n", + "
133 rows × 5 columns
\n", + "