Add files via upload

jtaquia · Apr 22, 2020 · 7683bee · 7683bee
1 parent 31dc7ce
commit 7683bee
Showing 1 changed file with 386 additions and 0 deletions.
diff --git a/python/pandas_profiling_example.ipynb b/python/pandas_profiling_example.ipynb
@@ -0,0 +1,386 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.6.6"
+    },
+    "widgets": {
+      "application/vnd.jupyter.widget-state+json": {}
+    },
+    "colab": {
+      "name": "pandas-profiling-example.ipynb",
+      "provenance": [],
+      "collapsed_sections": []
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "B4iHcr1KQ1-8",
+        "colab_type": "text"
+      },
+      "source": [
+        "# **Minimal working example of pandas-profiling**\n",
+        "\n",
+        "Chanin Nantasenamat\n",
+        "\n",
+        "<i>[Data Professor YouTube channel](http://youtube.com/dataprofessor), http://youtube.com/dataprofessor </i>\n",
+        "\n",
+        "In this Jupyter notebook, a minimal working example (MWE) of the pandas-profiling library is shown. The code is taken directly from the example given in the pandas-profiling GitHub repository.\n",
+        "\n",
+        "Source: https://github.com/pandas-profiling/pandas-profiling\n",
+        "\n",
+        "See it in action below!\n",
+        "\n",
+        "## **If you find this useful, please give this notebook a thumbs up on [Kaggle](https://www.kaggle.com/chaninnantasenamat/pandas-profiling-example)!** 👍👍\n",
+        "https://www.kaggle.com/chaninnantasenamat/pandas-profiling-example\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "qkKPkN6ZQ1-9",
+        "colab_type": "text"
+      },
+      "source": [
+        "## **Import libraries**"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "pOoIHSWUQ1--",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "import numpy as np\n",
+        "import pandas as pd\n",
+        "from pandas_profiling import ProfileReport"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Sz4zHJqQQ1_B",
+        "colab_type": "text"
+      },
+      "source": [
+        "## **Create synthetic data**"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "JaTx3DQbQ1_C",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "df = pd.DataFrame(\n",
+        "    np.random.rand(100, 5),\n",
+        "    columns=['a', 'b', 'c', 'd', 'e']\n",
+        ")"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Wae5HPZEQ1_F",
+        "colab_type": "text"
+      },
+      "source": [
+        "## **Create the Report**"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "OinxFMrdQ1_G",
+        "colab_type": "code",
+        "colab": {
+          "referenced_widgets": [
+            "0db2c7f6180942d89f24aaf22c85c4a8",
+            "d90d780a5413450aa4ad99ccf9d20dfb",
+            "7133b36d1f344859b0b822ef49014cda",
+            "a0663276eb4b47dc9a83f4d2fe2931c6",
+            "0e4202cc01244fa7905c0f51028a8081",
+            "9a82a1cb1a774212890340263a860205",
+            "240d72accbe945469a70e93b85777e93",
+            "525b61bb82694273b88572a1d0ff0134"
+          ]
+        },
+        "outputId": "6afc8bfe-fae1-4a89-f0f3-a679fe7be139"
+      },
+      "source": [
+        "profile = ProfileReport(df, title='Pandas Profiling Report', html={'style':{'full_width':True}})"
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "application/vnd.jupyter.widget-view+json": {
+              "model_id": "0db2c7f6180942d89f24aaf22c85c4a8",
+              "version_major": 2,
+              "version_minor": 0
+            },
+            "text/plain": [
+              "HBox(children=(FloatProgress(value=0.0, description='variables', max=5.0, style=ProgressStyle(description_widt…"
+            ]
+          },
+          "metadata": {
+            "tags": []
+          }
+        },
+        {
+          "output_type": "stream",
+          "text": [
+            "\n"
+          ],
+          "name": "stdout"
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "application/vnd.jupyter.widget-view+json": {
+              "model_id": "d90d780a5413450aa4ad99ccf9d20dfb",
+              "version_major": 2,
+              "version_minor": 0
+            },
+            "text/plain": [
+              "HBox(children=(FloatProgress(value=0.0, description='correlations', max=6.0, style=ProgressStyle(description_w…"
+            ]
+          },
+          "metadata": {
+            "tags": []
+          }
+        },
+        {
+          "output_type": "stream",
+          "text": [
+            "\n"
+          ],
+          "name": "stdout"
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "application/vnd.jupyter.widget-view+json": {
+              "model_id": "7133b36d1f344859b0b822ef49014cda",
+              "version_major": 2,
+              "version_minor": 0
+            },
+            "text/plain": [
+              "HBox(children=(FloatProgress(value=0.0, description='interactions [continuous]', max=25.0, style=ProgressStyle…"
+            ]
+          },
+          "metadata": {
+            "tags": []
+          }
+        },
+        {
+          "output_type": "stream",
+          "text": [
+            "\n"
+          ],
+          "name": "stdout"
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "application/vnd.jupyter.widget-view+json": {
+              "model_id": "a0663276eb4b47dc9a83f4d2fe2931c6",
+              "version_major": 2,
+              "version_minor": 0
+            },
+            "text/plain": [
+              "HBox(children=(FloatProgress(value=0.0, description='table', max=1.0, style=ProgressStyle(description_width='i…"
+            ]
+          },
+          "metadata": {
+            "tags": []
+          }
+        },
+        {
+          "output_type": "stream",
+          "text": [
+            "\n"
+          ],
+          "name": "stdout"
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "application/vnd.jupyter.widget-view+json": {
+              "model_id": "0e4202cc01244fa7905c0f51028a8081",
+              "version_major": 2,
+              "version_minor": 0
+            },
+            "text/plain": [
+              "HBox(children=(FloatProgress(value=0.0, description='missing', max=2.0, style=ProgressStyle(description_width=…"
+            ]
+          },
+          "metadata": {
+            "tags": []
+          }
+        },
+        {
+          "output_type": "stream",
+          "text": [
+            "\n"
+          ],
+          "name": "stdout"
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "application/vnd.jupyter.widget-view+json": {
+              "model_id": "9a82a1cb1a774212890340263a860205",
+              "version_major": 2,
+              "version_minor": 0
+            },
+            "text/plain": [
+              "HBox(children=(FloatProgress(value=0.0, description='warnings', max=3.0, style=ProgressStyle(description_width…"
+            ]
+          },
+          "metadata": {
+            "tags": []
+          }
+        },
+        {
+          "output_type": "stream",
+          "text": [
+            "\n"
+          ],
+          "name": "stdout"
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "application/vnd.jupyter.widget-view+json": {
+              "model_id": "240d72accbe945469a70e93b85777e93",
+              "version_major": 2,
+              "version_minor": 0
+            },
+            "text/plain": [
+              "HBox(children=(FloatProgress(value=0.0, description='package', max=1.0, style=ProgressStyle(description_width=…"
+            ]
+          },
+          "metadata": {
+            "tags": []
+          }
+        },
+        {
+          "output_type": "stream",
+          "text": [
+            "\n"
+          ],
+          "name": "stdout"
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "application/vnd.jupyter.widget-view+json": {
+              "model_id": "525b61bb82694273b88572a1d0ff0134",
+              "version_major": 2,
+              "version_minor": 0
+            },
+            "text/plain": [
+              "HBox(children=(FloatProgress(value=0.0, description='build report structure', max=1.0, style=ProgressStyle(des…"
+            ]
+          },
+          "metadata": {
+            "tags": []
+          }
+        },
+        {
+          "output_type": "stream",
+          "text": [
+            "\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "0f3z8wW0Q1_K",
+        "colab_type": "text"
+      },
+      "source": [
+        "## **Display the Report**"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "oBH27KBWQ1_L",
+        "colab_type": "code",
+        "colab": {
+          "referenced_widgets": [
+            "00d082e83d854ff6aa8ce1a8f5acf037"
+          ]
+        },
+        "outputId": "5c3d3d38-06f5-4fb2-93b0-06d323a69916"
+      },
+      "source": [
+        "profile.to_widgets()"
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "application/vnd.jupyter.widget-view+json": {
+              "model_id": "00d082e83d854ff6aa8ce1a8f5acf037",
+              "version_major": 2,
+              "version_minor": 0
+            },
+            "text/plain": [
+              "Tab(children=(Tab(children=(GridBox(children=(VBox(children=(GridspecLayout(children=(HTML(value='Number of va…"
+            ]
+          },
+          "metadata": {
+            "tags": []
+          }
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/html": [
+              "Report generated with <a href=\"https://github.com/pandas-profiling/pandas-profiling\">pandas-profiling</a>."
+            ],
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ]
+          },
+          "metadata": {
+            "tags": []
+          }
+        }
+      ]
+    }
+  ]
+}