forked from dataprofessor/code
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
988c733
commit 2d8dfdb
Showing
1 changed file
with
360 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,360 @@ | ||
{ | ||
"nbformat": 4, | ||
"nbformat_minor": 0, | ||
"metadata": { | ||
"colab": { | ||
"name": "Hummingbird-ML.ipynb", | ||
"provenance": [], | ||
"collapsed_sections": [] | ||
}, | ||
"kernelspec": { | ||
"name": "python3", | ||
"display_name": "Python 3" | ||
}, | ||
"accelerator": "GPU" | ||
}, | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"id": "IpOdlr3WAPHJ", | ||
"colab_type": "text" | ||
}, | ||
"source": [ | ||
"# **Hummingbird-ML**\n", | ||
"\n", | ||
"[How to Harness GPU to Speed Up Machine Learning with Hummingbird-ML](https://www.youtube.com/watch?v=qN8jcUmo8TI)\n", | ||
"\n", | ||
"Adapted from: https://github.com/microsoft/hummingbird" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"id": "ir3DZd5-_jiu", | ||
"colab_type": "text" | ||
}, | ||
"source": [ | ||
"# Install Hummingbird-ML" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"metadata": { | ||
"id": "ra3JEgWN_bfp", | ||
"colab_type": "code", | ||
"colab": { | ||
"base_uri": "https://localhost:8080/", | ||
"height": 408 | ||
}, | ||
"outputId": "4fae39de-26f0-4939-846d-039fb876725a" | ||
}, | ||
"source": [ | ||
"! pip install hummingbird-ml[extra]" | ||
], | ||
"execution_count": 1, | ||
"outputs": [ | ||
{ | ||
"output_type": "stream", | ||
"text": [ | ||
"Collecting hummingbird-ml[extra]\n", | ||
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/ed/3b/cf1b8c1e7531377adead8de29e29b00b5aed380544ad0def4c0188b50d80/hummingbird_ml-0.0.5-py2.py3-none-any.whl (60kB)\n", | ||
"\r\u001b[K |█████▌ | 10kB 16.6MB/s eta 0:00:01\r\u001b[K |███████████ | 20kB 1.8MB/s eta 0:00:01\r\u001b[K |████████████████▍ | 30kB 2.2MB/s eta 0:00:01\r\u001b[K |█████████████████████▉ | 40kB 2.5MB/s eta 0:00:01\r\u001b[K |███████████████████████████▎ | 51kB 2.0MB/s eta 0:00:01\r\u001b[K |████████████████████████████████| 61kB 1.8MB/s \n", | ||
"\u001b[?25hRequirement already satisfied: numpy>=1.15 in /usr/local/lib/python3.6/dist-packages (from hummingbird-ml[extra]) (1.18.5)\n", | ||
"Requirement already satisfied: torch>=1.4.* in /usr/local/lib/python3.6/dist-packages (from hummingbird-ml[extra]) (1.6.0+cu101)\n", | ||
"Collecting onnxconverter-common>=1.6.0\n", | ||
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/fe/7a/7e30c643cd7d2ad87689188ef34ce93e657bd14da3605f87bcdbc19cd5b1/onnxconverter_common-1.7.0-py2.py3-none-any.whl (64kB)\n", | ||
"\u001b[K |████████████████████████████████| 71kB 3.7MB/s \n", | ||
"\u001b[?25hRequirement already satisfied: scikit-learn>=0.22.1 in /usr/local/lib/python3.6/dist-packages (from hummingbird-ml[extra]) (0.22.2.post1)\n", | ||
"Requirement already satisfied: xgboost==0.90; extra == \"extra\" in /usr/local/lib/python3.6/dist-packages (from hummingbird-ml[extra]) (0.90)\n", | ||
"Requirement already satisfied: lightgbm>=2.2; extra == \"extra\" in /usr/local/lib/python3.6/dist-packages (from hummingbird-ml[extra]) (2.2.3)\n", | ||
"Requirement already satisfied: future in /usr/local/lib/python3.6/dist-packages (from torch>=1.4.*->hummingbird-ml[extra]) (0.16.0)\n", | ||
"Collecting onnx\n", | ||
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/36/ee/bc7bc88fc8449266add978627e90c363069211584b937fd867b0ccc59f09/onnx-1.7.0-cp36-cp36m-manylinux1_x86_64.whl (7.4MB)\n", | ||
"\u001b[K |████████████████████████████████| 7.4MB 16.0MB/s \n", | ||
"\u001b[?25hRequirement already satisfied: protobuf in /usr/local/lib/python3.6/dist-packages (from onnxconverter-common>=1.6.0->hummingbird-ml[extra]) (3.12.4)\n", | ||
"Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.6/dist-packages (from scikit-learn>=0.22.1->hummingbird-ml[extra]) (0.16.0)\n", | ||
"Requirement already satisfied: scipy>=0.17.0 in /usr/local/lib/python3.6/dist-packages (from scikit-learn>=0.22.1->hummingbird-ml[extra]) (1.4.1)\n", | ||
"Requirement already satisfied: typing-extensions>=3.6.2.1 in /usr/local/lib/python3.6/dist-packages (from onnx->onnxconverter-common>=1.6.0->hummingbird-ml[extra]) (3.7.4.3)\n", | ||
"Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from onnx->onnxconverter-common>=1.6.0->hummingbird-ml[extra]) (1.15.0)\n", | ||
"Requirement already satisfied: setuptools in /usr/local/lib/python3.6/dist-packages (from protobuf->onnxconverter-common>=1.6.0->hummingbird-ml[extra]) (49.6.0)\n", | ||
"Installing collected packages: onnx, onnxconverter-common, hummingbird-ml\n", | ||
"Successfully installed hummingbird-ml-0.0.5 onnx-1.7.0 onnxconverter-common-1.7.0\n" | ||
], | ||
"name": "stdout" | ||
} | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"id": "YnA-PmeA_q70", | ||
"colab_type": "text" | ||
}, | ||
"source": [ | ||
"# Import libraries" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"metadata": { | ||
"id": "lkIThThi_puf", | ||
"colab_type": "code", | ||
"colab": {} | ||
}, | ||
"source": [ | ||
"import numpy as np\n", | ||
"from sklearn.ensemble import RandomForestClassifier\n", | ||
"from hummingbird.ml import convert" | ||
], | ||
"execution_count": 2, | ||
"outputs": [] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"id": "rFw_4cGa_-tF", | ||
"colab_type": "text" | ||
}, | ||
"source": [ | ||
"# Create some random data for binary classification" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"metadata": { | ||
"id": "hGGngPPp__mx", | ||
"colab_type": "code", | ||
"colab": {} | ||
}, | ||
"source": [ | ||
"num_classes = 2\n", | ||
"X = np.random.rand(100000, 28)\n", | ||
"y = np.random.randint(num_classes, size=100000)" | ||
], | ||
"execution_count": 3, | ||
"outputs": [] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"id": "WusxNKH4AHII", | ||
"colab_type": "text" | ||
}, | ||
"source": [ | ||
"# Create and train a model (scikit-learn RandomForestClassifier)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"metadata": { | ||
"id": "GMRJRuBwAGeV", | ||
"colab_type": "code", | ||
"colab": {} | ||
}, | ||
"source": [ | ||
"skl_model = RandomForestClassifier(n_estimators=10, max_depth=10)" | ||
], | ||
"execution_count": 4, | ||
"outputs": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"metadata": { | ||
"id": "M_kGo80yAYTn", | ||
"colab_type": "code", | ||
"colab": { | ||
"base_uri": "https://localhost:8080/", | ||
"height": 34 | ||
}, | ||
"outputId": "aa863652-02f8-4578-8fb7-e3b028685cd7" | ||
}, | ||
"source": [ | ||
"%%timeit\n", | ||
"skl_model.fit(X, y)" | ||
], | ||
"execution_count": 5, | ||
"outputs": [ | ||
{ | ||
"output_type": "stream", | ||
"text": [ | ||
"1 loop, best of 3: 4.78 s per loop\n" | ||
], | ||
"name": "stdout" | ||
} | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"metadata": { | ||
"id": "Hp4a8I0tAbBl", | ||
"colab_type": "code", | ||
"colab": { | ||
"base_uri": "https://localhost:8080/", | ||
"height": 34 | ||
}, | ||
"outputId": "4e083fd5-981f-4238-9158-3f4500585560" | ||
}, | ||
"source": [ | ||
"%%timeit\n", | ||
"skl_model.predict(X)" | ||
], | ||
"execution_count": 6, | ||
"outputs": [ | ||
{ | ||
"output_type": "stream", | ||
"text": [ | ||
"10 loops, best of 3: 85.6 ms per loop\n" | ||
], | ||
"name": "stdout" | ||
} | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"id": "mNiBvy9BA7wR", | ||
"colab_type": "text" | ||
}, | ||
"source": [ | ||
"# Use Hummingbird to convert the model to PyTorch" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"metadata": { | ||
"id": "vcAOpuxxAzPc", | ||
"colab_type": "code", | ||
"colab": {} | ||
}, | ||
"source": [ | ||
"model = convert(skl_model, 'pytorch')" | ||
], | ||
"execution_count": 7, | ||
"outputs": [] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"id": "dpt6_4l8BF7e", | ||
"colab_type": "text" | ||
}, | ||
"source": [ | ||
"# Run predictions on CPU" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"metadata": { | ||
"id": "_BiU63hNBDu-", | ||
"colab_type": "code", | ||
"colab": { | ||
"base_uri": "https://localhost:8080/", | ||
"height": 34 | ||
}, | ||
"outputId": "1bd8b158-a62b-4fe0-be09-ca382c817247" | ||
}, | ||
"source": [ | ||
"%%timeit\n", | ||
"model.predict(X)" | ||
], | ||
"execution_count": 8, | ||
"outputs": [ | ||
{ | ||
"output_type": "stream", | ||
"text": [ | ||
"1 loop, best of 3: 174 ms per loop\n" | ||
], | ||
"name": "stdout" | ||
} | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"id": "F10tJEMKBPZG", | ||
"colab_type": "text" | ||
}, | ||
"source": [ | ||
"# Run predictions on GPU" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"metadata": { | ||
"id": "l2PUbqoHBJBX", | ||
"colab_type": "code", | ||
"colab": {} | ||
}, | ||
"source": [ | ||
"model.to('cuda')" | ||
], | ||
"execution_count": 9, | ||
"outputs": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"metadata": { | ||
"id": "-AB23_VTBRMP", | ||
"colab_type": "code", | ||
"colab": { | ||
"base_uri": "https://localhost:8080/", | ||
"height": 51 | ||
}, | ||
"outputId": "b9efea7d-913c-4326-c14a-6b6ca0e9c063" | ||
}, | ||
"source": [ | ||
"%%timeit\n", | ||
"model.predict(X)" | ||
], | ||
"execution_count": 10, | ||
"outputs": [ | ||
{ | ||
"output_type": "stream", | ||
"text": [ | ||
"The slowest run took 5.22 times longer than the fastest. This could mean that an intermediate result is being cached.\n", | ||
"100 loops, best of 3: 14.8 ms per loop\n" | ||
], | ||
"name": "stdout" | ||
} | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"id": "dbkQU69JDt7T", | ||
"colab_type": "text" | ||
}, | ||
"source": [ | ||
"# Calculation Time" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"id": "Hr1R_9nwDwpc", | ||
"colab_type": "text" | ||
}, | ||
"source": [ | ||
"Methods | Timing | Performance\n", | ||
"--|--|--\n", | ||
"scikit-learn | 85.6 ms | -\n", | ||
"PyTorch (CPU) | 174 ms | 2 X slower than scikit-learn\n", | ||
"PyTorch (GPU) | 14.8 ms | Almost 6 X faster than scikit-learn; Almost 12 X faster than PyTorch (CPU)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"metadata": { | ||
"id": "9lmR3LHoEzhl", | ||
"colab_type": "code", | ||
"colab": {} | ||
}, | ||
"source": [ | ||
"" | ||
], | ||
"execution_count": null, | ||
"outputs": [] | ||
} | ||
] | ||
} |