Commit
graph rewards v iter
carson committed Feb 17, 2023
1 parent 0bc3fba commit ed1a6d0
Showing 2 changed files with 26 additions and 39 deletions.
Binary file added media/reward_iters_1.png
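The added media/reward_iters_1.png presumably charts mean reward against training iteration, matching the commit message. A minimal matplotlib sketch of how such a figure could be produced; the reward values here are hypothetical placeholders, not data from the notebook:

    import matplotlib.pyplot as plt

    # Hypothetical per-iteration mean rewards logged during PPO training.
    mean_rewards = [0.12, 0.31, 0.48, 0.57, 0.66]

    plt.plot(range(len(mean_rewards)), mean_rewards)
    plt.xlabel("iteration")
    plt.ylabel("mean reward")
    # Assumes the media/ directory already exists in the repo.
    plt.savefig("media/reward_iters_1.png")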
65 changes: 26 additions & 39 deletions notebooks/part_3_tutorial.ipynb
@@ -2,30 +2,16 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"id": "52adb752",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/bin/nvidia-modprobe: unrecognized option: \"-s\"\n",
"\n",
"ERROR: Invalid commandline, please run `/usr/bin/nvidia-modprobe --help` for usage information.\n",
"\n",
"\n",
"/usr/bin/nvidia-modprobe: unrecognized option: \"-s\"\n",
"\n",
"ERROR: Invalid commandline, please run `/usr/bin/nvidia-modprobe --help` for usage information.\n",
"\n",
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"The autoreload extension is already loaded. To reload it, use:\n",
" %reload_ext autoreload\n",
"number of GPUs: 2\n"
]
}
@@ -49,23 +35,34 @@
},
{
"cell_type": "code",
"execution_count": 48,
"execution_count": 3,
"id": "e9365b48",
"metadata": {},
"outputs": [],
"source": [
"# For the sake of the speed of this demonstration, the batch_size is temporarily decreased from 256 to 4\n",
"batch_size = 64\n",
"config.batch_size = batch_size\n",
"config.forward_batch_size = batch_size//2\n"
"#batch_size = 64\n",
"#config.batch_size = batch_size\n",
"#config.forward_batch_size = batch_size//2\n",
"config.seed = 42"
]
},
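The newly added config.seed = 42 points to making the run reproducible. How minichatgpt consumes the seed is not visible in this diff; a common pattern, offered here only as an assumption about what the config does, is to seed every RNG in play at once:

    import random

    import numpy as np
    import torch

    def set_seed(seed: int) -> None:
        # Seed Python, NumPy, and PyTorch (CPU and every GPU) in one place.
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    set_seed(42)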
{
"cell_type": "code",
"execution_count": 39,
"execution_count": 4,
"id": "84bda234",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Found cached dataset imdb (/home/carson/.cache/huggingface/datasets/imdb/plain_text/1.0.0/d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0)\n",
"Loading cached processed dataset at /home/carson/.cache/huggingface/datasets/imdb/plain_text/1.0.0/d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0/cache-a680d0720b68191d.arrow\n",
"Loading cached processed dataset at /home/carson/.cache/huggingface/datasets/imdb/plain_text/1.0.0/d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0/cache-ec21da66149d9ccd.arrow\n"
]
}
],
"source": [
"lab = Lab(config)\n",
"\n",
@@ -89,7 +86,7 @@
},
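The stderr messages above are Hugging Face datasets resolving IMDB and its processed splits from the local cache rather than re-downloading or re-tokenizing. The notebook hides the call inside Lab, but a load of this shape would produce those messages (the split and any map() preprocessing are assumptions):

    from datasets import load_dataset

    # On a cache hit this prints "Found cached dataset imdb (...)" and
    # "Loading cached processed dataset at ..." instead of recomputing.
    dataset = load_dataset("imdb", split="train")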
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 5,
"id": "92dddae8",
"metadata": {},
"outputs": [
@@ -137,28 +134,18 @@
},
{
"cell_type": "code",
"execution_count": 49,
"execution_count": null,
"id": "15104fc4",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"388it [2:07:25, 19.71s/it]\n"
]
},
{
"ename": "ValueError",
"evalue": "Batch size (64) does not match number of examples - but got 63 for: queries",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/tmp/ipykernel_16688/3444053502.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 26\u001b[0m \u001b[0;31m#### Run PPO step\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 27\u001b[0;31m \u001b[0mstats\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mppo_trainer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mqueries\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresponses\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrewards\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 28\u001b[0m \u001b[0mppo_trainer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlog_stats\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstats\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrewards\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 29\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/Desktop/Projects/minichatgpt/minichatgpt/trainer/ppo_trainer.py\u001b[0m in \u001b[0;36mstep\u001b[0;34m(self, queries, responses, scores)\u001b[0m\n\u001b[1;32m 396\u001b[0m \u001b[0mbs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbatch_size\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 397\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 398\u001b[0;31m \u001b[0mqueries\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresponses\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscores\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_step_safety_checker\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mqueries\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresponses\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscores\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 399\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 400\u001b[0m \u001b[0mtiming\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/Desktop/Projects/minichatgpt/minichatgpt/trainer/ppo_trainer.py\u001b[0m in \u001b[0;36m_step_safety_checker\u001b[0;34m(self, batch_size, queries, responses, scores)\u001b[0m\n\u001b[1;32m 359\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mbatch_size\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtensor_list\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 360\u001b[0m raise ValueError(\n\u001b[0;32m--> 361\u001b[0;31m \u001b[0;34mf\"Batch size ({batch_size}) does not match number of examples - but got {len(tensor_list)} for: {name}\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 362\u001b[0m )\n\u001b[1;32m 363\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: Batch size (64) does not match number of examples - but got 63 for: queries"
"0it [00:00, ?it/s]You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n",
"10it [13:35, 81.46s/it]/home/carson/Desktop/Projects/minichatgpt/venv/lib/python3.7/site-packages/transformers/pipelines/base.py:1048: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n",
" UserWarning,\n",
"19it [25:56, 82.33s/it]"
]
}
],
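The traceback dropped by this commit documents the failure the change sidesteps: PPOTrainer._step_safety_checker raises ValueError whenever a batch's length differs from config.batch_size, and after 388 iterations the dataloader's final, unevenly divided batch carried 63 examples instead of 64. One standard guard, sketched here as an illustration rather than as what minichatgpt does internally, is to drop the ragged last batch:

    from torch.utils.data import DataLoader

    # `dataset` stands in for the tokenized IMDB dataset built earlier.
    # drop_last=True discards a trailing partial batch, so every batch
    # reaching ppo_trainer.step(queries, responses, rewards) has exactly
    # batch_size examples and passes _step_safety_checker.
    dataloader = DataLoader(dataset, batch_size=64, shuffle=True, drop_last=True)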
