Commit
graph rewards v iter
carson committed Feb 17, 2023
1 parent 0bc3fba commit ed1a6d0
Showing 2 changed files with 26 additions and 39 deletions.
Binary file added media/reward_iters_1.png
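The added media/reward_iters_1.png presumably charts mean reward against training iteration, matching the commit message. A minimal matplotlib sketch of how such a figure could be produced; the reward values here are hypothetical placeholders, not data from the notebook:

    import matplotlib.pyplot as plt

    # Hypothetical per-iteration mean rewards logged during PPO training.
    mean_rewards = [0.12, 0.31, 0.48, 0.57, 0.66]

    plt.plot(range(len(mean_rewards)), mean_rewards)
    plt.xlabel("iteration")
    plt.ylabel("mean reward")
    # Assumes the media/ directory already exists in the repo.
    plt.savefig("media/reward_iters_1.png")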
65 changes: 26 additions & 39 deletions notebooks/part_3_tutorial.ipynb
@@ -2,30 +2,16 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"id": "52adb752",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/bin/nvidia-modprobe: unrecognized option: \"-s\"\n",
"\n",
"ERROR: Invalid commandline, please run `/usr/bin/nvidia-modprobe --help` for usage information.\n",
"\n",
"\n",
"/usr/bin/nvidia-modprobe: unrecognized option: \"-s\"\n",
"\n",
"ERROR: Invalid commandline, please run `/usr/bin/nvidia-modprobe --help` for usage information.\n",
"\n",
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"The autoreload extension is already loaded. To reload it, use:\n",
" %reload_ext autoreload\n",
"number of GPUs: 2\n"
]
}
@@ -49,23 +35,34 @@
},
{
"cell_type": "code",
"execution_count": 48,
"execution_count": 3,
"id": "e9365b48",
"metadata": {},
"outputs": [],
"source": [
"# For the sake of the speed of this demonstration, the batch_size is temporarily decreased from 256 to 4\n",
"batch_size = 64\n",
"config.batch_size = batch_size\n",
"config.forward_batch_size = batch_size//2\n"
"#batch_size = 64\n",
"#config.batch_size = batch_size\n",
"#config.forward_batch_size = batch_size//2\n",
"config.seed = 42"
]
},
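The newly added config.seed = 42 points to making the run reproducible. How minichatgpt consumes the seed is not visible in this diff; a common pattern, offered here only as an assumption about what the config does, is to seed every RNG in play at once:

    import random

    import numpy as np
    import torch

    def set_seed(seed: int) -> None:
        # Seed Python, NumPy, and PyTorch (CPU and every GPU) in one place.
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    set_seed(42)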
{
"cell_type": "code",
"execution_count": 39,
"execution_count": 4,
"id": "84bda234",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Found cached dataset imdb (/home/carson/.cache/huggingface/datasets/imdb/plain_text/1.0.0/d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0)\n",
"Loading cached processed dataset at /home/carson/.cache/huggingface/datasets/imdb/plain_text/1.0.0/d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0/cache-a680d0720b68191d.arrow\n",
"Loading cached processed dataset at /home/carson/.cache/huggingface/datasets/imdb/plain_text/1.0.0/d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0/cache-ec21da66149d9ccd.arrow\n"
]
}
],
"source": [
"lab = Lab(config)\n",
"\n",
@@ -89,7 +86,7 @@
},
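The stderr messages above are Hugging Face datasets resolving IMDB and its processed splits from the local cache rather than re-downloading or re-tokenizing. The notebook hides the call inside Lab, but a load of this shape would produce those messages (the split and any map() preprocessing are assumptions):

    from datasets import load_dataset

    # On a cache hit this prints "Found cached dataset imdb (...)" and
    # "Loading cached processed dataset at ..." instead of recomputing.
    dataset = load_dataset("imdb", split="train")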
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 5,
"id": "92dddae8",
"metadata": {},
"outputs": [
@@ -137,28 +134,18 @@
},
{
"cell_type": "code",
"execution_count": 49,
"execution_count": null,
"id": "15104fc4",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"388it [2:07:25, 19.71s/it]\n"
]
},
{
"ename": "ValueError",
"evalue": "Batch size (64) does not match number of examples - but got 63 for: queries",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/tmp/ipykernel_16688/3444053502.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 26\u001b[0m \u001b[0;31m#### Run PPO step\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 27\u001b[0;31m \u001b[0mstats\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mppo_trainer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mqueries\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresponses\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrewards\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 28\u001b[0m \u001b[0mppo_trainer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlog_stats\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstats\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrewards\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 29\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/Desktop/Projects/minichatgpt/minichatgpt/trainer/ppo_trainer.py\u001b[0m in \u001b[0;36mstep\u001b[0;34m(self, queries, responses, scores)\u001b[0m\n\u001b[1;32m 396\u001b[0m \u001b[0mbs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbatch_size\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 397\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 398\u001b[0;31m \u001b[0mqueries\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresponses\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscores\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_step_safety_checker\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mqueries\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresponses\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscores\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 399\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 400\u001b[0m \u001b[0mtiming\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/Desktop/Projects/minichatgpt/minichatgpt/trainer/ppo_trainer.py\u001b[0m in \u001b[0;36m_step_safety_checker\u001b[0;34m(self, batch_size, queries, responses, scores)\u001b[0m\n\u001b[1;32m 359\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mbatch_size\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtensor_list\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 360\u001b[0m raise ValueError(\n\u001b[0;32m--> 361\u001b[0;31m \u001b[0;34mf\"Batch size ({batch_size}) does not match number of examples - but got {len(tensor_list)} for: {name}\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 362\u001b[0m )\n\u001b[1;32m 363\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: Batch size (64) does not match number of examples - but got 63 for: queries"
"0it [00:00, ?it/s]You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n",
"10it [13:35, 81.46s/it]/home/carson/Desktop/Projects/minichatgpt/venv/lib/python3.7/site-packages/transformers/pipelines/base.py:1048: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n",
" UserWarning,\n",
"19it [25:56, 82.33s/it]"
]
}
],
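The traceback dropped by this commit documents the failure the change sidesteps: PPOTrainer._step_safety_checker raises ValueError whenever a batch's length differs from config.batch_size, and after 388 iterations the dataloader's final, unevenly divided batch carried 63 examples instead of 64. One standard guard, sketched here as an illustration rather than as what minichatgpt does internally, is to drop the ragged last batch:

    from torch.utils.data import DataLoader

    # `dataset` stands in for the tokenized IMDB dataset built earlier.
    # drop_last=True discards a trailing partial batch, so every batch
    # reaching ppo_trainer.step(queries, responses, rewards) has exactly
    # batch_size examples and passes _step_safety_checker.
    dataloader = DataLoader(dataset, batch_size=64, shuffle=True, drop_last=True)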
