push

allenai · Sep 14, 2024 · 6c4e3ef · 6c4e3ef
1 parent 64ca898
commit 6c4e3ef
Show file tree

Hide file tree

Showing 4 changed files with 29 additions and 1 deletion.
diff --git a/scripts/experimental/winrate/plot_winrate.py b/scripts/experimental/winrate/plot_winrate.py
@@ -15,7 +15,9 @@ class Data:
 results = {
     "SFT w/ H4/no_robots": Data({8e9: [0.31, 121.41]}, "#FFB898", "o"),
     "SFT + Online DPO w/ H4/no_robots": Data({8e9: [0.47, 153.838]}, "#eb4034", "v"),
+    "SFT + PPO w/ H4/no_robots": Data({8e9: [0.446, 146.928]}, "#1F77B4", "^"),  # own color/marker so PPO is distinguishable from Online DPO
     "SFT + Offline DPO w/ H4/no_robots": Data({8e9: [0.424, 144.382]}, "#00FFFF", "2"),
+    "SFT + Offline DPO w/ H4/no_robots two epochs": Data({8e9: [0.498, 140.248]}, "#9F2B68", "3"),
     "llama-3.1-tulu-2-dpo-8": Data({8e9: [0.504, 172.248]}, "#68D39F", "X"),
     "meta-llama/Meta-Llama-3.1-8B-Instruct": Data({8e9: [0.566, 151.506]}, "#8EC2FF", "s"),
 }

diff --git a/scripts/experimental/winrate/readme.md b/scripts/experimental/winrate/readme.md
@@ -57,7 +57,20 @@ python -i generate_and_eval.py \
 # Name: proportion, dtype: float64
 # df['model_response_len'].mean()=153.838
 
-# https://wandb.ai/ai2-llm/open_instruct_internal/runs/97i9hdk3/overview
+
+# https://wandb.ai/ai2-llm/open_instruct_internal/runs/jvjegpcq/
+python -i generate_and_eval.py \
+    --model_name_or_path vwxyzjn/ppo_vllm_thread_beta_0.03__allenai_open_instruct_dev \
+    --model_revision ppo_vllm_thread_beta_0.03__3__1726244716 \
+    --output_path test.csv \
+    --n 500
+# preferred
+# response1    0.554
+# response0    0.446
+# Name: proportion, dtype: float64
+# df['model_response_len'].mean()=146.928
+
+# https://wandb.ai/ai2-llm/open_instruct_internal/runs/97i9hdk3
 python -i generate_and_eval.py \
     --model_name_or_path allenai/open_instruct_dev \
     --model_revision costa_offline_dpo_norobot_3pair__allenai_open_instruct_dev__42__1726174531 \
@@ -70,6 +83,19 @@ python -i generate_and_eval.py \
 # Name: proportion, dtype: float64
 # df['model_response_len'].mean()=144.382
 
+# https://wandb.ai/ai2-llm/open_instruct_internal/runs/w21rugjl
+python -i generate_and_eval.py \
+    --model_name_or_path allenai/open_instruct_dev \
+    --model_revision costa_offline_dpo_norobot_3pair_2peoch__allenai_open_instruct_dev__42__1726241871 \
+    --output_path test.csv \
+    --n 500
+
+# preferred
+# response1    0.502
+# response0    0.498
+# Name: proportion, dtype: float64
+# df['model_response_len'].mean()=140.248
+
 python plot_winrate.py
 ```
 

diff --git a/scripts/experimental/winrate/winrate_plot.png b/scripts/experimental/winrate/winrate_plot.png
diff --git a/scripts/experimental/winrate/winrate_vs_length_plot.png b/scripts/experimental/winrate/winrate_vs_length_plot.png