Commit 7973c86: use rp_sample
LorrinWWW committed Jun 12, 2023 · 1 parent 736ec85

Showing 4 changed files with 11 additions and 6 deletions.
5 changes: 3 additions & 2 deletions dist_lm_train.py
@@ -99,8 +99,9 @@ def train_loop(args, pipe, device, train_data_loader, test_data_loader):
     if get_pipeline_parallel_rank() == 0 and dp_rank == 0:
 
         for i, data in enumerate(train_data_loader):
-            if i < pipe.global_step:
-                continue
+            #if i < pipe.global_step:
+            #print(i)
+            #continue
 
             if use_dp:
                 get_data_parallel_comm().broadcast(stop_flag, 0)
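
The removed lines were a resume fast-forward: when restarting from a checkpoint, the loop skipped batches until the enumeration index caught up with pipe.global_step. This commit disables that skip, presumably because the newly added rp_sample mixture is shuffled by seed rather than consumed in a fixed order. A minimal sketch of the disabled idiom, with hypothetical scaffolding (the function name and the yield are stand-ins, not this repo's API):

# Sketch of the skip-on-resume idiom commented out above (hypothetical names).
def resume_loop(train_data_loader, global_step):
    for i, data in enumerate(train_data_loader):
        if i < global_step:
            continue  # fast-forward past batches consumed before the checkpoint
        yield i, data  # stand-in for the real forward/backward step

# Resuming at step 2 skips the first two batches:
print(list(resume_loop(["b0", "b1", "b2", "b3"], global_step=2)))
# -> [(2, 'b2'), (3, 'b3')]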
4 changes: 2 additions & 2 deletions example_scripts/finetune_allreduce_llama7b_node0.sh
@@ -1,5 +1,5 @@

-export WANDB_NAME=llama-7b-instruct-two-nodes
+export WANDB_NAME=llama-7b-instruct-rp
 
 netif=enp19s0
 master_ip=172.27.6.23
@@ -18,7 +18,7 @@ ARGS="--model-name ./llama-7b-shard \
 --optimizer adam \
 --seed 42 \
 --load-pretrained-model true \
---task-name ni_dehelm:0.2,p3_dehelm:0.2,pile:0.6 \
+--task-name ni_dehelm:0.2,p3_dehelm:0.2,rp_sample:0.6 \
 --checkpoint-path ./model_ckpts/$WANDB_NAME \
 --num-layers 8 --embedding-dim 4096 \
 --total-steps 10000 --warmup-steps 10 --train-warmup-steps 0 \
4 changes: 2 additions & 2 deletions example_scripts/finetune_allreduce_llama7b_node1.sh
@@ -1,5 +1,5 @@

-export WANDB_NAME=llama-7b-instruct-two-nodes
+export WANDB_NAME=llama-7b-instruct-rp
 
 netif=enp19s0
 master_ip=172.27.6.23
@@ -18,7 +18,7 @@ ARGS="--model-name ./llama-7b-shard \
 --optimizer adam \
 --seed 42 \
 --load-pretrained-model true \
---task-name ni_dehelm:0.2,p3_dehelm:0.2,pile:0.6 \
+--task-name ni_dehelm:0.2,p3_dehelm:0.2,rp_sample:0.6 \
 --checkpoint-path ./model_ckpts/$WANDB_NAME \
 --num-layers 8 --embedding-dim 4096 \
 --total-steps 10000 --warmup-steps 10 --train-warmup-steps 0 \
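Both node scripts make the same two edits: the W&B run name changes from llama-7b-instruct-two-nodes to llama-7b-instruct-rp, and the pile:0.6 component of the --task-name mixture is swapped for rp_sample:0.6, so the weights still sum to 1.0 (0.2 + 0.2 + 0.6). A sketch of how a name:weight mixture string like this could be parsed; the actual parsing lives elsewhere in the repo's data loaders, so treat the function below as an assumption:

# Hypothetical parser for the "name:weight,name:weight,..." task syntax above.
def parse_task_mixture(spec):
    mixture = {}
    for part in spec.split(','):
        name, _, weight = part.partition(':')
        mixture[name] = float(weight) if weight else 1.0  # default weight 1.0
    return mixture

print(parse_task_mixture("ni_dehelm:0.2,p3_dehelm:0.2,rp_sample:0.6"))
# -> {'ni_dehelm': 0.2, 'p3_dehelm': 0.2, 'rp_sample': 0.6}
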
4 changes: 4 additions & 0 deletions tasks/data_loaders/data_utils.py
@@ -95,6 +95,10 @@ def name_to_dataset(task, tokenizer, args):
         data = load_dataset('EleutherAI/pile', split="train", streaming=True).shuffle(buffer_size=10_000, seed=args.seed).with_format("torch")
         # data = load_dataset('the_pile', split="train").shuffle(seed=args.seed)
         dataset = StreamDataset(data, tokenizer, args.seq_length)
+    elif task == 'rp_sample':
+        from .pile import StreamDataset
+        data = load_dataset('togethercomputer/RedPajama-Data-1T-Sample', split="train").shuffle(seed=args.seed).with_format("torch")
+        dataset = StreamDataset(data, tokenizer, args.seq_length)
     elif task == 'cot':
         from .cot import StreamDataset
         dataset = StreamDataset('./data/mmlu-cot.json', tokenizer, args.seq_length)
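
The new rp_sample branch mirrors the existing pile branch (it even reuses pile's StreamDataset wrapper) but loads togethercomputer/RedPajama-Data-1T-Sample without streaming=True: the sample is small enough to materialize on disk, so the whole dataset is shuffled by seed instead of through a streaming buffer. A standalone sketch of just the loading calls, using the Hugging Face datasets library (the seed value here is illustrative):

# Standalone sketch of the new branch's dataset loading (Hugging Face `datasets`).
from datasets import load_dataset

data = (
    load_dataset('togethercomputer/RedPajama-Data-1T-Sample', split='train')
    .shuffle(seed=42)        # full-dataset shuffle; no streaming buffer needed
    .with_format('torch')    # yield torch tensors where applicable
)
print(data[0]['text'][:200])  # each record carries a 'text' field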
