Commit 7973c86: use rp_sample
LorrinWWW committed Jun 12, 2023 · 1 parent 736ec85

Showing 4 changed files with 11 additions and 6 deletions.
5 changes: 3 additions & 2 deletions dist_lm_train.py
@@ -99,8 +99,9 @@ def train_loop(args, pipe, device, train_data_loader, test_data_loader):
     if get_pipeline_parallel_rank() == 0 and dp_rank == 0:
 
         for i, data in enumerate(train_data_loader):
-            if i < pipe.global_step:
-                continue
+            #if i < pipe.global_step:
+            #print(i)
+            #continue
 
             if use_dp:
                 get_data_parallel_comm().broadcast(stop_flag, 0)
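
The removed lines were a resume fast-forward: when restarting from a checkpoint, the loop skipped batches until the enumeration index caught up with pipe.global_step. This commit disables that skip, presumably because the newly added rp_sample mixture is shuffled by seed rather than consumed in a fixed order. A minimal sketch of the disabled idiom, with hypothetical scaffolding (the function name and the yield are stand-ins, not this repo's API):

# Sketch of the skip-on-resume idiom commented out above (hypothetical names).
def resume_loop(train_data_loader, global_step):
    for i, data in enumerate(train_data_loader):
        if i < global_step:
            continue  # fast-forward past batches consumed before the checkpoint
        yield i, data  # stand-in for the real forward/backward step

# Resuming at step 2 skips the first two batches:
print(list(resume_loop(["b0", "b1", "b2", "b3"], global_step=2)))
# -> [(2, 'b2'), (3, 'b3')]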
4 changes: 2 additions & 2 deletions example_scripts/finetune_allreduce_llama7b_node0.sh
@@ -1,5 +1,5 @@

-export WANDB_NAME=llama-7b-instruct-two-nodes
+export WANDB_NAME=llama-7b-instruct-rp
 
 netif=enp19s0
 master_ip=172.27.6.23
@@ -18,7 +18,7 @@ ARGS="--model-name ./llama-7b-shard \
 --optimizer adam \
 --seed 42 \
 --load-pretrained-model true \
---task-name ni_dehelm:0.2,p3_dehelm:0.2,pile:0.6 \
+--task-name ni_dehelm:0.2,p3_dehelm:0.2,rp_sample:0.6 \
 --checkpoint-path ./model_ckpts/$WANDB_NAME \
 --num-layers 8 --embedding-dim 4096 \
 --total-steps 10000 --warmup-steps 10 --train-warmup-steps 0 \
4 changes: 2 additions & 2 deletions example_scripts/finetune_allreduce_llama7b_node1.sh
@@ -1,5 +1,5 @@

-export WANDB_NAME=llama-7b-instruct-two-nodes
+export WANDB_NAME=llama-7b-instruct-rp
 
 netif=enp19s0
 master_ip=172.27.6.23
@@ -18,7 +18,7 @@ ARGS="--model-name ./llama-7b-shard \
 --optimizer adam \
 --seed 42 \
 --load-pretrained-model true \
---task-name ni_dehelm:0.2,p3_dehelm:0.2,pile:0.6 \
+--task-name ni_dehelm:0.2,p3_dehelm:0.2,rp_sample:0.6 \
 --checkpoint-path ./model_ckpts/$WANDB_NAME \
 --num-layers 8 --embedding-dim 4096 \
 --total-steps 10000 --warmup-steps 10 --train-warmup-steps 0 \
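Both node scripts make the same two edits: the W&B run name changes from llama-7b-instruct-two-nodes to llama-7b-instruct-rp, and the pile:0.6 component of the --task-name mixture is swapped for rp_sample:0.6, so the weights still sum to 1.0 (0.2 + 0.2 + 0.6). A sketch of how a name:weight mixture string like this could be parsed; the actual parsing lives elsewhere in the repo's data loaders, so treat the function below as an assumption:

# Hypothetical parser for the "name:weight,name:weight,..." task syntax above.
def parse_task_mixture(spec):
    mixture = {}
    for part in spec.split(','):
        name, _, weight = part.partition(':')
        mixture[name] = float(weight) if weight else 1.0  # default weight 1.0
    return mixture

print(parse_task_mixture("ni_dehelm:0.2,p3_dehelm:0.2,rp_sample:0.6"))
# -> {'ni_dehelm': 0.2, 'p3_dehelm': 0.2, 'rp_sample': 0.6}
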
4 changes: 4 additions & 0 deletions tasks/data_loaders/data_utils.py
@@ -95,6 +95,10 @@ def name_to_dataset(task, tokenizer, args):
         data = load_dataset('EleutherAI/pile', split="train", streaming=True).shuffle(buffer_size=10_000, seed=args.seed).with_format("torch")
         # data = load_dataset('the_pile', split="train").shuffle(seed=args.seed)
         dataset = StreamDataset(data, tokenizer, args.seq_length)
+    elif task == 'rp_sample':
+        from .pile import StreamDataset
+        data = load_dataset('togethercomputer/RedPajama-Data-1T-Sample', split="train").shuffle(seed=args.seed).with_format("torch")
+        dataset = StreamDataset(data, tokenizer, args.seq_length)
     elif task == 'cot':
         from .cot import StreamDataset
         dataset = StreamDataset('./data/mmlu-cot.json', tokenizer, args.seq_length)
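
The new rp_sample branch mirrors the existing pile branch (it even reuses pile's StreamDataset wrapper) but loads togethercomputer/RedPajama-Data-1T-Sample without streaming=True: the sample is small enough to materialize on disk, so the whole dataset is shuffled by seed instead of through a streaming buffer. A standalone sketch of just the loading calls, using the Hugging Face datasets library (the seed value here is illustrative):

# Standalone sketch of the new branch's dataset loading (Hugging Face `datasets`).
from datasets import load_dataset

data = (
    load_dataset('togethercomputer/RedPajama-Data-1T-Sample', split='train')
    .shuffle(seed=42)        # full-dataset shuffle; no streaming buffer needed
    .with_format('torch')    # yield torch tensors where applicable
)
print(data[0]['text'][:200])  # each record carries a 'text' field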
