Commit

add docker

haiasd committed Mar 18, 2024
1 parent f7b8d9c commit 3e1395b
Showing 5 changed files with 210 additions and 53 deletions.
8 changes: 8 additions & 0 deletions README.md
@@ -237,6 +237,14 @@ We advise you to use training frameworks, including [Axolotl](https://github.com
## API
Qwen1.5 models are now deployed on both [DashScope](https://dashscope.aliyun.com/) and [Together](https://api.together.ai/). Check [this](https://api.together.xyz/playground/chat/Qwen/Qwen1.5-72B-Chat) out and have fun with Qwen1.5-72B-Chat!

## 🐳 Docker

To simplify the deployment process, we provide Docker images with pre-built environments: [qwenllm/qwen](https://hub.docker.com/r/qwenllm/qwen). You only need to install the NVIDIA driver and download the model files to launch demos and finetune the model.

```bash
docker run --gpus all --ipc=host --network=host --rm --name qwen1.5 -it qwenllm/qwen:1.5-cu121 bash
```
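
To run the bundled CLI demo against a locally downloaded checkpoint, bind-mount the model directory into the container. A minimal sketch based on the helper scripts under `docker/` (the local checkpoint path is a placeholder):

```bash
# Placeholder checkpoint path; cli_demo.py is bundled into the image.
docker run --gpus all --ipc=host --network=host --rm --name qwen1.5 \
    --mount type=bind,source=/path/to/Qwen1.5-7B-Chat,target=/data/shared/Qwen/Qwen-Chat \
    -it qwenllm/qwen:1.5-cu121 \
    python cli_demo.py -c /data/shared/Qwen/Qwen-Chat/
```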

## License Agreement
Check the license of each model inside its HF repo. It is NOT necessary for you to submit a request for commercial usage.

78 changes: 78 additions & 0 deletions docker/Dockerfile-cu121
@@ -0,0 +1,78 @@
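# Multi-stage build: base (system packages) -> dev (working directory)
# -> bundle_req (PyTorch + Transformers) -> bundle_finetune / bundle_vllm
# -> bundle_flash_attention (optional extras) -> final (demo and finetuning scripts).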
ARG CUDA_VERSION=12.1.0
ARG from=nvidia/cuda:${CUDA_VERSION}-cudnn8-devel-ubuntu20.04

FROM ${from} as base

RUN <<EOF
apt update -y && apt upgrade -y && apt install -y --no-install-recommends \
    git \
    git-lfs \
    python3 \
    python3-pip \
    python3-dev \
    wget \
    vim \
    && rm -rf /var/lib/apt/lists/*
EOF

RUN ln -s /usr/bin/python3 /usr/bin/python

RUN git lfs install

FROM base as dev

WORKDIR /

RUN mkdir -p /data/shared/Qwen

WORKDIR /data/shared/Qwen/

FROM dev as bundle_req
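# networkx is pinned to 3.1: later releases require Python >= 3.9, while Ubuntu 20.04 ships Python 3.8.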
RUN pip3 install --no-cache-dir networkx==3.1
RUN pip3 install --no-cache-dir torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cu121
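# transformers >= 4.37.0 is required for the Qwen2 architecture used by Qwen1.5.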
RUN pip3 install --no-cache-dir transformers==4.37.0 accelerate tiktoken einops transformers_stream_generator==0.0.4 scipy

FROM bundle_req as bundle_finetune
ARG BUNDLE_FINETUNE=true

RUN <<EOF
if [ "$BUNDLE_FINETUNE" = "true" ]; then
    cd /data/shared/Qwen

    # Full-finetune / LoRA.
    pip3 install --no-cache-dir "deepspeed==0.14.0" "peft==0.9.0"

    # Q-LoRA.
    apt update -y && DEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends \
        libopenmpi-dev openmpi-bin \
        && rm -rf /var/lib/apt/lists/*
    pip3 install --no-cache-dir "optimum==1.17.1" "auto-gptq==0.7.1" mpi4py
fi
EOF

FROM bundle_finetune as bundle_vllm
ARG BUNDLE_VLLM=true

RUN <<EOF
if [ "$BUNDLE_VLLM" = "true" ]; then
    cd /data/shared/Qwen

    pip3 install --no-cache-dir vllm==0.3.1 "fschat[model_worker,webui]==0.2.36"
fi
EOF

FROM bundle_vllm as bundle_flash_attention
ARG BUNDLE_FLASH_ATTENTION=true

RUN <<EOF
if [ "$BUNDLE_FLASH_ATTENTION" = "true" ]; then
    pip3 install --no-cache-dir flash-attn==2.5.6 --no-build-isolation
fi
EOF

FROM bundle_flash_attention as final

# The build context is assumed to be the repository root
# (e.g. `docker build -f docker/Dockerfile-cu121 .`); COPY cannot
# reference paths outside the context with "../".
COPY examples/sft/* ./
COPY examples/demo/* ./

EXPOSE 80
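
The heredoc `RUN <<EOF` blocks above require BuildKit. A minimal build sketch, assuming it is run from the repository root so that `examples/` is inside the build context (the tag is illustrative; the three `BUNDLE_*` build args default to true):

```bash
# BuildKit is required for the RUN <<EOF heredoc syntax.
DOCKER_BUILDKIT=1 docker build -f docker/Dockerfile-cu121 \
    --build-arg BUNDLE_FINETUNE=true \
    --build-arg BUNDLE_VLLM=true \
    --build-arg BUNDLE_FLASH_ATTENTION=true \
    -t qwenllm/qwen:1.5-cu121 .
```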
54 changes: 54 additions & 0 deletions docker/docker_cli_demo.sh
@@ -0,0 +1,54 @@
#!/usr/bin/env bash
#
# This script automatically pulls the Docker image from Docker Hub and starts a container running the Qwen-Chat CLI demo.

IMAGE_NAME=qwenllm/qwen:1.5-cu121
QWEN_CHECKPOINT_PATH=/path/to/Qwen1.5-Chat
CONTAINER_NAME=qwen1.5

function usage() {
    echo '
Usage: bash docker/docker_cli_demo.sh [-i IMAGE_NAME] -c /path/to/Qwen-Chat [-n CONTAINER_NAME]
'
}

while [[ "$1" != "" ]]; do
    case $1 in
        -i | --image-name )
            shift
            IMAGE_NAME=$1
            ;;
        -c | --checkpoint )
            shift
            QWEN_CHECKPOINT_PATH=$1
            ;;
        -n | --container-name )
            shift
            CONTAINER_NAME=$1
            ;;
        -h | --help )
            usage
            exit 0
            ;;
        * )
            echo "Unknown argument ${1}"
            exit 1
            ;;
    esac
    shift
done

if [ ! -e "${QWEN_CHECKPOINT_PATH}/config.json" ]; then
    echo "Checkpoint config.json not found in ${QWEN_CHECKPOINT_PATH}, exiting."
    exit 1
fi

sudo docker pull ${IMAGE_NAME} || {
    echo "Pulling image ${IMAGE_NAME} failed, exiting."
    exit 1
}

sudo docker run --gpus all --rm --name ${CONTAINER_NAME} \
    --mount type=bind,source=${QWEN_CHECKPOINT_PATH},target=/data/shared/Qwen/Qwen-Chat \
    -it ${IMAGE_NAME} \
    python cli_demo.py -c /data/shared/Qwen/Qwen-Chat/
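
For example, assuming the checkpoint was downloaded to a local directory (placeholder path; `-i` and `-n` fall back to the defaults defined at the top of the script):

```bash
bash docker/docker_cli_demo.sh -c /path/to/Qwen1.5-7B-Chat
```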
64 changes: 64 additions & 0 deletions docker/docker_web_demo.sh
@@ -0,0 +1,64 @@
#!/usr/bin/env bash
#
# This script automatically pulls the Docker image from Docker Hub and starts a detached container running the Qwen-Chat web demo.

IMAGE_NAME=qwenllm/qwen:1.5-cu121
QWEN_CHECKPOINT_PATH=/path/to/Qwen1.5-Chat
PORT=8901
CONTAINER_NAME=qwen1.5

function usage() {
    echo '
Usage: bash docker/docker_web_demo.sh [-i IMAGE_NAME] -c /path/to/Qwen-Chat [-n CONTAINER_NAME] [--port PORT]
'
}

while [[ "$1" != "" ]]; do
    case $1 in
        -i | --image-name )
            shift
            IMAGE_NAME=$1
            ;;
        -c | --checkpoint )
            shift
            QWEN_CHECKPOINT_PATH=$1
            ;;
        -n | --container-name )
            shift
            CONTAINER_NAME=$1
            ;;
        --port )
            shift
            PORT=$1
            ;;
        -h | --help )
            usage
            exit 0
            ;;
        * )
            echo "Unknown argument ${1}"
            exit 1
            ;;
    esac
    shift
done

if [ ! -e "${QWEN_CHECKPOINT_PATH}/config.json" ]; then
    echo "Checkpoint config.json not found in ${QWEN_CHECKPOINT_PATH}, exiting."
    exit 1
fi

sudo docker pull ${IMAGE_NAME} || {
    echo "Pulling image ${IMAGE_NAME} failed, exiting."
    exit 1
}

sudo docker run --gpus all -d --restart always --name ${CONTAINER_NAME} \
    -v /var/run/docker.sock:/var/run/docker.sock -p ${PORT}:80 \
    --mount type=bind,source=${QWEN_CHECKPOINT_PATH},target=/data/shared/Qwen/Qwen-Chat \
    -it ${IMAGE_NAME} \
    python web_demo.py --server-port 80 --server-name 0.0.0.0 -c /data/shared/Qwen/Qwen-Chat/ && {
    echo "Successfully started web demo. Open 'http://localhost:${PORT}' to try!
Run \`docker logs ${CONTAINER_NAME}\` to check demo status.
Run \`docker rm -f ${CONTAINER_NAME}\` to stop and remove the demo."
}
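
For example (placeholder checkpoint path; the demo is then served on the host port given by `--port`, 8901 by default):

```bash
bash docker/docker_web_demo.sh -c /path/to/Qwen1.5-7B-Chat --port 8901
```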
59 changes: 6 additions & 53 deletions examples/demo/web_demo.py
@@ -9,7 +9,6 @@
from threading import Thread

import gradio as gr
import mdtex2html
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

@@ -81,52 +80,6 @@ def _chat_stream(model, tokenizer, query, history):
        yield new_text


def postprocess(self, y):
    if y is None:
        return []
    for i, (message, response) in enumerate(y):
        y[i] = (
            None if message is None else mdtex2html.convert(message),
            None if response is None else mdtex2html.convert(response),
        )
    return y


gr.Chatbot.postprocess = postprocess


def _parse_text(text):
    lines = text.split("\n")
    lines = [line for line in lines if line != ""]
    count = 0
    for i, line in enumerate(lines):
        if "```" in line:
            count += 1
            items = line.split("`")
            if count % 2 == 1:
                lines[i] = f'<pre><code class="language-{items[-1]}">'
            else:
                lines[i] = f"<br></code></pre>"
        else:
            if i > 0:
                if count % 2 == 1:
                    line = line.replace("`", r"\`")
                    line = line.replace("<", "&lt;")
                    line = line.replace(">", "&gt;")
                    line = line.replace(" ", "&nbsp;")
                    line = line.replace("*", "&ast;")
                    line = line.replace("_", "&lowbar;")
                    line = line.replace("-", "&#45;")
                    line = line.replace(".", "&#46;")
                    line = line.replace("!", "&#33;")
                    line = line.replace("(", "&#40;")
                    line = line.replace(")", "&#41;")
                    line = line.replace("$", "&#36;")
                lines[i] = "<br>" + line
    text = "".join(lines)
    return text


def _gc():
    import gc
    gc.collect()
@@ -137,20 +90,20 @@ def _gc():
def _launch_demo(args, model, tokenizer):

    def predict(_query, _chatbot, _task_history):
        print(f"User: {_parse_text(_query)}")
        _chatbot.append((_parse_text(_query), ""))
        print(f"User: {_query}")
        _chatbot.append((_query, ""))
        full_response = ""
        response = ""
        for new_text in _chat_stream(model, tokenizer, _query, history=_task_history):
            response += new_text
            _chatbot[-1] = (_parse_text(_query), _parse_text(response))
            _chatbot[-1] = (_query, response)

            yield _chatbot
        full_response = _parse_text(response)
        full_response = response

        print(f"History: {_task_history}")
        _task_history.append((_query, full_response))
        print(f"Qwen1.5-Chat: {_parse_text(full_response)}")
        print(f"Qwen1.5-Chat: {full_response}")

    def regenerate(_chatbot, _task_history):
        if not _task_history:
@@ -227,4 +180,4 @@ def main():


if __name__ == '__main__':
main()
main()
