Commit

add docker

haiasd committed Mar 18, 2024
1 parent f7b8d9c commit 3e1395b
Showing 5 changed files with 210 additions and 53 deletions.
8 changes: 8 additions & 0 deletions README.md
@@ -237,6 +237,14 @@ We advise you to use training frameworks, including [Axolotl](https://github.com
## API
Qwen1.5 models are now deployed on both [DashScope](https://dashscope.aliyun.com/) and [Together](https://api.together.ai/). Check [this](https://api.together.xyz/playground/chat/Qwen/Qwen1.5-72B-Chat) out and have fun with Qwen1.5-72B-Chat!

## 🐳 Docker

To simplify the deployment process, we provide Docker images with pre-built environments: [qwenllm/qwen](https://hub.docker.com/r/qwenllm/qwen). You only need to install the NVIDIA driver and download the model files to launch demos and finetune the model.

```bash
docker run --gpus all --ipc=host --network=host --rm --name qwen1.5 -it qwenllm/qwen:1.5-cu121 bash
```
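
To run the bundled CLI demo against a locally downloaded checkpoint, bind-mount the model directory into the container. A minimal sketch based on the helper scripts under `docker/` (the local checkpoint path is a placeholder):

```bash
# Placeholder checkpoint path; cli_demo.py is bundled into the image.
docker run --gpus all --ipc=host --network=host --rm --name qwen1.5 \
    --mount type=bind,source=/path/to/Qwen1.5-7B-Chat,target=/data/shared/Qwen/Qwen-Chat \
    -it qwenllm/qwen:1.5-cu121 \
    python cli_demo.py -c /data/shared/Qwen/Qwen-Chat/
```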

## License Agreement
Check the license of each model inside its HF repo. It is NOT necessary for you to submit a request for commercial usage.

78 changes: 78 additions & 0 deletions docker/Dockerfile-cu121
@@ -0,0 +1,78 @@
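# Multi-stage build: base (system packages) -> dev (working directory)
# -> bundle_req (PyTorch + Transformers) -> bundle_finetune / bundle_vllm
# -> bundle_flash_attention (optional extras) -> final (demo and finetuning scripts).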
ARG CUDA_VERSION=12.1.0
ARG from=nvidia/cuda:${CUDA_VERSION}-cudnn8-devel-ubuntu20.04

FROM ${from} as base

RUN <<EOF
apt update -y && apt upgrade -y && apt install -y --no-install-recommends \
    git \
    git-lfs \
    python3 \
    python3-pip \
    python3-dev \
    wget \
    vim \
    && rm -rf /var/lib/apt/lists/*
EOF

RUN ln -s /usr/bin/python3 /usr/bin/python

RUN git lfs install

FROM base as dev

WORKDIR /

RUN mkdir -p /data/shared/Qwen

WORKDIR /data/shared/Qwen/

FROM dev as bundle_req
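# networkx is pinned to 3.1: later releases require Python >= 3.9, while Ubuntu 20.04 ships Python 3.8.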
RUN pip3 install --no-cache-dir networkx==3.1
RUN pip3 install --no-cache-dir torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cu121
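# transformers >= 4.37.0 is required for the Qwen2 architecture used by Qwen1.5.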
RUN pip3 install --no-cache-dir transformers==4.37.0 accelerate tiktoken einops transformers_stream_generator==0.0.4 scipy

FROM bundle_req as bundle_finetune
ARG BUNDLE_FINETUNE=true

RUN <<EOF
if [ "$BUNDLE_FINETUNE" = "true" ]; then
    cd /data/shared/Qwen

    # Full-finetune / LoRA.
    pip3 install --no-cache-dir "deepspeed==0.14.0" "peft==0.9.0"

    # Q-LoRA.
    apt update -y && DEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends \
        libopenmpi-dev openmpi-bin \
        && rm -rf /var/lib/apt/lists/*
    pip3 install --no-cache-dir "optimum==1.17.1" "auto-gptq==0.7.1" mpi4py
fi
EOF

FROM bundle_finetune as bundle_vllm
ARG BUNDLE_VLLM=true

RUN <<EOF
if [ "$BUNDLE_VLLM" = "true" ]; then
    cd /data/shared/Qwen

    pip3 install --no-cache-dir vllm==0.3.1 "fschat[model_worker,webui]==0.2.36"
fi
EOF

FROM bundle_vllm as bundle_flash_attention
ARG BUNDLE_FLASH_ATTENTION=true

RUN <<EOF
if [ "$BUNDLE_FLASH_ATTENTION" = "true" ]; then
    pip3 install --no-cache-dir flash-attn==2.5.6 --no-build-isolation
fi
EOF

FROM bundle_flash_attention as final

# The build context is assumed to be the repository root
# (e.g. `docker build -f docker/Dockerfile-cu121 .`); COPY cannot
# reference paths outside the context with "../".
COPY examples/sft/* ./
COPY examples/demo/* ./

EXPOSE 80
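
The heredoc `RUN <<EOF` blocks above require BuildKit. A minimal build sketch, assuming it is run from the repository root so that `examples/` is inside the build context (the tag is illustrative; the three `BUNDLE_*` build args default to true):

```bash
# BuildKit is required for the RUN <<EOF heredoc syntax.
DOCKER_BUILDKIT=1 docker build -f docker/Dockerfile-cu121 \
    --build-arg BUNDLE_FINETUNE=true \
    --build-arg BUNDLE_VLLM=true \
    --build-arg BUNDLE_FLASH_ATTENTION=true \
    -t qwenllm/qwen:1.5-cu121 .
```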
54 changes: 54 additions & 0 deletions docker/docker_cli_demo.sh
@@ -0,0 +1,54 @@
#!/usr/bin/env bash
#
# This script automatically pulls the Docker image from Docker Hub and starts a container running the Qwen-Chat CLI demo.

IMAGE_NAME=qwenllm/qwen:1.5-cu121
QWEN_CHECKPOINT_PATH=/path/to/Qwen1.5-Chat
CONTAINER_NAME=qwen1.5

function usage() {
    echo '
Usage: bash docker/docker_cli_demo.sh [-i IMAGE_NAME] -c /path/to/Qwen-Chat [-n CONTAINER_NAME]
'
}

while [[ "$1" != "" ]]; do
    case $1 in
        -i | --image-name )
            shift
            IMAGE_NAME=$1
            ;;
        -c | --checkpoint )
            shift
            QWEN_CHECKPOINT_PATH=$1
            ;;
        -n | --container-name )
            shift
            CONTAINER_NAME=$1
            ;;
        -h | --help )
            usage
            exit 0
            ;;
        * )
            echo "Unknown argument ${1}"
            exit 1
            ;;
    esac
    shift
done

if [ ! -e "${QWEN_CHECKPOINT_PATH}/config.json" ]; then
    echo "Checkpoint config.json not found in ${QWEN_CHECKPOINT_PATH}, exiting."
    exit 1
fi

sudo docker pull ${IMAGE_NAME} || {
    echo "Pulling image ${IMAGE_NAME} failed, exiting."
    exit 1
}

sudo docker run --gpus all --rm --name ${CONTAINER_NAME} \
    --mount type=bind,source=${QWEN_CHECKPOINT_PATH},target=/data/shared/Qwen/Qwen-Chat \
    -it ${IMAGE_NAME} \
    python cli_demo.py -c /data/shared/Qwen/Qwen-Chat/
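
For example, assuming the checkpoint was downloaded to a local directory (placeholder path; `-i` and `-n` fall back to the defaults defined at the top of the script):

```bash
bash docker/docker_cli_demo.sh -c /path/to/Qwen1.5-7B-Chat
```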
64 changes: 64 additions & 0 deletions docker/docker_web_demo.sh
@@ -0,0 +1,64 @@
#!/usr/bin/env bash
#
# This script automatically pulls the Docker image from Docker Hub and starts a detached container running the Qwen-Chat web demo.

IMAGE_NAME=qwenllm/qwen:1.5-cu121
QWEN_CHECKPOINT_PATH=/path/to/Qwen1.5-Chat
PORT=8901
CONTAINER_NAME=qwen1.5

function usage() {
    echo '
Usage: bash docker/docker_web_demo.sh [-i IMAGE_NAME] -c /path/to/Qwen-Chat [-n CONTAINER_NAME] [--port PORT]
'
}

while [[ "$1" != "" ]]; do
    case $1 in
        -i | --image-name )
            shift
            IMAGE_NAME=$1
            ;;
        -c | --checkpoint )
            shift
            QWEN_CHECKPOINT_PATH=$1
            ;;
        -n | --container-name )
            shift
            CONTAINER_NAME=$1
            ;;
        --port )
            shift
            PORT=$1
            ;;
        -h | --help )
            usage
            exit 0
            ;;
        * )
            echo "Unknown argument ${1}"
            exit 1
            ;;
    esac
    shift
done

if [ ! -e "${QWEN_CHECKPOINT_PATH}/config.json" ]; then
    echo "Checkpoint config.json not found in ${QWEN_CHECKPOINT_PATH}, exiting."
    exit 1
fi

sudo docker pull ${IMAGE_NAME} || {
    echo "Pulling image ${IMAGE_NAME} failed, exiting."
    exit 1
}

sudo docker run --gpus all -d --restart always --name ${CONTAINER_NAME} \
    -v /var/run/docker.sock:/var/run/docker.sock -p ${PORT}:80 \
    --mount type=bind,source=${QWEN_CHECKPOINT_PATH},target=/data/shared/Qwen/Qwen-Chat \
    -it ${IMAGE_NAME} \
    python web_demo.py --server-port 80 --server-name 0.0.0.0 -c /data/shared/Qwen/Qwen-Chat/ && {
    echo "Successfully started web demo. Open 'http://localhost:${PORT}' to try!
Run \`docker logs ${CONTAINER_NAME}\` to check demo status.
Run \`docker rm -f ${CONTAINER_NAME}\` to stop and remove the demo."
}
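
For example (placeholder checkpoint path; the demo is then served on the host port given by `--port`, 8901 by default):

```bash
bash docker/docker_web_demo.sh -c /path/to/Qwen1.5-7B-Chat --port 8901
```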
59 changes: 6 additions & 53 deletions examples/demo/web_demo.py
@@ -9,7 +9,6 @@
from threading import Thread

import gradio as gr
import mdtex2html
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

@@ -81,52 +80,6 @@ def _chat_stream(model, tokenizer, query, history):
        yield new_text


def postprocess(self, y):
    if y is None:
        return []
    for i, (message, response) in enumerate(y):
        y[i] = (
            None if message is None else mdtex2html.convert(message),
            None if response is None else mdtex2html.convert(response),
        )
    return y


gr.Chatbot.postprocess = postprocess


def _parse_text(text):
    lines = text.split("\n")
    lines = [line for line in lines if line != ""]
    count = 0
    for i, line in enumerate(lines):
        if "```" in line:
            count += 1
            items = line.split("`")
            if count % 2 == 1:
                lines[i] = f'<pre><code class="language-{items[-1]}">'
            else:
                lines[i] = f"<br></code></pre>"
        else:
            if i > 0:
                if count % 2 == 1:
                    line = line.replace("`", r"\`")
                    line = line.replace("<", "&lt;")
                    line = line.replace(">", "&gt;")
                    line = line.replace(" ", "&nbsp;")
                    line = line.replace("*", "&ast;")
                    line = line.replace("_", "&lowbar;")
                    line = line.replace("-", "&#45;")
                    line = line.replace(".", "&#46;")
                    line = line.replace("!", "&#33;")
                    line = line.replace("(", "&#40;")
                    line = line.replace(")", "&#41;")
                    line = line.replace("$", "&#36;")
                lines[i] = "<br>" + line
    text = "".join(lines)
    return text


def _gc():
    import gc
    gc.collect()
@@ -137,20 +90,20 @@ def _gc():
def _launch_demo(args, model, tokenizer):

    def predict(_query, _chatbot, _task_history):
        print(f"User: {_parse_text(_query)}")
        _chatbot.append((_parse_text(_query), ""))
        print(f"User: {_query}")
        _chatbot.append((_query, ""))
        full_response = ""
        response = ""
        for new_text in _chat_stream(model, tokenizer, _query, history=_task_history):
            response += new_text
            _chatbot[-1] = (_parse_text(_query), _parse_text(response))
            _chatbot[-1] = (_query, response)

            yield _chatbot
        full_response = _parse_text(response)
        full_response = response

        print(f"History: {_task_history}")
        _task_history.append((_query, full_response))
        print(f"Qwen1.5-Chat: {_parse_text(full_response)}")
        print(f"Qwen1.5-Chat: {full_response}")

    def regenerate(_chatbot, _task_history):
        if not _task_history:
@@ -227,4 +180,4 @@ def main():


if __name__ == '__main__':
main()
main()
