Skip to content

Commit

Permalink
Merge branch 'main' into text_generation_improvement
Browse files Browse the repository at this point in the history
  • Loading branch information
yidong72 committed Dec 8, 2022
2 parents 1db6582 + f091999 commit 09d5854
Show file tree
Hide file tree
Showing 175 changed files with 10,557 additions and 5,308 deletions.
33 changes: 33 additions & 0 deletions .github/labeler.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
ASR:
- nemo/collections/asr/**/*
- examples/asr/**/*
- tutorials/asr/**/*
- docs/source/asr/**/*

NLP:
- nemo/collections/nlp/**/*
- examples/nlp/**/*
- tutorials/nlp/**/*
- docs/source/nlp/**/*

Speaker Tasks:
- examples/speaker_tasks/**/*
- tutorials/speaker_tasks/**/*

TTS:
- nemo/collections/tts/**/*
- examples/tts/**/*
- tutorials/tts/**/*
- docs/source/tts/**/*

core:
- nemo/core/**/*

common:
- nemo/collections/common/**/*

CI:
- .github/**/*
- Jenkinsfile
- Dockerfile
- ci.groovy
74 changes: 74 additions & 0 deletions .github/workflows/codeql.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# For most projects, this workflow file will not need changing; you simply need
# to commit it to your repository.
#
# You may wish to alter this file to override the set of languages analyzed,
# or to provide custom queries or build logic.
#
# ******** NOTE ********
# We have attempted to detect the languages in your repository. Please check
# the `language` matrix defined below to confirm you have the correct set of
# supported CodeQL languages.
#
name: "CodeQL"

on:
push:
branches: [ "main", "[rv][0-9]*", "gh-pages-src" ]
pull_request:
# The branches below must be a subset of the branches above
branches: [ "main" ]
schedule:
- cron: '19 1 * * 4'

jobs:
analyze:
name: Analyze
runs-on: ubuntu-latest
permissions:
actions: read
contents: read
security-events: write

strategy:
fail-fast: false
matrix:
language: [ 'python' ]
# CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
# Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support

steps:
- name: Checkout repository
uses: actions/checkout@v3

# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@v2
with:
languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file.
# By default, queries listed here will override any specified in a config file.
# Prefix the list here with "+" to use these queries and those in the config file.

# Details on CodeQL's query packs refer to : https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
queries: security-and-quality # security-extended,


# Autobuild attempts to build any compiled languages (C/C++, C#, Go, or Java).
# If this step fails, then you should remove it and run the build manually (see below)
- name: Autobuild
uses: github/codeql-action/autobuild@v2

# ℹ️ Command-line programs to run using the OS shell.
# 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun

# If the Autobuild fails above, remove it and uncomment the following three lines.
# modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance.

# - run: |
# echo "Run, Build Application using script"
# ./location_of_script_within_repo/buildscript.sh

- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v2
with:
category: "/language:${{matrix.language}}"
14 changes: 14 additions & 0 deletions .github/workflows/labeler.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
name: "Pull Request Labeler"
on:
- pull_request_target

jobs:
triage:
permissions:
contents: read
pull-requests: write
runs-on: ubuntu-latest
steps:
- uses: actions/labeler@v4
with:
repo-token: "${{ secrets.GITHUB_TOKEN }}"
29 changes: 15 additions & 14 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:22.09-py3
ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:22.11-py3


# build an image that includes only the nemo dependencies, ensures that dependencies
Expand All @@ -28,18 +28,18 @@ RUN apt-get update && \
apt-get install -y \
libsndfile1 sox \
libfreetype6 \
python-setuptools swig \
python-dev ffmpeg && \
swig \
ffmpeg && \
rm -rf /var/lib/apt/lists/*

WORKDIR /tmp/
RUN git clone https://github.com/NVIDIA/apex.git && \

RUN git clone https://github.com/NVIDIA/apex.git -b 22.11-devel && \
cd apex && \
git checkout 2b0e8371113fe70758f1964c40bf7dbe304fd9e6 && \
pip install -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" --global-option="--fast_layer_norm" --global-option="--distributed_adam" --global-option="--deprecated_fused_adam" ./
pip3 install -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" --global-option="--fast_layer_norm" --global-option="--distributed_adam" --global-option="--deprecated_fused_adam" ./

# uninstall stuff from base container
RUN pip uninstall -y sacrebleu torchtext
RUN pip3 uninstall -y sacrebleu torchtext

# build torchaudio
WORKDIR /tmp/torchaudio_build
Expand All @@ -49,7 +49,7 @@ RUN /bin/bash /tmp/torchaudio_build/scripts/installers/install_torchaudio_latest
# install nemo dependencies
WORKDIR /tmp/nemo
COPY requirements .
RUN for f in $(ls requirements*.txt); do pip install --disable-pip-version-check --no-cache-dir -r $f; done
RUN for f in $(ls requirements*.txt); do pip3 install --disable-pip-version-check --no-cache-dir -r $f; done

# install pynini
COPY nemo_text_processing/install_pynini.sh /tmp/nemo/
Expand All @@ -65,18 +65,19 @@ COPY . .

# start building the final container
FROM nemo-deps as nemo
ARG NEMO_VERSION=1.13.0
ARG NEMO_VERSION=1.14.0

# Check that NEMO_VERSION is set. Build will fail without this. Expose NEMO and base container
# version information as runtime environment variable for introspection purposes
RUN /usr/bin/test -n "$NEMO_VERSION" && \
/bin/echo "export NEMO_VERSION=${NEMO_VERSION}" >> /root/.bashrc && \
/bin/echo "export BASE_IMAGE=${BASE_IMAGE}" >> /root/.bashrc
# TODO: remove sed when PTL has updated their torchtext import check
RUN --mount=from=nemo-src,target=/tmp/nemo cd /tmp/nemo && pip install ".[all]" && \
sed -i "s/_module_available(\"torchtext\")/False/g" /opt/conda/lib/python3.8/site-packages/pytorch_lightning/utilities/imports.py && \
python -c "import nemo.collections.asr as nemo_asr" && \
python -c "import nemo.collections.nlp as nemo_nlp" && \

# Install NeMo
RUN --mount=from=nemo-src,target=/tmp/nemo cd /tmp/nemo && pip install ".[all]"

# Check install
RUN python -c "import nemo.collections.nlp as nemo_nlp" && \
python -c "import nemo.collections.tts as nemo_tts" && \
python -c "import nemo_text_processing.text_normalization as text_normalization"

Expand Down
Loading

0 comments on commit 09d5854

Please sign in to comment.