Set up GPU CI testing (explosion#7293)
* Set up CI for tests with GPU agent

* Update tests for enabled GPU

* Fix steps filename

* Add parallel build jobs as a setting

* Fix test requirements

* Fix install test requirements condition

* Fix pipeline models test

* Reset current ops in prefer/require testing

* Fix more tests

* Remove separate test_models test

* Fix regression 5551

* Fix StaticVectors for GPU use

* Fix vocab tests

* Fix regression test 5082

* Move azure steps to .github and reenable default pool jobs

* Consolidate/rename azure steps

Co-authored-by: svlandeg <[email protected]>
adrianeboyd and svlandeg committed Apr 22, 2021
Parent: bdb485c · Commit: 36ecba2
Showing 16 changed files with 239 additions and 138 deletions.
57 changes: 57 additions & 0 deletions .github/azure-steps.yml
@@ -0,0 +1,57 @@
+parameters:
+  python_version: ''
+  architecture: ''
+  prefix: ''
+  gpu: false
+  num_build_jobs: 1
+
+steps:
+  - task: UsePythonVersion@0
+    inputs:
+      versionSpec: ${{ parameters.python_version }}
+      architecture: ${{ parameters.architecture }}
+
+  - script: |
+      ${{ parameters.prefix }} python -m pip install -U pip setuptools
+      ${{ parameters.prefix }} python -m pip install -U -r requirements.txt
+    displayName: "Install dependencies"
+
+  - script: |
+      ${{ parameters.prefix }} python setup.py build_ext --inplace -j ${{ parameters.num_build_jobs }}
+      ${{ parameters.prefix }} python setup.py sdist --formats=gztar
+    displayName: "Compile and build sdist"
+
+  - task: DeleteFiles@1
+    inputs:
+      contents: "spacy"
+    displayName: "Delete source directory"
+
+  - script: |
+      ${{ parameters.prefix }} python -m pip freeze --exclude torch --exclude cupy-cuda110 > installed.txt
+      ${{ parameters.prefix }} python -m pip uninstall -y -r installed.txt
+    displayName: "Uninstall all packages"
+
+  - bash: |
+      ${{ parameters.prefix }} SDIST=$(python -c "import os;print(os.listdir('./dist')[-1])" 2>&1)
+      ${{ parameters.prefix }} python -m pip install dist/$SDIST
+    displayName: "Install from sdist"
+
+  - script: |
+      ${{ parameters.prefix }} python -m pip install -U -r requirements.txt
+    displayName: "Install test requirements"
+
+  - script: |
+      ${{ parameters.prefix }} python -m pip install -U cupy-cuda110
+      ${{ parameters.prefix }} python -m pip install "torch==1.7.1+cu110" -f https://download.pytorch.org/whl/torch_stable.html
+    displayName: "Install GPU requirements"
+    condition: eq(${{ parameters.gpu }}, true)
+
+  - script: |
+      ${{ parameters.prefix }} python -m pytest --pyargs spacy
+    displayName: "Run CPU tests"
+    condition: eq(${{ parameters.gpu }}, false)
+
+  - script: |
+      ${{ parameters.prefix }} python -m pytest --pyargs spacy -p spacy.tests.enable_gpu
+    displayName: "Run GPU tests"
+    condition: eq(${{ parameters.gpu }}, true)
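Note on the steps above: the "Install from sdist" step picks the archive with an inline Python one-liner rather than a shell glob. A minimal sketch of what that one-liner relies on, assuming dist/ holds exactly one archive produced by the preceding build step:

    import os

    # os.listdir returns entries in arbitrary order, so [-1] is only safe
    # because the build step leaves a single sdist in dist/. With several
    # archives, sorted(...) or a glob would be needed instead.
    sdist = os.listdir("./dist")[-1]
    print(f"pip install dist/{sdist}")

The "Uninstall all packages" step excludes torch and cupy-cuda110 from the freeze list, presumably so a GPU stack already present on the agent is not torn down and reinstalled on every run.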
53 changes: 19 additions & 34 deletions azure-pipelines.yml
@@ -76,39 +76,24 @@ jobs:
       maxParallel: 4
     pool:
       vmImage: $(imageName)
-
-    steps:
-      - task: UsePythonVersion@0
-        inputs:
-          versionSpec: "$(python.version)"
-          architecture: "x64"
-
-      - script: |
-          python -m pip install -U setuptools
-          pip install -r requirements.txt
-        displayName: "Install dependencies"
-
-      - script: |
-          python setup.py build_ext --inplace
-          python setup.py sdist --formats=gztar
-        displayName: "Compile and build sdist"
-
-      - task: DeleteFiles@1
-        inputs:
-          contents: "spacy"
-        displayName: "Delete source directory"
+    steps:
+      - template: .github/azure-steps.yml
+        parameters:
+          python_version: '$(python.version)'
+          architecture: 'x64'
 
-      - script: |
-          pip freeze > installed.txt
-          pip uninstall -y -r installed.txt
-        displayName: "Uninstall all packages"
-
-      - bash: |
-          SDIST=$(python -c "import os;print(os.listdir('./dist')[-1])" 2>&1)
-          pip install dist/$SDIST
-        displayName: "Install from sdist"
-
-      - script: |
-          pip install -r requirements.txt
-          python -m pytest --pyargs spacy
-        displayName: "Run tests"
+  - job: "TestGPU"
+    dependsOn: "Validate"
+    strategy:
+      matrix:
+        Python38LinuxX64_GPU:
+          python.version: '3.8'
+    pool:
+      name: "LinuxX64_GPU"
+    steps:
+      - template: .github/azure-steps.yml
+        parameters:
+          python_version: '$(python.version)'
+          architecture: 'x64'
+          gpu: true
+          num_build_jobs: 24
2 changes: 1 addition & 1 deletion spacy/ml/staticvectors.py
@@ -38,7 +38,7 @@ def forward(
         return _handle_empty(model.ops, model.get_dim("nO"))
     key_attr = model.attrs["key_attr"]
     W = cast(Floats2d, model.ops.as_contig(model.get_param("W")))
-    V = cast(Floats2d, docs[0].vocab.vectors.data)
+    V = cast(Floats2d, model.ops.asarray(docs[0].vocab.vectors.data))
     rows = model.ops.flatten(
         [doc.vocab.vectors.find(keys=doc.to_array(key_attr)) for doc in docs]
     )
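The one-line fix above routes the vocab's vector table through model.ops.asarray, which returns the array on whatever device the model's ops use. A minimal sketch of that behavior, assuming thinc is installed (on a CPU-only machine the current ops are simply NumpyOps and the call is a cheap pass-through):

    import numpy
    from thinc.api import get_current_ops

    ops = get_current_ops()
    table = numpy.zeros((10, 3), dtype="f")  # vocab.vectors.data is numpy-backed by default

    # Under NumpyOps this returns the array essentially unchanged; under
    # CupyOps it copies the table into GPU memory, so the later gemm against
    # W sees both operands on one device instead of mixing host and device.
    V = ops.asarray(table)
    print(type(V))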
3 changes: 3 additions & 0 deletions spacy/tests/enable_gpu.py
@@ -0,0 +1,3 @@
+from spacy import require_gpu
+
+require_gpu()
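pytest's -p option (used in the "Run GPU tests" step) imports the named module as a plugin before collection, so the module-level require_gpu() above runs before any test allocates arrays. A hypothetical conftest-based alternative, gated on an environment variable; SPACY_TEST_GPU is illustrative, not something this commit defines:

    # conftest.py (sketch; SPACY_TEST_GPU is a hypothetical opt-in)
    import os

    if os.environ.get("SPACY_TEST_GPU"):
        from spacy import require_gpu

        # Unlike prefer_gpu, require_gpu raises if no GPU is usable, so a
        # misconfigured agent fails loudly instead of silently testing on CPU.
        require_gpu()

Keeping the switch in a separate module behind -p leaves the default `pytest --pyargs spacy` run untouched.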
16 changes: 9 additions & 7 deletions spacy/tests/pipeline/test_entity_ruler.py
@@ -5,6 +5,7 @@
 from spacy.language import Language
 from spacy.pipeline import EntityRuler
 from spacy.errors import MatchPatternError
+from thinc.api import NumpyOps, get_current_ops
 
 
 @pytest.fixture
@@ -201,13 +202,14 @@ def test_entity_ruler_overlapping_spans(nlp):

 @pytest.mark.parametrize("n_process", [1, 2])
 def test_entity_ruler_multiprocessing(nlp, n_process):
-    texts = ["I enjoy eating Pizza Hut pizza."]
+    if isinstance(get_current_ops(), NumpyOps) or n_process < 2:
+        texts = ["I enjoy eating Pizza Hut pizza."]
 
-    patterns = [{"label": "FASTFOOD", "pattern": "Pizza Hut", "id": "1234"}]
+        patterns = [{"label": "FASTFOOD", "pattern": "Pizza Hut", "id": "1234"}]
 
-    ruler = nlp.add_pipe("entity_ruler")
-    ruler.add_patterns(patterns)
+        ruler = nlp.add_pipe("entity_ruler")
+        ruler.add_patterns(patterns)
 
-    for doc in nlp.pipe(texts, n_process=2):
-        for ent in doc.ents:
-            assert ent.ent_id_ == "1234"
+        for doc in nlp.pipe(texts, n_process=n_process):
+            for ent in doc.ents:
+                assert ent.ent_id_ == "1234"
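Gating the whole test body on the current ops keeps GPU runs green, at the cost of a silent pass. An alternative sketch (not what this commit does) that surfaces the skip in the test report instead:

    import pytest
    from thinc.api import NumpyOps, get_current_ops

    @pytest.mark.skipif(
        not isinstance(get_current_ops(), NumpyOps),
        reason="multiprocess piping is only exercised on CPU ops",
    )
    @pytest.mark.parametrize("n_process", [1, 2])
    def test_multiprocessing_cpu_only(n_process):
        # Body would be identical to test_entity_ruler_multiprocessing above.
        assert n_process in (1, 2)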
12 changes: 6 additions & 6 deletions spacy/tests/pipeline/test_models.py
@@ -4,7 +4,7 @@
 import pytest
 from numpy.testing import assert_almost_equal
 from spacy.vocab import Vocab
-from thinc.api import NumpyOps, Model, data_validation
+from thinc.api import Model, data_validation, get_current_ops
 from thinc.types import Array2d, Ragged
 
 from spacy.lang.en import English
@@ -13,7 +13,7 @@
 from spacy.tokens import Doc
 
 
-OPS = NumpyOps()
+OPS = get_current_ops()
 
 texts = ["These are 4 words", "Here just three"]
 l0 = [[1, 2], [3, 4], [5, 6], [7, 8]]
@@ -82,7 +82,7 @@ def util_batch_unbatch_docs_list(
         Y_batched = model.predict(in_data)
         Y_not_batched = [model.predict([u])[0] for u in in_data]
         for i in range(len(Y_batched)):
-            assert_almost_equal(Y_batched[i], Y_not_batched[i], decimal=4)
+            assert_almost_equal(OPS.to_numpy(Y_batched[i]), OPS.to_numpy(Y_not_batched[i]), decimal=4)
 
 
 def util_batch_unbatch_docs_array(
@@ -91,7 +91,7 @@ def util_batch_unbatch_docs_array(
     with data_validation(True):
         model.initialize(in_data, out_data)
         Y_batched = model.predict(in_data).tolist()
-        Y_not_batched = [model.predict([u])[0] for u in in_data]
+        Y_not_batched = [model.predict([u])[0].tolist() for u in in_data]
         assert_almost_equal(Y_batched, Y_not_batched, decimal=4)
 
 
@@ -100,8 +100,8 @@ def util_batch_unbatch_docs_ragged(
 ):
     with data_validation(True):
         model.initialize(in_data, out_data)
-        Y_batched = model.predict(in_data)
+        Y_batched = model.predict(in_data).data.tolist()
         Y_not_batched = []
         for u in in_data:
             Y_not_batched.extend(model.predict([u]).data.tolist())
-        assert_almost_equal(Y_batched.data, Y_not_batched, decimal=4)
+        assert_almost_equal(Y_batched, Y_not_batched, decimal=4)
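The recurring pattern in these test edits: model.predict now returns arrays on the current ops' device, and numpy.testing helpers need host arrays, so both sides go through Ops.to_numpy first. A minimal sketch, assuming thinc is installed:

    from numpy.testing import assert_almost_equal
    from thinc.api import get_current_ops

    ops = get_current_ops()
    a = ops.asarray2f([[1.0, 2.0]])          # numpy array on CPU, cupy array on GPU
    b = ops.asarray2f([[1.00001, 2.00001]])

    # to_numpy is a near no-op under NumpyOps and a device-to-host copy under
    # CupyOps, so the same assertion passes in both CI lanes.
    assert_almost_equal(ops.to_numpy(a), ops.to_numpy(b), decimal=4)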
18 changes: 13 additions & 5 deletions spacy/tests/pipeline/test_textcat.py
@@ -1,7 +1,7 @@
 import pytest
 import random
 import numpy.random
-from numpy.testing import assert_equal
+from numpy.testing import assert_almost_equal
 from thinc.api import fix_random_seed
 from spacy import util
 from spacy.lang.en import English
@@ -222,8 +222,12 @@ def test_overfitting_IO():
     batch_cats_1 = [doc.cats for doc in nlp.pipe(texts)]
     batch_cats_2 = [doc.cats for doc in nlp.pipe(texts)]
     no_batch_cats = [doc.cats for doc in [nlp(text) for text in texts]]
-    assert_equal(batch_cats_1, batch_cats_2)
-    assert_equal(batch_cats_1, no_batch_cats)
+    for cats_1, cats_2 in zip(batch_cats_1, batch_cats_2):
+        for cat in cats_1:
+            assert_almost_equal(cats_1[cat], cats_2[cat], decimal=5)
+    for cats_1, cats_2 in zip(batch_cats_1, no_batch_cats):
+        for cat in cats_1:
+            assert_almost_equal(cats_1[cat], cats_2[cat], decimal=5)
 
 
 def test_overfitting_IO_multi():
@@ -270,8 +274,12 @@ def test_overfitting_IO_multi():
     batch_deps_1 = [doc.cats for doc in nlp.pipe(texts)]
     batch_deps_2 = [doc.cats for doc in nlp.pipe(texts)]
     no_batch_deps = [doc.cats for doc in [nlp(text) for text in texts]]
-    assert_equal(batch_deps_1, batch_deps_2)
-    assert_equal(batch_deps_1, no_batch_deps)
+    for cats_1, cats_2 in zip(batch_deps_1, batch_deps_2):
+        for cat in cats_1:
+            assert_almost_equal(cats_1[cat], cats_2[cat], decimal=5)
+    for cats_1, cats_2 in zip(batch_deps_1, no_batch_deps):
+        for cat in cats_1:
+            assert_almost_equal(cats_1[cat], cats_2[cat], decimal=5)
 
 
 # fmt: off
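Both textcat tests repeat the same nested comparison, swapping exact equality for a per-label tolerance because GPU kernels can reorder floating-point reductions, so batched and unbatched scores may differ in the last digits. A hypothetical helper (not part of the commit) that captures the idea once:

    from numpy.testing import assert_almost_equal

    def assert_cats_almost_equal(cats_a, cats_b, decimal=5):
        # Compare two lists of {label: score} dicts, doc by doc.
        for doc_a, doc_b in zip(cats_a, cats_b):
            assert doc_a.keys() == doc_b.keys()
            for label in doc_a:
                assert_almost_equal(doc_a[label], doc_b[label], decimal=decimal)

    assert_cats_almost_equal(
        [{"POS": 0.912345, "NEG": 0.087655}],
        [{"POS": 0.912346, "NEG": 0.087654}],
    )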
7 changes: 4 additions & 3 deletions spacy/tests/pipeline/test_tok2vec.py
@@ -8,8 +8,8 @@
 from spacy.training import Example
 from spacy import util
 from spacy.lang.en import English
-from thinc.api import Config
-from numpy.testing import assert_equal
+from thinc.api import Config, get_current_ops
+from numpy.testing import assert_array_equal
 
 from ..util import get_batch, make_tempdir
 
@@ -160,7 +160,8 @@ def test_tok2vec_listener():
 
     doc = nlp("Running the pipeline as a whole.")
     doc_tensor = tagger_tok2vec.predict([doc])[0]
-    assert_equal(doc.tensor, doc_tensor)
+    ops = get_current_ops()
+    assert_array_equal(ops.to_numpy(doc.tensor), ops.to_numpy(doc_tensor))
 
     # TODO: should this warn or error?
     nlp.select_pipes(disable="tok2vec")
50 changes: 27 additions & 23 deletions spacy/tests/regression/test_issue4501-5000.py
@@ -9,6 +9,7 @@
 from spacy.util import ensure_path, load_model_from_path
 import numpy
 import pickle
+from thinc.api import NumpyOps, get_current_ops
 
 from ..util import make_tempdir
 
@@ -169,21 +170,22 @@ def test_issue4725_1():


 def test_issue4725_2():
-    # ensures that this runs correctly and doesn't hang or crash because of the global vectors
-    # if it does crash, it's usually because of calling 'spawn' for multiprocessing (e.g. on Windows),
-    # or because of issues with pickling the NER (cf test_issue4725_1)
-    vocab = Vocab(vectors_name="test_vocab_add_vector")
-    data = numpy.ndarray((5, 3), dtype="f")
-    data[0] = 1.0
-    data[1] = 2.0
-    vocab.set_vector("cat", data[0])
-    vocab.set_vector("dog", data[1])
-    nlp = English(vocab=vocab)
-    nlp.add_pipe("ner")
-    nlp.initialize()
-    docs = ["Kurt is in London."] * 10
-    for _ in nlp.pipe(docs, batch_size=2, n_process=2):
-        pass
+    if isinstance(get_current_ops(), NumpyOps):
+        # ensures that this runs correctly and doesn't hang or crash because of the global vectors
+        # if it does crash, it's usually because of calling 'spawn' for multiprocessing (e.g. on Windows),
+        # or because of issues with pickling the NER (cf test_issue4725_1)
+        vocab = Vocab(vectors_name="test_vocab_add_vector")
+        data = numpy.ndarray((5, 3), dtype="f")
+        data[0] = 1.0
+        data[1] = 2.0
+        vocab.set_vector("cat", data[0])
+        vocab.set_vector("dog", data[1])
+        nlp = English(vocab=vocab)
+        nlp.add_pipe("ner")
+        nlp.initialize()
+        docs = ["Kurt is in London."] * 10
+        for _ in nlp.pipe(docs, batch_size=2, n_process=2):
+            pass


def test_issue4849():
@@ -204,10 +206,11 @@ def test_issue4849():
         count_ents += len([ent for ent in doc.ents if ent.ent_id > 0])
     assert count_ents == 2
     # USING 2 PROCESSES
-    count_ents = 0
-    for doc in nlp.pipe([text], n_process=2):
-        count_ents += len([ent for ent in doc.ents if ent.ent_id > 0])
-    assert count_ents == 2
+    if isinstance(get_current_ops(), NumpyOps):
+        count_ents = 0
+        for doc in nlp.pipe([text], n_process=2):
+            count_ents += len([ent for ent in doc.ents if ent.ent_id > 0])
+        assert count_ents == 2
 
 
 @Language.factory("my_pipe")
@@ -239,10 +242,11 @@ def test_issue4903():
nlp.add_pipe("sentencizer")
nlp.add_pipe("my_pipe", after="sentencizer")
text = ["I like bananas.", "Do you like them?", "No, I prefer wasabi."]
docs = list(nlp.pipe(text, n_process=2))
assert docs[0].text == "I like bananas."
assert docs[1].text == "Do you like them?"
assert docs[2].text == "No, I prefer wasabi."
if isinstance(get_current_ops(), NumpyOps):
docs = list(nlp.pipe(text, n_process=2))
assert docs[0].text == "I like bananas."
assert docs[1].text == "Do you like them?"
assert docs[2].text == "No, I prefer wasabi."


def test_issue4924():
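The guards added throughout this file reflect a hard constraint rather than a style choice: a CUDA context cannot be shared with forked workers, and spawned workers would each reinitialize the GPU, which can hang or crash the run. A minimal sketch of the decision, assuming thinc is installed:

    from thinc.api import NumpyOps, get_current_ops

    def safe_n_process(requested: int) -> int:
        # Multiprocess piping is only exercised when the current ops are
        # plain NumPy; under CupyOps the worker processes would inherit an
        # unusable CUDA context, which is what these guards avoid.
        return requested if isinstance(get_current_ops(), NumpyOps) else 1

    print(safe_n_process(2))  # 2 in the CPU lane, 1 under spacy.tests.enable_gpu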
16 changes: 9 additions & 7 deletions spacy/tests/regression/test_issue5001-5500.py
@@ -6,6 +6,7 @@
 from spacy.lang.en.syntax_iterators import noun_chunks
 from spacy.vocab import Vocab
 import spacy
+from thinc.api import get_current_ops
 import pytest
 
 from ...util import make_tempdir
@@ -54,16 +55,17 @@ def test_issue5082():
     ruler.add_patterns(patterns)
     parsed_vectors_1 = [t.vector for t in nlp(text)]
     assert len(parsed_vectors_1) == 4
-    numpy.testing.assert_array_equal(parsed_vectors_1[0], array1)
-    numpy.testing.assert_array_equal(parsed_vectors_1[1], array2)
-    numpy.testing.assert_array_equal(parsed_vectors_1[2], array3)
-    numpy.testing.assert_array_equal(parsed_vectors_1[3], array4)
+    ops = get_current_ops()
+    numpy.testing.assert_array_equal(ops.to_numpy(parsed_vectors_1[0]), array1)
+    numpy.testing.assert_array_equal(ops.to_numpy(parsed_vectors_1[1]), array2)
+    numpy.testing.assert_array_equal(ops.to_numpy(parsed_vectors_1[2]), array3)
+    numpy.testing.assert_array_equal(ops.to_numpy(parsed_vectors_1[3]), array4)
     nlp.add_pipe("merge_entities")
     parsed_vectors_2 = [t.vector for t in nlp(text)]
     assert len(parsed_vectors_2) == 3
-    numpy.testing.assert_array_equal(parsed_vectors_2[0], array1)
-    numpy.testing.assert_array_equal(parsed_vectors_2[1], array2)
-    numpy.testing.assert_array_equal(parsed_vectors_2[2], array34)
+    numpy.testing.assert_array_equal(ops.to_numpy(parsed_vectors_2[0]), array1)
+    numpy.testing.assert_array_equal(ops.to_numpy(parsed_vectors_2[1]), array2)
+    numpy.testing.assert_array_equal(ops.to_numpy(parsed_vectors_2[2]), array34)
 
 
 def test_issue5137():
(Diffs for the remaining changed files were not loaded.)