Skip to content

Commit

Permalink
Test changed notebooks in branches (#380)
Browse files Browse the repository at this point in the history
* Test only subset of changed notebooks

Signed-off-by: Karen Feng <[email protected]>

* Update step names

Signed-off-by: Karen Feng <[email protected]>

* Bash fixup

Signed-off-by: Karen Feng <[email protected]>
  • Loading branch information
karenfeng committed Jun 3, 2021
1 parent 5f28878 commit 321ccea
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 13 deletions.
36 changes: 31 additions & 5 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,6 @@ jobs:
sudo apt-get -y install rsync
sbt installHail hail/test uninstallHail exit
spark-3-tests:
<<: *setup_base
steps:
Expand Down Expand Up @@ -263,8 +262,7 @@ jobs:
- store_test_results:
path: ~/glow/core/target/scala-2.12/test-reports


notebook-tests:
all-notebook-tests:
<<: *setup_base
steps:
- checkout
Expand All @@ -282,11 +280,38 @@ jobs:
command: |
printf "[docs-ci]\nhost = https://westus2.azuredatabricks.net\ntoken = ${DATABRICKS_API_TOKEN}\n" > ~/.databrickscfg
- run:
name: Run notebook tests
name: Run all notebook tests
command: |
export PATH=$HOME/conda/envs/glow/bin:$PATH
python docs/dev/run-nb-test.py --cli-profile docs-ci
# CI job: run only the notebooks that differ from master.
# Runs on branch builds; the full notebook suite is left to the nightly workflow.
changed-notebook-tests:
  <<: *setup_base
  steps:
    - checkout
    - restore_cache:
        keys:
          - conda-deps-v1-{{ checksum "python/environment.yml" }}
    - *install_conda
    - *create_python_env
    - save_cache:
        paths:
          - /home/circleci/conda
        key: conda-deps-v1-{{ checksum "python/environment.yml" }}
    - run:
        name: Configure Databricks CLI
        command: |
          printf "[docs-ci]\nhost = https://westus2.azuredatabricks.net\ntoken = ${DATABRICKS_API_TOKEN}\n" > ~/.databrickscfg
    - run:
        name: Run changed notebook tests
        command: |
          export PATH=$HOME/conda/envs/glow/bin:$PATH
          # Directory that run-nb-test.py uploads by default; notebook names
          # passed via --nbs are relative to it, without a file extension.
          NB_SOURCE_DIR=docs/source/_static/zzz_GENERATED_NOTEBOOK_SOURCE
          # --relative=<dir>/ limits the diff to that directory and strips the
          # prefix from the reported paths, so no hard-coded character offset
          # is needed. --diff-filter=d drops deleted notebooks, which no longer
          # exist in the source directory and therefore cannot be run.
          changed_nbs=$(git diff --name-only --diff-filter=d --relative="${NB_SOURCE_DIR}/" master -- "${NB_SOURCE_DIR}" | cut -f1 -d".")
          # Guard on the same list that is passed to the runner: an empty
          # --nbs list makes run-nb-test.py fall back to running every
          # notebook, which must not happen when no notebook changed.
          if [[ -n "${changed_nbs}" ]]; then
            echo "Testing changed notebooks!"
            python docs/dev/run-nb-test.py --cli-profile docs-ci $(echo "${changed_nbs}" | sed -e 's/^/--nbs /')
          else
            echo "No notebooks changed."
          fi
workflows:
version: 2
Expand All @@ -296,6 +321,7 @@ workflows:
- scala-2_11-tests
- scala-2_12-tests
- spark-3-tests
- changed-notebook-tests
nightly:
triggers:
- schedule:
Expand All @@ -307,4 +333,4 @@ workflows:
jobs:
- check-docs
- spark-3-tests
- notebook-tests
- all-notebook-tests
20 changes: 12 additions & 8 deletions docs/dev/run-nb-test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
Before running this, configure your Databricks CLI profile.
Example usage:
python3 docs/dev/run-nb-test.py
python3 docs/dev/run-nb-test.py --cli-profile docs-ci --nbs etl/merge-vcf --nbs tertiary/pandas-lmm
'''
import click
from datetime import datetime
Expand All @@ -16,7 +16,6 @@
import time
import uuid

SOURCE_DIR = 'docs/source/_static/zzz_GENERATED_NOTEBOOK_SOURCE'
JOBS_JSON = 'docs/dev/jobs-config.json'
INIT_SCRIPT_DIR = 'docs/dev/init-scripts'

Expand All @@ -33,21 +32,26 @@ def run_cli_cmd(cli_profile, api, args):
@click.option('--cli-profile', default='DEFAULT', help='Databricks CLI profile name.')
@click.option('--workspace-tmp-dir', default='/tmp/glow-nb-test-ci', help='Base workspace dir for import and testing.')
@click.option('--dbfs-init-script-dir', default='dbfs:/glow-init-scripts', help='DBFS directory for init scripts.')
def main(cli_profile, workspace_tmp_dir, dbfs_init_script_dir):
@click.option('--source-dir', default='docs/source/_static/zzz_GENERATED_NOTEBOOK_SOURCE',
help='Source directory of notebooks to upload.')
@click.option('--nbs', multiple=True, default=[],
help='Relative name of notebooks in the source directory to run. If not provided, runs all notebooks.')
def main(cli_profile, workspace_tmp_dir, dbfs_init_script_dir, source_dir, nbs):
identifier = str(uuid.uuid4())
work_dir = os.path.join(workspace_tmp_dir, identifier)
with open(JOBS_JSON, 'r') as f:
jobs_json = json.load(f)

nbs = [os.path.relpath(path, SOURCE_DIR).split('.')[0]
for path in glob.glob(SOURCE_DIR + '/**', recursive=True)
if not os.path.isdir(path)]
if not nbs:
nbs = [os.path.relpath(path, source_dir).split('.')[0]
for path in glob.glob(source_dir + '/**', recursive=True)
if not os.path.isdir(path)]
nb_to_run_id = {}

try:
print(f"Importing source files from {SOURCE_DIR} to {work_dir}")
print(f"Importing source files from {source_dir} to {work_dir}")
run_cli_cmd(cli_profile, 'workspace', ['mkdirs', work_dir])
run_cli_cmd(cli_profile, 'workspace', ['import_dir', SOURCE_DIR, work_dir])
run_cli_cmd(cli_profile, 'workspace', ['import_dir', source_dir, work_dir])

print(f"Installing init scripts")
run_cli_cmd(cli_profile, 'fs', ['cp', INIT_SCRIPT_DIR, dbfs_init_script_dir, '--recursive', '--overwrite'])
Expand Down

0 comments on commit 321ccea

Please sign in to comment.