// Jenkins scripted pipeline: runs the PR checks (style, install, GPU unit tests
// and two L1 smoke trainings) inside a GPU pod scheduled on the
// 'sc-ipp-blossom-prod' Kubernetes cloud, and reports progress back to the
// pull request's commit status through the Blossom GithubHelper.
@Library('blossom-github-lib@master')
import ipp.blossom.*

// Pod definition: a single 'latestdlfw' container (NVIDIA PyTorch image) kept
// alive with `cat`, with the NFS scratch volume mounted at /testdata and two
// GPUs requested.
// NOTE(review): restartPolicy / backoffLimit / shm-size are listed under the
// container entry, as in the original manifest; they do not look like
// container-level Pod fields, so confirm whether the cluster honours them.
podTemplate(cloud:'sc-ipp-blossom-prod', yaml : """
apiVersion: v1
kind: Pod
metadata:
  labels:
    some-label: some-label-value
spec:
  volumes:
  - name: scratch
    nfs:
      server: ipp1-cdot01-col01
      path: /vol/scratch1/scratch.okuchaiev_blossom
  containers:
  - name: latestdlfw
    image: nvcr.io/nvidia/pytorch:23.02-py3
    command:
    - cat
    volumeMounts:
    - name: scratch
      mountPath: /testdata
    resources:
      limits:
        nvidia.com/gpu: 2
    restartPolicy: Never
    backoffLimit: 4
    tty: true
    shm-size: 32g
  nodeSelector:
    kubernetes.io/os: linux
    nvidia.com/gpu_type: "Tesla_T4x4"
    nvidia.com/node_type: gpu_tester
    nvidia.com/driver_version: "510.20"
"""
) {
    node(POD_LABEL) {
        def githubHelper
        stage('Get Token') {
            withCredentials([usernamePassword(credentialsId: 'GHAtoken', passwordVariable: 'GIT_PASSWORD', usernameVariable: 'GIT_USERNAME')]) {
                // Build the helper used for all GitHub status updates below.
                // `githubData` is not defined in this file; presumably it is
                // injected by the Blossom trigger -- verify against the job config.
                githubHelper = GithubHelper.getInstance("${GIT_PASSWORD}", githubData)
            }
        }

        // Name of the stage currently executing. It is interpolated into the
        // commit-status messages, so it must be kept up to date: previously it
        // stayed '' for the whole run and statuses read " Running"/" Failed".
        def stageName = 'Setup'
        try {
            currentBuild.description = githubHelper.getBuildDescription()
            container('latestdlfw') {
                stage('Code checkout') {
                    stageName = 'Code checkout'
                    // Mark the PR commit as pending before doing any work.
                    githubHelper.updateCommitStatus("$BUILD_URL", "$stageName Running", GitHubCommitState.PENDING)
                    // Fetch every PR head into origin/pr/* and check out this PR's ref.
                    checkout changelog: true, poll: true, scm: [$class: 'GitSCM', branches: [[name: "pr/"+githubHelper.getPRNumber()]],
                        doGenerateSubmoduleConfigurations: false,
                        submoduleCfg: [],
                        userRemoteConfigs: [[credentialsId: 'github-token', url: githubHelper.getCloneUrl(), refspec: '+refs/pull/*/head:refs/remotes/origin/pr/*']]]
                }

                stage('Code Style') {
                    stageName = 'Code Style'
                    // Installs test requirements, runs the style check and links
                    // the NFS test data to /home/TestData.
                    sh "apt-get update && \
                        apt-get install -y bc && \
                        nvidia-smi && \
                        pip install -r requirements/requirements_test.txt && \
                        python setup.py style && ls -l /testdata/TestData && ln -s /testdata/TestData /home/TestData && \
                        ls -l /home && ls -l /home/TestData"
                }

                stage('Installation') {
                    stageName = 'Installation'
                    sh "git config --global --add safe.directory '*' && nvidia-smi && ./reinstall.sh release"
                }

                stage('L0: GPU unit tests') {
                    stageName = 'L0: GPU unit tests'
                    sh "NEMO_NUMBA_MINVER=0.53 pytest -m 'not pleasefixme'"
                }

                // The two L1 jobs run concurrently, each pinned to its own GPU
                // through CUDA_VISIBLE_DEVICES. The name is set once here rather
                // than inside the branches so they do not race on the shared variable.
                stageName = 'L1: parallel tests'
                parallel(
                    [
                        "L1: NMT Training Pre-LN": { sh 'CUDA_VISIBLE_DEVICES=0 python examples/nlp/machine_translation/enc_dec_nmt.py \
                            --config-path=conf \
                            --config-name=aayn_base \
                            do_testing=true \
                            model.train_ds.src_file_name=/testdata/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
                            model.train_ds.tgt_file_name=/testdata/TestData/nlp/nmt/toy_data/wmt14-de-en.ref \
                            model.validation_ds.src_file_name=/testdata/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
                            model.validation_ds.tgt_file_name=/testdata/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
                            model.test_ds.src_file_name=/testdata/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
                            model.test_ds.tgt_file_name=/testdata/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
                            model.encoder_tokenizer.tokenizer_model=/testdata/TestData/nlp/nmt/toy_data/tt_tokenizer.BPE.4096.model \
                            model.decoder_tokenizer.tokenizer_model=/testdata/TestData/nlp/nmt/toy_data/tt_tokenizer.BPE.4096.model \
                            model.encoder.pre_ln=true \
                            model.decoder.pre_ln=true \
                            trainer.devices=[0] \
                            trainer.accelerator="gpu" \
                            +trainer.fast_dev_run=true \
                            +trainer.limit_test_batches=2 \
                            exp_manager=null \
                            '},
                        "L1: Speech to text": { sh 'CUDA_VISIBLE_DEVICES=1 python examples/asr/asr_ctc/speech_to_text_ctc.py \
                            model.train_ds.manifest_filepath=/testdata/TestData/an4_dataset/an4_train.json \
                            model.validation_ds.manifest_filepath=/testdata/TestData/an4_dataset/an4_val.json \
                            trainer.devices=[0] \
                            trainer.accelerator="gpu" \
                            +trainer.fast_dev_run=True \
                            exp_manager=null \
                            '}
                    ]
                )
            }
            // Every stage finished without throwing: report success on the commit.
            githubHelper.updateCommitStatus("$BUILD_URL", "Complete", GitHubCommitState.SUCCESS)
        }
        catch (Exception ex){
            // Mark the build failed and tell GitHub which stage was running;
            // the exception is logged here and not rethrown.
            currentBuild.result = 'FAILURE'
            println ex
            githubHelper.updateCommitStatus("$BUILD_URL", "$stageName Failed", GitHubCommitState.FAILURE)
        }

    }
}