diff --git a/omnia.yml b/omnia.yml
index c50e15df9..31289917f 100644
--- a/omnia.yml
+++ b/omnia.yml
@@ -57,12 +57,6 @@
       - k8s_common
   tags: kubernetes
 
-- name: Apply GPU node config
-  hosts: gpus
-  gather_facts: false
-  roles:
-    - compute_gpu
-
 - name: Apply K8s manager config
   hosts: manager
   gather_facts: true
diff --git a/roles/slurm_common/files/slurm.conf b/roles/slurm_common/files/slurm.conf
index a38fceb61..b0bbdf3c7 100644
--- a/roles/slurm_common/files/slurm.conf
+++ b/roles/slurm_common/files/slurm.conf
@@ -32,6 +32,7 @@ ProctrackType=proctrack/pgid
 #FirstJobId=
 ReturnToService=2
 #MaxJobCount=
+MailProg=/usr/bin/mail
 #PlugStackConfig=
 #PropagatePrioProcess=
 #PropagateResourceLimits=
@@ -87,11 +88,11 @@ AccountingStorageType=accounting_storage/slurmdbd
 #AccountingStorageLoc=
 #AccountingStoragePass=
 #AccountingStorageUser=
-#
+AccountingStoragePort=
 # COMPUTE NODES
 #NodeName=linux[1-32] Procs=1 State=UNKNOWN
 #NodeName=DEFAULT Sockets=2 CoresPerSocket=20 State=UNKNOWN
 NodeName= Sockets= CoresPerSocket=
 #NodeName=compute[002-005] CoresPerSocket=20
 PartitionName=normal Nodes=ALL Default=YES MaxTime=INFINITE State=UP
-#PartitionName=debug Nodes=ALL Default=YES MaxTime=INFINITE State=UP
+#PartitionName=debug Nodes=ALL Default=YES MaxTime=INFINITE State=UP
\ No newline at end of file
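Note on the AccountingStoragePort placeholder added above: the slurm_common tasks later fill it from acct_port (6819), and slurmdbd listens on that same port by default when DbdPort stays commented out, so the two sides line up. If the port ever moves, both files have to move together; a sketch in this patch's own lineinfile idiom that pins the daemon side explicitly (a hypothetical extra task, not part of this patch):

- name: Add slurmdbd port no
  lineinfile:
    path: "{{ slurmdbd_path }}"
    regexp: "DbdPort="
    line: "DbdPort={{ acct_port }}"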
diff --git a/roles/slurm_common/tasks/main.yml b/roles/slurm_common/tasks/main.yml
index 9e10b68f1..ef58bd77b 100644
--- a/roles/slurm_common/tasks/main.yml
+++ b/roles/slurm_common/tasks/main.yml
@@ -13,38 +13,40 @@
 # limitations under the License.
 ---
-- name: Install epel repository
-  package:
-    name: epel-release
-    state: present
-  tags: install
+- name: Get hostname
+  command: hostname -s
+  register: host_name
+  changed_when: true
 
-- name: Munge installation
-  package:
-    name: munge-devel
+- name: Add host name in file
+  replace:
+    dest: "{{ hostname_dest }}"
+    regexp: localhost.localdomain
+    replace: "{{ host_name.stdout }}"
+    backup: yes
+    mode: "{{ common_mode }}"
+
+- name: Add host name in hosts file
+  lineinfile:
+    dest: "{{ hosts_dest }}"
+    line: "{{ hostvars[inventory_hostname]['ansible_default_ipv4']['address'] }} {{ host_name.stdout }}"
     state: present
-  tags: install
+    create: yes
+    mode: "{{ common_mode }}"
 
 - name: Install packages for slurm
   package:
-    name: "{{ common_packages }}"
+    name: "{{ item }}"
     state: present
+  with_items:
+    - "{{ common_packages }}"
   tags: install
 
-- name: pip upgrade pip
-  pip:
-    name: pip
-    executable: pip3
-    extra_args: --upgrade
-    state: latest
-  tags: install
-
-- name: create munge key
+- name: Create munge key
   command: "{{ munge_cmd }}"
   changed_when: true
-  tags: install
 
-- name: copy munge key
+- name: Copy munge key
   copy:
     src: munge.key
     dest: "{{ munge_dest }}"
@@ -53,75 +55,72 @@
     mode: "{{ munge_mode }}"
   tags: install
 
-- name: slurm configuration - slurm.conf
+- name: Slurm configuration - slurm.conf
   copy:
     src: slurm.conf
     dest: "{{ slurm_dest }}"
     mode: "{{ slurm_mode }}"
   tags: install
 
-- name: add cluster name
+- name: Add cluster name
   lineinfile:
     path: "{{ slurm_confpth }}"
-    regexp: "clustername="
-    line: "clustername={{ cluster_name }}"
-  tags: install
+    regexp: "ClusterName="
+    line: "ClusterName={{ cluster_name }}"
 
-- name: add slurm user name
+- name: Add slurm user name
   lineinfile:
     path: "{{ slurm_confpth }}"
     regexp: "SlurmUser="
     line: "SlurmUser={{ slurm_user }}"
-  tags: install
 
 - name: Add slurmctld port no
   lineinfile:
     path: "{{ slurm_confpth }}"
     regexp: "SlurmctldPort="
     line: "SlurmctldPort={{ slurmctld_port }}"
-  tags: install
 
 - name: Add slurmd port no
   lineinfile:
     path: "{{ slurm_confpth }}"
     regexp: "SlurmdPort="
     line: "SlurmdPort={{ slurmd_port }}"
-  tags: install
 
 - name: Add spool path
   lineinfile:
     path: "{{ slurm_confpth }}"
     regexp: "SlurmdSpoolDir="
     line: "SlurmdSpoolDir={{ spool_pth }}"
-  tags: install
 
 - name: Add slurmctld pid file path
   lineinfile:
     path: "{{ slurm_confpth }}"
     regexp: "SlurmctldPidFile="
     line: "SlurmctldPidFile={{ slurmctld_pid }}"
-  tags: install
 
 - name: Add slurmd pid file path
   lineinfile:
     path: "{{ slurm_confpth }}"
     regexp: "SlurmdPidFile="
     line: "SlurmdPidFile={{ slurmd_pid }}"
-  tags: install
 
 - name: Add slurmctld log file path
   lineinfile:
     path: "{{ slurm_confpth }}"
     regexp: "SlurmctldLogFile="
     line: "SlurmctldLogFile={{ slurmctld_log }}"
-  tags: install
 
 - name: Add slurmd log file path
   lineinfile:
     path: "{{ slurm_confpth }}"
     regexp: "SlurmdLogFile="
     line: "SlurmdLogFile={{ slurmd_log }}"
-  tags: install
+
+- name: Add accounting storage port no
+  lineinfile:
+    path: "{{ slurm_confpth }}"
+    regexp: "AccountingStoragePort="
+    line: "AccountingStoragePort={{ acct_port }}"
 
 - name: Create slurm group
   group:
@@ -155,6 +154,15 @@
     state: directory
     mode: "{{ gen_mode }}"
     recurse: yes
+
+- name: Create slurm pid directory
+  file:
+    path: "{{ slurm_pidpth }}"
+    state: directory
+    owner: slurm
+    group: slurm
+    mode: "{{ gen_mode }}"
+    recurse: yes
   tags: install
 
 - name: Give slurm user permission to slurmctld
@@ -164,7 +172,6 @@
     group: slurm
     mode: "{{ gen_mode }}"
     state: touch
-  tags: install
 
 - name: Give slurm user permission to slurmd
   file:
@@ -173,11 +180,11 @@
     group: slurm
     mode: "{{ gen_mode }}"
     state: touch
-  tags: install
 
 - name: Start munge service
-  service:
+  systemd:
     name: munge
     state: restarted
     enabled: yes
   tags: install
+  ignore_errors: yes
\ No newline at end of file
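A side note on the package loop introduced above: with_items runs one package transaction per element, while the package module also accepts the whole list and resolves everything in a single transaction, which is faster and yields one summarized result. The equivalent single-call form, using the same variable this patch defines, would be:

- name: Install packages for slurm
  package:
    name: "{{ common_packages }}"
    state: present
  tags: install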
diff --git a/roles/slurm_common/vars/main.yml b/roles/slurm_common/vars/main.yml
index 7b008170b..140ed81b4 100644
--- a/roles/slurm_common/vars/main.yml
+++ b/roles/slurm_common/vars/main.yml
@@ -13,31 +13,37 @@
 # limitations under the License.
 ---
-epel_url: https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm
+epel_url: https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
 common_packages:
   - munge
   - munge-libs
+  - munge-devel
   - mariadb-server
   - mariadb-devel
-  - python3
-  - python-pip
+  - man2html
+  - MySQL-python
+hostname_dest: "/etc/hostname"
+hosts_dest: "/etc/hosts"
 munge_dest: "/etc/munge/"
 munge_cmd: "/usr/sbin/create-munge-key -f"
 munge_mode: "0400"
 slurm_mode: "0644"
+common_mode: "0777"
 slurm_dest: "/etc/slurm/"
 slurm_confpth: "/etc/slurm/slurm.conf"
 slurm_user: "slurm"
 slurmctld_port: "6817"
 slurmd_port: "6818"
+acct_port: "6819"
 slurm_uid: "6001"
 slurm_logpth: "/var/log/slurm/"
+slurm_pidpth: "/var/run/slurm/"
 gen_mode: "0755"
 spool_pth: "/var/spool/slurm/"
 slurmctld_pid: "/var/run/slurmctld.pid"
 slurmd_pid: "/var/run/slurmd.pid"
 cluster_name : "manager,compute"
 slurmctld_log: "/var/log/slurm/slurmctld.log"
-slurmd_log: "/var/log/slurm/slurmd.log"
+slurmd_log: "/var/log/slurm/slurmd.log"
\ No newline at end of file
diff --git a/roles/slurm_manager/files/slurmdbd.conf b/roles/slurm_manager/files/slurmdbd.conf
index c104d7fed..38d4bc96a 100644
--- a/roles/slurm_manager/files/slurmdbd.conf
+++ b/roles/slurm_manager/files/slurmdbd.conf
@@ -18,7 +18,7 @@ AuthType=auth/munge
 # slurmDBD info
 DbdAddr=
 DbdHost=
-#DbdPort=7031
+#DbdPort=6819
 SlurmUser=
 #MessageTimeout=300
 DebugLevel=verbose
@@ -33,6 +33,6 @@ PidFile=
 StorageType=accounting_storage/mysql
 #StorageHost=
 #StoragePort=
-#StoragePass=
-#StorageUser=
+StoragePass=
+StorageUser=
 #StorageLoc=
\ No newline at end of file
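Because StoragePass is now an active key that the manager role fills with the database password, slurmdbd.conf ends up holding a clear-text credential; the slurmdbd.conf man page asks for the file to be readable only by SlurmUser. A hardened variant of the copy task that appears later in this patch, assuming slurmdbd_mode is meant to be restrictive:

- name: Slurm manager - slurmdbd configuration
  copy:
    src: slurmdbd.conf
    dest: "{{ slurmdbd_path }}"
    owner: slurm
    group: slurm
    mode: "0600"   # keep StoragePass out of reach of other users
  tags: install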
diff --git a/roles/slurm_manager/tasks/main.yml b/roles/slurm_manager/tasks/main.yml
index d97952605..01655a61c 100644
--- a/roles/slurm_manager/tasks/main.yml
+++ b/roles/slurm_manager/tasks/main.yml
@@ -13,17 +13,55 @@
 # limitations under the License.
 ---
-#- name: Install packages for slurm
-#  package:
-#    name: "{{ slurm_packages }}"
-#    state: present
-#  tags: install
-
-#- name: Install development tools
-#  package:
-#    name: "{{ dev_tools }}"
-#    state: present
-#  tags: install
+- name: Give slurm user permission to slurmctld spool
+  file:
+    path: "{{ spool_slurmctld_pth }}"
+    owner: slurm
+    group: slurm
+    mode: "{{ tmp_mode }}"
+    state: touch
+
+- name: Give slurm ownership to cluster state
+  file:
+    path: "{{ cluster_state_path }}"
+    owner: slurm
+    group: slurm
+    mode: "{{ tmp_mode }}"
+    state: touch
+
+- name: Create slurmctld log file on master
+  file:
+    path: "{{ slurm_logpth }}"
+    owner: slurm
+    mode: "{{ tmp_mode }}"
+    state: touch
+  with_items:
+    - slurmctld.log
+
+- name: Create log files on master
+  file:
+    path: "{{ slurm_logpth }}"
+    owner: slurm
+    mode: "{{ tmp_mode }}"
+    state: touch
+  with_items:
+    - "{{ log_files_master }}"
+
+- name: Install packages for slurm
+  package:
+    name: "{{ item }}"
+    state: present
+  with_items:
+    - "{{ slurm_packages }}"
+  tags: install
+
+- name: Install development tools
+  package:
+    name: "{{ item }}"
+    state: present
+  with_items:
+    - "{{ dev_tools }}"
+  tags: install
 
 - name: Create temporary download folder for slurm
   file:
@@ -40,38 +78,62 @@
     checksum: "{{ slurm_md5 }}"
     validate_certs: no
   tags: install
- 
+
 - name: Build slurm rpms
-  command: rpmbuild -ta "{{ rpmbuild_path }}"
+  command: rpmbuild -ta "{{ rpmbuild_path }}" --with mysql
   changed_when: false
   args:
     warn: no
 
-#- name: Verify package md5
-#command: rpm -qa
-#  ignore_errors: true
-#  register: verify_result
-#  changed_when: no
-#  failed_when: no
-#  args:
-#    warn: no
+- name: Verify package md5
+  command: rpm -qa
+  ignore_errors: true
+  register: verify_result
+  changed_when: no
+  failed_when: no
+  args:
+    warn: no
 
 - name: Install rpms
   command: rpm -Uvh ~"{{ rpm_loop }}"
   args:
     chdir: "{{ rpm_path }}"
     warn: no
-  # when: verify_result.rc != 0
+  changed_when: true
+
+- name: Get the hostname
+  command: hostname -s
+  register: machine_name
+  changed_when: true
 
 - name: Add control machine name
   lineinfile:
     path: "{{ slurm_confpth }}"
     regexp: "ControlMachine="
-    line: "ControlMachine={{ group_names[0] }}"
+    line: "ControlMachine={{ machine_name.stdout }}"
 
-- name: Firewall rule for slurm - tcp/ip,udp
+- name: Add slurm user name
+  lineinfile:
+    path: "{{ slurm_confpth }}"
+    regexp: "SlurmUser="
+    line: "SlurmUser={{ slurm_user }}"
+
+- name: Install firewalld
+  package:
+    name: firewalld
+    state: present
+  tags: firewalld
+
+- name: Start and enable firewalld
+  service:
+    name: firewalld
+    state: started
+    enabled: yes
+  tags: firewalld
+
+- name: Firewall rule for slurm - tcp/udp ports
   firewalld:
-    zone: internal
+    zone: public
     port: "{{ item }}"
     permanent: true
     state: enabled
@@ -79,10 +141,11 @@
     - "{{ tcp_port1 }}"
     - "{{ tcp_port2 }}"
     - "{{ tcp_port3 }}"
-    - "{{ tcp_port4 }}"
+    - "{{ udp_port3 }}"
     - "{{ udp_port1 }}"
    - "{{ udp_port2 }}"
-  tags: install
+  when: "'manager' in group_names"
+  tags: firewalld
 
 - name: Get network address/subnet mask through ipaddr
   set_fact:
@@ -94,23 +157,22 @@
     rich_rule: 'rule family="{{ family }}" source address="{{ network_address }}" accept'
     permanent: true
     state: enabled
-  tags: install
+  tags: firewalld
 
-- name: Firewall reload
-  systemd:
-    name: firewalld
-    state: reloaded
-  tags: install
+- name: Reload firewalld
+  command: firewall-cmd --reload
+  changed_when: true
+  tags: firewalld
 
 - name: Start mariadb
-  service:
+  systemd:
     name: mariadb
     state: restarted
     enabled: yes
   tags: install
 
 - name: Grant permissions for slurm db
-  command: mysql -u root -e "GRANT ALL ON slurm_acct_db.* TO 'slurm'@'localhost' identified by 'password' with grant option;"
+  command: mysql -u root -e "GRANT ALL ON slurm_acct_db.* TO '{{ db_user }}'@'{{ db_host }}' identified by '{{ db_password[0] }}' with grant option;"
   tags: install
   changed_when: true
 
@@ -119,6 +181,7 @@
     src: slurmdbd.conf
     dest: "{{ slurmdbd_path }}"
     mode: "{{ slurmdbd_mode }}"
+    owner: slurm
   tags: install
 
 - name: Add slurm user name
@@ -139,6 +202,18 @@
     regexp: "DbdHost="
     line: "DbdHost={{ DbdHost }}"
 
+- name: Add storage password
+  lineinfile:
+    path: "{{ slurmdbd_path }}"
+    regexp: "StoragePass="
+    line: "StoragePass={{ db_password[0] }}"
+
+- name: Add storage user
+  lineinfile:
+    path: "{{ slurmdbd_path }}"
+    regexp: "StorageUser="
+    line: "StorageUser={{ slurm_user }}"
+
 - name: Add log file path
   lineinfile:
     path: "{{ slurmdbd_path }}"
@@ -151,13 +226,8 @@
     regexp: "PidFile="
     line: "PidFile={{ pidfile }}"
 
-- name: Populate accounting database
-  command: slurmdbd
-  tags: install
-  changed_when: true
-
 - name: Save slurm conf file in buffer
   fetch:
     src: "{{ slurm_confpth }}"
     dest: "{{ buffer_path }}"
-    flat: true
+    flat: true
\ No newline at end of file
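The Grant permissions task above interpolates db_password into a command line, and the rendered command is echoed into the Ansible output. A common mitigation, using only what this patch already defines, is to suppress logging on that task:

- name: Grant permissions for slurm db
  command: mysql -u root -e "GRANT ALL ON slurm_acct_db.* TO '{{ db_user }}'@'{{ db_host }}' identified by '{{ db_password[0] }}' with grant option;"
  no_log: true          # keep the password out of playbook output
  changed_when: true
  tags: install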
diff --git a/roles/slurm_manager/vars/main.yml b/roles/slurm_manager/vars/main.yml
index b22de3f0a..c7c5dbf1b 100644
--- a/roles/slurm_manager/vars/main.yml
+++ b/roles/slurm_manager/vars/main.yml
@@ -14,38 +14,50 @@
 ---
 slurm_packages:
-  - python3
   - gcc
   - openssl
-  - openssl-devel
   - numactl
-  - numactl-devel
   - hwloc
   - lua
   - readline
-  - readline-devel
-  - pam-devel
   - perl-ExtUtils-MakeMaker
-  - cpanm*
   - rpm-build
+  - perl-DBI
+  - perl-Switch
+  - libibumad
 
 dev_tools:
   - rrdtool-devel
   - lua-devel
   - hwloc-devel
+  - libssh2-devel
+  - pam-devel
+  - readline-devel
+  - openssl-devel
+  - numactl-devel
+  - ncurses-devel
+  - gtk2-devel
+
+log_files_master:
+  - slurm_jobacct.log
+  - slurm_jobcomp.log
 
 tmp_path: "/root/slurm-tmp"
 tmp_mode: "0755"
-slurm_url: https://download.schedmd.com/slurm/slurm-20.02.3.tar.bz2
-slurm_md5: "md5:c71a300d6c5d33ef8ca60e52a203bb1e"
-rpmbuild_path: "/root/slurm-tmp/slurm-20.02.3.tar.bz2"
+cluster_state_path: "/var/spool/slurm/cluster_state"
+spool_slurmctld_pth: "/var/spool/slurmctld"
+spool_slurmd_pth: "/var/spool/slurmd"
+slurm_logpth: "/var/log/slurm/"
+slurm_url: https://download.schedmd.com/slurm/slurm-20.11.2.tar.bz2
+slurm_md5: "md5:592b8b24ff0f24327033eec59cd438d7"
+rpmbuild_path: "/root/slurm-tmp/slurm-20.11.2.tar.bz2"
 rpm_loop: "/rpmbuild/RPMS/x86_64/*.rpm"
 tcp_port1: "6817/tcp"
 tcp_port2: "6818/tcp"
 tcp_port3: "6819/tcp"
-tcp_port4: "7321/tcp"
 udp_port1: "6817/udp"
-udp_port2: "7321/udp"
+udp_port2: "6818/udp"
+udp_port3: "6819/udp"
 family: "ipv4"
 db_user: "slurm"
 db_host: "localhost"
@@ -56,7 +68,7 @@ slurm_user: "slurm"
 DbdAddr: "localhost"
 DbdHost: "localhost"
 logfile: "/var/log/slurm/slurmdbd.log"
-pidfile: "/var/run/slurm/slurmdbd.pid"
+pidfile: "/var/run/slurmdbd.pid"
 buffer_path: "/tmp/slurm.conf"
 rpm_path: "/root/rpmbuild/RPMS/x86_64/"
-slurm_mode: "0644"
+slurm_mode: "0644"
\ No newline at end of file
diff --git a/roles/slurm_start_services/tasks/main.yml b/roles/slurm_start_services/tasks/main.yml
index 7df75eb3b..cc1616ea4 100644
--- a/roles/slurm_start_services/tasks/main.yml
+++ b/roles/slurm_start_services/tasks/main.yml
@@ -16,32 +16,35 @@
 - name: Include common variables
   include_vars: ../../slurm_manager/vars/main.yml
 
+- name: Include common variables
+  include_vars: ../../slurm_common/vars/main.yml
+
 - name: Copy slurm conf from buffer
   copy:
     src: "{{ buffer_path }}"
     dest: "{{ slurm_confpth }}"
     mode: "{{ slurm_mode }}"
 
-- name: Start slurmctld on manager
-  service:
-    name: slurmctld
-    enabled: yes
-  tags: install
-
 - name: Enable slurmdbd on manager
   service:
     name: slurmdbd
-    enabled: yes
+    state: restarted
+  tags: install
+
+- name: Start slurmctld on manager
+  systemd:
+    name: slurmctld
+    state: started
   tags: install
 
 - name: Show cluster if exists
-  command: sacctmgr -n show cluster {{ inventory_hostname }}
+  command: sacctmgr -n show cluster {{ cluster_name }}
   register: slurm_clusterlist
   changed_when: false
 
 - name: Create slurm cluster
-  command: sacctmgr -i add cluster {{ inventory_hostname }}
-  when: slurm_clusterlist.stdout.find(inventory_hostname) == 1
+  command: sacctmgr -i add cluster {{ cluster_name }}
+  when: slurm_clusterlist.stdout.find(cluster_name) == -1
 
 - name: Show account
   command: sacctmgr show account
@@ -49,8 +52,8 @@
   changed_when: false
 
 - name: Create default slurm group
-  command: sacctmgr -i add account defaultgroup Cluster={{ inventory_hostname }} Description="Default Account" Organization="Default Org"
-  when: account_added.stdout.find(inventory_hostname) == 1
+  command: sacctmgr -i add account defaultgroup Cluster={{ cluster_name }} Description="Default Account" Organization="Default Org"
+  when: account_added.stdout.find(cluster_name) == -1
   tags: install
 
 - name: Check if user exists
@@ -60,5 +63,5 @@
 
 - name: Add root to the default account
   command: sacctmgr -i add user root DefaultAccount=defaultgroup
-  when: account_added.stdout.find(inventory_hostname) == 1
+  when: account_added.stdout.find(cluster_name) == -1
   tags: install
\ No newline at end of file
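One more note on the sacctmgr guards above: stdout.find() returns a character position, so comparing against a fixed index is brittle (-1 is the only reliable "absent" value, as used here). The Jinja "in" test reads more directly; a sketch of the same guard with that idiom, using the variables this patch defines:

- name: Create slurm cluster
  command: sacctmgr -i add cluster {{ cluster_name }}
  when: cluster_name not in slurm_clusterlist.stdout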
"{{ buffer_path }}" dest: "{{ slurm_confpth }}" mode: "{{ slurm_mode }}" -- name: Start slurmctld on manager - service: - name: slurmctld - enabled: yes - tags: install - - name: Enable slurmdbd on manager service: name: slurmdbd - enabled: yes + state: restarted + tags: install + +- name: Start slurmctld on manager + systemd: + name: slurmctld + state: started tags: install - name: Show cluster if exists - command: sacctmgr -n show cluster {{ inventory_hostname }} + command: sacctmgr -n show cluster {{ cluster_name }} register: slurm_clusterlist changed_when: false - name: Create slurm cluster - command: sacctmgr -i add cluster {{ inventory_hostname }} - when: slurm_clusterlist.stdout.find(inventory_hostname) == 1 + command: sacctmgr -i add cluster {{ cluster_name }} + when: slurm_clusterlist.stdout.find(cluster_name) == 1 - name: Show account command: sacctmgr show account @@ -49,8 +52,8 @@ changed_when: false - name: Create default slurm group - command: sacctmgr -i add account defaultgroup Cluster={{ inventory_hostname }} Description="Default Account" Organization="Default Org" - when: account_added.stdout.find(inventory_hostname) == 1 + command: sacctmgr -i add account defaultgroup Cluster={{ cluster_name }} Description="Default Account" Organization="Default Org" + when: account_added.stdout.find(cluster_name) == 1 tags: install - name: Check if user exists @@ -60,5 +63,5 @@ - name: Add root to the default account command: sacctmgr -i add user root DefaultAccount=defaultgroup - when: account_added.stdout.find(inventory_hostname) == 1 + when: account_added.stdout.find(cluster_name) == 1 tags: install \ No newline at end of file diff --git a/roles/start_slurm_workers/tasks/main.yml b/roles/slurm_workers/tasks/main.yml similarity index 70% rename from roles/start_slurm_workers/tasks/main.yml rename to roles/slurm_workers/tasks/main.yml index e07385772..df3cb3dcd 100644 --- a/roles/start_slurm_workers/tasks/main.yml +++ b/roles/slurm_workers/tasks/main.yml @@ -16,6 +16,37 @@ - name: Include common variables include_vars: ../../slurm_manager/vars/main.yml +- name: Give slurm user permission to slurmd spool + file: + path: "{{ spool_slurmd_pth }}" + owner: slurm + group: slurm + mode: "{{ tmp_mode }}" + state: touch + +- name: Create log files on compute nodes + file: + path: "{{ slurm_logpth }}" + owner: slurm + group: slurm + mode: "{{ tmp_mode }}" + state: touch + with_items: + - slurmd.log + +- name: Install firewalld + package: + name: firewalld + state: present + tags: firewalld + +- name: Stop and disable firewalld + service: + name: firewalld + state: stopped + enabled: no + tags: firewalld + - name: Copy slurm conf from buffer copy: src: "{{ buffer_path }}" @@ -32,7 +63,7 @@ - name: Install development tools package: - name: "{{ item | join (',') }}" + name: "{{ item }}" state: present with_items: - "{{ dev_tools }}" @@ -55,7 +86,7 @@ tags: install - name: Build slurm rpms - command: rpmbuild -ta "{{ rpmbuild_path }}" + command: rpmbuild -ta "{{ rpmbuild_path }}" --with mysql changed_when: false args: warn: no @@ -74,15 +105,21 @@ args: chdir: "{{ rpm_path }}" warn: no - when: verify_result.rc != 0 + changed_when: true + +- name: Get the hostname + command: hostname -s + register: machine_name + changed_when: true - name: Add socket and core info lineinfile: path: "{{ slurm_confpth }}" regexp: "NodeName= Sockets= CoresPerSocket=" - line: "NodeName={{ group_names[0] }} Sockets={{ hostvars[inventory_hostname]['ansible_facts']['processor_count'] }} + line: "NodeName={{ 
diff --git a/test/test_common.yml b/test/test_common.yml
new file mode 100644
index 000000000..a6b2db3be
--- /dev/null
+++ b/test/test_common.yml
@@ -0,0 +1,68 @@
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+# Testcase OMNIA_USP_US_VFSP_TC_009
+# Execute common role in manager & compute nodes with os installed centos 7.9
+- name: OMNIA_USP_US_VFSP_TC_009
+  hosts: manager, compute
+  vars_files:
+    - test_vars/test_common_vars.yml
+  tasks:
+    - block:
+        - name: Call common role
+          include_role:
+            name: ../roles/common
+          tags: TC_009
+
+        - name: Checking ntpd service status
+          systemd:
+            name: ntpd
+          register: ntpd_service
+          tags: TC_009, VERIFY_009
+
+        - name: Validating ntpd service status
+          assert:
+            that:
+              - ntpd_service.status.ActiveState == 'active'
+            fail_msg: "{{ ntpd_service_fail_msg }}"
+            success_msg: "{{ ntpd_service_success_msg }}"
+          tags: TC_009, VERIFY_009
+
+# Testcase OMNIA_USP_US_VFSP_TC_010
+# Execute common role in manager & compute nodes with common role already executed once
+- name: OMNIA_USP_US_VFSP_TC_010
+  hosts: manager, compute
+  vars_files:
+    - test_vars/test_common_vars.yml
+  tasks:
+    - block:
+        - name: Call common role
+          include_role:
+            name: ../roles/common
+          tags: TC_010
+
+        - name: Checking ntpd service status
+          systemd:
+            name: ntpd
+          register: ntpd_service
+          tags: TC_010, VERIFY_010
+
+        - name: Validating ntpd service status
+          assert:
+            that:
+              - ntpd_service.status.ActiveState == 'active'
+            fail_msg: "{{ ntpd_service_fail_msg }}"
+            success_msg: "{{ ntpd_service_success_msg }}"
+          tags: TC_010, VERIFY_010
\ No newline at end of file
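All of the test plays that follow reuse the same probe: calling the systemd module with only a unit name performs no state change but returns the unit's properties, so ActiveState can be asserted from the registered result. The pattern in isolation:

- name: Checking ntpd service status
  systemd:
    name: ntpd
  register: ntpd_service

- name: Inspect the reported state
  debug:
    msg: "{{ ntpd_service.status.ActiveState }}"   # 'active' when running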
diff --git a/test/test_slurm_common.yml b/test/test_slurm_common.yml
new file mode 100644
index 000000000..7c82d2055
--- /dev/null
+++ b/test/test_slurm_common.yml
@@ -0,0 +1,94 @@
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+# Testcase OMNIA_USP_US_VFSP_TC_001
+# Execute slurm_common role in manager & compute nodes with os installed centos 7.9
+- name: OMNIA_USP_US_VFSP_TC_001
+  hosts: manager, compute
+  vars_files:
+    - test_vars/test_slurm_common_vars.yml
+  tasks:
+    - block:
+        - name: Call slurm common role
+          include_role:
+            name: ../roles/slurm_common
+          tags: TC_001
+
+        - name: Fetch common packages installed
+          package_facts:
+            manager: auto
+          tags: TC_001,VERIFY_001
+
+        - name: Checking munge service status
+          systemd:
+            name: munge
+          register: munge_service
+          tags: TC_001, VERIFY_001
+
+        - name: Verify all required packages are installed
+          assert:
+            that: "'{{ item }}' in ansible_facts.packages"
+            success_msg: "{{ packages_status_success_msg }}"
+            fail_msg: "{{ packages_status_fail_msg }}"
+          with_items: "{{ common_packages }}"
+          tags: TC_001,VERIFY_001
+
+        - name: Validating munge service status
+          assert:
+            that:
+              - munge_service.status.ActiveState == 'active'
+            fail_msg: "{{ munge_service_fail_msg }}"
+            success_msg: "{{ munge_service_success_msg }}"
+          tags: TC_001, VERIFY_001
+
+# Testcase OMNIA_USP_US_VFSP_TC_002
+# Execute slurm_common role in manager & compute nodes with common packages already installed
+- name: OMNIA_USP_US_VFSP_TC_002
+  hosts: manager, compute
+  vars_files:
+    - test_vars/test_slurm_common_vars.yml
+  tasks:
+    - block:
+        - name: Call slurm common role
+          include_role:
+            name: ../roles/slurm_common
+          tags: TC_002, VERIFY_002
+
+        - name: Fetch common packages installed
+          package_facts:
+            manager: auto
+          tags: TC_002,VERIFY_002
+
+        - name: Checking munge service status
+          systemd:
+            name: munge
+          register: munge_service
+          tags: TC_002, VERIFY_002
+
+        - name: Verify all required packages are installed
+          assert:
+            that: "'{{ item }}' in ansible_facts.packages"
+            success_msg: "{{ packages_status_success_msg }}"
+            fail_msg: "{{ packages_status_fail_msg }}"
+          with_items: "{{ common_packages }}"
+          tags: TC_002,VERIFY_002
+
+        - name: Validating munge service status
+          assert:
+            that:
+              - munge_service.status.ActiveState == 'active'
+            fail_msg: "{{ munge_service_fail_msg }}"
+            success_msg: "{{ munge_service_success_msg }}"
+          tags: TC_002, VERIFY_002
\ No newline at end of file
diff --git a/test/test_slurm_manager.yml b/test/test_slurm_manager.yml
new file mode 100644
index 000000000..d17f32f28
--- /dev/null
+++ b/test/test_slurm_manager.yml
@@ -0,0 +1,166 @@
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+# Testcase OMNIA_USP_US_VFSP_TC_003
+# Execute slurm_manager role in manager node with os installed centos 7.9
+- name: OMNIA_USP_US_VFSP_TC_003
+  hosts: manager
+  vars_files:
+    - test_vars/test_slurm_manager_vars.yml
+    - ../appliance/roles/common/vars/main.yml
+  tasks:
+    - block:
+        - name: Call cluster_validation role
+          include_role:
+            name: ../roles/cluster_validation
+            tasks_from: fetch_password
+          tags: TC_003
+
+        - name: Call slurm manager role
+          include_role:
+            name: ../roles/slurm_manager
+          tags: TC_003
+
+        - name: Fetch slurm packages installed
+          package_facts:
+            manager: auto
+          tags: TC_003,VERIFY_003
+
+        - name: Start and enable firewalld
+          service:
+            name: firewalld
+            state: started
+            enabled: yes
+          tags: TC_003, VERIFY_003
+
+        - name: Checking firewalld tcp/udp ports on manager node
+          command: firewall-cmd --list-ports
+          register: manager_firewalld_ports
+          when: "'manager' in group_names"
+          tags: TC_003, VERIFY_003
+
+        - name: Checking mariadb service status
+          systemd:
+            name: mariadb
+          register: mariadb_service
+          tags: TC_003, VERIFY_003
+
+        - name: Validating tcp/udp ports on manager node
+          assert:
+            that:
+              - "'6817/tcp' in manager_firewalld_ports.stdout"
+              - "'6817/udp' in manager_firewalld_ports.stdout"
+              - "'6818/tcp' in manager_firewalld_ports.stdout"
+              - "'6818/udp' in manager_firewalld_ports.stdout"
+              - "'6819/tcp' in manager_firewalld_ports.stdout"
+              - "'6819/udp' in manager_firewalld_ports.stdout"
+            fail_msg: "{{ manager_ports_status_fail_msg }}"
+            success_msg: "{{ manager_ports_status_success_msg }}"
+          when: "'manager' in group_names"
+          tags: TC_003, VERIFY_003
+
+        - name: Verify all slurm packages are installed
+          assert:
+            that: "'{{ item }}' in ansible_facts.packages"
+            success_msg: "{{ slurm_packages_status_success_msg }}"
+            fail_msg: "{{ slurm_packages_status_fail_msg }}"
+          with_items:
+            - "{{ slurm_packages }}"
+            - "{{ dev_tools }}"
+          tags: TC_003, VERIFY_003
+
+        - name: Validating mariadb service status
+          assert:
+            that:
+              - mariadb_service.status.ActiveState == 'active'
+            fail_msg: "{{ mariadb_service_fail_msg }}"
+            success_msg: "{{ mariadb_service_success_msg }}"
+          tags: TC_003, VERIFY_003
+
+# Testcase OMNIA_USP_US_VFSP_TC_004
+# Execute slurm_manager role in manager node with slurm packages already installed
+- name: OMNIA_USP_US_VFSP_TC_004
+  hosts: manager
+  vars_files:
+    - test_vars/test_slurm_manager_vars.yml
+    - ../appliance/roles/common/vars/main.yml
+  tasks:
+    - block:
+        - name: Call cluster_validation role
+          include_role:
+            name: ../roles/cluster_validation
+            tasks_from: fetch_password
+          tags: TC_004
+
+        - name: Call slurm manager role
+          include_role:
+            name: ../roles/slurm_manager
+          tags: TC_004
+
+        - name: Fetch slurm packages installed
+          package_facts:
+            manager: auto
+          tags: TC_004,VERIFY_004
+
+        - name: Start and enable firewalld
+          service:
+            name: firewalld
+            state: started
+            enabled: yes
+          tags: TC_004, VERIFY_004
+
+        - name: Checking firewalld tcp/udp ports on manager node
+          command: firewall-cmd --list-ports
+          register: manager_firewalld_ports
+          when: "'manager' in group_names"
+          tags: TC_004, VERIFY_004
+
+        - name: Checking mariadb service status
+          systemd:
+            name: mariadb
+          register: mariadb_service
+          tags: TC_004, VERIFY_004
+
+        - name: Validating tcp/udp ports on manager node
+          assert:
+            that:
+              - "'6817/tcp' in manager_firewalld_ports.stdout"
+              - "'6817/udp' in manager_firewalld_ports.stdout"
+              - "'6818/tcp' in manager_firewalld_ports.stdout"
+              - "'6818/udp' in manager_firewalld_ports.stdout"
+              - "'6819/tcp' in manager_firewalld_ports.stdout"
+              - "'6819/udp' in manager_firewalld_ports.stdout"
+            fail_msg: "{{ manager_ports_status_fail_msg }}"
+            success_msg: "{{ manager_ports_status_success_msg }}"
+          when: "'manager' in group_names"
+          tags: TC_004, VERIFY_004
+
+        - name: Verify all slurm packages are installed
+          assert:
+            that: "'{{ item }}' in ansible_facts.packages"
+            success_msg: "{{ slurm_packages_status_success_msg }}"
+            fail_msg: "{{ slurm_packages_status_fail_msg }}"
+          with_items:
+            - "{{ slurm_packages }}"
+            - "{{ dev_tools }}"
+          tags: TC_004, VERIFY_004
+
+        - name: Validating mariadb service status
+          assert:
+            that:
+              - mariadb_service.status.ActiveState == 'active'
+            fail_msg: "{{ mariadb_service_fail_msg }}"
+            success_msg: "{{ mariadb_service_success_msg }}"
+          tags: TC_004, VERIFY_004
\ No newline at end of file
diff --git a/test/test_slurm_start_services.yml b/test/test_slurm_start_services.yml
new file mode 100644
index 000000000..c91184318
--- /dev/null
+++ b/test/test_slurm_start_services.yml
@@ -0,0 +1,124 @@
+#Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+# Testcase OMNIA_USP_US_VFSP_TC_007
+# Execute slurm_start_services role in manager node with os installed centos 7.9
+- name: OMNIA_USP_US_VFSP_TC_007
+  hosts: manager
+  vars_files:
+    - test_vars/test_slurm_start_services_vars.yml
+  tasks:
+    - block:
+        - name: Call slurm start services role
+          include_role:
+            name: ../roles/slurm_start_services
+          tags: TC_007
+
+        - name: Checking slurmctld service status
+          systemd:
+            name: slurmctld
+          register: slurmctld_service
+          tags: TC_007, VERIFY_007
+
+        - name: Checking slurmdbd service status
+          systemd:
+            name: slurmdbd
+          register: slurmdbd_service
+          tags: TC_007, VERIFY_007
+
+        - name: Check if slurm is installed
+          command: sinfo -V
+          register: slurm_version
+          changed_when: false
+          ignore_errors: True
+          tags: TC_007,VERIFY_007
+
+        - name: Validating slurmctld service status
+          assert:
+            that:
+              - slurmctld_service.status.ActiveState == 'active'
+            fail_msg: "{{ slurmctld_service_fail_msg }}"
+            success_msg: "{{ slurmctld_service_success_msg }}"
+          tags: TC_007, VERIFY_007
+
+        - name: Validating slurmdbd service status
+          assert:
+            that:
+              - slurmdbd_service.status.ActiveState == 'active'
+            fail_msg: "{{ slurmdbd_service_fail_msg }}"
+            success_msg: "{{ slurmdbd_service_success_msg }}"
+          tags: TC_007, VERIFY_007
+
+        - name: Validate slurm installation
+          assert:
+            that: "'command not found' not in slurm_version.stdout"
+            fail_msg: "{{ slurm_status_fail_msg }}"
+            success_msg: "{{ slurm_status_success_msg }}"
+          tags: TC_007, VERIFY_007
+
+# Testcase OMNIA_USP_US_VFSP_TC_008
+# Execute slurm_start_services role in manager node with services already running
+- name: OMNIA_USP_US_VFSP_TC_008
+  hosts: manager
+  vars_files:
+    - test_vars/test_slurm_start_services_vars.yml
+  tasks:
+    - block:
+        - name: Call slurm start services role
+          include_role:
+            name: ../roles/slurm_start_services
+          tags: TC_008
+
+        - name: Checking slurmctld service status
+          systemd:
+            name: slurmctld
+          register: slurmctld_service
+          tags: TC_008, VERIFY_008
+
+        - name: Checking slurmdbd service status
+          systemd:
+            name: slurmdbd
+          register: slurmdbd_service
+          tags: TC_008, VERIFY_008
+
+        - name: Check if slurm is installed
+          command: sinfo -V
+          register: slurm_version
+          changed_when: false
+          ignore_errors: True
+          tags: TC_008,VERIFY_008
+
+        - name: Validating slurmctld service status
+          assert:
+            that:
+              - slurmctld_service.status.ActiveState == 'active'
+            fail_msg: "{{ slurmctld_service_fail_msg }}"
+            success_msg: "{{ slurmctld_service_success_msg }}"
+          tags: TC_008, VERIFY_008
+
+        - name: Validating slurmdbd service status
+          assert:
+            that:
+              - slurmdbd_service.status.ActiveState == 'active'
+            fail_msg: "{{ slurmdbd_service_fail_msg }}"
+            success_msg: "{{ slurmdbd_service_success_msg }}"
+          tags: TC_008, VERIFY_008
+
+        - name: Validate slurm installation
+          assert:
+            that: "'command not found' not in slurm_version.stdout"
+            fail_msg: "{{ slurm_status_fail_msg }}"
+            success_msg: "{{ slurm_status_success_msg }}"
+          tags: TC_008, VERIFY_008
\ No newline at end of file
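A caveat on the sinfo -V probes used throughout these tests: when the binary is missing, the command module fails before producing any stdout, so the string 'command not found' never appears there. An rc-based variant is sturdier; a sketch with the same registered names and messages as the tests above:

- name: Check if slurm is installed
  command: sinfo -V
  register: slurm_version
  changed_when: false
  failed_when: false

- name: Validate slurm installation
  assert:
    that:
      - slurm_version.rc == 0
    fail_msg: "{{ slurm_status_fail_msg }}"
    success_msg: "{{ slurm_status_success_msg }}"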
diff --git a/test/test_slurm_workers.yml b/test/test_slurm_workers.yml
new file mode 100644
index 000000000..cef86b440
--- /dev/null
+++ b/test/test_slurm_workers.yml
@@ -0,0 +1,126 @@
+#Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+# Testcase OMNIA_USP_US_VFSP_TC_005
+# Execute slurm_worker role in compute node with os installed centos 7.9
+- name: OMNIA_USP_US_VFSP_TC_005
+  hosts: compute
+  vars_files:
+    - test_vars/test_slurm_workers_vars.yml
+  tasks:
+    - block:
+        - name: Call slurm worker role
+          include_role:
+            name: ../roles/slurm_workers
+          tags: TC_005
+
+        - name: Fetch slurm packages installed
+          package_facts:
+            manager: auto
+          tags: TC_005,VERIFY_005
+
+        - name: Check if slurm is installed
+          command: sinfo -V
+          register: slurm_version
+          changed_when: false
+          ignore_errors: True
+          tags: TC_005,VERIFY_005
+
+        - name: Checking slurmd service status
+          systemd:
+            name: slurmd.service
+          register: slurmd_service
+          tags: TC_005, VERIFY_005
+
+        - name: Verify all slurm packages are installed
+          assert:
+            that: "'{{ item }}' in ansible_facts.packages"
+            success_msg: "{{ slurm_packages_status_success_msg }}"
+            fail_msg: "{{ slurm_packages_status_fail_msg }}"
+          with_items:
+            - "{{ slurm_packages }}"
+            - "{{ dev_tools }}"
+          tags: TC_005, VERIFY_005
+
+        - name: Validate slurm installation
+          assert:
+            that: "'command not found' not in slurm_version.stdout"
+            fail_msg: "{{ slurm_status_fail_msg }}"
+            success_msg: "{{ slurm_status_success_msg }}"
+          tags: TC_005, VERIFY_005
+
+        - name: Validating slurmd service status
+          assert:
+            that:
+              - slurmd_service.status.ActiveState == 'active'
+            fail_msg: "{{ slurmd_service_fail_msg }}"
+            success_msg: "{{ slurmd_service_success_msg }}"
+          tags: TC_005, VERIFY_005
+
+# Testcase OMNIA_USP_US_VFSP_TC_006
+# Execute slurm_workers role in compute node with slurm packages already installed
+- name: OMNIA_USP_US_VFSP_TC_006
+  hosts: compute
+  vars_files:
+    - test_vars/test_slurm_workers_vars.yml
+  tasks:
+    - block:
+        - name: Call slurm worker role
+          include_role:
+            name: ../roles/slurm_workers
+          tags: TC_006
+
+        - name: Fetch slurm packages installed
+          package_facts:
+            manager: auto
+          tags: TC_006,VERIFY_006
+
+        - name: Checking slurmd service status
+          systemd:
+            name: slurmd.service
+          register: slurmd_service
+          tags: TC_006, VERIFY_006
+
+        - name: Check if slurm is installed
+          command: sinfo -V
+          register: slurm_version
+          changed_when: false
+          ignore_errors: True
+          tags: TC_006,VERIFY_006
+
+        - name: Verify all slurm packages are installed
+          assert:
+            that: "'{{ item }}' in ansible_facts.packages"
+            success_msg: "{{ slurm_packages_status_success_msg }}"
+            fail_msg: "{{ slurm_packages_status_fail_msg }}"
+          with_items:
+            - "{{ slurm_packages }}"
+            - "{{ dev_tools }}"
+          tags: TC_006, VERIFY_006
+
+        - name: Validate slurm installation
+          assert:
+            that: "'command not found' not in slurm_version.stdout"
+            fail_msg: "{{ slurm_status_fail_msg }}"
+            success_msg: "{{ slurm_status_success_msg }}"
+          tags: TC_006, VERIFY_006
+
+        - name: Validating slurmd service status
+          assert:
+            that:
+              - slurmd_service.status.ActiveState == 'active'
+            fail_msg: "{{ slurmd_service_fail_msg }}"
+            success_msg: "{{ slurmd_service_success_msg }}"
+          tags: TC_006, VERIFY_006
\ No newline at end of file
diff --git a/test/test_vars/test_common_vars.yml b/test/test_vars/test_common_vars.yml
new file mode 100644
index 000000000..fc4848765
--- /dev/null
+++ b/test/test_vars/test_common_vars.yml
@@ -0,0 +1,18 @@
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+ntpd_service_fail_msg: "Ntpd service is not running"
+
+ntpd_service_success_msg: "Ntpd service is running"
\ No newline at end of file
diff --git a/test/test_vars/test_slurm_common_vars.yml b/test/test_vars/test_slurm_common_vars.yml
new file mode 100644
index 000000000..3ddebe23c
--- /dev/null
+++ b/test/test_vars/test_slurm_common_vars.yml
@@ -0,0 +1,32 @@
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+packages_status_success_msg: "Common packages are installed"
+
+packages_status_fail_msg: "Common packages are not installed"
+
+munge_service_fail_msg: "Munge service is not running"
+
+munge_service_success_msg: "Munge service is running"
+
+common_packages:
+  - munge
+  - munge-libs
+  - munge-devel
+  - mariadb-server
+  - mariadb-devel
+  - python3
+  - man2html
+  - MySQL-python
\ No newline at end of file
diff --git a/test/test_vars/test_slurm_manager_vars.yml b/test/test_vars/test_slurm_manager_vars.yml
new file mode 100644
index 000000000..790b990e8
--- /dev/null
+++ b/test/test_vars/test_slurm_manager_vars.yml
@@ -0,0 +1,55 @@
+#Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+slurm_packages:
+  - gcc
+  - openssl
+  - numactl
+  - hwloc
+  - lua
+  - readline
+  - perl-ExtUtils-MakeMaker
+  - rpm-build
+  - perl-DBI
+  - perl-Switch
+  - libibumad
+
+dev_tools:
+  - rrdtool-devel
+  - lua-devel
+  - hwloc-devel
+  - libssh2-devel
+  - pam-devel
+  - readline-devel
+  - openssl-devel
+  - numactl-devel
+  - ncurses-devel
+  - gtk2-devel
+
+manager_ports_status_fail_msg: "Slurm ports are not opened in manager node"
+
+manager_ports_status_success_msg: "Slurm ports are opened in manager node"
+
+slurm_packages_status_success_msg: "Slurm and dev packages are installed"
+
+slurm_packages_status_fail_msg: "Slurm and dev packages are not installed"
+
+slurm_status_fail_msg: "Slurm is not installed"
+
+slurm_status_success_msg: "Slurm is installed"
+
+mariadb_service_fail_msg: "Mariadb server is not running"
+
+mariadb_service_success_msg: "Mariadb server is up and running"
\ No newline at end of file
diff --git a/test/test_vars/test_slurm_start_services_vars.yml b/test/test_vars/test_slurm_start_services_vars.yml
new file mode 100644
index 000000000..cda616464
--- /dev/null
+++ b/test/test_vars/test_slurm_start_services_vars.yml
@@ -0,0 +1,26 @@
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+slurmctld_service_fail_msg: "Slurmctld service is not running"
+
+slurmctld_service_success_msg: "Slurmctld service is running"
+
+slurmdbd_service_fail_msg: "Slurmdbd service is not running"
+
+slurmdbd_service_success_msg: "Slurmdbd service is running"
+
+slurm_status_success_msg: "Slurm is installed"
+
+slurm_status_fail_msg: "Slurm is not installed"
\ No newline at end of file
diff --git a/test/test_vars/test_slurm_workers_vars.yml b/test/test_vars/test_slurm_workers_vars.yml
new file mode 100644
index 000000000..09808128c
--- /dev/null
+++ b/test/test_vars/test_slurm_workers_vars.yml
@@ -0,0 +1,56 @@
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+slurm_packages:
+  - python3
+  - gcc
+  - openssl
+  - numactl
+  - hwloc
+  - lua
+  - readline
+  - perl-ExtUtils-MakeMaker
+  - rpm-build
+  - perl-DBI
+  - perl-Switch
+  - libibumad
+
+dev_tools:
+  - rrdtool-devel
+  - lua-devel
+  - hwloc-devel
+  - libssh2-devel
+  - pam-devel
+  - readline-devel
+  - openssl-devel
+  - numactl-devel
+  - ncurses-devel
+  - gtk2-devel
+
+manager_ports_status_fail_msg: "Slurm ports are not opened in manager node"
+
+manager_ports_status_success_msg: "Slurm ports are opened in manager node"
+
+slurm_packages_status_success_msg: "Slurm and dev packages are installed"
+
+slurm_packages_status_fail_msg: "Slurm and dev packages are not installed"
+
+slurm_status_fail_msg: "Slurm is not installed"
+
+slurm_status_success_msg: "Slurm is installed"
+
+slurmd_service_fail_msg: "Slurmd service is not running"
+
+slurmd_service_success_msg: "Slurmd service is running"
\ No newline at end of file