Skip to content

Commit

Permalink
Merge pull request dell#178 from VishnupriyaKrish/devel
Browse the repository at this point in the history
Issue dell#172: Slurm role changes and testing framework for Slurm
  • Loading branch information
lwilson committed Jan 7, 2021
2 parents 87fd840 + 8851e6b commit e301ba4
Show file tree
Hide file tree
Showing 19 changed files with 1,022 additions and 127 deletions.
6 changes: 0 additions & 6 deletions omnia.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,12 +57,6 @@
- k8s_common
tags: kubernetes

- name: Apply GPU node config
hosts: gpus
gather_facts: false
roles:
- compute_gpu

- name: Apply K8s manager config
hosts: manager
gather_facts: true
Expand Down
5 changes: 3 additions & 2 deletions roles/slurm_common/files/slurm.conf
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ ProctrackType=proctrack/pgid
#FirstJobId=
ReturnToService=2
#MaxJobCount=
MailProg=/usr/bin/mail
#PlugStackConfig=
#PropagatePrioProcess=
#PropagateResourceLimits=
Expand Down Expand Up @@ -87,11 +88,11 @@ AccountingStorageType=accounting_storage/slurmdbd
#AccountingStorageLoc=
#AccountingStoragePass=
#AccountingStorageUser=
#
AccountingStoragePort=
# COMPUTE NODES
#NodeName=linux[1-32] Procs=1 State=UNKNOWN
#NodeName=DEFAULT Sockets=2 CoresPerSocket=20 State=UNKNOWN
NodeName= Sockets= CoresPerSocket=
#NodeName=compute[002-005] CoresPerSocket=20
PartitionName=normal Nodes=ALL Default=YES MaxTime=INFINITE State=UP
#PartitionName=debug Nodes=ALL Default=YES MaxTime=INFINITE State=UP
#PartitionName=debug Nodes=ALL Default=YES MaxTime=INFINITE State=UP
83 changes: 45 additions & 38 deletions roles/slurm_common/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,38 +13,40 @@
# limitations under the License.
---

- name: Install epel repository
package:
name: epel-release
state: present
tags: install
- name: Get hostname
command: hostname -s
register: host_name
changed_when: true

- name: Munge installation
package:
name: munge-devel
- name: Add host name in file
replace:
dest: "{{ hostname_dest }}"
regexp: localhost.localdomain
replace: "{{ host_name.stdout }}"
backup: yes
mode: "{{ common_mode }}"

- name: Add host name in hosts file
lineinfile:
dest: "{{ hosts_dest }}"
line: "{{ hostvars[inventory_hostname]['ansible_default_ipv4']['address'] }} {{ host_name.stdout }}"
state: present
tags: install
create: yes
mode: "{{ common_mode }}"

- name: Install packages for slurm
package:
name: "{{ common_packages }}"
name: "{{ item }}"
state: present
with_items:
- "{{ common_packages }}"
tags: install

- name: pip upgrade pip
pip:
name: pip
executable: pip3
extra_args: --upgrade
state: latest
tags: install

- name: create munge key
- name: Create munge key
command: "{{ munge_cmd }}"
changed_when: true
tags: install

- name: copy munge key
- name: Copy munge key
copy:
src: munge.key
dest: "{{ munge_dest }}"
Expand All @@ -53,75 +55,72 @@
mode: "{{ munge_mode }}"
tags: install

- name: slurm configuration - slurm.conf
- name: Slurm configuration - slurm.conf
copy:
src: slurm.conf
dest: "{{ slurm_dest }}"
mode: "{{ slurm_mode }}"
tags: install

- name: add cluster name
- name: Add cluster name
lineinfile:
path: "{{ slurm_confpth }}"
regexp: "clustername="
line: "clustername={{ cluster_name }}"
tags: install
regexp: "ClusterName="
line: "ClusterName={{ cluster_name }}"

- name: add slurm user name
- name: Add slurm user name
lineinfile:
path: "{{ slurm_confpth }}"
regexp: "SlurmUser="
line: "SlurmUser={{ slurm_user }}"
tags: install

- name: Add slurmctld port no
lineinfile:
path: "{{ slurm_confpth }}"
regexp: "SlurmctldPort="
line: "SlurmctldPort={{ slurmctld_port }}"
tags: install

- name: Add slurmd port no
lineinfile:
path: "{{ slurm_confpth }}"
regexp: "SlurmdPort="
line: "SlurmdPort={{ slurmd_port }}"
tags: install

- name: Add spool path
lineinfile:
path: "{{ slurm_confpth }}"
regexp: "SlurmdSpoolDir="
line: "SlurmdSpoolDir={{ spool_pth }}"
tags: install

- name: Add slurmctld pid file path
lineinfile:
path: "{{ slurm_confpth }}"
regexp: "SlurmctldPidFile="
line: "SlurmctldPidFile={{ slurmctld_pid }}"
tags: install

- name: Add slurmd pid file path
lineinfile:
path: "{{ slurm_confpth }}"
regexp: "SlurmdPidFile="
line: "SlurmdPidFile={{ slurmd_pid }}"
tags: install

- name: Add slurmctld log file path
lineinfile:
path: "{{ slurm_confpth }}"
regexp: "SlurmctldLogFile="
line: "SlurmctldLogFile={{ slurmctld_log }}"
tags: install

- name: Add slurmd log file path
lineinfile:
path: "{{ slurm_confpth }}"
regexp: "SlurmdLogFile="
line: "SlurmdLogFile={{ slurmd_log }}"
tags: install

- name: Add accounting storage port no
lineinfile:
path: "{{ slurm_confpth }}"
regexp: "AccountingStoragePort="
line: "AccountingStoragePort={{ acct_port }}"

- name: Create slurm group
group:
Expand Down Expand Up @@ -155,6 +154,15 @@
state: directory
mode: "{{ gen_mode }}"
recurse: yes

- name: Create slurm pid directory
file:
path: "{{ slurm_pidpth }}"
state: directory
owner: slurm
group: slurm
mode: "{{ gen_mode }}"
recurse: yes
tags: install

- name: Give slurm user permission to slurmctld
Expand All @@ -164,7 +172,6 @@
group: slurm
mode: "{{ gen_mode }}"
state: touch
tags: install

- name: Give slurm user permission to slurmd
file:
Expand All @@ -173,11 +180,11 @@
group: slurm
mode: "{{ gen_mode }}"
state: touch
tags: install

- name: Start munge service
service:
systemd:
name: munge
state: restarted
enabled: yes
tags: install
ignore_errors: yes
14 changes: 10 additions & 4 deletions roles/slurm_common/vars/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,31 +13,37 @@
# limitations under the License.
---

epel_url: https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm
epel_url: https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm

common_packages:
- munge
- munge-libs
- munge-devel
- mariadb-server
- mariadb-devel
- python3
- python-pip
- man2html
- MySQL-python

hostname_dest: "/etc/hostname"
hosts_dest: "/etc/hosts"
munge_dest: "/etc/munge/"
munge_cmd: "/usr/sbin/create-munge-key -f"
munge_mode: "0400"
slurm_mode: "0644"
common_mode: "0777"
slurm_dest: "/etc/slurm/"
slurm_confpth: "/etc/slurm/slurm.conf"
slurm_user: "slurm"
slurmctld_port: "6817"
slurmd_port: "6818"
acct_port: "6819"
slurm_uid: "6001"
slurm_logpth: "/var/log/slurm/"
slurm_pidpth: "/var/run/slurm/"
gen_mode: "0755"
spool_pth: "/var/spool/slurm/"
slurmctld_pid: "/var/run/slurmctld.pid"
slurmd_pid: "/var/run/slurmd.pid"
cluster_name : "manager,compute"
slurmctld_log: "/var/log/slurm/slurmctld.log"
slurmd_log: "/var/log/slurm/slurmd.log"
slurmd_log: "/var/log/slurm/slurmd.log"
6 changes: 3 additions & 3 deletions roles/slurm_manager/files/slurmdbd.conf
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ AuthType=auth/munge
# slurmDBD info
DbdAddr=
DbdHost=
#DbdPort=7031
#DbdPort=6019
SlurmUser=
#MessageTimeout=300
DebugLevel=verbose
Expand All @@ -33,6 +33,6 @@ PidFile=
StorageType=accounting_storage/mysql
#StorageHost=
#StoragePort=
#StoragePass=
#StorageUser=
StoragePass=
StorageUser=
#StorageLoc=
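[The diffs for the remaining changed files were not loaded in this capture; only 5 of the 19 changed files are shown above.]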

0 comments on commit e301ba4

Please sign in to comment.