Slurm DBD/Slurm controller improvements and relocation to galaxy08

This commit is contained in:
Nate Coraor 2017-06-01 12:52:44 -04:00
parent bfe0907a59
commit b1f4df9304
7 changed files with 128 additions and 90 deletions

5
meta/main.yml Normal file
View File

@ -0,0 +1,5 @@
---
# Role dependencies. The mariadb role is pulled in only for hosts that are
# members of the `slurmdbdservers` inventory group.
dependencies:
  - role: mariadb
    when: "'slurmdbdservers' in group_names"

View File

@ -1,14 +0,0 @@
---
# Tasks for the elastic components: a virtualenv at
# /opt/slurm_cloud_provision populated with the packages listed below.

# Only runs on RedHat-family hosts; presumably provides the virtualenv
# tooling the pip task relies on (confirm for other OS families).
- name: Install virtualenv (yum)
  when: ansible_os_family == "RedHat"
  yum:
    pkg: python-virtualenv

# One pip invocation per item, each targeting the same virtualenv path.
- name: Create virtualenv for elastic components
  pip:
    name: "{{ item }}"
    virtualenv: "/opt/slurm_cloud_provision"
  with_items:
    - shade
    - ansible

View File

@ -22,29 +22,32 @@
system: "{{ slurm_user.system | default('yes') }}"
when: slurm_user is defined
- name: Install Slurm (apt)
# Per-role task files. Each include is gated on the host's membership in the
# corresponding inventory group, so a host in several groups gets several.
- name: Include controller installation tasks
  include: slurmctld.yml
  when: "'slurmservers' in group_names"

- name: Include execution host installation tasks
  include: slurmd.yml
  when: "'slurmexechosts' in group_names"

- name: Include DB installation tasks
  include: slurmdbd.yml
  when: "'slurmdbdservers' in group_names"
# Client tools and documentation for Debian-family hosts; not restricted to
# any inventory group.
# Only `name` is set: `pkg` is an alias of the same apt parameter, so giving
# both supplies two conflicting values for one option. The packages are
# passed as a list so apt is invoked once for the whole set.
- name: Install Slurm client (apt)
  apt:
    name:
      - slurm-client
      - slurm-wlm-doc
  when: ansible_os_family == "Debian"
# Package set for RedHat-family hosts. Entries that are commented out are
# deliberately left uninstalled.
- name: Install Slurm (yum)
  when: ansible_os_family == "RedHat"
  yum:
    name: "{{ item }}"
  with_items:
    - munge
    - munge-libs
    - slurm-plugins
    - slurm
    - slurm-devel
    - slurm-munge
    - slurm-pam_slurm
    # - slurm-perlapi
    - slurm-sjobexit
    - slurm-sjstat
    - slurm-slurmdb-direct
    - slurm-slurmdbd
    # - slurm-sql
    # - slurm-torque
# FIXME: this task will fail if slurmservers[0] has not already completed the slurm.conf task that follows it
@ -66,47 +69,6 @@
- restart slurmd
- restart slurmctld
# Renders slurmdbd.conf from its template, on hosts in `slurmdbdservers` only.
# The file is readable by the slurm user alone. The mode is quoted so it
# reaches the module as the octal string "0400" rather than a YAML integer.
# A change to the rendered file triggers the `reload slurmdbd` handler
# (defined outside this file).
- name: Install slurmdbd.conf
  template:
    src: slurmdbd.conf.j2
    dest: "{{ slurm_conf_dir }}/slurmdbd.conf"
    owner: slurm
    group: root
    mode: "0400"
  when: "'slurmdbdservers' in group_names"
  notify:
    - reload slurmdbd
# Directory layout for the Slurm daemons. All modes are quoted so they are
# passed to the file module as octal strings rather than YAML integers.

# Spool directory: created on every host this task list runs on, root-owned.
- name: Create slurm spool directory
  file:
    path: "{{ slurmd_spool_dir }}"
    state: directory
    owner: root
    group: root
    mode: "0755"
  notify:
    - reload slurmd

# Controller state directory: controller hosts only, private to the slurm user.
- name: Create slurm state directory
  file:
    path: "{{ slurmctld_state_dir }}"
    state: directory
    owner: slurm
    group: slurm
    mode: "0700"
  when: "'slurmservers' in group_names"
  notify:
    - reload slurmctld

# Log directory under /var/log: controller hosts only; no handler is notified.
- name: Create slurm log directory
  file:
    path: "/var/log/{{ slurm_log_dir_name }}"
    state: directory
    owner: slurm
    group: slurm
    mode: "0755"
  when: "'slurmservers' in group_names"
- name: Check munge dir
file:
path: /etc/munge
@ -132,13 +94,14 @@
stat:
path: /var/log
register: stat_var_log
when: ansible_distribution == "Ubuntu"
# Sets OPTIONS="--force" in /etc/default/munge when /var/log is
# group-writable.
# A single `when` is kept: a task mapping cannot carry the key twice. The
# distribution check comes first because the stat task that registers
# stat_var_log is itself limited to Ubuntu, so `.stat` must not be
# dereferenced on other distributions.
- name: Force munge to start with "insecure" /var/log permissions
  lineinfile:
    dest: /etc/default/munge
    line: 'OPTIONS="--force"'
    regexp: ^OPTIONS=
  when: ansible_distribution == "Ubuntu" and stat_var_log.stat.wgrp
- name: Ensure Munge is running
service:
@ -146,20 +109,6 @@
enabled: yes
state: started
# Enable at boot and start the execution daemon on hosts in `slurmexechosts`.
- name: Ensure slurmd is running
  when: "'slurmexechosts' in group_names"
  service:
    name: "{{ slurmd_service_name }}"
    state: started
    enabled: true

# Enable at boot and start the controller daemon on hosts in `slurmservers`.
- name: Ensure slurmctld is running
  when: "'slurmservers' in group_names"
  service:
    name: "{{ slurmctld_service_name }}"
    state: started
    enabled: true
- name: Ensure slurmdbd is running
service:
name: "{{ slurmdbd_service_name }}"
@ -167,5 +116,16 @@
state: started
when: "'slurmdbdservers' in group_names"
- include: elastic.yml
when: "'slurmelasticservers' in group_names"
# Daemon start-up, controller first and then the execution daemon. Each task
# enables the service at boot and starts it, gated on group membership.
- name: Ensure slurmctld is running
  when: "'slurmservers' in group_names"
  service:
    name: "{{ slurmctld_service_name }}"
    state: started
    enabled: true

- name: Ensure slurmd is running
  when: "'slurmexechosts' in group_names"
  service:
    name: "{{ slurmd_service_name }}"
    state: started
    enabled: true

35
tasks/slurmctld.yml Normal file
View File

@ -0,0 +1,35 @@
---
# Controller (slurmctld) host setup: packages, then the state and log
# directories. The main task list includes this file only for hosts in the
# `slurmservers` group, so no group condition is repeated here.

# Package names are passed as a list so the package manager is invoked once
# per task rather than once per item.
- name: Install Slurm controller packages (apt)
  apt:
    name:
      - slurm-wlm
      - slurm-wlm-doc
  when: ansible_os_family == "Debian"

- name: Install Slurm controller packages (yum)
  yum:
    name:
      - slurm
      - slurm-munge
  when: ansible_os_family == "RedHat"

# File modes are quoted so they reach the file module as octal strings
# rather than YAML integers.

# State directory: private to the slurm user. A change triggers the
# `reload slurmctld` handler (defined outside this file).
- name: Create slurm state directory
  file:
    path: "{{ slurmctld_state_dir }}"
    state: directory
    owner: slurm
    group: slurm
    mode: "0700"
  notify:
    - reload slurmctld

# Log directory under /var/log, owned by slurm; no handler is notified.
- name: Create slurm log directory
  file:
    path: "/var/log/{{ slurm_log_dir_name }}"
    state: directory
    owner: slurm
    group: slurm
    mode: "0755"

26
tasks/slurmd.yml Normal file
View File

@ -0,0 +1,26 @@
---
# Execution host (slurmd) setup: packages and the spool directory. The main
# task list includes this file only for hosts in the `slurmexechosts` group.

- name: Install Slurm execution host packages (apt)
  apt:
    name: slurm-wlm
  when: ansible_os_family == "Debian"

# Package names are passed as a list so yum is invoked once for the set.
- name: Install Slurm execution host packages (yum)
  yum:
    name:
      - slurm
      - slurm-munge
  when: ansible_os_family == "RedHat"

# Spool directory is root-owned. The mode is quoted so it reaches the file
# module as an octal string rather than a YAML integer. A change triggers the
# `reload slurmd` handler (defined outside this file).
- name: Create slurm spool directory
  file:
    path: "{{ slurmd_spool_dir }}"
    state: directory
    owner: root
    group: root
    mode: "0755"
  notify:
    - reload slurmd

26
tasks/slurmdbd.yml Normal file
View File

@ -0,0 +1,26 @@
---
# Accounting database daemon (slurmdbd) setup: packages and slurmdbd.conf.
# The main task list includes this file only for hosts in the
# `slurmdbdservers` group.

- name: Install Slurm DB packages (apt)
  apt:
    name: slurmdbd
  when: ansible_os_family == "Debian"

# Package names are passed as a list so yum is invoked once for the set.
- name: Install Slurm DB packages (yum)
  yum:
    name:
      - munge
      - slurm-slurmdbd
  when: ansible_os_family == "RedHat"

# slurmdbd.conf is readable by the slurm user alone. The mode is quoted so it
# reaches the template module as the octal string "0400" rather than a YAML
# integer. A change triggers the `reload slurmdbd` handler (defined outside
# this file).
- name: Install slurmdbd.conf
  template:
    src: slurmdbd.conf.j2
    dest: "{{ slurm_conf_dir }}/slurmdbd.conf"
    owner: slurm
    group: root
    mode: "0400"
  notify:
    - reload slurmdbd

View File

@ -7,10 +7,10 @@ AuthType=auth/munge
#AuthInfo=/var/run/munge/munge.socket.2
# slurmDBD info
DbdAddr=galaxy02
DbdHost=galaxy02
DbdAddr={{ slurm_dbd_server_ip }}
DbdHost={{ slurm_dbd_server_name }}
#DbdPort=7031
DbdPort=30001
DbdPort={{ slurm_dbd_server_port | default(6819) }}
SlurmUser=slurm
#MessageTimeout=300
DebugLevel=4
@ -25,6 +25,6 @@ PidFile=/var/run/slurmdbd.pid
StorageType=accounting_storage/mysql
#StorageHost=localhost
#StoragePort=1234
StoragePass={{ slurmdbd_mysql_pass }}
StoragePass={{ slurm_dbd_mysql_pass }}
StorageUser=slurm
#StorageLoc=slurm_acct_db