# Patch summary (Ansible role "ansible-slurm"):
#   * Replaces the per-distro apt/yum tasks and vars/{debian,redhat}.yml with
#     `package` tasks driven by lookup tables in defaults/main.yml.
#   * Moves user, munge and shared client/config tasks out of tasks/main.yml
#     into user.yml, munge.yml and common.yml.
#   * Renders slurm.conf, slurmdbd.conf and cgroup.conf from dictionary
#     variables via templates/slurm.conf.j2 and templates/generic.conf.j2.
#   * Handlers and service tasks match on `slurm_roles` as well as on
#     inventory group names.
# Text before the first "diff --git" line is ignored by `git apply`.
# NOTE: the blob ids on the `index` lines of tasks/slurmdbd.yml and
# templates/slurm.conf.j2 predate the final content of those hunks.
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..1377554
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+*.swp
diff --git a/README.md b/README.md
index 5b73cc1..3debeb2 100644
--- a/README.md
+++ b/README.md
@@ -6,13 +6,7 @@ Install and configure Slurm
 Role Variables
 --------------
 
-All variables are optional. However, if unset, the role will essentially do nothing. See the [defaults][defaults] and [example playbook](#example-playbook) for examples.
-
-You need to define a `slurm_user` like this, unless you want to override anything.
-
-```yaml
-slurm_user: {}
-```
+All variables are optional. If nothing is set, the role will install the Slurm client programs, munge, and create a `slurm.conf`. See the [defaults][defaults] and [example playbook](#example-playbook) for examples.
 
 For the various roles a slurm node can play, you can either set group names, or add values to a list, `slurm_roles`.
 
@@ -46,7 +40,7 @@ MIT
 Author Information
 ------------------
 
-[Nate Coraor](https://github.com/natefoo)
-[Helena Rasche](https://github.com/erasche)
+- [Nate Coraor](https://github.com/natefoo)
+- [Helena Rasche](https://github.com/erasche)
 
-[View contributors on GitHub](https://github.com/galaxyproject/ansible-cvmfs/graphs/contributors)
+[View contributors on GitHub](https://github.com/galaxyproject/ansible-slurm/graphs/contributors)
diff --git a/defaults/main.yml b/defaults/main.yml
index 65c0058..357e3b4 100644
--- a/defaults/main.yml
+++ b/defaults/main.yml
@@ -1,3 +1,89 @@
 ---
+
 slurm_upgrade: no
 slurm_roles: []
+slurm_partitions: []
+slurm_nodes: []
+
+slurm_config_dir: "{{ '/etc/slurm-llnl' if __slurm_debian else '/etc/slurm' }}"
+
+slurm_create_user: "{{ __slurm_redhat }}"
+slurm_create_dirs: "{{ __slurm_redhat }}"
+slurm_rotate_logs: "{{ __slurm_redhat }}"
+slurm_configure_munge: yes
+
+# Service names are the same on both distros (TODO: record since which release)
+slurmd_service_name: slurmd
+slurmctld_service_name: slurmctld
+slurmdbd_service_name: slurmdbd
+
+__slurm_user_name: "{{ (slurm_user | default({})).name | default('slurm') }}"
+# TODO: this could be incorrect, use the group collection from galaxyproject.galaxy
+__slurm_group_name: "{{ (slurm_user | default({})).group | default((slurm_user | default({})).name) | default('slurm') }}"
+
+__slurm_debian: "{{ ansible_os_family == 'Debian' }}"
+__slurm_redhat: "{{ ansible_os_family == 'RedHat' }}"
+
+__slurm_config_default:
+  AuthType: auth/munge
+  CryptoType: crypto/munge
+  SlurmUser: "{{ __slurm_user_name }}"
+  ClusterName: cluster
+  # slurmctld options
+  SlurmctldPort: 6817
+  SlurmctldLogFile: "{{ '/var/log/slurm-llnl/slurmctld.log' if __slurm_debian else null }}"
+  SlurmctldPidFile: >-
+    {{
+      '/var/run/slurm-llnl/slurmctld.pid' if __slurm_debian else (
+      '/var/run/slurmctld.pid' if __slurm_redhat else
+      null)
+    }}
+  StateSaveLocation: >-
+    {{
+      '/var/lib/slurm-llnl/slurmctld' if __slurm_debian else (
+      '/var/lib/slurm/slurmctld' if __slurm_redhat else
+      null)
+    }}
+  # slurmd options
+  SlurmdPort: 6818
+  SlurmdLogFile: "{{ '/var/log/slurm-llnl/slurmd.log' if __slurm_debian else null }}"
+  SlurmdPidFile: >-
+    {{
+      '/var/run/slurm-llnl/slurmd.pid' if __slurm_debian else (
+      '/var/run/slurm.pid' if __slurm_redhat else
+      null)
+    }}
+  SlurmdSpoolDir: >-
+    {{
+      '/var/lib/slurm-llnl/slurmd' if __slurm_debian else (
+      '/var/spool/slurm/slurmd' if __slurm_redhat else
+      null)
+    }}
+__slurm_config_merged: "{{ __slurm_config_default | combine(slurm_config | default({})) }}"
+
+__slurm_debian_packages:
+  client: [slurm-client, slurm-wlm-doc]
+  slurmctld: [slurm-wlm]
+  slurmd: [slurm-wlm]
+  slurmdbd: [slurmdbd]
+
+__slurm_redhat_packages:
+  client: [slurm, munge]
+  slurmctld: [munge, slurm, slurm-slurmctld]
+  slurmd: [munge, slurm, slurm-slurmd]
+  slurmdbd: [munge, slurm-slurmdbd]
+
+__slurm_packages: "{{ __slurm_debian_packages if __slurm_debian else __slurm_redhat_packages }}"
+
+__slurmdbd_config_default:
+  AuthType: auth/munge
+  DbdPort: 6819
+  SlurmUser: "{{ __slurm_user_name }}"
+  PidFile: >-
+    {{
+      '/var/run/slurm-llnl/slurmdbd.pid' if __slurm_debian else (
+      '/var/run/slurmdbd.pid' if __slurm_redhat else
+      null)
+    }}
+  LogFile: "{{ '/var/log/slurm-llnl/slurmdbd.log' if __slurm_debian else null }}"
+__slurmdbd_config_merged: "{{ __slurmdbd_config_default | combine(slurmdbd_config | default({})) }}"
diff --git a/handlers/main.yml b/handlers/main.yml
index 9540871..2af4d81 100644
--- a/handlers/main.yml
+++ b/handlers/main.yml
@@ -4,28 +4,28 @@
   service:
     name: "{{ slurmd_service_name }}"
     state: reloaded
-  when: "'slurmexechosts' in group_names"
+  when: "'slurmexechosts' in group_names or 'exec' in slurm_roles"
 
 - name: restart slurmd
   service:
     name: "{{ slurmd_service_name }}"
     state: restarted
-  when: "'slurmexechosts' in group_names"
+  when: "'slurmexechosts' in group_names or 'exec' in slurm_roles"
 
 - name: reload slurmctld
   service:
     name: "{{ slurmctld_service_name }}"
     state: reloaded
-  when: "'slurmservers' in group_names"
+  when: "'slurmservers' in group_names or 'controller' in slurm_roles"
 
 - name: restart slurmctld
   service:
     name: "{{ slurmctld_service_name }}"
     state: restarted
-  when: "'slurmservers' in group_names"
+  when: "'slurmservers' in group_names or 'controller' in slurm_roles"
 
 - name: reload slurmdbd
   service:
     name: "{{ slurmdbd_service_name }}"
     state: reloaded
-  when: "'slurmdbdservers' in group_names"
+  when: "'slurmdbdservers' in group_names or 'dbd' in slurm_roles"
diff --git a/tasks/_inc_create_config_dir.yml b/tasks/_inc_create_config_dir.yml
new file mode 100644
index 0000000..73a5523
--- /dev/null
+++ b/tasks/_inc_create_config_dir.yml
@@ -0,0 +1,7 @@
+---
+
+# As of 17.11, this is only created if you install the example configs package (RedHat)
+- name: Create slurm config dir
+  file:
+    path: "{{ slurm_config_dir }}"
+    state: directory
diff --git a/tasks/common.yml b/tasks/common.yml
new file mode 100644
index 0000000..6c50666
--- /dev/null
+++ b/tasks/common.yml
@@ -0,0 +1,31 @@
+---
+
+- name: Install Slurm client
+  package:
+    name: "{{ __slurm_packages.client }}"
+    state: "{{ 'latest' if slurm_upgrade else 'present' }}"
+
+- name: Include config dir creation tasks
+  include_tasks: _inc_create_config_dir.yml
+  when: slurm_create_dirs
+
+- name: Install log rotation configuration
+  template:
+    src: logrotate.j2
+    dest: /etc/logrotate.d/slurm
+  when: slurm_rotate_logs
+
+- name: Install slurm.conf
+  template:
+    src: "slurm.conf.j2"
+    dest: "{{ slurm_config_dir }}/slurm.conf"
+    owner: root
+    group: root
+    mode: 0444
+  notify:
+    - restart slurmd
+    - restart slurmctld
+
+- name: Include munge tasks
+  include_tasks: munge.yml
+  when: slurm_configure_munge
diff --git a/tasks/main.yml b/tasks/main.yml
index 4edcb71..fe81947 100644
--- a/tasks/main.yml
+++ b/tasks/main.yml
@@ -1,25 +1,7 @@
 ---
 
-- name: Include OS vars
-  include_vars: "{{ ansible_os_family | lower }}.yml"
-
-- name: Create slurm group
-  group:
-    name: "{{ slurm_user.name | default('slurm') }}"
-    gid: "{{ slurm_user.gid | default(omit) }}"
-    system: "{{ slurm_user.system | default('yes') }}"
-  when: slurm_user is defined
-
-- name: Create slurm user
-  user:
-    name: "{{ slurm_user.name | default('slurm') }}"
-    comment: "{{ slurm_user.comment | default(omit) }}"
-    uid: "{{ slurm_user.uid | default(omit) }}"
-    group: "{{ slurm_user.group | default(slurm_user.name | default('slurm')) }}"
-    groups: "{{ slurm_user.groups | default(omit) }}"
-    home: "{{ slurm_user.home | default(omit) }}"
-    shell: "{{ slurm_user.shell | default(omit) }}"
-    system: "{{ slurm_user.system | default('yes') }}"
+- name: Include user creation tasks
+  include_tasks: user.yml
   when: slurm_user is defined
 
 - name: Include controller installation tasks
@@ -34,111 +16,24 @@
   include_tasks: slurmdbd.yml
   when: "'slurmdbdservers' in group_names or 'dbd' in slurm_roles"
 
-- name: Install Slurm client (apt)
-  apt:
-    name: "{{ item }}"
-    state: "{{ 'latest' if slurm_upgrade else 'present' }}"
-  when: ansible_os_family == "Debian"
-  with_items:
-    - slurm-client
-    - slurm-wlm-doc
+- name: Import common tasks
+  import_tasks: common.yml
 
-- name: Install Slurm (yum)
-  yum:
-    name: "{{ item }}"
-    state: "{{ 'latest' if slurm_upgrade else 'present' }}"
-  with_items:
-    - slurm
-    - munge
-  when: ansible_os_family == "RedHat"
-
-# As of 17.11, this is only created if you install the example configs package
-- name: Create /etc/slurm
-  file:
-    path: /etc/slurm
-    state: directory
-  when: ansible_os_family == "RedHat"
-
-- name: Install log rotation configuration
-  copy:
-    src: logrotate-slurm
-    dest: /etc/logrotate.d/slurm
-  when: ansible_os_family == "RedHat"
-
-# FIXME: this task will fail if slurmservers[0] has not already completed the slurm.conf task that follows it
-- name: Acquire hostlist
-  command: scontrol show hostlist {{ groups[item.inventory_group] | join(",") }}
-  with_items: "{{ slurm_nodes | default([]) }}"
-  delegate_to: "{{ groups['slurmservers'][0] }}"
-  run_once: true
-  register: slurm_hostlists
-
-- name: Install slurm.conf
-  template:
-    src: "{{ slurm_conf_src | default( 'templates/slurm/slurm.conf.j2' ) }}"
-    dest: "{{ slurm_conf_dir }}/slurm.conf"
-    owner: root
-    group: root
-    mode: 0444
-  notify:
-    - restart slurmd
-    - restart slurmctld
-
-- name: Check munge dir
-  file:
-    path: /etc/munge
-    owner: munge
-    group: munge
-    mode: 0700
-    state: directory
-
-- name: Install munge key
-  action:
-    module: decode
-    args:
-      content: "{{ munge_key }}"
-      dest: /etc/munge/munge.key
-      filter: base64
-      owner: munge
-      group: munge
-      mode: 0400
-
-# /var/log on Ubuntu 14.04+ is group writable, which causes munge to refuse to start
-# NOTE: This is fixed in munge 0.5.12
-- name: Check /var/log permissions
-  stat:
-    path: /var/log
-  register: stat_var_log
-  when: ansible_distribution == "Ubuntu"
-
-- name: Force munge to start with "insecure" /var/log permissions
-  lineinfile:
-    dest: /etc/default/munge
-    line: 'OPTIONS="--force"'
-    regexp: ^OPTIONS=
-  when: ansible_distribution == "Ubuntu" and stat_var_log.stat.wgrp
-
-- name: Ensure Munge is running
-  service:
-    name: munge
-    enabled: yes
-    state: started
-
-- name: Ensure slurmdbd is running
+- name: Ensure slurmdbd is enabled and running
   service:
     name: "{{ slurmdbd_service_name }}"
     enabled: yes
     state: started
-  when: "'slurmdbdservers' in group_names 'dbd' in slurm_roles"
+  when: "'slurmdbdservers' in group_names or 'dbd' in slurm_roles"
 
-- name: Ensure slurmctld is running
+- name: Ensure slurmctld is enabled and running
   service:
     name: "{{ slurmctld_service_name }}"
     enabled: yes
     state: started
   when: "'slurmservers' in group_names or 'controller' in slurm_roles"
 
-- name: Ensure slurmd is running
+- name: Ensure slurmd is enabled and running
   service:
     name: "{{ slurmd_service_name }}"
     enabled: yes
diff --git a/tasks/munge.yml b/tasks/munge.yml
new file mode 100644
index 0000000..15c717f
--- /dev/null
+++ b/tasks/munge.yml
@@ -0,0 +1,41 @@
+---
+
+- name: Check munge dir
+  file:
+    path: /etc/munge
+    owner: munge
+    group: munge
+    mode: 0700
+    state: directory
+
+- name: Install munge key
+  action:
+    module: decode
+    args:
+      content: "{{ munge_key }}"
+      dest: /etc/munge/munge.key
+      filter: base64
+      owner: munge
+      group: munge
+      mode: 0400
+
+# /var/log on Ubuntu 14.04+ is group writable, which causes munge to refuse to start
+# NOTE: This is fixed in munge 0.5.12
+- name: Check /var/log permissions
+  stat:
+    path: /var/log
+  register: stat_var_log
+  when: ansible_distribution == "Ubuntu"
+
+- name: Force munge to start with "insecure" /var/log permissions
+  lineinfile:
+    dest: /etc/default/munge
+    line: 'OPTIONS="--force"'
+    regexp: ^OPTIONS=
+  when: ansible_distribution == "Ubuntu" and stat_var_log.stat.wgrp
+
+- name: Ensure Munge is enabled and running
+  service:
+    name: munge
+    enabled: yes
+    state: started
diff --git a/tasks/slurmctld.yml b/tasks/slurmctld.yml
index d7234cf..eb27077 100644
--- a/tasks/slurmctld.yml
+++ b/tasks/slurmctld.yml
@@ -1,38 +1,26 @@
 ---
 
-- name: Install Slurm controller packages (apt)
-  apt:
-    name: "{{ item }}"
+- name: Install Slurm controller packages
+  package:
+    name: "{{ __slurm_packages.slurmctld }}"
     state: "{{ 'latest' if slurm_upgrade else 'present' }}"
-  when: ansible_os_family == "Debian"
-  with_items:
-    - slurm-wlm
-    - slurm-wlm-doc
-
-- name: Install Slurm controller packages (yum)
-  yum:
-    name: "{{ item }}"
-    state: "{{ 'latest' if slurm_upgrade else 'present' }}"
-  with_items:
-    - munge
-    - slurm
-    - slurm-slurmctld
-  when: ansible_os_family == "RedHat"
 
 - name: Create slurm state directory
   file:
-    path: "{{ slurmctld_state_dir }}"
-    owner: slurm
-    group: slurm
+    path: "{{ __slurm_config_merged.StateSaveLocation }}"
+    owner: "{{ __slurm_user_name }}"
+    group: "{{ __slurm_group_name }}"
     mode: 0700
     state: directory
+  when: slurm_create_dirs
   notify:
     - reload slurmctld
 
 - name: Create slurm log directory
   file:
-    path: "/var/log/{{ slurm_log_dir_name }}"
-    owner: slurm
-    group: slurm
+    path: "{{ __slurm_config_merged.SlurmctldLogFile | dirname }}"
+    owner: "{{ __slurm_user_name }}"
+    group: "{{ __slurm_group_name }}"
     mode: 0755
     state: directory
+  when: slurm_create_dirs and __slurm_config_merged.SlurmctldLogFile != None
diff --git a/tasks/slurmd.yml b/tasks/slurmd.yml
index c43ad71..5d3eb48 100644
--- a/tasks/slurmd.yml
+++ b/tasks/slurmd.yml
@@ -1,47 +1,40 @@
 ---
 
-- name: Install Slurm execution host packages (apt)
-  apt:
-    name: "{{ item }}"
+- name: Install Slurm execution host packages
+  package:
+    name: "{{ __slurm_packages.slurmd }}"
     state: "{{ 'latest' if slurm_upgrade else 'present' }}"
-  when: ansible_os_family == "Debian"
-  with_items:
-    - slurm-wlm
-
-- name: Install Slurm execution host packages (yum)
-  yum:
-    name: "{{ item }}"
-    state: "{{ 'latest' if slurm_upgrade else 'present' }}"
-  with_items:
-    - munge
-    - slurm
-    - slurm-slurmd
-  when: ansible_os_family == "RedHat"
 
 - name: Create slurm spool directory
   file:
-    path: "{{ slurmd_spool_dir }}"
+    path: "{{ __slurm_config_merged.SlurmdSpoolDir }}"
     owner: root
     group: root
     mode: 0755
     state: directory
+  when: slurm_create_dirs
   notify:
     - reload slurmd
 
 - name: Create slurm log directory
   file:
-    path: "/var/log/{{ slurm_log_dir_name }}"
-    owner: slurm
-    group: slurm
+    path: "{{ __slurm_config_merged.SlurmdLogFile | dirname }}"
+    owner: "{{ __slurm_user_name }}"
+    group: "{{ __slurm_group_name }}"
     mode: 0755
     state: directory
+  when: slurm_create_dirs and __slurm_config_merged.SlurmdLogFile != None
 
-- name: Create cgroup.conf
-  copy:
-    content: |
-      CgroupAutomount=yes
-      ConstrainCores=yes
-      ConstrainRAMSpace=yes
-      ConstrainSwapSpace=yes
-    dest: "{{ slurm_conf_dir }}/cgroup.conf"
+- name: Include config dir creation tasks
+  include_tasks: _inc_create_config_dir.yml
+  when: slurm_create_dirs
+
+- name: Install extra execution host configs
+  template:
+    src: generic.conf.j2
+    dest: "{{ slurm_config_dir }}/{{ item.name }}"
     backup: yes
+  with_items:
+    - name: cgroup.conf
+      config: slurm_cgroup_config
+  when: item.config in vars
diff --git a/tasks/slurmdbd.yml b/tasks/slurmdbd.yml
index 9463fb1..7c84a55 100644
--- a/tasks/slurmdbd.yml
+++ b/tasks/slurmdbd.yml
@@ -1,28 +1,27 @@
 ---
 
-- name: Install Slurm DB packages (apt)
-  apt:
-    name: "{{ item }}"
+- name: Install Slurm DB packages
+  package:
+    name: "{{ __slurm_packages.slurmdbd }}"
     state: "{{ 'latest' if slurm_upgrade else 'present' }}"
-  when: ansible_os_family == "Debian"
-  with_items:
-    - slurmdbd
-
-- name: Install Slurm DB packages (yum)
-  yum:
-    name: "{{ item }}"
-    state: "{{ 'latest' if slurm_upgrade else 'present' }}"
-  with_items:
-    - munge
-    - slurm-slurmdbd
-  when: ansible_os_family == "RedHat"
 
 - name: Install slurmdbd.conf
   template:
-    src: slurmdbd.conf.j2
-    dest: "{{ slurm_conf_dir }}/slurmdbd.conf"
-    owner: slurm
+    src: generic.conf.j2
+    dest: "{{ slurm_config_dir }}/slurmdbd.conf"
+    owner: "{{ __slurm_user_name }}"
     group: root
     mode: 0400
+  with_items:
+    - config: __slurmdbd_config_merged
   notify:
     - reload slurmdbd
+
+- name: Create slurm log directory
+  file:
+    path: "{{ __slurmdbd_config_merged.LogFile | dirname }}"
+    owner: "{{ __slurm_user_name }}"
+    group: "{{ __slurm_group_name }}"
+    mode: 0755
+    state: directory
+  when: slurm_create_dirs and __slurmdbd_config_merged.LogFile != None
diff --git a/tasks/user.yml b/tasks/user.yml
new file mode 100644
index 0000000..a65aa68
--- /dev/null
+++ b/tasks/user.yml
@@ -0,0 +1,18 @@
+---
+
+- name: Create slurm group
+  group:
+    name: "{{ slurm_user.name | default('slurm') }}"
+    gid: "{{ slurm_user.gid | default(omit) }}"
+    system: "{{ slurm_user.system | default('yes') }}"
+
+- name: Create slurm user
+  user:
+    name: "{{ slurm_user.name | default('slurm') }}"
+    comment: "{{ slurm_user.comment | default(omit) }}"
+    uid: "{{ slurm_user.uid | default(omit) }}"
+    group: "{{ slurm_user.group | default(slurm_user.name | default('slurm')) }}"
+    groups: "{{ slurm_user.groups | default(omit) }}"
+    home: "{{ slurm_user.home | default(omit) }}"
+    shell: "{{ slurm_user.shell | default(omit) }}"
+    system: "{{ slurm_user.system | default('yes') }}"
diff --git a/templates/generic.conf.j2 b/templates/generic.conf.j2
new file mode 100644
index 0000000..5a465de
--- /dev/null
+++ b/templates/generic.conf.j2
@@ -0,0 +1,10 @@
+##
+## This file is maintained by Ansible - ALL MODIFICATIONS WILL BE REVERTED
+##
+
+{% set conf = lookup('vars', item.config) %}
+{% for key in conf | sort %}
+{% if conf[key] != None %}
+{{ key }}={{ conf[key] }}
+{% endif %}
+{% endfor %}
diff --git a/files/logrotate-slurm b/templates/logrotate.j2
similarity index 71%
rename from files/logrotate-slurm
rename to templates/logrotate.j2
index 94a239d..ceb7a4d 100644
--- a/files/logrotate-slurm
+++ b/templates/logrotate.j2
@@ -1,7 +1,8 @@
 ##
 # Slurm Logrotate Configuration
 ##
-/var/log/slurm/*.log {
+# TODO: this ignores the actual *LogFile values
+{{ '/var/log/slurm-llnl' if __slurm_debian else '/var/log/slurm' }}/*.log {
     compress
     missingok
     nocopytruncate
diff --git a/templates/slurm.conf.j2 b/templates/slurm.conf.j2
new file mode 100644
index 0000000..001a343
--- /dev/null
+++ b/templates/slurm.conf.j2
@@ -0,0 +1,28 @@
+##
+## This file is maintained by Ansible - ALL MODIFICATIONS WILL BE REVERTED
+##
+
+{% if 'ControlMachine' not in __slurm_config_merged and 'SlurmctldHost' not in __slurm_config_merged %}
+# Default, define SlurmctldHost or ControlMachine to override
+ControlMachine=localhost
+{% endif %}
+
+# Configuration options
+{% for key in __slurm_config_merged | sort %}
+{% if __slurm_config_merged[key] != None %}
+{{ key }}={{ __slurm_config_merged[key] }}
+{% endif %}
+{% endfor %}
+
+# Nodes
+{% for i in slurm_nodes %}
+NodeName={{ i.name }}{% for k in i | sort if k != 'name' %} {{ k }}={{ i[k] }}{% endfor %}
+
+{% endfor %}
+
+
+# Partitions
+{% for i in slurm_partitions %}
+PartitionName={{ i.name }}{% for k in i | sort if k != 'name' %} {{ k }}={{ i[k] }}{% endfor %}
+
+{% endfor %}
diff --git a/templates/slurmdbd.conf.j2 b/templates/slurmdbd.conf.j2
deleted file mode 100644
index 52b1542..0000000
--- a/templates/slurmdbd.conf.j2
+++ /dev/null
@@ -1,30 +0,0 @@
-##
-## This file is maintained by Ansible - CHANGES WILL BE OVERWRITTEN
-##
-
-# Authentication info
-AuthType=auth/munge
-#AuthInfo=/var/run/munge/munge.socket.2
-
-# slurmDBD info
-DbdAddr={{ slurm_dbd_server_ip }}
-DbdHost={{ slurm_dbd_server_name }}
-#DbdPort=7031
-DbdPort={{ slurm_dbd_server_port | default(6819) }}
-SlurmUser=slurm
-#MessageTimeout=300
-DebugLevel=4
-#DefaultQOS=normal,standby
-LogFile=/var/log/slurm/slurmdbd.log
-PidFile=/var/run/slurmdbd.pid
-#PluginDir=/usr/lib/slurm
-#PrivateData=accounts,users,usage,jobs
-#TrackWCKey=yes
-
-# Database info
-StorageType=accounting_storage/mysql
-#StorageHost=localhost
-#StoragePort=1234
-StoragePass={{ slurm_dbd_mysql_pass }}
-StorageUser=slurm
-#StorageLoc=slurm_acct_db
diff --git a/vars/debian.yml b/vars/debian.yml
deleted file mode 100644
index 8d54bd2..0000000
--- a/vars/debian.yml
+++ /dev/null
@@ -1,9 +0,0 @@
----
-
-slurm_conf_dir: /etc/slurm-llnl
-# FIXME
-slurm_service_name: slurm-llnl
-slurm_log_dir_name: slurm-llnl
-slurmd_spool_dir: /var/lib/slurm-llnl/slurmd/slurmd.spool
-slurmctld_state_dir: /var/lib/slurm-llnl/slurmctld/slurm.state
-
diff --git a/vars/redhat.yml b/vars/redhat.yml
deleted file mode 100644
index 2ef7aa1..0000000
--- a/vars/redhat.yml
+++ /dev/null
@@ -1,9 +0,0 @@
----
-
-slurm_conf_dir: /etc/slurm
-slurm_log_dir_name: slurm
-slurmd_service_name: slurmd
-slurmctld_service_name: slurmctld
-slurmdbd_service_name: slurmdbd
-slurmd_spool_dir: /var/lib/slurm/slurmd/slurmd.spool
-slurmctld_state_dir: /var/lib/slurm/slurmctld/slurm.state