1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
|
--- etc/slurm.conf.example.orig 2020-03-26 21:44:05 UTC
+++ etc/slurm.conf.example
@@ -8,8 +8,9 @@
#
# See the slurm.conf man page for more information.
#
-ClusterName=linux
-ControlMachine=linux0
+ClusterName=Beastie
+# Short hostname of the head node
+ControlMachine=head
#ControlAddr=
#BackupController=
#BackupAddr=
@@ -25,8 +26,8 @@ StateSaveLocation=/var/spool/slurm/ctld
SlurmdSpoolDir=/var/spool/slurm/d
SwitchType=switch/none
MpiDefault=none
-SlurmctldPidFile=/var/run/slurmctld.pid
-SlurmdPidFile=/var/run/slurmd.pid
+SlurmctldPidFile=/var/run/slurm/slurmctld.pid
+SlurmdPidFile=/var/run/slurm/slurmd.pid
ProctrackType=proctrack/pgid
#PluginDir=
#FirstJobId=
@@ -34,7 +35,7 @@ ReturnToService=0
#MaxJobCount=
#PlugStackConfig=
#PropagatePrioProcess=
-#PropagateResourceLimits=
+PropagateResourceLimits=NONE
#PropagateResourceLimitsExcept=
#Prolog=
#Epilog=
@@ -42,7 +43,9 @@ ReturnToService=0
#SrunEpilog=
#TaskProlog=
#TaskEpilog=
-#TaskPlugin=
+TaskPlugin=task/affinity
+TaskPluginParam=cores
+# For debugging: TaskPluginParam=cores,verbose
#TrackWCKey=no
#TreeWidth=50
#TmpFS=
@@ -57,10 +60,11 @@ KillWait=30
Waittime=0
#
# SCHEDULING
+DefMemPerCPU=256
SchedulerType=sched/backfill
#SchedulerAuth=
SelectType=select/cons_tres
-SelectTypeParameters=CR_Core
+SelectTypeParameters=CR_Core_Memory
#PriorityType=priority/multifactor
#PriorityDecayHalfLife=14-0
#PriorityUsageResetPeriod=14-0
@@ -72,22 +76,58 @@ SelectTypeParameters=CR_Core
#
# LOGGING
SlurmctldDebug=info
-SlurmctldLogFile=/var/log/slurmctld.log
+SlurmctldLogFile=/var/log/slurm/slurmctld
SlurmdDebug=info
-SlurmdLogFile=/var/log/slurmd.log
+SlurmdLogFile=/var/log/slurm/slurmd
JobCompType=jobcomp/none
#JobCompLoc=
#
# ACCOUNTING
-#JobAcctGatherType=jobacct_gather/linux
+JobAcctGatherType=jobacct_gather/none
#JobAcctGatherFrequency=30
#
-#AccountingStorageType=accounting_storage/slurmdbd
+AccountingStorageType=accounting_storage/filetxt
+AccountingStorageLoc=/home/slurm/Accounting
+AccountingStoreJobComment=YES
#AccountingStorageHost=
#AccountingStorageLoc=
#AccountingStoragePass=
#AccountingStorageUser=
+
+############################################################################
+# Enable power saving if remote IPMI power-on is available on compute nodes.
+# If unavailable on some nodes, list them in SuspendExcNodes.
+# SlurmUser must be a member of the operator and wheel groups and have a
+# valid login shell in order to execute shutdown on compute nodes.
+# If you prefer to control power manually, see the following scripts
+# from the SPCM port:
#
+# auto-ipmi-remote-power
+# cluster-power-saver
+# cluster-power-waster
+# cluster-ipmi-power-on
+############################################################################
+
+# SuspendProgram=/usr/local/etc/spcm/slurm-node-suspend
+# SuspendTime should be >= SuspendTimeout + ResumeTimeout.
+# SuspendTime=600
+# SuspendTimeout=60
+#
+# ResumeProgram=/usr/local/etc/spcm/slurm-node-resume
+# ResumeTimeout=300
+# BatchStartTimeout=300
+#
+# Exempt compute nodes that double as file servers or don't have IPMI
+# remote power-on enabled.
+#
+# SuspendExcNodes=compute-001
+
+#
# COMPUTE NODES
-NodeName=linux[1-32] Procs=1 State=UNKNOWN
-PartitionName=debug Nodes=ALL Default=YES MaxTime=INFINITE State=UP
+# Set RealMemory (in MB) below the "avail memory" value in /var/run/dmesg.boot.
+# Note that avail memory may change slightly after running freebsd-update.
+NodeName=compute-[001-002] Sockets=2 CoresPerSocket=6 RealMemory=30000 State=UNKNOWN
+# NodeName=compute-256g-[001-002] Sockets=2 CoresPerSocket=6 RealMemory=250000 State=UNKNOWN
+# PartitionName=debug Nodes=ALL Default=NO MaxTime=INFINITE State=UP
+PartitionName=batch Nodes=compute-[001-002] Default=YES MaxTime=INFINITE State=UP
+# PartitionName=256g Nodes=compute-256g-[001-002] Default=NO MaxTime=INFINITE State=UP
|