summaryrefslogtreecommitdiff
path: root/sysutils/slurm-wlm/files/patch-etc_slurm.conf.example
blob: ce9c31fd439b3ac4f193137f5ef9c1b48e52be07 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
--- etc/slurm.conf.example.orig	2023-11-21 22:33:29 UTC
+++ etc/slurm.conf.example
@@ -8,8 +8,9 @@
 # Put this file on all nodes of your cluster.
 # See the slurm.conf man page for more information.
 #
-ClusterName=cluster
-SlurmctldHost=linux0
+ClusterName=Beastie
+# Short hostname of the head node
+SlurmctldHost=head
 #SlurmctldHost=
 #
 #DisableRootJobs=NO
@@ -41,7 +42,7 @@ ProctrackType=proctrack/cgroup
 #PrologFlags=
 #PrologSlurmctld=
 #PropagatePrioProcess=0
-#PropagateResourceLimits=
+PropagateResourceLimits=NONE
 #PropagateResourceLimitsExcept=
 #RebootProgram=
 ReturnToService=1
@@ -58,6 +59,8 @@ TaskPlugin=task/affinity
 SwitchType=switch/none
 #TaskEpilog=
 TaskPlugin=task/affinity
+TaskPluginParam=cores
+# For debugging: TaskPluginParam=cores,verbose
 #TaskProlog=
 #TopologyPlugin=topology/tree
 #TmpFS=/tmp
@@ -88,11 +91,12 @@ Waittime=0
 #
 #
 # SCHEDULING
-#DefMemPerCPU=0
+DefMemPerCPU=256
 #MaxMemPerCPU=0
 #SchedulerTimeSlice=30
 SchedulerType=sched/backfill
 SelectType=select/cons_tres
+SelectTypeParameters=CR_Core_Memory
 #
 #
 # JOB PRIORITY
@@ -115,9 +119,11 @@ SelectType=select/cons_tres
 #AccountingStorageHost=
 #AccountingStoragePass=
 #AccountingStoragePort=
-AccountingStorageType=accounting_storage/none
+#AccountingStorageType=accounting_storage/slurmdbd
+#AccountingStorageLoc=/home/slurm/Accounting
+#AccountingStoreJobComment=YES
 #AccountingStorageUser=
-#AccountingStoreFlags=
+AccountingStoreFlags=job_comment
 #JobCompHost=
 #JobCompLoc=
 #JobCompPass=
@@ -128,9 +134,9 @@ SlurmctldDebug=info
 JobAcctGatherFrequency=30
 JobAcctGatherType=jobacct_gather/none
 SlurmctldDebug=info
-SlurmctldLogFile=/var/log/slurmctld.log
+SlurmctldLogFile=/var/log/slurm/slurmctld
 SlurmdDebug=info
-SlurmdLogFile=/var/log/slurmd.log
+SlurmdLogFile=/var/log/slurm/slurmd
 #SlurmSchedLogFile=
 #SlurmSchedLogLevel=
 #DebugFlags=
@@ -148,6 +154,41 @@ SlurmdLogFile=/var/log/slurmd.log
 #SuspendTime=
 #
 #
+
+############################################################################
+# Enable power saving if remote IPMI power-on is available on compute nodes.
+# If unavailable on some nodes, list them in SuspendExcNodes.
+# SlurmUser must be a member of operator and wheel and have a valid
+# login shell in order to execute shutdown on compute nodes.
+# If you prefer to control power manually, see the following scripts
+# from the SPCM port:
+#
+#       auto-ipmi-remote-power
+#       cluster-power-saver
+#       cluster-power-waster
+#       cluster-ipmi-power-on
+############################################################################
+
+# SuspendProgram=/usr/local/etc/spcm/slurm-node-suspend
+# SuspendTime should be >= SuspendTimeout + ResumeTimeout.
+# SuspendTime=600
+# SuspendTimeout=60
+#
+# ResumeProgram=/usr/local/etc/spcm/slurm-node-resume
+# ResumeTimeout=300
+# BatchStartTimeout=300
+#
+# Exempt compute nodes that double as file servers or don't have IPMI
+# remote power-on enabled.
+#
+# SuspendExcNodes=compute-001
+
+#
 # COMPUTE NODES
-NodeName=linux[1-32] CPUs=1 State=UNKNOWN
-PartitionName=debug Nodes=ALL Default=YES MaxTime=INFINITE State=UP
+# Set RealMemory (in MB) below the "avail memory" reported in /var/run/dmesg.boot.
+# Note that avail memory may change slightly after running freebsd-update.
+NodeName=compute-[001-002] Sockets=2 CoresPerSocket=6 RealMemory=30000 State=UNKNOWN
+# NodeName=compute-256g-[001-002] Sockets=2 CoresPerSocket=6 RealMemory=250000 State=UNKNOWN
+# PartitionName=debug Nodes=ALL Default=NO MaxTime=INFINITE State=UP
+PartitionName=batch Nodes=compute-[001-002] Default=YES MaxTime=INFINITE State=UP
+# PartitionName=256g Nodes=compute-256g-[001-002] Default=NO MaxTime=INFINITE State=UP