ClassAccounts
Overview
UMIACS Class Accounts are currently intended to support classes for all of UMIACS/CSD via the Nexus cluster. All new class accounts will be serviced solely through this cluster. Faculty may request that a class be supported by contacting staff@umiacs.umd.edu.
Getting an account
Your TA will request an account for you. Once this is done, you will be notified by email that you have an account to redeem. If you have not received an email, please contact your TA. You must redeem the account within 7 days or else the redemption token will expire. If your redemption token does expire, please contact your TA to have it renewed.
Once you do redeem your account, you will need to wait until you get a confirmation email that your account has been installed. This is typically done once a day on days that the University is open for business.
Registering for Duo
UMIACS requires that all Class accounts be registered for MFA (multi-factor authentication) under our Duo instance (note that this is different from UMD's general Duo instance). You will not be able to log onto the class submission host until you register.
To register, visit our directory app and log in with your Class username and password. You will then receive a prompt to enroll in Duo. For assistance with enrollment, you can visit our Duo help page.
Once notified that your account has been installed and you have registered in our Duo instance, you can access the following class submission host(s) using SSH with your assigned username and your chosen password:
nexusclass00.umiacs.umd.edu
or
nexusclass01.umiacs.umd.edu
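For example, using the sample username from the Personal Storage section below (substitute your own username):

$ ssh c999z000@nexusclass00.umiacs.umd.edu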
Cleaning up your account before the end of the semester
Class accounts for a given semester will be archived and deleted after that semester's completion as early as the following:
- Spring semesters: June 1st of same year
- Summer semesters: September 1st of same year
- Fall semesters: January 1st of next year
It is your responsibility to ensure you have backed up anything you want to keep from your class account's personal or group storage (see the sections below) prior to the relevant date.
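One way to back up files is to copy them to your own machine over SSH. A minimal sketch, assuming the sample semester, course, and username used elsewhere on this page and a local destination directory named class-backup:

$ scp -r c999z000@nexusclass00.umiacs.umd.edu:/fs/classhomes/spring2021/cmsc999z/c999z000 ~/class-backup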
Personal Storage
Your home directory has a quota of 20GB and is located at:
/fs/classhomes/<semester><year>/<coursecode>/<username>
where <semester> is either "spring", "summer", "fall", or "winter"; <year> is the current year, e.g., "2021"; <coursecode> is the class' course code as listed in UMD's Schedule of Classes in all lowercase, e.g., "cmsc999z"; and <username> is the username mentioned in the email you received to redeem the account, e.g., "c999z000".
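For example, a Spring 2021 account in cmsc999z with username c999z000 would have the home directory /fs/classhomes/spring2021/cmsc999z/c999z000.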
You can request up to another 100GB of personal storage if you would like by having your TA contact staff. This storage will be located at
/fs/class-projects/<semester><year>/<coursecode>/<username>
Group Storage
You can also request group storage if you would like by having your TA contact staff to specify the usernames of the accounts that should be in the group. Only other class accounts in the same class can be added to the group. The quota will be 100GB multiplied by the number of accounts in the group and will be located at
/fs/class-projects/<semester><year>/<coursecode>/<groupname>
where <groupname> is composed of:
- the abbreviated course code as used in the username e.g., "c999z"
- the character "g"
- the number of the group (starting at 0 for the first group for the class requested to us) prepended with 0s to make the total group name 8 characters long
e.g., "c999zg00".
Cluster Usage
You may not run computational jobs on any submission host. You must instead schedule your jobs with the SLURM workload manager. You can find out more in the public documentation for the SLURM Workload Manager.
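For example, a minimal batch script might look like the following sketch (the file name my_job.sh, the job name, and the resource amounts are illustrative assumptions, not requirements; the partition, account, and QoS values are the ones available to class accounts, listed in the Nexus section below):

#!/bin/bash
#SBATCH --job-name=my_job    # illustrative job name
#SBATCH --partition=class    # class accounts must submit to the class partition
#SBATCH --account=class      # under the class account
#SBATCH --qos=default        # one of the QoSes available to class accounts
#SBATCH --gres=gpu:1         # optional: request one GPU
#SBATCH --mem=8G             # illustrative memory request
#SBATCH --time=00:30:00      # illustrative 30-minute time limit

hostname                     # replace with your actual workload

Submit it with sbatch my_job.sh; by default, output is written to a slurm-<jobid>.out file in the directory you submitted from.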
Any questions or issues with the cluster must first be raised through your TA.
Nexus
Class accounts only have access to the following submission parameters in SLURM. You may be required to explicitly set each of these when submitting jobs (see the example after this list).
- Partition - class
- Account - class
- QoS - default, medium, and high
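For example (a hedged sketch; the GPU request and time limit are arbitrary illustrations), an interactive session that explicitly sets all three parameters:

$ srun --partition=class --account=class --qos=default --gres=gpu:1 --time=01:00:00 --pty bash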
Available Nodes
You can list the available nodes and their current state with the show_nodes -p class command. This list of nodes is not completely static, as nodes may be pulled out of service to repair or replace GPUs or other components.
$ show_nodes -p class
NODELIST  CPUS  MEMORY  AVAIL_FEATURES        GRES            STATE  PARTITION
tron00    32    257540  rhel8,AMD,EPYC-7302   gpu:rtxa6000:8  idle   class
tron01    32    257540  rhel8,AMD,EPYC-7302   gpu:rtxa6000:8  idle   class
tron02    32    257540  rhel8,AMD,EPYC-7302   gpu:rtxa6000:8  idle   class
tron03    32    257540  rhel8,AMD,EPYC-7302   gpu:rtxa6000:8  idle   class
tron04    32    257540  rhel8,AMD,EPYC-7302   gpu:rtxa6000:8  idle   class
tron05    32    257540  rhel8,AMD,EPYC-7302   gpu:rtxa6000:8  idle   class
tron06    16    128520  rhel8,AMD,EPYC-7302P  gpu:rtxa4000:4  idle   class
tron07    16    128520  rhel8,AMD,EPYC-7302P  gpu:rtxa4000:4  idle   class
tron08    16    128520  rhel8,AMD,EPYC-7302P  gpu:rtxa4000:4  idle   class
tron09    16    128520  rhel8,AMD,EPYC-7302P  gpu:rtxa4000:4  idle   class
tron10    16    128524  rhel8,Zen,EPYC-7313P  gpu:rtxa4000:4  idle   class
tron11    16    128524  rhel8,Zen,EPYC-7313P  gpu:rtxa4000:4  idle   class
tron12    16    128525  rhel8,AMD,EPYC-7302P  gpu:rtxa4000:4  idle   class
tron13    16    128520  rhel8,AMD,EPYC-7302P  gpu:rtxa4000:4  idle   class
tron14    16    128520  rhel8,AMD,EPYC-7302P  gpu:rtxa4000:4  idle   class
tron15    16    128520  rhel8,AMD,EPYC-7302P  gpu:rtxa4000:4  idle   class
tron16    16    128524  rhel8,Zen,EPYC-7313P  gpu:rtxa4000:4  idle   class
tron17    16    128524  rhel8,Zen,EPYC-7313P  gpu:rtxa4000:4  idle   class
tron18    16    128524  rhel8,Zen,EPYC-7313P  gpu:rtxa4000:4  idle   class
tron19    16    128524  rhel8,Zen,EPYC-7313P  gpu:rtxa4000:4  idle   class
tron20    16    128524  rhel8,Zen,EPYC-7313P  gpu:rtxa4000:4  idle   class
tron21    16    128525  rhel8,AMD,EPYC-7302P  gpu:rtxa4000:4  idle   class
tron22    16    128525  rhel8,AMD,EPYC-7302   gpu:rtxa4000:4  idle   class
tron23    16    128525  rhel8,AMD,EPYC-7302   gpu:rtxa4000:4  idle   class
tron24    16    128525  rhel8,AMD,EPYC-7302   gpu:rtxa4000:4  idle   class
tron25    16    128525  rhel8,AMD,EPYC-7302   gpu:rtxa4000:4  idle   class
tron26    16    128525  rhel8,AMD,EPYC-7302   gpu:rtxa4000:4  idle   class
tron27    16    128521  rhel8,AMD,EPYC-7302   gpu:rtxa4000:4  idle   class
tron28    16    128525  rhel8,AMD,EPYC-7302   gpu:rtxa4000:4  idle   class
tron29    16    128525  rhel8,AMD,EPYC-7302   gpu:rtxa4000:4  idle   class
tron30    16    128521  rhel8,AMD,EPYC-7302   gpu:rtxa4000:4  idle   class
tron31    16    128521  rhel8,AMD,EPYC-7302   gpu:rtxa4000:4  idle   class
tron32    16    128525  rhel8,AMD,EPYC-7302   gpu:rtxa4000:4  idle   class
tron33    16    128521  rhel8,AMD,EPYC-7302   gpu:rtxa4000:4  idle   class
tron34    16    128524  rhel8,Zen,EPYC-7313P  gpu:rtxa4000:4  idle   class
tron35    16    128521  rhel8,AMD,EPYC-7302   gpu:rtxa4000:4  idle   class
tron36    16    128525  rhel8,AMD,EPYC-7302   gpu:rtxa4000:4  idle   class
tron37    16    128521  rhel8,AMD,EPYC-7302   gpu:rtxa4000:4  idle   class
tron38    16    128525  rhel8,AMD,EPYC-7302   gpu:rtxa4000:4  idle   class
tron39    16    128525  rhel8,AMD,EPYC-7302   gpu:rtxa4000:4  idle   class
tron40    16    128525  rhel8,AMD,EPYC-7302   gpu:rtxa4000:4  idle   class
tron41    16    128525  rhel8,AMD,EPYC-7302   gpu:rtxa4000:4  idle   class
tron42    16    128525  rhel8,AMD,EPYC-7302   gpu:rtxa4000:4  idle   class
tron43    16    128525  rhel8,AMD,EPYC-7302   gpu:rtxa4000:4  idle   class
tron44    16    128525  rhel8,AMD,EPYC-7302   gpu:rtxa4000:4  idle   class
tron45    16    128525  rhel8,AMD,EPYC-7302   gpu:rtxa4000:4  idle   class
tron46    48    257539  rhel8,Zen,EPYC-7352   gpu:rtxa5000:8  idle   class
tron47    48    257539  rhel8,Zen,EPYC-7352   gpu:rtxa5000:8  idle   class
tron48    48    257539  rhel8,Zen,EPYC-7352   gpu:rtxa5000:8  idle   class
tron49    48    257539  rhel8,Zen,EPYC-7352   gpu:rtxa5000:8  idle   class
tron50    48    257539  rhel8,Zen,EPYC-7352   gpu:rtxa5000:8  idle   class
tron51    48    257539  rhel8,Zen,EPYC-7352   gpu:rtxa5000:8  idle   class
tron52    48    257539  rhel8,Zen,EPYC-7352   gpu:rtxa5000:8  idle   class
tron53    48    257539  rhel8,Zen,EPYC-7352   gpu:rtxa5000:8  idle   class
tron54    48    257539  rhel8,Zen,EPYC-7352   gpu:rtxa5000:8  idle   class
tron55    48    257539  rhel8,Zen,EPYC-7352   gpu:rtxa5000:8  idle   class
tron56    48    257539  rhel8,Zen,EPYC-7352   gpu:rtxa5000:8  idle   class
tron57    48    257539  rhel8,Zen,EPYC-7352   gpu:rtxa5000:8  idle   class
tron58    48    257539  rhel8,Zen,EPYC-7352   gpu:rtxa5000:8  idle   class
tron59    48    257539  rhel8,Zen,EPYC-7352   gpu:rtxa5000:8  idle   class
tron60    48    257539  rhel8,Zen,EPYC-7352   gpu:rtxa5000:8  idle   class
tron61    48    257539  rhel8,Zen,EPYC-7352   gpu:rtxa5000:8  idle   class
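The GRES and AVAIL_FEATURES columns can be used to target specific hardware with standard SLURM options. A sketch assuming you want one RTX A4000 GPU on an EPYC-7313P node (both values taken from the listing above):

$ srun --partition=class --account=class --qos=default --gres=gpu:rtxa4000:1 --constraint=EPYC-7313P --pty bash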
You can also find more granular information about an individual node with the scontrol show node command.
$ scontrol show node tron00
NodeName=tron00 Arch=x86_64 CoresPerSocket=16
   CPUAlloc=9 CPUTot=32 CPULoad=1.26
   AvailableFeatures=rhel8,AMD,EPYC-7302
   ActiveFeatures=rhel8,AMD,EPYC-7302
   Gres=gpu:rtxa6000:8
   NodeAddr=tron00 NodeHostName=tron00 Version=21.08.8-2
   OS=Linux 4.18.0-372.19.1.el8_6.x86_64 #1 SMP Mon Jul 18 11:14:02 EDT 2022
   RealMemory=257540 AllocMem=221184 FreeMem=187393 Sockets=2 Boards=1
   State=MIXED ThreadsPerCore=1 TmpDisk=0 Weight=100 Owner=N/A MCS_label=N/A
   Partitions=class,scavenger,tron
   BootTime=2022-08-18T17:35:10 SlurmdStartTime=2022-08-19T13:01:47
   LastBusyTime=2022-08-22T11:20:13
   CfgTRES=cpu=32,mem=257540M,billing=346,gres/gpu=8,gres/gpu:rtxa6000=8
   AllocTRES=cpu=9,mem=216G,gres/gpu=2,gres/gpu:rtxa6000=2
   CapWatts=n/a
   CurrentWatts=0 AveWatts=0
   ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s