
Parthenon legacy scaling instructions



Built using the latest develop branch with the Summit machine file.
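
For reference, a build along the following lines should work. Parthenon's MACHINE_CFG CMake option is standard, but the exact machine file path and target name are assumptions, so check your checkout:

# sketch only: machine file path and build target are assumptions
mkdir build && cd build
cmake -DMACHINE_CFG=../cmake/machinecfg/Summit.cmake ..
make -j advection-example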

Base input file (saved as parthinput.legacy for the runs below)

<parthenon/job>
problem_id = advection

<parthenon/mesh>
refinement = adaptive
numlevel = 2

nx1 = 256
x1min = -0.5
x1max = 0.5
ix1_bc = periodic
ox1_bc = periodic

nx2 = 256
x2min = -0.5
x2max = 0.5
ix2_bc = periodic
ox2_bc = periodic

nx3 = 256
x3min = -0.5
x3max = 0.5
ix3_bc = periodic
ox3_bc = periodic

<parthenon/meshblock>
nx1 = 32
nx2 = 32
nx3 = 32

<parthenon/time>
tlim = 1.0
integrator = rk1
nlim = 10
perf_cycle_offset = 2
ncycle_out_mesh = -10

<Advection>
cfl = 0.30
vx = 1.0
vy = 1.0
vz = 1.0
profile = smooth_gaussian
ang_2 = 0.0
ang_3 = 0.0
ang_2_vert = false
ang_3_vert = false
amp = 1.0

refine_tol = 1.01    # control the package specific refinement tagging function
derefine_tol = 1.001
compute_error = false

num_vars = 1 # number of variables in variable vector
buffer_send_pack = true  # send all buffers using packs
buffer_recv_pack = true  # receive buffers using packs
buffer_set_pack =  true  # set received buffers using packs
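
The runs below reuse this file via -i parthinput.legacy and override individual parameters on the command line using Parthenon's block/parameter=value syntax, so the same base file serves every mesh size, e.g.:

# override the mesh dimensions at launch time; the base file supplies the rest
./example/advection/advection-example -i parthinput.legacy \
    parthenon/mesh/nx1=512 parthenon/mesh/nx2=256 parthenon/mesh/nx3=256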

Scaling test

The jsrun commands and environment below are set up to mirror Sierra, i.e., to use 4 GPUs per node rather than the 6 Summit actually has: each resource set maps to one socket (21 cores, 3 GPUs), and KOKKOS_NUM_DEVICES=2 restricts Kokkos to 2 of the 3 GPUs in each set, so the two resource sets per host yield 4 active GPUs per node. Adapt the launch command and environment as necessary.

# ensure that only two GPUs are used in each resource set [to mirror Sierra]
export KOKKOS_NUM_DEVICES=2

export MX=256
export MY=256
export MZ=256

# >1 means we need MPS and to oversubscribe the GPU
export TASKS_PER_GPU=1

export NODES=1
# 1 node, 1 gpu
jsrun --nrs 1 --tasks_per_rs $((1*TASKS_PER_GPU)) --cpu_per_rs 21 --gpu_per_rs 3 --rs_per_host 1 --smpiargs=-gpu ./example/advection/advection-example -i parthinput.legacy parthenon/mesh/nx1=$MX parthenon/mesh/nx2=$MY parthenon/mesh/nx3=$MZ | tee parth.legacy.out.nodes_${NODES}-gpu_1-mps_${TASKS_PER_GPU}

# 1 node, 2 gpu
export MX=512
jsrun --nrs 1 --tasks_per_rs $((2*TASKS_PER_GPU)) --cpu_per_rs 21 --gpu_per_rs 3 --rs_per_host 1 --smpiargs=-gpu ./example/advection/advection-example -i parthinput.legacy parthenon/mesh/nx1=$MX parthenon/mesh/nx2=$MY parthenon/mesh/nx3=$MZ | tee parth.legacy.out.nodes_${NODES}-gpu_2-mps_${TASKS_PER_GPU}

# 1 node, 4 gpu
export MY=512
jsrun --nrs $((2*NODES)) --tasks_per_rs $((2*TASKS_PER_GPU)) --cpu_per_rs 21 --gpu_per_rs 3 --rs_per_host 2 --smpiargs=-gpu ./example/advection/advection-example -i parthinput.legacy parthenon/mesh/nx1=$MX parthenon/mesh/nx2=$MY parthenon/mesh/nx3=$MZ | tee parth.legacy.out.nodes_${NODES}-gpu_4-mps_${TASKS_PER_GPU}

# and now continue by updating NODES and MX, MY, MZ accordingly
# (see the loop sketch after the 2-node example below)

# 2 nodes, 4 gpu per node
export NODES=2
export MZ=512
jsrun --nrs $((2*NODES)) --tasks_per_rs $((2*TASKS_PER_GPU)) --cpu_per_rs 21 --gpu_per_rs 3 --rs_per_host 2 --smpiargs=-gpu ./example/advection/advection-example -i parthinput.legacy parthenon/mesh/nx1=$MX parthenon/mesh/nx2=$MY parthenon/mesh/nx3=$MZ | tee parth.legacy.out.nodes_${NODES}-gpu_4-mps_${TASKS_PER_GPU}
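
The remaining steps follow the same weak-scaling pattern: each time the node count doubles, one mesh dimension doubles (cycling x, y, z) so the work per GPU stays constant. A minimal bash sketch automating this; the loop itself is an assumption, not part of the original instructions:

# hypothetical helper loop: doubles the node count and one mesh
# dimension per step (x -> y -> z) to keep work per GPU constant
DIMS=(MX MY MZ)
d=0
for NODES in 4 8 16 32; do
  export NODES
  export ${DIMS[$d]}=$(( 2 * ${!DIMS[$d]} ))  # double the next dimension
  d=$(( (d + 1) % 3 ))
  jsrun --nrs $((2*NODES)) --tasks_per_rs $((2*TASKS_PER_GPU)) --cpu_per_rs 21 --gpu_per_rs 3 --rs_per_host 2 --smpiargs=-gpu ./example/advection/advection-example -i parthinput.legacy parthenon/mesh/nx1=$MX parthenon/mesh/nx2=$MY parthenon/mesh/nx3=$MZ | tee parth.legacy.out.nodes_${NODES}-gpu_4-mps_${TASKS_PER_GPU}
done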



##### Now repeat the above, but using 4 ranks per GPU
export TASKS_PER_GPU=4
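
With more than one rank per GPU, CUDA MPS has to be active. On Summit, MPS is typically requested when the batch allocation is created, e.g. via an LSF allocation flag (standard OLCF usage, not shown on the original page):

# request an MPS-enabled allocation at submission time (Summit/LSF)
#BSUB -alloc_flags gpumps

Then rerun the jsrun commands above unchanged; TASKS_PER_GPU scales the --tasks_per_rs arguments.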