PBS for GPU software

  未分类

yfoOoj.gif
cat /opt/software/XFdtd.7/remcom/XFdtd_7.3.2.7/bin/Linux-x86_64RHEL5/xf
#!/bin/bash
source /etc/profile
xfsolver --use-xstream -d=0,1 project.xsim

脚本:
cat 1.pbs
#!/bin/bash
#PBS -N node01_xfdtd
#PBS -l nodes=1:ppn=1
#PBS -l walltime=99:99:99
#PBS -l cput=99:99:99
#PBS -j oe
cd $PBS_O_WORKDIR
xf

cat 2.pbs
#!/bin/bash
#PBS -N node02_xfdtd
#PBS -l nodes=2:ppn=1
#PBS -l walltime=99:99:99
#PBS -l cput=99:99:99
#PBS -j oe
cd $PBS_O_WORKDIR
xf
若求方便,比如一键操作和命令最简化之类的,xfsolver --use-xstream -d=0,1 project.xsim 是只用GPU进行计算的命令。我将它的运行写成xf脚本,然后加入pbs脚本。以上是我写的2个,放在其用户主目录XFDTD_PBS下,这样不管进什么例子,只需运行~/XFDTD_PBS/*.pbs ,又根据学生要求,现在只需要写1或者2就可以了!
当它运行第一个时就在节点一上,运行2时,虽然是写的nodes=2,它其实只在2上运行,不管你后面ppn写的多少!
是不是因为这个软件没有编译成关于GPU的并行版本造成的,关键编译时确实加入了mpich2,而用命令mpirun提交时会显示两遍进程,应该也是没有编成并行的反映!
但是现在学生可以通过这两个脚本来手动提交到两台节点上,现在的问题是如何只写一个脚本来实现当第一个节点满负荷时系统自动提交到第二个节点上!

#######################################################################################################
1.指定使用pbsnodes里面的某一个或者多个节点进行计算:
a.使用指定的某一个节点:
#PBS -l nodes=node03:ppn=2 /////指定使用node03,2个核进行计算
b.使用指定的多个节点:
#PBS -l nodes=node01:ppn=2+node02:ppn=2 //////指定使用node01和node02,每个节点用2个核进行计算
2.在pbs队列中加入GPU参数(只针对于nvidia驱动)
a.编译
./configure --prefix=/opt/software/torque-4.2.6 --enable-shared --enable-static --enable-nvidia-gpus
make
make install
b.编译完成之后,修改/var/spool/torque中的相关参数,如server_priv/nodes:
tech05 np=4
其他参数在此不再赘述。
重启pbs所有服务之后,server_priv/nodes中会自动识别gpu,如:
tech05 np=4 gpus=1
此时,qnodes显示为:
[gentai@tech05 torque]# qnodes
tech05
state = free
np = 4
ntype = cluster
status = rectime=1408417978,varattr=,jobs=,state=free,netload=8066812,gres=,loadave=0.03,ncpus=2,physmem=3892128kb,availmem=13173620kb,totmem=14132120kb,idletime=5285,nusers=2,nsessions=2,sessions=2458 2672,uname=Linux tech05 2.6.32-220.el6.x86_64 #1 SMP Tue Dec 6 19:48:22 GMT 2011 x86_64,opsys=linux
mom_service_port = 15002
mom_manager_port = 15003
gpus = 1
gpu_status = gpu[0]=gpu_id=0000:00:0D.0;gpu_product_name=GeForce 7025 / nForce 630a;gpu_display=N/A;gpu_pci_device_id=03D610DE;gpu_pci_location_id=0000:00:0D.0;gpu_fan_speed=N/A;gpu_memory_total=30 MB;gpu_memory_used=28 MB;gpu_mode=N/A;gpu_state=Unavailable;gpu_utilization=N/A;gpu_memory_utilization=N/A;gpu_ecc_mode=N/A;gpu_single_bit_ecc_errors=N/A;gpu_double_bit_ecc_errors=N/A;gpu_temperature=N/A,driver_ver=304.60,timestamp=Tue Aug 19 11:12:58 2014
c.关于pbs脚本中如何加入gpu参数,可以参考目前torque-4.2.6版本的官方使用手册:
http://www.physik.uni-leipzig.de/wiki/files/torqueAdminGuide-4.2.6.pdf
中的第9、10、28、44、182、183页的相关描述!
####################################################################################################

脚本内容改为下面时
[gentai@node01 000001]$ vim /home/gentai/XFDTD_PBS/1.pbs
#!/bin/bash
#PBS -N node01_xfdtd
#PBS -l nodes=node02:ppn=2+node01:ppn=2
#PBS -l walltime=99:99:99
#PBS -l cput=99:99:99
#PBS -j oe
cd $PBS_O_WORKDIR
#xf
xfsolver project.xsim

[gentai@node01 000001]$ qstat -n

node01:
Req'd Req'd Elap
Job ID Username Queue Jobname SessID NDS TSK Memory Time S Time


57.node01 gentai batch node01_xfdtd 5515 2 0 -- 100:4 R 00:01
node02/0+node01/0
58.node01 gentai batch node01_xfdtd 5606 2 0 -- 100:4 R 00:00
node02/2+node02/1+node01/2+node01/1

显示是正常的,但是

[root@node01 var]# ps -ef | grep xf
root 784 32226 0 01:37 pts/2 00:00:00 grep xf
root 3248 1 0 Jun09 ? 00:00:00 /usr/sbin/rpc.ypxfrd

[root@node02 ~]# ps -ef | grep xf
gentai 5582 5581 0 13:45 ? 00:00:00 /bin/bash /opt/software/XFdtd.7/remcom/XFdtd_7.3.2.7/bin/Linux-x86_64RHEL5/xfsolver project.xsim
gentai 5596 5582 60 13:45 ? 00:02:13 /opt/software/XFdtd.7/remcom/XFdtd_7.3.2.7/bin/Linux-x86_64RHEL5/xfsolver_exe project.xsim
gentai 5673 5672 0 13:45 ? 00:00:00 /bin/bash /opt/software/XFdtd.7/remcom/XFdtd_7.3.2.7/bin/Linux-x86_64RHEL5/xfsolver project.xsim
gentai 5687 5673 59 13:45 ? 00:01:47 /opt/software/XFdtd.7/remcom/XFdtd_7.3.2.7/bin/Linux-x86_64RHEL5/xfsolver_exe project.xsim
root 5722 5229 0 13:48 pts/0 00:00:00 grep xf

还是写在前面的节点有进程!
现在还没有重新编译安装PBS
#####################################################################################################
qnodes中自动识别了gpu,但是nodes中没有自动修改
[root@node02 init.d]# vim /var/spool/torque/server_priv/nodes
node01 np=12
node02 np=6
[root@node02 init.d]# qnodes
node01
state = free
np = 12
ntype = cluster
status = rectime=1408430725,varattr=,jobs=,state=free,netload=67577641178,gres=,loadave=0.01,ncpus=12,physmem=33046952kb,availmem=62369480kb,totmem=66606728kb,idletime=18082,nusers=3,nsessions=30,sessions=1184 1185 1186 5814 2561 5638 3575 4118 3917 5629 5705 5790 5795 5828 5914 6886 6890 6891 6899 6901 6921 10732 17412 17429 17434 19175 28855 29725 30160 30164,uname=Linux node01 2.6.32-358.el6.x86_64 #1 SMP Fri Feb 22 00:31:26 UTC 2013 x86_64,opsys=linux
mom_service_port = 15002
mom_manager_port = 15003
gpus = 2
gpu_status = gpu[1]=gpu_id=0000:82:00.0;gpu_product_name=Tesla C2075;gpu_display=Enabled;gpu_pci_device_id=109610DE;gpu_pci_location_id=0000:82:00.0;gpu_fan_speed=30 %;gpu_memory_total=5375 MiB;gpu_memory_used=34 MiB;gpu_mode=Default;gpu_state=Unallocated;gpu_utilization=0 %;gpu_memory_utilization=5 %;gpu_ecc_mode=Enabled;gpu_single_bit_ecc_errors=0;gpu_double_bit_ecc_errors=0;gpu_temperature=68 C,gpu[0]=gpu_id=0000:03:00.0;gpu_product_name=Tesla K20c;gpu_display=Disabled;gpu_pci_device_id=102210DE;gpu_pci_location_id=0000:03:00.0;gpu_fan_speed=32 %;gpu_memory_total=4799 MiB;gpu_memory_used=12 MiB;gpu_mode=Default;gpu_state=Unallocated;gpu_utilization=0 %;gpu_memory_utilization=0 %;gpu_ecc_mode=Enabled;gpu_single_bit_ecc_errors=0;gpu_double_bit_ecc_errors=0;gpu_temperature=44 C,driver_ver=331.20,timestamp=Tue Aug 19 02:45:25 2014

node02
state = free
np = 6
ntype = cluster
status = rectime=1408430711,varattr=,jobs=,state=free,netload=2900524167,gres=,loadave=0.00,ncpus=6,physmem=16495372kb,availmem=32319464kb,totmem=33279208kb,idletime=6099851,nusers=1,nsessions=6,sessions=3266 3270 3271 3297 3299 3323,uname=Linux node02 2.6.32-358.el6.x86_64 #1 SMP Fri Feb 22 00:31:26 UTC 2013 x86_64,opsys=linux
mom_service_port = 15002
mom_manager_port = 15003
gpus = 3
gpu_status = gpu[2]=gpu_id=0000:06:00.0;gpu_product_name=Quadro NVS 290;gpu_display=N/A;gpu_pci_device_id=042F10DE;gpu_pci_location_id=0000:06:00.0;gpu_fan_speed=100 %;gpu_memory_total=255 MiB;gpu_memory_used=35 MiB;gpu_mode=Default;gpu_state=Unallocated;gpu_utilization=N/A;gpu_memory_utilization=N/A;gpu_ecc_mode=N/A;gpu_single_bit_ecc_errors=N/A;gpu_double_bit_ecc_errors=N/A;gpu_temperature=65 C,gpu[1]=gpu_id=0000:04:00.0;gpu_product_name=Tesla K20c;gpu_display=Disabled;gpu_pci_device_id=102210DE;gpu_pci_location_id=0000:04:00.0;gpu_fan_speed=30 %;gpu_memory_total=4799 MiB;gpu_memory_used=12 MiB;gpu_mode=Default;gpu_state=Unallocated;gpu_utilization=0 %;gpu_memory_utilization=0 %;gpu_ecc_mode=Enabled;gpu_single_bit_ecc_errors=0;gpu_double_bit_ecc_errors=0;gpu_temperature=41 C,gpu[0]=gpu_id=0000:03:00.0;gpu_product_name=Tesla K20c;gpu_display=Disabled;gpu_pci_device_id=102210DE;gpu_pci_location_id=0000:03:00.0;gpu_fan_speed=32 %;gpu_memory_total=4799 MiB;gpu_memory_used=12 MiB;gpu_mode=Default;gpu_state=Unallocated;gpu_utilization=0 %;gpu_memory_utilization=0 %;gpu_ecc_mode=Enabled;gpu_single_bit_ecc_errors=0;gpu_double_bit_ecc_errors=0;gpu_temperature=43 C,driver_ver=331.20,timestamp=Tue Aug 19 14:54:46 2014
######################################################################################################
node01软件第一次运行(指定两个GPU都运行):
xfsolver --use-xstream -d=0,1 project.xsim
Info: Running /opt/software/XFdtd.7/remcom/XFdtd_7.3.2.7/bin/Linux-x86_64RHEL5/xfsolver_exe --use-xstream -d=0,1 project.xsim

XFSolver Version 7.3.2.7 (64-bit) (XStream:CUDA)
Simulation written with XFdtd 7.3.2.7 (32-bit)
Obtaining license… license obtained!
Base working directory is: /home/gentai/Desktop/pbs_test/Simulations/000001
Working directory changed to: 000001/Run0001
Initializing XStream (CUDA) FDTD Accelerator for calculation…
Executing simulation with 2 XStream (CUDA) Accelerators [ 0 1 ].

Processing finished. xfsolver exiting.

再运行一次:
xfsolver --use-xstream -d=0,1 project.xsim
Info: Running /opt/software/XFdtd.7/remcom/XFdtd_7.3.2.7/bin/Linux-x86_64RHEL5/xfsolver_exe --use-xstream -d=0,1 project.xsim

XFSolver Version 7.3.2.7 (64-bit) (XStream:CUDA)
Simulation written with XFdtd 7.3.2.7 (32-bit)
Obtaining license… license obtained!
Base working directory is: /home/gentai/Desktop/pbs_test/Simulations/000001
Working directory changed to: 000001/Run0001
Initializing XStream (CUDA) FDTD Accelerator for calculation…
XFSolver was unable to acquire the 2 XStream (CUDA) Accelerator token(s) requested.
Attempting to restart with non-accelerated engine.
The Software FDTD kernel is being used to complete the calculation.

用mpirun跑的话,也是只能提交一次,默认提交到GPU1(C2075)但过程是显示两遍的,根据软件供应商的说法,老师申请的license不支持并行,所以这个用法应该是不正确的
mpirun -np 2 xfsolver --use-xstream project.xsim
Info: Running /opt/software/XFdtd.7/remcom/XFdtd_7.3.2.7/bin/Linux-x86_64RHEL5/xfsolver_exe --use-xstream project.xsim

Info: Running /opt/software/XFdtd.7/remcom/XFdtd_7.3.2.7/bin/Linux-x86_64RHEL5/xfsolver_exe --use-xstream project.xsim

XFSolver Version 7.3.2.7 (64-bit) (XStream:CUDA)
XFSolver Version 7.3.2.7 (64-bit) (XStream:CUDA)
Simulation written with XFdtd 7.3.2.7 (32-bit)
Obtaining license… Simulation written with XFdtd 7.3.2.7 (32-bit)
Obtaining license… license obtained!
Base working directory is: /home/gentai/Desktop/pbs_test/Simulations/000001
Working directory changed to: 000001/Run0001
license obtained!
Base working directory is: /home/gentai/Desktop/pbs_test/Simulations/000001
Working directory changed to: 000001/Run0001
Initializing XStream (CUDA) FDTD Accelerator for calculation…
Initializing XStream (CUDA) FDTD Accelerator for calculation…
Executing simulation with 1 XStream (CUDA) Accelerator [ 1 ].
Executing simulation with 1 XStream (CUDA) Accelerator [ 1 ].

再提交一次就会出现
Obtaining license…
Unable to check out license

指定在0上
xfsolver --use-xstream -d=0 project.xsim
Info: Running /opt/software/XFdtd.7/remcom/XFdtd_7.3.2.7/bin/Linux-x86_64RHEL5/xfsolver_exe --use-xstream -d=0 project.xsim

XFSolver Version 7.3.2.7 (64-bit) (XStream:CUDA)
Simulation written with XFdtd 7.3.2.7 (32-bit)
Obtaining license… license obtained!
Base working directory is: /home/gentai/Desktop/pbs_test/Simulations/000002
Working directory changed to: 000002/Run0001
Initializing XStream (CUDA) FDTD Accelerator for calculation…
Executing simulation with 1 XStream (CUDA) Accelerator [ 0 ].

Processing finished. xfsolver exiting.

不指定,则只在1上运行
xfsolver --use-xstream project.xsim
Info: Running /opt/software/XFdtd.7/remcom/XFdtd_7.3.2.7/bin/Linux-x86_64RHEL5/xfsolver_exe --use-xstream project.xsim

XFSolver Version 7.3.2.7 (64-bit) (XStream:CUDA)
Simulation written with XFdtd 7.3.2.7 (32-bit)
Obtaining license… license obtained!
Base working directory is: /home/gentai/Desktop/pbs_test/Simulations/000001
Working directory changed to: 000001/Run0001
Initializing XStream (CUDA) FDTD Accelerator for calculation…
Executing simulation with 1 XStream (CUDA) Accelerator [ 1 ].

Thu Aug 21 21:29:16 2014
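+------------------------------------------------------+
| NVIDIA-SMI 331.20     Driver Version: 331.20         |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|===============================+======================+======================|
|   0  Tesla K20c          Off  | 0000:03:00.0     Off |                    0 |
| 35%   47C    P8    16W / 225W |     13MiB /  4799MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   1  Tesla C2075         Off  | 0000:82:00.0      On |                    0 |
| 54%   88C    P0    88W /  N/A |    182MiB /  5375MiB |     65%      Default |
+-------------------------------+----------------------+----------------------+

+-----------------------------------------------------------------------------+
| Compute processes:                                               GPU Memory |
|  GPU       PID  Process name                                     Usage      |
|=============================================================================|
|    1      4962  ...Fdtd_7.3.2.7/bin/Linux-x86_64RHEL5/xfsolver_exe   140MiB |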

也就是说,不指明GPU的情况下,是默认只提交到 C2075 上进行计算的,而且一次性只能最多进行两次计算

切换到node02上做对比
ssh node02
Last login: Fri Aug 22 09:40:21 2014 from node01
[gentai@node02 ~]$ cd /home/gentai/Desktop/pbs_test/Simulations/000001
[gentai@node02 000001]$ xfsolver --use-xstream project.xsim
Info: Running /opt/software/XFdtd.7/remcom/XFdtd_7.3.2.7/bin/Linux-x86_64RHEL5/xfsolver_exe --use-xstream project.xsim

XFSolver Version 7.3.2.7 (64-bit) (XStream:CUDA)
Simulation written with XFdtd 7.3.2.7 (32-bit)
Obtaining license… license obtained!
Base working directory is: /home/gentai/Desktop/pbs_test/Simulations/000001
Working directory changed to: 000001/Run0001
Initializing XStream (CUDA) FDTD Accelerator for calculation…
Executing simulation with 2 XStream (CUDA) Accelerators [ 0 1 ].

Processing finished. xfsolver exiting.

第二次运行
xfsolver --use-xstream project.xsim
Info: Running /opt/software/XFdtd.7/remcom/XFdtd_7.3.2.7/bin/Linux-x86_64RHEL5/xfsolver_exe --use-xstream project.xsim

XFSolver Version 7.3.2.7 (64-bit) (XStream:CUDA)
Simulation written with XFdtd 7.3.2.7 (32-bit)
Obtaining license… license obtained!
Base working directory is: /home/gentai/Desktop/pbs_test/Simulations/000002
Working directory changed to: 000002/Run0001
Initializing XStream (CUDA) FDTD Accelerator for calculation…
XFSolver was unable to acquire the 2 XStream (CUDA) Accelerator token(s) requested.
Attempting to restart with non-accelerated engine.
The Software FDTD kernel is being used to complete the calculation.

Fri Aug 22 09:41:18 2014
+------------------------------------------------------+
| NVIDIA-SMI 331.89     Driver Version: 331.89         |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|===============================+======================+======================|
|   0  Tesla K20c          Off  | 0000:03:00.0     Off |                    0 |
| 38%   51C    P0    57W / 225W |    127MiB /  4799MiB |     44%      Default |
+-------------------------------+----------------------+----------------------+
|   1  Tesla K20c          Off  | 0000:04:00.0     Off |                    0 |
| 38%   50C    P0    57W / 225W |    127MiB /  4799MiB |     17%      Default |
+-------------------------------+----------------------+----------------------+
|   2  Quadro NVS 290      Off  | 0000:06:00.0     N/A |                  N/A |
|100%   79C   N/A    N/A /  N/A |      3MiB /   255MiB |     N/A      Default |
+-------------------------------+----------------------+----------------------+
虽然都只能进行一次计算,但是很明显,node02不用指明GPU,就在两个相同的GPU上进行计算,其它情况与节点一相同,都是只能同时进行两次计算

在节点一上运行
mpirun -np 1 --host node02 xfsolver --use-xstream -d=0,1 project.xsim
Info: Running /opt/software/XFdtd.7/remcom/XFdtd_7.3.2.7/bin/Linux-x86_64RHEL5/xfsolver_exe --use-xstream -d=0,1 project.xsim

XFSolver Version 7.3.2.7 (64-bit) (XStream:CUDA)
Simulation written with XFdtd 7.3.2.7 (32-bit)
Obtaining license… license obtained!
Base working directory is: /home/gentai/Desktop/pbs_test/Simulations/000001
Working directory changed to: 000001/Run0001
Initializing XStream (CUDA) FDTD Accelerator for calculation…
Executing simulation with 2 XStream (CUDA) Accelerators [ 0 1 ].
^CCtrl-C caught… cleaning up processes
[gentai@node01 000001]$ mpirun -np 1 --host node02 xfsolver --use-xstream -d=0 project.xsim
Info: Running /opt/software/XFdtd.7/remcom/XFdtd_7.3.2.7/bin/Linux-x86_64RHEL5/xfsolver_exe --use-xstream -d=0 project.xsim

XFSolver Version 7.3.2.7 (64-bit) (XStream:CUDA)
Simulation written with XFdtd 7.3.2.7 (32-bit)
Obtaining license…
Unable to check out license

Processing finished. xfsolver exiting.

再运行几次
都会报Obtaining license…
Unable to check out license
这是由于运行时ctrl+c的结果,虽然取消了操作,但是进程里还在
还是验证了一次只能进行两次计算!

下面是软件厂商的原话:
我查了一下,你们租用的license是3个GUI和2个计算引擎,也就是说可以同时打开3个界面进行编辑,并且可以同时单机运行2个仿真,每个仿真默认支持1个GPU。
当时好像没有提需要 GPU+MPI 的功能,license里面没有看到MPI的feature,所以应该不支持。

如果你们要测试MPI+GPU功能的话,需要再单独申请试用license。

可以确认你们租用的License是不支持MPI的,所以没有并行计算的功能。
2个单机计算引擎可以在不同的节点上各调用一个GPU同时完成两个工程的仿真。估计也能在1个节点上调用2个GPU进行仿真(你可以测试下)。但肯定不能跨节点调用GPU。

请跟您老师确认下你们是打算更改租用license的模块增加MPI+GPU功能,还是只是临时要一个试用License进行测试。如果需要增加MPI+GPU模块的话,可能会涉及到租用费用的变动,这块需要跟REMCOM原厂再谈。

LEAVE A COMMENT

Captcha Code