Task #8955
Add cmsstor409 and cmsstor410 in dCache Disk Production following new enhanced procedure
Description
Follow the enhanced procedure for adding new pools, test it with cmsstor409/410, and add them to production if all goes well.
Related issues
History
#1 Updated by Gerard Bernabeu Altayo over 5 years ago
- Follows Task #8928: Enhance procedure for adding new pools added
#2 Updated by Natalia Ratnikova over 5 years ago
- Status changed from New to Accepted
[root@cmsstor152 ~]# ssh -1 -i /etc/dcache/admin/server_key -c blowfish -p 22223 cmsstor152.fnal.gov -l admin
dCache Admin (VII) (user=admin)
[cmsstor152.fnal.gov] (local) admin > cd w-cmsstor409-disk_itb-disk1
[cmsstor152.fnal.gov] (w-cmsstor409-disk_itb-disk1) admin > migration ls
[1] SLEEPING migration copy permanent -target=pgroup - burningPools
[cmsstor152.fnal.gov] (w-cmsstor409-disk_itb-disk1) admin > migration cancel 1
[1] CANCELLED migration copy permanent -target=pgroup - burningPools
[cmsstor152.fnal.gov] (w-cmsstor409-disk_itb-disk1) admin > sweeper purge
Reclaiming 2689723269120 bytes
[cmsstor152.fnal.gov] (w-cmsstor409-disk_itb-disk1) admin > rep ls
00009319728C24FB4A459CA5D40F0F92E604 <C-------X--L(0)[0]> 1473849076 si={none.none}
0000B81991F7B2AB46F1BB343E08B8037FFF <C-------X--L(0)[0]> 1473849076 si={none.none}
00007D5A10C10F3C4CACBF838D7141A54920 <-------D---L(0)[0]> 3221225472 si={<unknown>}
[cmsstor152.fnal.gov] (w-cmsstor409-disk_itb-disk1) admin > rep ls
00009319728C24FB4A459CA5D40F0F92E604 <C-------X--L(0)[0]> 1473849076 si={none.none}
0000B81991F7B2AB46F1BB343E08B8037FFF <C-------X--L(0)[0]> 1473849076 si={none.none}
[cmsstor152.fnal.gov] (w-cmsstor409-disk_itb-disk1) admin > rep ls
00009319728C24FB4A459CA5D40F0F92E604 <C-------X--L(0)[0]> 1473849076 si={none.none}
0000B81991F7B2AB46F1BB343E08B8037FFF <C-------X--L(0)[0]> 1473849076 si={none.none}
[cmsstor152.fnal.gov] (w-cmsstor409-disk_itb-disk1) admin > migration info 1
Command : migration copy permanent -target=pgroup - burningPools
State : CANCELLED
Queued : 0
Attempts : 102814
Targets : w-cmsstor410-disk_itb-disk1,w-cmsstor409-disk_itb-disk2,w-cmsstor410-disk_itb-disk2
Completed : 102814 files; 3661671052287 bytes
Concurrency: 1
Running tasks:
[cmsstor152.fnal.gov] (w-cmsstor409-disk_itb-disk1) admin > rep ls
00009319728C24FB4A459CA5D40F0F92E604 <C-------X--L(0)[0]> 1473849076 si={none.none}
0000B81991F7B2AB46F1BB343E08B8037FFF <C-------X--L(0)[0]> 1473849076 si={none.none}
[cmsstor152.fnal.gov] (w-cmsstor409-disk_itb-disk1) admin > ..
[cmsstor152.fnal.gov] (local) admin > cd w-cmsstor409-disk_itb-disk2
[cmsstor152.fnal.gov] (w-cmsstor409-disk_itb-disk2) admin > migration ls
[1] SLEEPING migration copy permanent -target=pgroup - burningPools
[cmsstor152.fnal.gov] (w-cmsstor409-disk_itb-disk2) admin > migration cancel 1
[1] CANCELLED migration copy permanent -target=pgroup - burningPools
[cmsstor152.fnal.gov] (w-cmsstor409-disk_itb-disk2) admin > rep ls
000031618CF756C443898AA098BE7690FD3E <C----------L(0)[0]> 2147483648 si={none.none}
0000AEEB892B582F40A59B070658ABDB8DA7 <C----------L(0)[0]> 3221225472 si={none.none}
........ [skipped for brevity]
00002DA6695A31BA48D6A83C20616C793522 <C----------L(0)[0]> 3221225472 si={none.none}
000052DB30A625DA4BD7A45BF64BF0BDD574 <C----------L(0)[0]> 3221225472 si={none.none}
[cmsstor152.fnal.gov] (w-cmsstor409-disk_itb-disk2) admin > sweeper purge
Reclaiming 2719788040192 bytes
[cmsstor152.fnal.gov] (w-cmsstor409-disk_itb-disk2) admin > rep ls
00006500B72CC40345F0B012352E668D735F <C-------X--L(0)[0]> 4194304 si={none.none}
00009319728C24FB4A459CA5D40F0F92E604 <C-------X--L(0)[0]> 1473849076 si={none.none}
0000092F3BD4CA354148A8922983AF3023EC <C----------L(0)[0]> 3221225472 si={none.none}
000039A5659BF583488891EC256A51456447 <C----------L(0)[0]> 3221225472 si={none.none}
0000A765B27428404DC59997FB138A362ADC <C----------L(0)[0]> 4294967296 si={none.none}
0000AFB80E79CFAF44BD9F76506F24D13F34 <-------D---L(0)[0]> 2147483648 si={<unknown>}
00002909A9D69F8B40A9B7F789100E5E6590 <C-------X--L(0)[0]> 1473849076 si={none.none}
[cmsstor152.fnal.gov] (w-cmsstor409-disk_itb-disk2) admin > rep ls
00006500B72CC40345F0B012352E668D735F <C-------X--L(0)[0]> 4194304 si={none.none}
00009319728C24FB4A459CA5D40F0F92E604 <C-------X--L(0)[0]> 1473849076 si={none.none}
00002909A9D69F8B40A9B7F789100E5E6590 <C-------X--L(0)[0]> 1473849076 si={none.none}
[cmsstor152.fnal.gov] (w-cmsstor409-disk_itb-disk2) admin > rep ls
00006500B72CC40345F0B012352E668D735F <C-------X--L(0)[0]> 4194304 si={none.none}
00009319728C24FB4A459CA5D40F0F92E604 <C-------X--L(0)[0]> 1473849076 si={none.none}
00002909A9D69F8B40A9B7F789100E5E6590 <C-------X--L(0)[0]> 1473849076 si={none.none}
[cmsstor152.fnal.gov] (w-cmsstor409-disk_itb-disk2) admin >
[cmsstor152.fnal.gov] (w-cmsstor409-disk_itb-disk2) admin > ..
[cmsstor152.fnal.gov] (local) admin > cd w-cmsstor410-disk_itb-disk1
[cmsstor152.fnal.gov] (w-cmsstor410-disk_itb-disk1) admin > migration ls
[2] SLEEPING migration copy permanent -target=pgroup - burningPools
[1] FINISHED migration move target=pgroup - flushPools
[cmsstor152.fnal.gov] (w-cmsstor410-disk_itb-disk1) admin > migration cancel 2
[2] CANCELLED migration copy permanent -target=pgroup - burningPools
[cmsstor152.fnal.gov] (w-cmsstor410-disk_itb-disk1) admin > migration cancel 1
java.lang.IllegalStateException: The job cannot be cancelled in its present state
[cmsstor152.fnal.gov] (w-cmsstor410-disk_itb-disk1) admin > sweeper purge
Reclaiming 2641404887040 bytes
[cmsstor152.fnal.gov] (w-cmsstor410-disk_itb-disk1) admin > rep ls
00006500B72CC40345F0B012352E668D735F <C-------X--L(0)[0]> 4194304 si={none.none}
00000777FA412604410E9F926536A6E09CD4 <C----------L(0)[0]> 4294967296 si={none.none}
00008A92A42521644E7A9EBAF32C9158C0C2 <C----------L(0)[0]> 4294967296 si={none.none}
0000B81991F7B2AB46F1BB343E08B8037FFF <C-------X--L(0)[0]> 1473849076 si={none.none}
0000A362DF6E95394A7290759C64A38831B7 <C-------X--L(0)[0]> 1473849076 si={none.none}
[cmsstor152.fnal.gov] (w-cmsstor410-disk_itb-disk1) admin > rep ls
00006500B72CC40345F0B012352E668D735F <C-------X--L(0)[0]> 4194304 si={none.none}
0000B81991F7B2AB46F1BB343E08B8037FFF <C-------X--L(0)[0]> 1473849076 si={none.none}
0000A362DF6E95394A7290759C64A38831B7 <C-------X--L(0)[0]> 1473849076 si={none.none}
[cmsstor152.fnal.gov] (w-cmsstor410-disk_itb-disk1) admin >
[cmsstor152.fnal.gov] (w-cmsstor410-disk_itb-disk1) admin > ..
[cmsstor152.fnal.gov] (local) admin > cd w-cmsstor410-disk_itb-disk2
[cmsstor152.fnal.gov] (w-cmsstor410-disk_itb-disk2) admin > migration ls
[2] SLEEPING migration copy verify -concurrency=10 -tmode=cached -pins=keep -permanent -target=pgroup - burningPools
[1] CANCELLED migration copy permanent -target=pgroup - burningPools
[cmsstor152.fnal.gov] (w-cmsstor410-disk_itb-disk2) admin > migration cancel 2
[2] CANCELLED migration copy verify -concurrency=10 -tmode=cached -pins=keep -permanent -target=pgroup - burningPools
[cmsstor152.fnal.gov] (w-cmsstor410-disk_itb-disk2) admin > sweeper purge
Reclaiming 2707976880128 bytes
[cmsstor152.fnal.gov] (w-cmsstor410-disk_itb-disk2) admin > rep ls
00002909A9D69F8B40A9B7F789100E5E6590 <C-------X--L(0)[0]> 1473849076 si={none.none}
0000A362DF6E95394A7290759C64A38831B7 <C-------X--L(0)[0]> 1473849076 si={none.none}
[cmsstor152.fnal.gov] (w-cmsstor410-disk_itb-disk2) admin >
[cmsstor152.fnal.gov] (w-cmsstor410-disk_itb-disk2) admin > rep ls
00002909A9D69F8B40A9B7F789100E5E6590 <C-------X--L(0)[0]> 1473849076 si={none.none}
0000A362DF6E95394A7290759C64A38831B7 <C-------X--L(0)[0]> 1473849076 si={none.none}
[cmsstor152.fnal.gov] (w-cmsstor410-disk_itb-disk2) admin > pf 00002909A9D69F8B40A9B7F789100E5E6590
/dcache/uscms_test/natalia_test2
[cmsstor152.fnal.gov] (w-cmsstor410-disk_itb-disk2) admin > pf 0000A362DF6E95394A7290759C64A38831B7
/dcache/uscms_test/natalia_test1
[cmsstor152.fnal.gov] (w-cmsstor410-disk_itb-disk2) admin > ..
[cmsstor152.fnal.gov] (local) admin > cd w-cmsstor410-disk_itb-disk1
[cmsstor152.fnal.gov] (w-cmsstor410-disk_itb-disk1) admin > rep ls
00006500B72CC40345F0B012352E668D735F <C-------X--L(0)[0]> 4194304 si={none.none}
0000B81991F7B2AB46F1BB343E08B8037FFF <C-------X--L(0)[0]> 1473849076 si={none.none}
0000A362DF6E95394A7290759C64A38831B7 <C-------X--L(0)[0]> 1473849076 si={none.none}
[cmsstor152.fnal.gov] (w-cmsstor410-disk_itb-disk1) admin > pf 00006500B72CC40345F0B012352E668D735F
/dcache/uscms_test/4M.burningtest
[cmsstor152.fnal.gov] (w-cmsstor410-disk_itb-disk1) admin > pf 0000B81991F7B2AB46F1BB343E08B8037FFF
/dcache/uscms_test/natalia_test
[cmsstor152.fnal.gov] (w-cmsstor410-disk_itb-disk1) admin > pf 0000A362DF6E95394A7290759C64A38831B7
/dcache/uscms_test/natalia_test1
[cmsstor152.fnal.gov] (w-cmsstor410-disk_itb-disk1) admin > ..
[cmsstor152.fnal.gov] (local) admin > cd w-cmsstor409-disk_itb-disk1
[cmsstor152.fnal.gov] (w-cmsstor409-disk_itb-disk1) admin > rep ls
00009319728C24FB4A459CA5D40F0F92E604 <C-------X--L(0)[0]> 1473849076 si={none.none}
0000B81991F7B2AB46F1BB343E08B8037FFF <C-------X--L(0)[0]> 1473849076 si={none.none}
[cmsstor152.fnal.gov] (w-cmsstor409-disk_itb-disk1) admin > pf 00009319728C24FB4A459CA5D40F0F92E604
/dcache/uscms_test/natalia_test3
[cmsstor152.fnal.gov] (w-cmsstor409-disk_itb-disk1) admin > pf 0000B81991F7B2AB46F1BB343E08B8037FFF
/dcache/uscms_test/natalia_test
[cmsstor152.fnal.gov] (w-cmsstor409-disk_itb-disk1) admin > ..
[cmsstor152.fnal.gov] (local) admin > cd w-cmsstor409-disk_itb-disk2
[cmsstor152.fnal.gov] (w-cmsstor409-disk_itb-disk2) admin > rep ls
00006500B72CC40345F0B012352E668D735F <C-------X--L(0)[0]> 4194304 si={none.none}
00009319728C24FB4A459CA5D40F0F92E604 <C-------X--L(0)[0]> 1473849076 si={none.none}
00002909A9D69F8B40A9B7F789100E5E6590 <C-------X--L(0)[0]> 1473849076 si={none.none}
[cmsstor152.fnal.gov] (w-cmsstor409-disk_itb-disk2) admin > pf 00006500B72CC40345F0B012352E668D735F
/dcache/uscms_test/4M.burningtest
[cmsstor152.fnal.gov] (w-cmsstor409-disk_itb-disk2) admin > pf 00009319728C24FB4A459CA5D40F0F92E604
/dcache/uscms_test/natalia_test3
[cmsstor152.fnal.gov] (w-cmsstor409-disk_itb-disk2) admin > pf 00002909A9D69F8B40A9B7F789100E5E6590
/dcache/uscms_test/natalia_test2
[cmsstor152.fnal.gov] (w-cmsstor409-disk_itb-disk2) admin > rep ls
00006500B72CC40345F0B012352E668D735F <C-------X--L(0)[0]> 4194304 si={none.none}
00009319728C24FB4A459CA5D40F0F92E604 <C-------X--L(0)[0]> 1473849076 si={none.none}
00002909A9D69F8B40A9B7F789100E5E6590 <C-------X--L(0)[0]> 1473849076 si={none.none}
[cmsstor152.fnal.gov] (w-cmsstor409-disk_itb-disk2) admin >
=============================
On NFS-mounted namespace remove files not related to burning tests:
[root@cmsstor155 ~]# df
Filesystem 1K-blocks Used Available Use% Mounted on
/dev/sda3 222630532 18136540 193178332 9% /
tmpfs 8164516 0 8164516 0% /dev/shm
/dev/sda1 999320 94992 851900 11% /boot
/dev/sdd 12694973568 1793509596 10901463972 15% /storage/data3
/dev/sdb 12694973568 1799140088 10895833480 15% /storage/data1
/dev/sdc 12694973568 1666628124 11028345444 14% /storage/data2
cmsstor153:/dcache 1125899906842624 5259127072 1125894647715552 1% /dcache_itb
[root@cmsstor155 ~]# cd /dcache_itb/
[root@cmsstor155 dcache_itb]# ls
puppet uscms_test
[root@cmsstor155 dcache_itb]# cd uscms_test/
[root@cmsstor155 uscms_test]# ls
4M.burningtest natalia_test natalia_test2 testdir
burning natalia_test1 natalia_test3
[root@cmsstor155 uscms_test]# rm 4M.burningtest natalia_test*
rm: remove regular file `4M.burningtest'? y
rm: remove regular file `natalia_test'? y
rm: remove regular file `natalia_test1'? y
rm: remove regular file `natalia_test2'? y
rm: remove regular file `natalia_test3'? y
[root@cmsstor155 uscms_test]# ls
burning testdir
============================
In admin interface check that files are now gone:
[cmsstor152.fnal.gov] (w-cmsstor409-disk_itb-disk2) admin > rep ls
[cmsstor152.fnal.gov] (w-cmsstor409-disk_itb-disk2) admin > ..
[cmsstor152.fnal.gov] (local) admin > cd w-cmsstor409-disk_itb-disk1
[cmsstor152.fnal.gov] (w-cmsstor409-disk_itb-disk1) admin > rep ls
[cmsstor152.fnal.gov] (w-cmsstor409-disk_itb-disk1) admin > ..
[cmsstor152.fnal.gov] (local) admin > cd w-cmsstor410-disk_itb-disk2
[cmsstor152.fnal.gov] (w-cmsstor410-disk_itb-disk2) admin > rep ls
[cmsstor152.fnal.gov] (w-cmsstor410-disk_itb-disk2) admin > ..
[cmsstor152.fnal.gov] (local) admin > cd w-cmsstor410-disk_itb-disk1
[cmsstor152.fnal.gov] (w-cmsstor410-disk_itb-disk1) admin > rep ls
[cmsstor152.fnal.gov] (w-cmsstor410-disk_itb-disk1) admin >
#3 Updated by Natalia Ratnikova over 5 years ago
[cmsdev33 17:19] cat /storage/local/data1/home/natasha/CODE/enc/hosts/cmsstor152.fnal.gov.yaml
classes:
role::storage::dcacheadmindisk_itb:
rpmrepos::epel:
itb: 'true'
rpmrepos::osg:
itb: 'true'
[cmsdev33 17:19] git checkout natalia_burning_dcache_pools
Branch natalia_burning_dcache_pools set up to track remote branch natalia_burning_dcache_pools from origin.
Switched to a new branch 'natalia_burning_dcache_pools'
[cmsdev33 17:19] git status
- On branch natalia_burning_dcache_pools
nothing to commit (working directory clean)
[cmsdev33 17:19] emacs modules/dcache/files/etc/dcache/poolmanager-
poolmanager-disk.conf poolmanager-tape.conf
poolmanager-disk_itb.conf
[cmsdev33 17:19] emacs modules/dcache/files/etc/dcache/poolmanager-disk_itb.conf &
[1] 18376
[cmsdev33 17:19] gir diff
bash: gir: command not found
[cmsdev33 17:19] git diff
diff --git a/modules/dcache/files/etc/dcache/poolmanager-disk_itb.conf b/modules
index 515a91d..5a99360 100644
--- a/modules/dcache/files/etc/dcache/poolmanager-disk_itb.conf
+++ b/modules/dcache/files/etc/dcache/poolmanager-disk_itb.conf
@@ -43,13 +43,6 @@ psu create pool w-cmsstor155-disk_itb-disk1
psu create pool w-cmsstor155-disk_itb-disk2
psu create pool w-cmsstor155-disk_itb-disk3
psu create pool w-cmsstor409-disk_itb-disk1
-psu create pool w-cmsstor409-disk_itb-disk2
-psu create pool w-cmsstor410-disk_itb-disk1
-psu create pool w-cmsstor410-disk_itb-disk2
-
#
# The pool groups ...
#
@@ -66,12 +59,6 @@ psu addto pgroup readonlyPools w-cmsstor151-disk_itb-disk1
psu addto pgroup readonlyPools w-cmsstor151-disk_itb-disk2
psu addto pgroup readonlyPools w-cmsstor151-disk_itb-disk3
psu addto pgroup burningPools w-cmsstor409-disk_itb-disk1
-psu addto pgroup burningPools w-cmsstor409-disk_itb-disk2
-psu addto pgroup burningPools w-cmsstor410-disk_itb-disk1
-psu addto pgroup burningPools w-cmsstor410-disk_itb-disk2
-
#
# Sections
#
[cmsdev33 17:19] git log modules/dcache/files/etc/dcache/poolmanager-disk_itb.conf
commit 558c9bfc30998fe76cd381582dc2570262787001
Author: Natalia Ratnikova <natasha@fnal.gov>
Date: Wed Jun 10 17:38:20 2015 -0500
Removed burning-link , since we do not want to write or read data to/from
commit c496aa3d4480afccff13757c18912f09017920ec
Author: Natalia Ratnikova <natasha@fnal.gov>
Date: Thu May 28 21:23:43 2015 -0500
Added cmsstor409/410 pools to burning pools group, cmsstor151 moved from fl
commit b8dc37f28039244db29d63eeb832caa645a0937b
Author: Natalia Ratnikova <natasha@fnal.gov>
Date: Thu May 28 20:36:19 2015 -0500
redmine task: 8928: Added readonly and burning pool groups, test cmsstor151
commit d742288c36c26a4333158271fa89ae4f51326ada
Author: Gerard Bernabeu <gerard1@fnal.gov>
Date: Wed Mar 4 17:45:46 2015 -0600
Changed dCache pool roles, adding support for cmsstor4XX which have only 2 p
[cmsdev33 17:19] git commit m "removed cmsstor409/410 pools from disk_itb instance, moving them to production. " -all
error: did you mean `--all` (with two dashes ?)
[cmsdev33 17:19] git commit -m "removed cmsstor409/410 pools from disk_itb instance, moving them to production. " --all
[natalia_burning_dcache_pools 3c77781] removed cmsstor409/410 pools from disk_itb instance, moving them to production.
1 files changed, 0 insertions(+), 13 deletions()
[cmsdev33 17:19]
========= Check chimera
Use ^D (control-D) to exit postgresql
cmspnfs1.fnal.gov - chimeradisk_itb/natalia_redmine_9074 (SLF 6.6)
16-core Xeon X5550 @ 2.67GHz (PowerEdge R710); 47.13 GB RAM, 20.00 GB swap
[root@cmspnfs1 ~]# psql -U enstore chimera
psql (9.2.6)
Type "help" for help.
chimera=> select * from t_locationinfo where ilocation='w-cmsstor410-disk_itb-disk1';
ipnfsid | itype | ilocation | ipriority | ictime | iatime | istate
---------+-------+-----------+-----------+--------+--------+--------
(0 rows)
chimera=> select * from t_locationinfo where ilocation='w-cmsstor410-disk_itb-disk2';
ipnfsid | itype | ilocation | ipriority | ictime | iatime | istate
---------+-------+-----------+-----------+--------+--------+--------
(0 rows)
chimera=> select * from t_locationinfo where ilocation='w-cmsstor409-disk_itb-disk2';
ipnfsid | itype | ilocation | ipriority | ictime | iatime | istate
---------+-------+-----------+-----------+--------+--------+--------
(0 rows)
chimera=> select * from t_locationinfo where ilocation='w-cmsstor409-disk_itb-disk1';
ipnfsid | itype | ilocation | ipriority | ictime | iatime | istate
---------+-------+-----------+-----------+--------+--------+--------
(0 rows)
chimera=> .q
chimera-> quit
chimera-> exit
chimera-> \q
[root@cmspnfs1 ~]# exit
logout
Connection to cmspnfs1 closed.
====================
From the admin interface remove pools:
[root@cmsstor152 ~]# ssh -1 -i /etc/dcache/admin/server_key -c blowfish -p 22223 cmsstor152.fnal.gov -l admin
dCache Admin (VII) (user=admin)
[cmsstor152.fnal.gov] (local) admin > cd PoolManager
[cmsstor152.fnal.gov] (PoolManager) admin > psu ls pool -a
w-cmsstor410-disk_itb-disk1 (enabled=true;active=1;rdOnly=false;links=0;pgroups=1;hsm=[];mode=disabled(store,stage,p2p-client))
linkList :
pGroupList :
burningPools (links=0;pools=4)
w-cmsstor409-disk_itb-disk2 (enabled=true;active=6;rdOnly=false;links=0;pgroups=1;hsm=[];mode=disabled(store,stage,p2p-client))
linkList :
pGroupList :
burningPools (links=0;pools=4)
w-cmsstor155-disk_itb-disk1 (enabled=true;active=1;rdOnly=false;links=0;pgroups=1;hsm=[];mode=enabled)
linkList :
pGroupList :
flushPools (links=1;pools=3)
w-cmsstor410-disk_itb-disk2 (enabled=true;active=6;rdOnly=false;links=0;pgroups=1;hsm=[];mode=disabled(store,stage,p2p-client))
linkList :
pGroupList :
burningPools (links=0;pools=4)
w-cmsstor409-disk_itb-disk1 (enabled=true;active=6;rdOnly=false;links=0;pgroups=1;hsm=[];mode=disabled(store,stage,p2p-client))
linkList :
pGroupList :
burningPools (links=0;pools=4)
w-cmsstor151-disk_itb-disk2 (enabled=true;active=28;rdOnly=false;links=0;pgroups=1;hsm=[];mode=enabled)
linkList :
pGroupList :
readonlyPools (links=1;pools=3)
w-cmsstor151-disk_itb-disk3 (enabled=true;active=26;rdOnly=false;links=0;pgroups=1;hsm=[];mode=enabled)
linkList :
pGroupList :
readonlyPools (links=1;pools=3)
w-cmsstor155-disk_itb-disk2 (enabled=true;active=28;rdOnly=false;links=0;pgroups=1;hsm=[];mode=enabled)
linkList :
pGroupList :
flushPools (links=1;pools=3)
w-cmsstor151-disk_itb-disk1 (enabled=true;active=28;rdOnly=false;links=0;pgroups=1;hsm=[];mode=enabled)
linkList :
pGroupList :
readonlyPools (links=1;pools=3)
w-cmsstor155-disk_itb-disk3 (enabled=true;active=23;rdOnly=false;links=0;pgroups=1;hsm=[];mode=enabled)
linkList :
pGroupList :
flushPools (links=1;pools=3)
[cmsstor152.fnal.gov] (PoolManager) admin >
[cmsstor152.fnal.gov] (PoolManager) admin > psu removefrom pgroup burningPools w-cmsstor410-disk_itb-disk1
[cmsstor152.fnal.gov] (PoolManager) admin > psu removefrom pgroup burningPools w-cmsstor410-disk_itb-disk2
[cmsstor152.fnal.gov] (PoolManager) admin > psu removefrom pgroup burningPools w-cmsstor409-disk_itb-disk1
[cmsstor152.fnal.gov] (PoolManager) admin > psu removefrom pgroup burningPools w-cmsstor409-disk_itb-disk2
[cmsstor152.fnal.gov] (PoolManager) admin >
#4 Updated by Natalia Ratnikova over 5 years ago
Observed a problem when removing pools from the itb instance following Chih-Hao's instructions in
https://cmsweb.fnal.gov/bin/view/Storage/PoolRetire#A_915_93_Remove_pools_from_PoolManager_configuration
Pools were removed from the group, but did not go away from the "psu ls pool -a" output until the pools were stopped. Then they reappeared.
The following order seems to succeed eventually:
stop puppet on cmsstor152
run puppet manually
from the admin interface in pool manager reload the configuration
The pools were still coming back after a while, most likely by re-registering themselves.
Stop puppet on the pools, so dcache does not get started there.
Update the docs at https://cmsweb.fnal.gov/bin/view/Storage/PoolAdd#Pool_burning_tests
#5 Updated by Natalia Ratnikova over 5 years ago
We did not have a re-shoot step in the original procedure, but for a clean start we decided to add it.
Also, this will prevent the pools from re-registering themselves back to the ITB instance.
Here are the actions:
[root@cmsconsole ~]# cmspower-powerit -a off -c 'Moving from testbed to production' cmsstor409
=== cmsstor409 ===
connecting to APC apccms1555-1, outlet 3
Outlet state: OFF
[root@cmsconsole ~]# cmspower-powerit -a off -c 'Moving from testbed to production' cmsstor410
=== cmsstor410 ===
connecting to APC apccms1555-1, outlet 11
Outlet state: OFF
Updated docs
in ENC:
copy standard node yaml
cp hosts/cmsstor404.fnal.gov.yaml hosts/cmsstor409.fnal.gov.yaml
cp hosts/cmsstor404.fnal.gov.yaml hosts/cmsstor410.fnal.gov.yaml
commit and push:
git commit -m "Moving cmsstor409/410 into production" hosts/cmsstor410.fnal.gov.yaml hosts/cmsstor409.fnal.gov.yaml
===================
re-shoot both nodes.
[root@cmsadmin1 ~]# cms-shoot cmsstor409
removing host from rocks on cmsrocks51, if necessary
cmsstor24.fnal.gov: no host cmsstor409 to remove
Connection to cmsrocks51 closed.
removing host from rocks on cmsrocks52, if necessary
cmssrv26.fnal.gov: no host cmsstor409 to remove
Connection to cmsrocks52 closed.
stopping puppet on cmsstor409, if applicable
ssh: connect to host cmsstor409 port 22: No route to host
telling host to netboot on next boot
cmsstor409: netboot -> True
set 1 hosts to boot
1 system(s) updated
telling cmspuppetca to remove host's cert, if present
cleaning cert for cmsstor409.fnal.gov
Notice: Revoked certificate with serial 2586
Notice: Removing file Puppet::SSL::Certificate cmsstor409.fnal.gov at '/var/lib/puppet/ssl/ca/signed/cmsstor409.fnal.gov.pem'
Notice: Removing file Puppet::SSL::Certificate cmsstor409.fnal.gov at '/var/lib/puppet/ssl/certs/cmsstor409.fnal.gov.pem'
telling cmspuppetca to update autosign information
when you're ready to start, run:
cmspower-powerit --action cycle --comment 'reinstalling' cmsstor409
don't forget to disable zabbix monitoring if applicable
[root@cmsadmin1 ~]#
[root@cmsadmin1 ~]# screen -S reshoot_409_410_NR
[root@cmsadmin1 ~]# cmspower-powerit --action cycle --comment 'reinstalling' cmsstor409
/usr/bin/ssh -l root cmsconsole cmspower-powerit --action cycle --comment \'root: reinstalling\' cmsstor409
Outlet state: OFF
Outlet state: ON
=== cmsstor409 ===
connecting to APC apccms1555-1, outlet 3
connecting to APC apccms1555-1, outlet 3
[root@cmsadmin1 ~]# ssh -l root -Y -X cmsconsole cmspower-cons cmsstor409 &
[1] 53082
[root@cmsadmin1 ~]# Connecting to node cmsstor409: ssh -x -t root:ttyS3@fcc-2-1555
[root@cmsadmin1 ~]# cms-shoot cmsstor410
removing host from rocks on cmsrocks51, if necessary
cmsstor24.fnal.gov: no host cmsstor410 to remove
Connection to cmsrocks51 closed.
removing host from rocks on cmsrocks52, if necessary
cmssrv26.fnal.gov: no host cmsstor410 to remove
Connection to cmsrocks52 closed.
stopping puppet on cmsstor410, if applicable
ssh: connect to host cmsstor410 port 22: No route to host
telling host to netboot on next boot
cmsstor410: netboot -> True
set 1 hosts to boot
1 system(s) updated
telling cmspuppetca to remove host's cert, if present
cleaning cert for cmsstor410.fnal.gov
Notice: Revoked certificate with serial 2585
Notice: Removing file Puppet::SSL::Certificate cmsstor410.fnal.gov at '/var/lib/puppet/ssl/ca/signed/cmsstor410.fnal.gov.pem'
Notice: Removing file Puppet::SSL::Certificate cmsstor410.fnal.gov at '/var/lib/puppet/ssl/certs/cmsstor410.fnal.gov.pem'
telling cmspuppetca to update autosign information
when you're ready to start, run:
cmspower-powerit --action cycle --comment 'reinstalling' cmsstor410
don't forget to disable zabbix monitoring if applicable
[root@cmsadmin1 ~]# cmspower-powerit --action cycle --comment 'reinstalling' cmsstor410
/usr/bin/ssh -l root cmsconsole cmspower-powerit --action cycle --comment \'root: reinstalling\' cmsstor410
Outlet state: OFF
Outlet state: ON
=== cmsstor410 ===
connecting to APC apccms1555-1, outlet 11
connecting to APC apccms1555-1, outlet 11
#6 Updated by Natalia Ratnikova over 5 years ago
cmsstor409.fnal.gov - dcachepooldisk/production (SLF 6.6)
16-core Opteron 6320 (H8DGU); 62.90 GB RAM, 20.00 GB swap
/usr/bin/xauth: creating new authority file /root/.Xauthority
[root@cmsstor409 ~]# service puppet stop
Stopping puppet agent: [ OK ]
[root@cmsstor409 ~]# dcache status
DOMAIN STATUS PID USER
w-cmsstor409-disk-disk1Domain running 9321 root
w-cmsstor409-disk-disk2Domain running 9381 root
gridftp-cmsstor409Domain running 9441 root
[root@cmsstor409 ~]# dcache stop
Stopping gridftp-cmsstor409Domain 0 done
Stopping w-cmsstor409-disk-disk2Domain 0 1 done
Stopping w-cmsstor409-disk-disk1Domain 0 1 done
[root@cmsstor409 ~]# find /storage/data1
/storage/data1
/storage/data1/write-pool
/storage/data1/write-pool/data
/storage/data1/write-pool/meta
/storage/data1/write-pool/meta/je.info.0
/storage/data1/write-pool/meta/je.lck
/storage/data1/write-pool/meta/00000017.jdb
/storage/data1/write-pool/meta/00000018.jdb
/storage/data1/write-pool/setup.bak
/storage/data1/write-pool/setup
[root@cmsstor409 ~]# rm -rf /storage/data1/*
[root@cmsstor409 ~]# mount
/dev/sda2 on / type ext3 (rw)
proc on /proc type proc (rw)
sysfs on /sys type sysfs (rw)
devpts on /dev/pts type devpts (rw,gid=5,mode=620)
tmpfs on /dev/shm type tmpfs (rw)
/dev/sda1 on /boot type ext3 (rw)
/dev/sda5 on /storage/local/data1 type ext4 (rw)
none on /proc/sys/fs/binfmt_misc type binfmt_misc (rw)
/dev/sdb on /storage/data1 type xfs (rw,nobarrier,inode64)
/dev/sdc on /storage/data2 type xfs (rw,nobarrier,inode64)
[root@cmsstor409 ~]# xfs_admin -l /dev/sdb
label = "dcache-disk1"
[root@cmsstor409 ~]# xfs_admin -l /dev/sdc
label = "dcache-disk2"
[root@cmsstor409 ~]# find /storage/data1
/storage/data1
[root@cmsstor409 ~]# find /storage/data2
/storage/data2
/storage/data2/write-pool
/storage/data2/write-pool/data
/storage/data2/write-pool/meta
/storage/data2/write-pool/meta/je.info.0
/storage/data2/write-pool/meta/je.lck
/storage/data2/write-pool/meta/00000017.jdb
/storage/data2/write-pool/meta/00000018.jdb
/storage/data2/write-pool/setup.bak
/storage/data2/write-pool/setup
[root@cmsstor409 ~]# rm -rf /storage/data2/*
[root@cmsstor409 ~]# find /storage/data2
/storage/data2
[root@cmsstor409 ~]#
=====================================
cmsstor410.fnal.gov - dcachepooldisk/production (SLF 6.6)
16-core Opteron 6320 (H8DGU); 62.90 GB RAM, 20.00 GB swap
/usr/bin/xauth: creating new authority file /root/.Xauthority
[root@cmsstor410 ~]# service puppet stop
Stopping puppet agent: [ OK ]
[root@cmsstor410 ~]# dcache status
DOMAIN STATUS PID USER
w-cmsstor410-disk-disk1Domain running 9308 root
w-cmsstor410-disk-disk2Domain running 9368 root
gridftp-cmsstor410Domain running 9428 root
[root@cmsstor410 ~]# dcache stop
Stopping gridftp-cmsstor410Domain 0 done
Stopping w-cmsstor410-disk-disk2Domain 0 1 done
Stopping w-cmsstor410-disk-disk1Domain 0 1 done
[root@cmsstor410 ~]# find /storage/data1
/storage/data1
/storage/data1/write-pool
/storage/data1/write-pool/data
/storage/data1/write-pool/meta
/storage/data1/write-pool/meta/je.info.0
/storage/data1/write-pool/meta/je.lck
/storage/data1/write-pool/meta/00000017.jdb
/storage/data1/write-pool/meta/00000018.jdb
/storage/data1/write-pool/setup.bak
/storage/data1/write-pool/setup
[root@cmsstor410 ~]# rm -rf /storage/data1/*
[root@cmsstor410 ~]# find /storage/data2
/storage/data2
/storage/data2/write-pool
/storage/data2/write-pool/data
/storage/data2/write-pool/meta
/storage/data2/write-pool/meta/je.info.0
/storage/data2/write-pool/meta/je.lck
/storage/data2/write-pool/meta/00000018.jdb
/storage/data2/write-pool/meta/00000019.jdb
/storage/data2/write-pool/setup.bak
/storage/data2/write-pool/setup
[root@cmsstor410 ~]# rm -rf /storage/data2/*
[root@cmsstor410 ~]# find /storage
/storage
/storage/data2
/storage/local
/storage/local/data1
/storage/local/data1/lost+found
/storage/data1
[root@cmsstor410 ~]#
==============================================
[root@cmsdcacheadmindisk ~]# grep 409 /etc/dcache/poolmanager.conf
[root@cmsdcacheadmindisk ~]# grep 410 /etc/dcache/poolmanager.conf
[root@cmsdcacheadmindisk ~]#
in Puppet:
update on top of Gerard's commit:
[cmsdev33 17:19] git checkout natalia_adding409410
Branch natalia_adding409410 set up to track remote branch natalia_adding409410 from origin.
Switched to a new branch 'natalia_adding409410'
[cmsdev33 17:19] git show
commit 2e4c54cdc5daee7e3e8d35fcc81433a82e6f31d9
Author: Gerard Bernabeu <gerard1@fnal.gov>
Date: Thu Jul 9 18:25:40 2015 -0500
adding logging all the time to the exec
diff --git a/modules/dcache/manifests/dcachedomain.pp b/modules/dcache/manifest
index 8acb3a4..6d6a457 100644
--- a/modules/dcache/manifests/dcachedomain.pp
+++ b/modules/dcache/manifests/dcachedomain.pp
@@ -21,6 +21,7 @@ class dcache::dcachedomain (
exec{'poolmanager reload':
command => "/bin/echo -e 'cd PoolManager\nreload -yes\n..\nlogoff\n' | ssh
refreshonly => true,
logoutput => true,
}
}
(END)
Check against the commit where these pools were originally added:
[cmsdev33 17:19] git diff -r bba968ea06e51684b3d161a6587f71e0c1ec4c41 modules/dcache/files/etc/dcache/poolmanager-disk.conf | grep cmsstor409
-psu create pool w-cmsstor409-disk-disk1
-psu create pool w-cmsstor409-disk-disk2
-psu addto pgroup flushPools w-cmsstor409-disk-disk1
-psu addto pgroup flushPools w-cmsstor409-disk-disk2
[cmsdev33 17:19] git diff -r bba968ea06e51684b3d161a6587f71e0c1ec4c41 modules/dcache/files/etc/dcache/poolmanager-disk.conf | grep cmsstor410
-psu create pool w-cmsstor410-disk-disk1
-psu create pool w-cmsstor410-disk-disk2
-psu addto pgroup flushPools w-cmsstor410-disk-disk1
-psu addto pgroup flushPools w-cmsstor410-disk-disk2
[cmsdev33 17:19] emacs modules/dcache/files/etc/dcache/poolmanager-disk.conf &
[1] 13811
[cmsdev33 17:19] git diff
diff --git a/modules/dcache/files/etc/dcache/poolmanager-disk.conf b/modules/dc
index fc8b96e..2eb2120 100644
--- a/modules/dcache/files/etc/dcache/poolmanager-disk.conf
+++ b/modules/dcache/files/etc/dcache/poolmanager-disk.conf
@@ -600,6 +600,10 @@ psu create pool w-cmsstor407-disk-disk1
psu create pool w-cmsstor407-disk-disk2
psu create pool w-cmsstor408-disk-disk1
psu create pool w-cmsstor408-disk-disk2
+psu create pool w-cmsstor409-disk-disk1
+psu create pool w-cmsstor409-disk-disk2
+psu create pool w-cmsstor410-disk-disk1
+psu create pool w-cmsstor410-disk-disk2
#
# The pool groups ...
@@ -1172,6 +1176,10 @@ psu addto pgroup flushPools w-cmsstor407-disk-disk1
psu addto pgroup flushPools w-cmsstor407-disk-disk2
psu addto pgroup flushPools w-cmsstor408-disk-disk1
psu addto pgroup flushPools w-cmsstor408-disk-disk2
+psu addto pgroup flushPools w-cmsstor409-disk-disk1
+psu addto pgroup flushPools w-cmsstor409-disk-disk2
+psu addto pgroup flushPools w-cmsstor410-disk-disk1
+psu addto pgroup flushPools w-cmsstor410-disk-disk2
#
# Sections
[1]+ Done emacs modules/dcache/files/etc/dcache/poolmanager-disk.conf
commit and push
=============
Nodes are already configured in zabbix, see:
https://cmszabbix1/hosts.php?sid=8ad2c10f6cc09b02&form_refresh=1&groupid=28&page=5
=============
start puppet :
[root@cmsstor409 ~]# service puppet start
Starting puppet agent: [ OK ]
[root@cmsstor409 ~]# exit
[root@cmsstor410 ~]# service puppet start
Starting puppet agent: [ OK ]
[root@cmsstor410 ~]# exit
Asked Chih-Hao/Gerard to review the natalia_adding409410 branch and merge into itb before the production push.
#7 Updated by Natalia Ratnikova over 5 years ago
- Status changed from Accepted to Resolved
- % Done changed from 0 to 100
With the puppet itb2prod push, both new servers are back in operation on the production instance.
Resolving the task.