Solaris backup issue

Solaris backup issue

Post by Matt » Thu, 05 Dec 2002 05:44:47



Howdy folks,

I am trying to debug an issue with Netbackup 3.4. When I run `bpbackup -L
/safe_place/progress /`, the backup exits with "EXIT STATUS 24: socket
write failed". I wandered through the logs, but they weren't extremely
useful. Pulling out truss, I get the following [Exhibit A]. The output
leads me to believe that the pipe is torn down for no apparent reason.
Does anyone know how I can dig deeper into this? Any thoughts on the truss
output?

Thanks,
Matty

[Exhibit A]
getpid()                                        = 856 [1]
write(3, " 1 5 : 0 6 : 0 0 . 3 1 6".., 60)      = 60
lstat64("audiocontrol", 0xFFBE2280)             = 0
open64("/usr/openwin/bin/audiocontrol", O_RDONLY) = 11
acl("/usr/openwin/bin/audiocontrol", GETACLCNT, 0, 0x00000000) = 4
read(11, "7F E L F010201\0\0\0\0\0".., 83968)   = 83968
write(1, "808B  \b "80\0\fD006 ` 4".., 524288)  = 524288
time()                                          = 1038945960
read(11, "A004 ! !E0 #A0 h !07 B "".., 157136)  = 157136
close(11)                                       = 0
utime("/usr/openwin/bin/audiocontrol", 0xFFBDF628) = 0
write(2, " F i l   -   0   2 0 3 0".., 116)     = 116
time()                                          = 1038945960
write(4, " 1 5 : 0 6 : 0 0   / u s".., 39)      = 39
getpid()                                        = 856 [1]
write(3, " 1 5 : 0 6 : 0 0 . 3 2 7".., 61)      = 61
lstat64("audiotool", 0xFFBE2280)                = 0
open64("/usr/openwin/bin/audiotool", O_RDONLY)  = 11
acl("/usr/openwin/bin/audiotool", GETACLCNT, 0, 0x00000000) = 4
read(11, "7F E L F010201\0\0\0\0\0".., 366592)  = 366592
write(1, "A004 ! !E0 #A0 h !07 B "".., 524288) (sleeping...)
kaio(6, 0x00250108, 0x002503F8, 0x00000000, 0x00000000, 0x00000000, 0x00000000) (sleeping...)
lwp_cond_wait(0xFF08DFA0, 0xFF08DFB0, 0x00000000) (sleeping...)
lwp_cond_wait(0xFF07BFA0, 0xFF07BFB0, 0x00000000) (sleeping...)
lwp_cond_wait(0xFF069FA0, 0xFF069FB0, 0x00000000) (sleeping...)
lwp_cond_wait(0xFF057FA0, 0xFF057FB0, 0x00000000) (sleeping...)
lwp_cond_wait(0xFF045FA0, 0xFF045FB0, 0x00000000) (sleeping...)
lwp_cond_wait(0xFF033FA0, 0xFF033FB0, 0x00000000) (sleeping...)
lwp_cond_wait(0xFF021FA0, 0xFF021FB0, 0x00000000) (sleeping...)
lwp_cond_wait(0xFF00FFA0, 0xFF00FFB0, 0x00000000) (sleeping...)
lwp_cond_wait(0xFEFFDFA0, 0xFEFFDFB0, 0x00000000) (sleeping...)
write(1, "A004 ! !E0 #A0 h !07 B "".., 524288)  Err#32 EPIPE
time()                                          = 1038946449
write(4, " 1 5 : 1 4 : 0 9   E R R".., 63)      = 63
getpid()                                        = 856 [1]
write(3, " 1 5 : 1 4 : 0 9 . 3 2 9".., 86)      = 86
write(2, " E R R   -  ", 6)                     = 6
write(2, " C a n n o t   w r i t e".., 47)      = 47
write(2, "\n", 1)                               = 1
getpid()                                        = 856 [1]
write(3, " 1 5 : 1 4 : 0 9 . 3 3 0".., 89)      = 89
sigaction(SIGPIPE, 0xFFBD6DF0, 0xFFBD6E70)      = 0
getpid()                                        = 856 [1]
write(3, " 1 5 : 1 4 : 0 9 . 3 3 0".., 73)      = 73
write(2, " I N F   -  ", 6)                     = 6
write(2, " E X I T   S T A T U S  ".., 35)      = 35
write(2, "\n", 1)                               = 1
close(1)                                        = 0
sigaction(SIGALRM, 0xFFBD6DF0, 0xFFBD6E70)      = 0
alarm(3600)                                     = 0
access("/usr/openv/netbackup/bin/bpend_notify.test.User", 0) Err#2 ENOENT
access("/usr/openv/netbackup/bin/bpend_notify.test", 0) Err#2 ENOENT
access("/usr/openv/netbackup/bin/bpend_notify", 0) Err#2 ENOENT
alarm(0)                                        = 3600
sigaction(SIGALRM, 0xFFBD6DF0, 0xFFBD6E70)      = 0
llseek(0, 0, SEEK_CUR)                          Err#29 ESPIPE
llseek(7, 0, SEEK_CUR)                          = 0
_exit(24)

 
 
 

Solaris backup issue

Post by Michael Tos » Thu, 05 Dec 2002 06:32:55


We have _occasionally_ "socket write failed" with NB 3.4.
But so far the second backup attempt was always successful, so we have
not looked into it.
The receiving process at the other end of the pipe has obviously
terminated?

Have you contacted the Veritas hotline?


> Howdy folks,

> I am trying to debug an issue with Netbackup 3.4. When I run `bpbackup -L
> /safe_place/progress /`, the backup exits with "EXIT STATUS 24: socket
> write failed". I wandered through the logs, but they weren't extremely
> useful. Pulling out truss, I get the following [Exhibit A]. The output
> leads me to believe that the pipe is torn down for no apparent reason.
> Does anyone know how I can dig deeper into this? Any thoughts on the truss
> output?

> Thanks,
> Matty

> [Exhibit A]
> getpid()                                        = 856 [1]
> write(3, " 1 5 : 0 6 : 0 0 . 3 1 6".., 60)      = 60
> lstat64("audiocontrol", 0xFFBE2280)             = 0
> open64("/usr/openwin/bin/audiocontrol", O_RDONLY) = 11
> acl("/usr/openwin/bin/audiocontrol", GETACLCNT, 0, 0x00000000) = 4
> read(11, "7F E L F010201\0\0\0\0\0".., 83968)   = 83968
> write(1, "808B  \b "80\0\fD006 ` 4".., 524288)  = 524288
> time()                                          = 1038945960
> read(11, "A004 ! !E0 #A0 h !07 B "".., 157136)  = 157136
> close(11)                                       = 0
> utime("/usr/openwin/bin/audiocontrol", 0xFFBDF628) = 0
> write(2, " F i l   -   0   2 0 3 0".., 116)     = 116
> time()                                          = 1038945960
> write(4, " 1 5 : 0 6 : 0 0   / u s".., 39)      = 39
> getpid()                                        = 856 [1]
> write(3, " 1 5 : 0 6 : 0 0 . 3 2 7".., 61)      = 61
> lstat64("audiotool", 0xFFBE2280)                = 0
> open64("/usr/openwin/bin/audiotool", O_RDONLY)  = 11
> acl("/usr/openwin/bin/audiotool", GETACLCNT, 0, 0x00000000) = 4
> read(11, "7F E L F010201\0\0\0\0\0".., 366592)  = 366592
> write(1, "A004 ! !E0 #A0 h !07 B "".., 524288) (sleeping...)
> kaio(6, 0x00250108, 0x002503F8, 0x00000000, 0x00000000, 0x00000000, 0x00000000) (sleeping...)
> lwp_cond_wait(0xFF08DFA0, 0xFF08DFB0, 0x00000000) (sleeping...)
> lwp_cond_wait(0xFF07BFA0, 0xFF07BFB0, 0x00000000) (sleeping...)
> lwp_cond_wait(0xFF069FA0, 0xFF069FB0, 0x00000000) (sleeping...)
> lwp_cond_wait(0xFF057FA0, 0xFF057FB0, 0x00000000) (sleeping...)
> lwp_cond_wait(0xFF045FA0, 0xFF045FB0, 0x00000000) (sleeping...)
> lwp_cond_wait(0xFF033FA0, 0xFF033FB0, 0x00000000) (sleeping...)
> lwp_cond_wait(0xFF021FA0, 0xFF021FB0, 0x00000000) (sleeping...)
> lwp_cond_wait(0xFF00FFA0, 0xFF00FFB0, 0x00000000) (sleeping...)
> lwp_cond_wait(0xFEFFDFA0, 0xFEFFDFB0, 0x00000000) (sleeping...)
> write(1, "A004 ! !E0 #A0 h !07 B "".., 524288)  Err#32 EPIPE
> time()                                          = 1038946449
> write(4, " 1 5 : 1 4 : 0 9   E R R".., 63)      = 63
> getpid()                                        = 856 [1]
> write(3, " 1 5 : 1 4 : 0 9 . 3 2 9".., 86)      = 86
> write(2, " E R R   -  ", 6)                     = 6
> write(2, " C a n n o t   w r i t e".., 47)      = 47
> write(2, "\n", 1)                               = 1
> getpid()                                        = 856 [1]
> write(3, " 1 5 : 1 4 : 0 9 . 3 3 0".., 89)      = 89
> sigaction(SIGPIPE, 0xFFBD6DF0, 0xFFBD6E70)      = 0
> getpid()                                        = 856 [1]
> write(3, " 1 5 : 1 4 : 0 9 . 3 3 0".., 73)      = 73
> write(2, " I N F   -  ", 6)                     = 6
> write(2, " E X I T   S T A T U S  ".., 35)      = 35
> write(2, "\n", 1)                               = 1
> close(1)                                        = 0
> sigaction(SIGALRM, 0xFFBD6DF0, 0xFFBD6E70)      = 0
> alarm(3600)                                     = 0
> access("/usr/openv/netbackup/bin/bpend_notify.test.User", 0) Err#2 ENOENT
> access("/usr/openv/netbackup/bin/bpend_notify.test", 0) Err#2 ENOENT
> access("/usr/openv/netbackup/bin/bpend_notify", 0) Err#2 ENOENT
> alarm(0)                                        = 3600
> sigaction(SIGALRM, 0xFFBD6DF0, 0xFFBD6E70)      = 0
> llseek(0, 0, SEEK_CUR)                          Err#29 ESPIPE
> llseek(7, 0, SEEK_CUR)                          = 0
> _exit(24)

--
Michael Tosch / Master IS/IT Support
Ericsson Eurolab Deutschland GmbH
Tel: +49 2407 575 313

 
 
 

Solaris backup issue

Post by Matt » Tue, 10 Dec 2002 11:40:00


Howdy folks,

I am trying to debug an issue with Netbackup 3.4. When I
run `bpbackup -L /safe_place/progress /`, the backup
exits with "EXIT STATUS 24: socket write failed". I wandered
through the logs, but they weren't extremely useful.
Pulling out truss, I get the following [Exhibit A]. The
output leads me to believe that the pipe is torn down
for no apparent reason. Does anyone know how I can
dig deeper into this? Any thoughts on the truss output?

Thanks,
Matty

[Exhibit A]
getpid()                                        = 856 [1]
write(3, " 1 5 : 0 6 : 0 0 . 3 1 6".., 60)      = 60
lstat64("audiocontrol", 0xFFBE2280)             = 0
open64("/usr/openwin/bin/audiocontrol", O_RDONLY) = 11
acl("/usr/openwin/bin/audiocontrol", GETACLCNT, 0, 0x00000000) = 4
read(11, "7F E L F010201\0\0\0\0\0".., 83968)   = 83968
write(1, "808B  \b "80\0\fD006 ` 4".., 524288)  = 524288
time()                                          = 1038945960
read(11, "A004 ! !E0 #A0 h !07 B "".., 157136)  = 157136
close(11)                                       = 0
utime("/usr/openwin/bin/audiocontrol", 0xFFBDF628) = 0
write(2, " F i l   -   0   2 0 3 0".., 116)     = 116
time()                                          = 1038945960
write(4, " 1 5 : 0 6 : 0 0   / u s".., 39)      = 39
getpid()                                        = 856 [1]
write(3, " 1 5 : 0 6 : 0 0 . 3 2 7".., 61)      = 61
lstat64("audiotool", 0xFFBE2280)                = 0
open64("/usr/openwin/bin/audiotool", O_RDONLY)  = 11
acl("/usr/openwin/bin/audiotool", GETACLCNT, 0, 0x00000000) = 4
read(11, "7F E L F010201\0\0\0\0\0".., 366592)  = 366592
write(1, "A004 ! !E0 #A0 h !07 B "".., 524288) (sleeping...)
kaio(6, 0x00250108, 0x002503F8, 0x00000000, 0x00000000, 0x00000000, 0x00000000) (sleeping...)
lwp_cond_wait(0xFF08DFA0, 0xFF08DFB0, 0x00000000) (sleeping...)
lwp_cond_wait(0xFF07BFA0, 0xFF07BFB0, 0x00000000) (sleeping...)
lwp_cond_wait(0xFF069FA0, 0xFF069FB0, 0x00000000) (sleeping...)
lwp_cond_wait(0xFF057FA0, 0xFF057FB0, 0x00000000) (sleeping...)
lwp_cond_wait(0xFF045FA0, 0xFF045FB0, 0x00000000) (sleeping...)
lwp_cond_wait(0xFF033FA0, 0xFF033FB0, 0x00000000) (sleeping...)
lwp_cond_wait(0xFF021FA0, 0xFF021FB0, 0x00000000) (sleeping...)
lwp_cond_wait(0xFF00FFA0, 0xFF00FFB0, 0x00000000) (sleeping...)
lwp_cond_wait(0xFEFFDFA0, 0xFEFFDFB0, 0x00000000) (sleeping...)
write(1, "A004 ! !E0 #A0 h !07 B "".., 524288)  Err#32 EPIPE
time()                                          = 1038946449
write(4, " 1 5 : 1 4 : 0 9   E R R".., 63)      = 63
getpid()                                        = 856 [1]
write(3, " 1 5 : 1 4 : 0 9 . 3 2 9".., 86)      = 86
write(2, " E R R   -  ", 6)                     = 6
write(2, " C a n n o t   w r i t e".., 47)      = 47
write(2, "\n", 1)                               = 1
getpid()                                        = 856 [1]
write(3, " 1 5 : 1 4 : 0 9 . 3 3 0".., 89)      = 89
sigaction(SIGPIPE, 0xFFBD6DF0, 0xFFBD6E70)      = 0
getpid()                                        = 856 [1]
write(3, " 1 5 : 1 4 : 0 9 . 3 3 0".., 73)      = 73
write(2, " I N F   -  ", 6)                     = 6
write(2, " E X I T   S T A T U S  ".., 35)      = 35
write(2, "\n", 1)                               = 1
close(1)                                        = 0
sigaction(SIGALRM, 0xFFBD6DF0, 0xFFBD6E70)      = 0
alarm(3600)                                     = 0
access("/usr/openv/netbackup/bin/bpend_notify.test.User", 0) Err#2 ENOENT
access("/usr/openv/netbackup/bin/bpend_notify.test", 0) Err#2 ENOENT
access("/usr/openv/netbackup/bin/bpend_notify", 0) Err#2 ENOENT
alarm(0)                                        = 3600
sigaction(SIGALRM, 0xFFBD6DF0, 0xFFBD6E70)      = 0
llseek(0, 0, SEEK_CUR)                          Err#29 ESPIPE
llseek(7, 0, SEEK_CUR)                          = 0
_exit(24)