Ganesha crashed after running delegations cases for pynfs #153

Closed
thotz opened this issue Oct 10, 2018 · 32 comments

@thotz

thotz commented Oct 10, 2018

Ganesha crashed after running the delegations cases in the pynfs suite. The crash was seen in the client cleanup path.

Steps to reproduce

./testserver.py -v --outfile ~/pynfs.run.1 --maketree :/ --showomit --rundeps delegations

Tried only with a glusterfs volume. The tests did not actually run; they were all warned out (IMO, so delegation support in the export is not needed).

Program received signal SIGSEGV, Segmentation fault.
[Switching to Thread 0x7f2c517ea700 (LWP 31067)]
0x00007f2cbaafdef5 in svc_release_it (xprt=0x7f2c64018750, flags=0, tag=0x7f2cbab25340 <func.8326> "clnt_vc_destroy", line=462) at /root/nfs-ganesha/src/libntirpc/ntirpc/rpc/svc.h:433
433 (*(xprt)->xp_ops->xp_destroy)(xprt, flags, tag, line);
(gdb) bt
#0 0x00007f2cbaafdef5 in svc_release_it (xprt=0x7f2c64018750, flags=0, tag=0x7f2cbab25340 <func.8326> "clnt_vc_destroy", line=462) at /root/nfs-ganesha/src/libntirpc/ntirpc/rpc/svc.h:433
#1 0x00007f2cbaafec57 in clnt_vc_destroy (clnt=0x7f2c64009ff0) at /root/nfs-ganesha/src/libntirpc/src/clnt_vc.c:462
#2 0x000000000043dfdd in clnt_release_it (clnt=0x7f2c64009ff0, flags=0, tag=0x571880 <func.22917> "_nfs_rpc_destroy_chan", line=628) at /root/nfs-ganesha/src/libntirpc/ntirpc/rpc/clnt.h:318
#3 0x000000000043e073 in clnt_destroy_it (clnt=0x7f2c64009ff0, tag=0x571880 <func.22917> "_nfs_rpc_destroy_chan", line=628) at /root/nfs-ganesha/src/libntirpc/ntirpc/rpc/clnt.h:340
#4 0x0000000000441693 in _nfs_rpc_destroy_chan (chan=0x7f2c4801a198) at /root/nfs-ganesha/src/MainNFSD/nfs_rpc_callback.c:628
#5 0x00000000004422fc in nfs_rpc_destroy_chan (chan=0x7f2c4801a198) at /root/nfs-ganesha/src/MainNFSD/nfs_rpc_callback.c:864
#6 0x00000000004d11aa in nfs_client_id_expire (clientid=0x7f2c4801a0f0, make_stale=false) at /root/nfs-ganesha/src/SAL/nfs4_clientid.c:1099
#7 0x00000000004470e2 in reap_hash_table (ht_reap=0xa0d890) at /root/nfs-ganesha/src/MainNFSD/nfs_reaper_thread.c:109
#8 0x0000000000447885 in reaper_run (ctx=0xa43590) at /root/nfs-ganesha/src/MainNFSD/nfs_reaper_thread.c:232
#9 0x0000000000510a1a in fridgethr_start_routine (arg=0xa43590) at /root/nfs-ganesha/src/support/fridgethr.c:550
#10 0x00007f2cb94cbe25 in start_thread (arg=0x7f2c517ea700) at pthread_create.c:308
#11 0x00007f2cb8dd3bad in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:113
(gdb) f 0
#0 0x00007f2cbaafdef5 in svc_release_it (xprt=0x7f2c64018750, flags=0, tag=0x7f2cbab25340 <func.8326> "clnt_vc_destroy", line=462) at /root/nfs-ganesha/src/libntirpc/ntirpc/rpc/svc.h:433
433 (*(xprt)->xp_ops->xp_destroy)(xprt, flags, tag, line);
(gdb) p xprt
$1 = (SVCXPRT *) 0x7f2c64018750
(gdb) p *xprt
$2 = {xp_ops = 0x2a, xp_dispatch = {process_cb = 0x0, rendezvous_cb = 0x0}, xp_parent = 0x2, xp_tp = 0x0, xp_netid = 0xffffffffffffffff <Address 0xffffffffffffffff out of bounds>, xp_p1 = 0x0,
xp_p2 = 0xffffffffffffffff, xp_p3 = 0x0, xp_u1 = 0xffffffffffffffff, xp_u2 = 0x0, xp_local = {nb = {maxlen = 4294967295, len = 4294967295, buf = 0x0}, ss = {ss_family = 65535,
__ss_padding = "\377\377\377\377\377\377\000\000\000\000\000\000\000\000\377\377\377\377\377\377\377\377\000\000\000\000\000\000\000\000\377\377\377\377\377\377\377\377\000\000\000\000\000\000\000\000\377\377\377\377\377\377\377\377\000\000\000\000\000\000\000\000\377\377\377\377\377\377\377\377\000\000\000\000\000\000\000\000\377\377\377\377\377\377\377\377", '\000' <repeats 31 times>, __ss_align = 0}},
xp_remote = {nb = {maxlen = 0, len = 0, buf = 0x0}, ss = {ss_family = 0, __ss_padding = '\000' <repeats 117 times>, __ss_align = 0}}, xp_lock = {__data = {__lock = 0, __count = 0, __owner = 0, __nusers = 0,
__kind = 0, __spins = 0, __elision = 0, __list = {__prev = 0x0, __next = 0x0}}, __size = '\000' <repeats 39 times>, __align = 0}, xp_fd = 0, xp_ifindex = 0, xp_si_type = 0, xp_type = 0, xp_refcnt = -1,
xp_flags = 64}
(gdb) f 4
#4 0x0000000000441693 in _nfs_rpc_destroy_chan (chan=0x7f2c4801a198) at /root/nfs-ganesha/src/MainNFSD/nfs_rpc_callback.c:628
628 CLNT_DESTROY(chan->clnt);
(gdb) p chan
$3 = (rpc_call_channel_t *) 0x7f2c4801a198
(gdb) p *chan
$4 = {type = RPC_CHAN_V40, mtx = {__data = {__lock = 1, __count = 0, __owner = 31067, __nusers = 1, __kind = 0, __spins = 0, __elision = 0, __list = {__prev = 0x0, __next = 0x0}},
__size = "\001\000\000\000\000\000\000\000[y\000\000\001", '\000' <repeats 26 times>, __align = 1}, states = 0, source = {clientid = 0x7f2c4801a0f0, session = 0x7f2c4801a0f0}, last_called = 0,
clnt = 0x7f2c64009ff0, auth = 0x0, gss_sec = {mech = 0x0, qop = 0, svc = 0, cred = 0x0, req_flags = 0}}
(gdb) p *chan->clnt
$5 = {cl_ops = 0x7f2cbad37420 <ops.8330>, cl_netid = 0x0, cl_tp = 0x0, cl_u1 = 0x0, cl_u2 = 0x0, cl_lock = {__data = {__lock = 0, __count = 0, __owner = 0, __nusers = 0, __kind = 3, __spins = 0, __elision = 0,
__list = {__prev = 0x0, __next = 0x0}}, __size = '\000' <repeats 16 times>, "\003", '\000' <repeats 22 times>, __align = 0}, cl_error = {ru = {RE_errno = 0, RE_why = AUTH_OK, RE_vers = {low = 0,
high = 0}, RE_lb = {s1 = 0, s2 = 0}}, re_status = RPC_SUCCESS}, cl_refcnt = 0, cl_flags = 96}
(gdb) p *chan->clnt->cl_ops
$6 = {cl_call = 0x7f2cbaafe74f <clnt_vc_call>, cl_abort = 0x7f2cbaafe9ae <clnt_vc_abort>, cl_freeres = 0x7f2cbaafe985 <clnt_vc_freeres>, cl_destroy = 0x7f2cbaafec15 <clnt_vc_destroy>,
cl_control = 0x7f2cbaafe9b8 <clnt_vc_control>}
(gdb)

@soumyakoduri

From debugging via gdb, I see that there could be cases where an xprt handle is released twice in svc_rqst_xprt_task().

752 /*static*/ void
753 svc_rqst_xprt_task(struct work_pool_entry *wpe)
754 {
755         struct rpc_dplx_rec *rec =
756         ....
...
760         /* atomic barrier (above) should protect following values */
761         if (rec->xprt.xp_refcnt > 1
762             && !(rec->xprt.xp_flags & SVC_XPRT_FLAG_DESTROYED)) {
763                 /* (idempotent) xp_flags and xp_refcnt are set atomic.
764                  * xp_refcnt need more than 1 (this task).
765                  */
766                 (void)clock_gettime(CLOCK_MONOTONIC_FAST, &(rec->recv.ts));
767                 (void)SVC_RECV(&rec->xprt);
768         }
769
770         /* If tests fail, log non-fatal "WARNING! already destroying!" */
771         SVC_RELEASE(&rec->xprt, SVC_RELEASE_FLAG_NONE);
772 }

Here at line 771, we release the xprt ref unconditionally, without any check for errors in SVC_RECV. But there could be cases where we encounter errors in SVC_RECV->svc_vc_recv(), which then calls SVC_DESTROY->svc_destroy_it->svc_release_it(). So we end up decrementing the ref twice, resulting in a use-after-free.

svc_destroy_it sets "SVC_XPRT_FLAG_DESTROYING" in xprt->xp_flags. Maybe we could check for that flag before calling SVC_RELEASE at line 771.
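In code, that suggestion would look roughly like this (a sketch only, reusing the names from the snippet above; SVC_XPRT_FLAG_DESTROYING is referenced as named here, and per the later comments this is not the fix that was eventually merged):

/*static*/ void
svc_rqst_xprt_task(struct work_pool_entry *wpe)
{
        struct rpc_dplx_rec *rec = ...;         /* as in the snippet above */

        if (rec->xprt.xp_refcnt > 1
            && !(rec->xprt.xp_flags & SVC_XPRT_FLAG_DESTROYED)) {
                (void)clock_gettime(CLOCK_MONOTONIC_FAST, &(rec->recv.ts));
                (void)SVC_RECV(&rec->xprt);
        }

        /* proposed check: only drop this task's ref if an error path inside
         * SVC_RECV() has not already destroyed the xprt (and dropped a ref) */
        if (!(rec->xprt.xp_flags & SVC_XPRT_FLAG_DESTROYING))
                SVC_RELEASE(&rec->xprt, SVC_RELEASE_FLAG_NONE);
}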

@dang .. thoughts?

@dang

dang commented Oct 17, 2018

I don't think that's it. The rpc_dplx_rec has a refcount on the svc_xprt which needs to be released when the xprt is destroyed (on failure). In addition, the event has a ref on the xprt (taken in svc_rqst_epoll_event() at line 868). This needs to be unconditionally released when the event is done, whether it succeeded or not, or else the xprt will be leaked.
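For reference, the ownership rule being described looks roughly like this (a simplified sketch, not the actual ntirpc code; event_producer/event_consumer are made-up names standing in for svc_rqst_epoll_event() and svc_rqst_xprt_task()):

static void
event_producer(SVCXPRT *xprt)
{
        /* the queued event takes its own reference on the xprt ... */
        SVC_REF(xprt, SVC_REF_FLAG_NONE);
        /* ... then enqueues the xprt task for this event ... */
}

static void
event_consumer(SVCXPRT *xprt)
{
        /* ... SVC_RECV()/request processing, which may fail ... */

        /* ... and the event's reference must always be dropped when the
         * event is done, success or failure, or the xprt is leaked */
        SVC_RELEASE(xprt, SVC_RELEASE_FLAG_NONE);
}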

@soumyakoduri

Okay, but the callback chan still has a reference to that xprt. I have collected traces using gdb and copied them to the link below. Please take a look.

https://paste.fedoraproject.org/paste/4YOgU70k7OPR6lmOWvwxOg

@dang

dang commented Oct 17, 2018

Okay, I think Ganesha needs its own ref on the client. Something like this:
https://paste.fedoraproject.org/paste/UNVXAwIR9S6A6GT~Q8f~Tw

@soumyakoduri

Thanks Dan, will verify the fix. But wouldn't it result in leaking the clnt object?
I mean, with this fix, when and where does chan->clnt get destroyed? And when it gets destroyed, wouldn't it again try to access the freed xprt?

@mattbenjamin

mattbenjamin commented Oct 17, 2018 via email

@soumyakoduri

Also, it's very easy to reproduce this issue (in case it's needed); Jiffin listed it in the first comment.
Run the pynfs NFS4.0 testcase below against the NFS-Ganesha server (delegations need not be enabled):

#./testserver.py -v --outfile ~/pynfs.run.1 --maketree localhost:/ --showomit --rundeps DELEG1

It will result in a use-after-free of the xprt object by the reaper thread, which may or may not result in a crash.

@dang

dang commented Oct 17, 2018

It's destroyed in _nfs_rpc_destroy_chan(), in the last backtrace of your set of backtraces.

@soumyakoduri

It's destroyed in _nfs_rpc_destroy_chan(), in the last backtrace of your set of backtraces.

Yes, but clnt->cl_refcnt always stays positive and never becomes zero now. At least for this particular testcase, I don't see 'clnt_vc_destroy' ever being called (after this fix is applied).

@dang

dang commented Oct 17, 2018

Hmm, okay, so it's not hitting the error case. Is it never hitting the error case, with the extra ref in place? If so, that would be very strange.
So, it seems I was wrong, and you were right. We should probably be checking the return value of SVC_RECV() in svc_rqst_xprt_task().

@soumyakoduri

Yes, it's not hitting the error case with the extra ref.

@soumyakoduri

I sent a patch to fix the same: #154

Please review the changes. Thanks!

@was4

was4 commented Oct 19, 2018 via email

@mattbenjamin

You've fixed problems and created them, just like everybody else. The overall refcounting design is mine; I designed it.

Matt

@dang

dang commented Oct 19, 2018

So, there appear to be multiple issues here. In fact, the client is not getting a ref if the xprt is newly created. svc_xprt_lookup() only returns with a single ref, the initial one which (per the comment above) belongs to the svc_xprt fd tree. This is clear both by code inspection and in the backtrace linked above. Thus, when there's an error, and so an early destroy, there's a use-after-free when the supposed client's ref is put by the reaper. This would be fixed by this:

https://paste.fedoraproject.org/paste/AYel7QSG2xRZyYwl~KfkuQ

The second issue is that we must be missing a put somewhere in the non-error case, or we'd be leaking xprts under normal use. This is less clear, and I'm still looking at it.
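The gist of that first fix, in sketch form (illustrative only; the real change is in the paste above, fd is just a placeholder, and the svc_xprt_lookup() arguments here are schematic):

        SVCXPRT *xprt = svc_xprt_lookup(fd, NULL);  /* comes back holding only the fd-tree ref */

        if (xprt) {
                /* take a reference owned by the client handle itself, so the
                 * later put from the reaper path is not a use-after-free when
                 * an error destroys the xprt early */
                SVC_REF(xprt, SVC_REF_FLAG_NONE);
                /* ... wrap xprt in the clnt handle ... */
        }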

@mattbenjamin

ack

@was4

was4 commented Oct 19, 2018 via email

@mattbenjamin

I created the interfaces to take and release refcounts in the transport interface, and they appear to still exist. No personally disparaging remark has been made.

@dang

dang commented Oct 19, 2018

Okay, I have working code with no leaks with the following patches. For ntirpc:
https://paste.fedoraproject.org/paste/PxGuEtOdsztrAEtFBbUTjQ
For Ganesha:
https://paste.fedoraproject.org/paste/mYqhXW5Sp89vSEDEflnLTw

@soumyakoduri can you test this with delegations?

@was4

was4 commented Oct 20, 2018 via email

@soumyakoduri

Okay, I have working code with no leaks with the following patches. For ntirpc:
https://paste.fedoraproject.org/paste/PxGuEtOdsztrAEtFBbUTjQ
For Ganesha:
https://paste.fedoraproject.org/paste/mYqhXW5Sp89vSEDEflnLTw

@soumyakoduri can you test this with delegations?

@dang, with the above changes I don't see the crash any more. Checked through gdb as well: while destroying chan->clnt, its xprt had a valid ref=1 and there isn't any use-after-free.

@was4

was4 commented Oct 21, 2018 via email

@soumyakoduri

Thanks Bill!

@Gaurav-Gangalwar

I am also facing a similar crash; I took this fix:
#155

With the extra ref, it looks like we will never run svc_vc_destroy_task for the xprt, so the socket fd (xp_fd) never gets closed, causing an fd leak even after SVC_DESTROY.
I tried this patch on top of the extra-ref patch above, and it seems to fix the issue.
I am not sure if it's the right way to fix it.
https://paste.fedoraproject.org/paste/fLYaI0rd1kB563m0SOPT1A

@soumyakoduri

Yes, #155 is the right and only fix needed in the ntirpc codebase to fix this crash issue. You shouldn't need any other fixes.

@was4

was4 commented Nov 28, 2018 via email

@Gaurav-Gangalwar

On 11/28/18 3:21 AM, Gaurav wrote: I am also facing similar crash, I took this fix #155 <#155> With extra ref, looks like we will never do svc_vc_destroy_task for xprt and socket fd (xp_fd) is never getting closed and causing fd leak even after SVC_DESTROY. I tried with this patch on top of extra ref patch above, it seems to fix the issue. I am not sure if its the right way to fix it. https://paste.fedoraproject.org/paste/fLYaI0rd1kB563m0SOPT1A

SVC_DESTROY isn't always (or even usually) the call that actually runs the svc_vc_destroy_task. There are usually more SVC_RELEASE calls later.

This latter patch is not a good idea. It breaks a lot of internal design. For one thing, close() also removes the fd from the epoll as a side effect. But then later, unregister will error on the fd not being in the epoll. You should unregister before close. We really REALLY don't want to close early (before the svc_vc_destroy_task). There are good reasons for that design. Half-open (one-sided close) TCP connections are not well handled.

Moreover, there is no fd leak. If the fd isn't used, periodic svc_xprt_foreach calls run through the xprt list looking for old ones that haven't had any activity.

It took many months to find this error. Everybody is affected. Isn't it time for an ntirpc 1.7.x release?

I checked with the latest next branch code.
I can see fd leaks with this test, mounting with TCP in a loop:
for i in {1..1000}; do echo $i; mount -t nfs -o vers=3,tcp :/export /mnt; umount /mnt; done

28/11/2018 13:55:26 : epoch 5bfee335 : centos71 : ganesha.nfsd-46782[svc_84] rpc :TIRPC :DEBUG :svc_xprt_lookup: fd 1054 max_connections 1024 exceeded

We are exceeding max_connections.

If I revert the commit for #155, then it works fine.

I agree we do SVC_RELEASE for all the refs taken and SVC_DESTROY will release the extra ref, but now we have an extra ref even after SVC_DESTROY and all the SVC_RELEASE calls. It makes sense to keep the xprt entry in the hash to reuse it, but we will need to close the xp_fd on SVC_DESTROY even if we are not releasing/destroying the xprt.

I agree we will need to handle epoll_ctl failure with this fix if we close xp_fd early. But I guess we will never reach xprt_unregister, as we will always have the extra ref.
Also, svc_xprt_foreach will do SVC_DESTROY only if the xprt is not already DESTROYED, so it will be a no-op in case SVC_DESTROY has already been called, and it won't release the extra ref.

@was4

was4 commented Nov 29, 2018 via email

@Gaurav-Gangalwar

Gaurav-Gangalwar commented Nov 30, 2018

No, it means we aren't doing "SVC_RELEASE for all the refs taken". The cleanup list has a ref. Maybe that's not a good design. Or maybe Ganesha umount code needs to SVC_DESTROY to avoid waiting for the cleanup task to run. Take it to the nfs-ganesha list.

How about this fix:
https://paste.fedoraproject.org/paste/lapz1NrOlBxS342S4Q79aw

It's aligned with the design: we can do SVC_RELEASE in the cleanup routine if the xprt is already destroyed, which will handle the extra ref after SVC_DESTROY.
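In other words, something along these lines in the periodic cleanup walk (a rough sketch of the proposal, not the reviewed patch; only the macro and flag names from this thread are assumed):

        if (xprt->xp_flags & SVC_XPRT_FLAG_DESTROYED) {
                /* SVC_DESTROY has already run; drop the lingering
                 * reference so the xprt (and its fd) can finally go away */
                SVC_RELEASE(xprt, SVC_RELEASE_FLAG_NONE);
        } else {
                /* normal idle-reap path */
                SVC_DESTROY(xprt);
        }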

@was4

was4 commented Dec 1, 2018 via email

@was4

was4 commented Dec 1, 2018 via email

@was4 was4 closed this as completed Dec 1, 2018
@thotz

thotz commented Mar 6, 2019

On the latest master I am still seeing a crash while running the pynfs test suite for delegations, during the cleanup part.
If needed, I can bisect to find which change caused the issue to reappear.

(gdb) bt
#0 0x0000000000000000 in ?? ()
#1 0x00007fa8b185bf3e in svc_release_it (xprt=0x7fa850000c90, flags=0, tag=0x7fa8b18837c0 <func.8331> "clnt_vc_destroy", line=462) at /root/nfs-ganesha/src/libntirpc/ntirpc/rpc/svc.h:440
#2 0x00007fa8b185cd0b in clnt_vc_destroy (clnt=0x7fa850000b70) at /root/nfs-ganesha/src/libntirpc/src/clnt_vc.c:462
#3 0x000000000044107a in clnt_release_it (clnt=0x7fa850000b70, flags=0, tag=0x579b80 <func.22987> "_nfs_rpc_destroy_chan", line=628) at /root/nfs-ganesha/src/libntirpc/ntirpc/rpc/clnt.h:319
#4 0x0000000000441110 in clnt_destroy_it (clnt=0x7fa850000b70, tag=0x579b80 <func.22987> "_nfs_rpc_destroy_chan", line=628) at /root/nfs-ganesha/src/libntirpc/ntirpc/rpc/clnt.h:341
#5 0x0000000000444629 in _nfs_rpc_destroy_chan (chan=0x7fa860010b18) at /root/nfs-ganesha/src/MainNFSD/nfs_rpc_callback.c:628
#6 0x0000000000445297 in nfs_rpc_destroy_chan (chan=0x7fa860010b18) at /root/nfs-ganesha/src/MainNFSD/nfs_rpc_callback.c:864
#7 0x00000000004d7e4f in nfs_client_id_expire (clientid=0x7fa860010a70, make_stale=false) at /root/nfs-ganesha/src/SAL/nfs4_clientid.c:1099
#8 0x000000000044a301 in reap_hash_table (ht_reap=0x1505920) at /root/nfs-ganesha/src/MainNFSD/nfs_reaper_thread.c:109
#9 0x000000000044aaa4 in reaper_run (ctx=0x1535a80) at /root/nfs-ganesha/src/MainNFSD/nfs_reaper_thread.c:232
#10 0x000000000051762e in fridgethr_start_routine (arg=0x1535a80) at /root/nfs-ganesha/src/support/fridgethr.c:552
#11 0x00007fa8b0022dd5 in start_thread (arg=0x7fa855ffb700) at pthread_create.c:307
#12 0x00007fa8af721ead in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:111
(gdb) f 1
#1 0x00007fa8b185bf3e in svc_release_it (xprt=0x7fa850000c90, flags=0, tag=0x7fa8b18837c0 <func.8331> "clnt_vc_destroy", line=462) at /root/nfs-ganesha/src/libntirpc/ntirpc/rpc/svc.h:440
440 (*(xprt)->xp_ops->xp_destroy)(xprt, flags, tag, line);
(gdb) p *(xprt)->xp_ops
$1 = {xp_recv = 0xbaadf00d, xp_stat = 0x861, xp_decode = 0x7fa850033190, xp_reply = 0x7fa850000c80, xp_checksum = 0x0, xp_destroy = 0x0, xp_control = 0xffffffffffffffff, xp_free_user_data = 0x0}
