IB/rdmavt: Handle dereg of inuse MRs properly
A destroy of an MR prior to destroying the QP can cause the following diagnostic if the QP is referencing the MR being de-registered: hfi1 0000:05:00.0: hfi1_0: rvt_dereg_mr timeout mr ffff8808562108 00 pd ffff880859b20b00 The solution is that, when a non-zero refcount is encountered while the MR is being destroyed, the QPs need to be iterated, looking for QPs in the same PD as the MR. If rvt_qp_mr_clean() detects that any such QP references the rkey/lkey, the QP needs to be put into an error state via a call to rvt_error_qp(), which will trigger the clean up of any stuck references. This solution is as specified in IBTA 1.3 Volume 1 11.2.10.5. [This is reproduced with the 0.4.9 version of qperf and the rc_bw test] Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com> Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com> Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com> Signed-off-by: Doug Ledford <dledford@redhat.com>
This commit is contained in:
parent
557fafe1bf
commit
0208da90de
|
@ -440,6 +440,105 @@ bail_umem:
|
|||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* rvt_dereg_clean_qp_cb - callback from iterator
|
||||
* @qp - the qp
|
||||
* @v - the mregion (as u64)
|
||||
*
|
||||
* This routine fields the callback for all QPs and
|
||||
* for QPs in the same PD as the MR will call the
|
||||
* rvt_qp_mr_clean() to potentially cleanup references.
|
||||
*/
|
||||
static void rvt_dereg_clean_qp_cb(struct rvt_qp *qp, u64 v)
|
||||
{
|
||||
struct rvt_mregion *mr = (struct rvt_mregion *)v;
|
||||
|
||||
/* skip PDs that are not ours */
|
||||
if (mr->pd != qp->ibqp.pd)
|
||||
return;
|
||||
rvt_qp_mr_clean(qp, mr->lkey);
|
||||
}
|
||||
|
||||
/**
|
||||
* rvt_dereg_clean_qps - find QPs for reference cleanup
|
||||
* @mr - the MR that is being deregistered
|
||||
*
|
||||
* This routine iterates RC QPs looking for references
|
||||
* to the lkey noted in mr.
|
||||
*/
|
||||
static void rvt_dereg_clean_qps(struct rvt_mregion *mr)
|
||||
{
|
||||
struct rvt_dev_info *rdi = ib_to_rvt(mr->pd->device);
|
||||
|
||||
rvt_qp_iter(rdi, (u64)mr, rvt_dereg_clean_qp_cb);
|
||||
}
|
||||
|
||||
/**
|
||||
* rvt_check_refs - check references
|
||||
* @mr - the megion
|
||||
* @t - the caller identification
|
||||
*
|
||||
* This routine checks MRs holding a reference during
|
||||
* when being de-registered.
|
||||
*
|
||||
* If the count is non-zero, the code calls a clean routine then
|
||||
* waits for the timeout for the count to zero.
|
||||
*/
|
||||
static int rvt_check_refs(struct rvt_mregion *mr, const char *t)
|
||||
{
|
||||
unsigned long timeout;
|
||||
struct rvt_dev_info *rdi = ib_to_rvt(mr->pd->device);
|
||||
|
||||
if (percpu_ref_is_zero(&mr->refcount))
|
||||
return 0;
|
||||
/* avoid dma mr */
|
||||
if (mr->lkey)
|
||||
rvt_dereg_clean_qps(mr);
|
||||
timeout = wait_for_completion_timeout(&mr->comp, 5 * HZ);
|
||||
if (!timeout) {
|
||||
rvt_pr_err(rdi,
|
||||
"%s timeout mr %p pd %p lkey %x refcount %ld\n",
|
||||
t, mr, mr->pd, mr->lkey,
|
||||
atomic_long_read(&mr->refcount.count));
|
||||
rvt_get_mr(mr);
|
||||
return -EBUSY;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* rvt_mr_has_lkey - is MR
|
||||
* @mr - the mregion
|
||||
* @lkey - the lkey
|
||||
*/
|
||||
bool rvt_mr_has_lkey(struct rvt_mregion *mr, u32 lkey)
|
||||
{
|
||||
return mr && lkey == mr->lkey;
|
||||
}
|
||||
|
||||
/**
|
||||
* rvt_ss_has_lkey - is mr in sge tests
|
||||
* @ss - the sge state
|
||||
* @lkey
|
||||
*
|
||||
* This code tests for an MR in the indicated
|
||||
* sge state.
|
||||
*/
|
||||
bool rvt_ss_has_lkey(struct rvt_sge_state *ss, u32 lkey)
|
||||
{
|
||||
int i;
|
||||
bool rval = false;
|
||||
|
||||
if (!ss->num_sge)
|
||||
return rval;
|
||||
/* first one */
|
||||
rval = rvt_mr_has_lkey(ss->sge.mr, lkey);
|
||||
/* any others */
|
||||
for (i = 0; !rval && i < ss->num_sge - 1; i++)
|
||||
rval = rvt_mr_has_lkey(ss->sg_list[i].mr, lkey);
|
||||
return rval;
|
||||
}
|
||||
|
||||
/**
|
||||
* rvt_dereg_mr - unregister and free a memory region
|
||||
* @ibmr: the memory region to free
|
||||
|
@ -453,22 +552,14 @@ bail_umem:
|
|||
int rvt_dereg_mr(struct ib_mr *ibmr)
|
||||
{
|
||||
struct rvt_mr *mr = to_imr(ibmr);
|
||||
struct rvt_dev_info *rdi = ib_to_rvt(ibmr->pd->device);
|
||||
int ret = 0;
|
||||
unsigned long timeout;
|
||||
int ret;
|
||||
|
||||
rvt_free_lkey(&mr->mr);
|
||||
|
||||
rvt_put_mr(&mr->mr); /* will set completion if last */
|
||||
timeout = wait_for_completion_timeout(&mr->mr.comp, 5 * HZ);
|
||||
if (!timeout) {
|
||||
rvt_pr_err(rdi,
|
||||
"rvt_dereg_mr timeout mr %p pd %p\n",
|
||||
mr, mr->mr.pd);
|
||||
rvt_get_mr(&mr->mr);
|
||||
ret = -EBUSY;
|
||||
ret = rvt_check_refs(&mr->mr, __func__);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
rvt_deinit_mregion(&mr->mr);
|
||||
if (mr->umem)
|
||||
ib_umem_release(mr->umem);
|
||||
|
@ -761,16 +852,12 @@ int rvt_dealloc_fmr(struct ib_fmr *ibfmr)
|
|||
{
|
||||
struct rvt_fmr *fmr = to_ifmr(ibfmr);
|
||||
int ret = 0;
|
||||
unsigned long timeout;
|
||||
|
||||
rvt_free_lkey(&fmr->mr);
|
||||
rvt_put_mr(&fmr->mr); /* will set completion if last */
|
||||
timeout = wait_for_completion_timeout(&fmr->mr.comp, 5 * HZ);
|
||||
if (!timeout) {
|
||||
rvt_get_mr(&fmr->mr);
|
||||
ret = -EBUSY;
|
||||
ret = rvt_check_refs(&fmr->mr, __func__);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
rvt_deinit_mregion(&fmr->mr);
|
||||
kfree(fmr);
|
||||
out:
|
||||
|
|
|
@ -458,10 +458,7 @@ static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends)
|
|||
}
|
||||
}
|
||||
|
||||
if (qp->ibqp.qp_type != IB_QPT_RC)
|
||||
return;
|
||||
|
||||
for (n = 0; n < rvt_max_atomic(rdi); n++) {
|
||||
for (n = 0; qp->s_ack_queue && n < rvt_max_atomic(rdi); n++) {
|
||||
struct rvt_ack_entry *e = &qp->s_ack_queue[n];
|
||||
|
||||
if (e->rdma_sge.mr) {
|
||||
|
@ -471,6 +468,113 @@ static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends)
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* rvt_swqe_has_lkey - return true if lkey is used by swqe
|
||||
* @wqe - the send wqe
|
||||
* @lkey - the lkey
|
||||
*
|
||||
* Test the swqe for using lkey
|
||||
*/
|
||||
static bool rvt_swqe_has_lkey(struct rvt_swqe *wqe, u32 lkey)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < wqe->wr.num_sge; i++) {
|
||||
struct rvt_sge *sge = &wqe->sg_list[i];
|
||||
|
||||
if (rvt_mr_has_lkey(sge->mr, lkey))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* rvt_qp_sends_has_lkey - return true is qp sends use lkey
|
||||
* @qp - the rvt_qp
|
||||
* @lkey - the lkey
|
||||
*/
|
||||
static bool rvt_qp_sends_has_lkey(struct rvt_qp *qp, u32 lkey)
|
||||
{
|
||||
u32 s_last = qp->s_last;
|
||||
|
||||
while (s_last != qp->s_head) {
|
||||
struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, s_last);
|
||||
|
||||
if (rvt_swqe_has_lkey(wqe, lkey))
|
||||
return true;
|
||||
|
||||
if (++s_last >= qp->s_size)
|
||||
s_last = 0;
|
||||
}
|
||||
if (qp->s_rdma_mr)
|
||||
if (rvt_mr_has_lkey(qp->s_rdma_mr, lkey))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* rvt_qp_acks_has_lkey - return true if acks have lkey
|
||||
* @qp - the qp
|
||||
* @lkey - the lkey
|
||||
*/
|
||||
static bool rvt_qp_acks_has_lkey(struct rvt_qp *qp, u32 lkey)
|
||||
{
|
||||
int i;
|
||||
struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
|
||||
|
||||
for (i = 0; qp->s_ack_queue && i < rvt_max_atomic(rdi); i++) {
|
||||
struct rvt_ack_entry *e = &qp->s_ack_queue[i];
|
||||
|
||||
if (rvt_mr_has_lkey(e->rdma_sge.mr, lkey))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* rvt_qp_mr_clean - clean up remote ops for lkey
|
||||
* @qp - the qp
|
||||
* @lkey - the lkey that is being de-registered
|
||||
*
|
||||
* This routine checks if the lkey is being used by
|
||||
* the qp.
|
||||
*
|
||||
* If so, the qp is put into an error state to elminate
|
||||
* any references from the qp.
|
||||
*/
|
||||
void rvt_qp_mr_clean(struct rvt_qp *qp, u32 lkey)
|
||||
{
|
||||
bool lastwqe = false;
|
||||
|
||||
if (qp->ibqp.qp_type == IB_QPT_SMI ||
|
||||
qp->ibqp.qp_type == IB_QPT_GSI)
|
||||
/* avoid special QPs */
|
||||
return;
|
||||
spin_lock_irq(&qp->r_lock);
|
||||
spin_lock(&qp->s_hlock);
|
||||
spin_lock(&qp->s_lock);
|
||||
|
||||
if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET)
|
||||
goto check_lwqe;
|
||||
|
||||
if (rvt_ss_has_lkey(&qp->r_sge, lkey) ||
|
||||
rvt_qp_sends_has_lkey(qp, lkey) ||
|
||||
rvt_qp_acks_has_lkey(qp, lkey))
|
||||
lastwqe = rvt_error_qp(qp, IB_WC_LOC_PROT_ERR);
|
||||
check_lwqe:
|
||||
spin_unlock(&qp->s_lock);
|
||||
spin_unlock(&qp->s_hlock);
|
||||
spin_unlock_irq(&qp->r_lock);
|
||||
if (lastwqe) {
|
||||
struct ib_event ev;
|
||||
|
||||
ev.device = qp->ibqp.device;
|
||||
ev.element.qp = &qp->ibqp;
|
||||
ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
|
||||
qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* rvt_remove_qp - remove qp form table
|
||||
* @rdi: rvt dev struct
|
||||
|
|
|
@ -191,4 +191,7 @@ static inline void rvt_skip_sge(struct rvt_sge_state *ss, u32 length,
|
|||
}
|
||||
}
|
||||
|
||||
bool rvt_ss_has_lkey(struct rvt_sge_state *ss, u32 lkey);
|
||||
bool rvt_mr_has_lkey(struct rvt_mregion *mr, u32 lkey);
|
||||
|
||||
#endif /* DEF_RDMAVT_INCMRH */
|
||||
|
|
|
@ -702,4 +702,5 @@ int rvt_qp_iter_next(struct rvt_qp_iter *iter);
|
|||
void rvt_qp_iter(struct rvt_dev_info *rdi,
|
||||
u64 v,
|
||||
void (*cb)(struct rvt_qp *qp, u64 v));
|
||||
void rvt_qp_mr_clean(struct rvt_qp *qp, u32 lkey);
|
||||
#endif /* DEF_RDMAVT_INCQP_H */
|
||||
|
|
Loading…
Reference in New Issue