From 0e442afd92fcdde2cc63b6f25556b8934e42b7d2 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Wed, 23 Sep 2009 11:10:15 -0700 Subject: [PATCH 1/5] IB/mad: Fix lock-lock-timer deadlock in RMPP code Holding agent->lock across cancel_delayed_work() (which does del_timer_sync()) in ib_cancel_rmpp_recvs() leads to lockdep reports of possible lock-timer deadlocks if a consumer ever does something that connects agent->lock to a lock taken in IRQ context (cf http://marc.info/?l=linux-rdma&m=125243699026045). Fix this by changing the list items to a new state "CANCELING" while holding the lock, and then canceling the delayed work without holding the lock. If the delayed work runs after the lock is dropped, it will see the state is CANCELING and return immediately, so the list will stay stable while we traverse it with the lock not held. Reviewed-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/mad_rmpp.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c index 57a3c6f947b2..4e0f2829e0e5 100644 --- a/drivers/infiniband/core/mad_rmpp.c +++ b/drivers/infiniband/core/mad_rmpp.c @@ -37,7 +37,8 @@ enum rmpp_state { RMPP_STATE_ACTIVE, RMPP_STATE_TIMEOUT, - RMPP_STATE_COMPLETE + RMPP_STATE_COMPLETE, + RMPP_STATE_CANCELING }; struct mad_rmpp_recv { @@ -86,19 +87,23 @@ void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent) unsigned long flags; spin_lock_irqsave(&agent->lock, flags); + list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) { + if (rmpp_recv->state != RMPP_STATE_COMPLETE) + ib_free_recv_mad(rmpp_recv->rmpp_wc); + rmpp_recv->state = RMPP_STATE_CANCELING; + } + spin_unlock_irqrestore(&agent->lock, flags); + list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) { cancel_delayed_work(&rmpp_recv->timeout_work); cancel_delayed_work(&rmpp_recv->cleanup_work); } - spin_unlock_irqrestore(&agent->lock, flags); flush_workqueue(agent->qp_info->port_priv->wq); list_for_each_entry_safe(rmpp_recv, temp_rmpp_recv, &agent->rmpp_list, list) { list_del(&rmpp_recv->list); - if (rmpp_recv->state != RMPP_STATE_COMPLETE) - ib_free_recv_mad(rmpp_recv->rmpp_wc); destroy_rmpp_recv(rmpp_recv); } } @@ -260,6 +265,10 @@ static void recv_cleanup_handler(struct work_struct *work) unsigned long flags; spin_lock_irqsave(&rmpp_recv->agent->lock, flags); + if (rmpp_recv->state == RMPP_STATE_CANCELING) { + spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags); + return; + } list_del(&rmpp_recv->list); spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags); destroy_rmpp_recv(rmpp_recv); From bdf643816a2017eba9af280b6d29ef4213358984 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Thu, 24 Sep 2009 10:59:34 -0700 Subject: [PATCH 2/5] RDMA/nes: Remove duplicate .ndo_set_mac_address field initialization The definition of nes_netdev_ops has initializations of a local function and eth_mac_addr for its ndo_set_mac_address field. This change uses only the local function. The semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @r@ identifier I, s, fld; position p0,p; expression E; @@ struct I s =@p0 { ... .fld@p = E, ...}; @s@ identifier I, s, r.fld; position r.p0,p; expression E; @@ struct I s =@p0 { ... .fld@p = E, ...}; @script:python@ p0 << r.p0; fld << r.fld; ps << s.p; pr << r.p; @@ if int(ps[0].line)!=int(pr[0].line) or int(ps[0].column)!=int(pr[0].column): cocci.print_main(fld,p0) // Signed-off-by: Julia Lawall Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes_nic.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index c6e6611d3016..0bfa2546569f 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -1566,7 +1566,6 @@ static const struct net_device_ops nes_netdev_ops = { .ndo_set_mac_address = nes_netdev_set_mac_address, .ndo_set_multicast_list = nes_netdev_set_multicast_list, .ndo_change_mtu = nes_netdev_change_mtu, - .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, .ndo_vlan_rx_register = nes_netdev_vlan_rx_register, }; From c57e20dcff981c39e43c857f3997095bacb2223f Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 24 Sep 2009 11:03:03 -0700 Subject: [PATCH 3/5] mlx4_core: Pass cache line size to device FW ConnectX can work more efficiently if the CPU cache line size is passed to it with the INIT_HCA firmware command. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/net/mlx4/fw.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c index cee199ceba2f..3c16602172fc 100644 --- a/drivers/net/mlx4/fw.c +++ b/drivers/net/mlx4/fw.c @@ -33,6 +33,7 @@ */ #include +#include #include "fw.h" #include "icm.h" @@ -698,6 +699,7 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) #define INIT_HCA_IN_SIZE 0x200 #define INIT_HCA_VERSION_OFFSET 0x000 #define INIT_HCA_VERSION 2 +#define INIT_HCA_CACHELINE_SZ_OFFSET 0x0e #define INIT_HCA_FLAGS_OFFSET 0x014 #define INIT_HCA_QPC_OFFSET 0x020 #define INIT_HCA_QPC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x10) @@ -735,6 +737,9 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) *((u8 *) mailbox->buf + INIT_HCA_VERSION_OFFSET) = INIT_HCA_VERSION; + *((u8 *) mailbox->buf + INIT_HCA_CACHELINE_SZ_OFFSET) = + (ilog2(cache_line_size()) - 4) << 5; + #if defined(__LITTLE_ENDIAN) *(inbox + INIT_HCA_FLAGS_OFFSET / 4) &= ~cpu_to_be32(1 << 1); #elif defined(__BIG_ENDIAN) From d686159e50c57788001001e9537aa8b4bbc38001 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 24 Sep 2009 11:55:41 -0700 Subject: [PATCH 4/5] IB/mthca: Fix access to freed memory in catastrophic event handling catas_reset() uses a pointer to mthca_dev, but mthca_dev is not valid after the call to __mthca_restart_one(). Based on a similar patch for mlx4 (634354d7, "mlx4: Fix access to freed memory") by Vitaliy Gusev Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_catas.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c index 056b2a4c6970..0aa0110e4b6c 100644 --- a/drivers/infiniband/hw/mthca/mthca_catas.c +++ b/drivers/infiniband/hw/mthca/mthca_catas.c @@ -68,11 +68,16 @@ static void catas_reset(struct work_struct *work) spin_unlock_irq(&catas_lock); list_for_each_entry_safe(dev, tmpdev, &tlist, catas_err.list) { + struct pci_dev *pdev = dev->pdev; ret = __mthca_restart_one(dev->pdev); + /* 'dev' now is not valid */ if (ret) - mthca_err(dev, "Reset failed (%d)\n", ret); - else - mthca_dbg(dev, "Reset succeeded\n"); + printk(KERN_ERR "mthca %s: Reset failed (%d)\n", + pci_name(pdev), ret); + else { + struct mthca_dev *d = pci_get_drvdata(pdev); + mthca_dbg(d, "Reset succeeded\n"); + } } mutex_unlock(&mthca_device_mutex); From 5ee95120841fd623c48d7d971182cf58e3b0c8de Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Thu, 24 Sep 2009 12:01:05 -0700 Subject: [PATCH 5/5] IPoIB: Don't turn on carrier for a non-active port Multicast joins can succeed even if the IB port is down. This happens when the SM runs on the same port with the requesting port. However, IPoIB calls netif_carrier_on() when the join of the broadcast group succeeds, without caring about the state of the IB port. The result is an IPoIB interface in RUNNING state but without an active IB port to support it. If a bonding interface uses this IPoIB interface as a slave it might not detect that this slave is almost useless and failover functionality will be damaged. The fix checks the state of the IB port in the carrier_task before calling netif_carrier_on(). Adresses: https://bugs.openfabrics.org/show_bug.cgi?id=1726 Signed-off-by: Moni Shoua Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 25874fc680c9..8763c1ea5eb4 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -362,12 +362,19 @@ void ipoib_mcast_carrier_on_task(struct work_struct *work) { struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, carrier_on_task); + struct ib_port_attr attr; /* * Take rtnl_lock to avoid racing with ipoib_stop() and * turning the carrier back on while a device is being * removed. */ + if (ib_query_port(priv->ca, priv->port, &attr) || + attr.state != IB_PORT_ACTIVE) { + ipoib_dbg(priv, "Keeping carrier off until IB port is active\n"); + return; + } + rtnl_lock(); netif_carrier_on(priv->dev); rtnl_unlock();