From 0e442afd92fcdde2cc63b6f25556b8934e42b7d2 Mon Sep 17 00:00:00 2001
From: Roland Dreier <rdreier@cisco.com>
Date: Wed, 23 Sep 2009 11:10:15 -0700
Subject: [PATCH 1/5] IB/mad: Fix lock-lock-timer deadlock in RMPP code

Holding agent->lock across cancel_delayed_work() (which does
del_timer_sync()) in ib_cancel_rmpp_recvs() leads to lockdep reports of
possible lock-timer deadlocks if a consumer ever does something that
connects agent->lock to a lock taken in IRQ context (cf
http://marc.info/?l=linux-rdma&m=125243699026045).

Fix this by changing the list items to a new state "CANCELING" while
holding the lock, and then canceling the delayed work without holding
the lock.  If the delayed work runs after the lock is dropped, it will
see the state is CANCELING and return immediately, so the list will
stay stable while we traverse it with the lock not held.

Reviewed-by: Sean Hefty <sean.hefty@intel.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/core/mad_rmpp.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)
diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c
index 57a3c6f947b2..4e0f2829e0e5 100644
--- a/drivers/infiniband/core/mad_rmpp.c
+++ b/drivers/infiniband/core/mad_rmpp.c
@@ -37,7 +37,8 @@
 enum rmpp_state {
 	RMPP_STATE_ACTIVE,
 	RMPP_STATE_TIMEOUT,
-	RMPP_STATE_COMPLETE
+	RMPP_STATE_COMPLETE,
+	RMPP_STATE_CANCELING
 };
 
 struct mad_rmpp_recv {
@@ -86,19 +87,23 @@ void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent)
 	unsigned long flags;
 
 	spin_lock_irqsave(&agent->lock, flags);
+	list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) {
+		if (rmpp_recv->state != RMPP_STATE_COMPLETE)
+			ib_free_recv_mad(rmpp_recv->rmpp_wc);
+		rmpp_recv->state = RMPP_STATE_CANCELING;
+	}
+	spin_unlock_irqrestore(&agent->lock, flags);
+
 	list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) {
 		cancel_delayed_work(&rmpp_recv->timeout_work);
 		cancel_delayed_work(&rmpp_recv->cleanup_work);
 	}
-	spin_unlock_irqrestore(&agent->lock, flags);
 
 	flush_workqueue(agent->qp_info->port_priv->wq);
 
 	list_for_each_entry_safe(rmpp_recv, temp_rmpp_recv,
 				 &agent->rmpp_list, list) {
 		list_del(&rmpp_recv->list);
-		if (rmpp_recv->state != RMPP_STATE_COMPLETE)
-			ib_free_recv_mad(rmpp_recv->rmpp_wc);
 		destroy_rmpp_recv(rmpp_recv);
 	}
 }
@@ -260,6 +265,10 @@ static void recv_cleanup_handler(struct work_struct *work)
 	unsigned long flags;
 
 	spin_lock_irqsave(&rmpp_recv->agent->lock, flags);
+	if (rmpp_recv->state == RMPP_STATE_CANCELING) {
+		spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags);
+		return;
+	}
 	list_del(&rmpp_recv->list);
 	spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags);
 	destroy_rmpp_recv(rmpp_recv);

From bdf643816a2017eba9af280b6d29ef4213358984 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Thu, 24 Sep 2009 10:59:34 -0700
Subject: [PATCH 2/5] RDMA/nes: Remove duplicate .ndo_set_mac_address field
 initialization

The definition of nes_netdev_ops has initializations of a local function
and eth_mac_addr for its ndo_set_mac_address field.  This change uses only
the local function.

The semantic match that finds this problem is as follows:
(http://coccinelle.lip6.fr/)

// <smpl>
@r@
identifier I, s, fld;
position p0,p;
expression E;
@@

struct I s =@p0 { ... .fld@p = E, ...};

@s@
identifier I, s, r.fld;
position r.p0,p;
expression E;
@@

struct I s =@p0 { ... .fld@p = E, ...};

@script:python@
p0 << r.p0;
fld << r.fld;
ps << s.p;
pr << r.p;
@@

if int(ps[0].line)!=int(pr[0].line) or int(ps[0].column)!=int(pr[0].column):
  cocci.print_main(fld,p0)
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/nes/nes_nic.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c
index c6e6611d3016..0bfa2546569f 100644
--- a/drivers/infiniband/hw/nes/nes_nic.c
+++ b/drivers/infiniband/hw/nes/nes_nic.c
@@ -1566,7 +1566,6 @@ static const struct net_device_ops nes_netdev_ops = {
 	.ndo_set_mac_address	= nes_netdev_set_mac_address,
 	.ndo_set_multicast_list = nes_netdev_set_multicast_list,
 	.ndo_change_mtu		= nes_netdev_change_mtu,
-	.ndo_set_mac_address 	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_vlan_rx_register 	= nes_netdev_vlan_rx_register,
 };

From c57e20dcff981c39e43c857f3997095bacb2223f Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.co.il>
Date: Thu, 24 Sep 2009 11:03:03 -0700
Subject: [PATCH 3/5] mlx4_core: Pass cache line size to device FW

ConnectX can work more efficiently if the CPU cache line size is passed
to it with the INIT_HCA firmware command.

Signed-off-by: Eli Cohen <eli@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/net/mlx4/fw.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c
index cee199ceba2f..3c16602172fc 100644
--- a/drivers/net/mlx4/fw.c
+++ b/drivers/net/mlx4/fw.c
@@ -33,6 +33,7 @@
  */
 
 #include <linux/mlx4/cmd.h>
+#include <linux/cache.h>
 
 #include "fw.h"
 #include "icm.h"
@@ -698,6 +699,7 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
 #define INIT_HCA_IN_SIZE		 0x200
 #define INIT_HCA_VERSION_OFFSET		 0x000
 #define	 INIT_HCA_VERSION		 2
+#define INIT_HCA_CACHELINE_SZ_OFFSET	 0x0e
 #define INIT_HCA_FLAGS_OFFSET		 0x014
 #define INIT_HCA_QPC_OFFSET		 0x020
 #define	 INIT_HCA_QPC_BASE_OFFSET	 (INIT_HCA_QPC_OFFSET + 0x10)
@@ -735,6 +737,9 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
 
 	*((u8 *) mailbox->buf + INIT_HCA_VERSION_OFFSET) = INIT_HCA_VERSION;
 
+	*((u8 *) mailbox->buf + INIT_HCA_CACHELINE_SZ_OFFSET) =
+		(ilog2(cache_line_size()) - 4) << 5;
+
 #if defined(__LITTLE_ENDIAN)
 	*(inbox + INIT_HCA_FLAGS_OFFSET / 4) &= ~cpu_to_be32(1 << 1);
 #elif defined(__BIG_ENDIAN)

From d686159e50c57788001001e9537aa8b4bbc38001 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Thu, 24 Sep 2009 11:55:41 -0700
Subject: [PATCH 4/5] IB/mthca: Fix access to freed memory in catastrophic
 event handling

catas_reset() uses a pointer to mthca_dev, but mthca_dev is not valid
after the call to __mthca_restart_one().

Based on a similar patch for mlx4 (634354d7, "mlx4: Fix access to
freed memory") by Vitaliy Gusev <vgusev@openvz.org>

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/mthca/mthca_catas.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c
index 056b2a4c6970..0aa0110e4b6c 100644
--- a/drivers/infiniband/hw/mthca/mthca_catas.c
+++ b/drivers/infiniband/hw/mthca/mthca_catas.c
@@ -68,11 +68,16 @@ static void catas_reset(struct work_struct *work)
 	spin_unlock_irq(&catas_lock);
 
 	list_for_each_entry_safe(dev, tmpdev, &tlist, catas_err.list) {
+		struct pci_dev *pdev = dev->pdev;
 		ret = __mthca_restart_one(dev->pdev);
+		/* 'dev' now is not valid */
 		if (ret)
-			mthca_err(dev, "Reset failed (%d)\n", ret);
-		else
-			mthca_dbg(dev, "Reset succeeded\n");
+			printk(KERN_ERR "mthca %s: Reset failed (%d)\n",
+			       pci_name(pdev), ret);
+		else {
+			struct mthca_dev *d = pci_get_drvdata(pdev);
+			mthca_dbg(d, "Reset succeeded\n");
+		}
 	}
 
 	mutex_unlock(&mthca_device_mutex);

From 5ee95120841fd623c48d7d971182cf58e3b0c8de Mon Sep 17 00:00:00 2001
From: Moni Shoua <monis@Voltaire.COM>
Date: Thu, 24 Sep 2009 12:01:05 -0700
Subject: [PATCH 5/5] IPoIB: Don't turn on carrier for a non-active port

Multicast joins can succeed even if the IB port is down.  This happens
when the SM runs on the same port with the requesting port.  However,
IPoIB calls netif_carrier_on() when the join of the broadcast group
succeeds, without caring about the state of the IB port.  The result
is an IPoIB interface in RUNNING state but without an active IB port
to support it.

If a bonding interface uses this IPoIB interface as a slave it might
not detect that this slave is almost useless and failover
functionality will be damaged.  The fix checks the state of the IB
port in the carrier_task before calling netif_carrier_on().

Adresses: https://bugs.openfabrics.org/show_bug.cgi?id=1726
Signed-off-by: Moni Shoua <monis@voltaire.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 25874fc680c9..8763c1ea5eb4 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -362,12 +362,19 @@ void ipoib_mcast_carrier_on_task(struct work_struct *work)
 {
 	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
 						   carrier_on_task);
+	struct ib_port_attr attr;
 
 	/*
 	 * Take rtnl_lock to avoid racing with ipoib_stop() and
 	 * turning the carrier back on while a device is being
 	 * removed.
 	 */
+	if (ib_query_port(priv->ca, priv->port, &attr) ||
+	    attr.state != IB_PORT_ACTIVE) {
+		ipoib_dbg(priv, "Keeping carrier off until IB port is active\n");
+		return;
+	}
+
 	rtnl_lock();
 	netif_carrier_on(priv->dev);
 	rtnl_unlock();