IB/cm: Do not queue work to a device that's going away
Whenever ib_cm gets a remove_one call, such as when there is a hot-unplug event, the driver should mark itself as going down and confirm that no new work items are going to be queued for that device. So the order of the actions is: 1. mark the going_down bit. 2. flush the wq. 3. [make sure no new work items for that device.] 4. unregister the mad agent. Otherwise, work items that are already queued can be scheduled after the mad agent was freed. Signed-off-by: Erez Shitrit <erezsh@mellanox.com> Signed-off-by: Doug Ledford <dledford@redhat.com>
This commit is contained in:
parent
3fdf70acec
commit
be4b499323
1 changed files with 55 additions and 6 deletions
|
@ -169,6 +169,7 @@ struct cm_device {
|
||||||
struct ib_device *ib_device;
|
struct ib_device *ib_device;
|
||||||
struct device *device;
|
struct device *device;
|
||||||
u8 ack_delay;
|
u8 ack_delay;
|
||||||
|
int going_down;
|
||||||
struct cm_port *port[0];
|
struct cm_port *port[0];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -805,6 +806,11 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
|
||||||
{
|
{
|
||||||
int wait_time;
|
int wait_time;
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
|
struct cm_device *cm_dev;
|
||||||
|
|
||||||
|
cm_dev = ib_get_client_data(cm_id_priv->id.device, &cm_client);
|
||||||
|
if (!cm_dev)
|
||||||
|
return;
|
||||||
|
|
||||||
spin_lock_irqsave(&cm.lock, flags);
|
spin_lock_irqsave(&cm.lock, flags);
|
||||||
cm_cleanup_timewait(cm_id_priv->timewait_info);
|
cm_cleanup_timewait(cm_id_priv->timewait_info);
|
||||||
|
@ -818,8 +824,14 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
|
||||||
*/
|
*/
|
||||||
cm_id_priv->id.state = IB_CM_TIMEWAIT;
|
cm_id_priv->id.state = IB_CM_TIMEWAIT;
|
||||||
wait_time = cm_convert_to_ms(cm_id_priv->av.timeout);
|
wait_time = cm_convert_to_ms(cm_id_priv->av.timeout);
|
||||||
queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work,
|
|
||||||
msecs_to_jiffies(wait_time));
|
/* Check if the device started its remove_one */
|
||||||
|
spin_lock_irq(&cm.lock);
|
||||||
|
if (!cm_dev->going_down)
|
||||||
|
queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work,
|
||||||
|
msecs_to_jiffies(wait_time));
|
||||||
|
spin_unlock_irq(&cm.lock);
|
||||||
|
|
||||||
cm_id_priv->timewait_info = NULL;
|
cm_id_priv->timewait_info = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3305,6 +3317,11 @@ static int cm_establish(struct ib_cm_id *cm_id)
|
||||||
struct cm_work *work;
|
struct cm_work *work;
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
struct cm_device *cm_dev;
|
||||||
|
|
||||||
|
cm_dev = ib_get_client_data(cm_id->device, &cm_client);
|
||||||
|
if (!cm_dev)
|
||||||
|
return -ENODEV;
|
||||||
|
|
||||||
work = kmalloc(sizeof *work, GFP_ATOMIC);
|
work = kmalloc(sizeof *work, GFP_ATOMIC);
|
||||||
if (!work)
|
if (!work)
|
||||||
|
@ -3343,7 +3360,17 @@ static int cm_establish(struct ib_cm_id *cm_id)
|
||||||
work->remote_id = cm_id->remote_id;
|
work->remote_id = cm_id->remote_id;
|
||||||
work->mad_recv_wc = NULL;
|
work->mad_recv_wc = NULL;
|
||||||
work->cm_event.event = IB_CM_USER_ESTABLISHED;
|
work->cm_event.event = IB_CM_USER_ESTABLISHED;
|
||||||
queue_delayed_work(cm.wq, &work->work, 0);
|
|
||||||
|
/* Check if the device started its remove_one */
|
||||||
|
spin_lock_irq(&cm.lock);
|
||||||
|
if (!cm_dev->going_down) {
|
||||||
|
queue_delayed_work(cm.wq, &work->work, 0);
|
||||||
|
} else {
|
||||||
|
kfree(work);
|
||||||
|
ret = -ENODEV;
|
||||||
|
}
|
||||||
|
spin_unlock_irq(&cm.lock);
|
||||||
|
|
||||||
out:
|
out:
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -3394,6 +3421,7 @@ static void cm_recv_handler(struct ib_mad_agent *mad_agent,
|
||||||
enum ib_cm_event_type event;
|
enum ib_cm_event_type event;
|
||||||
u16 attr_id;
|
u16 attr_id;
|
||||||
int paths = 0;
|
int paths = 0;
|
||||||
|
int going_down = 0;
|
||||||
|
|
||||||
switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) {
|
switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) {
|
||||||
case CM_REQ_ATTR_ID:
|
case CM_REQ_ATTR_ID:
|
||||||
|
@ -3452,7 +3480,19 @@ static void cm_recv_handler(struct ib_mad_agent *mad_agent,
|
||||||
work->cm_event.event = event;
|
work->cm_event.event = event;
|
||||||
work->mad_recv_wc = mad_recv_wc;
|
work->mad_recv_wc = mad_recv_wc;
|
||||||
work->port = port;
|
work->port = port;
|
||||||
queue_delayed_work(cm.wq, &work->work, 0);
|
|
||||||
|
/* Check if the device started its remove_one */
|
||||||
|
spin_lock_irq(&cm.lock);
|
||||||
|
if (!port->cm_dev->going_down)
|
||||||
|
queue_delayed_work(cm.wq, &work->work, 0);
|
||||||
|
else
|
||||||
|
going_down = 1;
|
||||||
|
spin_unlock_irq(&cm.lock);
|
||||||
|
|
||||||
|
if (going_down) {
|
||||||
|
kfree(work);
|
||||||
|
ib_free_recv_mad(mad_recv_wc);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
|
static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
|
||||||
|
@ -3771,7 +3811,7 @@ static void cm_add_one(struct ib_device *ib_device)
|
||||||
|
|
||||||
cm_dev->ib_device = ib_device;
|
cm_dev->ib_device = ib_device;
|
||||||
cm_get_ack_delay(cm_dev);
|
cm_get_ack_delay(cm_dev);
|
||||||
|
cm_dev->going_down = 0;
|
||||||
cm_dev->device = device_create(&cm_class, &ib_device->dev,
|
cm_dev->device = device_create(&cm_class, &ib_device->dev,
|
||||||
MKDEV(0, 0), NULL,
|
MKDEV(0, 0), NULL,
|
||||||
"%s", ib_device->name);
|
"%s", ib_device->name);
|
||||||
|
@ -3864,14 +3904,23 @@ static void cm_remove_one(struct ib_device *ib_device)
|
||||||
list_del(&cm_dev->list);
|
list_del(&cm_dev->list);
|
||||||
write_unlock_irqrestore(&cm.device_lock, flags);
|
write_unlock_irqrestore(&cm.device_lock, flags);
|
||||||
|
|
||||||
|
spin_lock_irq(&cm.lock);
|
||||||
|
cm_dev->going_down = 1;
|
||||||
|
spin_unlock_irq(&cm.lock);
|
||||||
|
|
||||||
for (i = 1; i <= ib_device->phys_port_cnt; i++) {
|
for (i = 1; i <= ib_device->phys_port_cnt; i++) {
|
||||||
if (!rdma_cap_ib_cm(ib_device, i))
|
if (!rdma_cap_ib_cm(ib_device, i))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
port = cm_dev->port[i-1];
|
port = cm_dev->port[i-1];
|
||||||
ib_modify_port(ib_device, port->port_num, 0, &port_modify);
|
ib_modify_port(ib_device, port->port_num, 0, &port_modify);
|
||||||
ib_unregister_mad_agent(port->mad_agent);
|
/*
|
||||||
|
* We flush the queue here after the going_down set, this
|
||||||
|
* verifies that no new work items will be queued in the recv handler,
|
||||||
|
* after that we can call the unregister_mad_agent
|
||||||
|
*/
|
||||||
flush_workqueue(cm.wq);
|
flush_workqueue(cm.wq);
|
||||||
|
ib_unregister_mad_agent(port->mad_agent);
|
||||||
cm_remove_port_fs(port);
|
cm_remove_port_fs(port);
|
||||||
}
|
}
|
||||||
device_unregister(cm_dev->device);
|
device_unregister(cm_dev->device);
|
||||||
|
|
Loading…
Add table
Reference in a new issue