author    Ben Pfaff <blp@nicira.com>    2009-07-08 12:23:32 -0700
committer Ben Pfaff <blp@nicira.com>    2009-07-08 14:13:15 -0700
commit    72ca14c15430b1e06b06fa2042dab347c1c7d7df (patch)
tree      c5a79176da08f9cae4061e1b2e9153e8094d6002 /datapath/dp_dev.c
parent    6d867ef335eab5cee83e353a0c7e8d143a2d3419 (diff)
datapath: Fix race against workqueue in dp_dev and simplify code.
The dp_dev_destroy() function failed to cancel the xmit_queue work, which allowed it to run after the device had been destroyed, accessing freed memory. However, simply canceling the work with cancel_work_sync() would be insufficient, since other packets could get queued while the work function was running. Stopping the queue with netif_tx_disable() doesn't help, because the final action in dp_dev_do_xmit() is to re-enable the TX queue.

This issue led me to re-examine why the dp_dev needs to use a work_struct at all. This was implemented in commit 71f13ed0b "Send of0 packets from workqueue, to avoid recursive locking of ofN device" due to a complaint from lockdep about recursive locking. However, there's no actual reason that we need any locking around dp_dev_xmit(). Until now, it has accepted the standard locking provided by the network stack. But looking at the other software devices (veth, loopback), those use NETIF_F_LLTX, which disables this locking, and presumably do so for this very reason. In fact, the LWN article at http://lwn.net/Articles/121566/ hints that NETIF_F_LLTX, which is otherwise discouraged in the kernel, is acceptable for "certain types of software device."

So this commit switches to using NETIF_F_LLTX for dp_dev and gets rid of the work_struct.

In the process, I noticed that veth and loopback also take advantage of a network device destruction "hook" using the net_device "destructor" member. Using this, we automatically get called on network device destruction at the point where rtnl_unlock() is called. This allows us to stop stringing the dp_devs that are being destroyed onto a list so that we can free them, and thus simplifies the code along all the paths that call dp_dev_destroy().

This commit gets rid of a call to synchronize_rcu() (disguised as a call to synchronize_net(), which is a macro that expands to synchronize_rcu()), so it probably speeds up deleting ports, too.
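For context, here is a minimal, self-contained sketch of the veth/loopback pattern the commit adopts: a lockless-TX software device whose xmit handler hands the skb straight back to the receive path, and whose net_device is freed automatically through the destructor hook after unregister_netdevice() and the final rtnl_unlock(). This is not the actual datapath code; the swdev_* names are hypothetical, and the direct field assignments use the pre-net_device_ops kernel API, matching the era of the diff below.

/* Hypothetical swdev_* example of a NETIF_F_LLTX software device. */
#include <linux/etherdevice.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>

/* With NETIF_F_LLTX the core takes no TX lock, so this handler may run
 * simultaneously on different CPUs; it is still not reentrant on a
 * single CPU because it runs with BHs disabled. */
static int swdev_xmit(struct sk_buff *skb, struct net_device *dev)
{
	skb_orphan(skb);

	/* The receive path needs its own clone of a shared skb. */
	skb = skb_share_check(skb, GFP_ATOMIC);
	if (!skb)
		return 0;

	dev->stats.tx_packets++;
	dev->stats.tx_bytes += skb->len;

	/* Loop the packet straight back into the stack; the real code
	 * instead calls dp_process_received_packet() under
	 * rcu_read_lock_bh(). */
	skb_reset_mac_header(skb);
	netif_rx(skb);
	return 0;
}

static void swdev_setup(struct net_device *dev)
{
	ether_setup(dev);
	dev->hard_start_xmit = swdev_xmit;
	dev->tx_queue_len = 0;         /* no qdisc queue: TX never stops */
	dev->features = NETIF_F_LLTX;  /* lockless transmit, like veth */
	dev->destructor = free_netdev; /* freed for us after rtnl_unlock() */
	random_ether_addr(dev->dev_addr);
}

With dev->destructor set to free_netdev, teardown reduces to a single unregister_netdevice() call under RTNL: the core frees the device once the last reference is dropped after rtnl_unlock(), so there is no need to keep a list of dying devices or to wait in synchronize_net().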
Diffstat (limited to 'datapath/dp_dev.c')
-rw-r--r--  datapath/dp_dev.c | 49
1 file changed, 10 insertions(+), 39 deletions(-)
diff --git a/datapath/dp_dev.c b/datapath/dp_dev.c
index 34a102a3..0284f967 100644
--- a/datapath/dp_dev.c
+++ b/datapath/dp_dev.c
@@ -56,6 +56,8 @@ static int dp_dev_mac_addr(struct net_device *dev, void *p)
return 0;
}
+/* Not reentrant (because it is called with BHs disabled), but may be called
+ * simultaneously on different CPUs. */
static int dp_dev_xmit(struct sk_buff *skb, struct net_device *netdev)
{
struct dp_dev *dp_dev = dp_dev_priv(netdev);
@@ -66,10 +68,7 @@ static int dp_dev_xmit(struct sk_buff *skb, struct net_device *netdev)
* harder to predict when. */
skb_orphan(skb);
- /* We are going to modify 'skb', by sticking it on &dp_dev->xmit_queue,
- * so we need to have our own clone. (At any rate, fwd_port_input()
- * will need its own clone, so there's no benefit to queuing any other
- * way.) */
+ /* dp_process_received_packet() needs its own clone. */
skb = skb_share_check(skb, GFP_ATOMIC);
if (!skb)
return 0;
@@ -77,37 +76,14 @@ static int dp_dev_xmit(struct sk_buff *skb, struct net_device *netdev)
dp_dev->stats.tx_packets++;
dp_dev->stats.tx_bytes += skb->len;
- if (skb_queue_len(&dp_dev->xmit_queue) >= netdev->tx_queue_len) {
- /* Queue overflow. Stop transmitter. */
- netif_stop_queue(netdev);
-
- /* We won't see all dropped packets individually, so overrun
- * error is appropriate. */
- dp_dev->stats.tx_fifo_errors++;
- }
- skb_queue_tail(&dp_dev->xmit_queue, skb);
- netdev->trans_start = jiffies;
-
- schedule_work(&dp_dev->xmit_work);
+ skb_reset_mac_header(skb);
+ rcu_read_lock_bh();
+ dp_process_received_packet(skb, dp_dev->dp->ports[dp_dev->port_no]);
+ rcu_read_unlock_bh();
return 0;
}
-static void dp_dev_do_xmit(struct work_struct *work)
-{
- struct dp_dev *dp_dev = container_of(work, struct dp_dev, xmit_work);
- struct datapath *dp = dp_dev->dp;
- struct sk_buff *skb;
-
- while ((skb = skb_dequeue(&dp_dev->xmit_queue)) != NULL) {
- skb_reset_mac_header(skb);
- rcu_read_lock_bh();
- dp_process_received_packet(skb, dp->ports[dp_dev->port_no]);
- rcu_read_unlock_bh();
- }
- netif_wake_queue(dp_dev->dev);
-}
-
static int dp_dev_open(struct net_device *netdev)
{
netif_start_queue(netdev);
@@ -146,10 +122,12 @@ do_setup(struct net_device *netdev)
netdev->open = dp_dev_open;
SET_ETHTOOL_OPS(netdev, &dp_ethtool_ops);
netdev->stop = dp_dev_stop;
- netdev->tx_queue_len = 100;
+ netdev->tx_queue_len = 0;
netdev->set_mac_address = dp_dev_mac_addr;
+ netdev->destructor = free_netdev;
netdev->flags = IFF_BROADCAST | IFF_MULTICAST;
+ netdev->features = NETIF_F_LLTX; /* XXX other features? */
random_ether_addr(netdev->dev_addr);
@@ -195,19 +173,12 @@ struct net_device *dp_dev_create(struct datapath *dp, const char *dp_name, int p
dp_dev->dp = dp;
dp_dev->port_no = port_no;
dp_dev->dev = netdev;
- skb_queue_head_init(&dp_dev->xmit_queue);
- INIT_WORK(&dp_dev->xmit_work, dp_dev_do_xmit);
return netdev;
}
/* Called with RTNL lock and dp_mutex.*/
void dp_dev_destroy(struct net_device *netdev)
{
- struct dp_dev *dp_dev = dp_dev_priv(netdev);
-
- netif_tx_disable(netdev);
- synchronize_net();
- skb_queue_purge(&dp_dev->xmit_queue);
unregister_netdevice(netdev);
}