ping: convert to RCU lookups, get rid of rwlock
Using an rwlock in networking code is extremely risky.
Writers can starve if enough readers are constantly
grabbing the rwlock.
I thought rwlocks were at fault and sent this patch:
https://lkml.org/lkml/2022/6/17/272
But Peter and Linus essentially told me rwlock had to be unfair.
We need to get rid of rwlock in networking code.
Fixes: c319b4d76b ("net: ipv4: add IPPROTO_ICMP socket kind")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
f0623340fd
commit
dbca1596bb
1 changed files with 16 additions and 20 deletions
|
@ -50,7 +50,7 @@
|
||||||
|
|
||||||
struct ping_table {
|
struct ping_table {
|
||||||
struct hlist_nulls_head hash[PING_HTABLE_SIZE];
|
struct hlist_nulls_head hash[PING_HTABLE_SIZE];
|
||||||
rwlock_t lock;
|
spinlock_t lock;
|
||||||
};
|
};
|
||||||
|
|
||||||
static struct ping_table ping_table;
|
static struct ping_table ping_table;
|
||||||
|
@ -82,7 +82,7 @@ int ping_get_port(struct sock *sk, unsigned short ident)
|
||||||
struct sock *sk2 = NULL;
|
struct sock *sk2 = NULL;
|
||||||
|
|
||||||
isk = inet_sk(sk);
|
isk = inet_sk(sk);
|
||||||
write_lock_bh(&ping_table.lock);
|
spin_lock(&ping_table.lock);
|
||||||
if (ident == 0) {
|
if (ident == 0) {
|
||||||
u32 i;
|
u32 i;
|
||||||
u16 result = ping_port_rover + 1;
|
u16 result = ping_port_rover + 1;
|
||||||
|
@ -128,14 +128,15 @@ next_port:
|
||||||
if (sk_unhashed(sk)) {
|
if (sk_unhashed(sk)) {
|
||||||
pr_debug("was not hashed\n");
|
pr_debug("was not hashed\n");
|
||||||
sock_hold(sk);
|
sock_hold(sk);
|
||||||
hlist_nulls_add_head(&sk->sk_nulls_node, hlist);
|
sock_set_flag(sk, SOCK_RCU_FREE);
|
||||||
|
hlist_nulls_add_head_rcu(&sk->sk_nulls_node, hlist);
|
||||||
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
|
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
|
||||||
}
|
}
|
||||||
write_unlock_bh(&ping_table.lock);
|
spin_unlock(&ping_table.lock);
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
fail:
|
fail:
|
||||||
write_unlock_bh(&ping_table.lock);
|
spin_unlock(&ping_table.lock);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(ping_get_port);
|
EXPORT_SYMBOL_GPL(ping_get_port);
|
||||||
|
@ -153,19 +154,19 @@ void ping_unhash(struct sock *sk)
|
||||||
struct inet_sock *isk = inet_sk(sk);
|
struct inet_sock *isk = inet_sk(sk);
|
||||||
|
|
||||||
pr_debug("ping_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num);
|
pr_debug("ping_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num);
|
||||||
write_lock_bh(&ping_table.lock);
|
spin_lock(&ping_table.lock);
|
||||||
if (sk_hashed(sk)) {
|
if (sk_hashed(sk)) {
|
||||||
hlist_nulls_del(&sk->sk_nulls_node);
|
hlist_nulls_del_init_rcu(&sk->sk_nulls_node);
|
||||||
sk_nulls_node_init(&sk->sk_nulls_node);
|
|
||||||
sock_put(sk);
|
sock_put(sk);
|
||||||
isk->inet_num = 0;
|
isk->inet_num = 0;
|
||||||
isk->inet_sport = 0;
|
isk->inet_sport = 0;
|
||||||
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
|
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
|
||||||
}
|
}
|
||||||
write_unlock_bh(&ping_table.lock);
|
spin_unlock(&ping_table.lock);
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(ping_unhash);
|
EXPORT_SYMBOL_GPL(ping_unhash);
|
||||||
|
|
||||||
|
/* Called under rcu_read_lock() */
|
||||||
static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
|
static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
|
||||||
{
|
{
|
||||||
struct hlist_nulls_head *hslot = ping_hashslot(&ping_table, net, ident);
|
struct hlist_nulls_head *hslot = ping_hashslot(&ping_table, net, ident);
|
||||||
|
@ -190,8 +191,6 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
read_lock_bh(&ping_table.lock);
|
|
||||||
|
|
||||||
ping_portaddr_for_each_entry(sk, hnode, hslot) {
|
ping_portaddr_for_each_entry(sk, hnode, hslot) {
|
||||||
isk = inet_sk(sk);
|
isk = inet_sk(sk);
|
||||||
|
|
||||||
|
@ -230,13 +229,11 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
|
||||||
sk->sk_bound_dev_if != sdif)
|
sk->sk_bound_dev_if != sdif)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
sock_hold(sk);
|
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
|
|
||||||
sk = NULL;
|
sk = NULL;
|
||||||
exit:
|
exit:
|
||||||
read_unlock_bh(&ping_table.lock);
|
|
||||||
|
|
||||||
return sk;
|
return sk;
|
||||||
}
|
}
|
||||||
|
@ -588,7 +585,7 @@ void ping_err(struct sk_buff *skb, int offset, u32 info)
|
||||||
sk->sk_err = err;
|
sk->sk_err = err;
|
||||||
sk_error_report(sk);
|
sk_error_report(sk);
|
||||||
out:
|
out:
|
||||||
sock_put(sk);
|
return;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(ping_err);
|
EXPORT_SYMBOL_GPL(ping_err);
|
||||||
|
|
||||||
|
@ -994,7 +991,6 @@ enum skb_drop_reason ping_rcv(struct sk_buff *skb)
|
||||||
reason = __ping_queue_rcv_skb(sk, skb2);
|
reason = __ping_queue_rcv_skb(sk, skb2);
|
||||||
else
|
else
|
||||||
reason = SKB_DROP_REASON_NOMEM;
|
reason = SKB_DROP_REASON_NOMEM;
|
||||||
sock_put(sk);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (reason)
|
if (reason)
|
||||||
|
@ -1080,13 +1076,13 @@ static struct sock *ping_get_idx(struct seq_file *seq, loff_t pos)
|
||||||
}
|
}
|
||||||
|
|
||||||
void *ping_seq_start(struct seq_file *seq, loff_t *pos, sa_family_t family)
|
void *ping_seq_start(struct seq_file *seq, loff_t *pos, sa_family_t family)
|
||||||
__acquires(ping_table.lock)
|
__acquires(RCU)
|
||||||
{
|
{
|
||||||
struct ping_iter_state *state = seq->private;
|
struct ping_iter_state *state = seq->private;
|
||||||
state->bucket = 0;
|
state->bucket = 0;
|
||||||
state->family = family;
|
state->family = family;
|
||||||
|
|
||||||
read_lock_bh(&ping_table.lock);
|
rcu_read_lock();
|
||||||
|
|
||||||
return *pos ? ping_get_idx(seq, *pos-1) : SEQ_START_TOKEN;
|
return *pos ? ping_get_idx(seq, *pos-1) : SEQ_START_TOKEN;
|
||||||
}
|
}
|
||||||
|
@ -1112,9 +1108,9 @@ void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos)
|
||||||
EXPORT_SYMBOL_GPL(ping_seq_next);
|
EXPORT_SYMBOL_GPL(ping_seq_next);
|
||||||
|
|
||||||
void ping_seq_stop(struct seq_file *seq, void *v)
|
void ping_seq_stop(struct seq_file *seq, void *v)
|
||||||
__releases(ping_table.lock)
|
__releases(RCU)
|
||||||
{
|
{
|
||||||
read_unlock_bh(&ping_table.lock);
|
rcu_read_unlock();
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(ping_seq_stop);
|
EXPORT_SYMBOL_GPL(ping_seq_stop);
|
||||||
|
|
||||||
|
@ -1198,5 +1194,5 @@ void __init ping_init(void)
|
||||||
|
|
||||||
for (i = 0; i < PING_HTABLE_SIZE; i++)
|
for (i = 0; i < PING_HTABLE_SIZE; i++)
|
||||||
INIT_HLIST_NULLS_HEAD(&ping_table.hash[i], i);
|
INIT_HLIST_NULLS_HEAD(&ping_table.hash[i], i);
|
||||||
rwlock_init(&ping_table.lock);
|
spin_lock_init(&ping_table.lock);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue