netfilter: conntrack: spinlock per cpu to protect special lists.
Use one spinlock per cpu to protect the dying/unconfirmed/template special lists
(these lists are now per cpu, a bit like the untracked ct).
Add a @cpu field to nf_conn so that, at removal time, we take the same
per-cpu spinlock the entry was added under.

Signed-off-by: Eric Dumazet <[email protected]>
Signed-off-by: Jesper Dangaard Brouer <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
Reviewed-by: Florian Westphal <[email protected]>
Signed-off-by: Pablo Neira Ayuso <[email protected]>
netoptimizer authored and ummakynes committed Mar 7, 2014
Parent: b476b72 · Commit: b7779d0
Showing 5 changed files with 168 additions and 79 deletions.
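
The idea, in miniature: every cpu owns a spinlock and its own dying/unconfirmed/tmpl lists, a conntrack entry records which cpu's lists it was added to, and removal locks only that cpu's lock. Below is a hypothetical user-space sketch of that pattern, not kernel code: pthread mutexes and a fixed array stand in for the per-cpu spinlocks and per-cpu data, and the ct_pcpu / cpu names simply mirror the fields added in the diffs that follow.

/*
 * Hypothetical user-space sketch of the per-cpu list pattern (not kernel
 * code): each "cpu" slot owns one lock plus its own list, an entry records
 * the slot it was added under (like the new nf_conn->cpu field), and
 * removal takes only that owner's lock.
 */
#include <pthread.h>
#include <stdio.h>

#define NR_CPUS 4

struct entry {
    struct entry *next;
    int cpu;                        /* mirrors the new nf_conn->cpu */
};

struct ct_pcpu {                    /* mirrors struct ct_pcpu below */
    pthread_mutex_t lock;
    struct entry *unconfirmed;      /* head of a singly linked list */
};

static struct ct_pcpu pcpu_lists[NR_CPUS];

static void add_to_unconfirmed(struct entry *e, int this_cpu)
{
    struct ct_pcpu *pcpu = &pcpu_lists[this_cpu];

    e->cpu = this_cpu;              /* remember which lock protects us */
    pthread_mutex_lock(&pcpu->lock);
    e->next = pcpu->unconfirmed;
    pcpu->unconfirmed = e;
    pthread_mutex_unlock(&pcpu->lock);
}

static void del_from_unconfirmed(struct entry *e)
{
    /* whichever cpu runs this, lock the slot the entry was added under */
    struct ct_pcpu *pcpu = &pcpu_lists[e->cpu];
    struct entry **pp;

    pthread_mutex_lock(&pcpu->lock);
    for (pp = &pcpu->unconfirmed; *pp; pp = &(*pp)->next) {
        if (*pp == e) {
            *pp = e->next;
            break;
        }
    }
    pthread_mutex_unlock(&pcpu->lock);
}

int main(void)
{
    struct entry e = { 0 };
    int i;

    for (i = 0; i < NR_CPUS; i++)
        pthread_mutex_init(&pcpu_lists[i].lock, NULL);

    add_to_unconfirmed(&e, 2);      /* entry created while "on" cpu 2 */
    del_from_unconfirmed(&e);       /* removal finds cpu 2's lock via e->cpu */
    printf("added and removed under slot %d's lock\n", e.cpu);
    return 0;
}

Note that the global nf_conntrack_lock is still taken on several paths in this patch; the new per-cpu locks only protect the special lists themselves.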
include/net/netfilter/nf_conntrack.h (2 additions, 1 deletion)
@@ -82,7 +82,8 @@ struct nf_conn {
 	 */
 	struct nf_conntrack ct_general;
 
-	spinlock_t lock;
+	spinlock_t	lock;
+	u16		cpu;
 
 	/* XXX should I move this to the tail ? - Y.K */
 	/* These are my tuples; original and reply */
include/net/netns/conntrack.h (8 additions, 3 deletions)
@@ -62,6 +62,13 @@ struct nf_ip_net {
 #endif
 };
 
+struct ct_pcpu {
+    spinlock_t              lock;
+    struct hlist_nulls_head unconfirmed;
+    struct hlist_nulls_head dying;
+    struct hlist_nulls_head tmpl;
+};
+
 struct netns_ct {
     atomic_t                count;
     unsigned int            expect_count;
@@ -86,9 +93,7 @@ struct netns_ct {
     struct kmem_cache       *nf_conntrack_cachep;
     struct hlist_nulls_head *hash;
     struct hlist_head       *expect_hash;
-    struct hlist_nulls_head unconfirmed;
-    struct hlist_nulls_head dying;
-    struct hlist_nulls_head tmpl;
+    struct ct_pcpu __percpu *pcpu_lists;
     struct ip_conntrack_stat __percpu *stat;
     struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb;
     struct nf_exp_event_notifier __rcu *nf_expect_event_cb;
net/netfilter/nf_conntrack_core.c (103 additions, 38 deletions)
@@ -192,6 +192,50 @@ clean_from_lists(struct nf_conn *ct)
     nf_ct_remove_expectations(ct);
 }
 
+/* must be called with local_bh_disable */
+static void nf_ct_add_to_dying_list(struct nf_conn *ct)
+{
+    struct ct_pcpu *pcpu;
+
+    /* add this conntrack to the (per cpu) dying list */
+    ct->cpu = smp_processor_id();
+    pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);
+
+    spin_lock(&pcpu->lock);
+    hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
+                         &pcpu->dying);
+    spin_unlock(&pcpu->lock);
+}
+
+/* must be called with local_bh_disable */
+static void nf_ct_add_to_unconfirmed_list(struct nf_conn *ct)
+{
+    struct ct_pcpu *pcpu;
+
+    /* add this conntrack to the (per cpu) unconfirmed list */
+    ct->cpu = smp_processor_id();
+    pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);
+
+    spin_lock(&pcpu->lock);
+    hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
+                         &pcpu->unconfirmed);
+    spin_unlock(&pcpu->lock);
+}
+
+/* must be called with local_bh_disable */
+static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct)
+{
+    struct ct_pcpu *pcpu;
+
+    /* We overload first tuple to link into unconfirmed or dying list.*/
+    pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);
+
+    spin_lock(&pcpu->lock);
+    BUG_ON(hlist_nulls_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode));
+    hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
+    spin_unlock(&pcpu->lock);
+}
+
 static void
 destroy_conntrack(struct nf_conntrack *nfct)
 {
@@ -220,9 +264,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
      * too. */
     nf_ct_remove_expectations(ct);
 
-    /* We overload first tuple to link into unconfirmed or dying list.*/
-    BUG_ON(hlist_nulls_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode));
-    hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
+    nf_ct_del_from_dying_or_unconfirmed_list(ct);
 
     NF_CT_STAT_INC(net, delete);
     spin_unlock_bh(&nf_conntrack_lock);
@@ -244,9 +286,7 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct)
      * Otherwise we can get spurious warnings. */
     NF_CT_STAT_INC(net, delete_list);
     clean_from_lists(ct);
-    /* add this conntrack to the dying list */
-    hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
-                         &net->ct.dying);
+    nf_ct_add_to_dying_list(ct);
     spin_unlock_bh(&nf_conntrack_lock);
 }

@@ -467,15 +507,22 @@ EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
 /* deletion from this larval template list happens via nf_ct_put() */
 void nf_conntrack_tmpl_insert(struct net *net, struct nf_conn *tmpl)
 {
+    struct ct_pcpu *pcpu;
+
     __set_bit(IPS_TEMPLATE_BIT, &tmpl->status);
     __set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
     nf_conntrack_get(&tmpl->ct_general);
 
-    spin_lock_bh(&nf_conntrack_lock);
+    /* add this conntrack to the (per cpu) tmpl list */
+    local_bh_disable();
+    tmpl->cpu = smp_processor_id();
+    pcpu = per_cpu_ptr(nf_ct_net(tmpl)->ct.pcpu_lists, tmpl->cpu);
+
+    spin_lock(&pcpu->lock);
     /* Overload tuple linked list to put us in template list. */
     hlist_nulls_add_head_rcu(&tmpl->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
-                             &net->ct.tmpl);
-    spin_unlock_bh(&nf_conntrack_lock);
+                             &pcpu->tmpl);
+    spin_unlock_bh(&pcpu->lock);
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_tmpl_insert);

@@ -546,8 +593,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
             zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
             goto out;
 
-    /* Remove from unconfirmed list */
-    hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
+    nf_ct_del_from_dying_or_unconfirmed_list(ct);
 
     /* Timer relative to confirmation time, not original
        setting time, otherwise we'd get timer wrap in
@@ -879,10 +925,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
 
     /* Now it is inserted into the unconfirmed list, bump refcount */
     nf_conntrack_get(&ct->ct_general);
-
-    /* Overload tuple linked list to put us in unconfirmed list. */
-    hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
-                             &net->ct.unconfirmed);
+    nf_ct_add_to_unconfirmed_list(ct);
 
     spin_unlock_bh(&nf_conntrack_lock);
 
@@ -1254,6 +1297,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
     struct nf_conntrack_tuple_hash *h;
     struct nf_conn *ct;
     struct hlist_nulls_node *n;
+    int cpu;
 
     spin_lock_bh(&nf_conntrack_lock);
     for (; *bucket < net->ct.htable_size; (*bucket)++) {
@@ -1265,12 +1309,19 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
                 goto found;
         }
     }
-    hlist_nulls_for_each_entry(h, n, &net->ct.unconfirmed, hnnode) {
-        ct = nf_ct_tuplehash_to_ctrack(h);
-        if (iter(ct, data))
-            set_bit(IPS_DYING_BIT, &ct->status);
-    }
     spin_unlock_bh(&nf_conntrack_lock);
+
+    for_each_possible_cpu(cpu) {
+        struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
+
+        spin_lock_bh(&pcpu->lock);
+        hlist_nulls_for_each_entry(h, n, &pcpu->unconfirmed, hnnode) {
+            ct = nf_ct_tuplehash_to_ctrack(h);
+            if (iter(ct, data))
+                set_bit(IPS_DYING_BIT, &ct->status);
+        }
+        spin_unlock_bh(&pcpu->lock);
+    }
     return NULL;
 found:
     atomic_inc(&ct->ct_general.use);
@@ -1323,14 +1374,19 @@ static void nf_ct_release_dying_list(struct net *net)
     struct nf_conntrack_tuple_hash *h;
     struct nf_conn *ct;
     struct hlist_nulls_node *n;
+    int cpu;
 
-    spin_lock_bh(&nf_conntrack_lock);
-    hlist_nulls_for_each_entry(h, n, &net->ct.dying, hnnode) {
-        ct = nf_ct_tuplehash_to_ctrack(h);
-        /* never fails to remove them, no listeners at this point */
-        nf_ct_kill(ct);
+    for_each_possible_cpu(cpu) {
+        struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
+
+        spin_lock_bh(&pcpu->lock);
+        hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) {
+            ct = nf_ct_tuplehash_to_ctrack(h);
+            /* never fails to remove them, no listeners at this point */
+            nf_ct_kill(ct);
+        }
+        spin_unlock_bh(&pcpu->lock);
     }
-    spin_unlock_bh(&nf_conntrack_lock);
 }
 
 static int untrack_refs(void)
@@ -1417,6 +1473,7 @@ void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list)
         kmem_cache_destroy(net->ct.nf_conntrack_cachep);
         kfree(net->ct.slabname);
         free_percpu(net->ct.stat);
+        free_percpu(net->ct.pcpu_lists);
     }
 }

@@ -1629,37 +1686,43 @@ void nf_conntrack_init_end(void)
 
 int nf_conntrack_init_net(struct net *net)
 {
-    int ret;
+    int ret = -ENOMEM;
+    int cpu;
 
     atomic_set(&net->ct.count, 0);
-    INIT_HLIST_NULLS_HEAD(&net->ct.unconfirmed, UNCONFIRMED_NULLS_VAL);
-    INIT_HLIST_NULLS_HEAD(&net->ct.dying, DYING_NULLS_VAL);
-    INIT_HLIST_NULLS_HEAD(&net->ct.tmpl, TEMPLATE_NULLS_VAL);
-    net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
-    if (!net->ct.stat) {
-        ret = -ENOMEM;
+
+    net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu);
+    if (!net->ct.pcpu_lists)
         goto err_stat;
+
+    for_each_possible_cpu(cpu) {
+        struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
+
+        spin_lock_init(&pcpu->lock);
+        INIT_HLIST_NULLS_HEAD(&pcpu->unconfirmed, UNCONFIRMED_NULLS_VAL);
+        INIT_HLIST_NULLS_HEAD(&pcpu->dying, DYING_NULLS_VAL);
+        INIT_HLIST_NULLS_HEAD(&pcpu->tmpl, TEMPLATE_NULLS_VAL);
     }
+
+    net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
+    if (!net->ct.stat)
+        goto err_pcpu_lists;
 
     net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%p", net);
-    if (!net->ct.slabname) {
-        ret = -ENOMEM;
+    if (!net->ct.slabname)
         goto err_slabname;
-    }
 
     net->ct.nf_conntrack_cachep = kmem_cache_create(net->ct.slabname,
                                                     sizeof(struct nf_conn), 0,
                                                     SLAB_DESTROY_BY_RCU, NULL);
     if (!net->ct.nf_conntrack_cachep) {
         printk(KERN_ERR "Unable to create nf_conn slab cache\n");
-        ret = -ENOMEM;
         goto err_cache;
     }
 
     net->ct.htable_size = nf_conntrack_htable_size;
     net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, 1);
     if (!net->ct.hash) {
-        ret = -ENOMEM;
         printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
         goto err_hash;
     }
@@ -1701,6 +1764,8 @@ int nf_conntrack_init_net(struct net *net)
     kfree(net->ct.slabname);
 err_slabname:
     free_percpu(net->ct.stat);
+err_pcpu_lists:
+    free_percpu(net->ct.pcpu_lists);
 err_stat:
     return ret;
 }
net/netfilter/nf_conntrack_helper.c (9 additions, 2 deletions)
@@ -396,6 +396,7 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
     const struct hlist_node *next;
     const struct hlist_nulls_node *nn;
     unsigned int i;
+    int cpu;
 
     /* Get rid of expectations */
     for (i = 0; i < nf_ct_expect_hsize; i++) {
@@ -414,8 +415,14 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
     }
 
     /* Get rid of expecteds, set helpers to NULL. */
-    hlist_nulls_for_each_entry(h, nn, &net->ct.unconfirmed, hnnode)
-        unhelp(h, me);
+    for_each_possible_cpu(cpu) {
+        struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
+
+        spin_lock_bh(&pcpu->lock);
+        hlist_nulls_for_each_entry(h, nn, &pcpu->unconfirmed, hnnode)
+            unhelp(h, me);
+        spin_unlock_bh(&pcpu->lock);
+    }
     for (i = 0; i < net->ct.htable_size; i++) {
         hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
             unhelp(h, me);