Skip to content

Commit

Permalink
netfilter: nf_conntrack: use per-conntrack locks for protocol data
Browse files Browse the repository at this point in the history
Introduce per-conntrack locks and use them instead of the global protocol
locks to avoid contention. Especially tcp_lock shows up very high in
profiles on larger machines.

This will also allow to simplify the upcoming reliable event delivery patches.

Signed-off-by: Patrick McHardy <[email protected]>
  • Loading branch information
kaber committed Jun 10, 2009
1 parent a31e1ff commit 440f0d5
Show file tree
Hide file tree
Showing 8 changed files with 49 additions and 55 deletions.
2 changes: 2 additions & 0 deletions include/net/netfilter/nf_conntrack.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ struct nf_conn {
plus 1 for any connection(s) we are `master' for */
struct nf_conntrack ct_general;

spinlock_t lock;

/* XXX should I move this to the tail ? - Y.K */
/* These are my tuples; original and reply */
struct nf_conntrack_tuple_hash tuplehash[IP_CT_DIR_MAX];
Expand Down
4 changes: 2 additions & 2 deletions include/net/netfilter/nf_conntrack_l4proto.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,11 @@ struct nf_conntrack_l4proto
const struct nf_conntrack_tuple *);

/* Print out the private part of the conntrack. */
int (*print_conntrack)(struct seq_file *s, const struct nf_conn *);
int (*print_conntrack)(struct seq_file *s, struct nf_conn *);

/* convert protoinfo to nfnetink attributes */
int (*to_nlattr)(struct sk_buff *skb, struct nlattr *nla,
const struct nf_conn *ct);
struct nf_conn *ct);
/* Calculate protoinfo nlattr size */
int (*nlattr_size)(void);

Expand Down
1 change: 1 addition & 0 deletions net/netfilter/nf_conntrack_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net,
return ERR_PTR(-ENOMEM);
}

spin_lock_init(&ct->lock);
atomic_set(&ct->ct_general.use, 1);
ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
Expand Down
4 changes: 2 additions & 2 deletions net/netfilter/nf_conntrack_netlink.c
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct)
}

static inline int
ctnetlink_dump_protoinfo(struct sk_buff *skb, const struct nf_conn *ct)
ctnetlink_dump_protoinfo(struct sk_buff *skb, struct nf_conn *ct)
{
struct nf_conntrack_l4proto *l4proto;
struct nlattr *nest_proto;
Expand Down Expand Up @@ -347,7 +347,7 @@ ctnetlink_dump_use(struct sk_buff *skb, const struct nf_conn *ct)

static int
ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
int event, const struct nf_conn *ct)
int event, struct nf_conn *ct)
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
Expand Down
24 changes: 11 additions & 13 deletions net/netfilter/nf_conntrack_proto_dccp.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,6 @@
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_log.h>

static DEFINE_RWLOCK(dccp_lock);

/* Timeouts are based on values from RFC4340:
*
* - REQUEST:
Expand Down Expand Up @@ -491,7 +489,7 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
return NF_ACCEPT;
}

write_lock_bh(&dccp_lock);
spin_lock_bh(&ct->lock);

role = ct->proto.dccp.role[dir];
old_state = ct->proto.dccp.state;
Expand Down Expand Up @@ -535,13 +533,13 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
ct->proto.dccp.last_dir = dir;
ct->proto.dccp.last_pkt = type;

write_unlock_bh(&dccp_lock);
spin_unlock_bh(&ct->lock);
if (LOG_INVALID(net, IPPROTO_DCCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_dccp: invalid packet ignored ");
return NF_ACCEPT;
case CT_DCCP_INVALID:
write_unlock_bh(&dccp_lock);
spin_unlock_bh(&ct->lock);
if (LOG_INVALID(net, IPPROTO_DCCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_dccp: invalid state transition ");
Expand All @@ -551,7 +549,7 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
ct->proto.dccp.last_dir = dir;
ct->proto.dccp.last_pkt = type;
ct->proto.dccp.state = new_state;
write_unlock_bh(&dccp_lock);
spin_unlock_bh(&ct->lock);

dn = dccp_pernet(net);
nf_ct_refresh_acct(ct, ctinfo, skb, dn->dccp_timeout[new_state]);
Expand Down Expand Up @@ -617,18 +615,18 @@ static int dccp_print_tuple(struct seq_file *s,
ntohs(tuple->dst.u.dccp.port));
}

static int dccp_print_conntrack(struct seq_file *s, const struct nf_conn *ct)
static int dccp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
{
return seq_printf(s, "%s ", dccp_state_names[ct->proto.dccp.state]);
}

#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
const struct nf_conn *ct)
struct nf_conn *ct)
{
struct nlattr *nest_parms;

read_lock_bh(&dccp_lock);
spin_lock_bh(&ct->lock);
nest_parms = nla_nest_start(skb, CTA_PROTOINFO_DCCP | NLA_F_NESTED);
if (!nest_parms)
goto nla_put_failure;
Expand All @@ -638,11 +636,11 @@ static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
NLA_PUT_BE64(skb, CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ,
cpu_to_be64(ct->proto.dccp.handshake_seq));
nla_nest_end(skb, nest_parms);
read_unlock_bh(&dccp_lock);
spin_unlock_bh(&ct->lock);
return 0;

nla_put_failure:
read_unlock_bh(&dccp_lock);
spin_unlock_bh(&ct->lock);
return -1;
}

Expand Down Expand Up @@ -673,7 +671,7 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
return -EINVAL;
}

write_lock_bh(&dccp_lock);
spin_lock_bh(&ct->lock);
ct->proto.dccp.state = nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]);
if (nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) == CT_DCCP_ROLE_CLIENT) {
ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT;
Expand All @@ -686,7 +684,7 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
ct->proto.dccp.handshake_seq =
be64_to_cpu(nla_get_be64(tb[CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ]));
}
write_unlock_bh(&dccp_lock);
spin_unlock_bh(&ct->lock);
return 0;
}

Expand Down
3 changes: 1 addition & 2 deletions net/netfilter/nf_conntrack_proto_gre.c
Original file line number Diff line number Diff line change
Expand Up @@ -219,8 +219,7 @@ static int gre_print_tuple(struct seq_file *s,
}

/* print private data for conntrack */
static int gre_print_conntrack(struct seq_file *s,
const struct nf_conn *ct)
static int gre_print_conntrack(struct seq_file *s, struct nf_conn *ct)
{
return seq_printf(s, "timeout=%u, stream_timeout=%u ",
(ct->proto.gre.timeout / HZ),
Expand Down
27 changes: 12 additions & 15 deletions net/netfilter/nf_conntrack_proto_sctp.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,6 @@
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_ecache.h>

/* Protects ct->proto.sctp */
static DEFINE_RWLOCK(sctp_lock);

/* FIXME: Examine ipfilter's timeouts and conntrack transitions more
closely. They're more complex. --RR
Expand Down Expand Up @@ -164,13 +161,13 @@ static int sctp_print_tuple(struct seq_file *s,
}

/* Print out the private part of the conntrack. */
static int sctp_print_conntrack(struct seq_file *s, const struct nf_conn *ct)
static int sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
{
enum sctp_conntrack state;

read_lock_bh(&sctp_lock);
spin_lock_bh(&ct->lock);
state = ct->proto.sctp.state;
read_unlock_bh(&sctp_lock);
spin_unlock_bh(&ct->lock);

return seq_printf(s, "%s ", sctp_conntrack_names[state]);
}
Expand Down Expand Up @@ -318,7 +315,7 @@ static int sctp_packet(struct nf_conn *ct,
}

old_state = new_state = SCTP_CONNTRACK_NONE;
write_lock_bh(&sctp_lock);
spin_lock_bh(&ct->lock);
for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
/* Special cases of Verification tag check (Sec 8.5.1) */
if (sch->type == SCTP_CID_INIT) {
Expand Down Expand Up @@ -371,7 +368,7 @@ static int sctp_packet(struct nf_conn *ct,
if (old_state != new_state)
nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
}
write_unlock_bh(&sctp_lock);
spin_unlock_bh(&ct->lock);

nf_ct_refresh_acct(ct, ctinfo, skb, sctp_timeouts[new_state]);

Expand All @@ -386,7 +383,7 @@ static int sctp_packet(struct nf_conn *ct,
return NF_ACCEPT;

out_unlock:
write_unlock_bh(&sctp_lock);
spin_unlock_bh(&ct->lock);
out:
return -NF_ACCEPT;
}
Expand Down Expand Up @@ -469,11 +466,11 @@ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
#include <linux/netfilter/nfnetlink_conntrack.h>

static int sctp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
const struct nf_conn *ct)
struct nf_conn *ct)
{
struct nlattr *nest_parms;

read_lock_bh(&sctp_lock);
spin_lock_bh(&ct->lock);
nest_parms = nla_nest_start(skb, CTA_PROTOINFO_SCTP | NLA_F_NESTED);
if (!nest_parms)
goto nla_put_failure;
Expand All @@ -488,14 +485,14 @@ static int sctp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
CTA_PROTOINFO_SCTP_VTAG_REPLY,
ct->proto.sctp.vtag[IP_CT_DIR_REPLY]);

read_unlock_bh(&sctp_lock);
spin_unlock_bh(&ct->lock);

nla_nest_end(skb, nest_parms);

return 0;

nla_put_failure:
read_unlock_bh(&sctp_lock);
spin_unlock_bh(&ct->lock);
return -1;
}

Expand Down Expand Up @@ -527,13 +524,13 @@ static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct)
!tb[CTA_PROTOINFO_SCTP_VTAG_REPLY])
return -EINVAL;

write_lock_bh(&sctp_lock);
spin_lock_bh(&ct->lock);
ct->proto.sctp.state = nla_get_u8(tb[CTA_PROTOINFO_SCTP_STATE]);
ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] =
nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL]);
ct->proto.sctp.vtag[IP_CT_DIR_REPLY] =
nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_REPLY]);
write_unlock_bh(&sctp_lock);
spin_unlock_bh(&ct->lock);

return 0;
}
Expand Down
Loading

0 comments on commit 440f0d5

Please sign in to comment.