Skip to content

Commit

Permalink
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf
Browse files Browse the repository at this point in the history
Pablo Neira Ayuso says:

====================
Netfilter fixes for net

The following patchset contains a large batch with Netfilter fixes for
your net tree, they are:

1) Two patches to solve conntrack garbage collector cpu hogging, one to
   remove GC_MAX_EVICTS and another to look at the ratio (scanned entries
   vs. evicted entries) to make a decision on whether to reduce or not
   the scanning interval. From Florian Westphal.

2) Two patches to fix incorrect set element counting if NLM_F_EXCL
   is not set. Moreover, don't decrement set->nelems from the abort path
   on -ENFILE, which leaks a spare slot in the set. This includes a
   patch to deconstify the set walk callback to update set->ndeact.

3) Two fixes for the fwmark_reflect sysctl feature: Propagate mark to
   reply packets both from nf_reject and local stack, from Pau Espin Pedrol.

4) Fix incorrect handling of loopback traffic in rpfilter and nf_tables
   fib expression, from Liping Zhang.

5) Fix oops on stateful objects netlink dump, when no filter is specified.
   Also from Liping Zhang.

6) Fix a build error if proc is not available in ipt_CLUSTERIP, related
   to the fix that was applied in the previous batch for net. From Arnd Bergmann.

7) Fix lack of string validation in table, chain, set and stateful
   object names in nf_tables, from Liping Zhang. Moreover, restrict
   maximum log prefix length to 127 bytes, otherwise explicitly bail
   out.

8) Two patches to fix spelling and typos in nf_tables uapi header file
   and Kconfig, patches from Alexander Alemayhu and William Breathitt Gray.
====================

Signed-off-by: David S. Miller <[email protected]>
  • Loading branch information
davem330 committed Jan 26, 2017
2 parents 214767f + b2c11e4 commit 086cb6a
Show file tree
Hide file tree
Showing 23 changed files with 116 additions and 93 deletions.
6 changes: 3 additions & 3 deletions include/net/netfilter/nf_tables.h
Original file line number Diff line number Diff line change
Expand Up @@ -207,9 +207,9 @@ struct nft_set_iter {
unsigned int skip;
int err;
int (*fn)(const struct nft_ctx *ctx,
const struct nft_set *set,
struct nft_set *set,
const struct nft_set_iter *iter,
const struct nft_set_elem *elem);
struct nft_set_elem *elem);
};

/**
Expand Down Expand Up @@ -301,7 +301,7 @@ struct nft_set_ops {
void (*remove)(const struct nft_set *set,
const struct nft_set_elem *elem);
void (*walk)(const struct nft_ctx *ctx,
const struct nft_set *set,
struct nft_set *set,
struct nft_set_iter *iter);

unsigned int (*privsize)(const struct nlattr * const nla[]);
Expand Down
6 changes: 6 additions & 0 deletions include/net/netfilter/nft_fib.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@ struct nft_fib {

extern const struct nla_policy nft_fib_policy[];

/* Tell whether a packet counts as loopback traffic: true when @skb itself
 * is tagged PACKET_LOOPBACK, or when the device @in has IFF_LOOPBACK set
 * in its flags. @in is dereferenced unconditionally once the packet-type
 * test fails, so it must not be NULL in that case.
 */
static inline bool
nft_fib_is_loopback(const struct sk_buff *skb, const struct net_device *in)
{
	if (skb->pkt_type == PACKET_LOOPBACK)
		return true;

	return (in->flags & IFF_LOOPBACK) != 0;
}

int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr);
int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
const struct nlattr * const tb[]);
Expand Down
2 changes: 2 additions & 0 deletions include/uapi/linux/netfilter/nf_log.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,6 @@
#define NF_LOG_MACDECODE 0x20 /* Decode MAC header */
#define NF_LOG_MASK 0x2f

#define NF_LOG_PREFIXLEN 128

#endif /* _NETFILTER_NF_LOG_H */
4 changes: 2 additions & 2 deletions include/uapi/linux/netfilter/nf_tables.h
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ enum nft_rule_compat_flags {
/**
* enum nft_rule_compat_attributes - nf_tables rule compat attributes
*
* @NFTA_RULE_COMPAT_PROTO: numerice value of handled protocol (NLA_U32)
* @NFTA_RULE_COMPAT_PROTO: numeric value of handled protocol (NLA_U32)
* @NFTA_RULE_COMPAT_FLAGS: bitmask of enum nft_rule_compat_flags (NLA_U32)
*/
enum nft_rule_compat_attributes {
Expand Down Expand Up @@ -499,7 +499,7 @@ enum nft_bitwise_attributes {
* enum nft_byteorder_ops - nf_tables byteorder operators
*
* @NFT_BYTEORDER_NTOH: network to host operator
* @NFT_BYTEORDER_HTON: host to network opertaor
* @NFT_BYTEORDER_HTON: host to network operator
*/
enum nft_byteorder_ops {
NFT_BYTEORDER_NTOH,
Expand Down
1 change: 1 addition & 0 deletions net/ipv4/ip_output.c
Original file line number Diff line number Diff line change
Expand Up @@ -1629,6 +1629,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
sk->sk_protocol = ip_hdr(skb)->protocol;
sk->sk_bound_dev_if = arg->bound_dev_if;
sk->sk_sndbuf = sysctl_wmem_default;
sk->sk_mark = fl4.flowi4_mark;
err = ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base,
len, 0, &ipc, &rt, MSG_DONTWAIT);
if (unlikely(err)) {
Expand Down
7 changes: 6 additions & 1 deletion net/ipv4/netfilter/ipt_CLUSTERIP.c
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,12 @@ clusterip_config_find_get(struct net *net, __be32 clusterip, int entry)
rcu_read_lock_bh();
c = __clusterip_config_find(net, clusterip);
if (c) {
if (!c->pde || unlikely(!atomic_inc_not_zero(&c->refcount)))
#ifdef CONFIG_PROC_FS
if (!c->pde)
c = NULL;
else
#endif
if (unlikely(!atomic_inc_not_zero(&c->refcount)))
c = NULL;
else if (entry)
atomic_inc(&c->entries);
Expand Down
8 changes: 4 additions & 4 deletions net/ipv4/netfilter/ipt_rpfilter.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,10 @@ static bool rpfilter_lookup_reverse(struct net *net, struct flowi4 *fl4,
return dev_match || flags & XT_RPFILTER_LOOSE;
}

static bool rpfilter_is_local(const struct sk_buff *skb)
static bool
rpfilter_is_loopback(const struct sk_buff *skb, const struct net_device *in)
{
const struct rtable *rt = skb_rtable(skb);
return rt && (rt->rt_flags & RTCF_LOCAL);
return skb->pkt_type == PACKET_LOOPBACK || in->flags & IFF_LOOPBACK;
}

static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
Expand All @@ -79,7 +79,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
info = par->matchinfo;
invert = info->flags & XT_RPFILTER_INVERT;

if (rpfilter_is_local(skb))
if (rpfilter_is_loopback(skb, xt_in(par)))
return true ^ invert;

iph = ip_hdr(skb);
Expand Down
2 changes: 2 additions & 0 deletions net/ipv4/netfilter/nf_reject_ipv4.c
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,8 @@ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook)
/* ip_route_me_harder expects skb->dst to be set */
skb_dst_set_noref(nskb, skb_dst(oldskb));

nskb->mark = IP4_REPLY_MARK(net, oldskb->mark);

skb_reserve(nskb, LL_MAX_HEADER);
niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP,
ip4_dst_hoplimit(skb_dst(nskb)));
Expand Down
15 changes: 5 additions & 10 deletions net/ipv4/netfilter/nft_fib_ipv4.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,6 @@ static __be32 get_saddr(__be32 addr)
return addr;
}

static bool fib4_is_local(const struct sk_buff *skb)
{
const struct rtable *rt = skb_rtable(skb);

return rt && (rt->rt_flags & RTCF_LOCAL);
}

#define DSCP_BITS 0xfc

void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
Expand Down Expand Up @@ -95,8 +88,10 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
else
oif = NULL;

if (nft_hook(pkt) == NF_INET_PRE_ROUTING && fib4_is_local(pkt->skb)) {
nft_fib_store_result(dest, priv->result, pkt, LOOPBACK_IFINDEX);
if (nft_hook(pkt) == NF_INET_PRE_ROUTING &&
nft_fib_is_loopback(pkt->skb, nft_in(pkt))) {
nft_fib_store_result(dest, priv->result, pkt,
nft_in(pkt)->ifindex);
return;
}

Expand Down Expand Up @@ -131,7 +126,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
switch (res.type) {
case RTN_UNICAST:
break;
case RTN_LOCAL: /* should not appear here, see fib4_is_local() above */
case RTN_LOCAL: /* Should not see RTN_LOCAL here */
return;
default:
break;
Expand Down
8 changes: 4 additions & 4 deletions net/ipv6/netfilter/ip6t_rpfilter.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,10 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb,
return ret;
}

static bool rpfilter_is_local(const struct sk_buff *skb)
static bool
rpfilter_is_loopback(const struct sk_buff *skb, const struct net_device *in)
{
const struct rt6_info *rt = (const void *) skb_dst(skb);
return rt && (rt->rt6i_flags & RTF_LOCAL);
return skb->pkt_type == PACKET_LOOPBACK || in->flags & IFF_LOOPBACK;
}

static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
Expand All @@ -85,7 +85,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
struct ipv6hdr *iph;
bool invert = info->flags & XT_RPFILTER_INVERT;

if (rpfilter_is_local(skb))
if (rpfilter_is_loopback(skb, xt_in(par)))
return true ^ invert;

iph = ipv6_hdr(skb);
Expand Down
3 changes: 3 additions & 0 deletions net/ipv6/netfilter/nf_reject_ipv6.c
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook)
fl6.fl6_sport = otcph->dest;
fl6.fl6_dport = otcph->source;
fl6.flowi6_oif = l3mdev_master_ifindex(skb_dst(oldskb)->dev);
fl6.flowi6_mark = IP6_REPLY_MARK(net, oldskb->mark);
security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6));
dst = ip6_route_output(net, NULL, &fl6);
if (dst->error) {
Expand All @@ -180,6 +181,8 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook)

skb_dst_set(nskb, dst);

nskb->mark = fl6.flowi6_mark;

skb_reserve(nskb, hh_len + dst->header_len);
ip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_TCP,
ip6_dst_hoplimit(dst));
Expand Down
13 changes: 4 additions & 9 deletions net/ipv6/netfilter/nft_fib_ipv6.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,6 @@
#include <net/ip6_fib.h>
#include <net/ip6_route.h>

static bool fib6_is_local(const struct sk_buff *skb)
{
const struct rt6_info *rt = (const void *)skb_dst(skb);

return rt && (rt->rt6i_flags & RTF_LOCAL);
}

static int get_ifindex(const struct net_device *dev)
{
return dev ? dev->ifindex : 0;
Expand Down Expand Up @@ -164,8 +157,10 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,

lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif);

if (nft_hook(pkt) == NF_INET_PRE_ROUTING && fib6_is_local(pkt->skb)) {
nft_fib_store_result(dest, priv->result, pkt, LOOPBACK_IFINDEX);
if (nft_hook(pkt) == NF_INET_PRE_ROUTING &&
nft_fib_is_loopback(pkt->skb, nft_in(pkt))) {
nft_fib_store_result(dest, priv->result, pkt,
nft_in(pkt)->ifindex);
return;
}

Expand Down
1 change: 1 addition & 0 deletions net/ipv6/tcp_ipv6.c
Original file line number Diff line number Diff line change
Expand Up @@ -840,6 +840,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
if (!IS_ERR(dst)) {
skb_dst_set(buff, dst);
ctl_sk->sk_mark = fl6.flowi6_mark;
ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass);
TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
if (rst)
Expand Down
2 changes: 1 addition & 1 deletion net/netfilter/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -494,7 +494,7 @@ config NFT_CT
depends on NF_CONNTRACK
tristate "Netfilter nf_tables conntrack module"
help
This option adds the "meta" expression that you can use to match
This option adds the "ct" expression that you can use to match
connection tracking information such as the flow state.

config NFT_SET_RBTREE
Expand Down
44 changes: 21 additions & 23 deletions net/netfilter/nf_conntrack_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,11 @@ static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
static __read_mostly bool nf_conntrack_locks_all;

/* every gc cycle scans at most 1/GC_MAX_BUCKETS_DIV part of table */
#define GC_MAX_BUCKETS_DIV 64u
/* upper bound of scan intervals */
#define GC_INTERVAL_MAX (2 * HZ)
/* maximum conntracks to evict per gc run */
#define GC_MAX_EVICTS 256u
#define GC_MAX_BUCKETS_DIV 128u
/* upper bound of full table scan */
#define GC_MAX_SCAN_JIFFIES (16u * HZ)
/* desired ratio of entries found to be expired */
#define GC_EVICT_RATIO 50u

static struct conntrack_gc_work conntrack_gc_work;

Expand Down Expand Up @@ -938,6 +938,7 @@ static noinline int early_drop(struct net *net, unsigned int _hash)

static void gc_worker(struct work_struct *work)
{
unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u);
unsigned int i, goal, buckets = 0, expired_count = 0;
struct conntrack_gc_work *gc_work;
unsigned int ratio, scanned = 0;
Expand Down Expand Up @@ -979,8 +980,7 @@ static void gc_worker(struct work_struct *work)
*/
rcu_read_unlock();
cond_resched_rcu_qs();
} while (++buckets < goal &&
expired_count < GC_MAX_EVICTS);
} while (++buckets < goal);

if (gc_work->exiting)
return;
Expand All @@ -997,35 +997,33 @@ static void gc_worker(struct work_struct *work)
* 1. Minimize time until we notice a stale entry
* 2. Maximize scan intervals to not waste cycles
*
* Normally, expired_count will be 0, this increases the next_run time
* to priorize 2) above.
* Normally, expire ratio will be close to 0.
*
* As soon as a timed-out entry is found, move towards 1) and increase
* the scan frequency.
* In case we have lots of evictions next scan is done immediately.
* As soon as a sizeable fraction of the entries have expired
* increase scan frequency.
*/
ratio = scanned ? expired_count * 100 / scanned : 0;
if (ratio >= 90 || expired_count == GC_MAX_EVICTS) {
gc_work->next_gc_run = 0;
next_run = 0;
} else if (expired_count) {
gc_work->next_gc_run /= 2U;
next_run = msecs_to_jiffies(1);
if (ratio > GC_EVICT_RATIO) {
gc_work->next_gc_run = min_interval;
} else {
if (gc_work->next_gc_run < GC_INTERVAL_MAX)
gc_work->next_gc_run += msecs_to_jiffies(1);
unsigned int max = GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV;

next_run = gc_work->next_gc_run;
BUILD_BUG_ON((GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV) == 0);

gc_work->next_gc_run += min_interval;
if (gc_work->next_gc_run > max)
gc_work->next_gc_run = max;
}

next_run = gc_work->next_gc_run;
gc_work->last_bucket = i;
queue_delayed_work(system_long_wq, &gc_work->dwork, next_run);
}

static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
{
INIT_DELAYED_WORK(&gc_work->dwork, gc_worker);
gc_work->next_gc_run = GC_INTERVAL_MAX;
gc_work->next_gc_run = HZ;
gc_work->exiting = false;
}

Expand Down Expand Up @@ -1917,7 +1915,7 @@ int nf_conntrack_init_start(void)
nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);

conntrack_gc_work_init(&conntrack_gc_work);
queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, GC_INTERVAL_MAX);
queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, HZ);

return 0;

Expand Down
1 change: 0 additions & 1 deletion net/netfilter/nf_log.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
/* Internal logging interface, which relies on the real
LOG target modules */

#define NF_LOG_PREFIXLEN 128
#define NFLOGGER_NAME_LEN 64

static struct nf_logger __rcu *loggers[NFPROTO_NUMPROTO][NF_LOG_TYPE_MAX] __read_mostly;
Expand Down
Loading

0 comments on commit 086cb6a

Please sign in to comment.