关于iptables的工作原理,主要分为三个方面:用户程序对规则的处理,内核对用户命令的处理,内核中netfilter对数据包的过滤(Ref:netfilter分析3-钩子函数执行流程)。
本文大致分析iptables用户态程序如何解析规则,并将规则配置到内核中。以如下命令为例:
iptables -A INPUT -i eth0 -p tcp -s 192.168.100.0/24 --dport 22 -m state --state NEW,ESTABLISHED -j ACCEPT
iptables -A OUTPUT -o eth0 -p tcp --sport 22 -m state --state ESTABLISHED -j ACCEPT
主要分析第一句:
iptables -A INPUT -i eth0 -p tcp -s 192.168.100.0/24 --dport 22 -m state --state NEW,ESTABLISHED -j ACCEPT
用户空间
代码版本:iptables-1.8.7。
iptables的客户端和内核共享一些数据结构。例如:
ipt_entry、xt_entry_match、xt_tcp。
/*
 * One rule as exchanged between the iptables client and the kernel.
 * The fixed-size header below is followed, in elems, by the rule's matches
 * (if any) and then its target; target_offset and next_offset record where
 * the target starts and where the whole entry ends.
 */
struct ipt_entry {
	struct ipt_ip ip;

	/* Mark with fields that we care about. */
	unsigned int nfcache;

	/* Size of ipt_entry + matches */
	__u16 target_offset;
	/* Size of ipt_entry + matches + target */
	__u16 next_offset;

	/* Back pointer */
	unsigned int comefrom;

	/* Packet and byte counters. */
	struct xt_counters counters;

	/* The matches (if any), then the target. */
	unsigned char elems[0];
};
/*
 * Header that precedes one match inside an entry.
 * The union overlays three views of the same leading bytes: the userspace
 * view (size, extension name, revision), the kernel view (size, pointer to
 * the xt_match) and the bare total size. The match's own payload follows
 * in data.
 */
struct xt_entry_match {
	union {
		struct {
			__u16 match_size;

			/* Used by userspace */
			char name[XT_EXTENSION_MAXNAMELEN];
			__u8 revision;
		} user;
		struct {
			__u16 match_size;

			/* Used inside the kernel */
			struct xt_match *match;
		} kernel;

		/* Total length */
		__u16 match_size;
	} u;

	unsigned char data[0];
};
/* Parameters of the tcp match (port ranges, option and flag checks). */
struct xt_tcp {
	__u16 spts[2];	/* Source port range. */
	__u16 dpts[2];	/* Destination port range. */
	__u8 option;	/* TCP Option iff non-zero*/
	__u8 flg_mask;	/* TCP flags mask byte */
	__u8 flg_cmp;	/* TCP flags compare byte */
	__u8 invflags;	/* Inverse flags */
};
主函数为iptables_main(iptables-standalone.c)。
/*
 * Entry point of the standalone iptables binary (abridged listing: the
 * declaration of ret and the final return are not shown).
 * Parses the command line against the default "filter" table and, when
 * do_command4() returns non-zero, commits the handle and frees it.
 */
int
iptables_main(int argc, char *argv[])
{
	char *table = "filter";
	struct xtc_handle *handle = NULL;

	ret = do_command4(argc, argv, &table, &handle, false);
	if (ret) {
		/* Push the rule set held in the handle, then release it. */
		ret = iptc_commit(handle);
		iptc_free(handle);
	}
}
-A INPUT的解析代码:
/*
 * Abridged listing of do_command4(): only the option arm for -A is shown;
 * the enclosing switch and the local declarations are elided.
 */
int do_command4(int argc, char *argv[], char **table, struct xtc_handle **handle, bool restore)
{
	case 'A':
		/* Record the append command and keep the option argument as
		 * the chain name. */
		add_command(&command, CMD_APPEND, CMD_NONE,
			    cs.invert);
		chain = optarg;
		break;
}
-i eth0的解析代码:
/*
 * Abridged listing of do_command4(): only the option arm for -i is shown;
 * the enclosing switch and the local declarations are elided.
 */
int do_command4(int argc, char *argv[], char **table, struct xtc_handle **handle, bool restore)
{
	case 'i':
		/* An empty interface name is rejected up front. */
		if (*optarg == '\0')
			xtables_error(PARAMETER_PROBLEM,
				"Empty interface is likely to be "
				"undesired");
		set_option(&cs.options, OPT_VIANAMEIN, &cs.fw.ip.invflags,
			   cs.invert);
		/* Fill the rule's input-interface name and mask from optarg. */
		xtables_parse_interface(optarg,
					cs.fw.ip.iniface,
					cs.fw.ip.iniface_mask);
		break;
}
-p tcp -s 192.168.100.0/24 --dport 22
ip段(192.168.100.0/24)的解析:
/*
 * Abridged listing of do_command4(): only the address parsing step is
 * shown; local declarations and the rest of the body are elided.
 */
int do_command4(int argc, char *argv[], char **table, struct xtc_handle **handle, bool restore)
{
	/* Source host/network spec, if one was given: yields address and
	 * mask arrays plus their count. */
	if (shostnetworkmask)
		xtables_ipparse_multiple(shostnetworkmask, &saddrs,
					 &smasks, &nsaddrs);
	/* Same for the destination spec. */
	if (dhostnetworkmask)
		xtables_ipparse_multiple(dhostnetworkmask, &daddrs,
					 &dmasks, &ndaddrs);
}
--dport 22的参数解析需要tcp_match模块;该模块根据命令中已经指定的协议(-p tcp)来加载。
/*
 * Userspace descriptor of the "tcp" match: payload size is that of
 * struct xt_tcp, and the callbacks below handle help, init, option
 * parsing, printing, saving and translation.
 */
static struct xtables_match tcp_match = {
	.family		= NFPROTO_UNSPEC,
	.name		= "tcp",
	.version	= XTABLES_VERSION,
	.size		= XT_ALIGN(sizeof(struct xt_tcp)),
	.userspacesize	= XT_ALIGN(sizeof(struct xt_tcp)),
	.help		= tcp_help,
	.init		= tcp_init,
	.parse		= tcp_parse,
	.print		= tcp_print,
	.save		= tcp_save,
	.extra_opts	= tcp_opts,
	.xlate		= tcp_xlate,
};
相应的解析函数:
int command_default(struct iptables_command_state *cs,struct xtables_globals *gl)
{if (cs->target != NULL &&(cs->target->parse != NULL || cs->target->x6_parse != NULL) &&cs->c >= cs->target->option_offset &&cs->c < cs->target->option_offset + XT_OPTION_OFFSET_SCALE) {xtables_option_tpcall(cs->c, cs->argv, cs->invert,cs->target, &cs->fw);return 0;}for (matchp = cs->matches; matchp; matchp = matchp->next) {m = matchp->match;if (matchp->completed ||(m->x6_parse == NULL && m->parse == NULL))continue;if (cs->c < matchp->match->option_offset ||cs->c >= matchp->match->option_offset + XT_OPTION_OFFSET_SCALE)continue;xtables_option_mpcall(cs->c, cs->argv, cs->invert, m, &cs->fw);return 0;}/* Try loading protocol */m = load_proto(cs);if (m != NULL) {size_t size;cs->proto_used = 1;size = XT_ALIGN(sizeof(struct xt_entry_match)) + m->size;m->m = xtables_calloc(1, size);m->m->u.match_size = size;strcpy(m->m->u.user.name, m->name);m->m->u.user.revision = m->revision;xs_init_match(m);if (m->x6_options != NULL)gl->opts = xtables_options_xfrm(gl->orig_opts,gl->opts,m->x6_options,&m->option_offset);elsegl->opts = xtables_merge_options(gl->orig_opts,gl->opts,m->extra_opts,&m->option_offset);if (gl->opts == NULL)xtables_error(OTHER_PROBLEM, "can't alloc memory!");optind--;/* Indicate to rerun getopt *immediately* */return 1;}
}
/*
 * Dispatch option code c to match m.
 * When the match has no x6_parse callback, its parse callback (if any) is
 * called with the option code made relative to the match's option_offset.
 * NOTE(review): nothing happens here when x6_parse is set; that path looks
 * elided from this listing.
 */
void xtables_option_mpcall(unsigned int c, char **argv, bool invert, struct xtables_match *m, void *fw)
{
	if (m->x6_parse == NULL) {
		if (m->parse != NULL)
			m->parse(c - m->option_offset, argv, invert,
				 &m->mflags, fw, &m->m);
		return;
	}
}
tcp_parse会将端口数据写入struct xt_tcp中。
load_proto会按照protocol查找并加载对应的xtables_match。
/*
 * Look up (and try to load) the match extension named after the rule's
 * protocol. Returns NULL when should_load_proto() declines, otherwise
 * whatever find_proto() returns.
 */
struct xtables_match *load_proto(struct iptables_command_state *cs)
{
	if (should_load_proto(cs))
		return find_proto(cs->protocol, XTF_TRY_LOAD,
				  cs->options & OPT_NUMERIC, &cs->matches);

	return NULL;
}
/*
 * Resolve protocol name pname to its match extension by delegating to
 * xtables_find_match(); nolookup is not used in this listing.
 */
static struct xtables_match *
find_proto(const char *pname, enum xtables_tryload tryload, int nolookup, struct xtables_rule_match **matches)
{
	struct xtables_match *found;

	found = xtables_find_match(pname, tryload, matches);
	return found;
}
命令行中的数据会加载到struct xt_entry_match。之后被复制到struct ipt_entry中。
/*
 * Build one contiguous ipt_entry from the parsed rule.
 *
 * fw      - rule header filled in during option parsing
 * matches - list of matches attached to the rule
 * target  - the rule's target
 *
 * Layout of the result: the ipt_entry header, then every match blob in
 * list order, then the target blob. target_offset is the size of header
 * plus matches; next_offset additionally includes the target.
 * Returns the newly allocated entry (obtained from xtables_malloc).
 */
static struct ipt_entry *
generate_entry(const struct ipt_entry *fw, struct xtables_rule_match *matches, struct xt_entry_target *target)
{
	unsigned int size;
	struct xtables_rule_match *matchp;
	struct ipt_entry *e;

	/* First pass: total size of header plus all matches. */
	size = sizeof(struct ipt_entry);
	for (matchp = matches; matchp; matchp = matchp->next)
		size += matchp->match->m->u.match_size;

	e = xtables_malloc(size + target->u.target_size);
	*e = *fw;
	e->target_offset = size;
	e->next_offset = size + target->u.target_size;

	/* Second pass: size now tracks the write offset inside elems. */
	size = 0;
	for (matchp = matches; matchp; matchp = matchp->next) {
		/* Copy the data held by this match. */
		memcpy(e->elems + size, matchp->match->m, matchp->match->m->u.match_size);
		size += matchp->match->m->u.match_size;
	}
	/* The target goes right after the last match. */
	memcpy(e->elems + size, target, target->u.target_size);

	return e;
}
数据复制:append_entry针对每一组源/目的地址调用iptc_append_entry,后者通过memcpy将entry复制到新分配的rule中。
/*
 * Append fw to chain once for every (source, destination) address pair
 * (abridged listing: the declarations of i, j and ret are not shown).
 * For each pair the addresses and masks are written into fw->ip before
 * iptc_append_entry() is called; the individual results are combined into
 * ret with &=.
 */
static int
append_entry(const xt_chainlabel chain,
	     struct ipt_entry *fw,
	     unsigned int nsaddrs,
	     const struct in_addr saddrs[],
	     const struct in_addr smasks[],
	     unsigned int ndaddrs,
	     const struct in_addr daddrs[],
	     const struct in_addr dmasks[],
	     int verbose,
	     struct xtc_handle *handle)
{
	for (i = 0; i < nsaddrs; i++) {
		fw->ip.src.s_addr = saddrs[i].s_addr;
		fw->ip.smsk.s_addr = smasks[i].s_addr;
		for (j = 0; j < ndaddrs; j++) {
			fw->ip.dst.s_addr = daddrs[j].s_addr;
			fw->ip.dmsk.s_addr = dmasks[j].s_addr;
			if (verbose)
				print_firewall_line(fw, handle);
			ret &= iptc_append_entry(chain, fw, handle);
		}
	}

	return ret;
}
/*
 * Append entry e to the cached chain (abridged listing: the chain lookup
 * that sets c, the declaration of r and the success path are not shown).
 * A rule of e->next_offset bytes is allocated and the whole entry is
 * copied into it. Returns 0 with errno set to ENOMEM when the rule cannot
 * be allocated.
 */
int
iptc_append_entry(const IPT_CHAINLABEL chain, const STRUCT_ENTRY *e, struct xtc_handle *handle)
{
	if (!(r = iptcc_alloc_rule(c, e->next_offset))) {
		DEBUGP("unable to allocate rule for chain `%s'\n", chain);
		errno = ENOMEM;
		return 0;
	}

	/* next_offset covers header, matches and target. */
	memcpy(r->entry, e, e->next_offset);
}
/* allocate and initialize a new rule for the cache
 * (abridged listing: the declaration and allocation of r are not shown;
 * only the recording of the owning chain and the size is visible) */
static struct rule_head *iptcc_alloc_rule(struct chain_head *c, unsigned int size)
{
	r->chain = c;
	r->size = size;

	return r;
}
解析action,-j ACCEPT。
/*
 * Abridged listing of do_command4(): only the option arm for -j is shown;
 * the enclosing switch and the local declarations are elided.
 */
int do_command4(int argc, char *argv[], char **table, struct xtc_handle **handle, bool restore)
{
	case 'j':
		set_option(&cs.options, OPT_JUMP, &cs.fw.ip.invflags,
			   cs.invert);
		/* Resolve the target named by the option argument. */
		command_jump(&cs, optarg);
		break;
}
/*
 * Resolve the -j argument to a target and allocate its blob (abridged
 * listing: the declaration of size and the rest of the body are not
 * shown). If no target extension is found the function returns with
 * cs->target left NULL.
 */
void command_jump(struct iptables_command_state *cs, const char *jumpto)
{
	cs->jumpto = xt_parse_target(jumpto);
	/* TRY_LOAD (may be chain name) */
	cs->target = xtables_find_target(cs->jumpto, XTF_TRY_LOAD);

	if (cs->target == NULL)
		return;

	/* Target header plus the extension's own payload, zero-filled. */
	size = XT_ALIGN(sizeof(struct xt_entry_target)) + cs->target->size;

	cs->target->t = xtables_calloc(1, size);
	cs->target->t->u.target_size = size;
}
ACCEPT,DROP,QUEUE,RETURN对应的是standard target。
/*
 * Userspace descriptor of the "standard" target; its payload is a single
 * aligned int.
 */
static struct xtables_target standard_target = {
	.family		= NFPROTO_UNSPEC,
	.name		= "standard",
	.version	= XTABLES_VERSION,
	.size		= XT_ALIGN(sizeof(int)),
	.userspacesize	= XT_ALIGN(sizeof(int)),
	.help		= standard_help,
};
xt_entry_target分配的大小:
/* Aligned target header plus the target extension's payload size. */
size = XT_ALIGN(sizeof(struct xt_entry_target)) + cs->target->size;
/* Zero-filled buffer that holds header and payload together. */
cs->target->t = xtables_calloc(1, size);
standard target的target->size大小为XT_ALIGN(sizeof(int))。最终分配的结构体为xt_standard_target。
/* A standard target: the generic target header followed by an int verdict. */
struct xt_standard_target {
	struct xt_entry_target target;
	int verdict;
};
整理成内核需要的格式,向内核提交:
/*
 * Hand the rule set held in handle to the kernel (abridged listing: the
 * declarations of new_number, new_size and ret, the allocation of repl and
 * the return are not shown).
 * The table is prepared and compiled into repl, then passed to the kernel
 * with setsockopt(SO_SET_REPLACE); the length passed is the replace header
 * plus repl->size.
 */
int
TC_COMMIT(struct xtc_handle *handle)
{
	/* Replace, then map back the counters. */
	STRUCT_REPLACE *repl;

	new_number = iptcc_compile_table_prep(handle, &new_size);

	ret = iptcc_compile_table(handle, repl);

	ret = setsockopt(handle->sockfd, TC_IPPROTO, SO_SET_REPLACE, repl,
			 sizeof(*repl) + repl->size);
}
内核空间
/*
 * Kernel-side setsockopt handler (abridged listing: the declaration of ret
 * is not shown). IPT_SO_SET_REPLACE is forwarded to do_replace(); any
 * other command shown here yields -EINVAL.
 */
static int
do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
{
	switch (cmd) {
	case IPT_SO_SET_REPLACE:
		ret = do_replace(sock_net(sk), user, len);
		break;

	default:
		ret = -EINVAL;
	}

	return ret;
}
/*
 * Replace a table with the rule blob supplied by userspace (abridged
 * listing: the locals, the copy of the header into tmp, the free_newinfo
 * label and the return are not shown).
 * Steps visible here: allocate table info for tmp.size bytes, copy the
 * entries that follow the header in the user buffer, translate the table,
 * then perform the replacement via __do_replace().
 */
static int
do_replace(struct net *net, const void __user *user, unsigned int len)
{
	newinfo = xt_alloc_table_info(tmp.size);
	if (!newinfo)
		return -ENOMEM;

	loc_cpu_entry = newinfo->entries;
	/* The entries start right after the header in the user buffer. */
	if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
			   tmp.size) != 0) {
		ret = -EFAULT;
		goto free_newinfo;
	}

	ret = translate_table(net, newinfo, loc_cpu_entry, &tmp);
	if (ret != 0)
		goto free_newinfo;

	ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
			   tmp.num_counters, tmp.counters);
}
/*
 * Swap newinfo into the named table (abridged listing: the declarations
 * of oldinfo and ret, counter handling and the return are not shown).
 * The table is looked up by name for AF_INET and its info is replaced via
 * xt_replace_table(), which hands back the previous info.
 */
static int
__do_replace(struct net *net, const char *name, unsigned int valid_hooks,
	     struct xt_table_info *newinfo, unsigned int num_counters,
	     void __user *counters_ptr)
{
	struct xt_table *t;

	t = xt_request_find_table_lock(net, AF_INET, name);

	oldinfo = xt_replace_table(t, num_counters, newinfo, &ret);
}
/*
 * Abridged listing: only the key step is shown, the table's private
 * pointer is switched to newinfo. Everything else, including the return
 * value, is elided.
 */
struct xt_table_info *
xt_replace_table(struct xt_table *table,
		 unsigned int num_counters,
		 struct xt_table_info *newinfo,
		 int *error)
{
	table->private = newinfo;
}
原有规则的处理
用户层调用setsockopt将数据配置到内核。do_replace函数会重新配置规则。但是用户可以多次配置iptables。这里就引入一个问题:之前内核中的iptables规则到哪里去了呢?难道被冲掉了吗?
iptables在重新解析规则时,会调用getsockopt将内核中的规则拷贝出来,然后重新配置。
/*
 * Abridged listing of do_command4(): only the handle initialisation is
 * shown; the rest of the body is elided.
 */
int do_command4(int argc, char *argv[], char **table, struct xtc_handle **handle, bool restore)
{
	/* only allocate handle if we weren't called with a handle */
	if (!*handle)
		*handle = iptc_init(*table);
}
struct xtc_handle *
iptc_init(const char *tablename)
{strcpy(info.name, tablename);
//获取entry的大小信息。if (getsockopt(sockfd, TC_IPPROTO, SO_GET_INFO, &info, &s) < 0) {close(sockfd);return NULL;}h = alloc_handle(&info);/* Initialize current state */h->sockfd = sockfd;h->info = info;h->entries->size = h->info.size;tmp = sizeof(STRUCT_GET_ENTRIES) + h->info.size;if (getsockopt(h->sockfd, TC_IPPROTO, SO_GET_ENTRIES, h->entries,&tmp) < 0)goto error;
}
读取规则信息之后,iptables重新处理数据:
/* parse an iptables blob into its pieces
 * (abridged listing: the declarations of prev and num, later passes and
 * the return are not shown) */
static int parse_table(struct xtc_handle *h)
{
	/* First pass: over ruleset blob */
	/* cache_add_entry is invoked for each entry with h, &prev and &num. */
	ENTRY_ITERATE(h->entries->entrytable, h->entries->size,
		      cache_add_entry, h, &prev, &num);
}
/* main parser function: add an entry from the blob to the cache
 * (abridged listing: only the branch for built-in chain entries is shown;
 * the preceding branches of the if/else chain, the declarations of builtin
 * and offset, and the new_rule label are elided) */
static int cache_add_entry(STRUCT_ENTRY *e, struct xtc_handle *h, STRUCT_ENTRY **prev, unsigned int *num)
{
	/* Entry sits at a hook entry point: start a new built-in chain. */
	else if ((builtin = iptcb_ent_is_hook_entry(e, h)) != 0) {
		struct chain_head *c =
			iptcc_alloc_chain_head((char *)hooknames[builtin-1],
						builtin);
		DEBUGP_C("%u:%u new builtin chain: %p (rules=%p)\n",
			*num, offset, c, &c->rules);
		if (!c) {
			errno = -ENOMEM;
			return -1;
		}

		c->hooknum = builtin;

		__iptcc_p_add_chain(h, c, offset, num);

		/* FIXME: this is ugly. */
		goto new_rule;
	}
}
内核中在初始化table的时候,会配置chain。博客——netfilter分析2-表在内核的初始化——有更详尽的分析。
以filter表为例:
/*
 * Per-netns init of the filter table (abridged listing: only the creation
 * of the initial table from packet_filter is shown; the declaration of
 * repl, registration and the return are elided).
 */
static int __net_init iptable_filter_table_init(struct net *net)
{
	repl = ipt_alloc_initial_table(&packet_filter);
}
/*
 * Build the initial table for info by expanding the generic
 * xt_alloc_initial_table() macro with the ipt / IPT name prefixes.
 */
void *ipt_alloc_initial_table(const struct xt_table *info)
{
	return xt_alloc_initial_table(ipt, IPT);
}
/*
 * Allocate an initial table: a <type>_replace header, one <type>_standard
 * entry per hook set in hook_mask, and room for a trailing <type>_error
 * entry (abridged listing: the declarations of hook_mask, nhooks, bytes,
 * hooknum and i, and the setup of the header and error entry, are not
 * shown).
 *
 * term_offset is the end of the entries array rounded up to the alignment
 * of the error entry. For every set bit in hook_mask, both hook_entry and
 * underflow of that hook point at the byte offset of a fresh standard
 * entry initialised with NF_ACCEPT. The statement expression evaluates to
 * tbl.
 */
#define xt_alloc_initial_table(type, typ2) ({ \
	struct { \
		struct type##_replace repl; \
		struct type##_standard entries[]; \
	} *tbl; \
	struct type##_error *term; \
	size_t term_offset = (offsetof(typeof(*tbl), entries[nhooks]) + \
		__alignof__(*term) - 1) & ~(__alignof__(*term) - 1); \
	tbl = kzalloc(term_offset + sizeof(*term), GFP_KERNEL); \
	for (; hook_mask != 0; hook_mask >>= 1, ++hooknum) { \
		if (!(hook_mask & 1)) \
			continue; \
		tbl->repl.hook_entry[hooknum] = bytes; \
		tbl->repl.underflow[hooknum] = bytes; \
		tbl->entries[i++] = (struct type##_standard) \
			typ2##_STANDARD_INIT(NF_ACCEPT); \
		bytes += sizeof(struct type##_standard); \
	} \
	tbl; \
})
链接来源:
https://www.jianshu.com/p/ec04b7c73cfa#