阅读:6414回复:1
[原创]Linux2.6.16内核中netfilter分析
Linux2.6.16内核中netfilter分析
1 简单介绍 在2.6.16内核的netfilter中,netfilter一个重大修正思想就是将netfilter作为一个协议无关的框架,表现在内核结构树中单独建立net/netfilter目录,而在以前netfilter是附着在各个协议目录之下的,如在net/ipv4, net/ipv6等目录下。现在虽然各协议目录下也都有,但主要是处理和各协议相关的东西了,而一些共同的东西,就都放在net/netfilter目录下,文件名也有所改变,虽然现在还不是很独立,比如说net/netfilter/nf_conntrack_core.c和net/ipv4/netfilter/ip_conntrack_core.c就仍然很相似,让人觉得没必要那么分,但不少和协议无关的匹配和目标模块已经和协议分离,只在此目录下有,而不放在协议目录下了。 在net/netfilter下的匹配和目标模块文件名称都以“xt_”打头,如 xt_comment.c,xt_policy.c等 目标模块有: xt_CLASSIFY.c xt_NFQUEUE.c xt_NOTRACK.c 为了和iptables兼容(因为iptables找模块文件前缀是按“ipt_”或“ip6t_”找的),这些文件中增加了一个新的宏定义:MODULE_ALIAS,来表示模块的别名。 如在xt_limit.c中就如下定义: MODULE_ALIAS("ipt_limit"); MODULE_ALIAS("ip6t_limit"); 在include/linux/netfilter_ipv4/ip_tables.h中进行了以下定义: #define ipt_match xt_match #define ipt_target xt_target #define ipt_table xt_table 2 代码分析 以下是新匹配和目标模块的结构定义: struct xt_match { struct list_head list; const char name[XT_FUNCTION_MAXNAMELEN-1]; /* Return true or false: return FALSE and set *hotdrop = 1 to force immediate packet drop. */ /* Arguments changed since 2.6.9, as this must now handle non-linear skb, using skb_header_pointer and skb_ip_make_writable. */ int (*match)(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const struct xt_match *match, const void *matchinfo, int offset, unsigned int protoff, int *hotdrop); /* Called when user tries to insert an entry of this type. */ /* Should return true or false. */ int (*checkentry)(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, unsigned int matchinfosize, unsigned int hook_mask); /* Called when entry of this type deleted. 
*/ void (*destroy)(const struct xt_match *match, void *matchinfo, unsigned int matchinfosize); /* Called when userspace align differs from kernel space one */ int (*compat)(void *match, void **dstptr, int *size, int convert); /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; char *table; unsigned int matchsize; unsigned int hooks; unsigned short proto; unsigned short family; u_int8_t revision; }; /* Registration hooks for targets. */ struct xt_target { struct list_head list; const char name[XT_FUNCTION_MAXNAMELEN-1]; /* Returns verdict. Argument order changed since 2.6.9, as this must now handle non-linear skbs, using skb_copy_bits and skb_ip_make_writable. */ unsigned int (*target)(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, const void *targinfo, void *userdata); /* Called when user tries to insert an entry of this type: hook_mask is a bitmask of hooks from which it can be called. */ /* Should return true or false. */ int (*checkentry)(const char *tablename, const void *entry, const struct xt_target *target, void *targinfo, unsigned int targinfosize, unsigned int hook_mask); /* Called when entry of this type deleted. */ void (*destroy)(const struct xt_target *target, void *targinfo, unsigned int targinfosize); /* Called when userspace align differs from kernel space one */ int (*compat)(void *target, void **dstptr, int *size, int convert); /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; char *table; unsigned int targetsize; unsigned int hooks; unsigned short proto; unsigned short family; u_int8_t revision; }; /* Furniture shopping... */ struct xt_table { struct list_head list; /* A unique name... */ char name[XT_TABLE_MAXNAMELEN]; /* What hooks you will enter on */ unsigned int valid_hooks; /* Lock for the curtain */ rwlock_t lock; /* Man behind the curtain... 
*/ //struct ip6t_table_info *private; void *private; /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; int af; /* address/protocol family */ }; /* The table itself */ struct xt_table_info { /* Size per table */ unsigned int size; /* Number of entries: FIXME. --RR */ unsigned int number; /* Initial number of entries. Needed for module usage count */ unsigned int initial_entries; /* Entry points and underflows */ unsigned int hook_entry[NF_IP_NUMHOOKS]; unsigned int underflow[NF_IP_NUMHOOKS]; /* ipt_entry tables: one per CPU */ char *entries[NR_CPUS]; }; /* 主要结构 */ struct xt_af { struct mutex mutex; struct list_head match; struct list_head target; struct list_head tables; struct mutex compat_mutex; }; /*数据结构的管理模块 */ static struct xt_af *xt; /* netfilter模块初始化*/ static int __init xt_init(void) { int i; /* 每种协议分配一个资源 */ xt = kmalloc(sizeof(struct xt_af) * NPROTO, GFP_KERNEL); if (!xt) return -ENOMEM; for (i = 0; i < NPROTO; i++) { mutex_init(&xt[i].mutex); #ifdef CONFIG_COMPAT mutex_init(&xt[i].compat_mutex); #endif /* 初始化table, target, match资源 */ INIT_LIST_HEAD(&xt[i].target); INIT_LIST_HEAD(&xt[i].match); INIT_LIST_HEAD(&xt[i].tables); } return 0; } 目前2.6.16内核中支持了三类协议族,IPv4/IPv6/ARP,在各协议族中查找相应模块用的前缀为: static const char *xt_prefix[NPROTO] = { [AF_INET] = "ip", [AF_INET6] = "ip6", [NF_ARP] = "arp", }; 对应的具体前缀分别为“ipt”、“ip6t”、“arpt”。 而和老的2.4内核的struct ipt_match和struct ipt_target结构的主要区别是增加了compat函数,以及struct module *me参数后面的一系列参数,是和协议相关的,比如limit匹配,分别为ipv4和ipv6定义了匹配结构后,只有family参数不同,一个是AF_INET,另一个是AF_INET6,其他都相同,而挂接时并不会有问题,因为这些模块都分别挂接到不同协议族的链表: /* Registration hooks for targets. 
*/ int xt_register_target(struct xt_target *target) { int ret, af = target->family; ret = mutex_lock_interruptible(&xt[af].mutex); if (ret != 0) return ret; /* 添加 target*/ list_add(&target->list, &xt[af].target); mutex_unlock(&xt[af].mutex); return ret; } int xt_register_match(struct xt_match *match) { int ret, af = match->family; ret = mutex_lock_interruptible(&xt[af].mutex); if (ret != 0) return ret; /* 添加match */ list_add(&match->list, &xt[af].match); mutex_unlock(&xt[af].mutex); return ret; } table注册发生在各协议的netfilter中: int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl) { int ret; struct xt_table_info *newinfo; static struct xt_table_info bootstrap = { 0, 0, 0, { 0 }, { 0 }, { } }; void *loc_cpu_entry; newinfo = xt_alloc_table_info(repl->size); if (!newinfo) return -ENOMEM; /* choose the copy on our node/cpu * but dont care of preemption */ loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; memcpy(loc_cpu_entry, repl->entries, repl->size); ret = translate_table(table->name, table->valid_hooks, newinfo, loc_cpu_entry, repl->size, repl->num_entries, repl->hook_entry, repl->underflow); if (ret != 0) { xt_free_table_info(newinfo); return ret; } if (xt_register_table(table, &bootstrap, newinfo) != 0) { xt_free_table_info(newinfo); return ret; } return 0; } /* 分配table_info资源,注意这里是每个cpu会对应一个entry */ struct xt_table_info *xt_alloc_table_info(unsigned int size) { struct xt_table_info *newinfo; int cpu; /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */ if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > num_physpages) /*超过物理内存空间*/ return NULL; newinfo = kzalloc(sizeof(struct xt_table_info), GFP_KERNEL); if (!newinfo) return NULL; newinfo->size = size; for_each_possible_cpu(cpu) {/* 遍历每个cpu */ if (size <= PAGE_SIZE) newinfo->entries[cpu] = kmalloc_node(size, GFP_KERNEL, cpu_to_node(cpu)); /* 直接分配物理空间 */ else newinfo->entries[cpu] = vmalloc_node(size, cpu_to_node(cpu));/* 分配虚拟空间*/ if (newinfo->entries[cpu] == NULL) { 
xt_free_table_info(newinfo); return NULL; } } return newinfo; } int xt_register_table(struct xt_table *table, struct xt_table_info *bootstrap, struct xt_table_info *newinfo) { int ret; struct xt_table_info *private; ret = mutex_lock_interruptible(&xt[table->af].mutex); if (ret != 0) return ret; /* Don't autoload: we'd eat our tail... */ if (list_named_find(&xt[table->af].tables, table->name)) { ret = -EEXIST; goto unlock; } /* Simplifies replace_table code. */ table->private = bootstrap; rwlock_init(&table->lock); if (!xt_replace_table(table, 0, newinfo, &ret)) goto unlock; private = table->private; duprintf("table->private->number = %u\n", private->number); /* save number of initial entries */ private->initial_entries = private->number; list_prepend(&xt[table->af].tables, table); ret = 0; unlock: mutex_unlock(&xt[table->af].mutex); return ret; } 但在进行实际匹配目标查找时会进行名字、协议族、表名、挂接点、协议等的比较,如匹配的检查 int xt_check_match(const struct xt_match *match, unsigned short family, unsigned int size, const char *table, unsigned int hook_mask, unsigned short proto, int inv_proto) { if (XT_ALIGN(match->matchsize) != size) { printk("%s_tables: %s match: invalid size %Zu != %u\n", xt_prefix[family], match->name, XT_ALIGN(match->matchsize), size); return -EINVAL; } if (match->table && strcmp(match->table, table)) { printk("%s_tables: %s match: only valid in %s table, not %s\n", xt_prefix[family], match->name, match->table, table); return -EINVAL; } if (match->hooks && (hook_mask & ~match->hooks) != 0) { printk("%s_tables: %s match: bad hook_mask %u\n", xt_prefix[family], match->name, hook_mask); return -EINVAL; } if (match->proto && (match->proto != proto || inv_proto)) { printk("%s_tables: %s match: only valid for protocol %u\n", xt_prefix[family], match->name, match->proto); return -EINVAL; } return 0; } int xt_check_target(const struct xt_target *target, unsigned short family, unsigned int size, const char *table, unsigned int hook_mask, unsigned short proto, int inv_proto) { if 
(XT_ALIGN(target->targetsize) != size) { printk("%s_tables: %s target: invalid size %Zu != %u\n", xt_prefix[family], target->name, XT_ALIGN(target->targetsize), size); return -EINVAL; } if (target->table && strcmp(target->table, table)) { printk("%s_tables: %s target: only valid in %s table, not %s\n", xt_prefix[family], target->name, target->table, table); return -EINVAL; } if (target->hooks && (hook_mask & ~target->hooks) != 0) { printk("%s_tables: %s target: bad hook_mask %u\n", xt_prefix[family], target->name, hook_mask); return -EINVAL; } if (target->proto && (target->proto != proto || inv_proto)) { printk("%s_tables: %s target: only valid for protocol %u\n", xt_prefix[family], target->name, target->proto); return -EINVAL; } return 0; } /* 下面是ipsec的policy检验过程处理 */ static struct xt_match policy_match = { .name = "policy", .family = AF_INET, .match = match, .matchsize = sizeof(struct xt_policy_info), .checkentry = checkentry, .family = AF_INET, .me = THIS_MODULE, }; static int __init init(void) { int ret; ret = xt_register_match(&policy_match); if (ret) return ret; ret = xt_register_match(&policy6_match); if (ret) xt_unregister_match(&policy_match); return ret; } static int match_policy_in(const struct sk_buff *skb, const struct xt_policy_info *info, unsigned short family) { const struct xt_policy_elem *e; struct sec_path *sp = skb->sp; int strict = info->flags & XT_POLICY_MATCH_STRICT; int i, pos; if (sp == NULL) return -1; if (strict && info->len != sp->len) return 0; for (i = sp->len - 1; i >= 0; i--) { pos = strict ? i - sp->len + 1 : 0; if (pos >= info->len) return 0; e = &info->pol[pos]; /* 检查策略 */ if (match_xfrm_state(sp->xvec, e, family)) { if (!strict) return 1; } else if (strict) return 0; } return strict ? 1 : 0; } |
|
|
沙发#
发布于:2007-11-29 12:03
楼主分析得不错,可以把现在的2.6.23.*再分析一下,
“虽然现在还不是很独立,比如说net/netfilter/nf_conntrack_core.c和net/ipv4/netfilter/ip_conntrack_core.c就仍然很相似,让人觉得没必要那么分,但不少和协议无关的匹配和目标模块已经和协议分离,只在此目录下有,而不放在协议目录下了。” 这个问题就不存在了。 还是向楼主表示一下敬意。 |
|