阅读:3286回复:5
Linux 2.6.17.9内核文件系统调用详解
Linux 2.6.17.9内核文件系统调用详解
本部分主要讲述的是文件I/O操作的2.6.17.9内核版本实现,包括了主要的数据结构、宏定义和函数流程。以下分别讲述open,creat,close,read,write,lseek系统调用。 1 重要数据结构 1.1 struct file struct file { /* * fu_list becomes invalid after file_free is called and queued via * fu_rcuhead for RCU freeing */ union { struct list_head fu_list; //文件链表指针 struct rcu_head fu_rcuhead; //rcu链表 } f_u; struct dentry *f_dentry; // 文件对应的目录项(dentry)结构 struct vfsmount *f_vfsmnt; // 虚拟文件系统挂载点 const struct file_operations *f_op; // 文件操作函数指针 atomic_t f_count; // 引用计数 unsigned int f_flags; mode_t f_mode; // 文件模式 loff_t f_pos; // 文件offset struct fown_struct f_owner; //文件owner 结构 unsigned int f_uid, f_gid;//文件用户id,组id struct file_ra_state f_ra; // 文件预读(readahead)状态,是内嵌结构而非指针 unsigned long f_version; void *f_security; // hook 文件操作的security结构指针 /* needed for tty driver, and maybe others */ void *private_data; // tty 驱动器所需数据 #ifdef CONFIG_EPOLL /* Used by fs/eventpoll.c to link all the hooks to this file */ struct list_head f_ep_links; // EPOLL 机制检测所需链表结构 spinlock_t f_ep_lock; // 保护 f_ep_links 链表的自旋锁 #endif /* #ifdef CONFIG_EPOLL */ struct address_space *f_mapping; // 地址映射表 }; 1.2 struct fown_struct struct fown_struct { rwlock_t lock; /* protects pid, uid, euid fields */ int pid; /* pid or -pgrp where SIGIO should be sent */ uid_t uid, euid; /* uid/euid of process setting the owner */ void *security; /*hook 文件操作的security结构指针*/ int signum; /* posix.1b rt signal to be delivered on IO */ }; 1.3 struct file_ra_state /* * Track a single file's readahead state */ struct file_ra_state { unsigned long start; /* Current window */ unsigned long size; unsigned long flags; /* ra flags RA_FLAG_xxx*/ unsigned long cache_hit; /* cache hit count*/ unsigned long prev_page; /* Cache last read() position */ unsigned long ahead_start; /* Ahead window */ unsigned long ahead_size; unsigned long ra_pages; /* Maximum readahead window */ unsigned long mmap_hit; /* Cache hit stat for mmap accesses */ unsigned long mmap_miss; /* Cache miss stat for mmap accesses */ }; 1.4 struct address_space struct 
address_space { struct inode *host; /* owner: inode, block_device */ struct radix_tree_root page_tree; /* radix tree of all pages */ rwlock_t tree_lock; /* and rwlock protecting it */ unsigned int i_mmap_writable;/* count VM_SHARED mappings */ struct prio_tree_root i_mmap; /* tree of private and shared mappings */ struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */ spinlock_t i_mmap_lock; /* protect tree, count, list */ unsigned int truncate_count; /* Cover race condition with truncate */ unsigned long nrpages; /* number of total pages */ pgoff_t writeback_index;/* writeback starts here */ struct address_space_operations *a_ops; /* methods */ unsigned long flags; /* error bits/gfp mask */ struct backing_dev_info *backing_dev_info; /* device readahead, etc */ spinlock_t private_lock; /* for use by the address_space */ struct list_head private_list; /* ditto */ struct address_space *assoc_mapping; /* ditto */ } __attribute__((aligned(sizeof(long)))); struct address_space_operations { int (*writepage)(struct page *page, struct writeback_control *wbc); int (*readpage)(struct file *, struct page *); void (*sync_page)(struct page *); /* Write back some dirty pages from this mapping. */ int (*writepages)(struct address_space *, struct writeback_control *); /* Set a page dirty. Return true if this dirtied it */ int (*set_page_dirty)(struct page *page); int (*readpages)(struct file *filp, struct address_space *mapping, struct list_head *pages, unsigned nr_pages); /* * ext3 requires that a successful prepare_write() call be followed * by a commit_write() call - they must be balanced */ int (*prepare_write)(struct file *, struct page *, unsigned, unsigned); int (*commit_write)(struct file *, struct page *, unsigned, unsigned); /* Unfortunately this kludge is needed for FIBMAP. 
Don't use it */ sector_t (*bmap)(struct address_space *, sector_t); void (*invalidatepage) (struct page *, unsigned long); int (*releasepage) (struct page *, gfp_t); ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov, loff_t offset, unsigned long nr_segs); struct page* (*get_xip_page)(struct address_space *, sector_t, int); /* migrate the contents of a page to the specified target */ int (*migratepage) (struct page *, struct page *); }; 1.5 struct block_device struct block_device { dev_t bd_dev; /* not a kdev_t - it's a search key */ struct inode * bd_inode; /* will die */ int bd_openers; struct mutex bd_mutex; /* open/close mutex */ struct mutex bd_mount_mutex; /* mount mutex */ struct list_head bd_inodes; void * bd_holder; int bd_holders; #ifdef CONFIG_SYSFS struct list_head bd_holder_list; #endif struct block_device * bd_contains; unsigned bd_block_size; struct hd_struct * bd_part; /* number of times partitions within this device have been opened. */ unsigned bd_part_count; int bd_invalidated; struct gendisk * bd_disk; struct list_head bd_list; struct backing_dev_info *bd_inode_backing_dev_info; /* * Private data. You must have bd_claim'ed the block_device * to use this. NOTE: bd_claim allows an owner to claim * the same device multiple times, the owner must take special * care to not mess up bd_private for that case. 
*/ unsigned long bd_private; }; 1.6 struct backing_dev_info struct backing_dev_info { unsigned long ra_pages; /* max readahead in PAGE_CACHE_SIZE units */ unsigned long state; /* Always use atomic bitops on this */ unsigned int capabilities; /* Device capabilities */ congested_fn *congested_fn; /* Function pointer if device is md/dm */ void *congested_data; /* Pointer to aux data for congested func */ void (*unplug_io_fn)(struct backing_dev_info *, struct page *); void *unplug_io_data; }; 1.7 struct files_struct 对于内核而言,所有打开文件都由文件描述符引用。文件描述符是一个非负整数。当打开一个现存文件或创建一个新文件时,内核向进程返回一个文件描述符。 当读、写一个文件时,用open或creat返回的文件描述符标识该文件,将其作为参数传送给read或write。在POSIX.1应用程序中,文件描述符0、1和2分别对应符号常量STDIN_FILENO、STDOUT_FILENO和STDERR_FILENO,即标准输入,标准输出和标准出错输出,这些常量都定义在头文件<unistd.h>中。 文件描述符的范围是0~OPEN_MAX-1。在Linux中,单个进程可打开的文件数上限由资源限制RLIMIT_NOFILE决定(软限制默认通常为1024),并且不能超过内核常量NR_OPEN(2.6.17中为1024*1024)。 /* * Open file table structure */ struct files_struct { /* * read mostly part */ atomic_t count; /* 引用计数 */ struct fdtable *fdt; /* 文件表指针,指向fdtab */ struct fdtable fdtab;/* 文件表 */ /* * written part on a separate cache line in SMP */ spinlock_t file_lock ____cacheline_aligned_in_smp; int next_fd; // 下一个空闲fd struct embedded_fd_set close_on_exec_init; /* 执行exec时需要关闭的fd集合 */ struct embedded_fd_set open_fds_init;/* 打开的fd集合 */ struct file * fd_array[NR_OPEN_DEFAULT]; /*打开的文件列表*/ }; struct fdtable { unsigned int max_fds; // 最大文件句柄数目 int max_fdset; // 最大的fd集合容量 struct file ** fd; /* current fd array */ fd_set *close_on_exec; // 执行exec时需要关闭的fd集合 fd_set *open_fds; // 打开的fd集合 struct rcu_head rcu; struct files_struct *free_files; /*反向指针 */ struct fdtable *next; /*链表*/ }; 2 文件操作 2.1 open 操作 2.1.1 调用关系 sys_open | ----------- getname | ----------- filp_open | | ------------ open_namei | | | ----------- may_open | | ------------ dentry_open |
|
|
沙发#
发布于:2007-05-15 12:03
感觉英文注释已经很好了,所以就偷懒了,呵呵
|
|
|