LDD3 read notes Chapter 4-5
时间:2009-07-09 来源:hylpro
2009.6.x chapter 4 Debugging Techniques *If both klogd and syslogd are running on the system, kernel messages are appended to /var/log/messages */proc/kmsg *# echo 8 > /proc/sys/kernel/printk *dmesg -n 8 *drivers/char/tty_io.c, TIOCLINUX , console 重定向 *kernel console 重定向 int main(int argc, char **argv) { char bytes[2] = {11,0}; /* 11 is the TIOCLINUX cmd number */ if (argc == 2) bytes[1] = atoi(argv[1]); /* the chosen console */ else { fprintf(stderr, "%s: need a single arg\n",argv[0]); exit(1); } if (ioctl(STDIN_FILENO, TIOCLINUX, bytes)<0) { /* use stdin */ fprintf(stderr,"%s: ioctl(stdin, TIOCLINUX): %s\n", argv[0], strerror(errno)); exit(1); } exit(0); } * kernel 消息的处理 printk -> log_buf -> (/proc/kmsg reading will drain the kernel buffer) or (syslog call, drain or not) klogd reads from /proc/kmsg by default. dmesg dumps the log_buf whether or not it has been read. printk can be called anywhere, even in an interrupt handler * klogd (read the log_buf then send it to ) ----> syslogd (check /etc/syslog.conf how to deal with) * klogd -f (dump to specific file) *if (printk_ratelimit( )) printk(KERN_NOTICE "The printer is still on fire\n"); /proc/sys/kernel/printk_ratelimit (秒) /proc/sys/kernel/printk_ratelimit_burst *the following functions are used to format a device number and then deliver it to printk int print_dev_t(char *buffer, dev_t dev); (returns the number of characters printed) char *format_dev_t(char *buffer, dev_t dev); (returns buffer) */proc 1)<linux/proc_fs.h> 2) int (*read_proc)(char *page, char **start, off_t offset, int count, int *eof, void *data); * return the number of bytes of data you put in the buffer * update start (a pointer into page indicating where the data starts) and eof (points to an integer) * data, your driver uses it for bookkeeping * *start is NULL on entry; if you leave it NULL, the kernel assumes the data starts at offset 0 * *start 可以被用做索引, 必须用小数值, kernel 会把这个*start 用做file->f_pos, 下一次调用将传递给read_proc的参数 offset (比如用做数组索引啥的) * read_proc的老接口 *get_info 过时了 3)struct proc_dir_entry *create_proc_read_entry(const char *name, mode_t mode, struct proc_dir_entry 
*base, read_proc_t *read_proc, void *data); create_proc_read_entry("scullmem", 0 /* default mode */, NULL /* parent dir */, scull_read_procmem, NULL /* client data */); remove_proc_entry("scullmem", NULL /* parent dir */); * 如果父目录存在, 可以给出路径, 就会出现在指定目录了. 4) proc 已经 out了, *使用中的file 不增加引用计数, * 可以创建相同的名字,并且无法区分 ... seq_file interface 1) <linux/seq_file.h>. 2) void *start(struct seq_file *sfile, loff_t *pos); /*sfile可以忽略, 根据pos 返回一个数据指针,或者其他什么都成, pos 不一定是byte offset 随你*/ void *next(struct seq_file *sfile, void *v, loff_t *pos); /*v 是上次你返回的值*/ void stop(struct seq_file *sfile, void *v); /*没事做可以为空*/ 这些函数的调用是原子的, 并且start和stop 绝对配对调用, start 之后 短时间就会有stop, 可以看成一个函数,在start 加锁, stop 释放. int show(struct seq_file *sfile, void *v); 输出数据得用下面的函数: int seq_printf(struct seq_file *sfile, const char *fmt, ...); /*返回非零代表数据丢失, 很多都不处理*/ int seq_putc(struct seq_file *sfile, char c); int seq_puts(struct seq_file *sfile, const char *s); int seq_escape(struct seq_file *m, const char *s, const char *esc); int seq_path(struct seq_file *sfile, struct vfsmount *m, struct dentry *dentry, char *esc); 3) static struct seq_operations scull_seq_ops = { .start = scull_seq_start, .next = scull_seq_next, .stop = scull_seq_stop, .show = scull_seq_show }; static int scull_proc_open(struct inode *inode, struct file *file) { return seq_open(file, &scull_seq_ops); } static struct file_operations scull_proc_ops = { .owner = THIS_MODULE, .open = scull_proc_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release }; entry = create_proc_entry("scullseq", 0, NULL); if (entry) entry->proc_fops = &scull_proc_ops; struct proc_dir_entry *create_proc_entry(const char *name,mode_t mode, struct proc_dir_entry *parent); 好好利用ioctl strace Oops Messages *bad EIP value , 0xfffff 有可能是把堆栈冲掉了.. 
*0xa5a5a5a5 未初始化的slab *用户传递的参数,一般在stack dump 中出现多次(一层一层的函数调用阿) * system hang *加打印, 加schedule 调用 * 用sysrq, echo 1/0 > /proc/sys/kernel/sysrq echo x > /proc/sysrq-trigger * live hang : debug with profile 1) build with profile 2) command line: profile=2 3) readprofile, reset counter, run your driver, read the result *Documentation/basic_profiling.txt and oprofile * use NFS or unmount via sysRQ to avoid corrupt file system 使用gdb 1) gdb /usr/src/linux/vmlinux /proc/kcore 2) 在gdb 更新 core file: core-file /proc/kcore 3) 加入模块符号 *get address from /sys/module/scull/sections * (gdb) add-symbol-file .../scull.ko 0xd0832000 \ -s .bss 0xd0837100 \ -s .data 0xd0836be0 kdb Kernel Debugger 1) Pause / break key for enter on console 2) bp scull_read go 3) mds scull_devices 1 4) mm cf26ac0c 0x50 5) Documentation/kdb * kgdboe target: 1.1.1.1 host: 1.1.1.123 1.) 在target加载kgdboe rmmod kgdboe modprobe kgdboe kgdboe=@1.1.1.1/eth0,@1.1.1.123/ dmesg | tail 3). 启动gdb,用target remote 连接目标 gdb: gdb vmlinux >target remote udp:1.1.1.1:6443 LTT Dprobe chapter 5 并发与竞争 *并发情况 1) 多个process 2) 多个CPU 3) 中断/软中断/timer/tasklet/workqueue 4) 内核抢占 5) hotplug * avoid global resources : per thread / per cpu *share is our life: locking * be careful when going to sleep : don't hold resources not related to the current situation * 持有sem可以sleep, 如上所示, 不要再持有其他无关资源 *semaphore. if init to 1, called mutex <asm/semaphore.h> DECLARE_MUTEX(name); DECLARE_MUTEX_LOCKED(name); or: void init_MUTEX(struct semaphore *sem); void init_MUTEX_LOCKED(struct semaphore *sem); void down(struct semaphore *sem); //D state in ps int down_interruptible(struct semaphore *sem); //可被信号中断, 中断后返回非0值, 不再持有信号量.返回return -ERESTARTSYS; 如果不能undo everything(user can see), 则应该返回-EINTR int down_trylock(struct semaphore *sem) //返回0则已经加锁, 如果返回1, 代表不能获得, (Returns 0 if the mutex has been acquired successfully) void up(struct semaphore *sem); Reader/Writer Semaphores <linux/rwsem.h>. 
void init_rwsem(struct rw_semaphore *sem); /*只能动态初始化*/ reader: void down_read(struct rw_semaphore *sem);/*可能进入uninterruptible 类型睡眠*/ int down_read_trylock(struct rw_semaphore *sem); /*返回零代表加锁失败, 和down_trylock相反 GOD! returns 1 if successful, 0 if contention*/ void up_read(struct rw_semaphore *sem); void down_write(struct rw_semaphore *sem); int down_write_trylock(struct rw_semaphore *sem); /*returns 1 if successful, 0 if contention*/ void up_write(struct rw_semaphore *sem); void downgrade_write(struct rw_semaphore *sem); /*变身为reader*/ 写进程优先级高, 写进程多不适用 Completions 用信号量来通知另外一个进程事件已经完成不太好,信号量一般用的情况是down的时候大部分时候是可以获得锁的, down也为此做了最多的优化. 而等待事件的完成, 在down的时候总是不可用的, 这样就会降低性能. <linux/completion.h>. 1) init DECLARE_COMPLETION(my_completion); struct completion my_completion; init_completion(&my_completion); void wait_for_completion(struct completion *c); /*uninterruptible*/ void complete(struct completion *c); /*只唤醒一个等待的进程*/ void complete_all(struct completion *c); 如果使用了complete_all, 再次使用一个completion就要重新初始化 INIT_COMPLETION(struct completion c); void complete_and_exit(struct completion *c, long retval); /*通知(清理)已经完成, 然后结束进程*/ spin lock 非睡眠环境, 如中断. 由于抢占的存在 需要在单cpu也要有相应的lock. 
<linux/spinlock.h> spinlock_t my_lock = SPIN_LOCK_UNLOCKED; 或者void spin_lock_init(spinlock_t *lock); void spin_lock(spinlock_t *lock); void spin_unlock(spinlock_t *lock); *持有spin lock 不得睡眠, 除非被中断打断(有时候甚至中断也不成) *持有spin lock 则自动禁止抢占 (所以单cpu lock 也有用) * after lock, pay attention every function you call *lock 并禁止中断:避免中断处理来竞争 * 加锁时间必须很短 void spin_lock(spinlock_t *lock); void spin_lock_irqsave(spinlock_t *lock, unsigned long flags); /*避免irq已经禁止,释放时打开了irq*/ void spin_lock_irq(spinlock_t *lock); /*释放的时候肯定打开irq*/ void spin_lock_bh(spinlock_t *lock) /*禁止softirq 抢占*/ void spin_unlock(spinlock_t *lock); void spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags); void spin_unlock_irq(spinlock_t *lock); void spin_unlock_bh(spinlock_t *lock); int spin_trylock(spinlock_t *lock); int spin_trylock_bh(spinlock_t *lock); /*返回1 代表已经获得锁, 0 意味着没有获得锁*/ Reader/Writer Spinlocks <linux/spinlock.h>. rwlock_t my_rwlock = RW_LOCK_UNLOCKED; /* Static way */ rwlock_t my_rwlock; rwlock_init(&my_rwlock); /* Dynamic way */ void read_lock(rwlock_t *lock); void read_lock_irqsave(rwlock_t *lock, unsigned long flags); void read_lock_irq(rwlock_t *lock); void read_lock_bh(rwlock_t *lock); void read_unlock(rwlock_t *lock); void read_unlock_irqrestore(rwlock_t *lock, unsigned long flags); void read_unlock_irq(rwlock_t *lock); void read_unlock_bh(rwlock_t *lock); void write_lock(rwlock_t *lock); void write_lock_irqsave(rwlock_t *lock, unsigned long flags); void write_lock_irq(rwlock_t *lock); void write_lock_bh(rwlock_t *lock); int write_trylock(rwlock_t *lock); void write_unlock(rwlock_t *lock); void write_unlock_irqrestore(rwlock_t *lock, unsigned long flags); void write_unlock_irq(rwlock_t *lock); void write_unlock_bh(rwlock_t *lock); 写优先, 故而可能出现reader饿死, 需要注意不要应用于写非常多的场合. 
lock 陷阱 Ambiguous Rules *开始的时候设计好, 并做好文档.: 哪些函数是不用加锁的, 哪些应该加 *一般内部函数可以默认接口函数已经获得相应的锁 Lock Ordering Rules *同时获取多个锁必须依照顺序来, *多看看别的地方的顺序 * 先获得local 范围的锁(接近你的driver), 然后获取kernel 其他部分的锁 * 先获取sem, 后lock * 尽力避免hold 多个lock Fine- Versus Coarse-Grained Locking * BKL, 只容许整个kernel 在一个cpu上跑, 最大粒度, 2.0引入SMP的时候引入 *http://oss.sgi.com/projects/lockmeter/ 锁性能测量 * 精细粒度的锁带来许多复杂性 * 从粗粒度的锁开始做你的driver Lock-Free Algorithms *<linux/kfifo.h>, 环形缓冲区, 只有一个写线程一般可以实现 * 原子变量 *atomic_t, defined in <asm/atomic.h>. * < 24bit 某些体系限制 void atomic_set(atomic_t *v, int i); atomic_t v = ATOMIC_INIT(0); int atomic_read(atomic_t *v); void atomic_add(int i, atomic_t *v); void atomic_sub(int i, atomic_t *v); void atomic_inc(atomic_t *v); void atomic_dec(atomic_t *v); int atomic_inc_and_test(atomic_t *v); int atomic_dec_and_test(atomic_t *v); int atomic_sub_and_test(int i, atomic_t *v); /*test的含义: 如果变成0, 返回true*/ int atomic_add_negative(int i, atomic_t *v); /*如果加完成为负数, 返回真*/ int atomic_add_return(int i, atomic_t *v); int atomic_sub_return(int i, atomic_t *v); int atomic_inc_return(atomic_t *v); int atomic_dec_return(atomic_t *v); /*返回操作后的最新值*/ * 对原子变量的操作如果有关联, 还是需要锁的 Bit Operations *<asm/bitops.h> *不同体系用的数据类型不同, void*, int ,long void set_bit(nr, void *addr); void clear_bit(nr, void *addr); void change_bit(nr, void *addr); test_bit(nr, void *addr); 唯一一个非atomic int test_and_set_bit(nr, void *addr); int test_and_clear_bit(nr, void *addr); int test_and_change_bit(nr, void *addr); /*返回操作前的值*/ 用位操作来实现锁并不被鼓励, 但有简单例子: /* try to set lock */ while (test_and_set_bit(nr, addr) != 0) wait_for_a_while( ); /* do your work */ /* release lock, and check... 
*/ if (test_and_clear_bit(nr, addr) == 0) something_went_wrong( ); /* already released: error */ seqlocks (lock less) * reader 可以并发, 但是如果和writer冲突需要重试 *write 很少, 但要求很快 * 不能保护有指针的数据, 因为reader 可能顺杆爬走了 * <linux/seqlock.h> seqlock_t lock1 = SEQLOCK_UNLOCKED; seqlock_t lock2; seqlock_init(&lock2); reader: unsigned int seq; do { seq = read_seqbegin(&the_lock); /* Do what you need to do */ } while (read_seqretry(&the_lock, seq)); * 一般用于保护一小组计算 * irq safe version (irq也访问整个数据) unsigned int read_seqbegin_irqsave(seqlock_t *lock,unsigned long flags); int read_seqretry_irqrestore(seqlock_t *lock, unsigned int seq, unsigned long flags); *writer (被一个spin lock 保护起来) void write_seqlock(seqlock_t *lock); void write_sequnlock(seqlock_t *lock); void write_seqlock_irqsave(seqlock_t *lock, unsigned long flags); void write_seqlock_irq(seqlock_t *lock); void write_seqlock_bh(seqlock_t *lock); void write_sequnlock_irqrestore(seqlock_t *lock, unsigned long flags); void write_sequnlock_irq(seqlock_t *lock); void write_sequnlock_bh(seqlock_t *lock); a write_tryseqlock (如果加锁成功返回1) Read-Copy-Update * http://www.rdrop.com/users/paulmck/rclock/intro/rclock_intro.html * 使用于reader 多而writer 少 * 必须用指针访问数据 *所有访问code 必须是原子的 * writer 方修改一个数据copy, 当没有引用时, older数据被释放 * reader 方是lock free的, 比如网络路由表 Code using RCU should include <linux/rcupdate.h>. reader: struct my_stuff *stuff; rcu_read_lock( ); /*禁止抢占*/ stuff = find_the_stuff(args...); do_something_with(stuff); rcu_read_unlock( ); writer: *copy modify update *释放older version: 需要等待其他cpu也释放, 基于所有code都是atomic的, 提供一个call back,用于在所有cpu都调度过一次后释放older 数据,就安全了 *需要分配 rcu_head, 不用初始化, 一般内嵌到大的数据结构中 *调用void call_rcu(struct rcu_head *head, void (*func)(void *arg), void *arg);, 挂接一个clean up函数( 一般就是kfree) * 保护链表比较复杂, 暂时略过 |
相关阅读 更多 +