2006-8-10 mm/vmalloc.c
还是分析highmem的那张图,现在关注vmalloc使用的虚拟地址空间:
+------------------------------------------------------------------
| 8K空洞
+------------------------------------------------------------------
| FIXADDR_TOP(0xffffe000UL) (include/asm-i386/fixmap.h)
| fixed map(每项4k虚存,见FIXADDR_SIZE)
| { //fix map 内容 (enum fixed_addresses)
| FIX_APIC_BASE,
| FIX_IO_APIC_BASE_0,
| FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1
|
| FIX_CO_CPU, /* Cobalt timer */
| FIX_CO_APIC, /* Cobalt APIC Redirection Table */
| FIX_LI_PCIA, /* Lithium PCI Bridge A */
| FIX_LI_PCIB, /* Lithium PCI Bridge B */
+--------------
#ifdef CONFIG_HIGHMEM /*为fix KMAP预留每cpu 8k的虚存,读写各4k*/
| FIX_KMAP_BEGIN, /* 主要用于kmap_atomic*/
| FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
#endif
+--------------
| __end_of_fixed_addresses
| }
| FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
+--------------------------------------------------------------------
| VMALLOC_END (FIXADDR_START) (include/asm-i386/pgtable.h)
| +------------------
| | xxxxx: kmap 和 vmalloc 相互重叠,2.6已经修正
| | kmap 使用的4M虚存 (asm/highmem.h,LAST_PKMAP)
| | PKMAP_BASE (0xfe000000UL) (距离4G 32M)
| +------------------
| vmalloc 映射区
| VMALLOC_START (((unsigned long) high_memory + 2*VMALLOC_OFFSET-1)
| & ~(VMALLOC_OFFSET-1)) /* 在high_memory之后至少留出8M空洞,再向上对齐到8M边界 */
+--------------------------------------------------------------------
| 约 8M 空洞
+--------------------------------------------------------------------
| high_memory (见003___arch_i386_mm_ioremap.c 对此的分析)
| 内核已经映射了的物理页面 MAX 896M
| 3G
+--------------------------------------------------------------------
| reserved for app 0-3G
+--------------------------------------------------------------------
vmalloc使用的虚拟内存空间大概数值是百十来M.(详细计算略).这部分是分配给内核自己使用的大块虚拟地址, ioremap和vmalloc使用相同的虚拟地址. 见arch/i386/mm/ioremap.c的分析. 管理vmalloc的虚拟内存的结构是vm_struct,而管理进程的虚拟空间使用的是vma,不一样的. 所有的vm_struct按地址顺序挂入 struct vm_struct * vmlist;
至于分配释放算法,实在没有什么更多东西可讲.不要说我偷懒啊.ioremap.c中已经讲过释放的时候对vmalloc分配和ioremap映射的区别处理了.去看看吧. 就是函数: void free_area_pte() { pte_t * pte; unsigned long end;
if (pmd_none(*pmd)) return; if (pmd_bad(*pmd)) { pmd_ERROR(*pmd); pmd_clear(pmd); return; } pte = pte_offset(pmd, address); address &= ~PMD_MASK; end = address + size; if (end > PMD_SIZE) end = PMD_SIZE; do { pte_t page; page = ptep_get_and_clear(pte); address += PAGE_SIZE; pte++; if (pte_none(page)) continue; if (pte_present(page)) { struct page *ptpage = pte_page(page); if (VALID_PAGE(ptpage) && (!PageReserved(ptpage))) //VALID_PAGE 检查此区域是否分配了ram页面,ioremap可以影射vm_area为io内存 //如果是VALID_PAGE(pagenr<max_mapnr) __free_page(ptpage); continue; } printk(KERN_CRIT "Whee.. Swapped out page in kernel page table\n"); } while (address < end); }
另外vread,提供了内核内存读取功能,仅读取vmalloc部分的数据.get_vm_area中有一个小小的bug?,见注释: struct vm_struct * get_vm_area(unsigned long size, unsigned long flags) { unsigned long addr; struct vm_struct **p, *tmp, *area;
area = (struct vm_struct *) kmalloc(sizeof(*area), GFP_KERNEL); if (!area) return NULL; size += PAGE_SIZE; //空洞, 用于扑捉可能的越界 addr = VMALLOC_START; write_lock(&vmlist_lock); for (p = &vmlist; (tmp = *p) ; p = &tmp->next) { if ((size + addr) < addr) { write_unlock(&vmlist_lock); kfree(area); return NULL; } if (size + addr < (unsigned long) tmp->addr) //2 2.5 已经是<=了 break; addr = tmp->size + (unsigned long) tmp->addr; if (addr > VMALLOC_END-size) { write_unlock(&vmlist_lock); kfree(area); return NULL; } } area->flags = flags; area->addr = (void *)addr; area->size = size; area->next = *p; *p = area; write_unlock(&vmlist_lock); return area; }
vm_struct管理的虚拟地址所映射的页面,或者是内核使用的page,或者是reserve的page,或者干脆就不是内核的mem_map所能管理得了的(见ioremap).当然不会被swap. page的引用计数应该是1.(fix me).
|