In handle_pte_fault(), if the page that triggered the fault is already present in main memory, the fault was most likely caused by a write to a read-only page, and the kernel has to perform copy-on-write (COW). For example, when a parent process creates a child with fork(), the child shares the parent's page frames. Later, whenever either the parent or the child wants to write to that memory, COW is performed: the writer is given a page frame of its own, the old contents are copied into it, and only then is the write carried out.
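The effect of COW is easy to observe from user space. The short program below (a minimal illustration, not kernel code) lets a child process overwrite a buffer inherited across fork(); the parent still sees its original contents, because the child's write faulted and was redirected to a freshly copied page frame.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
    /* One page of anonymous heap memory, shared copy-on-write after fork(). */
    char *buf = malloc(4096);
    if (!buf)
        return 1;
    strcpy(buf, "parent data");

    pid_t pid = fork();
    if (pid == 0) {
        /* Child: this write faults and triggers COW in the kernel,
         * so the child gets its own copy of the page. */
        strcpy(buf, "child data");
        printf("child sees : %s\n", buf);
        _exit(0);
    }
    wait(NULL);
    /* Parent still sees its original data: the child's write went to a copy. */
    printf("parent sees: %s\n", buf);
    free(buf);
    return 0;
}

Run on Linux, it prints "child sees : child data" followed by "parent sees: parent data". Back in the kernel, the relevant part of handle_pte_fault() looks like this: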
static inline int handle_pte_fault(struct mm_struct *mm,
        struct vm_area_struct *vma, unsigned long address,
        pte_t *pte, pmd_t *pmd, unsigned int flags)
{
    pte_t entry;
    spinlock_t *ptl;

    entry = *pte;

    ...
    ...
    ...

    ptl = pte_lockptr(mm, pmd);
    spin_lock(ptl);
    if (unlikely(!pte_same(*pte, entry)))
        goto unlock;
    if (flags & FAULT_FLAG_WRITE) {
        /* Write fault on a present but read-only PTE: hand it to do_wp_page() for COW. */
        if (!pte_write(entry))
            return do_wp_page(mm, vma, address,
                    pte, pmd, ptl, entry);
        entry = pte_mkdirty(entry);
    }
    entry = pte_mkyoung(entry);
    if (ptep_set_access_flags(vma, address, pte, entry, flags & FAULT_FLAG_WRITE)) {
        update_mmu_cache(vma, address, entry);
    } else {
        /* The PTE was already up to date: treat this as a spurious
         * protection fault and just flush the stale TLB entry. */
        if (flags & FAULT_FLAG_WRITE)
            flush_tlb_page(vma, address);
    }
unlock:
    pte_unmap_unlock(pte, ptl);
    return 0;
}
As the listing shows, when the page is present in main memory, the essential work in handle_pte_fault() is delegated to do_wp_page(). That function carries out the COW, but it performs some checks first: for instance, if the page is used by only one process, it simply marks the page writable again and skips the copy. In short, the kernel does not perform COW unless it really has to.
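Ignoring locking, reference counting and the page_mkwrite() notification, the choice the function makes can be modelled roughly as below (a user-space sketch for illustration only; struct fault_page, classify_write_fault() and the enum are invented names, not kernel types):

#include <stdbool.h>
#include <stdio.h>

/* Illustrative model only: these are not kernel types. */
struct fault_page {
    bool anon;            /* PageAnon() && !PageKsm(): anonymous page           */
    bool single_user;     /* reuse_swap_page(): mapped/swapped by this mm only  */
    bool shared_writable; /* vma has both VM_WRITE and VM_SHARED                */
};

enum wp_action { WP_REUSE, WP_COPY };

/* Mirrors the reuse-vs-copy decision made in do_wp_page(). */
static enum wp_action classify_write_fault(const struct fault_page *p)
{
    if (p->anon && p->single_user)
        return WP_REUSE;      /* sole user: just make the PTE writable */
    if (p->shared_writable)
        return WP_REUSE;      /* shared mapping: the write goes to the file page */
    return WP_COPY;           /* otherwise: allocate a new page and copy */
}

int main(void)
{
    struct fault_page after_fork = { .anon = true,  .single_user = false };
    struct fault_page sole_owner = { .anon = true,  .single_user = true  };
    struct fault_page shared_map = { .anon = false, .shared_writable = true };

    printf("anon page still shared with the other process -> %s\n",
           classify_write_fault(&after_fork) == WP_COPY ? "COPY" : "REUSE");
    printf("anon page with a single user                  -> %s\n",
           classify_write_fault(&sole_owner) == WP_COPY ? "COPY" : "REUSE");
    printf("MAP_SHARED writable file mapping              -> %s\n",
           classify_write_fault(&shared_map) == WP_COPY ? "COPY" : "REUSE");
    return 0;
}

The actual function, shown next, adds the locking, reference counting and dirty-page accounting that this model leaves out.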
static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
        unsigned long address, pte_t *page_table, pmd_t *pmd,
        spinlock_t *ptl, pte_t orig_pte)
{
    struct page *old_page, *new_page;
    pte_t entry;
    int reuse = 0, ret = 0;
    int page_mkwrite = 0;
    struct page *dirty_page = NULL;

    old_page = vm_normal_page(vma, address, orig_pte);
    if (!old_page) {
        /* No struct page (e.g. a raw PFN mapping): a shared writable
         * mapping is never copied, just mark the PTE writable. */
        if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
                (VM_WRITE|VM_SHARED))
            goto reuse;
        goto gotten;
    }

    /* Anonymous, non-KSM pages: if this process is the only user,
     * the page can be reused instead of copied. */
    if (PageAnon(old_page) && !PageKsm(old_page)) {
        if (!trylock_page(old_page)) {
            page_cache_get(old_page);
            pte_unmap_unlock(page_table, ptl);
            lock_page(old_page);
            page_table = pte_offset_map_lock(mm, pmd, address,
                    &ptl);
            if (!pte_same(*page_table, orig_pte)) {
                unlock_page(old_page);
                page_cache_release(old_page);
                goto unlock;
            }
            page_cache_release(old_page);
        }
        reuse = reuse_swap_page(old_page);
        unlock_page(old_page);
    } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
            (VM_WRITE|VM_SHARED))) {
        /* Shared writable file mapping: no copy is needed, but the
         * filesystem may want to be told via page_mkwrite(). */
        if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
            struct vm_fault vmf;
            int tmp;

            vmf.virtual_address = (void __user *)(address &
                    PAGE_MASK);
            vmf.pgoff = old_page->index;
            vmf.flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE;
            vmf.page = old_page;

            /* Call page_mkwrite() without the PTE lock held so that it may sleep. */
            page_cache_get(old_page);
            pte_unmap_unlock(page_table, ptl);

            tmp = vma->vm_ops->page_mkwrite(vma, &vmf);

            if (unlikely(tmp &
                    (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) {
                ret = tmp;
                goto unwritable_page;
            }
            if (unlikely(!(tmp & VM_FAULT_LOCKED))) {
                lock_page(old_page);
                if (!old_page->mapping) {
                    ret = 0;
                    unlock_page(old_page);
                    goto unwritable_page;
                }
            } else
                VM_BUG_ON(!PageLocked(old_page));

            /* The PTE lock was dropped, so revalidate the entry. */
            page_table = pte_offset_map_lock(mm, pmd, address,
                    &ptl);
            if (!pte_same(*page_table, orig_pte)) {
                unlock_page(old_page);
                page_cache_release(old_page);
                goto unlock;
            }

            page_mkwrite = 1;
        }
        dirty_page = old_page;
        get_page(dirty_page);
        reuse = 1;
    }
    if (reuse) {
reuse:
        /* Reuse the existing page: mark the PTE young, dirty and writable. */
        flush_cache_page(vma, address, pte_pfn(orig_pte));
        entry = pte_mkyoung(orig_pte);
        entry = maybe_mkwrite(pte_mkdirty(entry), vma);
        if (ptep_set_access_flags(vma, address, page_table, entry, 1))
            update_mmu_cache(vma, address, entry);
        ret |= VM_FAULT_WRITE;
        goto unlock;
    }

    /* A copy is unavoidable: allocate a new page for this process. */
    page_cache_get(old_page);
gotten:
    pte_unmap_unlock(page_table, ptl);

    if (unlikely(anon_vma_prepare(vma)))
        goto oom;

    if (is_zero_pfn(pte_pfn(orig_pte))) {
        new_page = alloc_zeroed_user_highpage_movable(vma, address);
        if (!new_page)
            goto oom;
    } else {
        new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
        if (!new_page)
            goto oom;
        cow_user_page(new_page, old_page, address, vma);
    }
    __SetPageUptodate(new_page);

    /* Do not leave a possibly mlocked old page behind. */
    if ((vma->vm_flags & VM_LOCKED) && old_page) {
        lock_page(old_page);
        clear_page_mlock(old_page);
        unlock_page(old_page);
    }

    if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))
        goto oom_free_new;
    /* Re-check the PTE: the lock was dropped while allocating. */
    page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
    if (likely(pte_same(*page_table, orig_pte))) {
        if (old_page) {
            if (!PageAnon(old_page)) {
                dec_mm_counter(mm, file_rss);
                inc_mm_counter(mm, anon_rss);
            }
        } else
            inc_mm_counter(mm, anon_rss);
        flush_cache_page(vma, address, pte_pfn(orig_pte));
        entry = mk_pte(new_page, vma->vm_page_prot);
        entry = maybe_mkwrite(pte_mkdirty(entry), vma);
        /* Clear and flush the old PTE before installing the new one. */
        ptep_clear_flush(vma, address, page_table);
        page_add_new_anon_rmap(new_page, vma, address);
        /* Install the PTE pointing at the new page (notifying secondary MMUs). */
        set_pte_at_notify(mm, address, page_table, entry);
        update_mmu_cache(vma, address, entry);
        if (old_page) {
            /* Drop the old page's rmap only after the PTE has been switched. */
            page_remove_rmap(old_page);
        }

        /* Release the old page below. */
        new_page = old_page;
        ret |= VM_FAULT_WRITE;
    } else
        mem_cgroup_uncharge_page(new_page);

    if (new_page)
        page_cache_release(new_page);
    if (old_page)
        page_cache_release(old_page);
unlock:
    pte_unmap_unlock(page_table, ptl);
    if (dirty_page) {
        if (!page_mkwrite) {
            wait_on_page_locked(dirty_page);
            set_page_dirty_balance(dirty_page, page_mkwrite);
        }
        put_page(dirty_page);
        if (page_mkwrite) {
            struct address_space *mapping = dirty_page->mapping;

            set_page_dirty(dirty_page);
            unlock_page(dirty_page);
            page_cache_release(dirty_page);
            if (mapping) {
                /* Throttle the task if it is dirtying pages too fast. */
                balance_dirty_pages_ratelimited(mapping);
            }
        }

        /* file_update_time() is called outside the page lock. */
        if (vma->vm_file)
            file_update_time(vma->vm_file);
    }
    return ret;
oom_free_new:
    page_cache_release(new_page);
oom:
    if (old_page) {
        if (page_mkwrite) {
            unlock_page(old_page);
            page_cache_release(old_page);
        }
        page_cache_release(old_page);
    }
    return VM_FAULT_OOM;

unwritable_page:
    page_cache_release(old_page);
    return ret;
}