Analysis of the physical address remapping functions in vmalloc
Figure 1: vmalloc areas
Figure 2: mapping three consecutive physical pages to a contiguous virtual address
(Both figures are from Professional Linux Kernel Architecture.)
Function call relationship:
1) vmap_page_range(unsigned long start, unsigned long end,
                   pgprot_t prot, struct page **pages)
2) static int vmap_page_range_noflush(unsigned long start, unsigned long end,
                   pgprot_t prot, struct page **pages)
3) static int vmap_pud_range(pgd_t *pgd, unsigned long addr,
                   unsigned long end, pgprot_t prot, struct page **pages, int *nr)
4) static int vmap_pmd_range(pud_t *pud, unsigned long addr,
                   unsigned long end, pgprot_t prot, struct page **pages, int *nr)
   // Steps 3) and 4) do essentially nothing, because the ARM920T only supports 2-level mapping.
5) static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
                   unsigned long end, pgprot_t prot, struct page **pages, int *nr)
   // The function that actually fills in the first- and second-level page tables
6) flush_cache_vmap(start, end);
Note: the ARM920T is used as the example throughout, with small-page (4 KiB) mappings; its MMU only supports 2-level page tables, PGD and PTE.
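To see where this chain starts from a caller's point of view, here is a minimal sketch (mine, not from the kernel tree; demo_map_three_pages() is a hypothetical name) of mapping three scattered physical pages through vmap() on 2.6.35, which internally goes roughly vmap() -> get_vm_area() -> map_vm_area() -> vmap_page_range(), the scenario of Figure 2:

#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>

static void *demo_map_three_pages(struct page *pages[3])
{
	void *vaddr;
	int i;

	for (i = 0; i < 3; i++) {
		pages[i] = alloc_page(GFP_KERNEL);	/* scattered physical pages */
		if (!pages[i])
			goto free_pages;
	}

	/* One contiguous kernel virtual range backed by the three pages */
	vaddr = vmap(pages, 3, VM_MAP, PAGE_KERNEL);
	if (vaddr)
		return vaddr;	/* caller later does vunmap(vaddr) and frees the pages */

free_pages:
	while (i--)
		__free_page(pages[i]);
	return NULL;
}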
Kernel: 2.6.35
#ifndef VMALLOC_START
#define VMALLOC_OFFSET	(8*1024*1024)
#define VMALLOC_START	(((unsigned long)high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1))
#endif
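A quick standalone check of this arithmetic (plain userspace C; the high_memory value is hypothetical):

#include <stdio.h>

#define VMALLOC_OFFSET (8 * 1024 * 1024)

int main(void)
{
	unsigned long high_memory = 0xc0800000UL;	/* hypothetical */
	unsigned long vmalloc_start =
		(high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET - 1UL);

	/* Adds the offset, then aligns down to an 8 MiB boundary; here
	 * high_memory is already aligned, so the guard gap is exactly 8 MiB */
	printf("VMALLOC_START = 0x%lx\n", vmalloc_start);	/* 0xc1000000 */
	return 0;
}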
static int vmap_page_range(unsigned long start, unsigned long end,
			   pgprot_t prot, struct page **pages)
{
	int ret;

	ret = vmap_page_range_noflush(start, end, prot, pages);
	// start and end are virtual addresses in the range VMALLOC_START ~ VMALLOC_END
	// VMALLOC_START = high_memory + 8 MiB (aligned to an 8 MiB boundary)
	// prot holds the MMU page table flags
	// pages points to the array of (non-contiguous) physical page frames
	flush_cache_vmap(start, end);
	return ret;
}
static int vmap_page_range_noflush(unsigned long start, unsigned long end,
				   pgprot_t prot, struct page **pages)
{
	pgd_t *pgd;
	unsigned long next;
	unsigned long addr = start;
	int err = 0;
	int nr = 0;

	BUG_ON(addr >= end);
	pgd = pgd_offset_k(addr);
	// Get the pgd entry address: pgd base + (addr >> 21); each entry covers
	// two consecutive first-level descriptors (2 x 1 MiB)
	do {
		next = pgd_addr_end(addr, end);
		err = vmap_pud_range(pgd, addr, next, prot, pages, &nr);
		if (err)
			return err;
	} while (pgd++, addr = next, addr != end);
	return nr;
}
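The pgd_addr_end() used above clamps each iteration either to the next pgd entry boundary or to end, whichever comes first. A standalone sketch of the idiom (userspace C with the GCC statement-expression extension, hypothetical addresses, assuming the 2 MiB span per Linux/ARM pgd entry noted above):

#include <stdio.h>

#define PGDIR_SIZE	(2UL * 1024 * 1024)
#define PGDIR_MASK	(~(PGDIR_SIZE - 1))
#define pgd_addr_end(addr, end)						\
({	unsigned long __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK;	\
	(__boundary - 1 < (end) - 1) ? __boundary : (end);		\
})

int main(void)
{
	unsigned long addr = 0xc1000000UL, end = 0xc1500000UL, next;

	do {
		next = pgd_addr_end(addr, end);	/* clamp to entry boundary or end */
		printf("pgd entry covers 0x%lx .. 0x%lx\n", addr, next);
	} while (addr = next, addr != end);
	return 0;
}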
static int vmap_pud_range(pgd_t *pgd, unsigned long addr,
		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
{
	pud_t *pud;
	unsigned long next;

	pud = pud_alloc(&init_mm, pgd, addr);
	// With 4level-fixup.h in effect, pud is simply the pgd entry again:
	// #define pud_t pgd_t
	if (!pud)
		return -ENOMEM;
	do {
		next = pud_addr_end(addr, end);
		if (vmap_pmd_range(pud, addr, next, prot, pages, nr))
			return -ENOMEM;
	} while (pud++, addr = next, addr != end);
	return 0;
}
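The folding the comment refers to lives in include/asm-generic/4level-fixup.h (pulled in by ARM in 2.6.35); condensed from memory (check the tree for the exact text), the relevant lines look roughly like:

#define pud_t				pgd_t
#define pud_alloc(mm, pgd, address)	(pgd)
#define pud_offset(pgd, start)		(pgd)
#define pud_none(pud)			0
#define pud_present(pud)		1

So pud_alloc() can never return NULL on this path, and vmap_pmd_range() receives the unchanged pgd entry.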
static int vmap_pmd_range(pud_t *pud, unsigned long addr,
		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
{
	pmd_t *pmd;
	unsigned long next;

	pmd = pmd_alloc(&init_mm, pud, addr);
	// Same as pud_alloc: since only 2-level page tables are supported,
	// this also just returns the pud address
	if (!pmd)
		return -ENOMEM;
	do {
		next = pmd_addr_end(addr, end);
		if (vmap_pte_range(pmd, addr, next, prot, pages, nr))
			return -ENOMEM;
	} while (pmd++, addr = next, addr != end);
	return 0;
}
// The most critical function of the vmap mapping
static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
{
	pte_t *pte;

	/*
	 * nr is a running index into the array which helps higher level
	 * callers keep track of where we're up to.
	 */
	pte = pte_alloc_kernel(pmd, addr);
	// Allocate the second-level table, return its address, and fill in the
	// first-level entry: the base address of the second-level page table is
	// written into the pmd. See create_mapping.
	if (!pte)
		return -ENOMEM;
	do {
		struct page *page = pages[*nr];

		if (WARN_ON(!pte_none(*pte)))
			return -EBUSY;
		if (WARN_ON(!page))
			return -ENOMEM;
		set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
		// Writing the second-level PTE completes one vmap mapping
		(*nr)++;
	} while (pte++, addr += PAGE_SIZE, addr != end);
	return 0;
}
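How an address decomposes into the indices this walk uses can be checked standalone (userspace C, hypothetical address, assuming the Linux/ARM split described above: 2 MiB per pgd entry, 512 PTEs per second-level table, 4 KiB pages):

#include <stdio.h>

#define PAGE_SHIFT	12
#define PGDIR_SHIFT	21
#define PTRS_PER_PTE	512

int main(void)
{
	unsigned long addr = 0xc1234567UL;	/* hypothetical vmalloc address */

	printf("pgd index   = %lu\n", addr >> PGDIR_SHIFT);
	printf("pte index   = %lu\n", (addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1));
	printf("page offset = 0x%lx\n", addr & ((1UL << PAGE_SHIFT) - 1));
	return 0;
}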
#define pte_alloc_kernel(pmd, address)			\
	((unlikely(!pmd_present(*(pmd))) && __pte_alloc_kernel(pmd, address)) ? \
		NULL : pte_offset_kernel(pmd, address))
// pmd_present(*(pmd)) is false while the entry is still empty, so
// !pmd_present(*(pmd)) is true and __pte_alloc_kernel() runs

int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
{
	pte_t *new = pte_alloc_one_kernel(&init_mm, address);
	// Use the buddy allocator to allocate one page for the PTE table
	if (!new)
		return -ENOMEM;

	smp_wmb(); /* See comment in __pte_alloc */

	spin_lock(&init_mm.page_table_lock);
	if (!pmd_present(*pmd)) {	/* Has another populated it? */
		pmd_populate_kernel(&init_mm, pmd, new);
		// Write the base address of the new PTE table into the pgd/pmd entry
		new = NULL;
	}
	spin_unlock(&init_mm.page_table_lock);
	if (new)
		pte_free_kernel(&init_mm, new);
	return 0;
	// Returns 0 (false), so the macro falls through to pte_offset_kernel()
}
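The pattern here (allocate outside the lock, publish under the lock only if nobody raced ahead, free the loser's copy) is worth isolating. A standalone sketch with pthreads (hypothetical names, not kernel code):

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
static void *table;			/* plays the role of the pmd entry */

static int ensure_table(void)
{
	void *new = calloc(1, 4096);	/* stand-in for pte_alloc_one_kernel() */

	if (!new)
		return -1;
	pthread_mutex_lock(&table_lock);
	if (!table) {			/* has another populated it? */
		table = new;		/* stand-in for pmd_populate_kernel() */
		new = NULL;
	}
	pthread_mutex_unlock(&table_lock);
	free(new);			/* no-op if we won the race */
	return 0;
}

int main(void)
{
	return ensure_table();
}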
Author: woodpecker <Pecker.hu@gmail.com>