Whether the allocation takes the fast path or the slow path, the memory is actually handed out by buffered_rmqueue(); everything before that call is only about choosing the most suitable zone to allocate from.
Its parameters first:
struct zone *preferred_zone: the preferred zone, i.e. the highest zone type the allocation is willing to accept;
struct zone *zone: the zone the memory is actually allocated from;
int order: the order of the allocation;
gfp_t gfp_flags: the allocation flags.
page = buffered_rmqueue(preferred_zone, zone, order, gfp_mask, migratetype);

/*
 * Really, prep_compound_page() should be called from __rmqueue_bulk().  But
 * we cheat by calling it from here, in the order > 0 path.  Saves a branch
 * or two.
 */
static inline
struct page *buffered_rmqueue(struct zone *preferred_zone,
            struct zone *zone, int order, gfp_t gfp_flags,
            int migratetype)
{
    unsigned long flags;
    struct page *page;
    int cold = !!(gfp_flags & __GFP_COLD);    /* was a cache-cold page requested? */

again:
    if (likely(order == 0)) {                 /* allocate a single page */
        struct per_cpu_pages *pcp;
        struct list_head *list;

        local_irq_save(flags);                /* disable local interrupts, saving the state first */
        pcp = &this_cpu_ptr(zone->pageset)->pcp;    /* this CPU's cached pages for the zone */
        list = &pcp->lists[migratetype];            /* free list for the requested migrate type */
        if (list_empty(list)) {
            /* the per-CPU cache is empty (the last refill may have used a
             * different migrate type); refill it from the buddy system,
             * see the detailed walk-through of rmqueue_bulk() below */
            pcp->count += rmqueue_bulk(zone, 0,
                    pcp->batch, list,
                    migratetype, cold);
            if (unlikely(list_empty(list)))
                goto failed;
        }

        if (cold)
            page = list_entry(list->prev, struct page, lru);    /* cold pages come from the tail */
        else
            page = list_entry(list->next, struct page, lru);    /* hot pages come from the head */

        list_del(&page->lru);
        pcp->count--;
    } else {
        if (unlikely(gfp_flags & __GFP_NOFAIL)) {
            /*
             * __GFP_NOFAIL is not to be used in new code.
             *
             * All __GFP_NOFAIL callers should be fixed so that they
             * properly detect and handle allocation failures.
             *
             * We most definitely don't want callers attempting to
             * allocate greater than order-1 page units with
             * __GFP_NOFAIL.
             */
            WARN_ON_ONCE(order > 1);
        }
        spin_lock_irqsave(&zone->lock, flags);
        page = __rmqueue(zone, order, migratetype);
        spin_unlock(&zone->lock);
        if (!page)
            goto failed;
        __mod_zone_freepage_state(zone, -(1 << order),
                      get_pageblock_migratetype(page));
    }

    __count_zone_vm_events(PGALLOC, zone, 1 << order);
    zone_statistics(preferred_zone, zone, gfp_flags);
    local_irq_restore(flags);

    VM_BUG_ON(bad_range(zone, page));
    if (prep_new_page(page, order, gfp_flags))
        goto again;
    return page;

failed:
    local_irq_restore(flags);
    return NULL;
}
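As an aside, the cold variable above comes from the __GFP_COLD flag. A caller that knows the CPU will not touch the page soon (for example, a buffer that is handed straight to a DMA engine) can ask for a cache-cold page, which is then taken from the tail of the per-CPU list. A minimal, hypothetical sketch of such a caller, assuming a kernel of the same vintage as the code above (grab_dma_buffer_page() is an invented name):

#include <linux/gfp.h>

/* Hypothetical helper: grab one order-0 page that will mostly be used by a
 * device, so a cache-cold page is preferable. */
static struct page *grab_dma_buffer_page(void)
{
    return alloc_pages(GFP_KERNEL | __GFP_COLD, 0);
}

Pages allocated without __GFP_COLD come from the head of the pcp list and are treated as cache-hot.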
struct zone contains a struct per_cpu_pageset __percpu *pageset member, which implements the hot/cold page allocator; a hot page is one that is probably still resident in the CPU cache:
struct per_cpu_pageset {
    struct per_cpu_pages pcp;
#ifdef CONFIG_NUMA
    s8 expire;
#endif
#ifdef CONFIG_SMP
    s8 stat_threshold;
    s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS];
#endif
};
The per-CPU cached page lists:
struct per_cpu_pages {
    int count;    /* number of pages in the lists */
    int high;     /* high watermark, emptying needed: upper limit on cached pages */
    int batch;    /* chunk size for buddy add/remove: pages are added to and removed
                   * from the cache in batches of this size, not one at a time */

    /* Lists of pages, one per migrate type stored on the pcp-lists */
    struct list_head lists[MIGRATE_PCPTYPES];
};
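To illustrate how count, high and batch cooperate, here is a small user-space simulation (not kernel code; the numeric values are made up): when the list is empty, batch pages are pulled from the buddy system at once, and when count climbs past high on the free path, batch pages are handed back at once.

#include <stdio.h>

/* Toy model of the per-CPU page cache policy; values are illustrative only. */
struct pcp_model { int count, high, batch; };

static void pcp_alloc(struct pcp_model *pcp)
{
    if (pcp->count == 0) {
        pcp->count += pcp->batch;            /* refill: rmqueue_bulk() in the kernel */
        printf("refill %d pages\n", pcp->batch);
    }
    pcp->count--;                            /* hand one page to the caller */
}

static void pcp_free(struct pcp_model *pcp)
{
    pcp->count++;                            /* page goes back on the list */
    if (pcp->count >= pcp->high) {
        pcp->count -= pcp->batch;            /* drain a batch back to the buddy system */
        printf("drain %d pages\n", pcp->batch);
    }
}

int main(void)
{
    struct pcp_model pcp = { .count = 0, .high = 90, .batch = 31 };
    int i;

    for (i = 0; i < 100; i++)
        pcp_alloc(&pcp);
    for (i = 0; i < 100; i++)
        pcp_free(&pcp);
    printf("final count %d\n", pcp.count);
    return 0;
}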
rmqueue_bulk() takes pages from the buddy system and uses them to fill the per-CPU cache:
/*
 * Obtain a specified number of elements from the buddy allocator, all under
 * a single hold of the lock, for efficiency.  Add them to the supplied list.
 * Returns the number of new pages which were placed at *list.
 */
static int rmqueue_bulk(struct zone *zone, unsigned int order,
            unsigned long count, struct list_head *list,
            int migratetype, int cold)
{
    int mt = migratetype, i;

    spin_lock(&zone->lock);
    for (i = 0; i < count; ++i) {            /* one page block at a time */
        struct page *page = __rmqueue(zone, order, migratetype);    /* take a block of the requested migrate type */
        if (unlikely(page == NULL))
            break;

        /*
         * Split buddy pages returned by expand() are received here
         * in physical page order. The page is added to the callers and
         * list and the list head then moves forward. From the callers
         * perspective, the linked list is ordered by page number in
         * some conditions. This is useful for IO devices that can
         * merge IO requests if the physical pages are ordered
         * properly.
         */
        if (likely(cold == 0))
            list_add(&page->lru, list);         /* hot page: add to the head of the list */
        else
            list_add_tail(&page->lru, list);    /* cold page: add to the tail */
        if (IS_ENABLED(CONFIG_CMA)) {           /* only when CONFIG_CMA is compiled in */
            mt = get_pageblock_migratetype(page);    /* migrate type of the page's pageblock */
            if (!is_migrate_cma(mt) && !is_migrate_isolate(mt))
                mt = migratetype;               /* neither MIGRATE_CMA nor MIGRATE_ISOLATE: keep the requested type */
        }
        set_freepage_migratetype(page, mt);     /* record the migrate type in the page */
        list = &page->lru;                      /* link the next page after this one */
        if (is_migrate_cma(mt))                 /* CMA pages are accounted separately */
            __mod_zone_page_state(zone, NR_FREE_CMA_PAGES,
                          -(1 << order));
    }
    __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));    /* update the free page count */
    spin_unlock(&zone->lock);
    return i;    /* number of page blocks added to the per-CPU list */
}
__mod_zone_page_state() adjusts the per-zone page counter for the given item:
static inline void __mod_zone_page_state(struct zone *zone,
            enum zone_stat_item item, int delta)
{
    zone_page_state_add(delta, zone, item);
}

static inline void zone_page_state_add(long x, struct zone *zone,
                 enum zone_stat_item item)
{
    atomic_long_add(x, &zone->vm_stat[item]);
    atomic_long_add(x, &vm_stat[item]);
}
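The counters maintained here are read back elsewhere through helpers such as zone_page_state() and global_page_state() (in the kernel generation this article is based on); the watermark checks in the allocator, for example, look at NR_FREE_PAGES. A hedged illustration of such a reader (report_free_pages() is an invented name):

#include <linux/mmzone.h>
#include <linux/vmstat.h>

/* Illustrative only: query the counters that zone_page_state_add()
 * maintains, the same way the allocator's watermark checks do. */
static void report_free_pages(struct zone *zone)
{
    unsigned long zone_free   = zone_page_state(zone, NR_FREE_PAGES);    /* this zone */
    unsigned long global_free = global_page_state(NR_FREE_PAGES);        /* whole system */

    pr_info("zone %s: %lu free pages (%lu system-wide)\n",
        zone->name, zone_free, global_free);
}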
/*
 * Do the hard work of removing an element from the buddy allocator.
 * Call me with the zone->lock already held.
 */
static struct page *__rmqueue(struct zone *zone, unsigned int order,
                        int migratetype)
{
    struct page *page;

retry_reserve:
    page = __rmqueue_smallest(zone, order, migratetype);    /* normal case: allocate from the requested migrate type */

    if (unlikely(!page) && migratetype != MIGRATE_RESERVE) {
        /* the requested type had no free block and this is not already an
         * emergency allocation: try to steal from the other migrate types */
        page = __rmqueue_fallback(zone, order, migratetype);

        /*
         * Use MIGRATE_RESERVE rather than fail an allocation. goto
         * is used because __rmqueue_smallest is an inline function
         * and we want just one call site
         */
        if (!page) {    /* still nothing: retry as an emergency MIGRATE_RESERVE allocation */
            migratetype = MIGRATE_RESERVE;
            goto retry_reserve;
        }
    }

    trace_mm_page_alloc_zone_locked(page, order, migratetype);
    return page;
}
/*
 * Go through the free lists for the given migratetype and remove
 * the smallest available page from the freelists
 */
static inline
struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
                        int migratetype)
{
    unsigned int current_order;
    struct free_area *area;
    struct page *page;

    /* Find a page of the appropriate size in the preferred list */
    for (current_order = order; current_order < MAX_ORDER; ++current_order) {    /* scan from the requested order upwards */
        area = &(zone->free_area[current_order]);
        if (list_empty(&area->free_list[migratetype]))    /* is the list for this migrate type empty? */
            continue;

        page = list_entry(area->free_list[migratetype].next,
                            struct page, lru);    /* take the first block from the list */
        list_del(&page->lru);
        rmv_page_order(page);    /* clear the buddy flag, i.e. set page->_mapcount = -1 */
        area->nr_free--;         /* note: nr_free counts page blocks of this order, not single pages */
        expand(zone, page, order, current_order, area, migratetype);    /* split the higher-order block down, half by half, to the requested order */
        return page;
    }

    return NULL;
}
expand() is the heart of the buddy splitting logic. Suppose a block had to be taken from order 8 but we only need order 6: the order-8 block is split in half, the upper half is hung on the order-7 free list, the remaining half is split again and its upper half is hung on the order-6 list, and so on until the order we actually need is reached; that remaining block is returned. (A worked example follows the function below.)
Its parameters:
struct zone *zone: the zone all operations are performed on;
struct page *page: the page block taken from the higher order;
int low: the order we actually need;
int high: the order the block was actually allocated from;
struct free_area *area: the free-area entry of that higher order in the zone;
int migratetype: the migration type.
/*
 * The order of subdivision here is critical for the IO subsystem.
 * Please do not alter this order without good reasons and regression
 * testing. Specifically, as large blocks of memory are subdivided,
 * the order in which smaller blocks are delivered depends on the order
 * they're subdivided in this function. This is the primary factor
 * influencing the order in which pages are delivered to the IO
 * subsystem according to empirical testing, and this is also justified
 * by considering the behaviour of a buddy system containing a single
 * large block of memory acted on by a series of small allocations.
 * This behaviour is a critical factor in sglist merging's success.
 *
 * -- nyc
 */
static inline void expand(struct zone *zone, struct page *page,
    int low, int high, struct free_area *area,
    int migratetype)
{
    unsigned long size = 1 << high;

    while (high > low) {    /* nothing to do if the block already has the requested order */
        area--;             /* step down to the free_area of the next lower order */
        high--;             /* next lower order */
        size >>= 1;         /* the block is halved */
        VM_BUG_ON(bad_range(zone, &page[size]));

#ifdef CONFIG_DEBUG_PAGEALLOC
        if (high < debug_guardpage_minorder()) {
            /*
             * Mark as guard pages (or page), that will allow to
             * merge back to allocator when buddy will be freed.
             * Corresponding page table entries will not be touched,
             * pages will stay not present in virtual address space
             */
            INIT_LIST_HEAD(&page[size].lru);
            set_page_guard_flag(&page[size]);
            set_page_private(&page[size], high);
            /* Guard pages are not available for any usage */
            __mod_zone_freepage_state(zone, -(1 << high),
                          migratetype);
            continue;
        }
#endif
        list_add(&page[size].lru, &area->free_list[migratetype]);    /* park the upper half on this order's list for its migrate type */
        area->nr_free++;                                             /* one more free block at this order */
        set_page_order(&page[size], high);    /* store the order in page->private and set the buddy flag,
                                               * since this half goes back into the buddy system */
    }
}
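As a concrete, user-space illustration of the splitting loop above, suppose the block came from order 8 and the caller asked for order 6. The upper half (pages 128..255 of the block) is parked on the order-7 list, the upper half of what remains (pages 64..127) goes on the order-6 list, and the bottom 64 pages are returned. A toy re-implementation that only tracks page offsets inside the block:

#include <stdio.h>

/* Toy model of expand()'s splitting loop; no kernel structures involved. */
static void expand_demo(int low, int high)
{
    unsigned long size = 1UL << high;

    while (high > low) {
        high--;
        size >>= 1;    /* the block is halved */
        printf("put pages [%lu..%lu) on the order-%d free list\n",
               size, size * 2, high);
    }
    printf("return pages [0..%lu) to the caller (order %d)\n",
           1UL << low, low);
}

int main(void)
{
    expand_demo(6, 8);    /* caller wanted order 6, block came from order 8 */
    return 0;
}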
Reaching __rmqueue_fallback() means the allocation from the requested migrate type's free lists failed, so the allocator falls back to the alternative migrate types:
/*
 * This array describes the order lists are fallen back to when
 * the free lists for the desirable migrate type are depleted
 */
static int fallbacks[MIGRATE_TYPES][4] = {
    [MIGRATE_UNMOVABLE]   = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE,     MIGRATE_RESERVE },
    [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE,   MIGRATE_MOVABLE,     MIGRATE_RESERVE },
#ifdef CONFIG_CMA
    [MIGRATE_MOVABLE]     = { MIGRATE_CMA,         MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE },
    [MIGRATE_CMA]         = { MIGRATE_RESERVE }, /* Never used */
#else
    [MIGRATE_MOVABLE]     = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE,   MIGRATE_RESERVE },
#endif
    [MIGRATE_RESERVE]     = { MIGRATE_RESERVE }, /* Never used */
#ifdef CONFIG_MEMORY_ISOLATION
    [MIGRATE_ISOLATE]     = { MIGRATE_RESERVE }, /* Never used */
#endif
};
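Read the table row by row: a MIGRATE_UNMOVABLE request whose own lists are empty tries MIGRATE_RECLAIMABLE, then MIGRATE_MOVABLE, and MIGRATE_RESERVE is left to __rmqueue() itself. A tiny user-space walk of one row (the enum values here are mocked up for the demo and do not match the kernel's exact numbering):

#include <stdio.h>

/* Mock-up of the fallback walk for a single request. */
enum { MIGRATE_UNMOVABLE, MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE,
       MIGRATE_RESERVE, MIGRATE_TYPES };

static const int fallbacks[MIGRATE_TYPES][4] = {
    [MIGRATE_UNMOVABLE]   = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE,    MIGRATE_RESERVE },
    [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE,   MIGRATE_MOVABLE,    MIGRATE_RESERVE },
    [MIGRATE_MOVABLE]     = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE,  MIGRATE_RESERVE },
    [MIGRATE_RESERVE]     = { MIGRATE_RESERVE }, /* never used */
};

int main(void)
{
    static const char *names[] = { "UNMOVABLE", "RECLAIMABLE", "MOVABLE", "RESERVE" };
    int i;

    /* What __rmqueue_fallback() would try, in order, for an UNMOVABLE request: */
    for (i = 0; fallbacks[MIGRATE_UNMOVABLE][i] != MIGRATE_RESERVE; i++)
        printf("try MIGRATE_%s\n", names[fallbacks[MIGRATE_UNMOVABLE][i]]);
    printf("MIGRATE_RESERVE is handled by __rmqueue() itself\n");
    return 0;
}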
__rmqueue_fallback() walks the fallback table above, type by type:
/* Remove an element from the buddy allocator from the fallback list */
static inline struct page *
__rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
{
    struct free_area *area;
    int current_order;
    struct page *page;
    int migratetype, i;

    /* Find the largest possible block of pages in the other list */
    for (current_order = MAX_ORDER-1; current_order >= order;
                        --current_order) {
        /* unlike __rmqueue_smallest(), the scan starts from the largest order
         * and works down; stealing large blocks limits memory fragmentation */
        for (i = 0;; i++) {
            migratetype = fallbacks[start_migratetype][i];

            /* MIGRATE_RESERVE handled later if necessary */
            if (migratetype == MIGRATE_RESERVE)
                break;    /* the reserve is the last resort, not used here */

            area = &(zone->free_area[current_order]);    /* free_area of this (high) order */
            if (list_empty(&area->free_list[migratetype]))
                continue;    /* nothing of this fallback type at this order, try the next type */

            page = list_entry(area->free_list[migratetype].next,
                    struct page, lru);    /* found a free block */
            area->nr_free--;              /* one block fewer at this order */

            /*
             * If breaking a large block of pages, move all free
             * pages to the preferred allocation list. If falling
             * back for a reclaimable kernel allocation, be more
             * aggressive about taking ownership of free pages
             *
             * On the other hand, never change migration
             * type of MIGRATE_CMA pageblocks nor move CMA
             * pages on different free lists. We don't
             * want unmovable pages to be allocated from
             * MIGRATE_CMA areas.
             */
            /* What follows decides what happens to the rest of the stolen block.
             * Two globals matter here: pageblock_order is the order the kernel
             * treats as a "large" block (configuration dependent, usually
             * MAX_ORDER-1), and pageblock_nr_pages is the page count of such a block. */
            if (!is_migrate_cma(migratetype) &&               /* never re-type CMA blocks */
                (unlikely(current_order >= pageblock_order / 2) ||    /* a large block, or */
                 start_migratetype == MIGRATE_RECLAIMABLE ||          /* reclaimable requests convert more aggressively, or */
                 page_group_by_mobility_disabled)) {
                int pages;
                pages = move_freepages_block(zone, page,
                            start_migratetype);    /* move the block's free pages over to start_migratetype */

                /* Claim the whole block if over half of it is free */
                if (pages >= (1 << (pageblock_order-1)) ||
                        page_group_by_mobility_disabled)
                    set_pageblock_migratetype(page,
                                start_migratetype);
                /* this re-tags the whole pageblock; move_freepages_block()
                 * above only re-tagged the individual free pages */

                migratetype = start_migratetype;
            }

            /* Remove the page from the freelists */
            list_del(&page->lru);
            rmv_page_order(page);    /* clear the buddy flag: this block is leaving the buddy system */

            /* Take ownership for orders >= pageblock_order */
            if (current_order >= pageblock_order &&
                !is_migrate_cma(migratetype))
                change_pageblock_range(page, current_order,
                            start_migratetype);    /* re-tag every pageblock covered by the stolen block */

            expand(zone, page, order, current_order, area,
                   is_migrate_cma(migratetype)
                 ? migratetype : start_migratetype);    /* split the large block down into smaller buddy blocks */

            trace_mm_page_alloc_extfrag(page, order, current_order,
                start_migratetype, migratetype);

            return page;
        }
    }

    return NULL;
}
int move_freepages_block(struct zone *zone, struct page *page,
                int migratetype)
{
    unsigned long start_pfn, end_pfn;
    struct page *start_page, *end_page;

    start_pfn = page_to_pfn(page);                       /* page frame number of the stolen page */
    start_pfn = start_pfn & ~(pageblock_nr_pages-1);     /* align down to the start of its pageblock */
    start_page = pfn_to_page(start_pfn);
    end_page = start_page + pageblock_nr_pages - 1;
    end_pfn = start_pfn + pageblock_nr_pages - 1;
    /* migrate types are converted pageblock_nr_pages contiguous pages at a
     * time; converting whole pageblocks keeps memory fragmentation down */

    /* Do not cross zone boundaries */
    if (!zone_spans_pfn(zone, start_pfn))
        start_page = page;
    if (!zone_spans_pfn(zone, end_pfn))    /* the range to convert must lie inside the zone */
        return 0;

    return move_freepages(zone, start_page, end_page, migratetype);    /* convert the range to the new migrate type */
}
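The rounding at the top of move_freepages_block() simply aligns the page frame number down to the start of its pageblock. A quick user-space illustration, assuming pageblock_nr_pages is 1024 (the actual value depends on the kernel configuration):

#include <stdio.h>

int main(void)
{
    unsigned long pageblock_nr_pages = 1024;    /* assumed value for the demo */
    unsigned long pfn = 5000;                   /* arbitrary page frame number */

    unsigned long start_pfn = pfn & ~(pageblock_nr_pages - 1);     /* round down */
    unsigned long end_pfn   = start_pfn + pageblock_nr_pages - 1;  /* last pfn of the block */

    printf("pfn %lu lives in pageblock [%lu..%lu]\n", pfn, start_pfn, end_pfn);
    /* prints: pfn 5000 lives in pageblock [4096..5119] */
    return 0;
}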
/*
 * Move the free pages in a range to the free lists of the requested type.
 * Note that start_page and end_page are not aligned on a pageblock
 * boundary. If alignment is required, use move_freepages_block()
 */
/* (A puzzle for the author: the caller, move_freepages_block(), clearly aligned the
 * range to pageblock_nr_pages, yet this comment says alignment is not required??) */
int move_freepages(struct zone *zone,
          struct page *start_page, struct page *end_page,
          int migratetype)
{
    struct page *page;
    unsigned long order;
    int pages_moved = 0;

#ifndef CONFIG_HOLES_IN_ZONE
    /*
     * page_zone is not safe to call in this context when
     * CONFIG_HOLES_IN_ZONE is set. This bug check is probably redundant
     * anyway as we check zone boundaries in move_freepages_block().
     * Remove at a later date when no bug reports exist related to
     * grouping pages by mobility
     */
    BUG_ON(page_zone(start_page) != page_zone(end_page));
#endif

    for (page = start_page; page <= end_page;) {
        /* Make sure we are not inadvertently changing nodes */
        VM_BUG_ON(page_to_nid(page) != zone_to_nid(zone));

        if (!pfn_valid_within(page_to_pfn(page))) {
            page++;
            continue;
        }

        if (!PageBuddy(page)) {    /* skip pages that are not free, i.e. not in the buddy system */
            page++;
            continue;
        }

        order = page_order(page);    /* order of this free block */
        list_move(&page->lru,
              &zone->free_area[order].free_list[migratetype]);    /* move the block to the new type's list */
        set_freepage_migratetype(page, migratetype);    /* record the new type: page->index = migratetype */
        page += 1 << order;          /* a whole 2^order block is converted in one step */
        pages_moved += 1 << order;
    }

    return pages_moved;    /* number of pages actually moved in the range */
}
static void change_pageblock_range(struct page *pageblock_page,
                    int start_order, int migratetype)
{
    int nr_pageblocks = 1 << (start_order - pageblock_order);    /* how many pageblock_order blocks are covered */

    while (nr_pageblocks--) {    /* re-tag each pageblock in turn */
        set_pageblock_migratetype(pageblock_page, migratetype);    /* set this pageblock's migrate type */
        pageblock_page += pageblock_nr_pages;    /* advance to the next pageblock */
    }
}
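A quick worked example with assumed values: if start_order is 10 and pageblock_order is 9 (so pageblock_nr_pages is 512), the stolen block spans two pageblocks and set_pageblock_migratetype() is called twice:

#include <stdio.h>

int main(void)
{
    int start_order = 10, pageblock_order = 9;    /* assumed values for the demo */
    int nr_pageblocks = 1 << (start_order - pageblock_order);

    /* An order-10 block covers two order-9 pageblocks, so the migrate
     * type has to be re-tagged twice, advancing 512 pages each time. */
    printf("%d pageblock(s) to re-tag\n", nr_pageblocks);    /* prints 2 */
    return 0;
}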