Linux Memory Management -- The Actual Allocation Function: buffered_rmqueue




Whether the allocation takes the fast path or the slow path, the memory is actually handed out by the buffered_rmqueue() function (called from get_page_from_freelist()); everything else in the allocator only decides which zone it is most appropriate to allocate from.

Let's look at each parameter first:

struct zone *preferred_zone: the preferred zone, i.e. the highest zone type that the allocation can accept;

struct zone *zone: the zone the memory is actually allocated from;

int order: the order of the allocation (a block of 2^order pages);

gfp_t gfp_flags: the allocation flags.


page = buffered_rmqueue(preferred_zone, zone, order, gfp_mask, migratetype);

/*
 * Really, prep_compound_page() should be called from __rmqueue_bulk().  But
 * we cheat by calling it from here, in the order > 0 path.  Saves a branch
 * or two.
 */
static inline
struct page *buffered_rmqueue(struct zone *preferred_zone,
			struct zone *zone, int order, gfp_t gfp_flags,
			int migratetype)
{
	unsigned long flags;
	struct page *page;
	int cold = !!(gfp_flags & __GFP_COLD);	/* was a cold page requested (hot/cold per-CPU pages)? */

again:
	if (likely(order == 0)) {		/* single page: use the per-CPU page cache */
		struct per_cpu_pages *pcp;
		struct list_head *list;

		local_irq_save(flags);		/* disable local interrupts, saving the interrupt state first */
		pcp = &this_cpu_ptr(zone->pageset)->pcp;	/* this CPU's cached pages for the zone */
		list = &pcp->lists[migratetype];		/* free list for the requested migrate type */
		if (list_empty(list)) {
			/*
			 * The per-CPU list is empty, perhaps because the last
			 * refill was done for a different migrate type. Refill
			 * it from the buddy system (see rmqueue_bulk() below).
			 */
			pcp->count += rmqueue_bulk(zone, 0,
					pcp->batch, list,
					migratetype, cold);
			if (unlikely(list_empty(list)))
				goto failed;
		}

		if (cold)
			page = list_entry(list->prev, struct page, lru);	/* cold pages sit at the tail */
		else
			page = list_entry(list->next, struct page, lru);	/* hot pages sit at the head */

		list_del(&page->lru);
		pcp->count--;
	} else {
		if (unlikely(gfp_flags & __GFP_NOFAIL)) {
			/*
			 * __GFP_NOFAIL is not to be used in new code.
			 *
			 * All __GFP_NOFAIL callers should be fixed so that they
			 * properly detect and handle allocation failures.
			 *
			 * We most definitely don't want callers attempting to
			 * allocate greater than order-1 page units with
			 * __GFP_NOFAIL.
			 */
			WARN_ON_ONCE(order > 1);
		}
		spin_lock_irqsave(&zone->lock, flags);
		page = __rmqueue(zone, order, migratetype);
		spin_unlock(&zone->lock);
		if (!page)
			goto failed;
		__mod_zone_freepage_state(zone, -(1 << order),
					  get_pageblock_migratetype(page));
	}

	__count_zone_vm_events(PGALLOC, zone, 1 << order);
	zone_statistics(preferred_zone, zone, gfp_flags);
	local_irq_restore(flags);

	VM_BUG_ON(bad_range(zone, page));
	if (prep_new_page(page, order, gfp_flags))
		goto again;
	return page;

failed:
	local_irq_restore(flags);
	return NULL;
}


struct zone contains a struct per_cpu_pageset __percpu *pageset member, which implements the hot/cold page allocator: a hot page is one that is likely still resident in the CPU cache.

struct per_cpu_pageset {
	struct per_cpu_pages pcp;
#ifdef CONFIG_NUMA
	s8 expire;
#endif
#ifdef CONFIG_SMP
	s8 stat_threshold;
	s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS];
#endif
};

The per-CPU cached page lists:

struct per_cpu_pages {
	int count;		/* number of pages in the lists */
	int high;		/* high watermark, emptying needed: upper limit for the lists */
	int batch;		/* chunk size for buddy add/remove: pages are added to and
				 * removed from the lists in batches of this size rather
				 * than one page at a time */

	/* Lists of pages, one per migrate type stored on the pcp-lists */
	struct list_head lists[MIGRATE_PCPTYPES];
};
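To make the batch refill and the hot/cold distinction concrete, here is a minimal user-space sketch (not kernel code; the toy_* names and the BATCH value are invented for illustration) modelling the order-0 path of buffered_rmqueue(): an empty list is refilled with a whole batch, hot requests take from the head, cold requests take from the tail.

#include <stdio.h>

#define BATCH 4          /* models pcp->batch */
#define CAP   16

/* toy model of struct per_cpu_pages: a simple array-backed list */
struct toy_pcp {
	int pages[CAP];
	int count;           /* models pcp->count */
};

static int next_pfn = 100;   /* pretend these page numbers come from the buddy system */

/* models rmqueue_bulk(): refill the per-CPU list with BATCH pages */
static void toy_refill(struct toy_pcp *pcp)
{
	for (int i = 0; i < BATCH && pcp->count < CAP; i++)
		pcp->pages[pcp->count++] = next_pfn++;
}

/* models the order-0 path of buffered_rmqueue() */
static int toy_alloc(struct toy_pcp *pcp, int cold)
{
	int page;

	if (pcp->count == 0)
		toy_refill(pcp);            /* list empty: refill a whole batch at once */
	if (pcp->count == 0)
		return -1;                  /* "failed" */

	if (cold) {
		page = pcp->pages[pcp->count - 1];   /* cold: take from the tail */
	} else {
		page = pcp->pages[0];                /* hot: take from the head */
		for (int i = 1; i < pcp->count; i++)
			pcp->pages[i - 1] = pcp->pages[i];
	}
	pcp->count--;
	return page;
}

int main(void)
{
	struct toy_pcp pcp = { .count = 0 };

	printf("hot:  %d\n", toy_alloc(&pcp, 0));  /* refills, then returns the head (100) */
	printf("cold: %d\n", toy_alloc(&pcp, 1));  /* returns the tail of the batch (103) */
	printf("hot:  %d\n", toy_alloc(&pcp, 0));  /* returns 101 */
	return 0;
}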


rmqueue_bulk() takes pages from the buddy system and uses them to fill the per-CPU cache:

/*
 * Obtain a specified number of elements from the buddy allocator, all under
 * a single hold of the lock, for efficiency.  Add them to the supplied list.
 * Returns the number of new pages which were placed at *list.
 */
static int rmqueue_bulk(struct zone *zone, unsigned int order,
			unsigned long count, struct list_head *list,
			int migratetype, int cold)
{
	int mt = migratetype, i;

	spin_lock(&zone->lock);
	for (i = 0; i < count; ++i) {	/* one page block at a time */
		struct page *page = __rmqueue(zone, order, migratetype);	/* allocate a block of the requested migrate type */
		if (unlikely(page == NULL))
			break;

		/*
		 * Split buddy pages returned by expand() are received here
		 * in physical page order. The page is added to the caller's
		 * list and the list head then moves forward. From the caller's
		 * perspective, the linked list is ordered by page number in
		 * some conditions. This is useful for IO devices that can
		 * merge IO requests if the physical pages are ordered
		 * properly.
		 */
		if (likely(cold == 0))
			list_add(&page->lru, list);		/* hot page: add to the head of the list */
		else
			list_add_tail(&page->lru, list);	/* cold page: add to the tail */
		if (IS_ENABLED(CONFIG_CMA)) {	/* only when CONFIG_CMA is compiled in */
			mt = get_pageblock_migratetype(page);	/* migrate type of the page's pageblock */
			if (!is_migrate_cma(mt) && !is_migrate_isolate(mt))
				mt = migratetype;	/* neither CMA nor isolated: use the requested type */
		}
		set_freepage_migratetype(page, mt);	/* record the migrate type in the page */
		list = &page->lru;			/* link the next page after this one */
		if (is_migrate_cma(mt))		/* CMA pages have their own free-page counter */
			__mod_zone_page_state(zone, NR_FREE_CMA_PAGES,
					      -(1 << order));
	}
	__mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));	/* update the zone's free page count */
	spin_unlock(&zone->lock);
	return i;	/* number of page blocks added to the per-CPU list */
}


Updating the free-page counters for the corresponding statistics item:

static inline void __mod_zone_page_state(struct zone *zone,
			enum zone_stat_item item, int delta)
{
	zone_page_state_add(delta, zone, item);
}

static inline void zone_page_state_add(long x, struct zone *zone,
				 enum zone_stat_item item)
{
	atomic_long_add(x, &zone->vm_stat[item]);
	atomic_long_add(x, &vm_stat[item]);
}


/*
 * Do the hard work of removing an element from the buddy allocator.
 * Call me with the zone->lock already held.
 */
static struct page *__rmqueue(struct zone *zone, unsigned int order,
						int migratetype)
{
	struct page *page;

retry_reserve:
	page = __rmqueue_smallest(zone, order, migratetype);	/* normal case: allocate from the free lists of the requested migrate type */

	if (unlikely(!page) && migratetype != MIGRATE_RESERVE) {
		/* nothing of the requested type, and this is not already an emergency allocation */
		page = __rmqueue_fallback(zone, order, migratetype);	/* steal from the fallback migrate types */

		/*
		 * Use MIGRATE_RESERVE rather than fail an allocation. goto
		 * is used because __rmqueue_smallest is an inline function
		 * and we want just one call site
		 */
		if (!page) {	/* still nothing: retry as an emergency allocation */
			migratetype = MIGRATE_RESERVE;
			goto retry_reserve;
		}
	}

	trace_mm_page_alloc_zone_locked(page, order, migratetype);
	return page;
}


/*
 * Go through the free lists for the given migratetype and remove
 * the smallest available page from the freelists
 */
static inline
struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
						int migratetype)
{
	unsigned int current_order;
	struct free_area *area;
	struct page *page;

	/* Find a page of the appropriate size in the preferred list */
	for (current_order = order; current_order < MAX_ORDER; ++current_order) {
		/* scan every order, starting at the requested one */
		area = &(zone->free_area[current_order]);
		if (list_empty(&area->free_list[migratetype]))	/* no free blocks of this migrate type at this order */
			continue;

		page = list_entry(area->free_list[migratetype].next,
							struct page, lru);	/* take the first block from the list */
		list_del(&page->lru);
		rmv_page_order(page);	/* clear the buddy flag (page->_mapcount back to -1) and clear page->private */
		area->nr_free--;	/* note: nr_free counts free blocks of this order, not pages */
		expand(zone, page, order, current_order, area, migratetype);	/* split the higher-order block down, half at a time, until the requested order is reached */
		return page;
	}

	return NULL;
}
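A quick arithmetic check of the nr_free comment above: nr_free counts free blocks of a given order, not pages, so an order contributes nr_free << order free pages. A tiny sketch with made-up numbers:

#include <stdio.h>

int main(void)
{
	/* hypothetical snapshot of zone->free_area[order].nr_free for orders 0..3 */
	unsigned long nr_free[] = { 10, 4, 0, 5 };
	unsigned long total_pages = 0;

	for (int order = 0; order < 4; order++) {
		unsigned long pages = nr_free[order] << order;	/* blocks * 2^order pages */
		printf("order %d: %lu blocks = %lu pages\n", order, nr_free[order], pages);
		total_pages += pages;
	}
	printf("total free pages: %lu\n", total_pages);	/* 10 + 8 + 0 + 40 = 58 */
	return 0;
}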


expand() is one of the core buddy-system operations. Suppose the search found a free block at order 8 but we only need order 6: expand() splits the order-8 block in half and hangs one half on the order-7 free list, then splits the remaining half again and hangs that quarter on the order-6 free list; what is left is exactly the order-6 block we asked for, and it is returned to the caller. (A small worked sketch follows the kernel code below.)

The parameters:

struct zone *zone: the zone all of the operations are performed on;

struct page *page: the block that was taken from a higher order;

int low: the order we actually need;

int high: the order the block was found (allocated) at;

struct free_area *area: the zone's free_area entry for that higher order;

int migratetype: the migrate type.

/*
 * The order of subdivision here is critical for the IO subsystem.
 * Please do not alter this order without good reasons and regression
 * testing. Specifically, as large blocks of memory are subdivided,
 * the order in which smaller blocks are delivered depends on the order
 * they're subdivided in this function. This is the primary factor
 * influencing the order in which pages are delivered to the IO
 * subsystem according to empirical testing, and this is also justified
 * by considering the behaviour of a buddy system containing a single
 * large block of memory acted on by a series of small allocations.
 * This behaviour is a critical factor in sglist merging's success.
 *
 * -- nyc
 */
static inline void expand(struct zone *zone, struct page *page,
	int low, int high, struct free_area *area,
	int migratetype)
{
	unsigned long size = 1 << high;

	while (high > low) {	/* if the block was found at the requested order, the loop body never runs */
		area--;		/* step down to the free_area of the next lower order */
		high--;
		size >>= 1;	/* half of the block stays free at this lower order */
		VM_BUG_ON(bad_range(zone, &page[size]));

#ifdef CONFIG_DEBUG_PAGEALLOC
		if (high < debug_guardpage_minorder()) {
			/*
			 * Mark as guard pages (or page), that will allow to
			 * merge back to allocator when buddy will be freed.
			 * Corresponding page table entries will not be touched,
			 * pages will stay not present in virtual address space
			 */
			INIT_LIST_HEAD(&page[size].lru);
			set_page_guard_flag(&page[size]);
			set_page_private(&page[size], high);
			/* Guard pages are not available for any usage */
			__mod_zone_freepage_state(zone, -(1 << high),
						  migratetype);
			continue;
		}
#endif
		list_add(&page[size].lru, &area->free_list[migratetype]);	/* hang the upper half on this order's list for the migrate type */
		area->nr_free++;	/* one more free block at this order */
		set_page_order(&page[size], high);	/* store the order in page->private and set the buddy flag: page[size] belongs to the buddy system again */
	}
}
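To make the splitting concrete, here is a minimal user-space simulation of the loop above for the assumed example from the text (block found at order 8, request for order 6); it only prints which leftover halves would land on which free list:

#include <stdio.h>

int main(void)
{
	int low = 6;			/* the order we asked for */
	int high = 8;			/* the order the block was found at */
	unsigned long size = 1UL << high;	/* 256 pages */

	/* mirrors the while (high > low) loop in expand() */
	while (high > low) {
		high--;
		size >>= 1;
		/* the upper half, page[size]..page[2*size-1], stays free at this order */
		printf("put pages [%lu..%lu] on the order-%d free list\n",
		       size, 2 * size - 1, high);
	}
	printf("pages [0..%lu] (order %d) are returned to the caller\n",
	       (1UL << low) - 1, low);
	return 0;
}

Running it prints that pages 128..255 go to the order-7 list, pages 64..127 go to the order-6 list, and pages 0..63 are the order-6 block that is returned.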


Reaching this function means the allocation from the buddy system failed for the requested migrate type, so the fallback list of alternative migrate types is used;

/*
 * This array describes the order lists are fallen back to when
 * the free lists for the desirable migrate type are depleted
 */
static int fallbacks[MIGRATE_TYPES][4] = {
	[MIGRATE_UNMOVABLE]   = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE,     MIGRATE_RESERVE },
	[MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE,   MIGRATE_MOVABLE,     MIGRATE_RESERVE },
#ifdef CONFIG_CMA
	[MIGRATE_MOVABLE]     = { MIGRATE_CMA,         MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE },
	[MIGRATE_CMA]         = { MIGRATE_RESERVE }, /* Never used */
#else
	[MIGRATE_MOVABLE]     = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE,   MIGRATE_RESERVE },
#endif
	[MIGRATE_RESERVE]     = { MIGRATE_RESERVE }, /* Never used */
#ifdef CONFIG_MEMORY_ISOLATION
	[MIGRATE_ISOLATE]     = { MIGRATE_RESERVE }, /* Never used */
#endif
};

The fallback lists above are traversed in order:

/* Remove an element from the buddy allocator from the fallback list */
static inline struct page *
__rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
{
	struct free_area *area;
	int current_order;
	struct page *page;
	int migratetype, i;

	/* Find the largest possible block of pages in the other list */
	for (current_order = MAX_ORDER-1; current_order >= order;
						--current_order) {
		/*
		 * Unlike __rmqueue_smallest(), the scan starts at the highest
		 * order: stealing one large block keeps fragmentation down.
		 */
		for (i = 0;; i++) {
			migratetype = fallbacks[start_migratetype][i];

			/* MIGRATE_RESERVE handled later if necessary */
			if (migratetype == MIGRATE_RESERVE)	/* the reserve lists are the very last resort */
				break;

			area = &(zone->free_area[current_order]);	/* free_area for this order */
			if (list_empty(&area->free_list[migratetype]))	/* nothing of this fallback type here, try the next fallback */
				continue;

			page = list_entry(area->free_list[migratetype].next,
					struct page, lru);	/* found a free block at this order */
			area->nr_free--;

			/*
			 * If breaking a large block of pages, move all free
			 * pages to the preferred allocation list. If falling
			 * back for a reclaimable kernel allocation, be more
			 * aggressive about taking ownership of free pages
			 *
			 * On the other hand, never change migration
			 * type of MIGRATE_CMA pageblocks nor move CMA
			 * pages on different free lists. We don't
			 * want unmovable pages to be allocated from
			 * MIGRATE_CMA areas.
			 */
			/*
			 * The code below decides what happens to the rest of the
			 * stolen block. pageblock_order is the order the kernel
			 * treats as a "large" allocation (configuration dependent,
			 * usually MAX_ORDER-1); pageblock_nr_pages is the
			 * corresponding number of pages.
			 */
			if (!is_migrate_cma(migratetype) &&	/* never convert CMA pageblocks */
			    (unlikely(current_order >= pageblock_order / 2) ||	/* a large block: convert it all to start_migratetype */
			     start_migratetype == MIGRATE_RECLAIMABLE ||	/* reclaimable allocations convert more aggressively */
			     page_group_by_mobility_disabled)) {
				int pages;
				pages = move_freepages_block(zone, page,
							start_migratetype);	/* move this pageblock's free pages to start_migratetype */

				/* Claim the whole block if over half of it is free */
				if (pages >= (1 << (pageblock_order-1)) ||
						page_group_by_mobility_disabled)
					set_pageblock_migratetype(page,
							start_migratetype);	/* set the migrate type of the whole pageblock; move_freepages_block() above only re-tagged the individual free pages */

				migratetype = start_migratetype;
			}

			/* Remove the page from the freelists */
			list_del(&page->lru);
			rmv_page_order(page);	/* clear the buddy flag: this block is leaving the buddy system */

			/* Take ownership for orders >= pageblock_order */
			if (current_order >= pageblock_order &&
			    !is_migrate_cma(migratetype))
				change_pageblock_range(page, current_order,
							start_migratetype);	/* re-tag the remaining pageblocks as start_migratetype */

			expand(zone, page, order, current_order, area,
			       is_migrate_cma(migratetype)
			     ? migratetype : start_migratetype);	/* split the large block down to the requested order */

			trace_mm_page_alloc_extfrag(page, order, current_order,
				start_migratetype, migratetype);

			return page;
		}
	}

	return NULL;
}



int move_freepages_block(struct zone *zone, struct page *page,
				int migratetype)
{
	unsigned long start_pfn, end_pfn;
	struct page *start_page, *end_page;

	start_pfn = page_to_pfn(page);				/* page frame number of the page */
	start_pfn = start_pfn & ~(pageblock_nr_pages-1);	/* round down to a pageblock boundary */
	start_page = pfn_to_page(start_pfn);
	end_page = start_page + pageblock_nr_pages - 1;	/* a whole pageblock (pageblock_nr_pages pages) is converted at once, which keeps fragmentation down */
	end_pfn = start_pfn + pageblock_nr_pages - 1;

	/* Do not cross zone boundaries */
	if (!zone_spans_pfn(zone, start_pfn))
		start_page = page;
	if (!zone_spans_pfn(zone, end_pfn))	/* the range must lie entirely within the zone */
		return 0;

	return move_freepages(zone, start_page, end_page, migratetype);	/* convert the migrate type of the free pages in [start_page, end_page] */
}
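The rounding above is just a mask operation. A small sketch of the arithmetic, assuming pageblock_nr_pages = 1024 (the real value depends on the kernel configuration, e.g. the huge-page order or MAX_ORDER-1):

#include <stdio.h>

int main(void)
{
	unsigned long pageblock_nr_pages = 1024;	/* assumed value for illustration only */
	unsigned long pfn = 0x12345;			/* an arbitrary page frame number */

	unsigned long start_pfn = pfn & ~(pageblock_nr_pages - 1);	/* round down to a pageblock boundary */
	unsigned long end_pfn   = start_pfn + pageblock_nr_pages - 1;

	printf("pfn 0x%lx lies in pageblock [0x%lx, 0x%lx]\n", pfn, start_pfn, end_pfn);
	/* prints: pfn 0x12345 lies in pageblock [0x12000, 0x123ff] */
	return 0;
}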


/*
 * Move the free pages in a range to the free lists of the requested type.
 * Note that start_page and end_page are not aligned on a pageblock
 * boundary. If alignment is required, use move_freepages_block()
 */
/*
 * (The author wonders: the caller above clearly aligned the range to
 * pageblock_nr_pages, so why does the comment say it may not be aligned?
 * The comment describes move_freepages() itself: this function does not
 * require alignment; move_freepages_block() is the wrapper that provides it.)
 */
int move_freepages(struct zone *zone,
			  struct page *start_page, struct page *end_page,
			  int migratetype)
{
	struct page *page;
	unsigned long order;
	int pages_moved = 0;

#ifndef CONFIG_HOLES_IN_ZONE
	/*
	 * page_zone is not safe to call in this context when
	 * CONFIG_HOLES_IN_ZONE is set. This bug check is probably redundant
	 * anyway as we check zone boundaries in move_freepages_block().
	 * Remove at a later date when no bug reports exist related to
	 * grouping pages by mobility
	 */
	BUG_ON(page_zone(start_page) != page_zone(end_page));
#endif

	for (page = start_page; page <= end_page;) {
		/* Make sure we are not inadvertently changing nodes */
		VM_BUG_ON(page_to_nid(page) != zone_to_nid(zone));

		if (!pfn_valid_within(page_to_pfn(page))) {
			page++;
			continue;
		}

		if (!PageBuddy(page)) {	/* skip pages that are not currently in the buddy system */
			page++;
			continue;
		}

		order = page_order(page);	/* order of this free block */
		list_move(&page->lru,
			  &zone->free_area[order].free_list[migratetype]);	/* move the block to the free list of the requested migrate type */
		set_freepage_migratetype(page, migratetype);	/* record the new migrate type (page->index = migratetype) */
		page += 1 << order;		/* the whole 2^order block has been handled */
		pages_moved += 1 << order;
	}

	return pages_moved;	/* number of pages actually moved */
}


static void change_pageblock_range(struct page *pageblock_page,
					int start_order, int migratetype)
{
	int nr_pageblocks = 1 << (start_order - pageblock_order);	/* how many pageblocks the order-start_order block covers */

	while (nr_pageblocks--) {	/* set each pageblock in turn */
		set_pageblock_migratetype(pageblock_page, migratetype);	/* set the migrate type of this pageblock */
		pageblock_page += pageblock_nr_pages;	/* advance to the next pageblock */
	}
}
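A quick check of the arithmetic, with assumed, configuration-dependent numbers: if pageblock_order were 10 and the stolen block were order 11, the block would cover 1 << (11 - 10) = 2 pageblocks, so the loop runs twice and advances 1024 pages each time.

#include <stdio.h>

int main(void)
{
	int pageblock_order = 10;	/* assumed value, configuration dependent */
	int start_order = 11;		/* the order of the stolen block */

	int nr_pageblocks = 1 << (start_order - pageblock_order);
	long pageblock_nr_pages = 1L << pageblock_order;

	printf("order-%d block = %d pageblock(s) of %ld pages each\n",
	       start_order, nr_pageblocks, pageblock_nr_pages);
	/* prints: order-11 block = 2 pageblock(s) of 1024 pages each */
	return 0;
}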


