記憶體管理--檢測記憶體

來源:互聯網
上載者:User

標籤:

linux kernel被bootloader載入到記憶體後,cpu首先執行arch/x86/boot/header.S中的start_of_setup等代碼,然後跳轉到main.c的main();main()在完成若干早期初始化之後調用detect_memory()函數探測記憶體:

/*
 * Probe memory with every available BIOS method.
 *
 * All three probes are always run, in the order e820, e801, 88.  The
 * result is 0 when at least one of them succeeds (e820 returned a positive
 * count, or e801/88 returned 0) and -1 when all of them fail.
 */
int detect_memory(void)
{
	int e820_found = detect_memory_e820() > 0;
	int e801_ok = !detect_memory_e801();
	int int88_ok = !detect_memory_88();

	return (e820_found || e801_ok || int88_ok) ? 0 : -1;
}

linux核心通過detect_memory_xxx來擷取記憶體相關資訊;這幾個函數都是通過觸發int 0x15中斷來擷取的,調用前分別把AX寄存器設定為0xe820、0xe801,或把AH寄存器設定為0x88。


對於e820();

struct e820entry {__u64 addr;/* start of memory segment */該記憶體段的起始地址__u64 size;/* size of memory segment */該記憶體段段的大小__u32 type;/* type of memory segment */該記憶體段的類型} __attribute__((packed));struct e820map {<span style="white-space:pre"></span>__u32 nr_map;<span style="white-space:pre"></span>struct e820entry map[E820_X_MAX];};
type:該記憶體段的類型,可分為 Usable (normal) RAM、Reserved (unusable)、ACPI reclaimable memory、ACPI NVS memory、Area containing bad memory。要擷取所有記憶體段的資訊,detect_memory_e820()通過一個do-while迴圈不斷觸發int 0x15中斷來擷取每個記憶體段的資訊,並且將這些資訊儲存在一個struct e820entry類型的數組(boot_params.e820_map)中。

static int detect_memory_e820(void){int count = 0;struct biosregs ireg, oreg;struct e820entry *desc = boot_params.e820_map;static struct e820entry buf; /* static so it is zeroed */initregs(&ireg);ireg.ax  = 0xe820;ireg.cx  = sizeof buf;ireg.edx = SMAP;ireg.di  = (size_t)&buf;/* * Note: at least one BIOS is known which assumes that the * buffer pointed to by one e820 call is the same one as * the previous call, and only changes modified fields.  Therefore, * we use a temporary buffer and copy the results entry by entry. * * This routine deliberately does not try to account for * ACPI 3+ extended attributes.  This is because there are * BIOSes in the field which report zero for the valid bit for * all ranges, and we don't currently make any use of the * other attribute bits.  Revisit this if we see the extended * attribute bits deployed in a meaningful way in the future. */do {<span style="white-space:pre"></span> /*在執行這條內聯彙編語句時輸入的參數有:         eax寄存器=0xe820         dx寄存器=’SMAP’         edi寄存器=desc         ebx寄存器=next         ecx寄存器=size                  返回給c語言代碼的參數有:         id=eax寄存器         rr=edx寄存器         ext=ebx寄存器         size=ecx寄存器         desc指向的記憶體位址在執行0x15中斷調用時被設定         */  <span style="white-space:pre"></span>intcall(0x15, &ireg, &oreg);/*觸發中斷0x15*/ireg.ebx = oreg.ebx; /* for next iteration... *//* BIOSes which terminate the chain with CF = 1 as opposed   to %ebx = 0 don't always report the SMAP signature on   the final, failing, probe. */if (oreg.eflags & X86_EFLAGS_CF)break;/* Some BIOSes stop returning SMAP in the middle of   the search loop.  We don't know exactly how the BIOS   screwed up the map at that point, we might have a   partial map, the full map, or complete garbage, so   just return failure. 
*/if (oreg.eax != SMAP) {count = 0;break;}*desc++ = buf;/*儲存擷取的記憶體段資訊*/  count++;  /*擷取的記憶體段數目加1*/  } while (ireg.ebx && count < ARRAY_SIZE(boot_params.e820_map));<span style="white-space:pre"></span>/*將記憶體塊數保持到變數中*/  return boot_params.e820_entries = count;}

  

static int detect_memory_e801(void){struct biosregs ireg, oreg;initregs(&ireg);ireg.ax = 0xe801;intcall(0x15, &ireg, &oreg);if (oreg.eflags & X86_EFLAGS_CF)return -1;/* Do we really need to do this? */if (oreg.cx || oreg.dx) {oreg.ax = oreg.cx;oreg.bx = oreg.dx;}if (oreg.ax > 15*1024) {return -1;/* Bogus! */} else if (oreg.ax == 15*1024) {boot_params.alt_mem_k = (oreg.bx << 6) + oreg.ax;} else {/* * This ignores memory above 16MB if we have a memory * hole there.  If someone actually finds a machine * with a memory hole at 16MB and no support for * 0E820h they should probably generate a fake e820 * map. */boot_params.alt_mem_k = oreg.ax;}return 0;}static int detect_memory_88(void){struct biosregs ireg, oreg;initregs(&ireg);ireg.ah = 0x88;intcall(0x15, &ireg, &oreg);boot_params.screen_info.ext_mem_k = oreg.ax;return -(oreg.eflags & X86_EFLAGS_CF); /* 0 or -1 */}

對於32位的系統,通過調用鏈arch/x86/boot/main.c:main()--->arch/x86/boot/pm.c:go_to_protected_mode()--->arch/x86/boot/pmjump.S:protected_mode_jump()--->arch/x86/boot/compressed/head_32.S:startup_32()--->arch/x86/kernel/head_32.S:startup_32()--->arch/x86/kernel/head32.c:i386_start_kernel()--->init/main.c:start_kernel(),到達眾所周知的Linux核心啟動函數start_kernel(),這裡會調用setup_arch()完成與體系結構相關的一系列初始化工作,其中就包括各種記憶體的初始化工作,如記憶體配置圖的建立、管理區的初始化等等。對x86體系結構,setup_arch()函數在arch/x86/kernel/setup.c中,如下:

/*
 * Abridged excerpt of setup_arch(): only the memory-related steps are
 * shown; every "......" comment marks code that was left out.
 */
void __init setup_arch(char **cmdline_p)
{
	/* ...... */
	x86_init.oem.arch_setup();
	setup_memory_map();	/* build the memory map */
	e820_reserve_setup_data();
	/* ...... */

	/*
	 * partially used pages are not usable - thus
	 * we are rounding upwards:
	 */
	max_pfn = e820_end_of_ram_pfn();	/* highest usable RAM page frame number */
	/* ...... */

#ifdef CONFIG_X86_32
	/* max_low_pfn is updated here */
	find_low_pfn_range();	/* find the highest low-memory page frame number */
#else
	num_physpages = max_pfn;
	/* ...... */
#endif
	/*
	 * NOTE(review): the excerpt had no #endif for the conditional above.
	 * It is placed here on the assumption that the code below is shared
	 * by both configurations -- confirm against the full setup.c.
	 */

	/* max_pfn_mapped is updated here */
	/* set up the memory mapping */
	max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
	max_pfn_mapped = max_low_pfn_mapped;
	/* ...... */

	/* presumably sets up the boot-time memory allocator -- TODO confirm */
	initmem_init(0, max_pfn);
	/* ...... */

	x86_init.paging.pagetable_setup_start(swapper_pg_dir);
	paging_init();	/* build the complete page tables */
	x86_init.paging.pagetable_setup_done(swapper_pg_dir);
	/* ...... */
}



在 start_kernel---->setup_arch()--------------->setup_memory_map;

/*
 * Run the platform's memory_setup hook, keep a copy of the resulting e820
 * map in e820_saved, and print the map together with the string the hook
 * returned.
 */
void __init setup_memory_map(void)
{
	char *origin = x86_init.resources.memory_setup();

	memcpy(&e820_saved, &e820, sizeof(struct e820map));

	printk(KERN_INFO "e820: BIOS-provided physical RAM map:\n");
	e820_print_map(origin);
}
在x86_init.c中定義了x86下的memory_setup函數:

/*
 * Default platform hooks for standard PC hardware.  Each group below is
 * one sub-structure of x86_init_ops; the entries are the functions used
 * unless a platform replaces them.
 */
struct x86_init_ops x86_init __initdata = {

	.resources = {
		.probe_roms		= probe_roms,
		.reserve_resources	= reserve_standard_io_resources,
		/* hook called by setup_memory_map() */
		.memory_setup		= default_machine_specific_memory_setup,
	},

	.mpparse = {
		.mpc_record		= x86_init_uint_noop,
		.setup_ioapic_ids	= x86_init_noop,
		.mpc_apic_id		= default_mpc_apic_id,
		.smp_read_mpc_oem	= default_smp_read_mpc_oem,
		.mpc_oem_bus_info	= default_mpc_oem_bus_info,
		.find_smp_config	= default_find_smp_config,
		.get_smp_config		= default_get_smp_config,
	},

	.irqs = {
		.pre_vector_init	= init_ISA_irqs,
		.intr_init		= native_init_IRQ,
		.trap_init		= x86_init_noop,
	},

	.oem = {
		.arch_setup		= x86_init_noop,
		.banner			= default_banner,
	},

	.mapping = {
		.pagetable_reserve	= native_pagetable_reserve,
	},

	.paging = {
		.pagetable_setup_start	= native_pagetable_setup_start,
		.pagetable_setup_done	= native_pagetable_setup_done,
	},

	.timers = {
		.setup_percpu_clockev	= setup_boot_APIC_clock,
		.tsc_pre_init		= x86_init_noop,
		.timer_init		= hpet_time_init,
		.wallclock_init		= x86_init_noop,
	},

	.iommu = {
		.iommu_init		= iommu_init_noop,
	},

	.pci = {
		.init			= x86_default_pci_init,
		.init_irq		= x86_default_pci_init_irq,
		.fixup_irqs		= x86_default_pci_fixup_irqs,
	},
};

可知會回調:default_machine_specific_memory_setup();

/*
 * Build the kernel's e820 map from the data collected at boot.
 *
 * The BIOS-supplied e820 table is sanitized (overlapping segments are
 * resolved) and copied into the global e820 map.  If that copy fails, a
 * two-segment map is faked instead: 0 .. LOWMEMSIZE() and HIGH_MEMORY
 * upwards, sized from whichever of the e801 / 88 results is larger.
 *
 * Returns a string naming the source that was used.
 */
char *__init default_machine_specific_memory_setup(void)
{
	char *who = "BIOS-e820";
	u32 entries = boot_params.e820_entries;
	u64 mem_size;

	sanitize_e820_map(boot_params.e820_map,
			  ARRAY_SIZE(boot_params.e820_map),
			  &entries);
	boot_params.e820_entries = entries;

	/* Normal case: the BIOS e820 table could be copied into e820. */
	if (append_e820_map(boot_params.e820_map, boot_params.e820_entries) >= 0)
		return who;

	/* compare results from other methods and take the greater */
	if (boot_params.alt_mem_k < boot_params.screen_info.ext_mem_k) {
		mem_size = boot_params.screen_info.ext_mem_k;
		who = "BIOS-88";
	} else {
		mem_size = boot_params.alt_mem_k;
		who = "BIOS-e801";
	}

	/* Start from an empty map and add the two faked segments. */
	e820.nr_map = 0;
	e820_add_region(0, LOWMEMSIZE(), E820_RAM);
	e820_add_region(HIGH_MEMORY, mem_size << 10, E820_RAM);

	/* In case someone cares... */
	return who;
}

1.消除記憶體段的重疊部分

2.將記憶體布局資訊從boot_params.e820_map拷貝到e820中

append_e820_map(boot_params.e820_map, boot_params.e820_entries)將會調用以下函數:

/*
 * Copy nr_map entries from a BIOS-provided table into the e820 map.
 *
 * @biosmap: first entry of the source table
 * @nr_map:  number of entries to copy
 *
 * Returns 0 on success, or -1 as soon as an entry's start + size wraps
 * around in 64 bits; entries before the bad one have already been added.
 */
static int __init __append_e820_map(struct e820entry *biosmap, int nr_map)
{
	while (nr_map) {
		u64 start = biosmap->addr;
		u64 size = biosmap->size;
		u64 end = start + size;
		u32 type = biosmap->type;

		/* Overflow in 64 bits? Ignore the memory map. */
		if (start > end)
			return -1;

		/* one region is added to e820 per iteration, nr_map times in all */
		e820_add_region(start, size, type);

		biosmap++;
		nr_map--;
	}
	return 0;
}
  
/*
 * Add the region [start, start + size) of the given type to the e820 map
 * defined below, by forwarding to __e820_add_region().
 */
void __init e820_add_region(u64 start, u64 size, int type){__e820_add_region(&e820, start, size, type);}
/* The e820 map that e820_add_region() fills in. */
struct e820map e820;

實體記憶體就已經從BIOS中讀出來存放到全域變數e820中,

建立記憶體後

setup_arch------------->e820_end_of_ram_pfn;

/*
* partially used pages are not usable - thus
* we are rounding upwards:
*/
max_pfn = e820_end_of_ram_pfn();

/*
 * Find the highest page frame number covered by e820 entries of the
 * given type.
 *
 * @limit_pfn: entries starting at or above this pfn are ignored, and an
 *             entry ending above it makes the result limit_pfn
 * @type:      e820 entry type to consider
 *
 * The result is additionally capped at MAX_ARCH_PFN and is logged before
 * it is returned.
 */
static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
{
	const unsigned long arch_cap = MAX_ARCH_PFN;
	unsigned long highest = 0;
	int idx;

	/* walk every entry of the memory map */
	for (idx = 0; idx < e820.nr_map; idx++) {
		struct e820entry *entry = &e820.map[idx];
		unsigned long first_pfn, end_pfn;

		if (entry->type != type)
			continue;

		first_pfn = entry->addr >> PAGE_SHIFT;
		end_pfn = (entry->addr + entry->size) >> PAGE_SHIFT;

		/* segment lies entirely at or above the limit: skip it */
		if (first_pfn >= limit_pfn)
			continue;

		/* segment crosses the limit: the limit is the answer */
		if (end_pfn > limit_pfn) {
			highest = limit_pfn;
			break;
		}

		/* segment ends above everything seen so far */
		if (end_pfn > highest)
			highest = end_pfn;
	}

	if (highest > arch_cap)
		highest = arch_cap;

	printk(KERN_INFO "last_pfn = %#lx max_arch_pfn = %#lx\n",
	       highest, arch_cap);
	return highest;
}
/*
 * Return the end page frame number of E820_RAM memory, using
 * MAX_ARCH_PFN as the limit passed to e820_end_pfn().
 */
unsigned long __init e820_end_of_ram_pfn(void)
{
	return e820_end_pfn(MAX_ARCH_PFN, E820_RAM);
}


/*
 * MAXMEM: the distance from PAGE_OFFSET to VMALLOC_END, minus the space
 * set aside by __VMALLOC_RESERVE.
 */
#define MAXMEM    (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE)  

其中__VMALLOC_RESERVE為128M,說明了第4GB的記憶體劃分

可知:MAXMEM為一個略小於896M的值(896M-8K-4M-4M)即略小於低端記憶體的上限,高端記憶體的起始地址


setup_arch()-->find_low_pfn_range().該函數用來劃分低端記憶體和高端記憶體的界限,確定高端記憶體的起始地址

/* max_low_pfn get updated here */
find_low_pfn_range();

/*
 * Determine low and high memory ranges.
 *
 * When max_pfn exceeds MAXMEM_PFN the split is set up by
 * highmem_pfn_init(); otherwise lowmem_pfn_init() handles it.
 */
void __init find_low_pfn_range(void)
{
	/* it could update max_pfn */
	if (max_pfn > MAXMEM_PFN) {
		highmem_pfn_init();
		return;
	}

	/* physical memory fits within MAXMEM_PFN */
	lowmem_pfn_init();
}
/*
 * We have more RAM than fits into lowmem - we try to put it into
 * highmem, also taking the highmem=x boot parameter into account.
 *
 * The number of highmem pages may have been configured at boot; when it
 * was not (highmem_pages == -1) it is derived here.
 */
void __init highmem_pfn_init(void)
{
	/* boundary between low memory and high memory */
	max_low_pfn = MAXMEM_PFN;

	/* not configured at boot: everything above the boundary is highmem */
	if (highmem_pages == -1) {
		highmem_pages = max_pfn - MAXMEM_PFN;
	}

	/*
	 * A boot-time setting may disagree with the memory actually present;
	 * the two checks below reconcile them.
	 */
	if (highmem_pages + MAXMEM_PFN < max_pfn) {
		max_pfn = MAXMEM_PFN + highmem_pages;
	}

	if (highmem_pages + MAXMEM_PFN > max_pfn) {
		printk(KERN_WARNING MSG_HIGHMEM_TOO_SMALL,
		       pages_to_mb(max_pfn - MAXMEM_PFN),
		       pages_to_mb(highmem_pages));
		highmem_pages = 0;
	}

#ifndef CONFIG_HIGHMEM
	/* Maximum memory usable is what is directly addressable */
	printk(KERN_WARNING "Warning only %ldMB will be used.\n", MAXMEM>>20);
	if (max_pfn > MAX_NONPAE_PFN) {
		printk(KERN_WARNING "Use a HIGHMEM64G enabled kernel.\n");
	} else {
		printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
	}
	max_pfn = MAXMEM_PFN;
#else /* CONFIG_HIGHMEM */
#ifndef CONFIG_HIGHMEM64G
	/* without HIGHMEM64G, max_pfn is clamped to MAX_NONPAE_PFN */
	if (max_pfn > MAX_NONPAE_PFN) {
		max_pfn = MAX_NONPAE_PFN;
		printk(KERN_WARNING MSG_HIGHMEM_TRIMMED);
	}
#endif /* !CONFIG_HIGHMEM64G */
#endif /* CONFIG_HIGHMEM */
}
當實際記憶體小於896M時
/*
 * All of RAM fits into lowmem: the low/high boundary starts out at
 * max_pfn.  With CONFIG_HIGHMEM, a highmem size requested at boot is
 * carved out of the top of that range, provided enough low memory
 * remains.
 */
void __init lowmem_pfn_init(void)
{
	/* max_low_pfn is 0, we already have early_res support */
	max_low_pfn = max_pfn;

	/* nothing requested at boot: no highmem */
	if (highmem_pages == -1) {
		highmem_pages = 0;
	}

#ifdef CONFIG_HIGHMEM
	/* request is at least as large as all of memory: drop it */
	if (highmem_pages >= max_pfn) {
		printk(KERN_ERR MSG_HIGHMEM_TOO_BIG,
		       pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
		highmem_pages = 0;
	}

	if (highmem_pages) {
		/* refuse a split that would leave less than 64MB of lowmem */
		if (max_low_pfn - highmem_pages < 64*1024*1024/PAGE_SIZE) {
			printk(KERN_ERR MSG_LOWMEM_TOO_SMALL,
			       pages_to_mb(highmem_pages));
			highmem_pages = 0;
		}
		/* move the low/high boundary down by the highmem size */
		max_low_pfn -= highmem_pages;
	}
#else
	if (highmem_pages) {
		printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
	}
#endif
}
當實際的實體記憶體大於896M,由highmem_pfn_init()進行分配void __init highmem_pfn_init(void){max_low_pfn = MAXMEM_PFN; /*設定高端記憶體和低端記憶體的分界線*/if (highmem_pages == -1)  /*未設定高端記憶體的頁框數*/highmem_pages = max_pfn - MAXMEM_PFN;  /*預設為最大頁框數減去MAXMEM_PFN*/if (highmem_pages + MAXMEM_PFN < max_pfn)      /*高端記憶體頁框數加上MAXMEM_PFN小於最大頁框數*/max_pfn = MAXMEM_PFN + highmem_pages;  /*將最大頁框數下調到前兩者的和*/if (highmem_pages + MAXMEM_PFN > max_pfn){     /*申請的高端記憶體超過範圍則不分配*/printk(KERN_WARNING MSG_HIGHMEM_TOO_SMALL,pages_to_mb(max_pfn - MAXMEM_PFN),pages_to_mb(highmem_pages));highmem_pages = 0;}#ifndef CONFIG_HIGHMEM/* Maximum memory usable is what is directly addressable */printk(KERN_WARNING "Warning only %ldMB will be used.\n", MAXMEM>>20);if (max_pfn > MAX_NONPAE_PFN)printk(KERN_WARNING "Use a HIGHMEM64G enabled kernel.\n");elseprintk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");max_pfn = MAXMEM_PFN;#else /* !CONFIG_HIGHMEM */#ifndef CONFIG_HIGHMEM64Gif (max_pfn > MAX_NONPAE_PFN) {max_pfn = MAX_NONPAE_PFN;printk(KERN_WARNING MSG_HIGHMEM_TRIMMED);}#endif /* !CONFIG_HIGHMEM64G */#endif /* !CONFIG_HIGHMEM */}




記憶體管理--檢測記憶體

聯繫我們

該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里云無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.