[Bug]Linux核心啟動過程中,ramdisk載入失敗,系統崩潰

來源:互聯網
上載者:User

[Bug]Linux核心啟動過程中,ramdisk載入失敗,系統崩潰
Bug描述:Linux核心啟動過程中,ramdisk載入失敗,系統崩潰
日誌資訊:

 
  1. RAMDISK: Couldn't find valid RAM disk image starting at 0.
  2. UDF-fs: No partition found (1)
  3. NILFS: Can't find nilfs on dev ram0.
  4. (1,15):ocfs2_fill_super:1001 ERROR: superblock probe
  5. VFS: Cannot open root device "ram0" or unknown-block(1,0)
  6. Please append a correct "root=" boot option; here are the available partitions:
  7. 0800 8003520 sda driver: sd
  8. 0801 14048 sda1
  9. 0804 1 sda4
  10. 0805 393057 sda5
  11. 0806 102400 sda6
  12. 0807 7488690 sda7
  13. Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(1,0)
  14. Unable to load '/system/dump '.
  15. Pid: 1, comm: swapper Not tainted 2.6.32.15-hermes-1 #23
  16. Call Trace:

  17. UTC time : 2005-1-1 0:14:52
  18. [] panic+0x7a/0x12d
  19. [] mount_block_root+0x257/0x275
  20. [] mount_root+0x56/0x5a
  21. [] prepare_namespace+0x16b/0x198
  22. [] kernel_init+0x178/0x188
  23. [] child_rip+0xa/0x20
  24. [] ? kernel_init+0x0/0x188
  25. [] ? child_rip+0x0/0x20
首先看到Couldn't find valid RAM disk image starting at 0.
順藤摸瓜,找到列印此資訊的代碼:
 
  1. static int __init
  2. identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor)
  3. {
  4. const int size = 512;
  5. struct minix_super_block *minixsb;
  6. struct ext2_super_block *ext2sb;
  7. struct romfs_super_block *romfsb;
  8. struct cramfs_super *cramfsb;
  9. struct squashfs_super_block *squashfsb;
  10. int nblocks = -1;
  11. unsigned char *buf;
  12. const char *compress_name;
  13. int i = 0;

  14. buf = kmalloc(size, GFP_KERNEL);
  15. if (!buf)
  16. return -1;

  17. minixsb = (struct minix_super_block *) buf;
  18. ext2sb = (struct ext2_super_block *) buf;
  19. romfsb = (struct romfs_super_block *) buf;
  20. cramfsb = (struct cramfs_super *) buf;
  21. squashfsb = (struct squashfs_super_block *) buf;
  22. memset(buf, 0xe5, size);

  23. /*
  24. * Read block 0 to test for compressed kernel
  25. */
  26. sys_lseek(fd, start_block * BLOCK_SIZE, 0);
  27. sys_read(fd, buf, size);
  28. // Eric Ju Jul 27th 2016
  29. printk("start_block:%d\n",start_block);
  30. for(i=0;i<size;i++)
  31. printk("0x%x ",*(buf+i));
  32. printk("\n");

  33. *decompressor = decompress_method(buf, size, &compress_name);
  34. if (compress_name) {
  35. printk(KERN_NOTICE "RAMDISK: %s image found at block %d\n",
  36. compress_name, start_block);
  37. if (!*decompressor)
  38. printk(KERN_EMERG
  39. "RAMDISK: %s decompressor not configured!\n",
  40. compress_name);
  41. nblocks = 0;
  42. goto done;
  43. }

  44. /* romfs is at block zero too */
  45. if (romfsb->word0 == ROMSB_WORD0 &&
  46. romfsb->word1 == ROMSB_WORD1) {
  47. printk(KERN_NOTICE
  48. "RAMDISK: romfs filesystem found at block %d\n",
  49. start_block);
  50. nblocks = (ntohl(romfsb->size)+BLOCK_SIZE-1)>>BLOCK_SIZE_BITS;
  51. goto done;
  52. }

  53. if (cramfsb->magic == CRAMFS_MAGIC) {
  54. printk(KERN_NOTICE
  55. "RAMDISK: cramfs filesystem found at block %d\n",
  56. start_block);
  57. nblocks = (cramfsb->size + BLOCK_SIZE - 1) >> BLOCK_SIZE_BITS;
  58. goto done;
  59. }

  60. /* squashfs is at block zero too */
  61. if (le32_to_cpu(squashfsb->s_magic) == SQUASHFS_MAGIC) {
  62. printk(KERN_NOTICE
  63. "RAMDISK: squashfs filesystem found at block %d\n",
  64. start_block);
  65. nblocks = (le64_to_cpu(squashfsb->bytes_used) + BLOCK_SIZE - 1)
  66. >> BLOCK_SIZE_BITS;
  67. goto done;
  68. }

  69. /*
  70. * Read block 1 to test for minix and ext2 superblock
  71. */
  72. sys_lseek(fd, (start_block+1) * BLOCK_SIZE, 0);
  73. sys_read(fd, buf, size);

  74. /* Try minix */
  75. if (minixsb->s_magic == MINIX_SUPER_MAGIC ||
  76. minixsb->s_magic == MINIX_SUPER_MAGIC2) {
  77. printk(KERN_NOTICE
  78. "RAMDISK: Minix filesystem found at block %d\n",
  79. start_block);
  80. nblocks = minixsb->s_nzones << minixsb->s_log_zone_size;
  81. goto done;
  82. }

  83. /* Try ext2 */
  84. if (ext2sb->s_magic == cpu_to_le16(EXT2_SUPER_MAGIC)) {
  85. printk(KERN_NOTICE
  86. "RAMDISK: ext2 filesystem found at block %d\n",
  87. start_block);
  88. nblocks = le32_to_cpu(ext2sb->s_blocks_count) <<
  89. le32_to_cpu(ext2sb->s_log_block_size);
  90. goto done;
  91. }

  92. printk(KERN_NOTICE
  93. "RAMDISK: Couldn't find valid RAM disk image starting at %d.\n",
  94. start_block);

  95. done:
  96. sys_lseek(fd, start_block * BLOCK_SIZE, 0);
  97. kfree(buf);
  98. return nblocks;
  99. }
可以看到,列印此日誌,是由於本函數中所有的分支都沒有成功匹配。正常情況下,該函數應該走入第一個分支,並跳轉至done處。
為什麼沒有走入第一個分支呢?猜測fd應該是指向initrd的檔案描述符,第一個分支之前的read應該為讀取initrd的第一個扇區內容,並進行magic比較,當匹配成功,說明initrd為正確的鏡像檔案,並調用相應解壓函數進行解壓縮。通過列印讀取出的buf內容來確認,initrd檔案是否正確。經過實驗,列印內容全部為0xFF,證明該initrd檔案錯誤。
為什麼initrd檔案會錯誤呢?磁碟上initrd.img檔案都是正確的。繼續跟蹤identify_ramdisk_image的調用處,看看fd到底是什麼?經過跟蹤,發現以下函數,位於核心源碼/init/do_mounts_initrd.c中。
 
  1. int __init initrd_load(void)
  2. {
  3. if (mount_initrd) {
  4. create_dev("/dev/ram", Root_RAM0);
  5. /*
  6. * Load the initrd data into /dev/ram0. Execute it as initrd
  7. * unless /dev/ram0 is supposed to be our actual root device,
  8. * in that case the ram disk is just set up here, and gets
  9. * mounted in the normal path.
  10. */
  11. if (rd_load_image("/initrd.image") && ROOT_DEV != Root_RAM0) {
  12. sys_unlink("/initrd.image");
  13. handle_initrd();
  14. return 1;
  15. }
  16. }
  17. sys_unlink("/initrd.image");
  18. return 0;
  19. }
initrd.image檔案?不對啊,我們磁碟上的initrd檔案名稱為initrd.img,怎麼會變為initrd.image呢?而且路徑也不對。猜測,initrd.image檔案應該是由啟動部分代碼建立了符號連結到了initrd.img上。繼續尋找initrd.image的建立是在哪裡?找到如下代碼,位於/init/initramfs.c中。
 
  1. static int __init populate_rootfs(void)
  2. {
  3. int i=0;
  4. char *err = unpack_to_rootfs(__initramfs_start,
  5. __initramfs_end - __initramfs_start);
  6. if (err)
  7. panic(err);/* Failed to decompress INTERNAL initramfs */
  8. if (initrd_start) {
  9. #ifdef CONFIG_BLK_DEV_RAM
  10. int fd;
  11. printk(KERN_INFO "Trying to unpack rootfs image as initramfs...\n");
  12. err = unpack_to_rootfs((char *)initrd_start,
  13. initrd_end - initrd_start);
  14. if (!err) {
  15. free_initrd();
  16. return 0;
  17. } else {
  18. clean_rootfs();
  19. unpack_to_rootfs(__initramfs_start,
  20. __initramfs_end - __initramfs_start);
  21. }
  22. printk(KERN_INFO "rootfs image is not initramfs (%s)"
  23. "; looks like an initrd\n", err);
  24. fd = sys_open("/initrd.image", O_WRONLY|O_CREAT, 0700);

  25. if (fd >= 0) {
  26. sys_write(fd, (char *)initrd_start,
  27. initrd_end - initrd_start);
  28. sys_close(fd);
  29. free_initrd();
  30. }
  31. #else
  32. printk(KERN_INFO "Unpacking initramfs...\n");
  33. err = unpack_to_rootfs((char *)initrd_start,
  34. initrd_end - initrd_start);
  35. if (err)
  36. printk(KERN_EMERG "Initramfs unpacking failed: %s\n", err);
  37. free_initrd();
  38. #endif
  39. }
  40. return 0;
  41. }
在這裡建立了initrd.image檔案。另外還有一句:sys_write(fd, (char *)initrd_start,initrd_end - initrd_start); 看來是核心從記憶體中將相關資料寫到/initrd.image中的,並非軟連結。那initrd_start又是哪裡呢?哪裡來的資料呢?將initrd_start嘗試列印後,發現initrd_start為0xffff880000100000,估計是已經轉換完的虛擬位址。既然知道initrd.image是從記憶體寫入根檔案系統的,那麼一定有其他程式將我們的initrd.img讀入記憶體。initrd.img是在哪裡被讀入記憶體的呢?這個檔案的路徑是在哪裡被提供的呢?想起來,lilo.multi.conf檔案中有指定initrd.img檔案的路徑。那一定是lilo在啟動時,將initrd.img讀入記憶體,並將地址傳遞給核心。那就繼續查看串口日誌。在剛剛啟動的日誌中有如下資訊:
 
  1. RAMDISK: 7fa36000 - 7ffff40e
  2. Allocated new RAMDISK: 00100000 - 006c940e
  3. Move RAMDISK from 000000007fa36000 - 000000007ffff40d to 00100000 - 006c940d
可以看到,RAMDISK的起始地址為0x7fa36000,那是不是剛剛的虛擬位址就是從這個物理地址轉換過去的呢?仔細看第三行,貌似核心將RAMDISK的內容移動到了0x00100000地址處。再對比剛才的虛擬位址0xffff880000100000,可以確定該虛擬位址一定是從0x00100000處映射的。因為核心對物理低地址做地址映射的習慣是,以一個虛擬高端地址為基底,再加上實際的物理地址作為位移。那麼列印一下RAMDISK被移動之前、移動之後的內容,看看是不是移動時出了錯誤。結果發現RAMDISK被移動之前就是0xFF。可以斷定,LILO將initrd.img載入記憶體時就已經錯了。從0xFF上看,實體記憶體應該是沒有被寫過,是上電後的初始狀態。突然想到還有另外一個資訊,在替換核心後,執行lilo64 -C lilo.multi.conf -s `pwd`時,lilo報了一個警告資訊:
 
  1. Normally any initial ramdisk (initrd) loaded with a kernel is loaded as
  2. high in memory as possible, but never above 15Mb. This is due to a BIOS
  3. limitation on older systems. On newer systems, this option enables using
  4. memory above 15Mb (up to a kernel imposed limit, around 768Mb) for
  5. passing the initrd to the kernel. The presence of this option merely
  6. indicates that your system does not have the old BIOS limitation.
再看RAMDISK的初始起始地址:0x7fa36000,很顯然該地址高於15MB處地址,說明LILO認為核心和initrd.img的大小超過了某個固定限制,將initrd放在了高端記憶體中。那為什麼LILO寫入記憶體失敗呢?從上面的提示資訊來看,應該是在剛上電,啟動時BIOS不支援訪問高端記憶體,所以LILO在調用BIOS的寫入程式時發生了錯誤,但LILO並沒有關心這個錯誤。
在LILO的HomePage上找到一篇技術文檔,其中很明確的表述了LILO會將initrd.img載入在記憶體的低端地址的尾部處(16MB以下)。16MB的限制是因為BIOS只使用24位的地址空間來傳輸資料。後經過閱讀LILO的代碼,LILO會按照核心鏡像大小的3倍與initrd.img的大小計算總和,當總和大於14MB,LILO認為14MB以下的低地址空間無法放入核心和initrd鏡像檔案,便認為BIOS是支援16MB以上地址空間的,於是LILO在載入initrd鏡像時,將initrd放在高地址空間中。
LILO的技術文檔:http://lilo.alioth.debian.org/olddoc/html/tech_21-5.html,感興趣的同學可以拿來看看哦。

相關文章

聯繫我們

該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里云無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.