Linux2.6核心進程調度核心程式碼分析

來源:互聯網
上載者:User

一.schedule()函數的嘗試分析

asmlinkage void __sched schedule(void)

{

struct task_struct *prev, *next;

       unsigned long *switch_count;

       struct rq *rq;

       int cpu;

need_resched:

       preempt_disable();   //暫時關閉核心搶佔

       cpu = smp_processor_id();    //擷取cpu

       rq = cpu_rq(cpu);        //取得cpu上的運行隊列

       rcu_qsctr_inc(cpu);

       prev = rq->curr;     //取得運行隊列上當前正在執行進程

       switch_count = &prev->nivcsw;

       release_kernel_lock(prev);

need_resched_nonpreemptible:

       schedule_debug(prev);

       hrtick_clear(rq);

       /*

        * Do the rq-clock update outside the rq lock:

        */

       local_irq_disable();

       update_rq_clock(rq);

       spin_lock(&rq->lock);

       clear_tsk_need_resched(prev);

//若prev不處於TASK_RUNNING狀態,且本次調度不是由核心搶佔(PREEMPT_ACTIVE)引起的,則檢查prev(將被替換掉的進程)是否存有還未被處理的訊號:若有,則將prev的狀態重新設為TASK_RUNNING,使其繼續留在運行隊列中;否則就使其無效(deactivate),即將其從運行隊列中移除

       if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {

              if (unlikely(signal_pending_state(prev->state, prev)))

                     prev->state = TASK_RUNNING;

              else

                     deactivate_task(rq, prev, 1);

              switch_count = &prev->nvcsw;

       }

#ifdef CONFIG_SMP

       if (prev->sched_class->pre_schedule)

              prev->sched_class->pre_schedule(rq, prev);

#endif

       if (unlikely(!rq->nr_running))

              idle_balance(cpu, rq);

//將當前進程插入運行隊列中(rq)

       prev->sched_class->put_prev_task(rq, prev);

//從運行隊列中擷取將要被調度的進程

       next = pick_next_task(rq, prev);

       if (likely(prev != next)) {
//開始切換

              sched_info_switch(prev, next);

//切換計數+1,當前正在執行進程curr指向next

              rq->nr_switches++;

              rq->curr = next;

              ++*switch_count;

//環境切換,重點關注!

              context_switch(rq, prev, next); /* unlocks the rq */

              /*

* the context switch might have flipped the stack from under

               * us, hence refresh the local variables.

               */

              cpu = smp_processor_id();

              rq = cpu_rq(cpu);

       } else

              spin_unlock_irq(&rq->lock);

       hrtick_set(rq);

       if (unlikely(reacquire_kernel_lock(current) < 0))

              goto need_resched_nonpreemptible;

//重新開啟核心搶佔式機制

       preempt_enable_no_resched();

       if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))

              goto need_resched;

}

二.context_switch()函數分析

context_switch()函數中真正完成寄存器和堆棧切換的,是它所調用的switch_to();而switch_to()實際上是個預處理宏,其定義如下:

#define switch_to(prev, next, last)                             \

do {                                                         \

       /*                                                     \

        * Context-switching clobbers all registers, so we clobber  \

        * them explicitly, via unused output variables.          \

        * (EAX and EBP is not listed because EBP is saved/restored    \

        * explicitly for wchan access and EAX is the return value of    \

        * __switch_to())                                      \

        */                                                    \

       unsigned long ebx, ecx, edx, esi, edi;                         \

                                                               \

以下為C語言的內聯彙編代碼,也是關鍵環境切換的核心所在

       asm volatile("pushfl\n\t"              /* save    flags */ \

                   "pushl %%ebp\n\t"             /* save    EBP   */    \

                   "movl %%esp,%[prev_sp]\n\t"   /* save    ESP   */ \

                   "movl %[next_sp],%%esp\n\t"    /* restore ESP   */ \

                   "movl $1f,%[prev_ip]\n\t"  /* save    EIP   */     \

                   "pushl %[next_ip]\n\t" /* restore EIP   */ \

                   "jmp __switch_to\n"    /* regparm call  */       \

                   "1:\t"                                        \

"popl %%ebp\n\t" /* restore EBP */ \

                   "popfl\n"                    /* restore flags */   \

                                                               \

                   /* output parameters */                      \

                   : [prev_sp] "=m" (prev->thread.sp),          \

                     [prev_ip] "=m" (prev->thread.ip),         \

                     "=a" (last),                                   \

                                                               \

                     /* clobbered output registers: */             \

                     "=b" (ebx), "=c" (ecx), "=d" (edx),        \

                     "=S" (esi), "=D" (edi)                          \

                                                                 \

                     /* input parameters: */                          \

                   : [next_sp]  "m" (next->thread.sp),          \

                     [next_ip]  "m" (next->thread.ip),         \

                                                                 \

                     /* regparm parameters for __switch_to(): */   \

                     [prev]     "a" (prev),                         \

                     [next]     "d" (next));                        \

} while (0)

1.內聯彙編參數分析

這裡的C語言內聯彙編代碼比較晦澀,先簡要分析一下參數,稍候我們將其還原為彙編代碼來分析。

/* output parameters */                       \

: [prev_sp] "=m" (prev->thread.sp),            \

[prev_ip] "=m" (prev->thread.ip),        \

"=a" (last),                                 \

這裡有三個輸出參數:prev_sp、prev_ip,以及約束碼為“a”(代表EAX寄存器)的last,按順序分別對應%0、%1和%2(在彙編代碼中,前兩者是以具名運算元%[prev_sp]、%[prev_ip]的形式引用的)。它們代表這段彙編代碼執行完畢後被改動的變數,分別對應於prev->thread.sp、prev->thread.ip和last。後面還有其他被改動的寄存器,不過不是重點。

/* input parameters: */                        \

: [next_sp]  "m" (next->thread.sp),            \

[next_ip]  "m" (next->thread.ip),        \

/* regparm parameters for __switch_to(): */ \

[prev]     "a" (prev),                       \

[next]     "d" (next));                      \

和上面的分析類似,前兩個輸入參數next_sp和next_ip分別代表將被調度運行的進程(next)所保存的堆棧指標和指令指標,即next->thread.sp和next->thread.ip;後兩個參數prev和next分別是將被調度出去的進程和將被調度運行的進程,它們分別被讀入eax和edx寄存器中,作為__switch_to()的regparm參數。

2.彙編程式碼分析

       我們將上面的彙編代碼改寫一下:

1)        pushfl                                         /* save    flags */    

2)        pushl      %ebp                          /* save    EBP   */ 

3)        movl      %esp, %prev->thread.sp           /* save    ESP   */

4)        movl      %next->thread.sp, %esp           /* restore ESP   */

5)        movl      $1f,  %prev->thread.ip            /* save    EIP   */

6)        pushl      %next->thread.ip                 /* restore EIP   */   

7)        jmp __switch_to                                 /* regparm call  */     

8)        1:                                     

9)        popl       %ebp                            /* restore EBP   */ 

10)     popfl

這段代碼雖然小,但是暗藏玄機,進程切換的精髓就在其中。第1、2行將標誌寄存器和ebp寄存器壓入堆棧,第3行將當前進程(prev)的系統空間堆棧指標存入prev->thread.sp。第4行就是進程切換的關鍵:在這一行中,next進程的系統空間堆棧指標被拷貝給了esp寄存器,那麼,從這一行開始就是在使用新調度進來的next進程的堆棧了,也就是說此時“當前進程”這個概念就是指next,而不是prev。隨後,第5行將標號1(1f中的f代表forward,即向後文方向尋找標號1)的地址,也就是第9行代碼的地址,存入prev->thread.ip,作為prev進程下一次被調度而切入運行時的“返回地址”;第6行將next->thread.ip的值壓入堆棧中,這個值就是上一次next進程切出時在第5行存入的返回地址。第7行跳轉到__switch_to函數(它主要處理TSS,繼續切換其餘儲存在TSS中的上下文資訊,如fs和gs等寄存器,其中關鍵是將核心空間堆棧指標恢復成next->esp0)。由於這裡是用jmp而不是call進入__switch_to的,堆棧頂端並沒有call指令壓入的返回地址,因此__switch_to函數執行ret指令返回時,剛才第6行壓入堆棧的“返回地址”就被彈到eip寄存器中,於是next進程執行第9和10行,恢復ebp和標誌寄存器,next進程也就正式開始執行了。

聯繫我們

該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里云無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.