一.schedule()函數的嘗試分析
/*
 * schedule() - pick the next task for the current CPU's runqueue and
 * switch to it.
 *
 * Takes no arguments and returns nothing. With preemption disabled, and
 * with local interrupts off and rq->lock held around the selection, it
 * settles the state of the outgoing task (prev), selects the incoming
 * task (next) through pick_next_task(), and calls context_switch() when
 * the two differ. The whole sequence is retried while TIF_NEED_RESCHED
 * is still set on exit.
 */
asmlinkage void __sched schedule(void)
{
	struct task_struct *prev, *next;
	unsigned long *switch_count;
	struct rq *rq;
	int cpu;

need_resched:
	preempt_disable();		/* temporarily disable kernel preemption */
	cpu = smp_processor_id();	/* id of the CPU we are running on */
	rq = cpu_rq(cpu);		/* runqueue belonging to that CPU */
	rcu_qsctr_inc(cpu);
	prev = rq->curr;		/* task currently running on this runqueue */
	switch_count = &prev->nivcsw;	/* default counter; may be redirected below */

	release_kernel_lock(prev);
need_resched_nonpreemptible:

	schedule_debug(prev);

	hrtick_clear(rq);

	/*
	 * Do the rq-clock update outside the rq lock:
	 */
	local_irq_disable();
	update_rq_clock(rq);
	spin_lock(&rq->lock);
	clear_tsk_need_resched(prev);

	/*
	 * Only when prev->state is non-zero and PREEMPT_ACTIVE is not set in
	 * preempt_count(): if signal_pending_state() reports a pending signal
	 * for prev, put prev back into TASK_RUNNING; otherwise call
	 * deactivate_task() on it. In both cases the switch is then counted
	 * in prev->nvcsw instead of prev->nivcsw.
	 */
	if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
		if (unlikely(signal_pending_state(prev->state, prev)))
			prev->state = TASK_RUNNING;
		else
			deactivate_task(rq, prev, 1);
		switch_count = &prev->nvcsw;
	}

#ifdef CONFIG_SMP
	/* Optional per-class hook run before the next task is picked. */
	if (prev->sched_class->pre_schedule)
		prev->sched_class->pre_schedule(rq, prev);
#endif

	/* Nothing runnable on this runqueue: call idle_balance() first. */
	if (unlikely(!rq->nr_running))
		idle_balance(cpu, rq);

	/* Hand prev back to its scheduling class via the put_prev_task hook. */
	prev->sched_class->put_prev_task(rq, prev);
	/* Choose the task that will run next. */
	next = pick_next_task(rq, prev);

	if (likely(prev != next)) {
		/* A real switch is about to happen. */
		sched_info_switch(prev, next);

		/* Bump the switch counters and make next the runqueue's current task. */
		rq->nr_switches++;
		rq->curr = next;
		++*switch_count;

		/* Perform the context switch: the key step of this function. */
		context_switch(rq, prev, next); /* unlocks the rq */
		/*
		 * the context switch might have flipped the stack from under
		 * us, hence refresh the local variables.
		 */
		cpu = smp_processor_id();
		rq = cpu_rq(cpu);
	} else
		spin_unlock_irq(&rq->lock);

	hrtick_set(rq);

	/* A negative return from reacquire_kernel_lock() restarts the selection. */
	if (unlikely(reacquire_kernel_lock(current) < 0))
		goto need_resched_nonpreemptible;

	/* Re-enable kernel preemption. */
	preempt_enable_no_resched();
	if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
		goto need_resched;
}
二.context_switch()函數分析
context_switch()函數中真正完成切換工作的是switch_to(),而switch_to()實際上是個預處理宏:
/*
 * switch_to(prev, next, last) - switch the CPU from task prev to task next.
 *
 * The asm saves the flags and EBP on the current stack, stores ESP into
 * prev->thread.sp, loads ESP from next->thread.sp, stores the address of
 * local label 1 into prev->thread.ip, pushes next->thread.ip and jumps to
 * __switch_to with prev in EAX and next in EDX. At label 1 EBP and the
 * flags are popped again, and the value left in EAX is stored into last.
 */
#define switch_to(prev, next, last)					\
do {									\
	/*								\
	 * Context-switching clobbers all registers, so we clobber	\
	 * them explicitly, via unused output variables.		\
	 * (EAX and EBP is not listed because EBP is saved/restored	\
	 * explicitly for wchan access and EAX is the return value of	\
	 * __switch_to())						\
	 */								\
	unsigned long ebx, ecx, edx, esi, edi;				\
									\
	/* Inline assembly below: the core of the context switch. */	\
	asm volatile("pushfl\n\t"		/* save    flags */	\
		     "pushl %%ebp\n\t"		/* save    EBP   */	\
		     "movl %%esp,%[prev_sp]\n\t"	/* save    ESP   */ \
		     "movl %[next_sp],%%esp\n\t"	/* restore ESP   */ \
		     "movl $1f,%[prev_ip]\n\t"	/* save    EIP   */	\
		     "pushl %[next_ip]\n\t"	/* restore EIP   */	\
		     "jmp __switch_to\n"	/* regparm call  */	\
		     "1:\t"						\
		     "popl %%ebp\n\t"		/* restore EBP   */	\
		     "popfl\n"			/* restore flags */	\
									\
		     /* output parameters */				\
		     : [prev_sp] "=m" (prev->thread.sp),		\
		       [prev_ip] "=m" (prev->thread.ip),		\
		       "=a" (last),					\
									\
		       /* clobbered output registers: */		\
		       "=b" (ebx), "=c" (ecx), "=d" (edx),		\
		       "=S" (esi), "=D" (edi)				\
									\
		       /* input parameters: */				\
		     : [next_sp]  "m" (next->thread.sp),		\
		       [next_ip]  "m" (next->thread.ip),		\
									\
		       /* regparm parameters for __switch_to(): */	\
		       [prev]     "a" (prev),				\
		       [next]     "d" (next));				\
} while (0)
1.內聯彙編參數分析
這裡的C語言內聯彙編代碼比較晦澀,先簡要分析一下參數,稍後我們將其還原為彙編代碼來分析。
/* output parameters */ \
: [prev_sp] "=m" (prev->thread.sp), \
[prev_ip] "=m" (prev->thread.ip), \
"=a" (last), \
這裡有三個輸出參數:prev_sp、prev_ip和帶約束“=a”(代表EAX寄存器)的last,分別可用%0、%1和%2來表示,代表這段彙編代碼執行完畢後被改動的變數,分別對應於prev->thread.sp、prev->thread.ip和last;後面還有其他被改動的寄存器(ebx、ecx、edx、esi、edi),不過不是重點。
/* input parameters: */ \
: [next_sp] "m" (next->thread.sp), \
[next_ip] "m" (next->thread.ip), \
/* regparm parameters for __switch_to(): */ \
[prev] "a" (prev), \
[next] "d" (next)); \
和上面的分析類似,前兩個輸入參數代表將被調度運行的進程(next)的堆棧指標和指令指標,然後是將被調度出去的進程prev和將被調度運行的進程next,分別讀入eax和edx寄存器中。
2.彙編代碼分析
我們將上面的彙編代碼改寫一下:
1) pushfl /* save flags */
2) pushl %ebp /* save EBP */
3) movl %esp, %prev->thread.sp /* save ESP */
4) movl %next->thread.sp, %esp
/* restore ESP */
5) movl $1f, %prev->thread.ip /* save EIP */
6) pushl %next->thread.ip /* restore EIP */
7) jmp __switch_to /* regparm call */
8) 1:
9) popl %ebp /* restore EBP */
10) popfl
這段代碼雖然小,但是暗藏玄機,進程切換的精髓就在其中。第1、2行將標誌寄存器和ebp寄存器壓入堆棧,第3行將當前進程(prev)的系統空間堆棧指標存入prev->thread.sp。第4行就是進程切換的關鍵,在這一行中next進程的系統空間堆棧指標被拷貝給了esp寄存器,那麼,從這一行開始就是在使用新調度進來的next進程的堆棧了,也就是說此時“當前進程”這個概念就是指next,而不是prev。隨後,第5行將標號1(f代表forward)的地址,也就是第9行代碼的地址,存入prev->thread.ip,作為prev進程下一次被調度而切入運行時的“返回地址”;第6行將next->thread.ip的值壓入堆棧中,它就是上一次next進程切出時在第5行存入的返回地址。第7行跳轉到__switch_to函數(它主要處理TSS,繼續切換其餘儲存在TSS中的上下文資訊,如fs和gs等寄存器,其中關鍵是將核心空間堆棧指標恢復成next->esp0)。__switch_to函數以ret指令返回後,剛才第6行壓入堆棧的“返回地址”就被彈到eip寄存器中,於是next進程執行第9和10行,恢復ebp和標誌寄存器,next進程也就正式開始執行了。