我们在做Linux内核中断剖析时, 只是简单地聊了一下SOFTIRQ, 而没有进行深入分析. Linux内核将对一个外部设备中断的处理分成两大部分: HARDIRQ以及SOFTIRQ. HARDIRQ部分在执行时处理器的中断是关闭的, 所以驱动程序的中断处理例程只应该完成一些关键的中断操作, 而将耗时的操作放到SOFTIRQ部分执行. 本篇文章我们将对这部分进行深入讨论.
SoftIrq的应用非常广泛, 例如我们常见的网卡在做网络包的收发, 封装好用来做延迟操作的tasklet的实现等.
SoftIrq源代码分析
首先看一下Linux内核当中的softirq类型(见下面的枚举). softirq子系统维护着一个 struct softirq_action softirq_vec[NR_SOFTIRQS] 数组, 每一种softirq类型对应数组中的一项.
//include/linux/interrupt.h
/*
 * Softirq types. The values are used as indices into softirq_vec[]
 * (see open_softirq()), and NR_SOFTIRQS is the number of entries.
 */
enum
{
HI_SOFTIRQ=0,
TIMER_SOFTIRQ,
NET_TX_SOFTIRQ,// network transmit (TX)
NET_RX_SOFTIRQ,// network receive (RX)
BLOCK_SOFTIRQ,
IRQ_POLL_SOFTIRQ,
TASKLET_SOFTIRQ,// softirq used by the tasklet implementation
SCHED_SOFTIRQ,
HRTIMER_SOFTIRQ, /* Unused, but kept as tools rely on the numbering. Sigh! */
RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */
NR_SOFTIRQS
};
在start_kernel的时候, 在做完中断、定时器等初始化后, 会进行softirq的初始化动作:
//init/main.c
/* Excerpt of start_kernel(); each "..." line marks code elided by the article. */
asmlinkage __visible void __init start_kernel(void){
...
init_IRQ();
...
init_timers();
hrtimers_init();
softirq_init(); // softirq initialization, run after the IRQ and timer setup above
...
}
softirq_init的动作很简单, 只做了两件事情. 第一件事情: 为每个核分别创建了tasklet_vec(对应TASKLET_SOFTIRQ类型软中断)和tasklet_hi_vec(对应HI_SOFTIRQ类型软中断)两个链表. 第二件事情: 为数组softirq_vec[NR_SOFTIRQS]中TASKLET_SOFTIRQ和HI_SOFTIRQ这两种类型对应的softirq_action注册各自的回调处理函数.
//kernel/softirq.c
/* Softirq handler table: one softirq_action slot per softirq type (NR_SOFTIRQS entries). */
static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
/*
 * open_softirq - install the handler for one softirq type.
 * @nr:     index into softirq_vec[]; softirq_init() passes softirq type
 *          constants such as TASKLET_SOFTIRQ. Not range-checked here.
 * @action: callback stored in softirq_vec[nr].action; __do_softirq()
 *          later invokes it with a pointer to that table entry.
 */
void open_softirq(int nr, void (*action)(struct softirq_action *))
{
softirq_vec[nr].action = action;
}
/*
 * softirq_init - boot-time softirq setup, called from start_kernel().
 *
 * Initializes the per-CPU tasklet lists and registers the handlers for the
 * two tasklet-backed softirq types.
 */
void __init softirq_init(void)
{
int cpu;
// Step 1: for every possible CPU, point the tail of each tasklet list at
// that list's own head field (presumably the empty-list state).
for_each_possible_cpu(cpu) {
per_cpu(tasklet_vec, cpu).tail =
&per_cpu(tasklet_vec, cpu).head;
per_cpu(tasklet_hi_vec, cpu).tail =
&per_cpu(tasklet_hi_vec, cpu).head;
}
// Step 2: register the callbacks for the two tasklet softirq types.
open_softirq(TASKLET_SOFTIRQ, tasklet_action);
open_softirq(HI_SOFTIRQ, tasklet_hi_action);
}
接下来我们重点看一下softirq的处理核心函数__do_softirq. 它被调用的时间点是: 处理完中断函数后会调用irq_exit, 并在irq_exit中进行处理(关于这点不清楚的可以回顾一下之前的文章: Linux内核中断剖析--外部中断(上)), 这里就不再做过多介绍. 我们直接分析核心函数__do_softirq.
//kernel/softirq.c
/*
 * irq_exit - leave hard-interrupt context.
 *
 * Drops the HARDIRQ count and, if no interrupt context remains and a
 * softirq is pending, processes softirqs through invoke_softirq().
 */
void irq_exit(void)
{
/* Either disable interrupts here, or (when the arch defines
 * __ARCH_IRQ_EXIT_IRQS_DISABLED) assert that they already are. */
#ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
local_irq_disable();
#else
lockdep_assert_irqs_disabled();
#endif
account_irq_exit_time(current);
preempt_count_sub(HARDIRQ_OFFSET);// remove this hardirq's HARDIRQ_OFFSET from preempt_count
if (!in_interrupt() && local_softirq_pending())// only when in_interrupt() is false and a softirq is pending; per the article this prevents nested processing
invoke_softirq();// this is the path that reaches the key function __do_softirq()
tick_irq_exit();
rcu_irq_exit();
trace_hardirq_exit(); /* must be last! */
}
/*
 * invoke_softirq - decide how the pending softirqs get processed.
 *
 * Called from irq_exit(). Returns without doing anything when
 * ksoftirqd_running() reports true for the current pending mask; hands the
 * work off through wakeup_softirqd() when force_irqthreads is set;
 * otherwise processes the softirqs right away.
 */
static inline void invoke_softirq(void)
{
	if (ksoftirqd_running(local_softirq_pending()))
		return;

	/* Forced irq threading: do not process inline, defer instead. */
	if (force_irqthreads) {
		wakeup_softirqd();
		return;
	}

#ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
	/*
	 * irq_exit() runs on the irq stack here, which should be nearly
	 * empty at this point, so processing softirqs on the current
	 * stack is safe.
	 */
	__do_softirq();
#else
	/*
	 * irq_exit() runs on the task stack, which may already be deep.
	 * Switch to a dedicated stack to avoid overrunning it; per the
	 * article, this path also ends up in __do_softirq().
	 */
	do_softirq_own_stack();
#endif
}
/*
 * __do_softirq - core softirq processing loop (the key function of this
 * walkthrough).
 *
 * Snapshots the pending-softirq bitmask, clears it, and then, with
 * interrupts enabled, calls the registered action of every softirq whose
 * bit was set, lowest bit first. If more softirqs were raised in the
 * meantime, the pass is repeated while the MAX_SOFTIRQ_TIME budget has not
 * elapsed, no reschedule is needed and the MAX_SOFTIRQ_RESTART counter is
 * not exhausted; otherwise the remainder is handed off through
 * wakeup_softirqd().
 */
asmlinkage __visible void __softirq_entry __do_softirq(void)
{
	unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
	unsigned long old_flags = current->flags;
	int max_restart = MAX_SOFTIRQ_RESTART;
	struct softirq_action *h;
	bool in_hardirq;
	__u32 pending;
	int softirq_bit;

	/*
	 * Mask out PF_MEMALLOC as the current task context is borrowed for the
	 * softirq. A softirq handled, such as network RX, might set PF_MEMALLOC
	 * again if the socket is related to swapping.
	 */
	current->flags &= ~PF_MEMALLOC;

	/* Bitmask of raised softirqs: bit N corresponds to softirq type N. */
	pending = local_softirq_pending();
	account_irq_enter_time(current);

	/* Mark entry into softirq context. */
	__local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
	in_hardirq = lockdep_softirq_start();

restart:
	/* Reset the pending bitmask before enabling irqs */
	set_softirq_pending(0);

	local_irq_enable();

	h = softirq_vec;

	/*
	 * ffs() yields the 1-based position of the lowest set bit (0 when
	 * none is set). Each iteration therefore advances h to the next
	 * pending softirq_vec entry, runs it, and shifts the handled bits
	 * out of pending.
	 */
	while ((softirq_bit = ffs(pending))) {
		unsigned int vec_nr;
		int prev_count;

		h += softirq_bit - 1;

		vec_nr = h - softirq_vec;
		prev_count = preempt_count();

		kstat_incr_softirqs_this_cpu(vec_nr);

		trace_softirq_entry(vec_nr);
		h->action(h);	/* call the handler registered via open_softirq() */
		trace_softirq_exit(vec_nr);

		/* A handler must leave preempt_count as it found it; report and repair. */
		if (unlikely(prev_count != preempt_count())) {
			pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
			       vec_nr, softirq_to_name[vec_nr], h->action,
			       prev_count, preempt_count());
			preempt_count_set(prev_count);
		}
		h++;
		pending >>= softirq_bit;
	}

	if (__this_cpu_read(ksoftirqd) == current)
		rcu_softirq_qs();
	local_irq_disable();

	/* Softirqs raised while the handlers ran show up here. */
	pending = local_softirq_pending();
	if (pending) {
		if (time_before(jiffies, end) && !need_resched() &&
		    --max_restart)
			goto restart;

		wakeup_softirqd();
	}

	lockdep_softirq_end(in_hardirq);
	account_irq_exit_time(current);
	__local_bh_enable(SOFTIRQ_OFFSET);
	WARN_ON_ONCE(in_interrupt());
	/* Restore the PF_MEMALLOC bit saved in old_flags on entry. */
	current_restore_flags(old_flags, PF_MEMALLOC);
}