hung_task_timeout_secs 简单学习-阿里云开发者社区

hung_task_timeout_secs 简单学习

2016-03-28 8081

版权

本文内容由阿里云实名注册用户自发贡献，版权归原作者所有，阿里云开发者社区不拥有其著作权，亦不承担相应法律责任。具体规则请查看《阿里云开发者社区用户服务协议》和《阿里云开发者社区知识产权保护指引》。如果您发现本社区中有涉嫌抄袭的内容，填写侵权投诉表单进行举报，一经查实，本社区将立刻删除涉嫌侵权内容。

简介：

[原文链接]

http://hi.baidu.com/stealth_space/item/2007d93fe93ca28fb711dbac

接上篇简单学习了解下 hung_task_timeout_secs 相关知识.

OS: 2.6.18-194.32.1.el5 x86_64

# 先从系统中看下 hung_task 相关的参数及其参数值

[sina@localhost ~]$ sudo sysctl -a | grep hung

kernel.hung_task_warnings = 0

kernel.hung_task_timeout_secs = 120

kernel.hung_task_check_count = 4194304

kernel.hung_task_panic = 0

[sina@localhost ~]$

# 源码如下 (v2.6.37.1)，建议结合 kernel/sysctl.c 一起阅读

# kernel/hung_task.c

/*
 * Detect Hung Task
 *
 * kernel/hung_task.c - kernel thread for detecting tasks stuck in D state
 *
 */

#include <linux/mm.h>

#include <linux/cpu.h>

#include <linux/nmi.h>

#include <linux/init.h>

#include <linux/delay.h>

#include <linux/freezer.h>

#include <linux/kthread.h>

#include <linux/lockdep.h>

#include <linux/module.h>

#include <linux/sysctl.h>

* The number of tasks checked:

unsigned long __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;

* Limit number of tasks checked in a batch.

* This value controls the preemptibility of khungtaskd since preemption

* is disabled during the critical section. It also controls the size of

* the RCU grace period. So it needs to be upper-bound.

#define HUNG_TASK_BATCHING 1024

* Zero means infinite timeout - no checking done:

unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120;

unsigned long __read_mostly sysctl_hung_task_warnings = 10;

static int __read_mostly did_panic;

static struct task_struct *watchdog_task;

* Should we panic (and reboot, if panic_timeout= is set) when a

* hung task is detected:

unsigned int __read_mostly sysctl_hung_task_panic =

CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE;

/*
 * Handler for the "hung_task_panic=" boot parameter.
 *
 * Parses @str with simple_strtoul() (base argument 0) and stores the result
 * in sysctl_hung_task_panic. Always returns 1.
 */
static int __init hung_task_panic_setup(char *str)
{
	unsigned long parsed = simple_strtoul(str, NULL, 0);

	sysctl_hung_task_panic = parsed;

	return 1;
}
__setup("hung_task_panic=", hung_task_panic_setup);

/*
 * Notifier callback (registered on panic_notifier_list by hung_task_init()):
 * records that a panic happened so that check_hung_uninterruptible_tasks()
 * stops reporting. None of the arguments are used.
 */
static int hung_task_panic(struct notifier_block *this,
			   unsigned long event, void *ptr)
{
	did_panic = 1;

	return NOTIFY_DONE;
}

/* Notifier block that routes panic notifications to hung_task_panic(). */
static struct notifier_block panic_block = {
	.notifier_call	= hung_task_panic,
};

static void check_hung_task(struct task_struct *t, unsigned long timeout)

{

unsigned long switch_count = t->nvcsw + t->nivcsw;

* Ensure the task is not frozen.

* Also, when a freshly created task is scheduled once, changes

* its state to TASK_UNINTERRUPTIBLE without having ever been

* switched out once, it musn't be checked.

if (unlikely(t->flags & PF_FROZEN || !switch_count))

return;

if (switch_count != t->last_switch_count) {

t->last_switch_count = switch_count;

return;

}

if (!sysctl_hung_task_warnings)

return;

sysctl_hung_task_warnings--;

* Ok, the task did not get scheduled for more than 2 minutes,

* complain:

printk(KERN_ERR "INFO: task %s:%d blocked for more than "

"%ld seconds.\n", t->comm, t->pid, timeout);

printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""

" disables this message.\n");

sched_show_task(t);

debug_show_held_locks(t);

touch_nmi_watchdog();

if (sysctl_hung_task_panic)

panic("hung_task: blocked tasks");

}

/*
 * To avoid extending the RCU grace period for an unbounded amount of time,
 * periodically exit the critical section and enter a new one.
 *
 * For preemptible RCU it is sufficient to call rcu_read_unlock in order
 * to exit the grace period. For classic RCU, a reschedule is required.
 *
 * @g and @t are pinned with get_task_struct() before the read lock is
 * dropped and released with put_task_struct() only after it has been
 * re-acquired, so both pointers stay usable by the caller across the
 * unlocked window. The caller must hold rcu_read_lock() on entry and
 * still holds it on return.
 */
static void rcu_lock_break(struct task_struct *g, struct task_struct *t)
{
	get_task_struct(g);
	get_task_struct(t);
	rcu_read_unlock();
	cond_resched();
	rcu_read_lock();
	put_task_struct(t);
	put_task_struct(g);
}

* Check whether a TASK_UNINTERRUPTIBLE does not get woken up for

* a really long time (120 seconds). If that happens, print out

* a warning.

static void check_hung_uninterruptible_tasks(unsigned long timeout)

{

int max_count = sysctl_hung_task_check_count;

int batch_count = HUNG_TASK_BATCHING;

struct task_struct *g, *t;

* If the system crashed already then all bets are off,

* do not report extra hung tasks:

if (test_taint(TAINT_DIE) || did_panic)

return;

rcu_read_lock();

do_each_thread(g, t) {

if (!max_count--)

goto unlock;

if (!--batch_count) {

batch_count = HUNG_TASK_BATCHING;

rcu_lock_break(g, t);

/* Exit if t or g was unhashed during refresh. */

if (t->state == TASK_DEAD || g->state == TASK_DEAD)

goto unlock;

}

/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */

if (t->state == TASK_UNINTERRUPTIBLE)

check_hung_task(t, timeout);

} while_each_thread(g, t);

unlock:

rcu_read_unlock();

}

/*
 * timeout_jiffies - convert the timeout sysctl value to a sleep length
 * @timeout: timeout in seconds; 0 disables the watchdog
 *
 * Returns @timeout converted to jiffies, or MAX_SCHEDULE_TIMEOUT when
 * @timeout is 0 or so large that @timeout * HZ would exceed
 * MAX_SCHEDULE_TIMEOUT (or wrap around).
 */
static unsigned long timeout_jiffies(unsigned long timeout)
{
	/* timeout of 0 will disable the watchdog */
	if (!timeout)
		return MAX_SCHEDULE_TIMEOUT;

	/*
	 * Clamp instead of multiplying blindly: an overflowing product
	 * would yield a bogus short (or negative, once passed on as a
	 * signed long) sleep interval.
	 */
	if (timeout > (unsigned long)MAX_SCHEDULE_TIMEOUT / HZ)
		return MAX_SCHEDULE_TIMEOUT;

	return timeout * HZ;
}

* Process updating of timeout sysctl

int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,

void __user *buffer,

size_t *lenp, loff_t *ppos)

{

int ret;

ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);

if (ret || !write)

goto out;

wake_up_process(watchdog_task);

out:

return ret;

}

* kthread which checks for tasks stuck in D state

static int watchdog(void *dummy)

{

set_user_nice(current, 0);

for ( ; ; ) {

unsigned long timeout = sysctl_hung_task_timeout_secs;

while (schedule_timeout_interruptible(timeout_jiffies(timeout)))

timeout = sysctl_hung_task_timeout_secs;

check_hung_uninterruptible_tasks(timeout);

}

return 0;

}

/*
 * Init hook: registers panic_block on panic_notifier_list and starts the
 * "khungtaskd" kthread running watchdog(), saving it in watchdog_task.
 * The kthread_run() result is stored without being checked. Always
 * returns 0.
 */
static int __init hung_task_init(void)
{
	atomic_notifier_chain_register(&panic_notifier_list, &panic_block);

	watchdog_task = kthread_run(watchdog, NULL, "khungtaskd");

	return 0;
}
module_init(hung_task_init);

[ -EOF- ]

文章标签：

Linux

hung_task_timeout_secs 简单学习

热门文章

最新文章

相关电子书