1 From 4cfbf1a112e17b071fcdae207e4f785c096a198c Mon Sep 17 00:00:00 2001
2 From: Saul Wold <sgw@linux.intel.com>
3 Date: Thu, 2 Jul 2020 09:46:12 -0700
4 Subject: [PATCH] Notification of Death of arbitrary processes
6 Note: this commit was copied from Titanium Cloud Rel2
8 This exposes a new feature which may be called to request
9 notification when an arbitrary process changes state. The
10 caller specifies a pid, signal number, and event mask, and
11 when that pid dies, or is stopped, or anything else that
12 would normally cause a SIGCHLD, the kernel will send the
13 specified signal to the caller if the event is in the event
14 mask originally passed down. The siginfo_t struct will
15 contain the same information as would be included with SIGCHLD.
17 This is exposed to userspace via the prctl() call with the
18 PR_DO_NOTIFY_TASK_STATE option.
20 Signed-off-by: Jim Somerville <Jim.Somerville@windriver.com>
21 Signed-off-by: Zhang Zhiguo <zhangzhg@neusoft.com>
22 Signed-off-by: Shuicheng Lin <shuicheng.lin@intel.com>
23 Signed-off-by: Saul Wold <sgw@linux.intel.com>
25 include/linux/init_task.h | 9 ++
26 include/linux/sched.h | 6 +
27 include/uapi/linux/prctl.h | 18 +++
29 init/init_task.c | 1 +
31 kernel/death_notify.c | 228 +++++++++++++++++++++++++++++++++++++
32 kernel/death_notify.h | 46 ++++++++
35 kernel/signal.c | 11 ++
37 12 files changed, 353 insertions(+)
38 create mode 100644 kernel/death_notify.c
39 create mode 100644 kernel/death_notify.h
41 diff --git a/include/linux/init_task.h b/include/linux/init_task.h
42 index a7083a45a26c..1ad2341b3036 100644
43 --- a/include/linux/init_task.h
44 +++ b/include/linux/init_task.h
46 extern struct files_struct init_files;
47 extern struct fs_struct init_fs;
48 extern struct nsproxy init_nsproxy;
50 +#ifdef CONFIG_SIGEXIT
51 +#define INIT_SIGEXIT(tsk) \
52 + .notify = LIST_HEAD_INIT(tsk.notify), \
53 + .monitor = LIST_HEAD_INIT(tsk.monitor),
55 +#define INIT_SIGEXIT(tsk)
58 extern struct group_info init_groups;
59 extern struct cred init_cred;
61 diff --git a/include/linux/sched.h b/include/linux/sched.h
62 index 9b35aff09f70..d6f5a2711b7d 100644
63 --- a/include/linux/sched.h
64 +++ b/include/linux/sched.h
65 @@ -1015,6 +1015,12 @@ struct task_struct {
69 +#ifdef CONFIG_SIGEXIT
70 + /* list of processes to notify on death */
71 + struct list_head notify;
72 + /* list of outstanding monitor requests */
73 + struct list_head monitor;
75 #ifdef CONFIG_NUMA_BALANCING
77 unsigned int numa_scan_period;
78 diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
79 index b4875a93363a..d50c435a04ea 100644
80 --- a/include/uapi/linux/prctl.h
81 +++ b/include/uapi/linux/prctl.h
83 # define PR_ENDIAN_LITTLE 1 /* True little endian mode */
84 # define PR_ENDIAN_PPC_LITTLE 2 /* "PowerPC" pseudo little endian */
86 +#ifdef CONFIG_SIGEXIT
87 +#define PR_DO_NOTIFY_TASK_STATE 17 /* Set/get notification for task
90 +/* This is the data structure for requesting process death
91 + * (and other state change) information. Sig of -1 means
92 + * query, sig of 0 means deregistration, positive sig means
93 + * that you want to set it. sig and events are value-result
94 + * and will be updated with the previous values on every
97 +struct task_state_notify_info {
100 + unsigned int events;
104 /* Get/set process seccomp mode */
105 #define PR_GET_SECCOMP 21
106 #define PR_SET_SECCOMP 22
107 diff --git a/init/Kconfig b/init/Kconfig
108 index bdf4f284509b..8102ef3d2adb 100644
111 @@ -1648,6 +1648,21 @@ config VM_EVENT_COUNTERS
112 on EXPERT systems. /proc/vmstat will only show page counts
113 if VM event counters are disabled.
116 + bool "Notification of death of arbitrary processes"
119 + When enabled this exposes a new feature which may be called to request
120 + notification when an arbitrary process changes state. The caller specifies
121 + a pid, signal number, and event mask, and when that pid dies, or is
122 + stopped, or anything else that would normally cause a SIGCHLD, the
123 + kernel will send the specified signal to the caller if the event is in
124 + the event mask originally passed down. The siginfo_t struct will
125 + contain the same information as would be included with SIGCHLD.
127 + This is exposed to userspace via the prctl()
128 + call with the PR_DO_NOTIFY_TASK_STATE option
132 bool "Enable SLUB debugging support" if EXPERT
133 diff --git a/init/init_task.c b/init/init_task.c
134 index 5aebe3be4d7c..d0891101ac7d 100644
135 --- a/init/init_task.c
136 +++ b/init/init_task.c
137 @@ -116,6 +116,7 @@ struct task_struct init_task
138 .alloc_lock = __SPIN_LOCK_UNLOCKED(init_task.alloc_lock),
139 .journal_info = NULL,
140 INIT_CPU_TIMERS(init_task)
141 + INIT_SIGEXIT(init_task)
142 .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(init_task.pi_lock),
143 .timer_slack_ns = 50000, /* 50 usec default slack */
144 .thread_pid = &init_struct_pid,
145 diff --git a/kernel/Makefile b/kernel/Makefile
146 index 1ea0ba13a445..f839f425ca09 100644
147 --- a/kernel/Makefile
148 +++ b/kernel/Makefile
149 @@ -99,6 +99,7 @@ obj-$(CONFIG_TRACEPOINTS) += trace/
150 obj-$(CONFIG_IRQ_WORK) += irq_work.o
151 obj-$(CONFIG_CPU_PM) += cpu_pm.o
152 obj-$(CONFIG_BPF) += bpf/
153 +obj-$(CONFIG_SIGEXIT) += death_notify.o
155 obj-$(CONFIG_PERF_EVENTS) += events/
157 diff --git a/kernel/death_notify.c b/kernel/death_notify.c
159 index 000000000000..5819d35a2564
161 +++ b/kernel/death_notify.c
164 + * kernel/death_notify.c, Process death notification support
166 + * Copyright (c) 2006-2014 Wind River Systems, Inc.
168 + * This program is free software; you can redistribute it and/or modify
169 + * it under the terms of the GNU General Public License version 2 as
170 + * published by the Free Software Foundation.
172 + * This program is distributed in the hope that it will be useful,
173 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
174 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
175 + * See the GNU General Public License for more details.
177 + * You should have received a copy of the GNU General Public License
178 + * along with this program; if not, write to the Free Software
179 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
183 +#include <linux/errno.h>
184 +#include <linux/signal.h>
185 +#include <linux/sched.h>
186 +#include <linux/sched/task.h>
187 +#include <linux/slab.h>
188 +#include <linux/prctl.h>
189 +#include <linux/uaccess.h>
190 +#include "death_notify.h"
192 +static void unlink_status_notifier(struct signotifier *n)
194 + list_del(&n->monitor_list);
195 + list_del(&n->notify_list);
199 +static void handle_already_monitoring(struct signotifier *node,
200 + struct task_state_notify_info *args,
201 + struct task_state_notify_info *oldargs)
203 + /* Store the old values */
204 + oldargs->sig = node->sig;
205 + oldargs->events = node->events;
207 + /* We know that args->sig is -1 (query), 0, or a valid signal. */
208 + if (args->sig > 0) {
209 + /* Update the new values */
210 + node->sig = args->sig;
211 + node->events = args->events;
212 + } else if (!args->sig) {
213 + /* args->sig of 0 means to deregister */
214 + unlink_status_notifier(node);
218 +static void setup_new_node(struct task_struct *p,
219 + struct signotifier *node,
220 + struct task_state_notify_info *args)
222 + node->notify_tsk = current;
223 + node->sig = args->sig;
224 + node->events = args->events;
226 + /* Add this node to the list of notification requests
227 + * for the specified process.
229 + list_add_tail(&node->notify_list, &p->notify);
231 + /* Also add this node to the list of monitor requests
232 + * for the current process.
234 + list_add_tail(&node->monitor_list, &current->monitor);
237 +/* Returns 0 if arguments are valid, 1 if they are not. */
238 +static int invalid_args(struct task_state_notify_info *args)
242 + if (args->pid <= 0)
245 + /* Sig of -1 implies query, sig of 0 implies deregistration.
246 + * Otherwise sig must be positive and within range.
248 + if ((args->sig < -1) || (args->sig > _NSIG))
251 + /* If positive sig, must have valid events. */
252 + if (args->sig > 0) {
253 + if (!args->events || (args->events >= (1 << (NSIGCHLD+1))))
262 +/* Notify those registered for process state updates via do_notify_task_state().
263 + * Nodes are not removed from the list here; they are unlinked by
264 + * release_notify_others() when a task is released.
266 + * Note: we only notify processes for events in which they have registered
269 + * Must be called holding a lock on tasklist_lock.
271 +void do_notify_others(struct task_struct *tsk, struct kernel_siginfo *info)
273 + struct signotifier *node;
274 + unsigned int events;
276 + /* This method of generating the event bit must be
277 + * matched in the userspace library.
279 + events = 1 << (info->si_code & 0xFF);
281 + list_for_each_entry(node, &tsk->notify, notify_list) {
282 + if (events & node->events) {
283 + info->si_signo = node->sig;
284 + group_send_sig_info(node->sig, info, node->notify_tsk, PIDTYPE_TGID);
289 +void release_notify_others(struct task_struct *p)
291 + struct signotifier *n, *t;
293 + /* Need to clean up any outstanding requests where we
294 + * wanted to be notified when others died.
296 + list_for_each_entry_safe(n, t, &p->monitor, monitor_list) {
297 + unlink_status_notifier(n);
300 + /* Also need to clean up any outstanding requests where others
301 + * wanted to be notified when we died.
303 + list_for_each_entry_safe(n, t, &p->notify, notify_list) {
304 + unlink_status_notifier(n);
308 +/* If the config is defined, then processes can call this routine
309 + * to request notification when the specified task's state changes.
310 + * On the death (or other state change) of the specified process,
311 + * we will send them the specified signal if the event is listed
312 + * in their event bitfield.
314 + * A sig of 0 means that we want to deregister.
316 + * The sig/events fields are value/result. On success we update them
317 + * to reflect what they were before the call.
319 + * Returns error code on error, on success we return 0.
321 +int do_notify_task_state(unsigned long arg)
324 + struct task_struct *p;
325 + struct signotifier *node, *tmp;
326 + struct task_state_notify_info args, oldargs;
328 + if (copy_from_user(&args, (struct task_state_notify_info __user *)arg,
331 + oldargs.pid = args.pid;
333 + /* Validate the arguments passed in. */
335 + if (invalid_args(&args))
338 + /* We must hold a write lock on tasklist_lock to add the notification
339 + * later on, and we need some lock on tasklist_lock for
340 + * find_task_by_vpid(), so may as well take the write lock now.
341 + * Must use write_lock_irq().
343 + write_lock_irq(&tasklist_lock);
346 + p = find_task_by_vpid(args.pid);
350 + /* Now we know pid exists, unlikely to fail. */
353 + /* Check if we're already monitoring the specified pid. If so, update
354 + * the monitoring parameters and return the old ones.
356 + list_for_each_entry(tmp, &p->notify, notify_list) {
357 + if (tmp->notify_tsk == current) {
358 + handle_already_monitoring(tmp, &args, &oldargs);
363 + /* If we get here, we're not currently monitoring the process. */
365 + oldargs.events = 0;
367 + /* If we wanted to set up a new monitor, do it now. If we didn't
368 + * manage to allocate memory for the new node, then we return
369 + * an appropriate error.
371 + if (args.sig > 0) {
372 + node = kmalloc(sizeof(*node), GFP_ATOMIC);
374 + setup_new_node(p, node, &args);
380 + write_unlock_irq(&tasklist_lock);
382 + /* Copy the old values back to caller. */
383 + if (copy_to_user((struct task_state_notify_info __user *)arg,
384 + &oldargs, sizeof(oldargs)))
391 diff --git a/kernel/death_notify.h b/kernel/death_notify.h
393 index 000000000000..14a0995b79af
395 +++ b/kernel/death_notify.h
398 + * kernel/death_notify.h, Process death notification support
400 + * Copyright (c) 2006-2014 Wind River Systems, Inc.
402 + * This program is free software; you can redistribute it and/or modify
403 + * it under the terms of the GNU General Public License version 2 as
404 + * published by the Free Software Foundation.
406 + * This program is distributed in the hope that it will be useful,
407 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
408 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
409 + * See the GNU General Public License for more details.
411 + * You should have received a copy of the GNU General Public License
412 + * along with this program; if not, write to the Free Software
413 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
416 +#ifndef _KERNEL_DEATH_NOTIFY_H
417 +#define _KERNEL_DEATH_NOTIFY_H
419 +#ifdef CONFIG_SIGEXIT
421 +struct signotifier {
422 + struct task_struct *notify_tsk;
423 + struct list_head notify_list;
424 + struct list_head monitor_list;
426 + unsigned int events;
429 +extern int do_notify_task_state(unsigned long arg);
430 +extern void do_notify_others(struct task_struct *tsk,
431 + struct kernel_siginfo *info);
432 +extern void release_notify_others(struct task_struct *p);
434 +#else /* !CONFIG_SIGEXIT */
436 +static inline void do_notify_others(struct task_struct *tsk,
437 + struct kernel_siginfo *info) {}
438 +static inline void release_notify_others(struct task_struct *p) {}
440 +#endif /* CONFIG_SIGEXIT */
443 diff --git a/kernel/exit.c b/kernel/exit.c
444 index 2166c2d92ddc..f35f3a5870a8 100644
448 #include <asm/unistd.h>
449 #include <asm/pgtable.h>
450 #include <asm/mmu_context.h>
451 +#ifdef CONFIG_SIGEXIT
452 +#include "death_notify.h"
455 static void __unhash_process(struct task_struct *p, bool group_dead)
457 @@ -196,6 +199,9 @@ void release_task(struct task_struct *p)
460 write_lock_irq(&tasklist_lock);
461 +#ifdef CONFIG_SIGEXIT
462 + release_notify_others(p);
464 ptrace_release_task(p);
467 diff --git a/kernel/fork.c b/kernel/fork.c
468 index 17389e9935c4..4d79f261c365 100644
471 @@ -1907,6 +1907,10 @@ static __latent_entropy struct task_struct *copy_process(
472 p->sequential_io = 0;
473 p->sequential_io_avg = 0;
475 +#ifdef CONFIG_SIGEXIT
476 + INIT_LIST_HEAD(&p->notify);
477 + INIT_LIST_HEAD(&p->monitor);
480 /* Perform scheduler related setup. Assign this task to a CPU. */
481 retval = sched_fork(clone_flags, p);
482 diff --git a/kernel/signal.c b/kernel/signal.c
483 index 57b7771e20d7..41126fa53102 100644
484 --- a/kernel/signal.c
485 +++ b/kernel/signal.c
487 #include <asm/siginfo.h>
488 #include <asm/cacheflush.h>
489 #include "audit.h" /* audit_signal_info() */
490 +#ifdef CONFIG_SIGEXIT
491 +#include "death_notify.h"
495 * SLAB caches for signal bits.
496 @@ -1893,6 +1896,10 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
497 __wake_up_parent(tsk, tsk->parent);
498 spin_unlock_irqrestore(&psig->siglock, flags);
500 +#ifdef CONFIG_SIGEXIT
501 + do_notify_others(tsk, &info);
507 @@ -1965,6 +1972,10 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
509 __wake_up_parent(tsk, parent);
510 spin_unlock_irqrestore(&sighand->siglock, flags);
512 +#ifdef CONFIG_SIGEXIT
513 + do_notify_others(tsk, &info);
517 static inline bool may_ptrace_stop(void)
518 diff --git a/kernel/sys.c b/kernel/sys.c
519 index f7eb62eceb24..9fadbbe9565f 100644
523 #include <asm/unistd.h>
526 +#ifdef CONFIG_SIGEXIT
527 +#include "death_notify.h"
530 #ifndef SET_UNALIGN_CTL
531 # define SET_UNALIGN_CTL(a, b) (-EINVAL)
532 @@ -2405,6 +2408,11 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
534 error = PR_MCE_KILL_DEFAULT;
536 +#ifdef CONFIG_SIGEXIT
537 + case PR_DO_NOTIFY_TASK_STATE:
538 + error = do_notify_task_state(arg2);
542 error = prctl_set_mm(arg2, arg3, arg4, arg5);