13 files changed, 381 insertions, 124 deletions
diff --git a/kernel/audit.h b/kernel/audit.h
index a3370232a39..815d6f5c04e 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -83,6 +83,7 @@ struct audit_krule {
 	u32			field_count;
 	char			*filterkey; /* ties events to rules */
 	struct audit_field	*fields;
+	struct audit_field	*arch_f; /* quick access to arch field */
 	struct audit_field	*inode_f; /* quick access to an inode field */
 	struct audit_watch	*watch;	/* associated watch */
 	struct list_head	rlist;	/* entry in audit_watch.rules list */
@@ -131,17 +132,19 @@ extern void audit_handle_ievent(struct inotify_watch *, u32, u32, u32,
 extern int selinux_audit_rule_update(void);
 
 #ifdef CONFIG_AUDITSYSCALL
-extern void __audit_signal_info(int sig, struct task_struct *t);
-static inline void audit_signal_info(int sig, struct task_struct *t)
+extern int __audit_signal_info(int sig, struct task_struct *t);
+static inline int audit_signal_info(int sig, struct task_struct *t)
 {
-	if (unlikely(audit_pid && t->tgid == audit_pid))
-		__audit_signal_info(sig, t);
+	if (unlikely((audit_pid && t->tgid == audit_pid) ||
+		     (audit_signals && !audit_dummy_context())))
+		return __audit_signal_info(sig, t);
+	return 0;
 }
 extern enum audit_state audit_filter_inodes(struct task_struct *,
 					    struct audit_context *);
 extern void audit_set_auditable(struct audit_context *);
 #else
-#define audit_signal_info(s,t)
+#define audit_signal_info(s,t) AUDIT_DISABLED
 #define audit_filter_inodes(t,c) AUDIT_DISABLED
 #define audit_set_auditable(c)
 #endif
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 3749193aed8..6c61263ff96 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -311,6 +311,43 @@ int audit_match_class(int class, unsigned syscall)
 	return classes[class][AUDIT_WORD(syscall)] & AUDIT_BIT(syscall);
 }
 
+static inline int audit_match_class_bits(int class, u32 *mask)
+{
+	int i;
+
+	if (classes[class]) {
+		for (i = 0; i < AUDIT_BITMASK_SIZE; i++)
+			if (mask[i] & classes[class][i])
+				return 0;
+	}
+	return 1;
+}
+
+static int audit_match_signal(struct audit_entry *entry)
+{
+	struct audit_field *arch = entry->rule.arch_f;
+
+	if (!arch) {
+		/* When arch is unspecified, we must check both masks on biarch
+		 * as syscall number alone is ambiguous. */
+		return (audit_match_class_bits(AUDIT_CLASS_SIGNAL,
+					       entry->rule.mask) &&
+			audit_match_class_bits(AUDIT_CLASS_SIGNAL_32,
+					       entry->rule.mask));
+	}
+
+	switch(audit_classify_arch(arch->val)) {
+	case 0: /* native */
+		return (audit_match_class_bits(AUDIT_CLASS_SIGNAL,
+					       entry->rule.mask));
+	case 1: /* 32bit on biarch */
+		return (audit_match_class_bits(AUDIT_CLASS_SIGNAL_32,
+					       entry->rule.mask));
+	default:
+		return 1;
+	}
+}
+
 /* Common user-space to kernel rule translation. */
 static inline struct audit_entry *audit_to_entry_common(struct audit_rule *rule)
 {
@@ -429,6 +466,7 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
 				err = -EINVAL;
 				goto exit_free;
 			}
+			entry->rule.arch_f = f;
 			break;
 		case AUDIT_PERM:
 			if (f->val & ~15)
@@ -519,7 +557,6 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 		case AUDIT_FSGID:
 		case AUDIT_LOGINUID:
 		case AUDIT_PERS:
-		case AUDIT_ARCH:
 		case AUDIT_MSGTYPE:
 		case AUDIT_PPID:
 		case AUDIT_DEVMAJOR:
@@ -531,6 +568,9 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 		case AUDIT_ARG2:
 		case AUDIT_ARG3:
 			break;
+		case AUDIT_ARCH:
+			entry->rule.arch_f = f;
+			break;
 		case AUDIT_SUBJ_USER:
 		case AUDIT_SUBJ_ROLE:
 		case AUDIT_SUBJ_TYPE:
@@ -1221,6 +1261,9 @@ static inline int audit_add_rule(struct audit_entry *entry,
 #ifdef CONFIG_AUDITSYSCALL
 	if (!dont_count)
 		audit_n_rules++;
+
+	if (!audit_match_signal(entry))
+		audit_signals++;
 #endif
 	mutex_unlock(&audit_filter_mutex);
 
@@ -1294,6 +1337,9 @@ static inline int audit_del_rule(struct audit_entry *entry,
 #ifdef CONFIG_AUDITSYSCALL
 	if (!dont_count)
 		audit_n_rules--;
+
+	if (!audit_match_signal(entry))
+		audit_signals--;
 #endif
 	mutex_unlock(&audit_filter_mutex);
 
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 628c7ac590a..e36481ed61b 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -78,17 +78,15 @@ extern int audit_enabled;
  * for saving names from getname(). */
 #define AUDIT_NAMES    20
 
-/* AUDIT_NAMES_RESERVED is the number of slots we reserve in the
- * audit_context from being used for nameless inodes from
- * path_lookup. */
-#define AUDIT_NAMES_RESERVED 7
-
 /* Indicates that audit should log the full pathname. */
 #define AUDIT_NAME_FULL -1
 
 /* number of audit rules */
 int audit_n_rules;
 
+/* determines whether we collect data for signals sent */
+int audit_signals;
+
 /* When fs/namei.c:getname() is called, we store the pointer in name and
  * we don't let putname() free it (instead we free all of the saved
  * pointers at syscall exit time).
@@ -114,6 +112,9 @@ struct audit_aux_data {
 
 #define AUDIT_AUX_IPCPERM	0
 
+/* Number of target pids per aux struct. */
+#define AUDIT_AUX_PIDS	16
+
 struct audit_aux_data_mq_open {
 	struct audit_aux_data	d;
 	int			oflag;
@@ -181,6 +182,13 @@ struct audit_aux_data_path {
 	struct vfsmount		*mnt;
 };
 
+struct audit_aux_data_pids {
+	struct audit_aux_data	d;
+	pid_t			target_pid[AUDIT_AUX_PIDS];
+	u32			target_sid[AUDIT_AUX_PIDS];
+	int			pid_count;
+};
+
 /* The per-task audit context. */
 struct audit_context {
 	int		    dummy;	/* must be the first element */
@@ -201,6 +209,7 @@ struct audit_context {
 	struct vfsmount *   pwdmnt;
 	struct audit_context *previous; /* For nested syscalls */
 	struct audit_aux_data *aux;
+	struct audit_aux_data *aux_pids;
 
 				/* Save things to print about task_struct */
 	pid_t		    pid, ppid;
@@ -209,6 +218,9 @@ struct audit_context {
 	unsigned long	    personality;
 	int		    arch;
 
+	pid_t		    target_pid;
+	u32		    target_sid;
+
 #if AUDIT_DEBUG
 	int		    put_count;
 	int		    ino_count;
@@ -654,6 +666,10 @@ static inline void audit_free_aux(struct audit_context *context)
 		context->aux = aux->next;
 		kfree(aux);
 	}
+	while ((aux = context->aux_pids)) {
+		context->aux_pids = aux->next;
+		kfree(aux);
+	}
 }
 
 static inline void audit_zero_context(struct audit_context *context,
@@ -795,6 +811,29 @@ static void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk
 	audit_log_task_context(ab);
 }
 
+static int audit_log_pid_context(struct audit_context *context, pid_t pid,
+				 u32 sid)
+{
+	struct audit_buffer *ab;
+	char *s = NULL;
+	u32 len;
+	int rc = 0;
+
+	ab = audit_log_start(context, GFP_KERNEL, AUDIT_OBJ_PID);
+	if (!ab)
+		return 1;
+
+	if (selinux_sid_to_string(sid, &s, &len)) {
+		audit_log_format(ab, "opid=%d obj=(none)", pid);
+		rc = 1;
+	} else
+		audit_log_format(ab, "opid=%d  obj=%s", pid, s);
+	audit_log_end(ab);
+	kfree(s);
+
+	return rc;
+}
+
 static void audit_log_exit(struct audit_context *context, struct task_struct *tsk)
 {
 	int i, call_panic = 0;
@@ -973,6 +1012,21 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 		audit_log_end(ab);
 	}
 
+	for (aux = context->aux_pids; aux; aux = aux->next) {
+		struct audit_aux_data_pids *axs = (void *)aux;
+		int i;
+
+		for (i = 0; i < axs->pid_count; i++)
+			if (audit_log_pid_context(context, axs->target_pid[i],
+						  axs->target_sid[i]))
+				call_panic = 1;
+	}
+
+	if (context->target_pid &&
+	    audit_log_pid_context(context, context->target_pid,
+				  context->target_sid))
+			call_panic = 1;
+
 	if (context->pwd && context->pwdmnt) {
 		ab = audit_log_start(context, GFP_KERNEL, AUDIT_CWD);
 		if (ab) {
@@ -1193,6 +1247,10 @@ void audit_syscall_exit(int valid, long return_code)
 	} else {
 		audit_free_names(context);
 		audit_free_aux(context);
+		context->aux = NULL;
+		context->aux_pids = NULL;
+		context->target_pid = 0;
+		context->target_sid = 0;
 		kfree(context->filterkey);
 		context->filterkey = NULL;
 		tsk->audit_context = context;
@@ -1226,6 +1284,7 @@ void __audit_getname(const char *name)
 	context->names[context->name_count].name_len = AUDIT_NAME_FULL;
 	context->names[context->name_count].name_put = 1;
 	context->names[context->name_count].ino  = (unsigned long)-1;
+	context->names[context->name_count].osid = 0;
 	++context->name_count;
 	if (!context->pwd) {
 		read_lock(&current->fs->lock);
@@ -1279,6 +1338,28 @@ void audit_putname(const char *name)
 #endif
 }
 
+static int audit_inc_name_count(struct audit_context *context,
+				const struct inode *inode)
+{
+	if (context->name_count >= AUDIT_NAMES) {
+		if (inode)
+			printk(KERN_DEBUG "name_count maxed, losing inode data: "
+			       "dev=%02x:%02x, inode=%lu",
+			       MAJOR(inode->i_sb->s_dev),
+			       MINOR(inode->i_sb->s_dev),
+			       inode->i_ino);
+
+		else
+			printk(KERN_DEBUG "name_count maxed, losing inode data");
+		return 1;
+	}
+	context->name_count++;
+#if AUDIT_DEBUG
+	context->ino_count++;
+#endif
+	return 0;
+}
+
 /* Copy inode data into an audit_names. */
 static void audit_copy_inode(struct audit_names *name, const struct inode *inode)
 {
@@ -1316,13 +1397,10 @@ void __audit_inode(const char *name, const struct inode *inode)
 	else {
 		/* FIXME: how much do we care about inodes that have no
 		 * associated name? */
-		if (context->name_count >= AUDIT_NAMES - AUDIT_NAMES_RESERVED)
+		if (audit_inc_name_count(context, inode))
 			return;
-		idx = context->name_count++;
+		idx = context->name_count - 1;
 		context->names[idx].name = NULL;
-#if AUDIT_DEBUG
-		++context->ino_count;
-#endif
 	}
 	audit_copy_inode(&context->names[idx], inode);
 }
@@ -1346,7 +1424,7 @@ void __audit_inode_child(const char *dname, const struct inode *inode,
 {
 	int idx;
 	struct audit_context *context = current->audit_context;
-	const char *found_name = NULL;
+	const char *found_parent = NULL, *found_child = NULL;
 	int dirlen = 0;
 
 	if (!context->in_syscall)
@@ -1354,88 +1432,73 @@ void __audit_inode_child(const char *dname, const struct inode *inode,
 
 	/* determine matching parent */
 	if (!dname)
-		goto update_context;
-	for (idx = 0; idx < context->name_count; idx++)
-		if (context->names[idx].ino == parent->i_ino) {
-			const char *name = context->names[idx].name;
+		goto add_names;
 
-			if (!name)
-				continue;
+	/* parent is more likely, look for it first */
+	for (idx = 0; idx < context->name_count; idx++) {
+		struct audit_names *n = &context->names[idx];
 
-			if (audit_compare_dname_path(dname, name, &dirlen) == 0) {
-				context->names[idx].name_len = dirlen;
-				found_name = name;
-				break;
-			}
+		if (!n->name)
+			continue;
+
+		if (n->ino == parent->i_ino &&
+		    !audit_compare_dname_path(dname, n->name, &dirlen)) {
+			n->name_len = dirlen; /* update parent data in place */
+			found_parent = n->name;
+			goto add_names;
 		}
+	}
 
-update_context:
-	idx = context->name_count;
-	if (context->name_count == AUDIT_NAMES) {
-		printk(KERN_DEBUG "name_count maxed and losing %s\n",
-			found_name ?: "(null)");
-		return;
+	/* no matching parent, look for matching child */
+	for (idx = 0; idx < context->name_count; idx++) {
+		struct audit_names *n = &context->names[idx];
+
+		if (!n->name)
+			continue;
+
+		/* strcmp() is the more likely scenario */
+		if (!strcmp(dname, n->name) ||
+		     !audit_compare_dname_path(dname, n->name, &dirlen)) {
+			if (inode)
+				audit_copy_inode(n, inode);
+			else
+				n->ino = (unsigned long)-1;
+			found_child = n->name;
+			goto add_names;
+		}
 	}
-	context->name_count++;
-#if AUDIT_DEBUG
-	context->ino_count++;
-#endif
-	/* Re-use the name belonging to the slot for a matching parent directory.
-	 * All names for this context are relinquished in audit_free_names() */
-	context->names[idx].name = found_name;
-	context->names[idx].name_len = AUDIT_NAME_FULL;
-	context->names[idx].name_put = 0;	/* don't call __putname() */
-
-	if (!inode)
-		context->names[idx].ino = (unsigned long)-1;
-	else
-		audit_copy_inode(&context->names[idx], inode);
-
-	/* A parent was not found in audit_names, so copy the inode data for the
-	 * provided parent. */
-	if (!found_name) {
-		idx = context->name_count;
-		if (context->name_count == AUDIT_NAMES) {
-			printk(KERN_DEBUG
-				"name_count maxed and losing parent inode data: dev=%02x:%02x, inode=%lu",
-				MAJOR(parent->i_sb->s_dev),
-				MINOR(parent->i_sb->s_dev),
-				parent->i_ino);
+
+add_names:
+	if (!found_parent) {
+		if (audit_inc_name_count(context, parent))
 			return;
-		}
-		context->name_count++;
-#if AUDIT_DEBUG
-		context->ino_count++;
-#endif
+		idx = context->name_count - 1;
+		context->names[idx].name = NULL;
 		audit_copy_inode(&context->names[idx], parent);
 	}
-}
 
-/**
- * audit_inode_update - update inode info for last collected name
- * @inode: inode being audited
- *
- * When open() is called on an existing object with the O_CREAT flag, the inode
- * data audit initially collects is incorrect.  This additional hook ensures
- * audit has the inode data for the actual object to be opened.
- */
-void __audit_inode_update(const struct inode *inode)
-{
-	struct audit_context *context = current->audit_context;
-	int idx;
+	if (!found_child) {
+		if (audit_inc_name_count(context, inode))
+			return;
+		idx = context->name_count - 1;
 
-	if (!context->in_syscall || !inode)
-		return;
+		/* Re-use the name belonging to the slot for a matching parent
+		 * directory. All names for this context are relinquished in
+		 * audit_free_names() */
+		if (found_parent) {
+			context->names[idx].name = found_parent;
+			context->names[idx].name_len = AUDIT_NAME_FULL;
+			/* don't call __putname() */
+			context->names[idx].name_put = 0;
+		} else {
+			context->names[idx].name = NULL;
+		}
 
-	if (context->name_count == 0) {
-		context->name_count++;
-#if AUDIT_DEBUG
-		context->ino_count++;
-#endif
+		if (inode)
+			audit_copy_inode(&context->names[idx], inode);
+		else
+			context->names[idx].ino = (unsigned long)-1;
 	}
-	idx = context->name_count - 1;
-
-	audit_copy_inode(&context->names[idx], inode);
 }
 
 /**
@@ -1880,6 +1943,14 @@ int audit_sockaddr(int len, void *a)
 	return 0;
 }
 
+void __audit_ptrace(struct task_struct *t)
+{
+	struct audit_context *context = current->audit_context;
+
+	context->target_pid = t->pid;
+	selinux_get_task_sid(t, &context->target_sid);
+}
+
 /**
  * audit_avc_path - record the granting or denial of permissions
  * @dentry: dentry to record
@@ -1918,15 +1989,17 @@ int audit_avc_path(struct dentry *dentry, struct vfsmount *mnt)
  * If the audit subsystem is being terminated, record the task (pid)
  * and uid that is doing that.
  */
-void __audit_signal_info(int sig, struct task_struct *t)
+int __audit_signal_info(int sig, struct task_struct *t)
 {
+	struct audit_aux_data_pids *axp;
+	struct task_struct *tsk = current;
+	struct audit_context *ctx = tsk->audit_context;
 	extern pid_t audit_sig_pid;
 	extern uid_t audit_sig_uid;
 	extern u32 audit_sig_sid;
 
-	if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1) {
-		struct task_struct *tsk = current;
-		struct audit_context *ctx = tsk->audit_context;
+	if (audit_pid && t->tgid == audit_pid &&
+	    (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1)) {
 		audit_sig_pid = tsk->pid;
 		if (ctx)
 			audit_sig_uid = ctx->loginuid;
@@ -1934,4 +2007,72 @@ void __audit_signal_info(int sig, struct task_struct *t)
 			audit_sig_uid = tsk->uid;
 		selinux_get_task_sid(tsk, &audit_sig_sid);
 	}
+
+	if (!audit_signals) /* audit_context checked in wrapper */
+		return 0;
+
+	/* optimize the common case by putting first signal recipient directly
+	 * in audit_context */
+	if (!ctx->target_pid) {
+		ctx->target_pid = t->tgid;
+		selinux_get_task_sid(t, &ctx->target_sid);
+		return 0;
+	}
+
+	axp = (void *)ctx->aux_pids;
+	if (!axp || axp->pid_count == AUDIT_AUX_PIDS) {
+		axp = kzalloc(sizeof(*axp), GFP_ATOMIC);
+		if (!axp)
+			return -ENOMEM;
+
+		axp->d.type = AUDIT_OBJ_PID;
+		axp->d.next = ctx->aux_pids;
+		ctx->aux_pids = (void *)axp;
+	}
+	BUG_ON(axp->pid_count > AUDIT_AUX_PIDS);
+
+	axp->target_pid[axp->pid_count] = t->tgid;
+	selinux_get_task_sid(t, &axp->target_sid[axp->pid_count]);
+	axp->pid_count++;
+
+	return 0;
+}
+
+/**
+ * audit_core_dumps - record information about processes that end abnormally
+ * @sig: signal value
+ *
+ * If a process ends with a core dump, something fishy is going on and we
+ * should record the event for investigation.
+ */
+void audit_core_dumps(long signr)
+{
+	struct audit_buffer *ab;
+	u32 sid;
+
+	if (!audit_enabled)
+		return;
+
+	if (signr == SIGQUIT)	/* don't care for those */
+		return;
+
+	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_ANOM_ABEND);
+	audit_log_format(ab, "auid=%u uid=%u gid=%u",
+			audit_get_loginuid(current->audit_context),
+			current->uid, current->gid);
+	selinux_get_task_sid(current, &sid);
+	if (sid) {
+		char *ctx = NULL;
+		u32 len;
+
+		if (selinux_sid_to_string(sid, &ctx, &len))
+			audit_log_format(ab, " ssid=%u", sid);
+		else
+			audit_log_format(ab, " subj=%s", ctx);
+		kfree(ctx);
+	}
+	audit_log_format(ab, " pid=%d comm=", current->pid);
+	audit_log_untrustedstring(ab, current->comm);
+	audit_log_format(ab, " sig=%ld", signr);
+	audit_log_end(ab);
 }
diff --git a/kernel/compat.c b/kernel/compat.c
index cebb4c28c03..3bae3742c2a 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -475,8 +475,8 @@ asmlinkage long compat_sys_sched_getaffinity(compat_pid_t pid, unsigned int len,
 	return min_length;
 }
 
-static int get_compat_itimerspec(struct itimerspec *dst, 
-				 struct compat_itimerspec __user *src)
+int get_compat_itimerspec(struct itimerspec *dst,
+			  const struct compat_itimerspec __user *src)
 { 
 	if (get_compat_timespec(&dst->it_interval, &src->it_interval) ||
 	    get_compat_timespec(&dst->it_value, &src->it_value))
@@ -484,8 +484,8 @@ static int get_compat_itimerspec(struct itimerspec *dst,
 	return 0;
 } 
 
-static int put_compat_itimerspec(struct compat_itimerspec __user *dst, 
-				 struct itimerspec *src)
+int put_compat_itimerspec(struct compat_itimerspec __user *dst,
+			  const struct itimerspec *src)
 { 
 	if (put_compat_timespec(&src->it_interval, &dst->it_interval) ||
 	    put_compat_timespec(&src->it_value, &dst->it_value))
diff --git a/kernel/exit.c b/kernel/exit.c
index b0c6f0c3a2d..c6d14b8008d 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -24,6 +24,7 @@
 #include <linux/pid_namespace.h>
 #include <linux/ptrace.h>
 #include <linux/profile.h>
+#include <linux/signalfd.h>
 #include <linux/mount.h>
 #include <linux/proc_fs.h>
 #include <linux/kthread.h>
@@ -42,6 +43,7 @@
 #include <linux/audit.h> /* for audit_free() */
 #include <linux/resource.h>
 #include <linux/blkdev.h>
+#include <linux/task_io_accounting_ops.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -82,6 +84,14 @@ static void __exit_signal(struct task_struct *tsk)
 	sighand = rcu_dereference(tsk->sighand);
 	spin_lock(&sighand->siglock);
 
+	/*
+	 * Notify that this sighand has been detached. This must
+	 * be called with the tsk->sighand lock held. Also, this
+	 * access tsk->sighand internally, so it must be called
+	 * before tsk->sighand is reset.
+	 */
+	signalfd_detach_locked(tsk);
+
 	posix_cpu_timers_exit(tsk);
 	if (atomic_dec_and_test(&sig->count))
 		posix_cpu_timers_exit_group(tsk);
@@ -113,6 +123,8 @@ static void __exit_signal(struct task_struct *tsk)
 		sig->nvcsw += tsk->nvcsw;
 		sig->nivcsw += tsk->nivcsw;
 		sig->sched_time += tsk->sched_time;
+		sig->inblock += task_io_get_inblock(tsk);
+		sig->oublock += task_io_get_oublock(tsk);
 		sig = NULL; /* Marker for below. */
 	}
 
@@ -299,12 +311,12 @@ void __set_special_pids(pid_t session, pid_t pgrp)
 	if (process_session(curr) != session) {
 		detach_pid(curr, PIDTYPE_SID);
 		set_signal_session(curr->signal, session);
-		attach_pid(curr, PIDTYPE_SID, session);
+		attach_pid(curr, PIDTYPE_SID, find_pid(session));
 	}
 	if (process_group(curr) != pgrp) {
 		detach_pid(curr, PIDTYPE_PGID);
 		curr->signal->pgrp = pgrp;
-		attach_pid(curr, PIDTYPE_PGID, pgrp);
+		attach_pid(curr, PIDTYPE_PGID, find_pid(pgrp));
 	}
 }
 
@@ -1193,6 +1205,12 @@ static int wait_task_zombie(struct task_struct *p, int noreap,
 			p->nvcsw + sig->nvcsw + sig->cnvcsw;
 		psig->cnivcsw +=
 			p->nivcsw + sig->nivcsw + sig->cnivcsw;
+		psig->cinblock +=
+			task_io_get_inblock(p) +
+			sig->inblock + sig->cinblock;
+		psig->coublock +=
+			task_io_get_oublock(p) +
+			sig->oublock + sig->coublock;
 		spin_unlock_irq(&p->parent->sighand->siglock);
 	}
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 5dd3979747f..49530e40ea8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -875,6 +875,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
 	sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
 	sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
 	sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
+	sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
 	sig->sched_time = 0;
 	INIT_LIST_HEAD(&sig->cpu_timers[0]);
 	INIT_LIST_HEAD(&sig->cpu_timers[1]);
@@ -955,7 +956,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 					unsigned long stack_size,
 					int __user *parent_tidptr,
 					int __user *child_tidptr,
-					int pid)
+					struct pid *pid)
 {
 	int retval;
 	struct task_struct *p = NULL;
@@ -1022,7 +1023,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	p->did_exec = 0;
 	delayacct_tsk_init(p);	/* Must remain after dup_task_struct() */
 	copy_flags(clone_flags, p);
-	p->pid = pid;
+	p->pid = pid_nr(pid);
 	retval = -EFAULT;
 	if (clone_flags & CLONE_PARENT_SETTID)
 		if (put_user(p->pid, parent_tidptr))
@@ -1251,13 +1252,13 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 			p->signal->tty = current->signal->tty;
 			p->signal->pgrp = process_group(current);
 			set_signal_session(p->signal, process_session(current));
-			attach_pid(p, PIDTYPE_PGID, process_group(p));
-			attach_pid(p, PIDTYPE_SID, process_session(p));
+			attach_pid(p, PIDTYPE_PGID, task_pgrp(current));
+			attach_pid(p, PIDTYPE_SID, task_session(current));
 
 			list_add_tail_rcu(&p->tasks, &init_task.tasks);
 			__get_cpu_var(process_counts)++;
 		}
-		attach_pid(p, PIDTYPE_PID, p->pid);
+		attach_pid(p, PIDTYPE_PID, pid);
 		nr_threads++;
 	}
 
@@ -1321,7 +1322,8 @@ struct task_struct * __cpuinit fork_idle(int cpu)
 	struct task_struct *task;
 	struct pt_regs regs;
 
-	task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, NULL, 0);
+	task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, NULL,
+				&init_struct_pid);
 	if (!IS_ERR(task))
 		init_idle(task, cpu);
 
@@ -1371,7 +1373,7 @@ long do_fork(unsigned long clone_flags,
 			clone_flags |= CLONE_PTRACE;
 	}
 
-	p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, nr);
+	p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid);
 	/*
 	 * Do this prior waking up the new thread - the thread pointer
 	 * might get invalid after that point, if the thread exits quickly.
@@ -1420,12 +1422,15 @@ long do_fork(unsigned long clone_flags,
 #define ARCH_MIN_MMSTRUCT_ALIGN 0
 #endif
 
-static void sighand_ctor(void *data, struct kmem_cache *cachep, unsigned long flags)
+static void sighand_ctor(void *data, struct kmem_cache *cachep,
+			unsigned long flags)
 {
 	struct sighand_struct *sighand = data;
 
-	if (flags & SLAB_CTOR_CONSTRUCTOR)
+	if (flags & SLAB_CTOR_CONSTRUCTOR) {
 		spin_lock_init(&sighand->siglock);
+		INIT_LIST_HEAD(&sighand->signalfd_list);
+	}
 }
 
 void __init proc_caches_init(void)
@@ -1451,7 +1456,6 @@ void __init proc_caches_init(void)
 			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
 }
 
-
 /*
  * Check constraints on flags passed to the unshare system call and
  * force unsharing of additional process context as appropriate.
diff --git a/kernel/pid.c b/kernel/pid.c
index d3ad724afa8..eb66bd2953a 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -27,11 +27,13 @@
 #include <linux/bootmem.h>
 #include <linux/hash.h>
 #include <linux/pid_namespace.h>
+#include <linux/init_task.h>
 
 #define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift)
 static struct hlist_head *pid_hash;
 static int pidhash_shift;
 static struct kmem_cache *pid_cachep;
+struct pid init_struct_pid = INIT_STRUCT_PID;
 
 int pid_max = PID_MAX_DEFAULT;
 
@@ -247,13 +249,16 @@ struct pid * fastcall find_pid(int nr)
 }
 EXPORT_SYMBOL_GPL(find_pid);
 
-int fastcall attach_pid(struct task_struct *task, enum pid_type type, int nr)
+/*
+ * attach_pid() must be called with the tasklist_lock write-held.
+ */
+int fastcall attach_pid(struct task_struct *task, enum pid_type type,
+		struct pid *pid)
 {
 	struct pid_link *link;
-	struct pid *pid;
 
 	link = &task->pids[type];
-	link->pid = pid = find_pid(nr);
+	link->pid = pid;
 	hlist_add_head_rcu(&link->node, &pid->tasks[type]);
 
 	return 0;
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 4d50e06fd74..ad7949a589d 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -18,6 +18,7 @@
 #include <linux/ptrace.h>
 #include <linux/security.h>
 #include <linux/signal.h>
+#include <linux/audit.h>
 
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
@@ -161,6 +162,8 @@ int ptrace_attach(struct task_struct *task)
 {
 	int retval;
 
+	audit_ptrace(task);
+
 	retval = -EPERM;
 	if (task->pid <= 1)
 		goto out;
diff --git a/kernel/signal.c b/kernel/signal.c
index 2ac3a668d9d..364fc95bf97 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -21,6 +21,7 @@
 #include <linux/syscalls.h>
 #include <linux/ptrace.h>
 #include <linux/signal.h>
+#include <linux/signalfd.h>
 #include <linux/capability.h>
 #include <linux/freezer.h>
 #include <linux/pid_namespace.h>
@@ -113,8 +114,7 @@ void recalc_sigpending(void)
 
 /* Given the mask, find the first available signal that should be serviced. */
 
-static int
-next_signal(struct sigpending *pending, sigset_t *mask)
+int next_signal(struct sigpending *pending, sigset_t *mask)
 {
 	unsigned long i, *s, *m, x;
 	int sig = 0;
@@ -497,6 +497,11 @@ static int check_kill_permission(int sig, struct siginfo *info,
 	int error = -EINVAL;
 	if (!valid_signal(sig))
 		return error;
+
+	error = audit_signal_info(sig, t); /* Let audit system see the signal */
+	if (error)
+		return error;
+
 	error = -EPERM;
 	if ((info == SEND_SIG_NOINFO || (!is_si_special(info) && SI_FROMUSER(info)))
 	    && ((sig != SIGCONT) ||
@@ -506,10 +511,7 @@ static int check_kill_permission(int sig, struct siginfo *info,
 	    && !capable(CAP_KILL))
 		return error;
 
-	error = security_task_kill(t, info, sig, 0);
-	if (!error)
-		audit_signal_info(sig, t); /* Let audit system see the signal */
-	return error;
+	return security_task_kill(t, info, sig, 0);
 }
 
 /* forward decl */
@@ -630,6 +632,12 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
 	int ret = 0;
 
 	/*
+	 * Deliver the signal to listening signalfds. This must be called
+	 * with the sighand lock held.
+	 */
+	signalfd_notify(t, sig);
+
+	/*
 	 * fast-pathed signals for kernel-internal things like SIGSTOP
 	 * or SIGKILL.
 	 */
@@ -1280,6 +1288,11 @@ int send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p)
 		ret = 1;
 		goto out;
 	}
+	/*
+	 * Deliver the signal to listening signalfds. This must be called
+	 * with the sighand lock held.
+	 */
+	signalfd_notify(p, sig);
 
 	list_add_tail(&q->list, &p->pending.list);
 	sigaddset(&p->pending.signal, sig);
@@ -1323,6 +1336,11 @@ send_group_sigqueue(int sig, struct sigqueue *q, struct task_struct *p)
 		q->info.si_overrun++;
 		goto out;
 	} 
+	/*
+	 * Deliver the signal to listening signalfds. This must be called
+	 * with the sighand lock held.
+	 */
+	signalfd_notify(p, sig);
 
 	/*
 	 * Put this signal on the shared-pending queue.
@@ -1983,6 +2001,8 @@ int copy_siginfo_to_user(siginfo_t __user *to, siginfo_t *from)
 	/*
 	 * If you change siginfo_t structure, please be sure
 	 * this code is fixed accordingly.
+	 * Please remember to update the signalfd_copyinfo() function
+	 * inside fs/signalfd.c too, in case siginfo_t changes.
 	 * It should never copy any pad contained in the structure
 	 * to avoid security leaks, but must copy the generic
 	 * 3 ints plus the relevant union member.
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index daabb74ee0b..fcee2a8e6da 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -8,6 +8,8 @@
 #include <linux/sched.h>
 #include <linux/stop_machine.h>
 #include <linux/syscalls.h>
+#include <linux/interrupt.h>
+
 #include <asm/atomic.h>
 #include <asm/semaphore.h>
 #include <asm/uaccess.h>
@@ -45,6 +47,7 @@ static int stopmachine(void *cpu)
 		if (stopmachine_state == STOPMACHINE_DISABLE_IRQ 
 		    && !irqs_disabled) {
 			local_irq_disable();
+			hard_irq_disable();
 			irqs_disabled = 1;
 			/* Ack: irqs disabled. */
 			smp_mb(); /* Must read state first. */
@@ -124,6 +127,7 @@ static int stop_machine(void)
 
 	/* Make them disable irqs. */
 	local_irq_disable();
+	hard_irq_disable();
 	stopmachine_set_state(STOPMACHINE_DISABLE_IRQ);
 
 	return 0;
diff --git a/kernel/sys.c b/kernel/sys.c
index cdb7e9457ba..872271ccc38 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -14,6 +14,7 @@
 #include <linux/prctl.h>
 #include <linux/highuid.h>
 #include <linux/fs.h>
+#include <linux/resource.h>
 #include <linux/kernel.h>
 #include <linux/kexec.h>
 #include <linux/workqueue.h>
@@ -29,6 +30,7 @@
 #include <linux/signal.h>
 #include <linux/cn_proc.h>
 #include <linux/getcpu.h>
+#include <linux/task_io_accounting_ops.h>
 
 #include <linux/compat.h>
 #include <linux/syscalls.h>
@@ -658,7 +660,7 @@ asmlinkage long sys_setpriority(int which, int who, int niceval)
 	int error = -EINVAL;
 	struct pid *pgrp;
 
-	if (which > 2 || which < 0)
+	if (which > PRIO_USER || which < PRIO_PROCESS)
 		goto out;
 
 	/* normalize: avoid signed division (rounding problems) */
@@ -722,7 +724,7 @@ asmlinkage long sys_getpriority(int which, int who)
 	long niceval, retval = -ESRCH;
 	struct pid *pgrp;
 
-	if (which > 2 || which < 0)
+	if (which > PRIO_USER || which < PRIO_PROCESS)
 		return -EINVAL;
 
 	read_lock(&tasklist_lock);
@@ -1486,7 +1488,7 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
 	if (process_group(p) != pgid) {
 		detach_pid(p, PIDTYPE_PGID);
 		p->signal->pgrp = pgid;
-		attach_pid(p, PIDTYPE_PGID, pgid);
+		attach_pid(p, PIDTYPE_PGID, find_pid(pgid));
 	}
 
 	err = 0;
@@ -2082,6 +2084,8 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
 			r->ru_nivcsw = p->signal->cnivcsw;
 			r->ru_minflt = p->signal->cmin_flt;
 			r->ru_majflt = p->signal->cmaj_flt;
+			r->ru_inblock = p->signal->cinblock;
+			r->ru_oublock = p->signal->coublock;
 
 			if (who == RUSAGE_CHILDREN)
 				break;
@@ -2093,6 +2097,8 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
 			r->ru_nivcsw += p->signal->nivcsw;
 			r->ru_minflt += p->signal->min_flt;
 			r->ru_majflt += p->signal->maj_flt;
+			r->ru_inblock += p->signal->inblock;
+			r->ru_oublock += p->signal->oublock;
 			t = p;
 			do {
 				utime = cputime_add(utime, t->utime);
@@ -2101,6 +2107,8 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
 				r->ru_nivcsw += t->nivcsw;
 				r->ru_minflt += t->min_flt;
 				r->ru_majflt += t->maj_flt;
+				r->ru_inblock += task_io_get_inblock(t);
+				r->ru_oublock += task_io_get_oublock(t);
 				t = next_thread(t);
 			} while (t != p);
 			break;
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index d7306d0f3df..b6d77a8a1ca 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -141,3 +141,8 @@ cond_syscall(compat_sys_migrate_pages);
 cond_syscall(sys_bdflush);
 cond_syscall(sys_ioprio_set);
 cond_syscall(sys_ioprio_get);
+
+/* New file descriptors */
+cond_syscall(sys_signalfd);
+cond_syscall(sys_timerfd);
+cond_syscall(sys_eventfd);
diff --git a/kernel/timer.c b/kernel/timer.c
index 59a28b1752f..a6c580ac084 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -92,24 +92,24 @@ static DEFINE_PER_CPU(tvec_base_t *, tvec_bases) = &boot_tvec_bases;
 /* Functions below help us manage 'deferrable' flag */
 static inline unsigned int tbase_get_deferrable(tvec_base_t *base)
 {
-	return (unsigned int)((unsigned long)base & TBASE_DEFERRABLE_FLAG);
+	return ((unsigned int)(unsigned long)base & TBASE_DEFERRABLE_FLAG);
 }
 
 static inline tvec_base_t *tbase_get_base(tvec_base_t *base)
 {
-	return (tvec_base_t *)((unsigned long)base & ~TBASE_DEFERRABLE_FLAG);
+	return ((tvec_base_t *)((unsigned long)base & ~TBASE_DEFERRABLE_FLAG));
 }
 
 static inline void timer_set_deferrable(struct timer_list *timer)
 {
-	timer->base = (tvec_base_t *)((unsigned long)timer->base |
-	                               TBASE_DEFERRABLE_FLAG);
+	timer->base = ((tvec_base_t *)((unsigned long)(timer->base) |
+	                               TBASE_DEFERRABLE_FLAG));
 }
 
 static inline void
 timer_set_base(struct timer_list *timer, tvec_base_t *new_base)
 {
-	timer->base = (tvec_base_t *)((unsigned long)new_base |
+	timer->base = (tvec_base_t *)((unsigned long)(new_base) |
 	                              tbase_get_deferrable(timer->base));
 }