kernel/seccomp.c

   1 /*
   2  * linux/kernel/seccomp.c
   3  *
   4  * Copyright 2004-2005  Andrea Arcangeli <andrea@cpushare.com>
   5  *
   6  * Copyright (C) 2012 Google, Inc.
   7  * Will Drewry <wad@chromium.org>
   8  *
   9  * This defines a simple but solid secure-computing facility.
  10  *
  11  * Mode 1 uses a fixed list of allowed system calls.
  12  * Mode 2 allows user-defined system call filters in the form
  13  *        of Berkeley Packet Filters/Linux Socket Filters.
  14  */
  15
  16 #include <linux/atomic.h>
  17 #include <linux/audit.h>
  18 #include <linux/compat.h>
  19 #include <linux/sched.h>
  20 #include <linux/seccomp.h>
  21 #include <linux/slab.h>
  22 #include <linux/syscalls.h>
  23
  24 /* #define SECCOMP_DEBUG 1 */
  25
  26 #ifdef CONFIG_SECCOMP_FILTER
  27 #include <asm/syscall.h>
  28 #include <linux/filter.h>
  29 #include <linux/pid.h>
  30 #include <linux/ptrace.h>
  31 #include <linux/security.h>
  32 #include <linux/tracehook.h>
  33 #include <linux/uaccess.h>
  34
  35 /**
  36  * struct seccomp_filter - container for seccomp BPF programs
  37  *
  38  * @usage: reference count to manage the object lifetime.
  39  *         get/put helpers should be used when accessing an instance
  40  *         outside of a lifetime-guarded section.  In general, this
  41  *         is only needed for handling filters shared across tasks.
  42  * @prev: points to a previously installed, or inherited, filter
  43  * @len: the number of instructions in the program
  44  * @insnsi: the BPF program instructions to evaluate
  45  *
  46  * seccomp_filter objects are organized in a tree linked via the @prev
  47  * pointer.  For any task, it appears to be a singly-linked list starting
  48  * with current->seccomp.filter, the most recently attached or inherited filter.
  49  * However, multiple filters may share a @prev node, by way of fork(), which
  50  * results in a unidirectional tree existing in memory.  This is similar to
  51  * how namespaces work.
  52  *
  53  * seccomp_filter objects should never be modified after being attached
  54  * to a task_struct (other than @usage).
  55  */
  56 struct seccomp_filter {
  57         atomic_t usage;
  58         struct seccomp_filter *prev;
  59         struct bpf_prog *prog;
  60 };
  61
  62 /* Limit any path through the tree to 256KB worth of instructions. */
  63 #define MAX_INSNS_PER_PATH ((1 << 18) / sizeof(struct sock_filter))
  64
  65 /*
  66  * Endianness is explicitly ignored and left for BPF program authors to manage
  67  * as per the specific architecture.
  68  */
  69 static void populate_seccomp_data(struct seccomp_data *sd)
  70 {
  71         struct task_struct *task = current;
  72         struct pt_regs *regs = task_pt_regs(task);
  73         unsigned long args[6];
  74
  75         sd->nr = syscall_get_nr(task, regs);
  76         sd->arch = syscall_get_arch();
  77         syscall_get_arguments(task, regs, 0, 6, args);
  78         sd->args[0] = args[0];
  79         sd->args[1] = args[1];
  80         sd->args[2] = args[2];
  81         sd->args[3] = args[3];
  82         sd->args[4] = args[4];
  83         sd->args[5] = args[5];
  84         sd->instruction_pointer = KSTK_EIP(task);
  85 }
  86
  87 /**
  88  *      seccomp_check_filter - verify seccomp filter code
  89  *      @filter: filter to verify
  90  *      @flen: length of filter
  91  *
  92  * Takes a previously checked filter (by bpf_check_classic) and
  93  * redirects all filter code that loads struct sk_buff data
  94  * and related data through seccomp_bpf_load.  It also
  95  * enforces length and alignment checking of those loads.
  96  *
  97  * Returns 0 if the rule set is legal or -EINVAL if not.
  98  */
  99 static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
 100 {
 101         int pc;
 102         for (pc = 0; pc < flen; pc++) {
 103                 struct sock_filter *ftest = &filter[pc];
 104                 u16 code = ftest->code;
 105                 u32 k = ftest->k;
 106
 107                 switch (code) {
 108                 case BPF_LD | BPF_W | BPF_ABS:
 109                         ftest->code = BPF_LDX | BPF_W | BPF_ABS;
 110                         /* 32-bit aligned and not out of bounds. */
 111                         if (k >= sizeof(struct seccomp_data) || k & 3)
 112                                 return -EINVAL;
 113                         continue;
 114                 case BPF_LD | BPF_W | BPF_LEN:
 115                         ftest->code = BPF_LD | BPF_IMM;
 116                         ftest->k = sizeof(struct seccomp_data);
 117                         continue;
 118                 case BPF_LDX | BPF_W | BPF_LEN:
 119                         ftest->code = BPF_LDX | BPF_IMM;
 120                         ftest->k = sizeof(struct seccomp_data);
 121                         continue;
 122                 /* Explicitly include allowed calls. */
 123                 case BPF_RET | BPF_K:
 124                 case BPF_RET | BPF_A:
 125                 case BPF_ALU | BPF_ADD | BPF_K:
 126                 case BPF_ALU | BPF_ADD | BPF_X:
 127                 case BPF_ALU | BPF_SUB | BPF_K:
 128                 case BPF_ALU | BPF_SUB | BPF_X:
 129                 case BPF_ALU | BPF_MUL | BPF_K:
 130                 case BPF_ALU | BPF_MUL | BPF_X:
 131                 case BPF_ALU | BPF_DIV | BPF_K:
 132                 case BPF_ALU | BPF_DIV | BPF_X:
 133                 case BPF_ALU | BPF_AND | BPF_K:
 134                 case BPF_ALU | BPF_AND | BPF_X:
 135                 case BPF_ALU | BPF_OR | BPF_K:
 136                 case BPF_ALU | BPF_OR | BPF_X:
 137                 case BPF_ALU | BPF_XOR | BPF_K:
 138                 case BPF_ALU | BPF_XOR | BPF_X:
 139                 case BPF_ALU | BPF_LSH | BPF_K:
 140                 case BPF_ALU | BPF_LSH | BPF_X:
 141                 case BPF_ALU | BPF_RSH | BPF_K:
 142                 case BPF_ALU | BPF_RSH | BPF_X:
 143                 case BPF_ALU | BPF_NEG:
 144                 case BPF_LD | BPF_IMM:
 145                 case BPF_LDX | BPF_IMM:
 146                 case BPF_MISC | BPF_TAX:
 147                 case BPF_MISC | BPF_TXA:
 148                 case BPF_LD | BPF_MEM:
 149                 case BPF_LDX | BPF_MEM:
 150                 case BPF_ST:
 151                 case BPF_STX:
 152                 case BPF_JMP | BPF_JA:
 153                 case BPF_JMP | BPF_JEQ | BPF_K:
 154                 case BPF_JMP | BPF_JEQ | BPF_X:
 155                 case BPF_JMP | BPF_JGE | BPF_K:
 156                 case BPF_JMP | BPF_JGE | BPF_X:
 157                 case BPF_JMP | BPF_JGT | BPF_K:
 158                 case BPF_JMP | BPF_JGT | BPF_X:
 159                 case BPF_JMP | BPF_JSET | BPF_K:
 160                 case BPF_JMP | BPF_JSET | BPF_X:
 161                         continue;
 162                 default:
 163                         return -EINVAL;
 164                 }
 165         }
 166         return 0;
 167 }
 168
 169 /**
 170  * seccomp_run_filters - evaluates all seccomp filters against @syscall
 171  * @syscall: number of the current system call
 172  *
 173  * Returns valid seccomp BPF response codes.
 174  */
 175 static u32 seccomp_run_filters(int syscall)
 176 {
 177         struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter);
 178         struct seccomp_data sd;
 179         u32 ret = SECCOMP_RET_ALLOW;
 180
 181         /* Ensure unexpected behavior doesn't result in failing open. */
 182         if (unlikely(WARN_ON(f == NULL)))
 183                 return SECCOMP_RET_KILL;
 184
 185         /* Make sure cross-thread synced filter points somewhere sane. */
 186         smp_read_barrier_depends();
 187
 188         populate_seccomp_data(&sd);
 189
 190         /*
 191          * All filters in the list are evaluated and the lowest BPF return
 192          * value always takes priority (ignoring the DATA).
 193          */
 194         for (; f; f = f->prev) {
 195                 u32 cur_ret = BPF_PROG_RUN(f->prog, (void *)&sd);
 196
 197                 if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
 198                         ret = cur_ret;
 199         }
 200         return ret;
 201 }
 202 #endif /* CONFIG_SECCOMP_FILTER */
 203
 204 static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
 205 {
 206         assert_spin_locked(&current->sighand->siglock);
 207
 208         if (current->seccomp.mode && current->seccomp.mode != seccomp_mode)
 209                 return false;
 210
 211         return true;
 212 }
 213
 214 static inline void seccomp_assign_mode(struct task_struct *task,
 215                                        unsigned long seccomp_mode)
 216 {
 217         assert_spin_locked(&task->sighand->siglock);
 218
 219         task->seccomp.mode = seccomp_mode;
 220         /*
 221          * Make sure TIF_SECCOMP cannot be set before the mode (and
 222          * filter) is set.
 223          */
 224         smp_mb__before_atomic();
 225         set_tsk_thread_flag(task, TIF_SECCOMP);
 226 }
 227
 228 #ifdef CONFIG_SECCOMP_FILTER
 229 /* Returns 1 if the parent is an ancestor of the child. */
 230 static int is_ancestor(struct seccomp_filter *parent,
 231                        struct seccomp_filter *child)
 232 {
 233         /* NULL is the root ancestor. */
 234         if (parent == NULL)
 235                 return 1;
 236         for (; child; child = child->prev)
 237                 if (child == parent)
 238                         return 1;
 239         return 0;
 240 }
 241
 242 /**
 243  * seccomp_can_sync_threads: checks if all threads can be synchronized
 244  *
 245  * Expects sighand and cred_guard_mutex locks to be held.
 246  *
 247  * Returns 0 on success, -ve on error, or the pid of a thread which was
 248  * either not in the correct seccomp mode or it did not have an ancestral
 249  * seccomp filter.
 250  */
 251 static inline pid_t seccomp_can_sync_threads(void)
 252 {
 253         struct task_struct *thread, *caller;
 254
 255         BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
 256         assert_spin_locked(&current->sighand->siglock);
 257
 258         /* Validate all threads being eligible for synchronization. */
 259         caller = current;
 260         for_each_thread(caller, thread) {
 261                 pid_t failed;
 262
 263                 /* Skip current, since it is initiating the sync. */
 264                 if (thread == caller)
 265                         continue;
 266
 267                 if (thread->seccomp.mode == SECCOMP_MODE_DISABLED ||
 268                     (thread->seccomp.mode == SECCOMP_MODE_FILTER &&
 269                      is_ancestor(thread->seccomp.filter,
 270                                  caller->seccomp.filter)))
 271                         continue;
 272
 273                 /* Return the first thread that cannot be synchronized. */
 274                 failed = task_pid_vnr(thread);
 275                 /* If the pid cannot be resolved, then return -ESRCH */
 276                 if (unlikely(WARN_ON(failed == 0)))
 277                         failed = -ESRCH;
 278                 return failed;
 279         }
 280
 281         return 0;
 282 }
 283
 284 /**
 285  * seccomp_sync_threads: sets all threads to use current's filter
 286  *
 287  * Expects sighand and cred_guard_mutex locks to be held, and for
 288  * seccomp_can_sync_threads() to have returned success already
 289  * without dropping the locks.
 290  *
 291  */
 292 static inline void seccomp_sync_threads(void)
 293 {
 294         struct task_struct *thread, *caller;
 295
 296         BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
 297         assert_spin_locked(&current->sighand->siglock);
 298
 299         /* Synchronize all threads. */
 300         caller = current;
 301         for_each_thread(caller, thread) {
 302                 /* Skip current, since it needs no changes. */
 303                 if (thread == caller)
 304                         continue;
 305
 306                 /* Get a task reference for the new leaf node. */
 307                 get_seccomp_filter(caller);
 308                 /*
 309                  * Drop the task reference to the shared ancestor since
 310                  * current's path will hold a reference.  (This also
 311                  * allows a put before the assignment.)
 312                  */
 313                 put_seccomp_filter(thread);
 314                 smp_store_release(&thread->seccomp.filter,
 315                                   caller->seccomp.filter);
 316                 /*
 317                  * Opt the other thread into seccomp if needed.
 318                  * As threads are considered to be trust-realm
 319                  * equivalent (see ptrace_may_access), it is safe to
 320                  * allow one thread to transition the other.
 321                  */
 322                 if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) {
 323                         /*
 324                          * Don't let an unprivileged task work around
 325                          * the no_new_privs restriction by creating
 326                          * a thread that sets it up, enters seccomp,
 327                          * then dies.
 328                          */
 329                         if (task_no_new_privs(caller))
 330                                 task_set_no_new_privs(thread);
 331
 332                         seccomp_assign_mode(thread, SECCOMP_MODE_FILTER);
 333                 }
 334         }
 335 }
 336
 337 /**
 338  * seccomp_prepare_filter: Prepares a seccomp filter for use.
 339  * @fprog: BPF program to install
 340  *
 341  * Returns filter on success or an ERR_PTR on failure.
 342  */
 343 static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
 344 {
 345         struct seccomp_filter *filter;
 346         unsigned long fp_size;
 347         struct sock_filter *fp;
 348         int new_len;
 349         long ret;
 350
 351         if (fprog->len == 0 || fprog->len > BPF_MAXINSNS)
 352                 return ERR_PTR(-EINVAL);
 353         BUG_ON(INT_MAX / fprog->len < sizeof(struct sock_filter));
 354         fp_size = fprog->len * sizeof(struct sock_filter);
 355
 356         /*
 357          * Installing a seccomp filter requires that the task has
 358          * CAP_SYS_ADMIN in its namespace or be running with no_new_privs.
 359          * This avoids scenarios where unprivileged tasks can affect the
 360          * behavior of privileged children.
 361          */
 362         if (!task_no_new_privs(current) &&
 363             security_capable_noaudit(current_cred(), current_user_ns(),
 364                                      CAP_SYS_ADMIN) != 0)
 365                 return ERR_PTR(-EACCES);
 366
 367         fp = kzalloc(fp_size, GFP_KERNEL|__GFP_NOWARN);
 368         if (!fp)
 369                 return ERR_PTR(-ENOMEM);
 370
 371         /* Copy the instructions from fprog. */
 372         ret = -EFAULT;
 373         if (copy_from_user(fp, fprog->filter, fp_size))
 374                 goto free_prog;
 375
 376         /* Check and rewrite the fprog via the skb checker */
 377         ret = bpf_check_classic(fp, fprog->len);
 378         if (ret)
 379                 goto free_prog;
 380
 381         /* Check and rewrite the fprog for seccomp use */
 382         ret = seccomp_check_filter(fp, fprog->len);
 383         if (ret)
 384                 goto free_prog;
 385
 386         /* Convert 'sock_filter' insns to 'bpf_insn' insns */
 387         ret = bpf_convert_filter(fp, fprog->len, NULL, &new_len);
 388         if (ret)
 389                 goto free_prog;
 390
 391         /* Allocate a new seccomp_filter */
 392         ret = -ENOMEM;
 393         filter = kzalloc(sizeof(struct seccomp_filter),
 394                          GFP_KERNEL|__GFP_NOWARN);
 395         if (!filter)
 396                 goto free_prog;
 397
 398         filter->prog = kzalloc(bpf_prog_size(new_len),
 399                                GFP_KERNEL|__GFP_NOWARN);
 400         if (!filter->prog)
 401                 goto free_filter;
 402
 403         ret = bpf_convert_filter(fp, fprog->len, filter->prog->insnsi, &new_len);
 404         if (ret)
 405                 goto free_filter_prog;
 406         kfree(fp);
 407
 408         atomic_set(&filter->usage, 1);
 409         filter->prog->len = new_len;
 410
 411         bpf_prog_select_runtime(filter->prog);
 412
 413         return filter;
 414
 415 free_filter_prog:
 416         kfree(filter->prog);
 417 free_filter:
 418         kfree(filter);
 419 free_prog:
 420         kfree(fp);
 421         return ERR_PTR(ret);
 422 }
 423
 424 /**
 425  * seccomp_prepare_user_filter - prepares a user-supplied sock_fprog
 426  * @user_filter: pointer to the user data containing a sock_fprog.
 427  *
 428  * Returns 0 on success and non-zero otherwise.
 429  */
 430 static struct seccomp_filter *
 431 seccomp_prepare_user_filter(const char __user *user_filter)
 432 {
 433         struct sock_fprog fprog;
 434         struct seccomp_filter *filter = ERR_PTR(-EFAULT);
 435
 436 #ifdef CONFIG_COMPAT
 437         if (is_compat_task()) {
 438                 struct compat_sock_fprog fprog32;
 439                 if (copy_from_user(&fprog32, user_filter, sizeof(fprog32)))
 440                         goto out;
 441                 fprog.len = fprog32.len;
 442                 fprog.filter = compat_ptr(fprog32.filter);
 443         } else /* falls through to the if below. */
 444 #endif
 445         if (copy_from_user(&fprog, user_filter, sizeof(fprog)))
 446                 goto out;
 447         filter = seccomp_prepare_filter(&fprog);
 448 out:
 449         return filter;
 450 }
 451
 452 /**
 453  * seccomp_attach_filter: validate and attach filter
 454  * @flags:  flags to change filter behavior
 455  * @filter: seccomp filter to add to the current process
 456  *
 457  * Caller must be holding current->sighand->siglock lock.
 458  *
 459  * Returns 0 on success, -ve on error.
 460  */
 461 static long seccomp_attach_filter(unsigned int flags,
 462                                   struct seccomp_filter *filter)
 463 {
 464         unsigned long total_insns;
 465         struct seccomp_filter *walker;
 466
 467         assert_spin_locked(&current->sighand->siglock);
 468
 469         /* Validate resulting filter length. */
 470         total_insns = filter->prog->len;
 471         for (walker = current->seccomp.filter; walker; walker = walker->prev)
 472                 total_insns += walker->prog->len + 4;  /* 4 instr penalty */
 473         if (total_insns > MAX_INSNS_PER_PATH)
 474                 return -ENOMEM;
 475
 476         /* If thread sync has been requested, check that it is possible. */
 477         if (flags & SECCOMP_FILTER_FLAG_TSYNC) {
 478                 int ret;
 479
 480                 ret = seccomp_can_sync_threads();
 481                 if (ret)
 482                         return ret;
 483         }
 484
 485         /*
 486          * If there is an existing filter, make it the prev and don't drop its
 487          * task reference.
 488          */
 489         filter->prev = current->seccomp.filter;
 490         current->seccomp.filter = filter;
 491
 492         /* Now that the new filter is in place, synchronize to all threads. */
 493         if (flags & SECCOMP_FILTER_FLAG_TSYNC)
 494                 seccomp_sync_threads();
 495
 496         return 0;
 497 }
 498
 499 /* get_seccomp_filter - increments the reference count of the filter on @tsk */
 500 void get_seccomp_filter(struct task_struct *tsk)
 501 {
 502         struct seccomp_filter *orig = tsk->seccomp.filter;
 503         if (!orig)
 504                 return;
 505         /* Reference count is bounded by the number of total processes. */
 506         atomic_inc(&orig->usage);
 507 }
 508
 509 static inline void seccomp_filter_free(struct seccomp_filter *filter)
 510 {
 511         if (filter) {
 512                 bpf_prog_free(filter->prog);
 513                 kfree(filter);
 514         }
 515 }
 516
 517 /* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */
 518 void put_seccomp_filter(struct task_struct *tsk)
 519 {
 520         struct seccomp_filter *orig = tsk->seccomp.filter;
 521         /* Clean up single-reference branches iteratively. */
 522         while (orig && atomic_dec_and_test(&orig->usage)) {
 523                 struct seccomp_filter *freeme = orig;
 524                 orig = orig->prev;
 525                 seccomp_filter_free(freeme);
 526         }
 527 }
 528
 529 /**
 530  * seccomp_send_sigsys - signals the task to allow in-process syscall emulation
 531  * @syscall: syscall number to send to userland
 532  * @reason: filter-supplied reason code to send to userland (via si_errno)
 533  *
 534  * Forces a SIGSYS with a code of SYS_SECCOMP and related sigsys info.
 535  */
 536 static void seccomp_send_sigsys(int syscall, int reason)
 537 {
 538         struct siginfo info;
 539         memset(&info, 0, sizeof(info));
 540         info.si_signo = SIGSYS;
 541         info.si_code = SYS_SECCOMP;
 542         info.si_call_addr = (void __user *)KSTK_EIP(current);
 543         info.si_errno = reason;
 544         info.si_arch = syscall_get_arch();
 545         info.si_syscall = syscall;
 546         force_sig_info(SIGSYS, &info, current);
 547 }
 548 #endif  /* CONFIG_SECCOMP_FILTER */
 549
 550 /*
 551  * Secure computing mode 1 allows only read/write/exit/sigreturn.
 552  * To be fully secure this must be combined with rlimit
 553  * to limit the stack allocations too.
 554  */
 555 static int mode1_syscalls[] = {
 556         __NR_seccomp_read, __NR_seccomp_write, __NR_seccomp_exit, __NR_seccomp_sigreturn,
 557         0, /* null terminated */
 558 };
 559
 560 #ifdef CONFIG_COMPAT
 561 static int mode1_syscalls_32[] = {
 562         __NR_seccomp_read_32, __NR_seccomp_write_32, __NR_seccomp_exit_32, __NR_seccomp_sigreturn_32,
 563         0, /* null terminated */
 564 };
 565 #endif
 566
 567 int __secure_computing(int this_syscall)
 568 {
 569         int exit_sig = 0;
 570         int *syscall;
 571         u32 ret;
 572
 573         /*
 574          * Make sure that any changes to mode from another thread have
 575          * been seen after TIF_SECCOMP was seen.
 576          */
 577         rmb();
 578
 579         switch (current->seccomp.mode) {
 580         case SECCOMP_MODE_STRICT:
 581                 syscall = mode1_syscalls;
 582 #ifdef CONFIG_COMPAT
 583                 if (is_compat_task())
 584                         syscall = mode1_syscalls_32;
 585 #endif
 586                 do {
 587                         if (*syscall == this_syscall)
 588                                 return 0;
 589                 } while (*++syscall);
 590                 exit_sig = SIGKILL;
 591                 ret = SECCOMP_RET_KILL;
 592                 break;
 593 #ifdef CONFIG_SECCOMP_FILTER
 594         case SECCOMP_MODE_FILTER: {
 595                 int data;
 596                 struct pt_regs *regs = task_pt_regs(current);
 597                 ret = seccomp_run_filters(this_syscall);
 598                 data = ret & SECCOMP_RET_DATA;
 599                 ret &= SECCOMP_RET_ACTION;
 600                 switch (ret) {
 601                 case SECCOMP_RET_ERRNO:
 602                         /* Set the low-order 16-bits as a errno. */
 603                         syscall_set_return_value(current, regs,
 604                                                  -data, 0);
 605                         goto skip;
 606                 case SECCOMP_RET_TRAP:
 607                         /* Show the handler the original registers. */
 608                         syscall_rollback(current, regs);
 609                         /* Let the filter pass back 16 bits of data. */
 610                         seccomp_send_sigsys(this_syscall, data);
 611                         goto skip;
 612                 case SECCOMP_RET_TRACE:
 613                         /* Skip these calls if there is no tracer. */
 614                         if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) {
 615                                 syscall_set_return_value(current, regs,
 616                                                          -ENOSYS, 0);
 617                                 goto skip;
 618                         }
 619                         /* Allow the BPF to provide the event message */
 620                         ptrace_event(PTRACE_EVENT_SECCOMP, data);
 621                         /*
 622                          * The delivery of a fatal signal during event
 623                          * notification may silently skip tracer notification.
 624                          * Terminating the task now avoids executing a system
 625                          * call that may not be intended.
 626                          */
 627                         if (fatal_signal_pending(current))
 628                                 break;
 629                         if (syscall_get_nr(current, regs) < 0)
 630                                 goto skip;  /* Explicit request to skip. */
 631
 632                         return 0;
 633                 case SECCOMP_RET_ALLOW:
 634                         return 0;
 635                 case SECCOMP_RET_KILL:
 636                 default:
 637                         break;
 638                 }
 639                 exit_sig = SIGSYS;
 640                 break;
 641         }
 642 #endif
 643         default:
 644                 BUG();
 645         }
 646
 647 #ifdef SECCOMP_DEBUG
 648         dump_stack();
 649 #endif
 650         audit_seccomp(this_syscall, exit_sig, ret);
 651         do_exit(exit_sig);
 652 #ifdef CONFIG_SECCOMP_FILTER
 653 skip:
 654         audit_seccomp(this_syscall, exit_sig, ret);
 655 #endif
 656         return -1;
 657 }
 658
 659 long prctl_get_seccomp(void)
 660 {
 661         return current->seccomp.mode;
 662 }
 663
 664 /**
 665  * seccomp_set_mode_strict: internal function for setting strict seccomp
 666  *
 667  * Once current->seccomp.mode is non-zero, it may not be changed.
 668  *
 669  * Returns 0 on success or -EINVAL on failure.
 670  */
 671 static long seccomp_set_mode_strict(void)
 672 {
 673         const unsigned long seccomp_mode = SECCOMP_MODE_STRICT;
 674         long ret = -EINVAL;
 675
 676         spin_lock_irq(&current->sighand->siglock);
 677
 678         if (!seccomp_may_assign_mode(seccomp_mode))
 679                 goto out;
 680
 681 #ifdef TIF_NOTSC
 682         disable_TSC();
 683 #endif
 684         seccomp_assign_mode(current, seccomp_mode);
 685         ret = 0;
 686
 687 out:
 688         spin_unlock_irq(&current->sighand->siglock);
 689
 690         return ret;
 691 }
 692
 693 #ifdef CONFIG_SECCOMP_FILTER
 694 /**
 695  * seccomp_set_mode_filter: internal function for setting seccomp filter
 696  * @flags:  flags to change filter behavior
 697  * @filter: struct sock_fprog containing filter
 698  *
 699  * This function may be called repeatedly to install additional filters.
 700  * Every filter successfully installed will be evaluated (in reverse order)
 701  * for each system call the task makes.
 702  *
 703  * Once current->seccomp.mode is non-zero, it may not be changed.
 704  *
 705  * Returns 0 on success or -EINVAL on failure.
 706  */
 707 static long seccomp_set_mode_filter(unsigned int flags,
 708                                     const char __user *filter)
 709 {
 710         const unsigned long seccomp_mode = SECCOMP_MODE_FILTER;
 711         struct seccomp_filter *prepared = NULL;
 712         long ret = -EINVAL;
 713
 714         /* Validate flags. */
 715         if (flags & ~SECCOMP_FILTER_FLAG_MASK)
 716                 return -EINVAL;
 717
 718         /* Prepare the new filter before holding any locks. */
 719         prepared = seccomp_prepare_user_filter(filter);
 720         if (IS_ERR(prepared))
 721                 return PTR_ERR(prepared);
 722
 723         /*
 724          * Make sure we cannot change seccomp or nnp state via TSYNC
 725          * while another thread is in the middle of calling exec.
 726          */
 727         if (flags & SECCOMP_FILTER_FLAG_TSYNC &&
 728             mutex_lock_killable(&current->signal->cred_guard_mutex))
 729                 goto out_free;
 730
 731         spin_lock_irq(&current->sighand->siglock);
 732
 733         if (!seccomp_may_assign_mode(seccomp_mode))
 734                 goto out;
 735
 736         ret = seccomp_attach_filter(flags, prepared);
 737         if (ret)
 738                 goto out;
 739         /* Do not free the successfully attached filter. */
 740         prepared = NULL;
 741
 742         seccomp_assign_mode(current, seccomp_mode);
 743 out:
 744         spin_unlock_irq(&current->sighand->siglock);
 745         if (flags & SECCOMP_FILTER_FLAG_TSYNC)
 746                 mutex_unlock(&current->signal->cred_guard_mutex);
 747 out_free:
 748         seccomp_filter_free(prepared);
 749         return ret;
 750 }
 751 #else
 752 static inline long seccomp_set_mode_filter(unsigned int flags,
 753                                            const char __user *filter)
 754 {
 755         return -EINVAL;
 756 }
 757 #endif
 758
 759 /* Common entry point for both prctl and syscall. */
 760 static long do_seccomp(unsigned int op, unsigned int flags,
 761                        const char __user *uargs)
 762 {
 763         switch (op) {
 764         case SECCOMP_SET_MODE_STRICT:
 765                 if (flags != 0 || uargs != NULL)
 766                         return -EINVAL;
 767                 return seccomp_set_mode_strict();
 768         case SECCOMP_SET_MODE_FILTER:
 769                 return seccomp_set_mode_filter(flags, uargs);
 770         default:
 771                 return -EINVAL;
 772         }
 773 }
 774
 775 SYSCALL_DEFINE3(seccomp, unsigned int, op, unsigned int, flags,
 776                          const char __user *, uargs)
 777 {
 778         return do_seccomp(op, flags, uargs);
 779 }
 780
 781 /**
 782  * prctl_set_seccomp: configures current->seccomp.mode
 783  * @seccomp_mode: requested mode to use
 784  * @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER
 785  *
 786  * Returns 0 on success or -EINVAL on failure.
 787  */
 788 long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter)
 789 {
 790         unsigned int op;
 791         char __user *uargs;
 792
 793         switch (seccomp_mode) {
 794         case SECCOMP_MODE_STRICT:
 795                 op = SECCOMP_SET_MODE_STRICT;
 796                 /*
 797                  * Setting strict mode through prctl always ignored filter,
 798                  * so make sure it is always NULL here to pass the internal
 799                  * check in do_seccomp().
 800                  */
 801                 uargs = NULL;
 802                 break;
 803         case SECCOMP_MODE_FILTER:
 804                 op = SECCOMP_SET_MODE_FILTER;
 805                 uargs = filter;
 806                 break;
 807         default:
 808                 return -EINVAL;
 809         }
 810
 811         /* prctl interface doesn't have flags, so they are always zero. */
 812         return do_seccomp(op, 0, uargs);
 813 }