summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn Denker <jsd@av8n.com>2013-10-06 18:57:27 -0700
committerJohn Denker <jsd@av8n.com>2013-10-18 05:19:01 -0700
commitc121dfb5fe8e49c2f4d867730c001a337f6d2813 (patch)
treece1a5e26cef9e2bcc63cc53f88ada29cca0e125d
parent634d365a03cb0581a062cd3cf4db9ae69f1cde26 (diff)
original, as distributed
-rw-r--r--drivers/char/random.c1515
-rw-r--r--include/linux/sysctl.h217
-rw-r--r--include/trace/events/random.h134
-rw-r--r--kernel/sysctl.c2671
4 files changed, 4537 insertions, 0 deletions
diff --git a/drivers/char/random.c b/drivers/char/random.c
new file mode 100644
index 0000000..0d91fe5
--- /dev/null
+++ b/drivers/char/random.c
@@ -0,0 +1,1515 @@
+/*
+ * random.c -- A strong random number generator
+ *
+ * Copyright Matt Mackall <mpm@selenic.com>, 2003, 2004, 2005
+ *
+ * Copyright Theodore Ts'o, 1994, 1995, 1996, 1997, 1998, 1999. All
+ * rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, and the entire permission notice in its entirety,
+ * including the disclaimer of warranties.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * ALTERNATIVELY, this product may be distributed under the terms of
+ * the GNU General Public License, in which case the provisions of the GPL are
+ * required INSTEAD OF the above restrictions. (This clause is
+ * necessary due to a potential bad interaction between the GPL and
+ * the restrictions contained in a BSD-style copyright.)
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
+ * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+
+/*
+ * (now, with legal B.S. out of the way.....)
+ *
+ * This routine gathers environmental noise from device drivers, etc.,
+ * and returns good random numbers, suitable for cryptographic use.
+ * Besides the obvious cryptographic uses, these numbers are also good
+ * for seeding TCP sequence numbers, and other places where it is
+ * desirable to have numbers which are not only random, but hard to
+ * predict by an attacker.
+ *
+ * Theory of operation
+ * ===================
+ *
+ * Computers are very predictable devices. Hence it is extremely hard
+ * to produce truly random numbers on a computer --- as opposed to
+ * pseudo-random numbers, which can easily generated by using a
+ * algorithm. Unfortunately, it is very easy for attackers to guess
+ * the sequence of pseudo-random number generators, and for some
+ * applications this is not acceptable. So instead, we must try to
+ * gather "environmental noise" from the computer's environment, which
+ * must be hard for outside attackers to observe, and use that to
+ * generate random numbers. In a Unix environment, this is best done
+ * from inside the kernel.
+ *
+ * Sources of randomness from the environment include inter-keyboard
+ * timings, inter-interrupt timings from some interrupts, and other
+ * events which are both (a) non-deterministic and (b) hard for an
+ * outside observer to measure. Randomness from these sources are
+ * added to an "entropy pool", which is mixed using a CRC-like function.
+ * This is not cryptographically strong, but it is adequate assuming
+ * the randomness is not chosen maliciously, and it is fast enough that
+ * the overhead of doing it on every interrupt is very reasonable.
+ * As random bytes are mixed into the entropy pool, the routines keep
+ * an *estimate* of how many bits of randomness have been stored into
+ * the random number generator's internal state.
+ *
+ * When random bytes are desired, they are obtained by taking the SHA
+ * hash of the contents of the "entropy pool". The SHA hash avoids
+ * exposing the internal state of the entropy pool. It is believed to
+ * be computationally infeasible to derive any useful information
+ * about the input of SHA from its output. Even if it is possible to
+ * analyze SHA in some clever way, as long as the amount of data
+ * returned from the generator is less than the inherent entropy in
+ * the pool, the output data is totally unpredictable. For this
+ * reason, the routine decreases its internal estimate of how many
+ * bits of "true randomness" are contained in the entropy pool as it
+ * outputs random numbers.
+ *
+ * If this estimate goes to zero, the routine can still generate
+ * random numbers; however, an attacker may (at least in theory) be
+ * able to infer the future output of the generator from prior
+ * outputs. This requires successful cryptanalysis of SHA, which is
+ * not believed to be feasible, but there is a remote possibility.
+ * Nonetheless, these numbers should be useful for the vast majority
+ * of purposes.
+ *
+ * Exported interfaces ---- output
+ * ===============================
+ *
+ * There are three exported interfaces; the first is one designed to
+ * be used from within the kernel:
+ *
+ * void get_random_bytes(void *buf, int nbytes);
+ *
+ * This interface will return the requested number of random bytes,
+ * and place it in the requested buffer.
+ *
+ * The two other interfaces are two character devices /dev/random and
+ * /dev/urandom. /dev/random is suitable for use when very high
+ * quality randomness is desired (for example, for key generation or
+ * one-time pads), as it will only return a maximum of the number of
+ * bits of randomness (as estimated by the random number generator)
+ * contained in the entropy pool.
+ *
+ * The /dev/urandom device does not have this limit, and will return
+ * as many bytes as are requested. As more and more random bytes are
+ * requested without giving time for the entropy pool to recharge,
+ * this will result in random numbers that are merely cryptographically
+ * strong. For many applications, however, this is acceptable.
+ *
+ * Exported interfaces ---- input
+ * ==============================
+ *
+ * The current exported interfaces for gathering environmental noise
+ * from the devices are:
+ *
+ * void add_device_randomness(const void *buf, unsigned int size);
+ * void add_input_randomness(unsigned int type, unsigned int code,
+ * unsigned int value);
+ * void add_interrupt_randomness(int irq, int irq_flags);
+ * void add_disk_randomness(struct gendisk *disk);
+ *
+ * add_device_randomness() is for adding data to the random pool that
+ * is likely to differ between two devices (or possibly even per boot).
+ * This would be things like MAC addresses or serial numbers, or the
+ * read-out of the RTC. This does *not* add any actual entropy to the
+ * pool, but it initializes the pool to different values for devices
+ * that might otherwise be identical and have very little entropy
+ * available to them (particularly common in the embedded world).
+ *
+ * add_input_randomness() uses the input layer interrupt timing, as well as
+ * the event type information from the hardware.
+ *
+ * add_interrupt_randomness() uses the interrupt timing as random
+ * inputs to the entropy pool. Using the cycle counters and the irq source
+ * as inputs, it feeds the randomness roughly once a second.
+ *
+ * add_disk_randomness() uses what amounts to the seek time of block
+ * layer request events, on a per-disk_devt basis, as input to the
+ * entropy pool. Note that high-speed solid state drives with very low
+ * seek times do not make for good sources of entropy, as their seek
+ * times are usually fairly consistent.
+ *
+ * All of these routines try to estimate how many bits of randomness a
+ * particular randomness source. They do this by keeping track of the
+ * first and second order deltas of the event timings.
+ *
+ * Ensuring unpredictability at system startup
+ * ============================================
+ *
+ * When any operating system starts up, it will go through a sequence
+ * of actions that are fairly predictable by an adversary, especially
+ * if the start-up does not involve interaction with a human operator.
+ * This reduces the actual number of bits of unpredictability in the
+ * entropy pool below the value in entropy_count. In order to
+ * counteract this effect, it helps to carry information in the
+ * entropy pool across shut-downs and start-ups. To do this, put the
+ * following lines an appropriate script which is run during the boot
+ * sequence:
+ *
+ * echo "Initializing random number generator..."
+ * random_seed=/var/run/random-seed
+ * # Carry a random seed from start-up to start-up
+ * # Load and then save the whole entropy pool
+ * if [ -f $random_seed ]; then
+ * cat $random_seed >/dev/urandom
+ * else
+ * touch $random_seed
+ * fi
+ * chmod 600 $random_seed
+ * dd if=/dev/urandom of=$random_seed count=1 bs=512
+ *
+ * and the following lines in an appropriate script which is run as
+ * the system is shutdown:
+ *
+ * # Carry a random seed from shut-down to start-up
+ * # Save the whole entropy pool
+ * echo "Saving random seed..."
+ * random_seed=/var/run/random-seed
+ * touch $random_seed
+ * chmod 600 $random_seed
+ * dd if=/dev/urandom of=$random_seed count=1 bs=512
+ *
+ * For example, on most modern systems using the System V init
+ * scripts, such code fragments would be found in
+ * /etc/rc.d/init.d/random. On older Linux systems, the correct script
+ * location might be in /etc/rcb.d/rc.local or /etc/rc.d/rc.0.
+ *
+ * Effectively, these commands cause the contents of the entropy pool
+ * to be saved at shut-down time and reloaded into the entropy pool at
+ * start-up. (The 'dd' in the addition to the bootup script is to
+ * make sure that /etc/random-seed is different for every start-up,
+ * even if the system crashes without executing rc.0.) Even with
+ * complete knowledge of the start-up activities, predicting the state
+ * of the entropy pool requires knowledge of the previous history of
+ * the system.
+ *
+ * Configuring the /dev/random driver under Linux
+ * ==============================================
+ *
+ * The /dev/random driver under Linux uses minor numbers 8 and 9 of
+ * the /dev/mem major number (#1). So if your system does not have
+ * /dev/random and /dev/urandom created already, they can be created
+ * by using the commands:
+ *
+ * mknod /dev/random c 1 8
+ * mknod /dev/urandom c 1 9
+ *
+ * Acknowledgements:
+ * =================
+ *
+ * Ideas for constructing this random number generator were derived
+ * from Pretty Good Privacy's random number generator, and from private
+ * discussions with Phil Karn. Colin Plumb provided a faster random
+ * number generator, which speed up the mixing function of the entropy
+ * pool, taken from PGPfone. Dale Worley has also contributed many
+ * useful ideas and suggestions to improve this driver.
+ *
+ * Any flaws in the design are solely my responsibility, and should
+ * not be attributed to the Phil, Colin, or any of authors of PGP.
+ *
+ * Further background information on this topic may be obtained from
+ * RFC 1750, "Randomness Recommendations for Security", by Donald
+ * Eastlake, Steve Crocker, and Jeff Schiller.
+ */
+
+#include <linux/utsname.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/string.h>
+#include <linux/fcntl.h>
+#include <linux/slab.h>
+#include <linux/random.h>
+#include <linux/poll.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/genhd.h>
+#include <linux/interrupt.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/percpu.h>
+#include <linux/cryptohash.h>
+#include <linux/fips.h>
+#include <linux/ptrace.h>
+#include <linux/kmemcheck.h>
+
+#ifdef CONFIG_GENERIC_HARDIRQS
+# include <linux/irq.h>
+#endif
+
+#include <asm/processor.h>
+#include <asm/uaccess.h>
+#include <asm/irq.h>
+#include <asm/irq_regs.h>
+#include <asm/io.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/random.h>
+
+/*
+ * Configuration information
+ */
+#define INPUT_POOL_WORDS 128
+#define OUTPUT_POOL_WORDS 32
+#define SEC_XFER_SIZE 512
+#define EXTRACT_SIZE 10
+
+#define LONGS(x) (((x) + sizeof(unsigned long) - 1)/sizeof(unsigned long))
+
+/*
+ * The minimum number of bits of entropy before we wake up a read on
+ * /dev/random. Should be enough to do a significant reseed.
+ */
+static int random_read_wakeup_thresh = 64;
+
+/*
+ * If the entropy count falls under this number of bits, then we
+ * should wake up processes which are selecting or polling on write
+ * access to /dev/random.
+ */
+static int random_write_wakeup_thresh = 128;
+
+/*
+ * When the input pool goes over trickle_thresh, start dropping most
+ * samples to avoid wasting CPU time and reduce lock contention.
+ */
+
+static int trickle_thresh __read_mostly = INPUT_POOL_WORDS * 28;
+
+static DEFINE_PER_CPU(int, trickle_count);
+
+/*
+ * A pool of size .poolwords is stirred with a primitive polynomial
+ * of degree .poolwords over GF(2). The taps for various sizes are
+ * defined below. They are chosen to be evenly spaced (minimum RMS
+ * distance from evenly spaced; the numbers in the comments are a
+ * scaled squared error sum) except for the last tap, which is 1 to
+ * get the twisting happening as fast as possible.
+ */
+static struct poolinfo {
+ int poolwords;
+ int tap1, tap2, tap3, tap4, tap5;
+} poolinfo_table[] = {
+ /* x^128 + x^103 + x^76 + x^51 +x^25 + x + 1 -- 105 */
+ { 128, 103, 76, 51, 25, 1 },
+ /* x^32 + x^26 + x^20 + x^14 + x^7 + x + 1 -- 15 */
+ { 32, 26, 20, 14, 7, 1 },
+#if 0
+ /* x^2048 + x^1638 + x^1231 + x^819 + x^411 + x + 1 -- 115 */
+ { 2048, 1638, 1231, 819, 411, 1 },
+
+ /* x^1024 + x^817 + x^615 + x^412 + x^204 + x + 1 -- 290 */
+ { 1024, 817, 615, 412, 204, 1 },
+
+ /* x^1024 + x^819 + x^616 + x^410 + x^207 + x^2 + 1 -- 115 */
+ { 1024, 819, 616, 410, 207, 2 },
+
+ /* x^512 + x^411 + x^308 + x^208 + x^104 + x + 1 -- 225 */
+ { 512, 411, 308, 208, 104, 1 },
+
+ /* x^512 + x^409 + x^307 + x^206 + x^102 + x^2 + 1 -- 95 */
+ { 512, 409, 307, 206, 102, 2 },
+ /* x^512 + x^409 + x^309 + x^205 + x^103 + x^2 + 1 -- 95 */
+ { 512, 409, 309, 205, 103, 2 },
+
+ /* x^256 + x^205 + x^155 + x^101 + x^52 + x + 1 -- 125 */
+ { 256, 205, 155, 101, 52, 1 },
+
+ /* x^128 + x^103 + x^78 + x^51 + x^27 + x^2 + 1 -- 70 */
+ { 128, 103, 78, 51, 27, 2 },
+
+ /* x^64 + x^52 + x^39 + x^26 + x^14 + x + 1 -- 15 */
+ { 64, 52, 39, 26, 14, 1 },
+#endif
+};
+
+#define POOLBITS poolwords*32
+#define POOLBYTES poolwords*4
+
+/*
+ * For the purposes of better mixing, we use the CRC-32 polynomial as
+ * well to make a twisted Generalized Feedback Shift Reigster
+ *
+ * (See M. Matsumoto & Y. Kurita, 1992. Twisted GFSR generators. ACM
+ * Transactions on Modeling and Computer Simulation 2(3):179-194.
+ * Also see M. Matsumoto & Y. Kurita, 1994. Twisted GFSR generators
+ * II. ACM Transactions on Mdeling and Computer Simulation 4:254-266)
+ *
+ * Thanks to Colin Plumb for suggesting this.
+ *
+ * We have not analyzed the resultant polynomial to prove it primitive;
+ * in fact it almost certainly isn't. Nonetheless, the irreducible factors
+ * of a random large-degree polynomial over GF(2) are more than large enough
+ * that periodicity is not a concern.
+ *
+ * The input hash is much less sensitive than the output hash. All
+ * that we want of it is that it be a good non-cryptographic hash;
+ * i.e. it not produce collisions when fed "random" data of the sort
+ * we expect to see. As long as the pool state differs for different
+ * inputs, we have preserved the input entropy and done a good job.
+ * The fact that an intelligent attacker can construct inputs that
+ * will produce controlled alterations to the pool's state is not
+ * important because we don't consider such inputs to contribute any
+ * randomness. The only property we need with respect to them is that
+ * the attacker can't increase his/her knowledge of the pool's state.
+ * Since all additions are reversible (knowing the final state and the
+ * input, you can reconstruct the initial state), if an attacker has
+ * any uncertainty about the initial state, he/she can only shuffle
+ * that uncertainty about, but never cause any collisions (which would
+ * decrease the uncertainty).
+ *
+ * The chosen system lets the state of the pool be (essentially) the input
+ * modulo the generator polymnomial. Now, for random primitive polynomials,
+ * this is a universal class of hash functions, meaning that the chance
+ * of a collision is limited by the attacker's knowledge of the generator
+ * polynomail, so if it is chosen at random, an attacker can never force
+ * a collision. Here, we use a fixed polynomial, but we *can* assume that
+ * ###--> it is unknown to the processes generating the input entropy. <-###
+ * Because of this important property, this is a good, collision-resistant
+ * hash; hash collisions will occur no more often than chance.
+ */
+
+/*
+ * Static global variables
+ */
+static DECLARE_WAIT_QUEUE_HEAD(random_read_wait);
+static DECLARE_WAIT_QUEUE_HEAD(random_write_wait);
+static struct fasync_struct *fasync;
+
+static bool debug;
+module_param(debug, bool, 0644);
+#define DEBUG_ENT(fmt, arg...) do { \
+ if (debug) \
+ printk(KERN_DEBUG "random %04d %04d %04d: " \
+ fmt,\
+ input_pool.entropy_count,\
+ blocking_pool.entropy_count,\
+ nonblocking_pool.entropy_count,\
+ ## arg); } while (0)
+
+/**********************************************************************
+ *
+ * OS independent entropy store. Here are the functions which handle
+ * storing entropy in an entropy pool.
+ *
+ **********************************************************************/
+
+struct entropy_store;
+struct entropy_store {
+ /* read-only data: */
+ struct poolinfo *poolinfo;
+ __u32 *pool;
+ const char *name;
+ struct entropy_store *pull;
+ int limit;
+
+ /* read-write data: */
+ spinlock_t lock;
+ unsigned add_ptr;
+ unsigned input_rotate;
+ int entropy_count;
+ int entropy_total;
+ unsigned int initialized:1;
+ bool last_data_init;
+ __u8 last_data[EXTRACT_SIZE];
+};
+
+static __u32 input_pool_data[INPUT_POOL_WORDS];
+static __u32 blocking_pool_data[OUTPUT_POOL_WORDS];
+static __u32 nonblocking_pool_data[OUTPUT_POOL_WORDS];
+
+static struct entropy_store input_pool = {
+ .poolinfo = &poolinfo_table[0],
+ .name = "input",
+ .limit = 1,
+ .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock),
+ .pool = input_pool_data
+};
+
+static struct entropy_store blocking_pool = {
+ .poolinfo = &poolinfo_table[1],
+ .name = "blocking",
+ .limit = 1,
+ .pull = &input_pool,
+ .lock = __SPIN_LOCK_UNLOCKED(blocking_pool.lock),
+ .pool = blocking_pool_data
+};
+
+static struct entropy_store nonblocking_pool = {
+ .poolinfo = &poolinfo_table[1],
+ .name = "nonblocking",
+ .pull = &input_pool,
+ .lock = __SPIN_LOCK_UNLOCKED(nonblocking_pool.lock),
+ .pool = nonblocking_pool_data
+};
+
+static __u32 const twist_table[8] = {
+ 0x00000000, 0x3b6e20c8, 0x76dc4190, 0x4db26158,
+ 0xedb88320, 0xd6d6a3e8, 0x9b64c2b0, 0xa00ae278 };
+
+/*
+ * This function adds bytes into the entropy "pool". It does not
+ * update the entropy estimate. The caller should call
+ * credit_entropy_bits if this is appropriate.
+ *
+ * The pool is stirred with a primitive polynomial of the appropriate
+ * degree, and then twisted. We twist by three bits at a time because
+ * it's cheap to do so and helps slightly in the expected case where
+ * the entropy is concentrated in the low-order bits.
+ */
+static void _mix_pool_bytes(struct entropy_store *r, const void *in,
+ int nbytes, __u8 out[64])
+{
+ unsigned long i, j, tap1, tap2, tap3, tap4, tap5;
+ int input_rotate;
+ int wordmask = r->poolinfo->poolwords - 1;
+ const char *bytes = in;
+ __u32 w;
+
+ tap1 = r->poolinfo->tap1;
+ tap2 = r->poolinfo->tap2;
+ tap3 = r->poolinfo->tap3;
+ tap4 = r->poolinfo->tap4;
+ tap5 = r->poolinfo->tap5;
+
+ smp_rmb();
+ input_rotate = ACCESS_ONCE(r->input_rotate);
+ i = ACCESS_ONCE(r->add_ptr);
+
+ /* mix one byte at a time to simplify size handling and churn faster */
+ while (nbytes--) {
+ w = rol32(*bytes++, input_rotate & 31);
+ i = (i - 1) & wordmask;
+
+ /* XOR in the various taps */
+ w ^= r->pool[i];
+ w ^= r->pool[(i + tap1) & wordmask];
+ w ^= r->pool[(i + tap2) & wordmask];
+ w ^= r->pool[(i + tap3) & wordmask];
+ w ^= r->pool[(i + tap4) & wordmask];
+ w ^= r->pool[(i + tap5) & wordmask];
+
+ /* Mix the result back in with a twist */
+ r->pool[i] = (w >> 3) ^ twist_table[w & 7];
+
+ /*
+ * Normally, we add 7 bits of rotation to the pool.
+ * At the beginning of the pool, add an extra 7 bits
+ * rotation, so that successive passes spread the
+ * input bits across the pool evenly.
+ */
+ input_rotate += i ? 7 : 14;
+ }
+
+ ACCESS_ONCE(r->input_rotate) = input_rotate;
+ ACCESS_ONCE(r->add_ptr) = i;
+ smp_wmb();
+
+ if (out)
+ for (j = 0; j < 16; j++)
+ ((__u32 *)out)[j] = r->pool[(i - j) & wordmask];
+}
+
+static void __mix_pool_bytes(struct entropy_store *r, const void *in,
+ int nbytes, __u8 out[64])
+{
+ trace_mix_pool_bytes_nolock(r->name, nbytes, _RET_IP_);
+ _mix_pool_bytes(r, in, nbytes, out);
+}
+
+static void mix_pool_bytes(struct entropy_store *r, const void *in,
+ int nbytes, __u8 out[64])
+{
+ unsigned long flags;
+
+ trace_mix_pool_bytes(r->name, nbytes, _RET_IP_);
+ spin_lock_irqsave(&r->lock, flags);
+ _mix_pool_bytes(r, in, nbytes, out);
+ spin_unlock_irqrestore(&r->lock, flags);
+}
+
+struct fast_pool {
+ __u32 pool[4];
+ unsigned long last;
+ unsigned short count;
+ unsigned char rotate;
+ unsigned char last_timer_intr;
+};
+
+/*
+ * This is a fast mixing routine used by the interrupt randomness
+ * collector. It's hardcoded for an 128 bit pool and assumes that any
+ * locks that might be needed are taken by the caller.
+ */
+static void fast_mix(struct fast_pool *f, const void *in, int nbytes)
+{
+ const char *bytes = in;
+ __u32 w;
+ unsigned i = f->count;
+ unsigned input_rotate = f->rotate;
+
+ while (nbytes--) {
+ w = rol32(*bytes++, input_rotate & 31) ^ f->pool[i & 3] ^
+ f->pool[(i + 1) & 3];
+ f->pool[i & 3] = (w >> 3) ^ twist_table[w & 7];
+ input_rotate += (i++ & 3) ? 7 : 14;
+ }
+ f->count = i;
+ f->rotate = input_rotate;
+}
+
+/*
+ * Credit (or debit) the entropy store with n bits of entropy
+ */
+static void credit_entropy_bits(struct entropy_store *r, int nbits)
+{
+ int entropy_count, orig;
+
+ if (!nbits)
+ return;
+
+ DEBUG_ENT("added %d entropy credits to %s\n", nbits, r->name);
+retry:
+ entropy_count = orig = ACCESS_ONCE(r->entropy_count);
+ entropy_count += nbits;
+
+ if (entropy_count < 0) {
+ DEBUG_ENT("negative entropy/overflow\n");
+ entropy_count = 0;
+ } else if (entropy_count > r->poolinfo->POOLBITS)
+ entropy_count = r->poolinfo->POOLBITS;
+ if (cmpxchg(&r->entropy_count, orig, entropy_count) != orig)
+ goto retry;
+
+ if (!r->initialized && nbits > 0) {
+ r->entropy_total += nbits;
+ if (r->entropy_total > 128)
+ r->initialized = 1;
+ }
+
+ trace_credit_entropy_bits(r->name, nbits, entropy_count,
+ r->entropy_total, _RET_IP_);
+
+ /* should we wake readers? */
+ if (r == &input_pool && entropy_count >= random_read_wakeup_thresh) {
+ wake_up_interruptible(&random_read_wait);
+ kill_fasync(&fasync, SIGIO, POLL_IN);
+ }
+}
+
+/*********************************************************************
+ *
+ * Entropy input management
+ *
+ *********************************************************************/
+
+/* There is one of these per entropy source */
+struct timer_rand_state {
+ cycles_t last_time;
+ long last_delta, last_delta2;
+ unsigned dont_count_entropy:1;
+};
+
+/*
+ * Add device- or boot-specific data to the input and nonblocking
+ * pools to help initialize them to unique values.
+ *
+ * None of this adds any entropy, it is meant to avoid the
+ * problem of the nonblocking pool having similar initial state
+ * across largely identical devices.
+ */
+void add_device_randomness(const void *buf, unsigned int size)
+{
+ unsigned long time = get_cycles() ^ jiffies;
+
+ mix_pool_bytes(&input_pool, buf, size, NULL);
+ mix_pool_bytes(&input_pool, &time, sizeof(time), NULL);
+ mix_pool_bytes(&nonblocking_pool, buf, size, NULL);
+ mix_pool_bytes(&nonblocking_pool, &time, sizeof(time), NULL);
+}
+EXPORT_SYMBOL(add_device_randomness);
+
+static struct timer_rand_state input_timer_state;
+
+/*
+ * This function adds entropy to the entropy "pool" by using timing
+ * delays. It uses the timer_rand_state structure to make an estimate
+ * of how many bits of entropy this call has added to the pool.
+ *
+ * The number "num" is also added to the pool - it should somehow describe
+ * the type of event which just happened. This is currently 0-255 for
+ * keyboard scan codes, and 256 upwards for interrupts.
+ *
+ */
+static void add_timer_randomness(struct timer_rand_state *state, unsigned num)
+{
+ struct {
+ long jiffies;
+ unsigned cycles;
+ unsigned num;
+ } sample;
+ long delta, delta2, delta3;
+
+ preempt_disable();
+ /* if over the trickle threshold, use only 1 in 4096 samples */
+ if (input_pool.entropy_count > trickle_thresh &&
+ ((__this_cpu_inc_return(trickle_count) - 1) & 0xfff))
+ goto out;
+
+ sample.jiffies = jiffies;
+ sample.cycles = get_cycles();
+ sample.num = num;
+ mix_pool_bytes(&input_pool, &sample, sizeof(sample), NULL);
+
+ /*
+ * Calculate number of bits of randomness we probably added.
+ * We take into account the first, second and third-order deltas
+ * in order to make our estimate.
+ */
+
+ if (!state->dont_count_entropy) {
+ delta = sample.jiffies - state->last_time;
+ state->last_time = sample.jiffies;
+
+ delta2 = delta - state->last_delta;
+ state->last_delta = delta;
+
+ delta3 = delta2 - state->last_delta2;
+ state->last_delta2 = delta2;
+
+ if (delta < 0)
+ delta = -delta;
+ if (delta2 < 0)
+ delta2 = -delta2;
+ if (delta3 < 0)
+ delta3 = -delta3;
+ if (delta > delta2)
+ delta = delta2;
+ if (delta > delta3)
+ delta = delta3;
+
+ /*
+ * delta is now minimum absolute delta.
+ * Round down by 1 bit on general principles,
+ * and limit entropy entimate to 12 bits.
+ */
+ credit_entropy_bits(&input_pool,
+ min_t(int, fls(delta>>1), 11));
+ }
+out:
+ preempt_enable();
+}
+
+void add_input_randomness(unsigned int type, unsigned int code,
+ unsigned int value)
+{
+ static unsigned char last_value;
+
+ /* ignore autorepeat and the like */
+ if (value == last_value)
+ return;
+
+ DEBUG_ENT("input event\n");
+ last_value = value;
+ add_timer_randomness(&input_timer_state,
+ (type << 4) ^ code ^ (code >> 4) ^ value);
+}
+EXPORT_SYMBOL_GPL(add_input_randomness);
+
+static DEFINE_PER_CPU(struct fast_pool, irq_randomness);
+
+void add_interrupt_randomness(int irq, int irq_flags)
+{
+ struct entropy_store *r;
+ struct fast_pool *fast_pool = &__get_cpu_var(irq_randomness);
+ struct pt_regs *regs = get_irq_regs();
+ unsigned long now = jiffies;
+ __u32 input[4], cycles = get_cycles();
+
+ input[0] = cycles ^ jiffies;
+ input[1] = irq;
+ if (regs) {
+ __u64 ip = instruction_pointer(regs);
+ input[2] = ip;
+ input[3] = ip >> 32;
+ }
+
+ fast_mix(fast_pool, input, sizeof(input));
+
+ if ((fast_pool->count & 1023) &&
+ !time_after(now, fast_pool->last + HZ))
+ return;
+
+ fast_pool->last = now;
+
+ r = nonblocking_pool.initialized ? &input_pool : &nonblocking_pool;
+ __mix_pool_bytes(r, &fast_pool->pool, sizeof(fast_pool->pool), NULL);
+ /*
+ * If we don't have a valid cycle counter, and we see
+ * back-to-back timer interrupts, then skip giving credit for
+ * any entropy.
+ */
+ if (cycles == 0) {
+ if (irq_flags & __IRQF_TIMER) {
+ if (fast_pool->last_timer_intr)
+ return;
+ fast_pool->last_timer_intr = 1;
+ } else
+ fast_pool->last_timer_intr = 0;
+ }
+ credit_entropy_bits(r, 1);
+}
+
+#ifdef CONFIG_BLOCK
+void add_disk_randomness(struct gendisk *disk)
+{
+ if (!disk || !disk->random)
+ return;
+ /* first major is 1, so we get >= 0x200 here */
+ DEBUG_ENT("disk event %d:%d\n",
+ MAJOR(disk_devt(disk)), MINOR(disk_devt(disk)));
+
+ add_timer_randomness(disk->random, 0x100 + disk_devt(disk));
+}
+#endif
+
+/*********************************************************************
+ *
+ * Entropy extraction routines
+ *
+ *********************************************************************/
+
+static ssize_t extract_entropy(struct entropy_store *r, void *buf,
+ size_t nbytes, int min, int rsvd);
+
+/*
+ * This utility inline function is responsible for transferring entropy
+ * from the primary pool to the secondary extraction pool. We make
+ * sure we pull enough for a 'catastrophic reseed'.
+ */
+static void xfer_secondary_pool(struct entropy_store *r, size_t nbytes)
+{
+ __u32 tmp[OUTPUT_POOL_WORDS];
+
+ if (r->pull && r->entropy_count < nbytes * 8 &&
+ r->entropy_count < r->poolinfo->POOLBITS) {
+ /* If we're limited, always leave two wakeup worth's BITS */
+ int rsvd = r->limit ? 0 : random_read_wakeup_thresh/4;
+ int bytes = nbytes;
+
+ /* pull at least as many as BYTES as wakeup BITS */
+ bytes = max_t(int, bytes, random_read_wakeup_thresh / 8);
+ /* but never more than the buffer size */
+ bytes = min_t(int, bytes, sizeof(tmp));
+
+ DEBUG_ENT("going to reseed %s with %d bits "
+ "(%zu of %d requested)\n",
+ r->name, bytes * 8, nbytes * 8, r->entropy_count);
+
+ bytes = extract_entropy(r->pull, tmp, bytes,
+ random_read_wakeup_thresh / 8, rsvd);
+ mix_pool_bytes(r, tmp, bytes, NULL);
+ credit_entropy_bits(r, bytes*8);
+ }
+}
+
+/*
+ * These functions extracts randomness from the "entropy pool", and
+ * returns it in a buffer.
+ *
+ * The min parameter specifies the minimum amount we can pull before
+ * failing to avoid races that defeat catastrophic reseeding while the
+ * reserved parameter indicates how much entropy we must leave in the
+ * pool after each pull to avoid starving other readers.
+ *
+ * Note: extract_entropy() assumes that .poolwords is a multiple of 16 words.
+ */
+
+static size_t account(struct entropy_store *r, size_t nbytes, int min,
+ int reserved)
+{
+ unsigned long flags;
+ int wakeup_write = 0;
+
+ /* Hold lock while accounting */
+ spin_lock_irqsave(&r->lock, flags);
+
+ BUG_ON(r->entropy_count > r->poolinfo->POOLBITS);
+ DEBUG_ENT("trying to extract %zu bits from %s\n",
+ nbytes * 8, r->name);
+
+ /* Can we pull enough? */
+ if (r->entropy_count / 8 < min + reserved) {
+ nbytes = 0;
+ } else {
+ int entropy_count, orig;
+retry:
+ entropy_count = orig = ACCESS_ONCE(r->entropy_count);
+ /* If limited, never pull more than available */
+ if (r->limit && nbytes + reserved >= entropy_count / 8)
+ nbytes = entropy_count/8 - reserved;
+
+ if (entropy_count / 8 >= nbytes + reserved) {
+ entropy_count -= nbytes*8;
+ if (cmpxchg(&r->entropy_count, orig, entropy_count) != orig)
+ goto retry;
+ } else {
+ entropy_count = reserved;
+ if (cmpxchg(&r->entropy_count, orig, entropy_count) != orig)
+ goto retry;
+ }
+
+ if (entropy_count < random_write_wakeup_thresh)
+ wakeup_write = 1;
+ }
+
+ DEBUG_ENT("debiting %zu entropy credits from %s%s\n",
+ nbytes * 8, r->name, r->limit ? "" : " (unlimited)");
+
+ spin_unlock_irqrestore(&r->lock, flags);
+
+ if (wakeup_write) {
+ wake_up_interruptible(&random_write_wait);
+ kill_fasync(&fasync, SIGIO, POLL_OUT);
+ }
+
+ return nbytes;
+}
+
+static void extract_buf(struct entropy_store *r, __u8 *out)
+{
+ int i;
+ union {
+ __u32 w[5];
+ unsigned long l[LONGS(EXTRACT_SIZE)];
+ } hash;
+ __u32 workspace[SHA_WORKSPACE_WORDS];
+ __u8 extract[64];
+ unsigned long flags;
+
+ /* Generate a hash across the pool, 16 words (512 bits) at a time */
+ sha_init(hash.w);
+ spin_lock_irqsave(&r->lock, flags);
+ for (i = 0; i < r->poolinfo->poolwords; i += 16)
+ sha_transform(hash.w, (__u8 *)(r->pool + i), workspace);
+
+ /*
+ * We mix the hash back into the pool to prevent backtracking
+ * attacks (where the attacker knows the state of the pool
+ * plus the current outputs, and attempts to find previous
+ * ouputs), unless the hash function can be inverted. By
+ * mixing at least a SHA1 worth of hash data back, we make
+ * brute-forcing the feedback as hard as brute-forcing the
+ * hash.
+ */
+ __mix_pool_bytes(r, hash.w, sizeof(hash.w), extract);
+ spin_unlock_irqrestore(&r->lock, flags);
+
+ /*
+ * To avoid duplicates, we atomically extract a portion of the
+ * pool while mixing, and hash one final time.
+ */
+ sha_transform(hash.w, extract, workspace);
+ memset(extract, 0, sizeof(extract));
+ memset(workspace, 0, sizeof(workspace));
+
+ /*
+ * In case the hash function has some recognizable output
+ * pattern, we fold it in half. Thus, we always feed back
+ * twice as much data as we output.
+ */
+ hash.w[0] ^= hash.w[3];
+ hash.w[1] ^= hash.w[4];
+ hash.w[2] ^= rol32(hash.w[2], 16);
+
+ /*
+ * If we have a architectural hardware random number
+ * generator, mix that in, too.
+ */
+ for (i = 0; i < LONGS(EXTRACT_SIZE); i++) {
+ unsigned long v;
+ if (!arch_get_random_long(&v))
+ break;
+ hash.l[i] ^= v;
+ }
+
+ memcpy(out, &hash, EXTRACT_SIZE);
+ memset(&hash, 0, sizeof(hash));
+}
+
+static ssize_t extract_entropy(struct entropy_store *r, void *buf,
+ size_t nbytes, int min, int reserved)
+{
+ ssize_t ret = 0, i;
+ __u8 tmp[EXTRACT_SIZE];
+ unsigned long flags;
+
+ /* if last_data isn't primed, we need EXTRACT_SIZE extra bytes */
+ if (fips_enabled) {
+ spin_lock_irqsave(&r->lock, flags);
+ if (!r->last_data_init) {
+ r->last_data_init = true;
+ spin_unlock_irqrestore(&r->lock, flags);
+ trace_extract_entropy(r->name, EXTRACT_SIZE,
+ r->entropy_count, _RET_IP_);
+ xfer_secondary_pool(r, EXTRACT_SIZE);
+ extract_buf(r, tmp);
+ spin_lock_irqsave(&r->lock, flags);
+ memcpy(r->last_data, tmp, EXTRACT_SIZE);
+ }
+ spin_unlock_irqrestore(&r->lock, flags);
+ }
+
+ trace_extract_entropy(r->name, nbytes, r->entropy_count, _RET_IP_);
+ xfer_secondary_pool(r, nbytes);
+ nbytes = account(r, nbytes, min, reserved);
+
+ while (nbytes) {
+ extract_buf(r, tmp);
+
+ if (fips_enabled) {
+ spin_lock_irqsave(&r->lock, flags);
+ if (!memcmp(tmp, r->last_data, EXTRACT_SIZE))
+ panic("Hardware RNG duplicated output!\n");
+ memcpy(r->last_data, tmp, EXTRACT_SIZE);
+ spin_unlock_irqrestore(&r->lock, flags);
+ }
+ i = min_t(int, nbytes, EXTRACT_SIZE);
+ memcpy(buf, tmp, i);
+ nbytes -= i;
+ buf += i;
+ ret += i;
+ }
+
+ /* Wipe data just returned from memory */
+ memset(tmp, 0, sizeof(tmp));
+
+ return ret;
+}
+
+static ssize_t extract_entropy_user(struct entropy_store *r, void __user *buf,
+ size_t nbytes)
+{
+ ssize_t ret = 0, i;
+ __u8 tmp[EXTRACT_SIZE];
+
+ trace_extract_entropy_user(r->name, nbytes, r->entropy_count, _RET_IP_);
+ xfer_secondary_pool(r, nbytes);
+ nbytes = account(r, nbytes, 0, 0);
+
+ while (nbytes) {
+ if (need_resched()) {
+ if (signal_pending(current)) {
+ if (ret == 0)
+ ret = -ERESTARTSYS;
+ break;
+ }
+ schedule();
+ }
+
+ extract_buf(r, tmp);
+ i = min_t(int, nbytes, EXTRACT_SIZE);
+ if (copy_to_user(buf, tmp, i)) {
+ ret = -EFAULT;
+ break;
+ }
+
+ nbytes -= i;
+ buf += i;
+ ret += i;
+ }
+
+ /* Wipe data just returned from memory */
+ memset(tmp, 0, sizeof(tmp));
+
+ return ret;
+}
+
+/*
+ * This function is the exported kernel interface. It returns some
+ * number of good random numbers, suitable for key generation, seeding
+ * TCP sequence numbers, etc. It does not use the hw random number
+ * generator, if available; use get_random_bytes_arch() for that.
+ */
+void get_random_bytes(void *buf, int nbytes)
+{
+ extract_entropy(&nonblocking_pool, buf, nbytes, 0, 0);
+}
+EXPORT_SYMBOL(get_random_bytes);
+
+/*
+ * This function will use the architecture-specific hardware random
+ * number generator if it is available. The arch-specific hw RNG will
+ * almost certainly be faster than what we can do in software, but it
+ * is impossible to verify that it is implemented securely (as
+ * opposed, to, say, the AES encryption of a sequence number using a
+ * key known by the NSA). So it's useful if we need the speed, but
+ * only if we're willing to trust the hardware manufacturer not to
+ * have put in a back door.
+ */
+void get_random_bytes_arch(void *buf, int nbytes)
+{
+ char *p = buf;
+
+ trace_get_random_bytes(nbytes, _RET_IP_);
+ while (nbytes) {
+ unsigned long v;
+ int chunk = min(nbytes, (int)sizeof(unsigned long));
+
+ if (!arch_get_random_long(&v))
+ break;
+
+ memcpy(p, &v, chunk);
+ p += chunk;
+ nbytes -= chunk;
+ }
+
+ if (nbytes)
+ extract_entropy(&nonblocking_pool, p, nbytes, 0, 0);
+}
+EXPORT_SYMBOL(get_random_bytes_arch);
+
+
+/*
+ * init_std_data - initialize pool with system data
+ *
+ * @r: pool to initialize
+ *
+ * This function clears the pool's entropy count and mixes some system
+ * data into the pool to prepare it for use. The pool is not cleared
+ * as that can only decrease the entropy in the pool.
+ */
+static void init_std_data(struct entropy_store *r)
+{
+ int i;
+ ktime_t now = ktime_get_real();
+ unsigned long rv;
+
+ r->entropy_count = 0;
+ r->entropy_total = 0;
+ r->last_data_init = false;
+ mix_pool_bytes(r, &now, sizeof(now), NULL);
+ for (i = r->poolinfo->POOLBYTES; i > 0; i -= sizeof(rv)) {
+ if (!arch_get_random_long(&rv))
+ break;
+ mix_pool_bytes(r, &rv, sizeof(rv), NULL);
+ }
+ mix_pool_bytes(r, utsname(), sizeof(*(utsname())), NULL);
+}
+
+/*
+ * Note that setup_arch() may call add_device_randomness()
+ * long before we get here. This allows seeding of the pools
+ * with some platform dependent data very early in the boot
+ * process. But it limits our options here. We must use
+ * statically allocated structures that already have all
+ * initializations complete at compile time. We should also
+ * take care not to overwrite the precious per platform data
+ * we were given.
+ */
+static int rand_initialize(void)
+{
+ init_std_data(&input_pool);
+ init_std_data(&blocking_pool);
+ init_std_data(&nonblocking_pool);
+ return 0;
+}
+module_init(rand_initialize);
+
+#ifdef CONFIG_BLOCK
+void rand_initialize_disk(struct gendisk *disk)
+{
+ struct timer_rand_state *state;
+
+ /*
+ * If kzalloc returns null, we just won't use that entropy
+ * source.
+ */
+ state = kzalloc(sizeof(struct timer_rand_state), GFP_KERNEL);
+ if (state)
+ disk->random = state;
+}
+#endif
+
+static ssize_t
+random_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
+{
+ ssize_t n, retval = 0, count = 0;
+
+ if (nbytes == 0)
+ return 0;
+
+ while (nbytes > 0) {
+ n = nbytes;
+ if (n > SEC_XFER_SIZE)
+ n = SEC_XFER_SIZE;
+
+ DEBUG_ENT("reading %zu bits\n", n*8);
+
+ n = extract_entropy_user(&blocking_pool, buf, n);
+
+ if (n < 0) {
+ retval = n;
+ break;
+ }
+
+ DEBUG_ENT("read got %zd bits (%zd still needed)\n",
+ n*8, (nbytes-n)*8);
+
+ if (n == 0) {
+ if (file->f_flags & O_NONBLOCK) {
+ retval = -EAGAIN;
+ break;
+ }
+
+ DEBUG_ENT("sleeping?\n");
+
+ wait_event_interruptible(random_read_wait,
+ input_pool.entropy_count >=
+ random_read_wakeup_thresh);
+
+ DEBUG_ENT("awake\n");
+
+ if (signal_pending(current)) {
+ retval = -ERESTARTSYS;
+ break;
+ }
+
+ continue;
+ }
+
+ count += n;
+ buf += n;
+ nbytes -= n;
+ break; /* This break makes the device work */
+ /* like a named pipe */
+ }
+
+ return (count ? count : retval);
+}
+
+static ssize_t
+urandom_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
+{
+ return extract_entropy_user(&nonblocking_pool, buf, nbytes);
+}
+
+static unsigned int
+random_poll(struct file *file, poll_table * wait)
+{
+ unsigned int mask;
+
+ poll_wait(file, &random_read_wait, wait);
+ poll_wait(file, &random_write_wait, wait);
+ mask = 0;
+ if (input_pool.entropy_count >= random_read_wakeup_thresh)
+ mask |= POLLIN | POLLRDNORM;
+ if (input_pool.entropy_count < random_write_wakeup_thresh)
+ mask |= POLLOUT | POLLWRNORM;
+ return mask;
+}
+
+static int
+write_pool(struct entropy_store *r, const char __user *buffer, size_t count)
+{
+ size_t bytes;
+ __u32 buf[16];
+ const char __user *p = buffer;
+
+ while (count > 0) {
+ bytes = min(count, sizeof(buf));
+ if (copy_from_user(&buf, p, bytes))
+ return -EFAULT;
+
+ count -= bytes;
+ p += bytes;
+
+ mix_pool_bytes(r, buf, bytes, NULL);
+ cond_resched();
+ }
+
+ return 0;
+}
+
+static ssize_t random_write(struct file *file, const char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ size_t ret;
+
+ ret = write_pool(&blocking_pool, buffer, count);
+ if (ret)
+ return ret;
+ ret = write_pool(&nonblocking_pool, buffer, count);
+ if (ret)
+ return ret;
+
+ return (ssize_t)count;
+}
+
+static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
+{
+ int size, ent_count;
+ int __user *p = (int __user *)arg;
+ int retval;
+
+ switch (cmd) {
+ case RNDGETENTCNT:
+ /* inherently racy, no point locking */
+ if (put_user(input_pool.entropy_count, p))
+ return -EFAULT;
+ return 0;
+ case RNDADDTOENTCNT:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ if (get_user(ent_count, p))
+ return -EFAULT;
+ credit_entropy_bits(&input_pool, ent_count);
+ return 0;
+ case RNDADDENTROPY:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ if (get_user(ent_count, p++))
+ return -EFAULT;
+ if (ent_count < 0)
+ return -EINVAL;
+ if (get_user(size, p++))
+ return -EFAULT;
+ retval = write_pool(&input_pool, (const char __user *)p,
+ size);
+ if (retval < 0)
+ return retval;
+ credit_entropy_bits(&input_pool, ent_count);
+ return 0;
+ case RNDZAPENTCNT:
+ case RNDCLEARPOOL:
+ /* Clear the entropy pool counters. */
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ rand_initialize();
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
+static int random_fasync(int fd, struct file *filp, int on)
+{
+ return fasync_helper(fd, filp, on, &fasync);
+}
+
+const struct file_operations random_fops = {
+ .read = random_read,
+ .write = random_write,
+ .poll = random_poll,
+ .unlocked_ioctl = random_ioctl,
+ .fasync = random_fasync,
+ .llseek = noop_llseek,
+};
+
+const struct file_operations urandom_fops = {
+ .read = urandom_read,
+ .write = random_write,
+ .unlocked_ioctl = random_ioctl,
+ .fasync = random_fasync,
+ .llseek = noop_llseek,
+};
+
+/***************************************************************
+ * Random UUID interface
+ *
+ * Used here for a Boot ID, but can be useful for other kernel
+ * drivers.
+ ***************************************************************/
+
+/*
+ * Generate random UUID
+ */
+void generate_random_uuid(unsigned char uuid_out[16])
+{
+ get_random_bytes(uuid_out, 16);
+ /* Set UUID version to 4 --- truly random generation */
+ uuid_out[6] = (uuid_out[6] & 0x0F) | 0x40;
+ /* Set the UUID variant to DCE */
+ uuid_out[8] = (uuid_out[8] & 0x3F) | 0x80;
+}
+EXPORT_SYMBOL(generate_random_uuid);
+
+/********************************************************************
+ *
+ * Sysctl interface
+ *
+ ********************************************************************/
+
+#ifdef CONFIG_SYSCTL
+
+#include <linux/sysctl.h>
+
+static int min_read_thresh = 8, min_write_thresh;
+static int max_read_thresh = INPUT_POOL_WORDS * 32;
+static int max_write_thresh = INPUT_POOL_WORDS * 32;
+static char sysctl_bootid[16];
+
+/*
+ * These functions is used to return both the bootid UUID, and random
+ * UUID. The difference is in whether table->data is NULL; if it is,
+ * then a new UUID is generated and returned to the user.
+ *
+ * If the user accesses this via the proc interface, it will be returned
+ * as an ASCII string in the standard UUID format. If accesses via the
+ * sysctl system call, it is returned as 16 bytes of binary data.
+ */
+static int proc_do_uuid(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct ctl_table fake_table;
+ unsigned char buf[64], tmp_uuid[16], *uuid;
+
+ uuid = table->data;
+ if (!uuid) {
+ uuid = tmp_uuid;
+ generate_random_uuid(uuid);
+ } else {
+ static DEFINE_SPINLOCK(bootid_spinlock);
+
+ spin_lock(&bootid_spinlock);
+ if (!uuid[8])
+ generate_random_uuid(uuid);
+ spin_unlock(&bootid_spinlock);
+ }
+
+ sprintf(buf, "%pU", uuid);
+
+ fake_table.data = buf;
+ fake_table.maxlen = sizeof(buf);
+
+ return proc_dostring(&fake_table, write, buffer, lenp, ppos);
+}
+
+static int sysctl_poolsize = INPUT_POOL_WORDS * 32;
+extern struct ctl_table random_table[];
+struct ctl_table random_table[] = {
+ {
+ .procname = "poolsize",
+ .data = &sysctl_poolsize,
+ .maxlen = sizeof(int),
+ .mode = 0444,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "entropy_avail",
+ .maxlen = sizeof(int),
+ .mode = 0444,
+ .proc_handler = proc_dointvec,
+ .data = &input_pool.entropy_count,
+ },
+ {
+ .procname = "read_wakeup_threshold",
+ .data = &random_read_wakeup_thresh,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &min_read_thresh,
+ .extra2 = &max_read_thresh,
+ },
+ {
+ .procname = "write_wakeup_threshold",
+ .data = &random_write_wakeup_thresh,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &min_write_thresh,
+ .extra2 = &max_write_thresh,
+ },
+ {
+ .procname = "boot_id",
+ .data = &sysctl_bootid,
+ .maxlen = 16,
+ .mode = 0444,
+ .proc_handler = proc_do_uuid,
+ },
+ {
+ .procname = "uuid",
+ .maxlen = 16,
+ .mode = 0444,
+ .proc_handler = proc_do_uuid,
+ },
+ { }
+};
+#endif /* CONFIG_SYSCTL */
+
+static u32 random_int_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned;
+
+static int __init random_int_secret_init(void)
+{
+ get_random_bytes(random_int_secret, sizeof(random_int_secret));
+ return 0;
+}
+late_initcall(random_int_secret_init);
+
+/*
+ * Get a random word for internal kernel use only. Similar to urandom but
+ * with the goal of minimal entropy pool depletion. As a result, the random
+ * value is not cryptographically secure but for several uses the cost of
+ * depleting entropy is too high
+ */
+static DEFINE_PER_CPU(__u32 [MD5_DIGEST_WORDS], get_random_int_hash);
+unsigned int get_random_int(void)
+{
+ __u32 *hash;
+ unsigned int ret;
+
+ if (arch_get_random_int(&ret))
+ return ret;
+
+ hash = get_cpu_var(get_random_int_hash);
+
+ hash[0] += current->pid + jiffies + get_cycles();
+ md5_transform(hash, random_int_secret);
+ ret = hash[0];
+ put_cpu_var(get_random_int_hash);
+
+ return ret;
+}
+EXPORT_SYMBOL(get_random_int);
+
+/*
+ * randomize_range() returns a start address such that
+ *
+ * [...... <range> .....]
+ * start end
+ *
+ * a <range> with size "len" starting at the return value is inside in the
+ * area defined by [start, end], but is otherwise randomized.
+ */
+unsigned long
+randomize_range(unsigned long start, unsigned long end, unsigned long len)
+{
+ unsigned long range = end - len - start;
+
+ if (end <= start + len)
+ return 0;
+ return PAGE_ALIGN(get_random_int() % range + start);
+}
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
new file mode 100644
index 0000000..14a8ff2
--- /dev/null
+++ b/include/linux/sysctl.h
@@ -0,0 +1,217 @@
+/*
+ * sysctl.h: General linux system control interface
+ *
+ * Begun 24 March 1995, Stephen Tweedie
+ *
+ ****************************************************************
+ ****************************************************************
+ **
+ ** WARNING:
+ ** The values in this file are exported to user space via
+ ** the sysctl() binary interface. Do *NOT* change the
+ ** numbering of any existing values here, and do not change
+ ** any numbers within any one set of values. If you have to
+ ** redefine an existing interface, use a new number for it.
+ ** The kernel will then return -ENOTDIR to any application using
+ ** the old binary interface.
+ **
+ ****************************************************************
+ ****************************************************************
+ */
+#ifndef _LINUX_SYSCTL_H
+#define _LINUX_SYSCTL_H
+
+#include <linux/list.h>
+#include <linux/rcupdate.h>
+#include <linux/wait.h>
+#include <linux/rbtree.h>
+#include <uapi/linux/sysctl.h>
+
+/* For the /proc/sys support */
+struct ctl_table;
+struct nsproxy;
+struct ctl_table_root;
+struct ctl_table_header;
+struct ctl_dir;
+
+typedef struct ctl_table ctl_table;
+
+typedef int proc_handler (struct ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos);
+
+extern int proc_dostring(struct ctl_table *, int,
+ void __user *, size_t *, loff_t *);
+extern int proc_dointvec(struct ctl_table *, int,
+ void __user *, size_t *, loff_t *);
+extern int proc_dointvec_minmax(struct ctl_table *, int,
+ void __user *, size_t *, loff_t *);
+extern int proc_dointvec_jiffies(struct ctl_table *, int,
+ void __user *, size_t *, loff_t *);
+extern int proc_dointvec_userhz_jiffies(struct ctl_table *, int,
+ void __user *, size_t *, loff_t *);
+extern int proc_dointvec_ms_jiffies(struct ctl_table *, int,
+ void __user *, size_t *, loff_t *);
+extern int proc_doulongvec_minmax(struct ctl_table *, int,
+ void __user *, size_t *, loff_t *);
+extern int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int,
+ void __user *, size_t *, loff_t *);
+extern int proc_do_large_bitmap(struct ctl_table *, int,
+ void __user *, size_t *, loff_t *);
+
+/*
+ * Register a set of sysctl names by calling register_sysctl_table
+ * with an initialised array of struct ctl_table's. An entry with
+ * NULL procname terminates the table. table->de will be
+ * set up by the registration and need not be initialised in advance.
+ *
+ * sysctl names can be mirrored automatically under /proc/sys. The
+ * procname supplied controls /proc naming.
+ *
+ * The table's mode will be honoured both for sys_sysctl(2) and
+ * proc-fs access.
+ *
+ * Leaf nodes in the sysctl tree will be represented by a single file
+ * under /proc; non-leaf nodes will be represented by directories. A
+ * null procname disables /proc mirroring at this node.
+ *
+ * sysctl(2) can automatically manage read and write requests through
+ * the sysctl table. The data and maxlen fields of the ctl_table
+ * struct enable minimal validation of the values being written to be
+ * performed, and the mode field allows minimal authentication.
+ *
+ * There must be a proc_handler routine for any terminal nodes
+ * mirrored under /proc/sys (non-terminals are handled by a built-in
+ * directory handler). Several default handlers are available to
+ * cover common cases.
+ */
+
+/* Support for userspace poll() to watch for changes */
+struct ctl_table_poll {
+ atomic_t event;
+ wait_queue_head_t wait;
+};
+
+static inline void *proc_sys_poll_event(struct ctl_table_poll *poll)
+{
+ return (void *)(unsigned long)atomic_read(&poll->event);
+}
+
+#define __CTL_TABLE_POLL_INITIALIZER(name) { \
+ .event = ATOMIC_INIT(0), \
+ .wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.wait) }
+
+#define DEFINE_CTL_TABLE_POLL(name) \
+ struct ctl_table_poll name = __CTL_TABLE_POLL_INITIALIZER(name)
+
+/* A sysctl table is an array of struct ctl_table: */
+struct ctl_table
+{
+ const char *procname; /* Text ID for /proc/sys, or zero */
+ void *data;
+ int maxlen;
+ umode_t mode;
+ struct ctl_table *child; /* Deprecated */
+ proc_handler *proc_handler; /* Callback for text formatting */
+ struct ctl_table_poll *poll;
+ void *extra1;
+ void *extra2;
+};
+
+struct ctl_node {
+ struct rb_node node;
+ struct ctl_table_header *header;
+};
+
+/* struct ctl_table_header is used to maintain dynamic lists of
+ struct ctl_table trees. */
+struct ctl_table_header
+{
+ union {
+ struct {
+ struct ctl_table *ctl_table;
+ int used;
+ int count;
+ int nreg;
+ };
+ struct rcu_head rcu;
+ };
+ struct completion *unregistering;
+ struct ctl_table *ctl_table_arg;
+ struct ctl_table_root *root;
+ struct ctl_table_set *set;
+ struct ctl_dir *parent;
+ struct ctl_node *node;
+};
+
+struct ctl_dir {
+ /* Header must be at the start of ctl_dir */
+ struct ctl_table_header header;
+ struct rb_root root;
+};
+
+struct ctl_table_set {
+ int (*is_seen)(struct ctl_table_set *);
+ struct ctl_dir dir;
+};
+
+struct ctl_table_root {
+ struct ctl_table_set default_set;
+ struct ctl_table_set *(*lookup)(struct ctl_table_root *root,
+ struct nsproxy *namespaces);
+ int (*permissions)(struct ctl_table_header *head, struct ctl_table *table);
+};
+
+/* struct ctl_path describes where in the hierarchy a table is added */
+struct ctl_path {
+ const char *procname;
+};
+
+#ifdef CONFIG_SYSCTL
+
+void proc_sys_poll_notify(struct ctl_table_poll *poll);
+
+extern void setup_sysctl_set(struct ctl_table_set *p,
+ struct ctl_table_root *root,
+ int (*is_seen)(struct ctl_table_set *));
+extern void retire_sysctl_set(struct ctl_table_set *set);
+
+void register_sysctl_root(struct ctl_table_root *root);
+struct ctl_table_header *__register_sysctl_table(
+ struct ctl_table_set *set,
+ const char *path, struct ctl_table *table);
+struct ctl_table_header *__register_sysctl_paths(
+ struct ctl_table_set *set,
+ const struct ctl_path *path, struct ctl_table *table);
+struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *table);
+struct ctl_table_header *register_sysctl_table(struct ctl_table * table);
+struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
+ struct ctl_table *table);
+
+void unregister_sysctl_table(struct ctl_table_header * table);
+
+extern int sysctl_init(void);
+#else /* CONFIG_SYSCTL */
+static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * table)
+{
+ return NULL;
+}
+
+static inline struct ctl_table_header *register_sysctl_paths(
+ const struct ctl_path *path, struct ctl_table *table)
+{
+ return NULL;
+}
+
+static inline void unregister_sysctl_table(struct ctl_table_header * table)
+{
+}
+
+static inline void setup_sysctl_set(struct ctl_table_set *p,
+ struct ctl_table_root *root,
+ int (*is_seen)(struct ctl_table_set *))
+{
+}
+
+#endif /* CONFIG_SYSCTL */
+
+#endif /* _LINUX_SYSCTL_H */
diff --git a/include/trace/events/random.h b/include/trace/events/random.h
new file mode 100644
index 0000000..422df19
--- /dev/null
+++ b/include/trace/events/random.h
@@ -0,0 +1,134 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM random
+
+#if !defined(_TRACE_RANDOM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_RANDOM_H
+
+#include <linux/writeback.h>
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(random__mix_pool_bytes,
+ TP_PROTO(const char *pool_name, int bytes, unsigned long IP),
+
+ TP_ARGS(pool_name, bytes, IP),
+
+ TP_STRUCT__entry(
+ __field( const char *, pool_name )
+ __field( int, bytes )
+ __field(unsigned long, IP )
+ ),
+
+ TP_fast_assign(
+ __entry->pool_name = pool_name;
+ __entry->bytes = bytes;
+ __entry->IP = IP;
+ ),
+
+ TP_printk("%s pool: bytes %d caller %pF",
+ __entry->pool_name, __entry->bytes, (void *)__entry->IP)
+);
+
+DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes,
+ TP_PROTO(const char *pool_name, int bytes, unsigned long IP),
+
+ TP_ARGS(pool_name, bytes, IP)
+);
+
+DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes_nolock,
+ TP_PROTO(const char *pool_name, int bytes, unsigned long IP),
+
+ TP_ARGS(pool_name, bytes, IP)
+);
+
+TRACE_EVENT(credit_entropy_bits,
+ TP_PROTO(const char *pool_name, int bits, int entropy_count,
+ int entropy_total, unsigned long IP),
+
+ TP_ARGS(pool_name, bits, entropy_count, entropy_total, IP),
+
+ TP_STRUCT__entry(
+ __field( const char *, pool_name )
+ __field( int, bits )
+ __field( int, entropy_count )
+ __field( int, entropy_total )
+ __field(unsigned long, IP )
+ ),
+
+ TP_fast_assign(
+ __entry->pool_name = pool_name;
+ __entry->bits = bits;
+ __entry->entropy_count = entropy_count;
+ __entry->entropy_total = entropy_total;
+ __entry->IP = IP;
+ ),
+
+ TP_printk("%s pool: bits %d entropy_count %d entropy_total %d "
+ "caller %pF", __entry->pool_name, __entry->bits,
+ __entry->entropy_count, __entry->entropy_total,
+ (void *)__entry->IP)
+);
+
+TRACE_EVENT(get_random_bytes,
+ TP_PROTO(int nbytes, unsigned long IP),
+
+ TP_ARGS(nbytes, IP),
+
+ TP_STRUCT__entry(
+ __field( int, nbytes )
+ __field(unsigned long, IP )
+ ),
+
+ TP_fast_assign(
+ __entry->nbytes = nbytes;
+ __entry->IP = IP;
+ ),
+
+ TP_printk("nbytes %d caller %pF", __entry->nbytes, (void *)__entry->IP)
+);
+
+DECLARE_EVENT_CLASS(random__extract_entropy,
+ TP_PROTO(const char *pool_name, int nbytes, int entropy_count,
+ unsigned long IP),
+
+ TP_ARGS(pool_name, nbytes, entropy_count, IP),
+
+ TP_STRUCT__entry(
+ __field( const char *, pool_name )
+ __field( int, nbytes )
+ __field( int, entropy_count )
+ __field(unsigned long, IP )
+ ),
+
+ TP_fast_assign(
+ __entry->pool_name = pool_name;
+ __entry->nbytes = nbytes;
+ __entry->entropy_count = entropy_count;
+ __entry->IP = IP;
+ ),
+
+ TP_printk("%s pool: nbytes %d entropy_count %d caller %pF",
+ __entry->pool_name, __entry->nbytes, __entry->entropy_count,
+ (void *)__entry->IP)
+);
+
+
+DEFINE_EVENT(random__extract_entropy, extract_entropy,
+ TP_PROTO(const char *pool_name, int nbytes, int entropy_count,
+ unsigned long IP),
+
+ TP_ARGS(pool_name, nbytes, entropy_count, IP)
+);
+
+DEFINE_EVENT(random__extract_entropy, extract_entropy_user,
+ TP_PROTO(const char *pool_name, int nbytes, int entropy_count,
+ unsigned long IP),
+
+ TP_ARGS(pool_name, nbytes, entropy_count, IP)
+);
+
+
+
+#endif /* _TRACE_RANDOM_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
new file mode 100644
index 0000000..07f6fc4
--- /dev/null
+++ b/kernel/sysctl.c
@@ -0,0 +1,2671 @@
+/*
+ * sysctl.c: General linux system control interface
+ *
+ * Begun 24 March 1995, Stephen Tweedie
+ * Added /proc support, Dec 1995
+ * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
+ * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
+ * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
+ * Dynamic registration fixes, Stephen Tweedie.
+ * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
+ * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
+ * Horn.
+ * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
+ * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
+ * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
+ * Wendling.
+ * The list_for_each() macro wasn't appropriate for the sysctl loop.
+ * Removed it and replaced it with older style, 03/23/00, Bill Wendling
+ */
+
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/slab.h>
+#include <linux/sysctl.h>
+#include <linux/bitmap.h>
+#include <linux/signal.h>
+#include <linux/printk.h>
+#include <linux/proc_fs.h>
+#include <linux/security.h>
+#include <linux/ctype.h>
+#include <linux/kmemcheck.h>
+#include <linux/kmemleak.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/kobject.h>
+#include <linux/net.h>
+#include <linux/sysrq.h>
+#include <linux/highuid.h>
+#include <linux/writeback.h>
+#include <linux/ratelimit.h>
+#include <linux/compaction.h>
+#include <linux/hugetlb.h>
+#include <linux/initrd.h>
+#include <linux/key.h>
+#include <linux/times.h>
+#include <linux/limits.h>
+#include <linux/dcache.h>
+#include <linux/dnotify.h>
+#include <linux/syscalls.h>
+#include <linux/vmstat.h>
+#include <linux/nfs_fs.h>
+#include <linux/acpi.h>
+#include <linux/reboot.h>
+#include <linux/ftrace.h>
+#include <linux/perf_event.h>
+#include <linux/kprobes.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/oom.h>
+#include <linux/kmod.h>
+#include <linux/capability.h>
+#include <linux/binfmts.h>
+#include <linux/sched/sysctl.h>
+
+#include <asm/uaccess.h>
+#include <asm/processor.h>
+
+#ifdef CONFIG_X86
+#include <asm/nmi.h>
+#include <asm/stacktrace.h>
+#include <asm/io.h>
+#endif
+#ifdef CONFIG_SPARC
+#include <asm/setup.h>
+#endif
+#ifdef CONFIG_BSD_PROCESS_ACCT
+#include <linux/acct.h>
+#endif
+#ifdef CONFIG_RT_MUTEXES
+#include <linux/rtmutex.h>
+#endif
+#if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
+#include <linux/lockdep.h>
+#endif
+#ifdef CONFIG_CHR_DEV_SG
+#include <scsi/sg.h>
+#endif
+
+#ifdef CONFIG_LOCKUP_DETECTOR
+#include <linux/nmi.h>
+#endif
+
+
+#if defined(CONFIG_SYSCTL)
+
+/* External variables not in a header file. */
+extern int sysctl_overcommit_memory;
+extern int sysctl_overcommit_ratio;
+extern int max_threads;
+extern int suid_dumpable;
+#ifdef CONFIG_COREDUMP
+extern int core_uses_pid;
+extern char core_pattern[];
+extern unsigned int core_pipe_limit;
+#endif
+extern int pid_max;
+extern int pid_max_min, pid_max_max;
+extern int percpu_pagelist_fraction;
+extern int compat_log;
+extern int latencytop_enabled;
+extern int sysctl_nr_open_min, sysctl_nr_open_max;
+#ifndef CONFIG_MMU
+extern int sysctl_nr_trim_pages;
+#endif
+#ifdef CONFIG_BLOCK
+extern int blk_iopoll_enabled;
+#endif
+
+/* Constants used for minimum and maximum */
+#ifdef CONFIG_LOCKUP_DETECTOR
+static int sixty = 60;
+#endif
+
+static int zero;
+static int __maybe_unused one = 1;
+static int __maybe_unused two = 2;
+static int __maybe_unused three = 3;
+static unsigned long one_ul = 1;
+static int one_hundred = 100;
+#ifdef CONFIG_PRINTK
+static int ten_thousand = 10000;
+#endif
+
+/* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
+static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
+
+/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
+static int maxolduid = 65535;
+static int minolduid;
+static int min_percpu_pagelist_fract = 8;
+
+static int ngroups_max = NGROUPS_MAX;
+static const int cap_last_cap = CAP_LAST_CAP;
+
+#ifdef CONFIG_INOTIFY_USER
+#include <linux/inotify.h>
+#endif
+#ifdef CONFIG_SPARC
+#endif
+
+#ifdef CONFIG_SPARC64
+extern int sysctl_tsb_ratio;
+#endif
+
+#ifdef __hppa__
+extern int pwrsw_enabled;
+#endif
+
+#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
+extern int unaligned_enabled;
+#endif
+
+#ifdef CONFIG_IA64
+extern int unaligned_dump_stack;
+#endif
+
+#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
+extern int no_unaligned_warning;
+#endif
+
+#ifdef CONFIG_PROC_SYSCTL
+static int proc_do_cad_pid(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos);
+static int proc_taint(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos);
+#endif
+
+#ifdef CONFIG_PRINTK
+static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos);
+#endif
+
+static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos);
+#ifdef CONFIG_COREDUMP
+static int proc_dostring_coredump(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos);
+#endif
+
+#ifdef CONFIG_MAGIC_SYSRQ
+/* Note: sysrq code uses it's own private copy */
+static int __sysrq_enabled = SYSRQ_DEFAULT_ENABLE;
+
+static int sysrq_sysctl_handler(ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ int error;
+
+ error = proc_dointvec(table, write, buffer, lenp, ppos);
+ if (error)
+ return error;
+
+ if (write)
+ sysrq_toggle_support(__sysrq_enabled);
+
+ return 0;
+}
+
+#endif
+
+static struct ctl_table kern_table[];
+static struct ctl_table vm_table[];
+static struct ctl_table fs_table[];
+static struct ctl_table debug_table[];
+static struct ctl_table dev_table[];
+extern struct ctl_table random_table[];
+#ifdef CONFIG_EPOLL
+extern struct ctl_table epoll_table[];
+#endif
+
+#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
+int sysctl_legacy_va_layout;
+#endif
+
+/* The default sysctl tables: */
+
+static struct ctl_table sysctl_base_table[] = {
+ {
+ .procname = "kernel",
+ .mode = 0555,
+ .child = kern_table,
+ },
+ {
+ .procname = "vm",
+ .mode = 0555,
+ .child = vm_table,
+ },
+ {
+ .procname = "fs",
+ .mode = 0555,
+ .child = fs_table,
+ },
+ {
+ .procname = "debug",
+ .mode = 0555,
+ .child = debug_table,
+ },
+ {
+ .procname = "dev",
+ .mode = 0555,
+ .child = dev_table,
+ },
+ { }
+};
+
+#ifdef CONFIG_SCHED_DEBUG
+static int min_sched_granularity_ns = 100000; /* 100 usecs */
+static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */
+static int min_wakeup_granularity_ns; /* 0 usecs */
+static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */
+#ifdef CONFIG_SMP
+static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
+static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;
+#endif /* CONFIG_SMP */
+#endif /* CONFIG_SCHED_DEBUG */
+
+#ifdef CONFIG_COMPACTION
+static int min_extfrag_threshold;
+static int max_extfrag_threshold = 1000;
+#endif
+
+static struct ctl_table kern_table[] = {
+ {
+ .procname = "sched_child_runs_first",
+ .data = &sysctl_sched_child_runs_first,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#ifdef CONFIG_SCHED_DEBUG
+ {
+ .procname = "sched_min_granularity_ns",
+ .data = &sysctl_sched_min_granularity,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = sched_proc_update_handler,
+ .extra1 = &min_sched_granularity_ns,
+ .extra2 = &max_sched_granularity_ns,
+ },
+ {
+ .procname = "sched_latency_ns",
+ .data = &sysctl_sched_latency,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = sched_proc_update_handler,
+ .extra1 = &min_sched_granularity_ns,
+ .extra2 = &max_sched_granularity_ns,
+ },
+ {
+ .procname = "sched_wakeup_granularity_ns",
+ .data = &sysctl_sched_wakeup_granularity,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = sched_proc_update_handler,
+ .extra1 = &min_wakeup_granularity_ns,
+ .extra2 = &max_wakeup_granularity_ns,
+ },
+#ifdef CONFIG_SMP
+ {
+ .procname = "sched_tunable_scaling",
+ .data = &sysctl_sched_tunable_scaling,
+ .maxlen = sizeof(enum sched_tunable_scaling),
+ .mode = 0644,
+ .proc_handler = sched_proc_update_handler,
+ .extra1 = &min_sched_tunable_scaling,
+ .extra2 = &max_sched_tunable_scaling,
+ },
+ {
+ .procname = "sched_migration_cost_ns",
+ .data = &sysctl_sched_migration_cost,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "sched_nr_migrate",
+ .data = &sysctl_sched_nr_migrate,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "sched_time_avg_ms",
+ .data = &sysctl_sched_time_avg,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "sched_shares_window_ns",
+ .data = &sysctl_sched_shares_window,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "timer_migration",
+ .data = &sysctl_timer_migration,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+#endif /* CONFIG_SMP */
+#ifdef CONFIG_NUMA_BALANCING
+ {
+ .procname = "numa_balancing_scan_delay_ms",
+ .data = &sysctl_numa_balancing_scan_delay,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "numa_balancing_scan_period_min_ms",
+ .data = &sysctl_numa_balancing_scan_period_min,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "numa_balancing_scan_period_reset",
+ .data = &sysctl_numa_balancing_scan_period_reset,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "numa_balancing_scan_period_max_ms",
+ .data = &sysctl_numa_balancing_scan_period_max,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "numa_balancing_scan_size_mb",
+ .data = &sysctl_numa_balancing_scan_size,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif /* CONFIG_NUMA_BALANCING */
+#endif /* CONFIG_SCHED_DEBUG */
+ {
+ .procname = "sched_rt_period_us",
+ .data = &sysctl_sched_rt_period,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = sched_rt_handler,
+ },
+ {
+ .procname = "sched_rt_runtime_us",
+ .data = &sysctl_sched_rt_runtime,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = sched_rt_handler,
+ },
+ {
+ .procname = "sched_rr_timeslice_ms",
+ .data = &sched_rr_timeslice,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = sched_rr_handler,
+ },
+#ifdef CONFIG_SCHED_AUTOGROUP
+ {
+ .procname = "sched_autogroup_enabled",
+ .data = &sysctl_sched_autogroup_enabled,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+#endif
+#ifdef CONFIG_CFS_BANDWIDTH
+ {
+ .procname = "sched_cfs_bandwidth_slice_us",
+ .data = &sysctl_sched_cfs_bandwidth_slice,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one,
+ },
+#endif
+#ifdef CONFIG_PROVE_LOCKING
+ {
+ .procname = "prove_locking",
+ .data = &prove_locking,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
+#ifdef CONFIG_LOCK_STAT
+ {
+ .procname = "lock_stat",
+ .data = &lock_stat,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
+ {
+ .procname = "panic",
+ .data = &panic_timeout,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#ifdef CONFIG_COREDUMP
+ {
+ .procname = "core_uses_pid",
+ .data = &core_uses_pid,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "core_pattern",
+ .data = core_pattern,
+ .maxlen = CORENAME_MAX_SIZE,
+ .mode = 0644,
+ .proc_handler = proc_dostring_coredump,
+ },
+ {
+ .procname = "core_pipe_limit",
+ .data = &core_pipe_limit,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
+#ifdef CONFIG_PROC_SYSCTL
+ {
+ .procname = "tainted",
+ .maxlen = sizeof(long),
+ .mode = 0644,
+ .proc_handler = proc_taint,
+ },
+#endif
+#ifdef CONFIG_LATENCYTOP
+ {
+ .procname = "latencytop",
+ .data = &latencytop_enabled,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
+#ifdef CONFIG_BLK_DEV_INITRD
+ {
+ .procname = "real-root-dev",
+ .data = &real_root_dev,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
+ {
+ .procname = "print-fatal-signals",
+ .data = &print_fatal_signals,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#ifdef CONFIG_SPARC
+ {
+ .procname = "reboot-cmd",
+ .data = reboot_command,
+ .maxlen = 256,
+ .mode = 0644,
+ .proc_handler = proc_dostring,
+ },
+ {
+ .procname = "stop-a",
+ .data = &stop_a_enabled,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "scons-poweroff",
+ .data = &scons_pwroff,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
+#ifdef CONFIG_SPARC64
+ {
+ .procname = "tsb-ratio",
+ .data = &sysctl_tsb_ratio,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
+#ifdef __hppa__
+ {
+ .procname = "soft-power",
+ .data = &pwrsw_enabled,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
+#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
+ {
+ .procname = "unaligned-trap",
+ .data = &unaligned_enabled,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
+ {
+ .procname = "ctrl-alt-del",
+ .data = &C_A_D,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#ifdef CONFIG_FUNCTION_TRACER
+ {
+ .procname = "ftrace_enabled",
+ .data = &ftrace_enabled,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = ftrace_enable_sysctl,
+ },
+#endif
+#ifdef CONFIG_STACK_TRACER
+ {
+ .procname = "stack_tracer_enabled",
+ .data = &stack_tracer_enabled,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = stack_trace_sysctl,
+ },
+#endif
+#ifdef CONFIG_TRACING
+ {
+ .procname = "ftrace_dump_on_oops",
+ .data = &ftrace_dump_on_oops,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "traceoff_on_warning",
+ .data = &__disable_trace_on_warning,
+ .maxlen = sizeof(__disable_trace_on_warning),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
+#ifdef CONFIG_MODULES
+ {
+ .procname = "modprobe",
+ .data = &modprobe_path,
+ .maxlen = KMOD_PATH_LEN,
+ .mode = 0644,
+ .proc_handler = proc_dostring,
+ },
+ {
+ .procname = "modules_disabled",
+ .data = &modules_disabled,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ /* only handle a transition from default "0" to "1" */
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one,
+ .extra2 = &one,
+ },
+#endif
+
+ {
+ .procname = "hotplug",
+ .data = &uevent_helper,
+ .maxlen = UEVENT_HELPER_PATH_LEN,
+ .mode = 0644,
+ .proc_handler = proc_dostring,
+ },
+
+#ifdef CONFIG_CHR_DEV_SG
+ {
+ .procname = "sg-big-buff",
+ .data = &sg_big_buff,
+ .maxlen = sizeof (int),
+ .mode = 0444,
+ .proc_handler = proc_dointvec,
+ },
+#endif
+#ifdef CONFIG_BSD_PROCESS_ACCT
+ {
+ .procname = "acct",
+ .data = &acct_parm,
+ .maxlen = 3*sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
+#ifdef CONFIG_MAGIC_SYSRQ
+ {
+ .procname = "sysrq",
+ .data = &__sysrq_enabled,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = sysrq_sysctl_handler,
+ },
+#endif
+#ifdef CONFIG_PROC_SYSCTL
+ {
+ .procname = "cad_pid",
+ .data = NULL,
+ .maxlen = sizeof (int),
+ .mode = 0600,
+ .proc_handler = proc_do_cad_pid,
+ },
+#endif
+ {
+ .procname = "threads-max",
+ .data = &max_threads,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "random",
+ .mode = 0555,
+ .child = random_table,
+ },
+ {
+ .procname = "usermodehelper",
+ .mode = 0555,
+ .child = usermodehelper_table,
+ },
+ {
+ .procname = "overflowuid",
+ .data = &overflowuid,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &minolduid,
+ .extra2 = &maxolduid,
+ },
+ {
+ .procname = "overflowgid",
+ .data = &overflowgid,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &minolduid,
+ .extra2 = &maxolduid,
+ },
+#ifdef CONFIG_S390
+#ifdef CONFIG_MATHEMU
+ {
+ .procname = "ieee_emulation_warnings",
+ .data = &sysctl_ieee_emulation_warnings,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
+ {
+ .procname = "userprocess_debug",
+ .data = &show_unhandled_signals,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
+ {
+ .procname = "pid_max",
+ .data = &pid_max,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &pid_max_min,
+ .extra2 = &pid_max_max,
+ },
+ {
+ .procname = "panic_on_oops",
+ .data = &panic_on_oops,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#if defined CONFIG_PRINTK
+ {
+ .procname = "printk",
+ .data = &console_loglevel,
+ .maxlen = 4*sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "printk_ratelimit",
+ .data = &printk_ratelimit_state.interval,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ {
+ .procname = "printk_ratelimit_burst",
+ .data = &printk_ratelimit_state.burst,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "printk_delay",
+ .data = &printk_delay_msec,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &ten_thousand,
+ },
+ {
+ .procname = "dmesg_restrict",
+ .data = &dmesg_restrict,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax_sysadmin,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+ {
+ .procname = "kptr_restrict",
+ .data = &kptr_restrict,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax_sysadmin,
+ .extra1 = &zero,
+ .extra2 = &two,
+ },
+#endif
+ {
+ .procname = "ngroups_max",
+ .data = &ngroups_max,
+ .maxlen = sizeof (int),
+ .mode = 0444,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "cap_last_cap",
+ .data = (void *)&cap_last_cap,
+ .maxlen = sizeof(int),
+ .mode = 0444,
+ .proc_handler = proc_dointvec,
+ },
+#if defined(CONFIG_LOCKUP_DETECTOR)
+ {
+ .procname = "watchdog",
+ .data = &watchdog_user_enabled,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = proc_dowatchdog,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+ {
+ .procname = "watchdog_thresh",
+ .data = &watchdog_thresh,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dowatchdog,
+ .extra1 = &zero,
+ .extra2 = &sixty,
+ },
+ {
+ .procname = "softlockup_panic",
+ .data = &softlockup_panic,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+ {
+ .procname = "nmi_watchdog",
+ .data = &watchdog_user_enabled,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = proc_dowatchdog,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+#endif
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
+ {
+ .procname = "unknown_nmi_panic",
+ .data = &unknown_nmi_panic,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
+#if defined(CONFIG_X86)
+ {
+ .procname = "panic_on_unrecovered_nmi",
+ .data = &panic_on_unrecovered_nmi,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "panic_on_io_nmi",
+ .data = &panic_on_io_nmi,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+ {
+ .procname = "panic_on_stackoverflow",
+ .data = &sysctl_panic_on_stackoverflow,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
+ {
+ .procname = "bootloader_type",
+ .data = &bootloader_type,
+ .maxlen = sizeof (int),
+ .mode = 0444,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "bootloader_version",
+ .data = &bootloader_version,
+ .maxlen = sizeof (int),
+ .mode = 0444,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "kstack_depth_to_print",
+ .data = &kstack_depth_to_print,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "io_delay_type",
+ .data = &io_delay_type,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
+#if defined(CONFIG_MMU)
+ {
+ .procname = "randomize_va_space",
+ .data = &randomize_va_space,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
+#if defined(CONFIG_S390) && defined(CONFIG_SMP)
+ {
+ .procname = "spin_retry",
+ .data = &spin_retry,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
+#if defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
+ {
+ .procname = "acpi_video_flags",
+ .data = &acpi_realmode_flags,
+ .maxlen = sizeof (unsigned long),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ },
+#endif
+#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
+ {
+ .procname = "ignore-unaligned-usertrap",
+ .data = &no_unaligned_warning,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
+#ifdef CONFIG_IA64
+ {
+ .procname = "unaligned-dump-stack",
+ .data = &unaligned_dump_stack,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
+#ifdef CONFIG_DETECT_HUNG_TASK
+ {
+ .procname = "hung_task_panic",
+ .data = &sysctl_hung_task_panic,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+ {
+ .procname = "hung_task_check_count",
+ .data = &sysctl_hung_task_check_count,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ },
+ {
+ .procname = "hung_task_timeout_secs",
+ .data = &sysctl_hung_task_timeout_secs,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = proc_dohung_task_timeout_secs,
+ },
+ {
+ .procname = "hung_task_warnings",
+ .data = &sysctl_hung_task_warnings,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ },
+#endif
+#ifdef CONFIG_COMPAT
+ {
+ .procname = "compat-log",
+ .data = &compat_log,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
+#ifdef CONFIG_RT_MUTEXES
+ {
+ .procname = "max_lock_depth",
+ .data = &max_lock_depth,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
+ {
+ .procname = "poweroff_cmd",
+ .data = &poweroff_cmd,
+ .maxlen = POWEROFF_CMD_PATH_LEN,
+ .mode = 0644,
+ .proc_handler = proc_dostring,
+ },
+#ifdef CONFIG_KEYS
+ {
+ .procname = "keys",
+ .mode = 0555,
+ .child = key_sysctls,
+ },
+#endif
+#ifdef CONFIG_RCU_TORTURE_TEST
+ {
+ .procname = "rcutorture_runnable",
+ .data = &rcutorture_runnable,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
+#ifdef CONFIG_PERF_EVENTS
+ /*
+ * User-space scripts rely on the existence of this file
+ * as a feature check for perf_events being enabled.
+ *
+ * So it's an ABI, do not remove!
+ */
+ {
+ .procname = "perf_event_paranoid",
+ .data = &sysctl_perf_event_paranoid,
+ .maxlen = sizeof(sysctl_perf_event_paranoid),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "perf_event_mlock_kb",
+ .data = &sysctl_perf_event_mlock,
+ .maxlen = sizeof(sysctl_perf_event_mlock),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "perf_event_max_sample_rate",
+ .data = &sysctl_perf_event_sample_rate,
+ .maxlen = sizeof(sysctl_perf_event_sample_rate),
+ .mode = 0644,
+ .proc_handler = perf_proc_update_handler,
+ },
+ {
+ .procname = "perf_cpu_time_max_percent",
+ .data = &sysctl_perf_cpu_time_max_percent,
+ .maxlen = sizeof(sysctl_perf_cpu_time_max_percent),
+ .mode = 0644,
+ .proc_handler = perf_cpu_time_max_percent_handler,
+ .extra1 = &zero,
+ .extra2 = &one_hundred,
+ },
+#endif
+#ifdef CONFIG_KMEMCHECK
+ {
+ .procname = "kmemcheck",
+ .data = &kmemcheck_enabled,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
+#ifdef CONFIG_BLOCK
+ {
+ .procname = "blk_iopoll",
+ .data = &blk_iopoll_enabled,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
+ { }
+};
+
+static struct ctl_table vm_table[] = {
+ {
+ .procname = "overcommit_memory",
+ .data = &sysctl_overcommit_memory,
+ .maxlen = sizeof(sysctl_overcommit_memory),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &two,
+ },
+ {
+ .procname = "panic_on_oom",
+ .data = &sysctl_panic_on_oom,
+ .maxlen = sizeof(sysctl_panic_on_oom),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &two,
+ },
+ {
+ .procname = "oom_kill_allocating_task",
+ .data = &sysctl_oom_kill_allocating_task,
+ .maxlen = sizeof(sysctl_oom_kill_allocating_task),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "oom_dump_tasks",
+ .data = &sysctl_oom_dump_tasks,
+ .maxlen = sizeof(sysctl_oom_dump_tasks),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "overcommit_ratio",
+ .data = &sysctl_overcommit_ratio,
+ .maxlen = sizeof(sysctl_overcommit_ratio),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "page-cluster",
+ .data = &page_cluster,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ },
+ {
+ .procname = "dirty_background_ratio",
+ .data = &dirty_background_ratio,
+ .maxlen = sizeof(dirty_background_ratio),
+ .mode = 0644,
+ .proc_handler = dirty_background_ratio_handler,
+ .extra1 = &zero,
+ .extra2 = &one_hundred,
+ },
+ {
+ .procname = "dirty_background_bytes",
+ .data = &dirty_background_bytes,
+ .maxlen = sizeof(dirty_background_bytes),
+ .mode = 0644,
+ .proc_handler = dirty_background_bytes_handler,
+ .extra1 = &one_ul,
+ },
+ {
+ .procname = "dirty_ratio",
+ .data = &vm_dirty_ratio,
+ .maxlen = sizeof(vm_dirty_ratio),
+ .mode = 0644,
+ .proc_handler = dirty_ratio_handler,
+ .extra1 = &zero,
+ .extra2 = &one_hundred,
+ },
+ {
+ .procname = "dirty_bytes",
+ .data = &vm_dirty_bytes,
+ .maxlen = sizeof(vm_dirty_bytes),
+ .mode = 0644,
+ .proc_handler = dirty_bytes_handler,
+ .extra1 = &dirty_bytes_min,
+ },
+ {
+ .procname = "dirty_writeback_centisecs",
+ .data = &dirty_writeback_interval,
+ .maxlen = sizeof(dirty_writeback_interval),
+ .mode = 0644,
+ .proc_handler = dirty_writeback_centisecs_handler,
+ },
+ {
+ .procname = "dirty_expire_centisecs",
+ .data = &dirty_expire_interval,
+ .maxlen = sizeof(dirty_expire_interval),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ },
+ {
+ .procname = "nr_pdflush_threads",
+ .mode = 0444 /* read-only */,
+ .proc_handler = pdflush_proc_obsolete,
+ },
+ {
+ .procname = "swappiness",
+ .data = &vm_swappiness,
+ .maxlen = sizeof(vm_swappiness),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one_hundred,
+ },
+#ifdef CONFIG_HUGETLB_PAGE
+ {
+ .procname = "nr_hugepages",
+ .data = NULL,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = hugetlb_sysctl_handler,
+ .extra1 = (void *)&hugetlb_zero,
+ .extra2 = (void *)&hugetlb_infinity,
+ },
+#ifdef CONFIG_NUMA
+ {
+ .procname = "nr_hugepages_mempolicy",
+ .data = NULL,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = &hugetlb_mempolicy_sysctl_handler,
+ .extra1 = (void *)&hugetlb_zero,
+ .extra2 = (void *)&hugetlb_infinity,
+ },
+#endif
+ {
+ .procname = "hugetlb_shm_group",
+ .data = &sysctl_hugetlb_shm_group,
+ .maxlen = sizeof(gid_t),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "hugepages_treat_as_movable",
+ .data = &hugepages_treat_as_movable,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = hugetlb_treat_movable_handler,
+ },
+ {
+ .procname = "nr_overcommit_hugepages",
+ .data = NULL,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = hugetlb_overcommit_handler,
+ .extra1 = (void *)&hugetlb_zero,
+ .extra2 = (void *)&hugetlb_infinity,
+ },
+#endif
+ {
+ .procname = "lowmem_reserve_ratio",
+ .data = &sysctl_lowmem_reserve_ratio,
+ .maxlen = sizeof(sysctl_lowmem_reserve_ratio),
+ .mode = 0644,
+ .proc_handler = lowmem_reserve_ratio_sysctl_handler,
+ },
+ {
+ .procname = "drop_caches",
+ .data = &sysctl_drop_caches,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = drop_caches_sysctl_handler,
+ .extra1 = &one,
+ .extra2 = &three,
+ },
+#ifdef CONFIG_COMPACTION
+ {
+ .procname = "compact_memory",
+ .data = &sysctl_compact_memory,
+ .maxlen = sizeof(int),
+ .mode = 0200,
+ .proc_handler = sysctl_compaction_handler,
+ },
+ {
+ .procname = "extfrag_threshold",
+ .data = &sysctl_extfrag_threshold,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = sysctl_extfrag_handler,
+ .extra1 = &min_extfrag_threshold,
+ .extra2 = &max_extfrag_threshold,
+ },
+
+#endif /* CONFIG_COMPACTION */
+ {
+ .procname = "min_free_kbytes",
+ .data = &min_free_kbytes,
+ .maxlen = sizeof(min_free_kbytes),
+ .mode = 0644,
+ .proc_handler = min_free_kbytes_sysctl_handler,
+ .extra1 = &zero,
+ },
+ {
+ .procname = "percpu_pagelist_fraction",
+ .data = &percpu_pagelist_fraction,
+ .maxlen = sizeof(percpu_pagelist_fraction),
+ .mode = 0644,
+ .proc_handler = percpu_pagelist_fraction_sysctl_handler,
+ .extra1 = &min_percpu_pagelist_fract,
+ },
+#ifdef CONFIG_MMU
+ {
+ .procname = "max_map_count",
+ .data = &sysctl_max_map_count,
+ .maxlen = sizeof(sysctl_max_map_count),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ },
+#else
+ {
+ .procname = "nr_trim_pages",
+ .data = &sysctl_nr_trim_pages,
+ .maxlen = sizeof(sysctl_nr_trim_pages),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ },
+#endif
+ {
+ .procname = "laptop_mode",
+ .data = &laptop_mode,
+ .maxlen = sizeof(laptop_mode),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ {
+ .procname = "block_dump",
+ .data = &block_dump,
+ .maxlen = sizeof(block_dump),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ .extra1 = &zero,
+ },
+ {
+ .procname = "vfs_cache_pressure",
+ .data = &sysctl_vfs_cache_pressure,
+ .maxlen = sizeof(sysctl_vfs_cache_pressure),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ .extra1 = &zero,
+ },
+#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
+ {
+ .procname = "legacy_va_layout",
+ .data = &sysctl_legacy_va_layout,
+ .maxlen = sizeof(sysctl_legacy_va_layout),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ .extra1 = &zero,
+ },
+#endif
+#ifdef CONFIG_NUMA
+ {
+ .procname = "zone_reclaim_mode",
+ .data = &zone_reclaim_mode,
+ .maxlen = sizeof(zone_reclaim_mode),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ .extra1 = &zero,
+ },
+ {
+ .procname = "min_unmapped_ratio",
+ .data = &sysctl_min_unmapped_ratio,
+ .maxlen = sizeof(sysctl_min_unmapped_ratio),
+ .mode = 0644,
+ .proc_handler = sysctl_min_unmapped_ratio_sysctl_handler,
+ .extra1 = &zero,
+ .extra2 = &one_hundred,
+ },
+ {
+ .procname = "min_slab_ratio",
+ .data = &sysctl_min_slab_ratio,
+ .maxlen = sizeof(sysctl_min_slab_ratio),
+ .mode = 0644,
+ .proc_handler = sysctl_min_slab_ratio_sysctl_handler,
+ .extra1 = &zero,
+ .extra2 = &one_hundred,
+ },
+#endif
+#ifdef CONFIG_SMP
+ {
+ .procname = "stat_interval",
+ .data = &sysctl_stat_interval,
+ .maxlen = sizeof(sysctl_stat_interval),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+#endif
+#ifdef CONFIG_MMU
+ {
+ .procname = "mmap_min_addr",
+ .data = &dac_mmap_min_addr,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = mmap_min_addr_handler,
+ },
+#endif
+#ifdef CONFIG_NUMA
+ {
+ .procname = "numa_zonelist_order",
+ .data = &numa_zonelist_order,
+ .maxlen = NUMA_ZONELIST_ORDER_LEN,
+ .mode = 0644,
+ .proc_handler = numa_zonelist_order_handler,
+ },
+#endif
+#if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
+ (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
+ {
+ .procname = "vdso_enabled",
+ .data = &vdso_enabled,
+ .maxlen = sizeof(vdso_enabled),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ .extra1 = &zero,
+ },
+#endif
+#ifdef CONFIG_HIGHMEM
+ {
+ .procname = "highmem_is_dirtyable",
+ .data = &vm_highmem_is_dirtyable,
+ .maxlen = sizeof(vm_highmem_is_dirtyable),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+#endif
+ {
+ .procname = "scan_unevictable_pages",
+ .data = &scan_unevictable_pages,
+ .maxlen = sizeof(scan_unevictable_pages),
+ .mode = 0644,
+ .proc_handler = scan_unevictable_handler,
+ },
+#ifdef CONFIG_MEMORY_FAILURE
+ {
+ .procname = "memory_failure_early_kill",
+ .data = &sysctl_memory_failure_early_kill,
+ .maxlen = sizeof(sysctl_memory_failure_early_kill),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+ {
+ .procname = "memory_failure_recovery",
+ .data = &sysctl_memory_failure_recovery,
+ .maxlen = sizeof(sysctl_memory_failure_recovery),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+#endif
+ {
+ .procname = "user_reserve_kbytes",
+ .data = &sysctl_user_reserve_kbytes,
+ .maxlen = sizeof(sysctl_user_reserve_kbytes),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ },
+ {
+ .procname = "admin_reserve_kbytes",
+ .data = &sysctl_admin_reserve_kbytes,
+ .maxlen = sizeof(sysctl_admin_reserve_kbytes),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ },
+ { }
+};
+
+#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
+static struct ctl_table binfmt_misc_table[] = {
+ { }
+};
+#endif
+
+static struct ctl_table fs_table[] = {
+ {
+ .procname = "inode-nr",
+ .data = &inodes_stat,
+ .maxlen = 2*sizeof(int),
+ .mode = 0444,
+ .proc_handler = proc_nr_inodes,
+ },
+ {
+ .procname = "inode-state",
+ .data = &inodes_stat,
+ .maxlen = 7*sizeof(int),
+ .mode = 0444,
+ .proc_handler = proc_nr_inodes,
+ },
+ {
+ .procname = "file-nr",
+ .data = &files_stat,
+ .maxlen = sizeof(files_stat),
+ .mode = 0444,
+ .proc_handler = proc_nr_files,
+ },
+ {
+ .procname = "file-max",
+ .data = &files_stat.max_files,
+ .maxlen = sizeof(files_stat.max_files),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ },
+ {
+ .procname = "nr_open",
+ .data = &sysctl_nr_open,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &sysctl_nr_open_min,
+ .extra2 = &sysctl_nr_open_max,
+ },
+ {
+ .procname = "dentry-state",
+ .data = &dentry_stat,
+ .maxlen = 6*sizeof(int),
+ .mode = 0444,
+ .proc_handler = proc_nr_dentry,
+ },
+ {
+ .procname = "overflowuid",
+ .data = &fs_overflowuid,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &minolduid,
+ .extra2 = &maxolduid,
+ },
+ {
+ .procname = "overflowgid",
+ .data = &fs_overflowgid,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &minolduid,
+ .extra2 = &maxolduid,
+ },
+#ifdef CONFIG_FILE_LOCKING
+ {
+ .procname = "leases-enable",
+ .data = &leases_enable,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
+#ifdef CONFIG_DNOTIFY
+ {
+ .procname = "dir-notify-enable",
+ .data = &dir_notify_enable,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
+#ifdef CONFIG_MMU
+#ifdef CONFIG_FILE_LOCKING
+ {
+ .procname = "lease-break-time",
+ .data = &lease_break_time,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
+#ifdef CONFIG_AIO
+ {
+ .procname = "aio-nr",
+ .data = &aio_nr,
+ .maxlen = sizeof(aio_nr),
+ .mode = 0444,
+ .proc_handler = proc_doulongvec_minmax,
+ },
+ {
+ .procname = "aio-max-nr",
+ .data = &aio_max_nr,
+ .maxlen = sizeof(aio_max_nr),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ },
+#endif /* CONFIG_AIO */
+#ifdef CONFIG_INOTIFY_USER
+ {
+ .procname = "inotify",
+ .mode = 0555,
+ .child = inotify_table,
+ },
+#endif
+#ifdef CONFIG_EPOLL
+ {
+ .procname = "epoll",
+ .mode = 0555,
+ .child = epoll_table,
+ },
+#endif
+#endif
+ {
+ .procname = "protected_symlinks",
+ .data = &sysctl_protected_symlinks,
+ .maxlen = sizeof(int),
+ .mode = 0600,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+ {
+ .procname = "protected_hardlinks",
+ .data = &sysctl_protected_hardlinks,
+ .maxlen = sizeof(int),
+ .mode = 0600,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+ {
+ .procname = "suid_dumpable",
+ .data = &suid_dumpable,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax_coredump,
+ .extra1 = &zero,
+ .extra2 = &two,
+ },
+#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
+ {
+ .procname = "binfmt_misc",
+ .mode = 0555,
+ .child = binfmt_misc_table,
+ },
+#endif
+ {
+ .procname = "pipe-max-size",
+ .data = &pipe_max_size,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &pipe_proc_fn,
+ .extra1 = &pipe_min_size,
+ },
+ { }
+};
+
+static struct ctl_table debug_table[] = {
+#ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
+ {
+ .procname = "exception-trace",
+ .data = &show_unhandled_signals,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+#endif
+#if defined(CONFIG_OPTPROBES)
+ {
+ .procname = "kprobes-optimization",
+ .data = &sysctl_kprobes_optimization,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_kprobes_optimization_handler,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+#endif
+ { }
+};
+
+static struct ctl_table dev_table[] = {
+ { }
+};
+
+int __init sysctl_init(void)
+{
+ struct ctl_table_header *hdr;
+
+ hdr = register_sysctl_table(sysctl_base_table);
+ kmemleak_not_leak(hdr);
+ return 0;
+}
+
+#endif /* CONFIG_SYSCTL */
+
+/*
+ * /proc/sys support
+ */
+
+#ifdef CONFIG_PROC_SYSCTL
+
+static int _proc_do_string(void* data, int maxlen, int write,
+ void __user *buffer,
+ size_t *lenp, loff_t *ppos)
+{
+ size_t len;
+ char __user *p;
+ char c;
+
+ if (!data || !maxlen || !*lenp) {
+ *lenp = 0;
+ return 0;
+ }
+
+ if (write) {
+ len = 0;
+ p = buffer;
+ while (len < *lenp) {
+ if (get_user(c, p++))
+ return -EFAULT;
+ if (c == 0 || c == '\n')
+ break;
+ len++;
+ }
+ if (len >= maxlen)
+ len = maxlen-1;
+ if(copy_from_user(data, buffer, len))
+ return -EFAULT;
+ ((char *) data)[len] = 0;
+ *ppos += *lenp;
+ } else {
+ len = strlen(data);
+ if (len > maxlen)
+ len = maxlen;
+
+ if (*ppos > len) {
+ *lenp = 0;
+ return 0;
+ }
+
+ data += *ppos;
+ len -= *ppos;
+
+ if (len > *lenp)
+ len = *lenp;
+ if (len)
+ if(copy_to_user(buffer, data, len))
+ return -EFAULT;
+ if (len < *lenp) {
+ if(put_user('\n', ((char __user *) buffer) + len))
+ return -EFAULT;
+ len++;
+ }
+ *lenp = len;
+ *ppos += len;
+ }
+ return 0;
+}
+
+/**
+ * proc_dostring - read a string sysctl
+ * @table: the sysctl table
+ * @write: %TRUE if this is a write to the sysctl file
+ * @buffer: the user buffer
+ * @lenp: the size of the user buffer
+ * @ppos: file position
+ *
+ * Reads/writes a string from/to the user buffer. If the kernel
+ * buffer provided is not large enough to hold the string, the
+ * string is truncated. The copied string is %NULL-terminated.
+ * If the string is being read by the user process, it is copied
+ * and a newline '\n' is added. It is truncated if the buffer is
+ * not large enough.
+ *
+ * Returns 0 on success.
+ */
+int proc_dostring(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ return _proc_do_string(table->data, table->maxlen, write,
+ buffer, lenp, ppos);
+}
+
+static size_t proc_skip_spaces(char **buf)
+{
+ size_t ret;
+ char *tmp = skip_spaces(*buf);
+ ret = tmp - *buf;
+ *buf = tmp;
+ return ret;
+}
+
+static void proc_skip_char(char **buf, size_t *size, const char v)
+{
+ while (*size) {
+ if (**buf != v)
+ break;
+ (*size)--;
+ (*buf)++;
+ }
+}
+
+#define TMPBUFLEN 22
+/**
+ * proc_get_long - reads an ASCII formatted integer from a user buffer
+ *
+ * @buf: a kernel buffer
+ * @size: size of the kernel buffer
+ * @val: this is where the number will be stored
+ * @neg: set to %TRUE if number is negative
+ * @perm_tr: a vector which contains the allowed trailers
+ * @perm_tr_len: size of the perm_tr vector
+ * @tr: pointer to store the trailer character
+ *
+ * In case of success %0 is returned and @buf and @size are updated with
+ * the amount of bytes read. If @tr is non-NULL and a trailing
+ * character exists (size is non-zero after returning from this
+ * function), @tr is updated with the trailing character.
+ */
+static int proc_get_long(char **buf, size_t *size,
+ unsigned long *val, bool *neg,
+ const char *perm_tr, unsigned perm_tr_len, char *tr)
+{
+ int len;
+ char *p, tmp[TMPBUFLEN];
+
+ if (!*size)
+ return -EINVAL;
+
+ len = *size;
+ if (len > TMPBUFLEN - 1)
+ len = TMPBUFLEN - 1;
+
+ memcpy(tmp, *buf, len);
+
+ tmp[len] = 0;
+ p = tmp;
+ if (*p == '-' && *size > 1) {
+ *neg = true;
+ p++;
+ } else
+ *neg = false;
+ if (!isdigit(*p))
+ return -EINVAL;
+
+ *val = simple_strtoul(p, &p, 0);
+
+ len = p - tmp;
+
+ /* We don't know if the next char is whitespace thus we may accept
+ * invalid integers (e.g. 1234...a) or two integers instead of one
+ * (e.g. 123...1). So lets not allow such large numbers. */
+ if (len == TMPBUFLEN - 1)
+ return -EINVAL;
+
+ if (len < *size && perm_tr_len && !memchr(perm_tr, *p, perm_tr_len))
+ return -EINVAL;
+
+ if (tr && (len < *size))
+ *tr = *p;
+
+ *buf += len;
+ *size -= len;
+
+ return 0;
+}
+
+/**
+ * proc_put_long - converts an integer to a decimal ASCII formatted string
+ *
+ * @buf: the user buffer
+ * @size: the size of the user buffer
+ * @val: the integer to be converted
+ * @neg: sign of the number, %TRUE for negative
+ *
+ * In case of success %0 is returned and @buf and @size are updated with
+ * the amount of bytes written.
+ */
+static int proc_put_long(void __user **buf, size_t *size, unsigned long val,
+ bool neg)
+{
+ int len;
+ char tmp[TMPBUFLEN], *p = tmp;
+
+ sprintf(p, "%s%lu", neg ? "-" : "", val);
+ len = strlen(tmp);
+ if (len > *size)
+ len = *size;
+ if (copy_to_user(*buf, tmp, len))
+ return -EFAULT;
+ *size -= len;
+ *buf += len;
+ return 0;
+}
+#undef TMPBUFLEN
+
+static int proc_put_char(void __user **buf, size_t *size, char c)
+{
+ if (*size) {
+ char __user **buffer = (char __user **)buf;
+ if (put_user(c, *buffer))
+ return -EFAULT;
+ (*size)--, (*buffer)++;
+ *buf = *buffer;
+ }
+ return 0;
+}
+
+static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
+ int *valp,
+ int write, void *data)
+{
+ if (write) {
+ *valp = *negp ? -*lvalp : *lvalp;
+ } else {
+ int val = *valp;
+ if (val < 0) {
+ *negp = true;
+ *lvalp = (unsigned long)-val;
+ } else {
+ *negp = false;
+ *lvalp = (unsigned long)val;
+ }
+ }
+ return 0;
+}
+
+static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
+
+static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
+ int write, void __user *buffer,
+ size_t *lenp, loff_t *ppos,
+ int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
+ int write, void *data),
+ void *data)
+{
+ int *i, vleft, first = 1, err = 0;
+ unsigned long page = 0;
+ size_t left;
+ char *kbuf;
+
+ if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
+ *lenp = 0;
+ return 0;
+ }
+
+ i = (int *) tbl_data;
+ vleft = table->maxlen / sizeof(*i);
+ left = *lenp;
+
+ if (!conv)
+ conv = do_proc_dointvec_conv;
+
+ if (write) {
+ if (left > PAGE_SIZE - 1)
+ left = PAGE_SIZE - 1;
+ page = __get_free_page(GFP_TEMPORARY);
+ kbuf = (char *) page;
+ if (!kbuf)
+ return -ENOMEM;
+ if (copy_from_user(kbuf, buffer, left)) {
+ err = -EFAULT;
+ goto free;
+ }
+ kbuf[left] = 0;
+ }
+
+ for (; left && vleft--; i++, first=0) {
+ unsigned long lval;
+ bool neg;
+
+ if (write) {
+ left -= proc_skip_spaces(&kbuf);
+
+ if (!left)
+ break;
+ err = proc_get_long(&kbuf, &left, &lval, &neg,
+ proc_wspace_sep,
+ sizeof(proc_wspace_sep), NULL);
+ if (err)
+ break;
+ if (conv(&neg, &lval, i, 1, data)) {
+ err = -EINVAL;
+ break;
+ }
+ } else {
+ if (conv(&neg, &lval, i, 0, data)) {
+ err = -EINVAL;
+ break;
+ }
+ if (!first)
+ err = proc_put_char(&buffer, &left, '\t');
+ if (err)
+ break;
+ err = proc_put_long(&buffer, &left, lval, neg);
+ if (err)
+ break;
+ }
+ }
+
+ if (!write && !first && left && !err)
+ err = proc_put_char(&buffer, &left, '\n');
+ if (write && !err && left)
+ left -= proc_skip_spaces(&kbuf);
+free:
+ if (write) {
+ free_page(page);
+ if (first)
+ return err ? : -EINVAL;
+ }
+ *lenp -= left;
+ *ppos += *lenp;
+ return err;
+}
+
+static int do_proc_dointvec(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos,
+ int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
+ int write, void *data),
+ void *data)
+{
+ return __do_proc_dointvec(table->data, table, write,
+ buffer, lenp, ppos, conv, data);
+}
+
+/**
+ * proc_dointvec - read a vector of integers
+ * @table: the sysctl table
+ * @write: %TRUE if this is a write to the sysctl file
+ * @buffer: the user buffer
+ * @lenp: the size of the user buffer
+ * @ppos: file position
+ *
+ * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
+ * values from/to the user buffer, treated as an ASCII string.
+ *
+ * Returns 0 on success.
+ */
+int proc_dointvec(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ return do_proc_dointvec(table,write,buffer,lenp,ppos,
+ NULL,NULL);
+}
+
+/*
+ * Taint values can only be increased
+ * This means we can safely use a temporary.
+ */
+static int proc_taint(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct ctl_table t;
+ unsigned long tmptaint = get_taint();
+ int err;
+
+ if (write && !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ t = *table;
+ t.data = &tmptaint;
+ err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
+ if (err < 0)
+ return err;
+
+ if (write) {
+ /*
+ * Poor man's atomic or. Not worth adding a primitive
+ * to everyone's atomic.h for this
+ */
+ int i;
+ for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) {
+ if ((tmptaint >> i) & 1)
+ add_taint(i, LOCKDEP_STILL_OK);
+ }
+ }
+
+ return err;
+}
+
+#ifdef CONFIG_PRINTK
+static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ if (write && !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+}
+#endif
+
+struct do_proc_dointvec_minmax_conv_param {
+ int *min;
+ int *max;
+};
+
+static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
+ int *valp,
+ int write, void *data)
+{
+ struct do_proc_dointvec_minmax_conv_param *param = data;
+ if (write) {
+ int val = *negp ? -*lvalp : *lvalp;
+ if ((param->min && *param->min > val) ||
+ (param->max && *param->max < val))
+ return -EINVAL;
+ *valp = val;
+ } else {
+ int val = *valp;
+ if (val < 0) {
+ *negp = true;
+ *lvalp = (unsigned long)-val;
+ } else {
+ *negp = false;
+ *lvalp = (unsigned long)val;
+ }
+ }
+ return 0;
+}
+
+/**
+ * proc_dointvec_minmax - read a vector of integers with min/max values
+ * @table: the sysctl table
+ * @write: %TRUE if this is a write to the sysctl file
+ * @buffer: the user buffer
+ * @lenp: the size of the user buffer
+ * @ppos: file position
+ *
+ * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
+ * values from/to the user buffer, treated as an ASCII string.
+ *
+ * This routine will ensure the values are within the range specified by
+ * table->extra1 (min) and table->extra2 (max).
+ *
+ * Returns 0 on success.
+ */
+int proc_dointvec_minmax(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct do_proc_dointvec_minmax_conv_param param = {
+ .min = (int *) table->extra1,
+ .max = (int *) table->extra2,
+ };
+ return do_proc_dointvec(table, write, buffer, lenp, ppos,
+ do_proc_dointvec_minmax_conv, &param);
+}
+
+static void validate_coredump_safety(void)
+{
+#ifdef CONFIG_COREDUMP
+ if (suid_dumpable == SUID_DUMP_ROOT &&
+ core_pattern[0] != '/' && core_pattern[0] != '|') {
+ printk(KERN_WARNING "Unsafe core_pattern used with "\
+ "suid_dumpable=2. Pipe handler or fully qualified "\
+ "core dump path required.\n");
+ }
+#endif
+}
+
+static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+ if (!error)
+ validate_coredump_safety();
+ return error;
+}
+
+#ifdef CONFIG_COREDUMP
+static int proc_dostring_coredump(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ int error = proc_dostring(table, write, buffer, lenp, ppos);
+ if (!error)
+ validate_coredump_safety();
+ return error;
+}
+#endif
+
+static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
+ void __user *buffer,
+ size_t *lenp, loff_t *ppos,
+ unsigned long convmul,
+ unsigned long convdiv)
+{
+ unsigned long *i, *min, *max;
+ int vleft, first = 1, err = 0;
+ unsigned long page = 0;
+ size_t left;
+ char *kbuf;
+
+ if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
+ *lenp = 0;
+ return 0;
+ }
+
+ i = (unsigned long *) data;
+ min = (unsigned long *) table->extra1;
+ max = (unsigned long *) table->extra2;
+ vleft = table->maxlen / sizeof(unsigned long);
+ left = *lenp;
+
+ if (write) {
+ if (left > PAGE_SIZE - 1)
+ left = PAGE_SIZE - 1;
+ page = __get_free_page(GFP_TEMPORARY);
+ kbuf = (char *) page;
+ if (!kbuf)
+ return -ENOMEM;
+ if (copy_from_user(kbuf, buffer, left)) {
+ err = -EFAULT;
+ goto free;
+ }
+ kbuf[left] = 0;
+ }
+
+ for (; left && vleft--; i++, first = 0) {
+ unsigned long val;
+
+ if (write) {
+ bool neg;
+
+ left -= proc_skip_spaces(&kbuf);
+
+ err = proc_get_long(&kbuf, &left, &val, &neg,
+ proc_wspace_sep,
+ sizeof(proc_wspace_sep), NULL);
+ if (err)
+ break;
+ if (neg)
+ continue;
+ if ((min && val < *min) || (max && val > *max))
+ continue;
+ *i = val;
+ } else {
+ val = convdiv * (*i) / convmul;
+ if (!first)
+ err = proc_put_char(&buffer, &left, '\t');
+ err = proc_put_long(&buffer, &left, val, false);
+ if (err)
+ break;
+ }
+ }
+
+ if (!write && !first && left && !err)
+ err = proc_put_char(&buffer, &left, '\n');
+ if (write && !err)
+ left -= proc_skip_spaces(&kbuf);
+free:
+ if (write) {
+ free_page(page);
+ if (first)
+ return err ? : -EINVAL;
+ }
+ *lenp -= left;
+ *ppos += *lenp;
+ return err;
+}
+
+static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
+ void __user *buffer,
+ size_t *lenp, loff_t *ppos,
+ unsigned long convmul,
+ unsigned long convdiv)
+{
+ return __do_proc_doulongvec_minmax(table->data, table, write,
+ buffer, lenp, ppos, convmul, convdiv);
+}
+
+/**
+ * proc_doulongvec_minmax - read a vector of long integers with min/max values
+ * @table: the sysctl table
+ * @write: %TRUE if this is a write to the sysctl file
+ * @buffer: the user buffer
+ * @lenp: the size of the user buffer
+ * @ppos: file position
+ *
+ * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
+ * values from/to the user buffer, treated as an ASCII string.
+ *
+ * This routine will ensure the values are within the range specified by
+ * table->extra1 (min) and table->extra2 (max).
+ *
+ * Returns 0 on success.
+ */
+int proc_doulongvec_minmax(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
+}
+
+/**
+ * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
+ * @table: the sysctl table
+ * @write: %TRUE if this is a write to the sysctl file
+ * @buffer: the user buffer
+ * @lenp: the size of the user buffer
+ * @ppos: file position
+ *
+ * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
+ * values from/to the user buffer, treated as an ASCII string. The values
+ * are treated as milliseconds, and converted to jiffies when they are stored.
+ *
+ * This routine will ensure the values are within the range specified by
+ * table->extra1 (min) and table->extra2 (max).
+ *
+ * Returns 0 on success.
+ */
+int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
+ void __user *buffer,
+ size_t *lenp, loff_t *ppos)
+{
+ return do_proc_doulongvec_minmax(table, write, buffer,
+ lenp, ppos, HZ, 1000l);
+}
+
+
+static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
+ int *valp,
+ int write, void *data)
+{
+ if (write) {
+ if (*lvalp > LONG_MAX / HZ)
+ return 1;
+ *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
+ } else {
+ int val = *valp;
+ unsigned long lval;
+ if (val < 0) {
+ *negp = true;
+ lval = (unsigned long)-val;
+ } else {
+ *negp = false;
+ lval = (unsigned long)val;
+ }
+ *lvalp = lval / HZ;
+ }
+ return 0;
+}
+
+static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
+ int *valp,
+ int write, void *data)
+{
+ if (write) {
+ if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
+ return 1;
+ *valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
+ } else {
+ int val = *valp;
+ unsigned long lval;
+ if (val < 0) {
+ *negp = true;
+ lval = (unsigned long)-val;
+ } else {
+ *negp = false;
+ lval = (unsigned long)val;
+ }
+ *lvalp = jiffies_to_clock_t(lval);
+ }
+ return 0;
+}
+
+static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
+ int *valp,
+ int write, void *data)
+{
+ if (write) {
+ unsigned long jif = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
+
+ if (jif > INT_MAX)
+ return 1;
+ *valp = (int)jif;
+ } else {
+ int val = *valp;
+ unsigned long lval;
+ if (val < 0) {
+ *negp = true;
+ lval = (unsigned long)-val;
+ } else {
+ *negp = false;
+ lval = (unsigned long)val;
+ }
+ *lvalp = jiffies_to_msecs(lval);
+ }
+ return 0;
+}
+
+/**
+ * proc_dointvec_jiffies - read a vector of integers as seconds
+ * @table: the sysctl table
+ * @write: %TRUE if this is a write to the sysctl file
+ * @buffer: the user buffer
+ * @lenp: the size of the user buffer
+ * @ppos: file position
+ *
+ * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
+ * values from/to the user buffer, treated as an ASCII string.
+ * The values read are assumed to be in seconds, and are converted into
+ * jiffies.
+ *
+ * Returns 0 on success.
+ */
+int proc_dointvec_jiffies(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ return do_proc_dointvec(table,write,buffer,lenp,ppos,
+ do_proc_dointvec_jiffies_conv,NULL);
+}
+
+/**
+ * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
+ * @table: the sysctl table
+ * @write: %TRUE if this is a write to the sysctl file
+ * @buffer: the user buffer
+ * @lenp: the size of the user buffer
+ * @ppos: pointer to the file position
+ *
+ * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
+ * values from/to the user buffer, treated as an ASCII string.
+ * The values read are assumed to be in 1/USER_HZ seconds, and
+ * are converted into jiffies.
+ *
+ * Returns 0 on success.
+ */
+int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ return do_proc_dointvec(table,write,buffer,lenp,ppos,
+ do_proc_dointvec_userhz_jiffies_conv,NULL);
+}
+
+/**
+ * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
+ * @table: the sysctl table
+ * @write: %TRUE if this is a write to the sysctl file
+ * @buffer: the user buffer
+ * @lenp: the size of the user buffer
+ * @ppos: file position
+ * @ppos: the current position in the file
+ *
+ * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
+ * values from/to the user buffer, treated as an ASCII string.
+ * The values read are assumed to be in 1/1000 seconds, and
+ * are converted into jiffies.
+ *
+ * Returns 0 on success.
+ */
+int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ return do_proc_dointvec(table, write, buffer, lenp, ppos,
+ do_proc_dointvec_ms_jiffies_conv, NULL);
+}
+
+static int proc_do_cad_pid(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct pid *new_pid;
+ pid_t tmp;
+ int r;
+
+ tmp = pid_vnr(cad_pid);
+
+ r = __do_proc_dointvec(&tmp, table, write, buffer,
+ lenp, ppos, NULL, NULL);
+ if (r || !write)
+ return r;
+
+ new_pid = find_get_pid(tmp);
+ if (!new_pid)
+ return -ESRCH;
+
+ put_pid(xchg(&cad_pid, new_pid));
+ return 0;
+}
+
+/**
+ * proc_do_large_bitmap - read/write from/to a large bitmap
+ * @table: the sysctl table
+ * @write: %TRUE if this is a write to the sysctl file
+ * @buffer: the user buffer
+ * @lenp: the size of the user buffer
+ * @ppos: file position
+ *
+ * The bitmap is stored at table->data and the bitmap length (in bits)
+ * in table->maxlen.
+ *
+ * We use a range comma separated format (e.g. 1,3-4,10-10) so that
+ * large bitmaps may be represented in a compact manner. Writing into
+ * the file will clear the bitmap then update it with the given input.
+ *
+ * Returns 0 on success.
+ */
+int proc_do_large_bitmap(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ int err = 0;
+ bool first = 1;
+ size_t left = *lenp;
+ unsigned long bitmap_len = table->maxlen;
+ unsigned long *bitmap = (unsigned long *) table->data;
+ unsigned long *tmp_bitmap = NULL;
+ char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;
+
+ if (!bitmap_len || !left || (*ppos && !write)) {
+ *lenp = 0;
+ return 0;
+ }
+
+ if (write) {
+ unsigned long page = 0;
+ char *kbuf;
+
+ if (left > PAGE_SIZE - 1)
+ left = PAGE_SIZE - 1;
+
+ page = __get_free_page(GFP_TEMPORARY);
+ kbuf = (char *) page;
+ if (!kbuf)
+ return -ENOMEM;
+ if (copy_from_user(kbuf, buffer, left)) {
+ free_page(page);
+ return -EFAULT;
+ }
+ kbuf[left] = 0;
+
+ tmp_bitmap = kzalloc(BITS_TO_LONGS(bitmap_len) * sizeof(unsigned long),
+ GFP_KERNEL);
+ if (!tmp_bitmap) {
+ free_page(page);
+ return -ENOMEM;
+ }
+ proc_skip_char(&kbuf, &left, '\n');
+ while (!err && left) {
+ unsigned long val_a, val_b;
+ bool neg;
+
+ err = proc_get_long(&kbuf, &left, &val_a, &neg, tr_a,
+ sizeof(tr_a), &c);
+ if (err)
+ break;
+ if (val_a >= bitmap_len || neg) {
+ err = -EINVAL;
+ break;
+ }
+
+ val_b = val_a;
+ if (left) {
+ kbuf++;
+ left--;
+ }
+
+ if (c == '-') {
+ err = proc_get_long(&kbuf, &left, &val_b,
+ &neg, tr_b, sizeof(tr_b),
+ &c);
+ if (err)
+ break;
+ if (val_b >= bitmap_len || neg ||
+ val_a > val_b) {
+ err = -EINVAL;
+ break;
+ }
+ if (left) {
+ kbuf++;
+ left--;
+ }
+ }
+
+ bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1);
+ first = 0;
+ proc_skip_char(&kbuf, &left, '\n');
+ }
+ free_page(page);
+ } else {
+ unsigned long bit_a, bit_b = 0;
+
+ while (left) {
+ bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
+ if (bit_a >= bitmap_len)
+ break;
+ bit_b = find_next_zero_bit(bitmap, bitmap_len,
+ bit_a + 1) - 1;
+
+ if (!first) {
+ err = proc_put_char(&buffer, &left, ',');
+ if (err)
+ break;
+ }
+ err = proc_put_long(&buffer, &left, bit_a, false);
+ if (err)
+ break;
+ if (bit_a != bit_b) {
+ err = proc_put_char(&buffer, &left, '-');
+ if (err)
+ break;
+ err = proc_put_long(&buffer, &left, bit_b, false);
+ if (err)
+ break;
+ }
+
+ first = 0; bit_b++;
+ }
+ if (!err)
+ err = proc_put_char(&buffer, &left, '\n');
+ }
+
+ if (!err) {
+ if (write) {
+ if (*ppos)
+ bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
+ else
+ bitmap_copy(bitmap, tmp_bitmap, bitmap_len);
+ }
+ kfree(tmp_bitmap);
+ *lenp -= left;
+ *ppos += *lenp;
+ return 0;
+ } else {
+ kfree(tmp_bitmap);
+ return err;
+ }
+}
+
+#else /* CONFIG_PROC_SYSCTL */
+
+int proc_dostring(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ return -ENOSYS;
+}
+
+int proc_dointvec(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ return -ENOSYS;
+}
+
+int proc_dointvec_minmax(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ return -ENOSYS;
+}
+
+int proc_dointvec_jiffies(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ return -ENOSYS;
+}
+
+int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ return -ENOSYS;
+}
+
+int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ return -ENOSYS;
+}
+
+int proc_doulongvec_minmax(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ return -ENOSYS;
+}
+
+int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
+ void __user *buffer,
+ size_t *lenp, loff_t *ppos)
+{
+ return -ENOSYS;
+}
+
+
+#endif /* CONFIG_PROC_SYSCTL */
+
+/*
+ * No sense putting this after each symbol definition, twice,
+ * exception granted :-)
+ */
+EXPORT_SYMBOL(proc_dointvec);
+EXPORT_SYMBOL(proc_dointvec_jiffies);
+EXPORT_SYMBOL(proc_dointvec_minmax);
+EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
+EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
+EXPORT_SYMBOL(proc_dostring);
+EXPORT_SYMBOL(proc_doulongvec_minmax);
+EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);