glibc/stdlib/tst-arc4random-thread.c
Florian Weimer abeae3c006 Linux: Fixes for getrandom fork handling
Careful updates of grnd_alloc.len are required to ensure that
after fork, grnd_alloc.states does not contain entries that
are also encountered by __getrandom_reset_state in TCBs.
For the same reason, it is necessary to overwrite the TCB state
pointer with NULL before updating grnd_alloc.states in
__getrandom_vdso_release.

Before this change, different TCBs could share the same getrandom
state after multi-threaded fork.  This would be a critical security
bug (predictable randomness) if not caught during development.

The additional check in stdlib/tst-arc4random-thread makes it more
likely that the test fails due to the bugs mentioned above.

Both __getrandom_reset_state and __getrandom_vdso_release could
put reserved NULL pointers into the states array.  This is also
fixed with this commit.  After these changes, no null pointers were
observed in the states array during testing.

Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>
2025-01-16 19:58:09 +01:00

367 lines
10 KiB
C

/* Test that threads generate distinct streams of randomness.
Copyright (C) 2022-2025 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <array_length.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <support/check.h>
#include <support/namespace.h>
#include <support/support.h>
#include <support/xthread.h>
/* Fixed test parameters.  */
enum
{
  /* Number of blobs each inner thread generates.  */
  count_per_thread = 2048,
  /* Number of randomness-generating threads started per round by
     each outer thread.  */
  inner_threads = 4,
  /* Number of launching rounds performed by each outer thread.  */
  outer_rounds = 10,
  /* Upper bound for the number of bytes stored in one blob.  */
  max_size = 32
};

/* Number of threads launching other threads.  This is a variable
   because do_test adjusts it to the number of available CPUs.  */
static int outer_threads = 1;

/* Blob sizes requested by the inner threads.  Every size must be
   large enough that two independent random blobs of that size are
   different with high probability.  */
static const int sizes[] = { 12, 15, 16, 17, 24, 31, max_size };
/* Data structure to capture randomness results.  One blob records
   the bytes produced by a single call to a generator function,
   together with information about where the bytes came from (used
   when reporting duplicates).  */
struct blob
{
/* Number of valid bytes at the start of BYTES.  */
unsigned int size;
/* ID of the producing inner thread (taken from next_thread_id).  */
int thread_id; /* -1 means after fork. */
/* For inner threads, the iteration number within the thread.  For
   blobs obtained after fork, the position (0 to 2) among the blobs
   delivered by one subprocess.  */
unsigned int index;
/* The random data.  Only the first SIZE bytes are compared.  */
unsigned char bytes[max_size];
};
/* Argument passed (as a void * closure) to fork_thread_subprocess and
   get_one_blob_thread.  */
struct subprocess_args
{
/* Destination.  get_one_blob_thread fills exactly this blob;
   fork_thread_subprocess treats it as the first of three consecutive
   blobs.  */
struct blob *blob;
/* Generator function: called with a buffer and the number of bytes
   to store in it.  */
void (*func)(unsigned char *, size_t);
};
static void
generate_arc4random (unsigned char *bytes, size_t size)
{
int i;
for (i = 0; i < size / sizeof (uint32_t); i++)
{
uint32_t x = arc4random ();
memcpy (&bytes[4 * i], &x, sizeof x);
}
int rem = size % sizeof (uint32_t);
if (rem > 0)
{
uint32_t x = arc4random ();
memcpy (&bytes[4 * i], &x, rem);
}
}
/* Fill BYTES[0 .. SIZE-1] with a single arc4random_buf call.  The
   wrapper has the same signature as the other generate_* functions,
   so that it can be passed to do_test_func as the generator.  */
static void
generate_arc4random_buf (unsigned char *bytes, size_t size)
{
arc4random_buf (bytes, size);
}
static void
generate_arc4random_uniform (unsigned char *bytes, size_t size)
{
for (int i = 0; i < size; i++)
bytes[i] = arc4random_uniform (256);
}
/* Instantiate the dynarray template for struct blob.  The resulting
   type is struct dynarray_blob, and the generated functions carry the
   dynarray_blob_ prefix (dynarray_blob_init, dynarray_blob_emplace,
   dynarray_blob_resize and so on, as used below).  */
#define DYNARRAY_STRUCT dynarray_blob
#define DYNARRAY_ELEMENT struct blob
#define DYNARRAY_PREFIX dynarray_blob_
#include <malloc/dynarray-skeleton.c>
/* Comparison function for qsort over struct blob elements.  Blobs
   with different sizes are ordered by size.  Blobs of equal size are
   ordered by memcmp over their first SIZE bytes.  The thread_id and
   index members do not take part in the comparison, so a result of
   zero means that two blobs carry identical random data.  */
static int
compare_blob (const void *left1, const void *right1)
{
  const struct blob *lhs = left1;
  const struct blob *rhs = right1;

  if (lhs->size == rhs->size)
    return memcmp (lhs->bytes, rhs->bytes, lhs->size);

  /* The sizes are small (at most max_size), so the difference cannot
     overflow.  */
  return lhs->size - rhs->size;
}
/* Used to store the global result: the blobs of all threads of one
   do_test_func run.  copy_result_to_global appends to global_result
   while holding global_result_lock.  do_test_func initializes,
   examines and frees global_result without taking the lock, before
   the threads are started and after they have been joined.  */
static pthread_mutex_t global_result_lock = PTHREAD_MUTEX_INITIALIZER;
static struct dynarray_blob global_result;
/* Append all elements of RESULT to global_result.  The whole
   operation runs with global_result_lock held.  Growing global_result
   is checked with TEST_VERIFY_EXIT.  RESULT itself is not modified;
   the caller still has to free it.  */
static void
copy_result_to_global (struct dynarray_blob *result)
{
  xpthread_mutex_lock (&global_result_lock);

  /* Make room at the end of the global array first.  */
  size_t old_size = dynarray_blob_size (&global_result);
  TEST_VERIFY_EXIT
    (dynarray_blob_resize (&global_result,
                           old_size + dynarray_blob_size (result)));

  /* Then copy the new elements into the freshly added tail.  */
  struct blob *tail = dynarray_blob_begin (&global_result) + old_size;
  size_t tail_bytes = dynarray_blob_size (result) * sizeof (struct blob);
  memcpy (tail, dynarray_blob_begin (result), tail_bytes);

  xpthread_mutex_unlock (&global_result_lock);
}
/* Used to assign unique thread IDs. Accessed atomically.  Each
   inner_thread invocation takes the current value as its ID and
   increments the counter with a relaxed atomic fetch-and-add.  The
   code in this file never resets it.  */
static int next_thread_id;
/* Thread start routine for the randomness-generating threads.
   CLOSURE is the generator function.  The thread produces
   count_per_thread blobs and appends them to the global result.

   The blob size depends on the thread ID: most threads use one fixed
   entry of SIZES for all their blobs, but every
   (array_length (sizes) + 1)-th thread cycles through all entries of
   SIZES instead.  Always returns NULL.  */
static void *
inner_thread (void *closure)
{
  void (*generate) (unsigned char *, size_t) = closure;

  int thread_id = __atomic_fetch_add (&next_thread_id, 1, __ATOMIC_RELAXED);

  /* Determine the sizes to be used by this thread.  The extra slot
     past the end of SIZES selects the size-cycling mode.  */
  size_t size_count = array_length (sizes);
  size_t slot = thread_id % (size_count + 1);
  bool switch_sizes = slot == size_count;
  if (switch_sizes)
    slot = 0;

  /* Collect into a thread-local array, so that the global lock is
     not contended while randomness is being generated.  */
  struct dynarray_blob result;
  dynarray_blob_init (&result);

  for (unsigned int iteration = 0; iteration < count_per_thread; ++iteration)
    {
      struct blob *place = dynarray_blob_emplace (&result);
      TEST_VERIFY_EXIT (place != NULL);

      place->thread_id = thread_id;
      place->index = iteration;
      place->size = sizes[slot];
      generate (place->bytes, place->size);

      if (switch_sizes)
        slot = (slot + 1) % size_count;
    }

  /* Publish the blobs, then release the local copy.  */
  copy_result_to_global (&result);
  dynarray_blob_free (&result);
  return NULL;
}
/* Thread start routine for the launching threads.  CLOSURE is the
   generator function, which is handed on to the inner threads.
   Performs outer_rounds rounds; each round starts inner_threads
   threads running inner_thread and joins all of them before the next
   round begins.  Always returns NULL.  */
static void *
outer_thread (void *closure)
{
  void (*generate) (unsigned char *, size_t) = closure;

  for (int rounds_done = 0; rounds_done != outer_rounds; ++rounds_done)
    {
      pthread_t workers[inner_threads];

      for (int w = 0; w < inner_threads; ++w)
        workers[w] = xpthread_create (NULL, inner_thread, generate);

      for (int w = 0; w < inner_threads; ++w)
        xpthread_join (workers[w]);
    }

  return NULL;
}
/* Stop flag for fork_thread.  run_outer_threads stores true after
   all outer threads have been joined; fork_thread checks the flag
   before each subprocess it forks.  Accessed with relaxed atomic
   loads and stores.  */
static bool termination_requested;
/* Call arc4random_buf to fill one blob with 16 bytes. */
static void *
get_one_blob_thread (void *closure)
{
struct subprocess_args *arg = closure;
struct blob *result = arg->blob;
result->size = 16;
arg->func (result->bytes, result->size);
return NULL;
}
/* Invoked from fork_thread to actually obtain randomness data. */
static void
fork_thread_subprocess (void *closure)
{
struct subprocess_args *arg = closure;
struct blob *shared_result = arg->blob;
struct subprocess_args args[3] =
{
{ shared_result + 0, arg->func },
{ shared_result + 1, arg->func },
{ shared_result + 2, arg->func }
};
pthread_t thr1 = xpthread_create (NULL, get_one_blob_thread, &args[1]);
pthread_t thr2 = xpthread_create (NULL, get_one_blob_thread, &args[2]);
get_one_blob_thread (&args[0]);
xpthread_join (thr1);
xpthread_join (thr2);
}
/* Continuously fork subprocesses to obtain a little bit of
randomness. */
static void *
fork_thread (void *closure)
{
void (*func)(unsigned char *, size_t) = closure;
struct dynarray_blob result;
dynarray_blob_init (&result);
/* Three blobs from each subprocess. */
struct blob *shared_result
= support_shared_allocate (3 * sizeof (*shared_result));
while (!__atomic_load_n (&termination_requested, __ATOMIC_RELAXED))
{
/* Obtain the results from a subprocess. */
struct subprocess_args arg = { shared_result, func };
support_isolate_in_subprocess (fork_thread_subprocess, &arg);
for (int i = 0; i < 3; ++i)
{
struct blob *place = dynarray_blob_emplace (&result);
TEST_VERIFY_EXIT (place != NULL);
place->size = shared_result[i].size;
place->thread_id = -1;
place->index = i;
memcpy (place->bytes, shared_result[i].bytes, place->size);
}
}
support_shared_free (shared_result);
copy_result_to_global (&result);
dynarray_blob_free (&result);
return NULL;
}
/* Launch the outer threads and wait for their termination.  FUNC is
   the generator function handed to every thread.  In parallel, one
   additional thread runs fork_thread and keeps forking subprocesses
   until the outer threads are done.  All threads created here have
   been joined when this function returns.  */
static void
run_outer_threads (void (*func)(unsigned char *, size_t))
{
  /* Clear the stop flag.  This function is called once per generator,
     and the previous call leaves the flag set.  Without the reset,
     fork_thread would observe the stale flag and return without
     forking a single subprocess in every run after the first one.
     The threads of a previous call have all been joined, and the
     store happens before the thread creation below, so the new
     fork_thread sees the cleared flag.  */
  __atomic_store_n (&termination_requested, false, __ATOMIC_RELAXED);

  /* Special thread that continuously calls fork.  */
  pthread_t fork_thread_id = xpthread_create (NULL, fork_thread, func);

  pthread_t threads[outer_threads];
  for (int i = 0; i < outer_threads; ++i)
    threads[i] = xpthread_create (NULL, outer_thread, func);
  for (int i = 0; i < outer_threads; ++i)
    xpthread_join (threads[i]);

  /* The real work is done; tell the forking thread to stop and wait
     until it has delivered its results.  */
  __atomic_store_n (&termination_requested, true, __ATOMIC_RELAXED);
  xpthread_join (fork_thread_id);
}
static int
do_test_func (const char *fname, void (*func)(unsigned char *, size_t))
{
dynarray_blob_init (&global_result);
int expected_blobs
= count_per_thread * inner_threads * outer_threads * outer_rounds;
printf ("info: %s: minimum of %d blob results expected\n",
fname, expected_blobs);
run_outer_threads (func);
/* The forking thread delivers a non-deterministic number of
results, which is why expected_blobs is only a minimum number of
results. */
printf ("info: %s: %zu blob results observed\n", fname,
dynarray_blob_size (&global_result));
TEST_VERIFY (dynarray_blob_size (&global_result) >= expected_blobs);
/* Verify that there are no duplicates. */
qsort (dynarray_blob_begin (&global_result),
dynarray_blob_size (&global_result),
sizeof (struct blob), compare_blob);
struct blob *end = dynarray_blob_end (&global_result);
for (struct blob *p = dynarray_blob_begin (&global_result) + 1;
p < end; ++p)
{
if (compare_blob (p - 1, p) == 0)
{
support_record_failure ();
char *quoted = support_quote_blob (p->bytes, p->size);
printf ("error: %s: duplicate blob: \"%s\" (%d bytes)\n",
fname, quoted, (int) p->size);
printf (" first source: thread %d, index %u\n",
p[-1].thread_id, p[-1].index);
printf (" second source: thread %d, index %u\n",
p[0].thread_id, p[0].index);
free (quoted);
}
}
for (struct blob *p = dynarray_blob_begin (&global_result);
p < end; ++p)
{
unsigned int sum = 0;
for (unsigned int i = 0; i < p->size; ++i)
sum += p->bytes[i];
if (sum == 0)
{
support_record_failure ();
printf ("error: all-zero result of length %u on thread %d\n",
p->size, p->thread_id);
}
}
dynarray_blob_free (&global_result);
return 0;
}
/* Test entry point.  Chooses the number of outer threads based on the
   CPUs this process may run on, then runs the test once for each of
   the three generators.  Always returns 0; failures are reported by
   do_test_func.  */
static int
do_test (void)
{
  /* Do not run more threads than the maximum of schedulable CPUs.
     If the affinity mask cannot be obtained, outer_threads keeps its
     initial value.  */
  cpu_set_t cpuset;
  if (sched_getaffinity (0, sizeof cpuset, &cpuset) == 0)
    {
      unsigned int ncpus = CPU_COUNT (&cpuset);
      /* Use only half of the CPUs to not overload the system, but
         run at least one outer thread.  */
      unsigned int wanted = ncpus / 2 / inner_threads;
      outer_threads = wanted != 0 ? wanted : 1;
    }
  printf ("info: outer_threads=%d inner_threads=%d\n", outer_threads,
          inner_threads);

  static const struct
  {
    const char *name;
    void (*func) (unsigned char *, size_t);
  } generators[] =
    {
      { "arc4random", generate_arc4random },
      { "arc4random_buf", generate_arc4random_buf },
      { "arc4random_uniform", generate_arc4random_uniform },
    };
  for (size_t i = 0; i < sizeof generators / sizeof generators[0]; ++i)
    do_test_func (generators[i].name, generators[i].func);

  return 0;
}
#include <support/test-driver.c>