338 lines
8.5 KiB
C
338 lines
8.5 KiB
C
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
/*
|
|
* Copyright (C) 2012 Linux Test Project, Inc.
|
|
*/
|
|
|
|
/*
|
|
* Use migrate_pages() and check that the address is on the correct node:
|
|
* 1. process A can migrate its non-shared mem with CAP_SYS_NICE
|
|
* 2. process A can migrate its non-shared mem without CAP_SYS_NICE
|
|
* 3. process A can migrate shared mem only with CAP_SYS_NICE
|
|
* 4. process A can migrate non-shared mem in process B with same effective uid
|
|
* 5. process A can migrate non-shared mem in process B with CAP_SYS_NICE
|
|
*/
|
|
#include <sys/types.h>
|
|
#include <sys/syscall.h>
|
|
#include <sys/wait.h>
|
|
#include <sys/mman.h>
|
|
#include <sys/prctl.h>
|
|
#include <errno.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <unistd.h>
|
|
#include <pwd.h>
|
|
|
|
#include "tst_test.h"
|
|
#include "lapi/syscalls.h"
|
|
#include "numa_helper.h"
|
|
#include "migrate_pages_common.h"
|
|
|
|
/*
|
|
* This is an estimated minimum of free mem required to migrate this
|
|
* process to another node as migrate_pages will fail if there is not
|
|
* enough free space on node. While running this test on x86_64
|
|
* it used ~2048 pages (total VM, not just RSS). Considering ia64 as
|
|
* architecture with largest (non-huge) page size (16k), this limit
|
|
* is set to 2048*16k == 32M.
|
|
*/
|
|
#define NODE_MIN_FREEMEM (32*1024*1024)
|
|
|
|
#ifdef HAVE_NUMA_V2
|
|
|
|
/* Account name looked up with getpwnam() in setup(). */
static const char nobody_uid[] = "nobody";

/* passwd entry for nobody_uid; children switch to its pw_uid. */
static struct passwd *ltpuser;

/* Allowed node ids and their count, filled by get_allowed_nodes_arr(). */
static int *nodes;
static int num_nodes;

/* The two nodes chosen in setup() that pages are migrated between. */
static int nodeA;
static int nodeB;
|
|
|
|
static void print_mem_stats(pid_t pid, int node)
|
|
{
|
|
char s[64];
|
|
long long node_size, freep;
|
|
|
|
if (pid == 0)
|
|
pid = getpid();
|
|
|
|
tst_res(TINFO, "mem_stats pid: %d, node: %d", pid, node);
|
|
|
|
/* dump pid's VM info */
|
|
sprintf(s, "cat /proc/%d/status", pid);
|
|
system(s);
|
|
sprintf(s, "cat /proc/%d/numa_maps", pid);
|
|
system(s);
|
|
|
|
/* dump node free mem */
|
|
node_size = numa_node_size64(node, &freep);
|
|
tst_res(TINFO, "Node id: %d, size: %lld, free: %lld",
|
|
node, node_size, freep);
|
|
}
|
|
|
|
static int migrate_to_node(pid_t pid, int node)
|
|
{
|
|
unsigned long nodemask_size, max_node;
|
|
unsigned long *old_nodes, *new_nodes;
|
|
int i;
|
|
|
|
tst_res(TINFO, "pid(%d) migrate pid %d to node -> %d",
|
|
getpid(), pid, node);
|
|
max_node = LTP_ALIGN(get_max_node(), sizeof(unsigned long)*8);
|
|
nodemask_size = max_node / 8;
|
|
old_nodes = SAFE_MALLOC(nodemask_size);
|
|
new_nodes = SAFE_MALLOC(nodemask_size);
|
|
|
|
memset(old_nodes, 0, nodemask_size);
|
|
memset(new_nodes, 0, nodemask_size);
|
|
for (i = 0; i < num_nodes; i++)
|
|
set_bit(old_nodes, nodes[i], 1);
|
|
set_bit(new_nodes, node, 1);
|
|
|
|
TEST(tst_syscall(__NR_migrate_pages, pid, max_node, old_nodes,
|
|
new_nodes));
|
|
if (TST_RET != 0) {
|
|
if (TST_RET < 0) {
|
|
tst_res(TFAIL | TTERRNO, "migrate_pages failed "
|
|
"ret: %ld, ", TST_RET);
|
|
print_mem_stats(pid, node);
|
|
} else {
|
|
tst_res(TINFO, "migrate_pages could not migrate all "
|
|
"pages, not migrated: %ld", TST_RET);
|
|
}
|
|
}
|
|
free(old_nodes);
|
|
free(new_nodes);
|
|
return TST_RET;
|
|
}
|
|
|
|
static int addr_on_node(void *addr)
|
|
{
|
|
int node;
|
|
int ret;
|
|
|
|
ret = tst_syscall(__NR_get_mempolicy, &node, NULL, (unsigned long)0,
|
|
(unsigned long)addr, MPOL_F_NODE | MPOL_F_ADDR);
|
|
if (ret == -1) {
|
|
tst_res(TFAIL | TERRNO,
|
|
"error getting memory policy for page %p", addr);
|
|
}
|
|
return node;
|
|
}
|
|
|
|
static int check_addr_on_node(void *addr, int exp_node)
|
|
{
|
|
int node;
|
|
|
|
node = addr_on_node(addr);
|
|
if (node == exp_node) {
|
|
tst_res(TPASS, "pid(%d) addr %p is on expected node: %d",
|
|
getpid(), addr, exp_node);
|
|
return TPASS;
|
|
} else {
|
|
tst_res(TFAIL, "pid(%d) addr %p not on expected node: %d "
|
|
", expected %d", getpid(), addr, node, exp_node);
|
|
print_mem_stats(0, exp_node);
|
|
return TFAIL;
|
|
}
|
|
}
|
|
|
|
static void test_migrate_current_process(int node1, int node2, int cap_sys_nice)
|
|
{
|
|
char *private, *shared;
|
|
int ret;
|
|
pid_t child;
|
|
|
|
/* parent can migrate its non-shared memory */
|
|
tst_res(TINFO, "current_process, cap_sys_nice: %d", cap_sys_nice);
|
|
private = SAFE_MMAP(NULL, getpagesize(), PROT_READ | PROT_WRITE,
|
|
MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
|
|
private[0] = 0;
|
|
tst_res(TINFO, "private anonymous: %p", private);
|
|
|
|
migrate_to_node(0, node2);
|
|
check_addr_on_node(private, node2);
|
|
migrate_to_node(0, node1);
|
|
check_addr_on_node(private, node1);
|
|
SAFE_MUNMAP(private, getpagesize());
|
|
|
|
/* parent can migrate shared memory with CAP_SYS_NICE */
|
|
shared = SAFE_MMAP(NULL, getpagesize(), PROT_READ | PROT_WRITE,
|
|
MAP_ANONYMOUS | MAP_SHARED, 0, 0);
|
|
shared[0] = 1;
|
|
tst_res(TINFO, "shared anonymous: %p", shared);
|
|
migrate_to_node(0, node2);
|
|
check_addr_on_node(shared, node2);
|
|
|
|
/* shared mem is on node2, try to migrate in child to node1 */
|
|
fflush(stdout);
|
|
child = SAFE_FORK();
|
|
if (child == 0) {
|
|
tst_res(TINFO, "child shared anonymous, cap_sys_nice: %d",
|
|
cap_sys_nice);
|
|
private = SAFE_MMAP(NULL, getpagesize(),
|
|
PROT_READ | PROT_WRITE,
|
|
MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
|
|
private[0] = 1;
|
|
shared[0] = 1;
|
|
if (!cap_sys_nice)
|
|
SAFE_SETEUID(ltpuser->pw_uid);
|
|
|
|
migrate_to_node(0, node1);
|
|
/* child can migrate non-shared memory */
|
|
ret = check_addr_on_node(private, node1);
|
|
|
|
exit(ret);
|
|
}
|
|
|
|
SAFE_WAITPID(child, NULL, 0);
|
|
if (cap_sys_nice)
|
|
/* child can migrate shared memory only
|
|
* with CAP_SYS_NICE */
|
|
check_addr_on_node(shared, node1);
|
|
else
|
|
check_addr_on_node(shared, node2);
|
|
SAFE_MUNMAP(shared, getpagesize());
|
|
}
|
|
|
|
static void test_migrate_other_process(int node1, int node2, int cap_sys_nice)
|
|
{
|
|
char *private;
|
|
int ret;
|
|
pid_t child1, child2;
|
|
|
|
tst_res(TINFO, "other_process, cap_sys_nice: %d", cap_sys_nice);
|
|
|
|
fflush(stdout);
|
|
child1 = SAFE_FORK();
|
|
if (child1 == 0) {
|
|
private = SAFE_MMAP(NULL, getpagesize(),
|
|
PROT_READ | PROT_WRITE,
|
|
MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
|
|
private[0] = 0;
|
|
|
|
/* make sure we are on node1 */
|
|
migrate_to_node(0, node1);
|
|
check_addr_on_node(private, node1);
|
|
|
|
SAFE_SETUID(ltpuser->pw_uid);
|
|
|
|
/* commit_creds() will clear dumpable, restore it */
|
|
if (prctl(PR_SET_DUMPABLE, 1))
|
|
tst_brk(TBROK | TERRNO, "prctl");
|
|
|
|
/* signal child2 it's OK to migrate child1 and wait */
|
|
TST_CHECKPOINT_WAKE(0);
|
|
TST_CHECKPOINT_WAIT(1);
|
|
|
|
/* child2 can migrate child1 process if it's privileged */
|
|
/* child2 can migrate child1 process if it has same uid */
|
|
ret = check_addr_on_node(private, node2);
|
|
|
|
exit(ret);
|
|
}
|
|
|
|
fflush(stdout);
|
|
child2 = SAFE_FORK();
|
|
if (child2 == 0) {
|
|
if (!cap_sys_nice)
|
|
SAFE_SETUID(ltpuser->pw_uid);
|
|
|
|
/* wait until child1 is ready on node1, then migrate and
|
|
* signal to check current node */
|
|
TST_CHECKPOINT_WAIT(0);
|
|
migrate_to_node(child1, node2);
|
|
TST_CHECKPOINT_WAKE(1);
|
|
|
|
exit(TPASS);
|
|
}
|
|
|
|
SAFE_WAITPID(child1, NULL, 0);
|
|
SAFE_WAITPID(child2, NULL, 0);
|
|
}
|
|
|
|
static void run(void)
|
|
{
|
|
test_migrate_current_process(nodeA, nodeB, 1);
|
|
test_migrate_current_process(nodeA, nodeB, 0);
|
|
test_migrate_other_process(nodeA, nodeB, 1);
|
|
test_migrate_other_process(nodeA, nodeB, 0);
|
|
}
|
|
|
|
static void setup(void)
|
|
{
|
|
int ret, i, j;
|
|
int pagesize = getpagesize();
|
|
void *p;
|
|
|
|
tst_syscall(__NR_migrate_pages, 0, 0, NULL, NULL);
|
|
|
|
if (numa_available() == -1)
|
|
tst_brk(TCONF, "NUMA not available");
|
|
|
|
ret = get_allowed_nodes_arr(NH_MEMS, &num_nodes, &nodes);
|
|
if (ret < 0)
|
|
tst_brk(TBROK | TERRNO, "get_allowed_nodes(): %d", ret);
|
|
|
|
if (num_nodes < 2)
|
|
tst_brk(TCONF, "at least 2 allowed NUMA nodes"
|
|
" are required");
|
|
else if (tst_kvercmp(2, 6, 18) < 0)
|
|
tst_brk(TCONF, "2.6.18 or greater kernel required");
|
|
|
|
FILE_PRINTF("/proc/sys/kernel/numa_balancing", "0");
|
|
/*
|
|
* find 2 nodes, which can hold NODE_MIN_FREEMEM bytes
|
|
* The reason is that:
|
|
* 1. migrate_pages() is expected to succeed
|
|
* 2. this test avoids hitting:
|
|
* Bug 870326 - migrate_pages() reports success, but pages are
|
|
* not moved to desired node
|
|
* https://bugzilla.redhat.com/show_bug.cgi?id=870326
|
|
*/
|
|
nodeA = nodeB = -1;
|
|
for (i = 0; i < num_nodes; i++) {
|
|
p = numa_alloc_onnode(NODE_MIN_FREEMEM, nodes[i]);
|
|
if (p == NULL)
|
|
break;
|
|
memset(p, 0xff, NODE_MIN_FREEMEM);
|
|
|
|
j = 0;
|
|
while (j < NODE_MIN_FREEMEM) {
|
|
if (addr_on_node(p + j) != nodes[i])
|
|
break;
|
|
j += pagesize;
|
|
}
|
|
numa_free(p, NODE_MIN_FREEMEM);
|
|
|
|
if (j >= NODE_MIN_FREEMEM) {
|
|
if (nodeA == -1)
|
|
nodeA = nodes[i];
|
|
else if (nodeB == -1)
|
|
nodeB = nodes[i];
|
|
else
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (nodeA == -1 || nodeB == -1)
|
|
tst_brk(TCONF, "at least 2 NUMA nodes with "
|
|
"free mem > %d are needed", NODE_MIN_FREEMEM);
|
|
tst_res(TINFO, "Using nodes: %d %d", nodeA, nodeB);
|
|
|
|
ltpuser = getpwnam(nobody_uid);
|
|
if (ltpuser == NULL)
|
|
tst_brk(TBROK | TERRNO, "getpwnam failed");
|
|
}
|
|
|
|
static struct tst_test test = {
|
|
.needs_root = 1,
|
|
.needs_checkpoints = 1,
|
|
.forks_child = 1,
|
|
.test_all = run,
|
|
.setup = setup,
|
|
.save_restore = (const char * const[]) {
|
|
"?/proc/sys/kernel/numa_balancing",
|
|
NULL,
|
|
},
|
|
};
|
|
#else
|
|
TST_TEST_TCONF(NUMA_ERROR_MSG);
|
|
#endif
|