migrate: fix syscall move_pages() return value for failure

commit a7504ed14f9b5e873599b2487eb95062dd0b65f8
Author: Huang Ying <ying.huang@intel.com>
Date:   Wed Aug 17 16:14:01 2022 +0800

    migrate: fix syscall move_pages() return value for failure

    Patch series "migrate_pages(): fix several bugs in error path", v3.

    During review the code of migrate_pages() and build a test program for
    it.  Several bugs in error path are identified and fixed in this
    series.

    Most patches are tested via

    - Apply error-inject.patch in Linux kernel
    - Compile test-migrate.c (with -lnuma)
    - Test with test-migrate.sh

    error-inject.patch, test-migrate.c, and test-migrate.sh are as below.
    It turns out that error injection is an important tool to fix bugs in
    error path.

    This patch (of 8):

    The return value of move_pages() syscall is incorrect when counting
    the remaining pages to be migrated.  For example, for the following
    test program,

    "
     #define _GNU_SOURCE

     #include <stdbool.h>
     #include <stdio.h>
     #include <string.h>
     #include <stdlib.h>
     #include <errno.h>

     #include <fcntl.h>
     #include <sys/uio.h>
     #include <sys/mman.h>
     #include <sys/types.h>
     #include <unistd.h>
     #include <numaif.h>
     #include <numa.h>

     #ifndef MADV_FREE
     #define MADV_FREE      8               /* free pages only if memory pressure */
     #endif

     #define ONE_MB         (1024 * 1024)
     #define MAP_SIZE       (16 * ONE_MB)
     #define THP_SIZE       (2 * ONE_MB)
     #define THP_MASK       (THP_SIZE - 1)

     #define ERR_EXIT_ON(cond, msg)                                 \
             do {                                                   \
                     int __cond_in_macro = (cond);                  \
                     if (__cond_in_macro)                           \
                             error_exit(__cond_in_macro, (msg));    \
             } while (0)

     void error_msg(int ret, int nr, int *status, const char *msg)
     {
             int i;

             fprintf(stderr, "Error: %s, ret : %d, error: %s\n",
                     msg, ret, strerror(errno));

             if (!nr)
                     return;
             fprintf(stderr, "status: ");
             for (i = 0; i < nr; i++)
                     fprintf(stderr, "%d ", status[i]);
             fprintf(stderr, "\n");
     }

     void error_exit(int ret, const char *msg)
     {
             error_msg(ret, 0, NULL, msg);
             exit(1);
     }

     int page_size;

     bool do_vmsplice;
     bool do_thp;

     static int pipe_fds[2];
     void *addr;
     char *pn;
     char *pn1;
     void *pages[2];
     int status[2];

     void prepare()
     {
             int ret;
             struct iovec iov;

             if (addr) {
                     munmap(addr, MAP_SIZE);
                     close(pipe_fds[0]);
                     close(pipe_fds[1]);
             }

             ret = pipe(pipe_fds);
             ERR_EXIT_ON(ret, "pipe");

             addr = mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
             ERR_EXIT_ON(addr == MAP_FAILED, "mmap");
             if (do_thp) {
                     ret = madvise(addr, MAP_SIZE, MADV_HUGEPAGE);
                     ERR_EXIT_ON(ret, "advise hugepage");
             }

             pn = (char *)(((unsigned long)addr + THP_SIZE) & ~THP_MASK);
             pn1 = pn + THP_SIZE;
             pages[0] = pn;
             pages[1] = pn1;
             *pn = 1;

             if (do_vmsplice) {
                     iov.iov_base = pn;
                     iov.iov_len = page_size;
                     ret = vmsplice(pipe_fds[1], &iov, 1, 0);
                     ERR_EXIT_ON(ret < 0, "vmsplice");
             }

             status[0] = status[1] = 1024;
     }

     void test_migrate()
     {
             int ret;
             int nodes[2] = { 1, 1 };
             pid_t pid = getpid();

             prepare();
             ret = move_pages(pid, 1, pages, nodes, status, MPOL_MF_MOVE_ALL);
             error_msg(ret, 1, status, "move 1 page");

             prepare();
             ret = move_pages(pid, 2, pages, nodes, status, MPOL_MF_MOVE_ALL);
             error_msg(ret, 2, status, "move 2 pages, page 1 not mapped");

             prepare();
             *pn1 = 1;
             ret = move_pages(pid, 2, pages, nodes, status, MPOL_MF_MOVE_ALL);
             error_msg(ret, 2, status, "move 2 pages");

             prepare();
             *pn1 = 1;
             nodes[1] = 0;
             ret = move_pages(pid, 2, pages, nodes, status, MPOL_MF_MOVE_ALL);
             error_msg(ret, 2, status, "move 2 pages, page 1 to node 0");
     }

     int main(int argc, char *argv[])
     {
             numa_run_on_node(0);
             page_size = getpagesize();

             test_migrate();

             fprintf(stderr, "\nMake page 0 cannot be migrated:\n");
             do_vmsplice = true;
             test_migrate();

             fprintf(stderr, "\nTest THP:\n");
             do_thp = true;
             do_vmsplice = false;
             test_migrate();

             fprintf(stderr, "\nTHP: make page 0 cannot be migrated:\n");
             do_vmsplice = true;
             test_migrate();

             return 0;
     }
    "

    The output of the current kernel is,

    "
    Error: move 1 page, ret : 0, error: Success
    status: 1
    Error: move 2 pages, page 1 not mapped, ret : 0, error: Success
    status: 1 -14
    Error: move 2 pages, ret : 0, error: Success
    status: 1 1
    Error: move 2 pages, page 1 to node 0, ret : 0, error: Success
    status: 1 0

    Make page 0 cannot be migrated:
    Error: move 1 page, ret : 0, error: Success
    status: 1024
    Error: move 2 pages, page 1 not mapped, ret : 1, error: Success
    status: 1024 -14
    Error: move 2 pages, ret : 0, error: Success
    status: 1024 1024
    Error: move 2 pages, page 1 to node 0, ret : 1, error: Success
    status: 1024 1024
    "

    While the expected output is,

    "
    Error: move 1 page, ret : 0, error: Success
    status: 1
    Error: move 2 pages, page 1 not mapped, ret : 0, error: Success
    status: 1 -14
    Error: move 2 pages, ret : 0, error: Success
    status: 1 1
    Error: move 2 pages, page 1 to node 0, ret : 0, error: Success
    status: 1 0

    Make page 0 cannot be migrated:
    Error: move 1 page, ret : 1, error: Success
    status: 1024
    Error: move 2 pages, page 1 not mapped, ret : 1, error: Success
    status: 1024 -14
    Error: move 2 pages, ret : 1, error: Success
    status: 1024 1024
    Error: move 2 pages, page 1 to node 0, ret : 2, error: Success
    status: 1024 1024
    "

    Fix this via correcting the remaining pages counting.  With the fix,
    the output for the test program as above is expected.

    Link: https://lkml.kernel.org/r/20220817081408.513338-1-ying.huang@intel.com
    Link: https://lkml.kernel.org/r/20220817081408.513338-2-ying.huang@intel.com
    Fixes: 5984fabb6e ("mm: move_pages: report the number of non-attempted pages")
    Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
    Reviewed-by: Oscar Salvador <osalvador@suse.de>
    Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
    Cc: Zi Yan <ziy@nvidia.com>
    Cc: Yang Shi <shy828301@gmail.com>
    Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168372
Signed-off-by: Nico Pache <npache@redhat.com>
This commit is contained in:
Nico Pache 2023-05-08 17:26:31 -06:00
parent f219a1c570
commit 9d813d3caf
1 changed files with 6 additions and 2 deletions

View File

@ -1760,7 +1760,7 @@ static int move_pages_and_store_status(struct mm_struct *mm, int node,
* well.
*/
if (err > 0)
err += nr_pages - i - 1;
err += nr_pages - i;
return err;
}
return store_status(status, start, node, i - start);
@ -1846,8 +1846,12 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
err = move_pages_and_store_status(mm, current_node, &pagelist,
status, start, i, nr_pages);
if (err)
if (err) {
/* We have accounted for page i */
if (err > 0)
err--;
goto out;
}
current_node = NUMA_NO_NODE;
}
out_flush: