分段错误“gsl_spmatrix_add”

发布于 2025-01-12 07:25:52 字数 4289 浏览 5 评论 0原文

编辑: 我已将问题更改为产生相同错误的新代码,并且这样做更可靠。

一段时间以来,我一直在努力寻找代码中的分段错误,并将其归结为以下代码:

#include <gsl/gsl_spmatrix.h>

#include <iostream>

using namespace std;

void test_gsl() {  // Minimal reproducer: accumulates five 5x5 sparse matrices into total_matrix with gsl_spmatrix_add.
    size_t size = 5;  // every matrix is size x size
    size_t nzmax = 5 * 5;  // non-zero capacity requested from every allocation below
    constexpr size_t threads = 5;  // number of per-"thread" matrices; no threads are started in this function

    // allocate: one triplet-format matrix per slot
    gsl_spmatrix* thread_matrices[threads];
    for (size_t thread = 0; thread < threads; thread++) {
        thread_matrices[thread] = gsl_spmatrix_alloc_nzmax(size, size, nzmax, GSL_SPMATRIX_TRIPLET);
    }

    // set: store a single entry, (0, 0) = 1.0, in every matrix
    for (size_t i = 0; i < threads; i++) {
        gsl_spmatrix_set(thread_matrices[i], 0, 0, 1.0);
    }

    // crs: replace each triplet matrix with the one returned by gsl_spmatrix_crs, then free the triplet original
    for (size_t i = 0; i < threads; i++) {
        gsl_spmatrix* temp = thread_matrices[i];  // keep the old pointer so it can still be freed after the swap
        thread_matrices[i] = gsl_spmatrix_crs(thread_matrices[i]);
        gsl_spmatrix_free(temp);
    }

    // add to total: total_matrix = total_copy + thread_matrices[i], repeated for every slot
    gsl_spmatrix* total_matrix = gsl_spmatrix_alloc_nzmax(size, size, nzmax, GSL_SPMATRIX_CRS);  // NOTE(review): no element is ever stored here before the first read below - confirm the allocator initialises the row pointers
    gsl_spmatrix* total_copy = gsl_spmatrix_alloc_nzmax(size, size, nzmax, GSL_SPMATRIX_CRS);
    for (size_t i = 0; i < threads; i++) {
        gsl_spmatrix_memcpy(total_copy, total_matrix);  // this is required to avoid another segfault
        gsl_spmatrix_add(total_matrix, total_copy, thread_matrices[i]); // unknown segfault!
    }

    gsl_spmatrix_free(total_matrix);  // NOTE(review): the converted thread_matrices[i] are never freed in this function
    gsl_spmatrix_free(total_copy);
}

int main(int argc, char* argv[]) {  // Entry point: runs the reproducer once and then prints "end"; argc/argv are not used.
    
    test_gsl();
    printf("end\n");  // only reached if test_gsl() returns normally

    return 0;
}

当我运行此代码时,我始终得到以下输出:

Segmentation fault (core dumped)

分段错误发生在 gsl_spmatrix_add(total_matrix, total_copy, thread_matrices[i]); 这一行。

我正在使用 cmake 编译此代码:

# NOTE(review): this listing looks like two CMakeLists.txt files pasted together - the
# top-level one (up to add_subdirectory) followed by the one inside src/. Confirm against the project.
cmake_minimum_required(VERSION 3.22.1)

project(diskmodel)

# Request C++14 and do not allow falling back to an older standard.
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED YES)

add_subdirectory("src")
project(galaxy)

# Locate an installed GSL; REQUIRED stops configuration if it cannot be found.
find_package(GSL REQUIRED)

# PROJECT_NAME comes from the most recent project() call, so the target is named "galaxy".
add_executable(${PROJECT_NAME} main.cpp)

# Name the produced binary galaxy.exe.
set_target_properties(${PROJECT_NAME} PROPERTIES OUTPUT_NAME "${PROJECT_NAME}" SUFFIX ".exe")

# Link against the imported GSL and GSL CBLAS targets.
target_link_libraries(${PROJECT_NAME} GSL::gsl GSL::gslcblas )

是什么导致了此段错误?

编辑:

使用 g++ `gsl-config --libs` main.cpp -fsanitize=undefined -g 编译后,我得到与以前相同的输出。使用 -fsanitize=address 编译时,我得到:

=================================================================
==31330==ERROR: LeakSanitizer: detected memory leaks

Direct leak of 400 byte(s) in 5 object(s) allocated from:
    #0 0x7efd44b64a06 in __interceptor_calloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cc:153
    #1 0x7efd449d393e in gsl_spmatrix_alloc_nzmax (/lib/x86_64-linux-gnu/libgsl.so.23+0x1f893e)

Indirect leak of 240 byte(s) in 5 object(s) allocated from:
    #0 0x7efd44b64808 in __interceptor_malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cc:144
    #1 0x7efd449d3b6c in gsl_spmatrix_alloc_nzmax (/lib/x86_64-linux-gnu/libgsl.so.23+0x1f8b6c)

Indirect leak of 200 byte(s) in 5 object(s) allocated from:
    #0 0x7efd44b64808 in __interceptor_malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cc:144
    #1 0x7efd449d3b88 in gsl_spmatrix_alloc_nzmax (/lib/x86_64-linux-gnu/libgsl.so.23+0x1f8b88)

Indirect leak of 40 byte(s) in 5 object(s) allocated from:
    #0 0x7efd44b64808 in __interceptor_malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cc:144
    #1 0x7efd449d39ac in gsl_spmatrix_alloc_nzmax (/lib/x86_64-linux-gnu/libgsl.so.23+0x1f89ac)

Indirect leak of 40 byte(s) in 5 object(s) allocated from:
    #0 0x7efd44b64808 in __interceptor_malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cc:144
    #1 0x7efd449d397d in gsl_spmatrix_alloc_nzmax (/lib/x86_64-linux-gnu/libgsl.so.23+0x1f897d)

当使用我的 cmake 文件编译并运行 gdb galaxy.exe 时,我得到以下回溯:

#0  0x00007ffff7f2c185 in gsl_spblas_scatter () from /lib/x86_64-linux-gnu/libgsl.so.23
#1  0x00007ffff7f2b364 in gsl_spmatrix_add () from /lib/x86_64-linux-gnu/libgsl.so.23
#2  0x00005555555553d2 in test_gsl () at .../src/main.cpp:35
#3  0x0000555555555420 in main (argc=1, argv=0x7fffffffdaf8) at .../src/main.cpp:44

并且使用 -p 时没有历史记录。

当使用ulimit -c unlimited然后运行时,不会生成核心文件。我尝试研究这个,但我似乎找不到它在任何地方生成,我不知道为什么。

EDIT:
I have changed the question to new code that produces the same error and is more reliable in doing so.

I have been struggling to find a segmentation fault in my code for a while now and have boiled it down to the following code:

#include <gsl/gsl_spmatrix.h>

#include <iostream>

using namespace std;

void test_gsl() {  // Minimal reproducer: accumulates five 5x5 sparse matrices into total_matrix with gsl_spmatrix_add.
    size_t size = 5;  // every matrix is size x size
    size_t nzmax = 5 * 5;  // non-zero capacity requested from every allocation below
    constexpr size_t threads = 5;  // number of per-"thread" matrices; no threads are started in this function

    // allocate: one triplet-format matrix per slot
    gsl_spmatrix* thread_matrices[threads];
    for (size_t thread = 0; thread < threads; thread++) {
        thread_matrices[thread] = gsl_spmatrix_alloc_nzmax(size, size, nzmax, GSL_SPMATRIX_TRIPLET);
    }

    // set: store a single entry, (0, 0) = 1.0, in every matrix
    for (size_t i = 0; i < threads; i++) {
        gsl_spmatrix_set(thread_matrices[i], 0, 0, 1.0);
    }

    // crs: replace each triplet matrix with the one returned by gsl_spmatrix_crs, then free the triplet original
    for (size_t i = 0; i < threads; i++) {
        gsl_spmatrix* temp = thread_matrices[i];  // keep the old pointer so it can still be freed after the swap
        thread_matrices[i] = gsl_spmatrix_crs(thread_matrices[i]);
        gsl_spmatrix_free(temp);
    }

    // add to total: total_matrix = total_copy + thread_matrices[i], repeated for every slot
    gsl_spmatrix* total_matrix = gsl_spmatrix_alloc_nzmax(size, size, nzmax, GSL_SPMATRIX_CRS);  // NOTE(review): no element is ever stored here before the first read below - confirm the allocator initialises the row pointers
    gsl_spmatrix* total_copy = gsl_spmatrix_alloc_nzmax(size, size, nzmax, GSL_SPMATRIX_CRS);
    for (size_t i = 0; i < threads; i++) {
        gsl_spmatrix_memcpy(total_copy, total_matrix);  // this is required to avoid another segfault
        gsl_spmatrix_add(total_matrix, total_copy, thread_matrices[i]); // unknown segfault!
    }

    gsl_spmatrix_free(total_matrix);  // NOTE(review): the converted thread_matrices[i] are never freed in this function
    gsl_spmatrix_free(total_copy);
}

int main(int argc, char* argv[]) {  // Entry point: runs the reproducer once and then prints "end"; argc/argv are not used.
    
    test_gsl();
    printf("end\n");  // only reached if test_gsl() returns normally

    return 0;
}

When I run this I consistently get the following output:

Segmentation fault (core dumped)

The segmentation fault is on the line with gsl_spmatrix_add(total_matrix, total_copy, thread_matrices[i]);.

I'm compiling this code using cmake:

# NOTE(review): this listing looks like two CMakeLists.txt files pasted together - the
# top-level one (up to add_subdirectory) followed by the one inside src/. Confirm against the project.
cmake_minimum_required(VERSION 3.22.1)

project(diskmodel)

# Request C++14 and do not allow falling back to an older standard.
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED YES)

add_subdirectory("src")
project(galaxy)

# Locate an installed GSL; REQUIRED stops configuration if it cannot be found.
find_package(GSL REQUIRED)

# PROJECT_NAME comes from the most recent project() call, so the target is named "galaxy".
add_executable(${PROJECT_NAME} main.cpp)

# Name the produced binary galaxy.exe.
set_target_properties(${PROJECT_NAME} PROPERTIES OUTPUT_NAME "${PROJECT_NAME}" SUFFIX ".exe")

# Link against the imported GSL and GSL CBLAS targets.
target_link_libraries(${PROJECT_NAME} GSL::gsl GSL::gslcblas )

What is causing this seg fault?

EDIT:

After compiling with:
g++ `gsl-config --libs` main.cpp -fsanitize=undefined -g I get the same output as before. When compiling with -fsanitize=address I get:

=================================================================
==31330==ERROR: LeakSanitizer: detected memory leaks

Direct leak of 400 byte(s) in 5 object(s) allocated from:
    #0 0x7efd44b64a06 in __interceptor_calloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cc:153
    #1 0x7efd449d393e in gsl_spmatrix_alloc_nzmax (/lib/x86_64-linux-gnu/libgsl.so.23+0x1f893e)

Indirect leak of 240 byte(s) in 5 object(s) allocated from:
    #0 0x7efd44b64808 in __interceptor_malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cc:144
    #1 0x7efd449d3b6c in gsl_spmatrix_alloc_nzmax (/lib/x86_64-linux-gnu/libgsl.so.23+0x1f8b6c)

Indirect leak of 200 byte(s) in 5 object(s) allocated from:
    #0 0x7efd44b64808 in __interceptor_malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cc:144
    #1 0x7efd449d3b88 in gsl_spmatrix_alloc_nzmax (/lib/x86_64-linux-gnu/libgsl.so.23+0x1f8b88)

Indirect leak of 40 byte(s) in 5 object(s) allocated from:
    #0 0x7efd44b64808 in __interceptor_malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cc:144
    #1 0x7efd449d39ac in gsl_spmatrix_alloc_nzmax (/lib/x86_64-linux-gnu/libgsl.so.23+0x1f89ac)

Indirect leak of 40 byte(s) in 5 object(s) allocated from:
    #0 0x7efd44b64808 in __interceptor_malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cc:144
    #1 0x7efd449d397d in gsl_spmatrix_alloc_nzmax (/lib/x86_64-linux-gnu/libgsl.so.23+0x1f897d)

When compiling using my cmake file and running gdb galaxy.exe I get the following backtrace:

#0  0x00007ffff7f2c185 in gsl_spblas_scatter () from /lib/x86_64-linux-gnu/libgsl.so.23
#1  0x00007ffff7f2b364 in gsl_spmatrix_add () from /lib/x86_64-linux-gnu/libgsl.so.23
#2  0x00005555555553d2 in test_gsl () at .../src/main.cpp:35
#3  0x0000555555555420 in main (argc=1, argv=0x7fffffffdaf8) at .../src/main.cpp:44

and no history when using -p.

When I use ulimit -c unlimited and then run the program, no core file is generated. I tried looking into this, but I can't find a core file anywhere and I don't know why.

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(1

遇到 2025-01-19 07:25:52

看起来像是 GSL 中的一个错误。请报告:-)

该行

gsl_spmatrix *total_matrix = gsl_spmatrix_alloc_nzmax(size, size, nzmax, GSL_SPMATRIX_CRS);

是 GSL 稀疏矩阵的有效分配器。然而,它的初始化是“智能”的,因为它的一些内存缓冲区被 malloc 分配,但没有初始化。这是指成员 p。
init_source.c 的第 130 行(来自 GSL 源代码,子模块(目录)spmatrix):

m->p = malloc((n1 + 1) * sizeof(int));

代码接下来要做的事情是

gsl_spmatrix_memcpy(total_copy, total_matrix); // this is required to avoid another segfault

嗯,注释有点有趣,但让我们看一下进入代码(copy_source.c第93-96行):

          for (n = 0; n < src->size1 + 1; ++n)
            {
              dest->p[n] = src->p[n];
            }

这里,size1 似乎是矩阵行数,被声明为 5。所以,代码用垃圾替换(通过复制)垃圾。这告诉我们,如果声明为具有 5 行的矩阵的非零行少于 5 个,则 GSL 似乎无法正常工作。我相信这就是您问题的解决方案。您声明了一些矩阵,例如 total_matrix 和 total_copy 具有 5 行,但实际上它们一行也没有。然而,到目前为止,代码并没有错误,因为将垃圾复制到垃圾上并没有错误。

代码中的下一步:

gsl_spmatrix_add(total_matrix, total_copy, thread_matrices[i]);

调用与成员 p 相关的代码:

      for (j = 0; j < outer_size; ++j)
        {
          Cp[j] = nz;

这将打开一个循环,在您的情况下将执行 5 次。这里 Cp 是 C->p 的简写。因此,到目前为止 p 成员中唯一被初始化的元素,是 C = A + B 中 C 的第 j 个元素。接下来,在这个循环中我们可以看到:

          /* CSC: x += A(:,j); CSR: x += A(j,:) */
          nz = FUNCTION (spmatrix, scatter) (a, j, w, x, (int) (j + 1), c, nz);

请注意,j 作为第二个参数传递,而未完全初始化的 a 作为第一个参数传递。这将通过宏调用第 538 行中定义的 spmatrix_scatter。

static size_t
FUNCTION (spmatrix, scatter) (const TYPE (gsl_spmatrix) * A, const size_t j, int * w,
                              ATOMIC * x, const int mark, TYPE (gsl_spmatrix) * C, size_t nz)
{
  int p;
  int * Ai = A->i;
  int * Ap = A->p;
  ATOMIC * Ad = A->data;
  int * Ci = C->i;

  for (p = Ap[j]; p < Ap[j + 1]; ++p)
    {

现在可以看出,GSL 访问了 Ap[j] 和 Ap[j + 1] 的未初始化值。这会导致在几条指令之后立即出现段错误。

现在,如何避免这种情况呢?

让我们看看创建 CSR 矩阵的“正规”方式(第 152-156 行,compress_source.c):

      Cp = dest->p;

      /* initialize row pointers to 0 */
      for (n = 0; n < dest->size1 + 1; ++n)
        Cp[n] = 0;

万岁!这是 p 成员的正确初始化。顺便说一句,接下来的几行解释了 CRS 表示中的 p 成员用于存储每行中的元素数量。看来这是 gsl_spmatrix_alloc_nzmax 中缺少的代码。

结论:不要依赖 gsl_spmatrix_alloc_nzmax 返回的矩阵。它们应该可以用作“目标矩阵”,例如用作C = A + B 中的C,但不能用作零填充的源矩阵。

希望这有帮助。

附言。
您可以删除这个完全不必要的调用 gsl_spmatrix_memcpy(total_copy,total_matrix);

Looks like a bug in GSL. Please report :-)

The line

gsl_spmatrix *total_matrix = gsl_spmatrix_alloc_nzmax(size, size, nzmax, GSL_SPMATRIX_CRS);

is a valid allocator of a GSL sparse matrix. Its initialization, however, is "smart" in that some of its memory buffers are malloced, but not initialized. This refers to the member p.
Line 130 of init_source.c (from GSL sources, submodule (directory) spmatrix):

m->p = malloc((n1 + 1) * sizeof(int));

The next thing your code does is

gsl_spmatrix_memcpy(total_copy, total_matrix); // this is required to avoid another segfault

Well, the comment is somewhat intriguing, but let's look into the code (lines 93-96 of copy_source.c):

          for (n = 0; n < src->size1 + 1; ++n)
            {
              dest->p[n] = src->p[n];
            }

Here, size1 seems to be the number of matrix rows, which were declared as 5. So, the code replaces (by copying) rubbish with rubbish. This tells us that GSL does not seem to work well if a matrix declared as having 5 rows has fewer than 5 nonzero rows. I believe this is the solution of your problem. You declared some matrices, e.g. total_matrix and total_copy as having 5 rows, but they actually have none. So far the code is not buggy, however, because copying rubbish onto rubbish is no error.

The next step in your code:

gsl_spmatrix_add(total_matrix, total_copy, thread_matrices[i]);

invokes this code related to the member p:

      for (j = 0; j < outer_size; ++j)
        {
          Cp[j] = nz;

This opens a loop, which in your case will be executed 5 times. Here Cp is a shorthand for C->p. The only element of the p member that is initialized so far is thus the j-th one of C in C = A + B. Next, inside this loop we can see:

          /* CSC: x += A(:,j); CSR: x += A(j,:) */
          nz = FUNCTION (spmatrix, scatter) (a, j, w, x, (int) (j + 1), c, nz);

Notice that j is passed as the 2nd argument, and incompletely initialised a as the 1st. This invokes spmatrix_scatter defined in line 538 via a macro.

static size_t
FUNCTION (spmatrix, scatter) (const TYPE (gsl_spmatrix) * A, const size_t j, int * w,
                              ATOMIC * x, const int mark, TYPE (gsl_spmatrix) * C, size_t nz)
{
  int p;
  int * Ai = A->i;
  int * Ap = A->p;
  ATOMIC * Ad = A->data;
  int * Ci = C->i;

  for (p = Ap[j]; p < Ap[j + 1]; ++p)
    {

Now, as can be seen, GSL accesses uninitialized values of Ap[j] and Ap[j + 1]. This results in an immediate segfault a few instructions later.

Now, how to avoid this?

Let's look inside the "kosher" way of creating a CSR matrix (lines 152-156, compress_source.c):

      Cp = dest->p;

      /* initialize row pointers to 0 */
      for (n = 0; n < dest->size1 + 1; ++n)
        Cp[n] = 0;

Hurray! This is a proper initialization of the p member. By the way, the next few lines explain that the p member in CRS representation is used to store the number of elements in each row. It seems that this is the code missing in gsl_spmatrix_alloc_nzmax.

Conclusion: do not rely on matrices returned by gsl_spmatrix_alloc_nzmax. They should be OK to use as "destination matrices", e.g. as C in C = A + B, but not as zero-filled source ones.

Hope this helps.

PS.
You can remove this completely unnecessary invocation of gsl_spmatrix_memcpy(total_copy, total_matrix);

~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文