如何使用 Grand Central Dispatch 并行化数独求解器?

发布于 2024-08-14 06:09:52 字数 315 浏览 4 评论 0原文

作为编程练习,我刚刚编写了一个使用回溯算法的数独求解器(请参阅 Wikipedia 获取用 C 编写的简单示例)。

为了更进一步,我想使用 Snow Leopard 的 GCD 来并行化它,以便它可以在我的机器的所有内核上运行。有人可以指导我应该如何执行此操作以及应该进行哪些代码更改吗?谢谢!

马特

As a programming exercise, I just finished writing a Sudoku solver that uses the backtracking algorithm (see Wikipedia for a simple example written in C).

To take this a step further, I would like to use Snow Leopard's GCD to parallelize this so that it runs on all of my machine's cores. Can someone give me pointers on how I should go about doing this and what code changes I should make? Thanks!

Matt

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(3

十年九夏 2024-08-21 06:09:52

如果您最终使用它,请告诉我。它是标准的 ANSI C,所以应该可以在所有东西上运行。使用方法请参见其他帖子。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

short sudoku[9][9];
unsigned long long cubeSolutions=0;
void* cubeValues[10];
const unsigned char oneLookup[64] = {0x8b, 0x80, 0, 0x80, 0, 0, 0, 0x80, 0, 0,0,0,0,0,0, 0x80, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x80,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};

int ifOne(int val) {
  if ( oneLookup[(val-1) >> 3] & (1 << ((val-1) & 0x7))  )
    return val;
  return 0;
}


void init_sudoku() {
  int i,j;
  for (i=0; i<9; i++)
    for (j=0; j<9; j++)
      sudoku[i][j]=0x1ff;
}

void set_sudoku( char* initialValues) {
  int i;
  if ( strlen (initialValues) !=  81 ) {
    printf("Error: inputString should have length=81, length is %2.2d\n", strlen(initialValues) );
    exit (-12);
  }
  for (i=0; i < 81; i++)
    if ((initialValues[i] > 0x30) && (initialValues[i] <= 0x3a))
      sudoku[i/9][i%9] = 1 << (initialValues[i] - 0x31) ;
}

void print_sudoku ( int style ) {
  int i, j, k;
  for (i=0; i < 9; i++) {
    for (j=0; j < 9; j++) {
      if ( ifOne(sudoku[i][j]) || !style) {
        for (k=0; k < 9; k++)
          if (sudoku[i][j] & 1<<k)
            printf("%d", k+1);
      } else
        printf("*");
      if ( !((j+1)%3) )
        printf("\t");
      else
        printf(",");
    }
    printf("\n");
    if (!((i+1) % 3) )
      printf("\n");
  }
}

void print_HTML_sudoku () {
  int i, j, k, l, m;
  printf("<TABLE>\n");
  for (i=0; i<3; i++) {
    printf("  <TR>\n");
    for (j=0; j<3; j++) {
      printf("    <TD><TABLE>\n");
      for (l=0; l<3; l++) { printf("      <TR>"); for (m=0; m<3; m++) { printf("<TD>"); for (k=0; k < 9; k++)  { if (sudoku[i*3+l][j*3+m] & 1<<k)
            printf("%d", k+1);
          }
          printf("</TD>");
        }
        printf("</TR>\n");
      }
    printf("    </TABLE></TD>\n");
    }
    printf("  </TR>\n");
  }
  printf("</TABLE>");
}



int doRow () {
  int count=0, new_value, row_value, i, j;
  for (i=0; i<9; i++) {
    row_value=0x1ff;
    for (j=0; j<9; j++)
      row_value&=~ifOne(sudoku[i][j]);
    for (j=0; j<9; j++) {
      new_value=sudoku[i][j] & row_value;
      if (new_value && (new_value != sudoku[i][j]) ) {
        count++;
        sudoku[i][j] = new_value;
      }
    }
  }
  return count;
}

int doCol () {
  int count=0, new_value, col_value, i, j;
  for (i=0; i<9; i++) {
    col_value=0x1ff;
    for (j=0; j<9; j++)
      col_value&=~ifOne(sudoku[j][i]);
    for (j=0; j<9; j++) {
      new_value=sudoku[j][i] & col_value;
      if (new_value && (new_value != sudoku[j][i]) ) {
        count++;
        sudoku[j][i] = new_value;
      }
    }
  }
  return count;
}

int doCube () {
  int count=0, new_value, cube_value, i, j, l, m;
  for (i=0; i<3; i++)
    for (j=0; j<3; j++) {
      cube_value=0x1ff;
      for (l=0; l<3; l++)
        for (m=0; m<3; m++)
          cube_value&=~ifOne(sudoku[i*3+l][j*3+m]);
      for (l=0; l<3; l++)
        for (m=0; m<3; m++) {
          new_value=sudoku[i*3+l][j*3+m] & cube_value;
          if (new_value && (new_value != sudoku[i*3+l][j*3+m]) ) {
            count++;
            sudoku[i*3+l][j*3+m] = new_value;
          }
        }
    }
  return count;
}

#define FALSE -1
#define TRUE 1
#define INCOMPLETE 0

int validCube () {
  int i, j, l, m, r, c;
  int pigeon;
  int solved=TRUE;

  //check horizontal
  for (i=0; i<9; i++) {
    pigeon=0;
    for (j=0; j<9; j++)
      if (ifOne(sudoku[i][j])) {
        if (pigeon & sudoku[i][j]) return FALSE;
        pigeon |= sudoku[i][j];
      } else {
        solved=INCOMPLETE;
      }
  }

  //check vertical
  for (i=0; i<9; i++) {
    pigeon=0;
    for (j=0; j<9; j++)
      if (ifOne(sudoku[j][i])) {
        if (pigeon & sudoku[j][i]) return FALSE;
        pigeon |= sudoku[j][i];
      }
      else {
        solved=INCOMPLETE;
      }
  }

  //check cube
  for (i=0; i<3; i++)
    for (j=0; j<3; j++) {
      pigeon=0;
      r=j*3; c=i*3;
      for (l=0; l<3; l++)
        for (m=0; m<3; m++)
        if (ifOne(sudoku[r+l][c+m])) {
          if (pigeon & sudoku[r+l][c+m]) return FALSE;
          pigeon |= sudoku[r+l][c+m];
        }
        else {
          solved=INCOMPLETE;
        }
    }

  return solved;
}

int solveSudoku(int position ) {
  int status, i, k;
  short oldCube[9][9];

  for (i=position; i < 81; i++) {

    while ( doCube() + doRow() + doCol() );

    status = validCube() ;
    if ((status == TRUE) || (status == FALSE))
      return status;


    if ((status == INCOMPLETE) && !ifOne(sudoku[i/9][i%9]) ) {
      memcpy( &oldCube, &sudoku, sizeof(short) * 81) ;
      for (k=0; k < 9; k++) {
        if ( sudoku[i/9][i%9] & (1<<k) ) {
          sudoku[i/9][i%9] = 1 << k ;
          if (solveSudoku(i+1) == TRUE ) {

            /* return TRUE; */
            /* Or look for entire set of solutions */

            if (cubeSolutions < 10) {
              cubeValues[cubeSolutions] = malloc ( sizeof(short) * 81 ) ;
              memcpy( cubeValues[cubeSolutions], &sudoku, sizeof(short) * 81) ;
            }

            cubeSolutions++;
            if ((cubeSolutions & 0x3ffff) == 0x3ffff ) {
              printf ("cubeSolutions = %llx\n", cubeSolutions+1 );
            }

            //if ( cubeSolutions > 10 ) 
            //    return TRUE;

          }

          memcpy( &sudoku, &oldCube, sizeof(short) * 81) ;
        }
        if (k==8)
          return FALSE;
      }

    }
  }

  return FALSE;
}


int main ( int argc, char** argv)  {
  int i;
  if (argc != 2) {
    printf("Error: number of arguments on command line is incorrect\n");
    exit (-12);
  }

  init_sudoku();
  set_sudoku(argv[1]);

  printf("[----------------------- Input  Data ------------------------]\n\n");
  print_sudoku(1);

  solveSudoku(0);
  if ((validCube()==1) && !cubeSolutions)  {
    // If sudoku is effectively already solved, cubeSolutions will not be set
    printf ("\n  This is a trivial sudoku. \n\n");
    print_sudoku(1);
  }


  if (!cubeSolutions && validCube()!=1)
    printf("Not Solvable\n");
  if (cubeSolutions > 1) {
    if (cubeSolutions >= 10)
      printf("10+ Solutions, returning first 10 (%lld) [%llx] \n", cubeSolutions, cubeSolutions);
    else
      printf("%llx Solutions. \n", cubeSolutions);
  }

  for (i=0; (i < cubeSolutions) && (i < 10); i++) {
    memcpy ( &sudoku, cubeValues[i], sizeof(short) * 81 );
    printf("[----------------------- Solution %2.2d ------------------------]\n\n", i+1);
    print_sudoku(0);
    //print_HTML_sudoku();
  }
  return 0;
}

Please let me know if you end up using it. It is run of the mill ANSI C, so should run on everything. See other post for usage.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

short sudoku[9][9];
unsigned long long cubeSolutions=0;
void* cubeValues[10];
const unsigned char oneLookup[64] = {0x8b, 0x80, 0, 0x80, 0, 0, 0, 0x80, 0, 0,0,0,0,0,0, 0x80, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x80,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};

int ifOne(int val) {
  if ( oneLookup[(val-1) >> 3] & (1 << ((val-1) & 0x7))  )
    return val;
  return 0;
}


void init_sudoku() {
  int i,j;
  for (i=0; i<9; i++)
    for (j=0; j<9; j++)
      sudoku[i][j]=0x1ff;
}

void set_sudoku( char* initialValues) {
  int i;
  if ( strlen (initialValues) !=  81 ) {
    printf("Error: inputString should have length=81, length is %2.2d\n", strlen(initialValues) );
    exit (-12);
  }
  for (i=0; i < 81; i++)
    if ((initialValues[i] > 0x30) && (initialValues[i] <= 0x3a))
      sudoku[i/9][i%9] = 1 << (initialValues[i] - 0x31) ;
}

void print_sudoku ( int style ) {
  int i, j, k;
  for (i=0; i < 9; i++) {
    for (j=0; j < 9; j++) {
      if ( ifOne(sudoku[i][j]) || !style) {
        for (k=0; k < 9; k++)
          if (sudoku[i][j] & 1<<k)
            printf("%d", k+1);
      } else
        printf("*");
      if ( !((j+1)%3) )
        printf("\t");
      else
        printf(",");
    }
    printf("\n");
    if (!((i+1) % 3) )
      printf("\n");
  }
}

void print_HTML_sudoku () {
  int i, j, k, l, m;
  printf("<TABLE>\n");
  for (i=0; i<3; i++) {
    printf("  <TR>\n");
    for (j=0; j<3; j++) {
      printf("    <TD><TABLE>\n");
      for (l=0; l<3; l++) { printf("      <TR>"); for (m=0; m<3; m++) { printf("<TD>"); for (k=0; k < 9; k++)  { if (sudoku[i*3+l][j*3+m] & 1<<k)
            printf("%d", k+1);
          }
          printf("</TD>");
        }
        printf("</TR>\n");
      }
    printf("    </TABLE></TD>\n");
    }
    printf("  </TR>\n");
  }
  printf("</TABLE>");
}



int doRow () {
  int count=0, new_value, row_value, i, j;
  for (i=0; i<9; i++) {
    row_value=0x1ff;
    for (j=0; j<9; j++)
      row_value&=~ifOne(sudoku[i][j]);
    for (j=0; j<9; j++) {
      new_value=sudoku[i][j] & row_value;
      if (new_value && (new_value != sudoku[i][j]) ) {
        count++;
        sudoku[i][j] = new_value;
      }
    }
  }
  return count;
}

int doCol () {
  int count=0, new_value, col_value, i, j;
  for (i=0; i<9; i++) {
    col_value=0x1ff;
    for (j=0; j<9; j++)
      col_value&=~ifOne(sudoku[j][i]);
    for (j=0; j<9; j++) {
      new_value=sudoku[j][i] & col_value;
      if (new_value && (new_value != sudoku[j][i]) ) {
        count++;
        sudoku[j][i] = new_value;
      }
    }
  }
  return count;
}

int doCube () {
  int count=0, new_value, cube_value, i, j, l, m;
  for (i=0; i<3; i++)
    for (j=0; j<3; j++) {
      cube_value=0x1ff;
      for (l=0; l<3; l++)
        for (m=0; m<3; m++)
          cube_value&=~ifOne(sudoku[i*3+l][j*3+m]);
      for (l=0; l<3; l++)
        for (m=0; m<3; m++) {
          new_value=sudoku[i*3+l][j*3+m] & cube_value;
          if (new_value && (new_value != sudoku[i*3+l][j*3+m]) ) {
            count++;
            sudoku[i*3+l][j*3+m] = new_value;
          }
        }
    }
  return count;
}

#define FALSE -1
#define TRUE 1
#define INCOMPLETE 0

int validCube () {
  int i, j, l, m, r, c;
  int pigeon;
  int solved=TRUE;

  //check horizontal
  for (i=0; i<9; i++) {
    pigeon=0;
    for (j=0; j<9; j++)
      if (ifOne(sudoku[i][j])) {
        if (pigeon & sudoku[i][j]) return FALSE;
        pigeon |= sudoku[i][j];
      } else {
        solved=INCOMPLETE;
      }
  }

  //check vertical
  for (i=0; i<9; i++) {
    pigeon=0;
    for (j=0; j<9; j++)
      if (ifOne(sudoku[j][i])) {
        if (pigeon & sudoku[j][i]) return FALSE;
        pigeon |= sudoku[j][i];
      }
      else {
        solved=INCOMPLETE;
      }
  }

  //check cube
  for (i=0; i<3; i++)
    for (j=0; j<3; j++) {
      pigeon=0;
      r=j*3; c=i*3;
      for (l=0; l<3; l++)
        for (m=0; m<3; m++)
        if (ifOne(sudoku[r+l][c+m])) {
          if (pigeon & sudoku[r+l][c+m]) return FALSE;
          pigeon |= sudoku[r+l][c+m];
        }
        else {
          solved=INCOMPLETE;
        }
    }

  return solved;
}

int solveSudoku(int position ) {
  int status, i, k;
  short oldCube[9][9];

  for (i=position; i < 81; i++) {

    while ( doCube() + doRow() + doCol() );

    status = validCube() ;
    if ((status == TRUE) || (status == FALSE))
      return status;


    if ((status == INCOMPLETE) && !ifOne(sudoku[i/9][i%9]) ) {
      memcpy( &oldCube, &sudoku, sizeof(short) * 81) ;
      for (k=0; k < 9; k++) {
        if ( sudoku[i/9][i%9] & (1<<k) ) {
          sudoku[i/9][i%9] = 1 << k ;
          if (solveSudoku(i+1) == TRUE ) {

            /* return TRUE; */
            /* Or look for entire set of solutions */

            if (cubeSolutions < 10) {
              cubeValues[cubeSolutions] = malloc ( sizeof(short) * 81 ) ;
              memcpy( cubeValues[cubeSolutions], &sudoku, sizeof(short) * 81) ;
            }

            cubeSolutions++;
            if ((cubeSolutions & 0x3ffff) == 0x3ffff ) {
              printf ("cubeSolutions = %llx\n", cubeSolutions+1 );
            }

            //if ( cubeSolutions > 10 ) 
            //    return TRUE;

          }

          memcpy( &sudoku, &oldCube, sizeof(short) * 81) ;
        }
        if (k==8)
          return FALSE;
      }

    }
  }

  return FALSE;
}


int main ( int argc, char** argv)  {
  int i;
  if (argc != 2) {
    printf("Error: number of arguments on command line is incorrect\n");
    exit (-12);
  }

  init_sudoku();
  set_sudoku(argv[1]);

  printf("[----------------------- Input  Data ------------------------]\n\n");
  print_sudoku(1);

  solveSudoku(0);
  if ((validCube()==1) && !cubeSolutions)  {
    // If sudoku is effectively already solved, cubeSolutions will not be set
    printf ("\n  This is a trivial sudoku. \n\n");
    print_sudoku(1);
  }


  if (!cubeSolutions && validCube()!=1)
    printf("Not Solvable\n");
  if (cubeSolutions > 1) {
    if (cubeSolutions >= 10)
      printf("10+ Solutions, returning first 10 (%lld) [%llx] \n", cubeSolutions, cubeSolutions);
    else
      printf("%llx Solutions. \n", cubeSolutions);
  }

  for (i=0; (i < cubeSolutions) && (i < 10); i++) {
    memcpy ( &sudoku, cubeValues[i], sizeof(short) * 81 );
    printf("[----------------------- Solution %2.2d ------------------------]\n\n", i+1);
    print_sudoku(0);
    //print_HTML_sudoku();
  }
  return 0;
}
不弃不离 2024-08-21 06:09:52

首先,由于回溯是深度优先搜索,因此它不能直接并行化,因为任何新计算的结果都不能直接由另一个线程使用。相反,您必须尽早划分问题,即线程 #1 从回溯图中节点的第一个组合开始,然后继续搜索该子图的其余部分。线程 #2 从第一个可能的第二个组合开始,依此类推。简而言之,对于n个线程在搜索空间的顶层找到n个可能的组合(不要“前向跟踪”),然后分配这些n 起始点为 n 个线程。

然而,我认为这个想法从根本上来说是有缺陷的:许多数独排列只需几千个前向+回溯步骤就可以解决,并且可以在单个线程上在几毫秒内解决。事实上,速度如此之快,甚至在多线程上,即使是几个线程所需的小协调(假设n个线程将计算时间减少到原始时间的1/n)与总运行时间相比,核心/多 CPU 是不可忽略的,因此它绝不是一个更有效的解决方案。

For one, since backtracking is a depth-first search it is not directly parallelizable, since any newly computed result cannot be used be directly used by another thread. Instead, you must divide the problem early, i.e. thread #1 starts with the first combination for a node in the backtracking graph, and proceeds to search the rest of that subgraph. Thread #2 starts with the second possible combination at the first and so forth. In short, for n threads find the n possible combinations on the top level of the search space (do not "forward-track"), then assign these n starting points to n threads.

However I think the idea is fundamentally flawed: Many sudoku permutations are solved in a matter of a couple thousands of forward+backtracking steps, and are solved within milliseconds on a single thread. This is in fact so fast that even the small coordination required for a few threads (assume that n threads reduce computation time to 1/n of original time) on a multi-core/multi-CPU is not negligible compared to the total running time, thus it is not by any chance a more efficient solution.

另类 2024-08-21 06:09:52

您确定要这样做吗?比如,你想解决什么问题?如果要使用所有核心,请使用线程。如果您想要一个快速的数独求解器,我可以给您一个我写的,请参阅下面的输出。如果您想为自己工作,请继续使用 GCD ;)。

更新

我不认为 GCD 不好,它只是与解决数独的任务不太相关。 GCD 是一种将 GUI 事件与代码联系起来的技术。本质上,GCD 解决了两个问题,一个是 MacOS X 更新窗口的怪异问题,另一个是它提供了一种将代码与 GUI 事件绑定的改进方法(与线程相比)。

它不适用于这个问题,因为数独的解决速度比人们想象的要快得多(以我的拙见)。话虽这么说,如果您的目标是更快地解决数独,您会想要使用线程,因为您会想要直接使用多个处理器。

[bear@bear scripts]$ time ./a.out ..1..4.......6.3.5...9.....8.....7.3.......285...7.6..3...8...6..92......4...1... 
[----------------------- Input  Data ------------------------]

*,*,1   *,*,4   *,*,*   
*,*,*   *,6,*   3,*,5   
*,*,*   9,*,*   *,*,*   

8,*,*   *,*,*   7,*,3   
*,*,*   *,*,*   *,2,8   
5,*,*   *,7,*   6,*,*   

3,*,*   *,8,*   *,*,6   
*,*,9   2,*,*   *,*,*   
*,4,*   *,*,1   *,*,*   

[----------------------- Solution 01 ------------------------]

7,6,1   3,5,4   2,8,9   
2,9,8   1,6,7   3,4,5   
4,5,3   9,2,8   1,6,7   

8,1,2   6,4,9   7,5,3   
9,7,6   5,1,3   4,2,8   
5,3,4   8,7,2   6,9,1   

3,2,7   4,8,5   9,1,6   
1,8,9   2,3,6   5,7,4   
6,4,5   7,9,1   8,3,2   


real    0m0.044s
user    0m0.041s
sys 0m0.001s

Are you sure you want to do that? Like, what problem are you trying to solve? If you want to use all cores, use threads. If you want a fast sudoku solver, I can give you one I wrote, see output below. If you want to make work for yourself, go ahead and use GCD ;).

Update:

I don't think GCD is bad, it just isn't terribly relevant to the task of solving sudoku. GCD is a technology to tie GUI events to code. Essentially, GCD solves two problems, a Quirk in how the MacOS X updates windows, and, it provides an improved method (as compared to threads) of tying code to GUI events.

It doesn't apply to this problem because Sudoku can be solved significantly faster than a person can think (in my humble opinion). That being said, if your goal was to solve Sudoku faster, you would want to use threads, because you would want to directly use more than one processor.

[bear@bear scripts]$ time ./a.out ..1..4.......6.3.5...9.....8.....7.3.......285...7.6..3...8...6..92......4...1... 
[----------------------- Input  Data ------------------------]

*,*,1   *,*,4   *,*,*   
*,*,*   *,6,*   3,*,5   
*,*,*   9,*,*   *,*,*   

8,*,*   *,*,*   7,*,3   
*,*,*   *,*,*   *,2,8   
5,*,*   *,7,*   6,*,*   

3,*,*   *,8,*   *,*,6   
*,*,9   2,*,*   *,*,*   
*,4,*   *,*,1   *,*,*   

[----------------------- Solution 01 ------------------------]

7,6,1   3,5,4   2,8,9   
2,9,8   1,6,7   3,4,5   
4,5,3   9,2,8   1,6,7   

8,1,2   6,4,9   7,5,3   
9,7,6   5,1,3   4,2,8   
5,3,4   8,7,2   6,9,1   

3,2,7   4,8,5   9,1,6   
1,8,9   2,3,6   5,7,4   
6,4,5   7,9,1   8,3,2   


real    0m0.044s
user    0m0.041s
sys 0m0.001s
~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文