随机排列单链表的前 N 个元素

发布于 2024-10-07 02:17:08 字数 991 浏览 7 评论 0原文

我必须随机排列长度为 n 的单链表的前 N 个元素。每个元素定义为：

typedef struct E_s
{
  struct E_s *next;
}E_t;

我有一个根元素，我可以遍历整个大小为n的链表。随机排列前 N 个元素（从根开始）的最有效技术是什么？

所以，给定 a->b->c->d->e->f->...x->y->z 我需要做一些事情。就像 f->a->e->c->b->...x->y->z

我的具体情况：

nN 相对于 n 约为 20%
我的 RAM 资源有限，最好的算法应该使它就位
我必须在循环中进行多次迭代，因此速度很重要
不需要理想的随机性（均匀分布），如果它“几乎”随机
就可以在进行排列之前，我已经遍历了 N 个元素（出于其他需要），所以也许我也可以使用它进行排列

更新：我发现本文。它指出它提出了一种 O(log n) 堆栈空间和预期 O(n log n) 时间的算法。

原文

I have to permute N first elements of a singly linked list of length n, randomly. Each element is defined as:

typedef struct E_s
{
  struct E_s *next;
}E_t;

I have a root element and I can traverse the whole linked list of size n. What is the most efficient technique to permute only N first elements (starting from root) randomly?

So, given a->b->c->d->e->f->...x->y->z I need to make smth. like f->a->e->c->b->...x->y->z

My specific case:

n-N is about 20% relative to n
I have limited RAM resources, the best algorithm should make it in place
I have to do it in a loop, in many iterations, so the speed does matter
The ideal randomness (uniform distribution) is not required, it's Ok if it's "almost" random
Before making permutations, I traverse the N elements already (for other needs), so maybe I could use this for permutations as well

UPDATE: I found this paper. It states it presents an algorithm of O(log n) stack space and expected O(n log n) time.

分享到QQ

分享到微博

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

需要登录才能够评论，你可以免费注册一个本站的账号。

永不分离 2024-10-14 02:17:08

我没有尝试过，但您可以使用“随机合并排序”。

更准确地说，您可以随机化合并例程。您不会系统地合并两个子列表，而是基于抛硬币进行合并（即以 0.5 的概率选择第一个子列表的第一个元素，以 0.5 的概率选择右侧子列表的第一个元素）。

这应该在 O(n log n) 中运行并使用 O(1) 空间（如果正确实现）。

下面您可以找到一个用 C 语言实现的示例，您可以根据自己的需要进行调整。请注意，此实现在两个位置使用随机化：在 splitList 和 merge 中。但是，您可以只选择这两个地方之一。我不确定分布是否是随机的（我几乎确定不是），但一些测试用例产生了不错的结果。

#include <stdio.h>
#include <stdlib.h>

#define N 40

typedef struct _node{
  int value;
  struct _node *next;
} node;

void splitList(node *x, node **leftList, node **rightList){
  int lr=0; // left-right-list-indicator
  *leftList = 0;
  *rightList = 0;
  while (x){
    node *xx = x->next;
    lr=rand()%2;
    if (lr==0){
      x->next = *leftList;
      *leftList = x;
    }
    else {
      x->next = *rightList;
      *rightList = x;
    }
    x=xx;
    lr=(lr+1)%2;
  }
}

void merge(node *left, node *right, node **result){
  *result = 0;
  while (left || right){
    if (!left){
      node *xx = right;
      while (right->next){
    right = right->next;
      }
      right->next = *result;
      *result = xx;
      return;
    }
    if (!right){
      node *xx = left;
      while (left->next){
    left = left->next;
      }
      left->next = *result;
      *result = xx;
      return;
    }
    if (rand()%2==0){
      node *xx = right->next;
      right->next = *result;
      *result = right;
      right = xx;
    }
    else {
      node *xx = left->next;
      left->next = *result;
      *result = left;
      left = xx;
    }
  }
}

void mergeRandomize(node **x){
  if ((!*x) || !(*x)->next){
    return;
  }
  node *left;
  node *right;
  splitList(*x, &left, &right);
  mergeRandomize(&left);
  mergeRandomize(&right);
  merge(left, right, &*x);
}

int main(int argc, char *argv[]) {
  srand(time(NULL));
  printf("Original Linked List\n");
  int i;
  node *x = (node*)malloc(sizeof(node));;
  node *root=x;
  x->value=0;
  for(i=1; i<N; ++i){
    node *xx;
    xx = (node*)malloc(sizeof(node));
    xx->value=i;
    xx->next=0;
    x->next = xx;
    x = xx;
  }
  x=root;
  do {
    printf ("%d, ", x->value);
    x=x->next;
  } while (x);

  x = root;
  node *left, *right;
  mergeRandomize(&x);
  if (!x){
    printf ("Error.\n");
    return -1;
  }
  printf ("\nNow randomized:\n");
  do {
    printf ("%d, ", x->value);
    x=x->next;
  } while (x);
  printf ("\n");
  return 0;
}

I've not tried it, but you could use a "randomized merge-sort".

To be more precise, you randomize the merge-routine. You do not merge the two sub-lists systematically, but you do it based on a coin toss (i.e. with probability 0.5 you select the first element of the first sublist, with probability 0.5 you select the first element of the right sublist).

This should run in O(n log n) and use O(1) space (if properly implemented).

Below you find a sample implementation in C you might adapt to your needs. Note that this implementation uses randomisation at two places: In splitList and in merge. However, you might choose just one of these two places. I'm not sure if the distribution is random (I'm almost sure it is not), but some test cases yielded decent results.

#include <stdio.h>
#include <stdlib.h>

#define N 40

typedef struct _node{
  int value;
  struct _node *next;
} node;

void splitList(node *x, node **leftList, node **rightList){
  int lr=0; // left-right-list-indicator
  *leftList = 0;
  *rightList = 0;
  while (x){
    node *xx = x->next;
    lr=rand()%2;
    if (lr==0){
      x->next = *leftList;
      *leftList = x;
    }
    else {
      x->next = *rightList;
      *rightList = x;
    }
    x=xx;
    lr=(lr+1)%2;
  }
}

void merge(node *left, node *right, node **result){
  *result = 0;
  while (left || right){
    if (!left){
      node *xx = right;
      while (right->next){
    right = right->next;
      }
      right->next = *result;
      *result = xx;
      return;
    }
    if (!right){
      node *xx = left;
      while (left->next){
    left = left->next;
      }
      left->next = *result;
      *result = xx;
      return;
    }
    if (rand()%2==0){
      node *xx = right->next;
      right->next = *result;
      *result = right;
      right = xx;
    }
    else {
      node *xx = left->next;
      left->next = *result;
      *result = left;
      left = xx;
    }
  }
}

void mergeRandomize(node **x){
  if ((!*x) || !(*x)->next){
    return;
  }
  node *left;
  node *right;
  splitList(*x, &left, &right);
  mergeRandomize(&left);
  mergeRandomize(&right);
  merge(left, right, &*x);
}

int main(int argc, char *argv[]) {
  srand(time(NULL));
  printf("Original Linked List\n");
  int i;
  node *x = (node*)malloc(sizeof(node));;
  node *root=x;
  x->value=0;
  for(i=1; i<N; ++i){
    node *xx;
    xx = (node*)malloc(sizeof(node));
    xx->value=i;
    xx->next=0;
    x->next = xx;
    x = xx;
  }
  x=root;
  do {
    printf ("%d, ", x->value);
    x=x->next;
  } while (x);

  x = root;
  node *left, *right;
  mergeRandomize(&x);
  if (!x){
    printf ("Error.\n");
    return -1;
  }
  printf ("\nNow randomized:\n");
  do {
    printf ("%d, ", x->value);
    x=x->next;
  } while (x);
  printf ("\n");
  return 0;
}

回复收藏 0 原文

合约呢 2024-10-14 02:17:08

转换为数组，使用 Fisher-Yates shuffle，然后转换回来到一个列表。

回复收藏 0 原文

败给现实 2024-10-14 02:17:08

我不相信有任何有效的方法可以在没有中间数据结构的情况下随机洗牌单链表。我只是将前 N 个元素读入数组，执行 Fisher-Yates shuffle ，然后将前 N 个元素重建为单链表。

回复收藏 0 原文

瑾夏年华 2024-10-14 02:17:08

首先，获取列表的长度和最后一个元素。你说你已经在随机化之前进行了遍历，那将是一个好时机。

然后，通过将第一个元素链接到最后一个元素，将其变成循环列表。通过将大小除以四并迭代第二遍来获取指向列表的四个指针。（这些指针也可以通过在之前的遍历中每四次迭代递增一次、两次和三次来从之前的遍历中获得。）

对于随机化遍历，再次遍历并将指针 0 和 2 以及指针 1 和 3 交换 50%可能性。（要么执行两项交换操作，要么都不执行；仅执行一次交换就会将列表一分为二。）

以下是一些示例代码。看起来它可以更随机一些，但我想多进行几次就可以达到目的。无论如何，分析算法比编写算法更困难：vP。对于缺少缩进表示歉意；我只是将其打入浏览器中的 ideone 中。

http://ideone.com/9I7mx

#include <iostream>
#include <cstdlib>
#include <ctime>
using namespace std;

struct list_node {
int v;
list_node *n;
list_node( int inv, list_node *inn )
: v( inv ), n( inn) {}
};

int main() {
srand( time(0) );

// initialize the list and 4 pointers at even intervals
list_node *n_first = new list_node( 0, 0 ), *n = n_first;
list_node *p[4];
p[0] = n_first;
for ( int i = 1; i < 20; ++ i ) {
n = new list_node( i, n );
if ( i % (20/4) == 0 ) p[ i / (20/4) ] = n;
}
// intervals must be coprime to list length!
p[2] = p[2]->n;
p[3] = p[3]->n;
// turn it into a circular list
n_first->n = n;

// swap the pointers around to reshape the circular list
// one swap cuts a circular list in two, or joins two circular lists
// so perform one cut and one join, effectively reordering elements.
for ( int i = 0; i < 20; ++ i ) {
list_node *p_old[4];
copy( p, p + 4, p_old );
p[0] = p[0]->n;
p[1] = p[1]->n;
p[2] = p[2]->n;
p[3] = p[3]->n;
if ( rand() % 2 ) {
swap( p_old[0]->n, p_old[2]->n );
swap( p_old[1]->n, p_old[3]->n );
}
}

// you might want to turn it back into a NULL-terminated list

// print results
for ( int i = 0; i < 20; ++ i ) {
cout << n->v << ", ";
n = n->n;
}
cout << '\n';
}

First, get the length of the list and the last element. You say you already do a traversal before randomization, that would be a good time.

Then, turn it into a circular list by linking the first element to the last element. Get four pointers into the list by dividing the size by four and iterating through it for a second pass. (These pointers could also be obtained from the previous pass by incrementing once, twice, and three times per four iterations in the previous traversal.)

For the randomization pass, traverse again and swap pointers 0 and 2 and pointers 1 and 3 with 50% probability. (Do either both swap operations or neither; just one swap will split the list in two.)

Here is some example code. It looks like it could be a little more random, but I suppose a few more passes could do the trick. Anyway, analyzing the algorithm is more difficult than writing it :vP . Apologies for the lack of indentation; I just punched it into ideone in the browser.

http://ideone.com/9I7mx

#include <iostream>
#include <cstdlib>
#include <ctime>
using namespace std;

struct list_node {
int v;
list_node *n;
list_node( int inv, list_node *inn )
: v( inv ), n( inn) {}
};

int main() {
srand( time(0) );

// initialize the list and 4 pointers at even intervals
list_node *n_first = new list_node( 0, 0 ), *n = n_first;
list_node *p[4];
p[0] = n_first;
for ( int i = 1; i < 20; ++ i ) {
n = new list_node( i, n );
if ( i % (20/4) == 0 ) p[ i / (20/4) ] = n;
}
// intervals must be coprime to list length!
p[2] = p[2]->n;
p[3] = p[3]->n;
// turn it into a circular list
n_first->n = n;

// swap the pointers around to reshape the circular list
// one swap cuts a circular list in two, or joins two circular lists
// so perform one cut and one join, effectively reordering elements.
for ( int i = 0; i < 20; ++ i ) {
list_node *p_old[4];
copy( p, p + 4, p_old );
p[0] = p[0]->n;
p[1] = p[1]->n;
p[2] = p[2]->n;
p[3] = p[3]->n;
if ( rand() % 2 ) {
swap( p_old[0]->n, p_old[2]->n );
swap( p_old[1]->n, p_old[3]->n );
}
}

// you might want to turn it back into a NULL-terminated list

// print results
for ( int i = 0; i < 20; ++ i ) {
cout << n->v << ", ";
n = n->n;
}
cout << '\n';
}

回复收藏 0 原文

姜生凉生 2024-10-14 02:17:08

对于 N 非常大的情况（因此它不适合您的记忆），您可以执行以下操作（有点像 Knuth 的 3.4.2P）：

j = N
k = 1 和 j 之间的随机
遍历输入列表，找到第 k 项并输出；从序列中删除所述项目（或以某种方式标记它，以便您在下次遍历时不会考虑它）
减少 j 并返回到 2 除非 j==0
输出列表的其余部分

请注意，这是 O(N^ 2），除非你能保证步骤3中的随机访问。

如果N相对较小，以至于N个项目适合内存，只需将它们加载到数组中并进行洗牌，就像@Mitch建议的那样。

回复收藏 0 原文

紙鸢 2024-10-14 02:17:08

如果你知道N和n，我想你可以简单地做到这一点。它也是完全随机的。您只需遍历整个列表一次，并在每次添加节点时遍历随机部分。我认为这是 O(n+NlogN) 或 O(n+N^2)。我不知道。它基于更新根据先前节点发生的情况为随机部分选择节点的条件概率。

考虑到先前节点发生的情况，确定为随机部分选择某个节点的概率 (p=(N-size)/(n-position)，其中 size 是先前选择的节点数，position 是先前考虑的节点数)
如果没有为随机部分选择节点，则转到步骤 4。如果为随机部分选择节点，则根据到目前为止的大小在随机部分中随机选择位置（位置=（0 和 1 之间的随机数）* 大小，大小又是先前节点的数量）。
将节点放在需要去的地方，更新指针。增加尺寸。更改为查看先前指向您刚刚查看并移动的节点。
增加位置，查看下一个节点。

我不懂C，但我可以给你伪代码。在这里，我将排列称为随机化的第一个元素。

integer size=0;         //size of permutation
integer position=0      //number of nodes you've traversed so far
Node    head=head of linked list        //this holds the node at the head of your linked list.
Node    current_node=head           //Starting at head, you'll move this down the list to check each node, whether you put it in the list.
Node    previous=head               //stores the previous node for changing pointers.  starts at head to avoid asking for the next field on a null node

While ((size not equal to N) or (current_node is not null)){            //iterating through the list until the permutation is full.  We should never pass the end of list, but just in case, I include that condition)

pperm=(N-size)/(n-position)          //probability that a selected node will be in the permutation.
if ([generate a random decimal between 0 and 1] < pperm)    //this decides whether or not the current node will go in the permutation

    if (j is not equal to 0){   //in case we are at start of list, there's no need to change the list       

        pfirst=1/(size+1)       //probability that, if you select a node to be in the permutation, that it will be first.  Since the permutation has
                    //zero elements at start, adding an element will make it the initial node of a permutation and percent chance=1.
        integer place_in_permutation = round down([generate a random decimal between 0 and 1]/pfirst)   //place in the permutation.  note that the head =0.
        previous.next=current_node.next

        if(place_in_permutation==0){            //if placing current node first, must change the head

            current_node.next=head          //set the current Node to point to the previous head
            head=current_node           //set the variable head to point to the current node

        }
        else{
            Node temp=head
            for (counter starts at zero. counter is less than place_in_permutation-1.  Each iteration, increment counter){

                counter=counter.next
            }   //at this time, temp should point to the node right before the insertion spot
            current_node.next=temp.next
            temp.next=current_node
        }
        current_node=previous
    }
    size++              //since we add one to the permutation, increase the size of the permutation
}
j++;
previous=current_node
current_node=current_node.next

。

如果您保留最近添加的节点，以防必须在其右侧添加一个节点，则可能会提高效率

If you know both N and n, I think you can do it simply. It's fully random, too. You only iterate through the whole list once, and through the randomized part each time you add a node. I think that's O(n+NlogN) or O(n+N^2). I'm not sure. It's based upon updating the conditional probability that a node is selected for the random portion given what happened to previous nodes.

Determine the probability that a certain node will be selected for the random portion given what happened to previous nodes (p=(N-size)/(n-position) where size is number of nodes previously chosen and position is number of nodes previously considered)
If node is not selected for random part, move to step 4. If node is selected for the random part, randomly choose place in random part based upon the size so far (place=(random between 0 and 1) * size, size is again number of previous nodes).
Place the node where it needs to go, update the pointers. Increment size. Change to looking at the node that previously pointed at what you were just looking at and moved.
Increment position, look at the next node.

I don't know C, but I can give you the pseudocode. In this, I refer to the permutation as the first elements that are randomized.

integer size=0;         //size of permutation
integer position=0      //number of nodes you've traversed so far
Node    head=head of linked list        //this holds the node at the head of your linked list.
Node    current_node=head           //Starting at head, you'll move this down the list to check each node, whether you put it in the list.
Node    previous=head               //stores the previous node for changing pointers.  starts at head to avoid asking for the next field on a null node

While ((size not equal to N) or (current_node is not null)){            //iterating through the list until the permutation is full.  We should never pass the end of list, but just in case, I include that condition)

pperm=(N-size)/(n-position)          //probability that a selected node will be in the permutation.
if ([generate a random decimal between 0 and 1] < pperm)    //this decides whether or not the current node will go in the permutation

    if (j is not equal to 0){   //in case we are at start of list, there's no need to change the list       

        pfirst=1/(size+1)       //probability that, if you select a node to be in the permutation, that it will be first.  Since the permutation has
                    //zero elements at start, adding an element will make it the initial node of a permutation and percent chance=1.
        integer place_in_permutation = round down([generate a random decimal between 0 and 1]/pfirst)   //place in the permutation.  note that the head =0.
        previous.next=current_node.next

        if(place_in_permutation==0){            //if placing current node first, must change the head

            current_node.next=head          //set the current Node to point to the previous head
            head=current_node           //set the variable head to point to the current node

        }
        else{
            Node temp=head
            for (counter starts at zero. counter is less than place_in_permutation-1.  Each iteration, increment counter){

                counter=counter.next
            }   //at this time, temp should point to the node right before the insertion spot
            current_node.next=temp.next
            temp.next=current_node
        }
        current_node=previous
    }
    size++              //since we add one to the permutation, increase the size of the permutation
}
j++;
previous=current_node
current_node=current_node.next

}

You could probably increase the efficiency if you held on to the most recently added node in case you had to add one to the right of it.

回复收藏 0 原文

折戟 2024-10-14 02:17:08

与 Vlad 的答案类似，这里有一个轻微的改进（统计上）：

算法中的索引是从 1 开始的。

初始化lastR = -1
如果N <= 1 转到步骤6。
在1 和N 之间随机化数字r。

if r != N

4.1 遍历列表到项 r 及其前任项。

如果lastR != -1
如果 r == lastR，则指向第 r 个前一项的指针仍然存在。
如果 r < lastR，从链表开头遍历到它。
如果r> lastR，从lastR的前一项开始遍历到它。

4.2 将列表中的第 r 项删除到结果列表中作为尾部。

4.3 lastR = r

将N减1并转到步骤2。
将结果列表的尾部链接到剩余输入列表的头部。您现在拥有原始列表，其中前 N 个项目已排列。

由于您没有随机访问权限，这将减少您在列表中所需的遍历时间（我假设减少一半，所以渐近地，您不会获得任何东西）。

Similar to Vlad's answer, here is a slight improvement (statistically):

Indices in algorithm are 1 based.

Initialize lastR = -1
If N <= 1 go to step 6.
Randomize number r between 1 and N.

if r != N

4.1 Traverse the list to item r and its predecessor.

If lastR != -1
If r == lastR, your pointer for the of the r'th item predecessor is still there.
If r < lastR, traverse to it from the beginning of the list.
If r > lastR, traverse to it from the predecessor of the lastR'th item.

4.2 remove the r'th item from the list into a result list as the tail.

4.3 lastR = r

Decrease N by one and go to step 2.
link the tail of the result list to the head of the remaining input list. You now have the original list with the first N items permutated.

Since you do not have random access, this will reduce the traversing time you will need within the list (I assume that by half, so asymptotically, you won't gain anything).

回复收藏 0 原文

丿*梦醉红颜 2024-10-14 02:17:08

O(NlogN) 易于实现的解决方案，不需要额外的存储：

假设您想要随机化 L：

L 是否有 1 或 0 个元素，您已完成
< p>创建两个空列表L1和L2
循环L破坏性地将其元素移动到L1或L2，随机选择两者。
重复 L1 和 L2 的过程（递归！）
将 L1 和 L2 连接到 L3
return L3

Update

在第 3 步，L 应分为大小相等 (+-1) 的列表 L1 和 L2，以保证最佳情况复杂性 (N*log N) 。这可以通过动态调整一个元素进入 L1 或 L2 的概率来完成

p(insert element into L1) = (1/2 * len0(L) - len(L1)) / len(L)

：

len(M) is the current number of elements in list M
len0(L) is the number of elements there was in L at the beginning of step 3

O(NlogN) easy to implement solution that does not require extra storage:

Say you want to randomize L:

is L has 1 or 0 elements you are done
create two empty lists L1 and L2
loop over L destructively moving its elements to L1 or L2 choosing between the two at random.
repeat the process for L1 and L2 (recurse!)
join L1 and L2 into L3
return L3

Update

At step 3, L should be divided into equal sized (+-1) lists L1 and L2 in order to guaranty best case complexity (N*log N). That can be done adjusting the probability of one element going into L1 or L2 dynamically:

p(insert element into L1) = (1/2 * len0(L) - len(L1)) / len(L)

where

len(M) is the current number of elements in list M
len0(L) is the number of elements there was in L at the beginning of step 3

回复收藏 0 原文

天暗了我发光 2024-10-14 02:17:08

有一种算法需要 O(sqrt(N)) 空间和 O(N) 时间，对于单链表。

它不会在所有排列序列上生成均匀分布，但它可以给出不易区分的良好排列。基本思想类似于按行和列排列矩阵，如下所述。

算法

设元素大小为N，且m = Floor(sqrt(N))。假设一个“方阵”N = m*m 将使这个方法更加清晰。

在第一遍中，您应该将每 m 个元素分隔的元素的指针存储为 p_0, p_1, p_2, ..., p_m。也就是说，p_0->next->...->next(m times) == p_1 应该为 true。
排列每一行
- 对于 i = 0 到 m 执行以下操作：
- 通过大小为 O 的数组对链接列表中 p_i->next 到 p_(i+1)->next 之间的所有元素进行索引（男）
- 使用标准方法打乱此数组
- 使用此打乱后的数组重新链接元素
排列每一列。
- 初始化数组A来存储指针p_0, ..., p_m。它用于遍历列
- 对于 i = 0 到 m 执行
- 通过大小为 m 的数组对链接列表中指向 A[0], A[1], ..., A[m-1] 的所有元素进行索引>
- 随机排列此数组
- 使用此打乱后的数组重新链接元素
- 将指针前进到下一列A[i] := A[i]->下一个

p_0 是指向第一个元素的元素，而 p_m 则指向最后一个元素。另外，如果 N != m*m，您可以对某些 p_i 使用 m+1 分隔。现在您得到一个“矩阵”，其中 p_i 指向每行的开头。

分析和随机性

空间复杂度：该算法需要O(m)空间来存储行的开头。 O(m) 空间用于存储数组，O(m) 空间用于存储列排列期间的额外指针。因此，时间复杂度约为 O(3*sqrt(N))。对于 N = 1000000，大约有 3000 个条目和 12 kB 内存。
时间复杂度：显然是O(N)。它要么逐行或逐列遍历“矩阵”
随机性：首先要注意的是，每个元素可以按行和列排列到达矩阵中的任何位置。元素可以到达链表中的任何位置，这一点非常重要。其次，虽然它不会生成所有排列序列，但它确实会生成其中的一部分。为了找到排列的数量，我们假设N=m*m，每行排列有m!并且有m行，所以我们有(m! ）^m。如果还包括列排列，则完全等于(m!)^(2*m)，因此几乎不可能得到相同的序列。

强烈建议至少再重复第二步和第三步一次，以获得更加随机的序列。因为它可以将几乎所有的行列相关性抑制到其原始位置。当您的列表不是“方形”时，这一点也很重要。根据您的需要，您可能想要使用更多的重复。使用的重复次数越多，排列就越多，随机性也就越强。我记得可以生成N=9的均匀分布，并且我猜想可以证明随着重复趋于无穷，它与真正的均匀分布相同。

编辑：时间和空间复杂度是严格限制的，并且在任何情况下几乎相同。我想这个空间消耗可以满足你的需求。如果您有任何疑问，您可以在一个小列表中尝试一下，我想您会发现它很有用。

There is an algorithm takes O(sqrt(N)) space and O(N) time, for a singly linked list.

It does not generate a uniform distribution over all permutation sequence, but it can gives good permutation that is not easily distinguishable. The basic idea is similar to permute a matrix by rows and columns as described below.

Algorithm

Let the size of the elements to be N, and m = floor(sqrt(N)). Assuming a "square matrix" N = m*m will make this method much clear.

In the first pass, you should store the pointers of elements that is separated by every m elements as p_0, p_1, p_2, ..., p_m. That is, p_0->next->...->next(m times) == p_1 should be true.
Permute each row
- For i = 0 to m do:
- Index all elements between p_i->next to p_(i+1)->next in the link list by an array of size O(m)
- Shuffle this array using standard method
- Relink the elements using this shuffled array
Permute each column.
- Initialize an array A to store pointers p_0, ..., p_m. It is used to traverse the columns
- For i = 0 to m do
- Index all elements pointed A[0], A[1], ..., A[m-1] in the link list by an array of size m
- Shuffle this array
- Relink the elements using this shuffled array
- Advance the pointer to next column A[i] := A[i]->next

Note that p_0 is an element point to the first element and the p_m point to the last element. Also, if N != m*m, you may use m+1 separation for some p_i instead. Now you get a "matrix" such that the p_i point to the start of each row.

Analysis and randomness

Space complexity: This algorithm need O(m) space to store the start of row. O(m) space to store the array and O(m) space to store the extra pointer during column permutation. Hence, time complexity is ~ O(3*sqrt(N)). For N = 1000000, it is around 3000 entries and 12 kB memory.
Time complexity: It is obviously O(N). It either walk through the "matrix" row by row or column by column
Randomness: The first thing to note is that each element can go to anywhere in the matrix by row and column permutation. It is very important that elements can go to anywhere in the linked list. Second, though it does not generate all permutation sequence, it does generate part of them. To find the number of permutation, we assume N=m*m, each row permutation has m! and there is m row, so we have (m!)^m. If column permutation is also include, it is exactly equal to (m!)^(2*m), so it is almost impossible to get the same sequence.

It is highly recommended to repeat the second and third step by at least one more time to get an more random sequence. Because it can suppress almost all the row and column correlation to its original location. It is also important when your list is not "square". Depends on your need, you may want to use even more repetition. The more repetition you use, the more permutation it can be and the more random it is. I remember that it is possible to generate uniform distribution for N=9 and I guess that it is possible to prove that as repetition tends to infinity, it is the same as the true uniform distribution.

Edit: The time and space complexity is tight bound and is almost the same in any situation. I think this space consumption can satisfy your need. If you have any doubt, you may try it in a small list and I think you will find it useful.

回复收藏 0 原文

独守阴晴ぅ圆缺 2024-10-14 02:17:08

下面的列表随机化器的复杂度为 O(N*log N) ，内存使用量为 O(1)。

它基于我的另一篇文章中描述的递归算法，修改为迭代而不是递归，以消除 O(logN) 内存使用。

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

typedef struct node {
    struct node *next;
    char *str;
} node;


unsigned int
next_power_of_two(unsigned int v) {
    v--;
    v |= v >> 1;
    v |= v >> 2;
    v |= v >> 4;
    v |= v >> 8;
    v |= v >> 16;
    return v + 1;
}

void
dump_list(node *l) {
    printf("list:");
    for (; l; l = l->next) printf(" %s", l->str);
    printf("\n");
}

node *
array_to_list(unsigned int len, char *str[]) {
    unsigned int i;
    node *list;
    node **last = &list;
    for (i = 0; i < len; i++) {
        node *n = malloc(sizeof(node));
        n->str = str[i];
        *last = n;
        last = &n->next;
    }
    *last = NULL;
    return list;
}

node **
reorder_list(node **last, unsigned int po2, unsigned int len) {
    node *l = *last;
    node **last_a = last;
    node *b = NULL;
    node **last_b = &b;
    unsigned int len_a = 0;
    unsigned int i;
    for (i = len; i; i--) {
        double pa = (1.0 + RAND_MAX) * (po2 - len_a) / i;
        unsigned int r = rand();
        if (r < pa) {
            *last_a = l;
            last_a = &l->next;
            len_a++;
        }
        else {
            *last_b = l;
            last_b = &l->next;
        }
        l = l->next;
    }
    *last_b = l;
    *last_a = b;
    return last_b;
}

unsigned int
min(unsigned int a, unsigned int b) {
    return (a > b ? b : a);
}

randomize_list(node **l, unsigned int len) {
    unsigned int po2 = next_power_of_two(len);
    for (; po2 > 1; po2 >>= 1) {
        unsigned int j;
        node **last = l;
        for (j = 0; j < len; j += po2)
            last = reorder_list(last, po2 >> 1, min(po2, len - j));
    }
}

int
main(int len, char *str[]) {
    if (len > 1) {
        node *l;
        len--; str++; /* skip program name */
        l = array_to_list(len, str);
        randomize_list(&l, len);
        dump_list(l);
    }
    return 0;
}

/* try as:   a.out list of words foo bar doz li 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
*/

请注意，该版本的算法完全对缓存不友好，递归版本可能会表现得更好！

The list randomizer below has complexity O(N*log N) and O(1) memory usage.

It is based on the recursive algorithm described on my other post modified to be iterative instead of recursive in order to eliminate the O(logN) memory usage.

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

typedef struct node {
    struct node *next;
    char *str;
} node;


unsigned int
next_power_of_two(unsigned int v) {
    v--;
    v |= v >> 1;
    v |= v >> 2;
    v |= v >> 4;
    v |= v >> 8;
    v |= v >> 16;
    return v + 1;
}

void
dump_list(node *l) {
    printf("list:");
    for (; l; l = l->next) printf(" %s", l->str);
    printf("\n");
}

node *
array_to_list(unsigned int len, char *str[]) {
    unsigned int i;
    node *list;
    node **last = &list;
    for (i = 0; i < len; i++) {
        node *n = malloc(sizeof(node));
        n->str = str[i];
        *last = n;
        last = &n->next;
    }
    *last = NULL;
    return list;
}

node **
reorder_list(node **last, unsigned int po2, unsigned int len) {
    node *l = *last;
    node **last_a = last;
    node *b = NULL;
    node **last_b = &b;
    unsigned int len_a = 0;
    unsigned int i;
    for (i = len; i; i--) {
        double pa = (1.0 + RAND_MAX) * (po2 - len_a) / i;
        unsigned int r = rand();
        if (r < pa) {
            *last_a = l;
            last_a = &l->next;
            len_a++;
        }
        else {
            *last_b = l;
            last_b = &l->next;
        }
        l = l->next;
    }
    *last_b = l;
    *last_a = b;
    return last_b;
}

unsigned int
min(unsigned int a, unsigned int b) {
    return (a > b ? b : a);
}

randomize_list(node **l, unsigned int len) {
    unsigned int po2 = next_power_of_two(len);
    for (; po2 > 1; po2 >>= 1) {
        unsigned int j;
        node **last = l;
        for (j = 0; j < len; j += po2)
            last = reorder_list(last, po2 >> 1, min(po2, len - j));
    }
}

int
main(int len, char *str[]) {
    if (len > 1) {
        node *l;
        len--; str++; /* skip program name */
        l = array_to_list(len, str);
        randomize_list(&l, len);
        dump_list(l);
    }
    return 0;
}

/* try as:   a.out list of words foo bar doz li 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
*/

Note that this version of the algorithm is completely cache unfriendly, the recursive version would probably perform much better!

回复收藏 0 原文