GUID 的高效数据结构

发布于 2024-10-21 23:25:08 字数 215 浏览 12 评论 0原文

我正在寻找一种数据结构,它使我能够快速(最好是 O(1) 快速)确定给定的 GUID 是否是 GUID 集合的成员。

我当前的方法是使用 TDictionary 以 0 作为值。

虽然这工作很快,但使用 Hashmap 重新哈希 GUID(根据定义被认为是唯一的)并让 Dictionary 处理不需要的值似乎是一种浪费。

一定有更好的解决方案,但我找不到。你可以吗?

I am in search of a data structure which enables me to quickly (prefarably O(1)-quickly) determine if a given GUID is a member of a Collection of GUIDs or not.

My current approach is to use a TDictionary with 0 as values.

While this works quickly, it seems to be a waste to use a Hashmap to rehash a GUID, which is by defintion considered to be unique, and to have the Dictionary handle values which are unneeded.

There must be a better solution for this, but I can't find one. Can you?

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(3

两相知 2024-10-28 23:25:08

很少有数据结构提供 O(1) 访问。一个是数组,另一个是 HashMap(David 的回答),我只知道另外一个: Trie。下面是按位 Trie 的简单实现: 具有一些有趣的属性:

  • 由于不发生重新分配,因此不受内存碎片的影响。
  • O(1) 添加和存在测试。当然,O(1)涉及的常数相当大。

代码:

program Project23;

{$APPTYPE CONSOLE}

uses
  SysUtils, Generics.Collections;

type

  PGuidTrieNode=^TGuidTrieNode;
  TGuidTrieNode = record
    Sub:array[Boolean] of PGuidTrieNode;
  end;
  TGuidByteArray = array[0..15] of Byte;

  TGuidTrie = class
  protected
    Root: PGuidTrieNode;
  public
    constructor Create;
    destructor Destroy;override;

    procedure Add(G: TGUID);
    function Exists(G: TGUID): Boolean;
  end;

{ TGuidTrie }

procedure TGuidTrie.Add(G: TGUID);
var GBA: TGuidByteArray absolute G;
    Node: PGuidTrieNode;
    i: Integer;
    Bit: Integer;
    IsBitSet: Boolean;
const BitMask: array[0..7] of Byte = (1, 2, 4, 8, 16, 32, 64, 128);
begin
  Assert(SizeOf(G) = SizeOf(TGuidByteArray));
  Node := Root;
  for i:=0 to High(GBA) do
  begin
    for Bit := 0 to 7 do
    begin
      IsBitSet := (GBA[i] and BitMask[Bit]) <> 0;
      if (i = High(GBA)) and (Bit = 7) then
        begin
          // Payload
          Node.Sub[IsBitSet] := Pointer(1);
        end
      else
        begin
          if not Assigned(Node.Sub[IsBitSet]) then
            Node.Sub[IsBitSet] := GetMemory(SizeOf(TGuidTrieNode));
          Node := Node.Sub[IsBitSet];
        end;
    end;
  end;
end;

constructor TGuidTrie.Create;
begin
  Root := GetMemory(SizeOf(TGuidTrieNode))
end;

destructor TGuidTrie.Destroy;

  procedure KillNode(Node: PGuidTrieNode);
  var i:Integer;
  begin
    if Assigned(Node.Sub[True]) then
        if Node.Sub[True] <> Pointer(1) then
        begin
          KillNode(Node.Sub[True]);
        end;
    FreeMemory(Node);
  end;

begin
  KillNode(Root);
  inherited;
end;

function TGuidTrie.Exists(G: TGUID): Boolean;
var GBA: TGuidByteArray absolute G;
    Node: PGuidTrieNode;
    i: Integer;
    Bit: Integer;
    IsBitSet: Boolean;
const BitMask: array[0..7] of Byte = (1, 2, 4, 8, 16, 32, 64, 128);
begin
  Assert(SizeOf(G) = SizeOf(TGuidByteArray));
  Node := Root;
  for i:=0 to 15 do
  begin
    for Bit := 0 to 7 do
    begin
      IsBitSet := (GBA[i] and BitMask[Bit]) <> 0;
      if not Assigned(Node.Sub[IsBitSet]) then
      begin
        Result := False;
        Exit;
      end;
      Node := Node.Sub[IsBitSet];
    end;
  end;
  Result := True; // Node now contains the Payload
end;

const G1: TGUID = '{68D09F12-3E0D-4963-B32C-4EE3BD90F69C}';
      G2: TGUID = '{BEED37F6-9757-41DC-8463-AF094392652B}';

var T: TGuidTrie;

begin
  try

    T := TGuidTrie.Create;
    try
      if T.Exists(G1) then WriteLn('Exists')
                      else WriteLn('NOT Exists');
      T.Add(G1);
      if T.Exists(G1) then WriteLn('Exists')
                      else WriteLn('NOT Exists');

      if T.Exists(G2) then WriteLn('Exists')
                      else WriteLn('NOT Exists');
      T.Add(G2);
      if T.Exists(G2) then WriteLn('Exists')
                      else WriteLn('NOT Exists');
    finally T.Free;
    end;

  except
    on E: Exception do
      Writeln(E.ClassName, ': ', E.Message);
  end;
end.

Very few data structures offer O(1) access. One's the Array, the other one's the HashMap (David's answer), and I only know one other: The Trie. Here follows a simple implementation of a bit-wise Trie: Has some interesting properties:

  • Immune to memory fragmentation since no re-allocations take place.
  • O(1) add and existence test. Of course, the constant involved in O(1) is fairly large.

The code:

program Project23;

{$APPTYPE CONSOLE}

uses
  SysUtils, Generics.Collections;

type

  PGuidTrieNode=^TGuidTrieNode;
  TGuidTrieNode = record
    Sub:array[Boolean] of PGuidTrieNode;
  end;
  TGuidByteArray = array[0..15] of Byte;

  TGuidTrie = class
  protected
    Root: PGuidTrieNode;
  public
    constructor Create;
    destructor Destroy;override;

    procedure Add(G: TGUID);
    function Exists(G: TGUID): Boolean;
  end;

{ TGuidTrie }

procedure TGuidTrie.Add(G: TGUID);
var GBA: TGuidByteArray absolute G;
    Node: PGuidTrieNode;
    i: Integer;
    Bit: Integer;
    IsBitSet: Boolean;
const BitMask: array[0..7] of Byte = (1, 2, 4, 8, 16, 32, 64, 128);
begin
  Assert(SizeOf(G) = SizeOf(TGuidByteArray));
  Node := Root;
  for i:=0 to High(GBA) do
  begin
    for Bit := 0 to 7 do
    begin
      IsBitSet := (GBA[i] and BitMask[Bit]) <> 0;
      if (i = High(GBA)) and (Bit = 7) then
        begin
          // Payload
          Node.Sub[IsBitSet] := Pointer(1);
        end
      else
        begin
          if not Assigned(Node.Sub[IsBitSet]) then
            Node.Sub[IsBitSet] := GetMemory(SizeOf(TGuidTrieNode));
          Node := Node.Sub[IsBitSet];
        end;
    end;
  end;
end;

constructor TGuidTrie.Create;
begin
  Root := GetMemory(SizeOf(TGuidTrieNode))
end;

destructor TGuidTrie.Destroy;

  procedure KillNode(Node: PGuidTrieNode);
  var i:Integer;
  begin
    if Assigned(Node.Sub[True]) then
        if Node.Sub[True] <> Pointer(1) then
        begin
          KillNode(Node.Sub[True]);
        end;
    FreeMemory(Node);
  end;

begin
  KillNode(Root);
  inherited;
end;

function TGuidTrie.Exists(G: TGUID): Boolean;
var GBA: TGuidByteArray absolute G;
    Node: PGuidTrieNode;
    i: Integer;
    Bit: Integer;
    IsBitSet: Boolean;
const BitMask: array[0..7] of Byte = (1, 2, 4, 8, 16, 32, 64, 128);
begin
  Assert(SizeOf(G) = SizeOf(TGuidByteArray));
  Node := Root;
  for i:=0 to 15 do
  begin
    for Bit := 0 to 7 do
    begin
      IsBitSet := (GBA[i] and BitMask[Bit]) <> 0;
      if not Assigned(Node.Sub[IsBitSet]) then
      begin
        Result := False;
        Exit;
      end;
      Node := Node.Sub[IsBitSet];
    end;
  end;
  Result := True; // Node now contains the Payload
end;

const G1: TGUID = '{68D09F12-3E0D-4963-B32C-4EE3BD90F69C}';
      G2: TGUID = '{BEED37F6-9757-41DC-8463-AF094392652B}';

var T: TGuidTrie;

begin
  try

    T := TGuidTrie.Create;
    try
      if T.Exists(G1) then WriteLn('Exists')
                      else WriteLn('NOT Exists');
      T.Add(G1);
      if T.Exists(G1) then WriteLn('Exists')
                      else WriteLn('NOT Exists');

      if T.Exists(G2) then WriteLn('Exists')
                      else WriteLn('NOT Exists');
      T.Add(G2);
      if T.Exists(G2) then WriteLn('Exists')
                      else WriteLn('NOT Exists');
    finally T.Free;
    end;

  except
    on E: Exception do
      Writeln(E.ClassName, ': ', E.Message);
  end;
end.
泪意 2024-10-28 23:25:08

我认为你已经成功了 99%。

散列听起来是正确的解决方案。利用 GUID 特殊性的明显方法是提供您自己的哈希函数,该函数将构成 GUID 的 4 个 32 位整数组合成一个 32 位整数。我只是对 4 个整数进行异或运算。

我假设您正在使用 Generics.Collections.TDictionary。您可以通过将自定义比较器传递给构造函数来提供自己的哈希函数。我不担心存储备用值,我不认为它会以明显的方式影响性能。

我相信您将 GUID 存储为 128 位整数而不是字符串。

最后,我想到 GUID 的默认比较器可能确实已经以这种方式生成哈希代码。在进行任何更改之前值得检查一下。

编辑

默认哈希代码使用应用于二进制数据的 Bob Jenkins 哈希。异或会更快,但默认的哈希码似乎不会成为性能瓶颈。

换句话说,我认为 TDictionary 将完全满足您的需求。

I think you are 99% of the way there.

Hashing sounds like the right solution. The obvious way to take advantage of the special nature of the GUID is to supply your own hash function which combines into a single 32 bit integer the 4 32 bit integers that make up a GUID. I'd just XOR the 4 integers.

I presume you are using Generics.Collections.TDictionary. You can supply your own hash function by passing a custom comparer to the constructor. I wouldn't worry about storing spare values, I don't think it will affect performance in a discernible way.

I trust that you are storing your GUIDs as 128 bit integers and not as strings.

Finally, it has occurred to me that the default comparer for a GUID might indeed already do the hash code generation this way. It's worth checking that out before making any changes.

EDIT

Default hash code uses Bob Jenkins hash applied to the binary data. An XOR would be faster, but the default hash code doesn't seem like it would be a performance bottleneck.

In other words, I think that TDictionary<TGUID,Integer> will serve your needs perfectly adequately.

一指流沙 2024-10-28 23:25:08
type
    PGuidDictionaryItem = ^TGuidDictionaryItem;

    TGuidDictionaryItem = record
        Key: TGuid;
        Value: Pointer;
        Next: PGuidDictionaryItem;
    end;

    TGuidDictionary = class
    private
    const
        HashSize = 2048;
    var
        Size: integer;
        FTable: array [0..HashSize-1] of PGuidDictionaryItem;

        function GetHashCode(Guid: TGUID): integer;
    public
        constructor Create;
        destructor Destroy; override;

        procedure Add(Key: TGUID; Value: TObject);
        function TryFind(Key: TGUID; out Value: TObject): boolean;
        function Contains(Key: TGUID): Boolean;
        procedure Remove(Key: TGuid);
    end;

{ TGuidDictionary }

procedure TGuidDictionary.Add(Key: TGUID; Value: TObject);
var
    Hc: integer;
    PHi: PGuidDictionaryItem;
begin
    Hc := GetHashCode(Key);

    if FTable[Hc] <> nil then
    begin
        PHi := FTable[Hc];
        repeat
            if TGuidEx.EqualGuids(PHi.Key, Key) then
                Break;

            PHi := Phi.Next;
        until PHi = nil;
    end
    else
        Phi := nil;

    if PHi <> nil then
        PHi.Value := Value
    else
    begin
        New(PHi);
        PHi.Value := Value;
        PHi.Key := Key;
        PHi.Next := FTable[Hc];
        FTable[Hc] := PHi;
    end;
end;

function TGuidDictionary.Contains(Key: TGUID): Boolean;
var
    O: TObject;
begin
    Result := TryFind(Key, O);
end;

constructor TGuidDictionary.Create;
var
    i: integer;
begin
    inherited;

    for i := Low(FTable) to High(FTable) do
        FTable[i] := nil;
end;

destructor TGuidDictionary.Destroy;
var
    i: integer;
    Phi, PhiNext: PGuidDictionaryItem;
begin
    for i := Low(FTable) to High(FTable) do
    begin
        Phi := FTable[i];
        while Phi <> nil do
        begin
            PhiNext := Phi.Next;
            Dispose(Phi);
            Phi := PhiNext;
        end;
    end;

    inherited;
end;

function TGuidDictionary.GetHashCode(Guid: TGUID): integer;
var
    N: array [0..3] of integer absolute Guid;
begin
    Result := Abs(N[0] xor N[1] xor N[2] xor N[3]) mod HashSize;
end;

procedure TGuidDictionary.Remove(Key: TGuid);
var
    Hc: Integer;
    Phi, BeforPhi: PGuidDictionaryItem;

begin
    Hc := GetHashCode(Key);

    BeforPhi := nil;
    Phi := FTable[Hc];
    while (Phi <> nil) and not TGuidEx.EqualGuids(Phi.Key, Key) do
    begin
        BeforPhi := Phi;
        Phi := Phi.Next;
    end;

    if Phi = nil then
        Exit;

    if BeforPhi <> nil then
        BeforPhi.Next := Phi.Next
    else
        FTable[Hc] := Phi.Next;

    Dispose(Phi);
end;

function TGuidDictionary.TryFind(Key: TGUID; out Value: TObject): boolean;
var
    Hc: Integer;
    Phi: PGuidDictionaryItem;
begin
    Hc := GetHashCode(Key);
    Phi := FTable[Hc];
    while (Phi <> nil) and not TGuidEx.EqualGuids(Phi.Key, Key) do
        Phi := Phi.Next;

    if Phi <> nil then
        Value := TObject(Phi.Value)
    else
        Value := nil;

    Result := Phi <> nil;
end;

procedure TestDictMisc.TestGuidDictionary;
const
    G1: TGUID = '{68D09F12-3E0D-4963-B32C-4EE3BD90F69C}';
    G2: TGUID = '{BEED37F6-9757-41DC-8463-AF094392652B}';
var
    T: TGuidDictionary;
    Obj1, Obj2, O: TObject;
begin
    T := TGuidDictionary.Create;
    Obj1 := TObject.Create();
    Obj2 := TObject.Create();
    try
        CheckFalse(T.Contains(G1));

        T.Add(G1, Obj1);
        CheckTrue(T.Contains(G1));

        T.Add(G2, Obj2);
        CheckTrue(T.Contains(G2));

        T.Add(G2, Obj2);
        CheckTrue(T.Contains(G2));

        CheckTrue(T.TryFind(G1, {out} O));
        CheckSame(Obj1, O);

        CheckTrue(T.TryFind(G2, {out} O));
        CheckSame(Obj2, O);

        T.Remove(G1);
        CheckFalse(T.Contains(G1));
        CheckFalse(T.TryFind(G1, {out} O));

        T.Add(G1, Obj1);
        CheckTrue(T.TryFind(G1, {out} O));
        CheckSame(Obj1, O);

    finally
        Obj1.Free();
        Obj2.Free();

        T.Free;
    end;
end;
type
    PGuidDictionaryItem = ^TGuidDictionaryItem;

    TGuidDictionaryItem = record
        Key: TGuid;
        Value: Pointer;
        Next: PGuidDictionaryItem;
    end;

    TGuidDictionary = class
    private
    const
        HashSize = 2048;
    var
        Size: integer;
        FTable: array [0..HashSize-1] of PGuidDictionaryItem;

        function GetHashCode(Guid: TGUID): integer;
    public
        constructor Create;
        destructor Destroy; override;

        procedure Add(Key: TGUID; Value: TObject);
        function TryFind(Key: TGUID; out Value: TObject): boolean;
        function Contains(Key: TGUID): Boolean;
        procedure Remove(Key: TGuid);
    end;

{ TGuidDictionary }

procedure TGuidDictionary.Add(Key: TGUID; Value: TObject);
var
    Hc: integer;
    PHi: PGuidDictionaryItem;
begin
    Hc := GetHashCode(Key);

    if FTable[Hc] <> nil then
    begin
        PHi := FTable[Hc];
        repeat
            if TGuidEx.EqualGuids(PHi.Key, Key) then
                Break;

            PHi := Phi.Next;
        until PHi = nil;
    end
    else
        Phi := nil;

    if PHi <> nil then
        PHi.Value := Value
    else
    begin
        New(PHi);
        PHi.Value := Value;
        PHi.Key := Key;
        PHi.Next := FTable[Hc];
        FTable[Hc] := PHi;
    end;
end;

function TGuidDictionary.Contains(Key: TGUID): Boolean;
var
    O: TObject;
begin
    Result := TryFind(Key, O);
end;

constructor TGuidDictionary.Create;
var
    i: integer;
begin
    inherited;

    for i := Low(FTable) to High(FTable) do
        FTable[i] := nil;
end;

destructor TGuidDictionary.Destroy;
var
    i: integer;
    Phi, PhiNext: PGuidDictionaryItem;
begin
    for i := Low(FTable) to High(FTable) do
    begin
        Phi := FTable[i];
        while Phi <> nil do
        begin
            PhiNext := Phi.Next;
            Dispose(Phi);
            Phi := PhiNext;
        end;
    end;

    inherited;
end;

function TGuidDictionary.GetHashCode(Guid: TGUID): integer;
var
    N: array [0..3] of integer absolute Guid;
begin
    Result := Abs(N[0] xor N[1] xor N[2] xor N[3]) mod HashSize;
end;

procedure TGuidDictionary.Remove(Key: TGuid);
var
    Hc: Integer;
    Phi, BeforPhi: PGuidDictionaryItem;

begin
    Hc := GetHashCode(Key);

    BeforPhi := nil;
    Phi := FTable[Hc];
    while (Phi <> nil) and not TGuidEx.EqualGuids(Phi.Key, Key) do
    begin
        BeforPhi := Phi;
        Phi := Phi.Next;
    end;

    if Phi = nil then
        Exit;

    if BeforPhi <> nil then
        BeforPhi.Next := Phi.Next
    else
        FTable[Hc] := Phi.Next;

    Dispose(Phi);
end;

function TGuidDictionary.TryFind(Key: TGUID; out Value: TObject): boolean;
var
    Hc: Integer;
    Phi: PGuidDictionaryItem;
begin
    Hc := GetHashCode(Key);
    Phi := FTable[Hc];
    while (Phi <> nil) and not TGuidEx.EqualGuids(Phi.Key, Key) do
        Phi := Phi.Next;

    if Phi <> nil then
        Value := TObject(Phi.Value)
    else
        Value := nil;

    Result := Phi <> nil;
end;

procedure TestDictMisc.TestGuidDictionary;
const
    G1: TGUID = '{68D09F12-3E0D-4963-B32C-4EE3BD90F69C}';
    G2: TGUID = '{BEED37F6-9757-41DC-8463-AF094392652B}';
var
    T: TGuidDictionary;
    Obj1, Obj2, O: TObject;
begin
    T := TGuidDictionary.Create;
    Obj1 := TObject.Create();
    Obj2 := TObject.Create();
    try
        CheckFalse(T.Contains(G1));

        T.Add(G1, Obj1);
        CheckTrue(T.Contains(G1));

        T.Add(G2, Obj2);
        CheckTrue(T.Contains(G2));

        T.Add(G2, Obj2);
        CheckTrue(T.Contains(G2));

        CheckTrue(T.TryFind(G1, {out} O));
        CheckSame(Obj1, O);

        CheckTrue(T.TryFind(G2, {out} O));
        CheckSame(Obj2, O);

        T.Remove(G1);
        CheckFalse(T.Contains(G1));
        CheckFalse(T.TryFind(G1, {out} O));

        T.Add(G1, Obj1);
        CheckTrue(T.TryFind(G1, {out} O));
        CheckSame(Obj1, O);

    finally
        Obj1.Free();
        Obj2.Free();

        T.Free;
    end;
end;
~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文