我的 SHA256 OCaml 实现是否正常?

发布于 2024-07-07 12:03:52 字数 7824 浏览 8 评论 0原文

我是一名 OCaml 程序员新手,我认为尝试实现一个非常棘手的算法会让自己陷入困境。 我对所有大大小小的批评持开放态度,无论是风格方面的、安全方面的还是性能方面的。

我已经意识到的一个批评是该算法需要将整个消息放入内存中(而 SHA256 的参考实现一次可以处理一个块)。

我特别关心是否有任何递归函数不是尾递归的。

我已经测试了代码,它确实在 x86_64 Linux 上生成了正确的消息摘要。

预先感谢您的考虑。

编辑:

如果您没有什么发现,请不要在这里花太多时间。 我正在寻找明显损坏的行为,而不是重写。

let as_bytes bits =
  match (bits mod 8) with
    | 0 -> (bits / 8)
    | _ -> failwith "as_bytes: bits must be multiple of 8"
let as_bits bytes = bytes * 8
let k = [|
    0x428a2f98l; 0x71374491l; 0xb5c0fbcfl; 0xe9b5dba5l;
    0x3956c25bl; 0x59f111f1l; 0x923f82a4l; 0xab1c5ed5l;
    0xd807aa98l; 0x12835b01l; 0x243185bel; 0x550c7dc3l;
    0x72be5d74l; 0x80deb1fel; 0x9bdc06a7l; 0xc19bf174l;
    0xe49b69c1l; 0xefbe4786l; 0x0fc19dc6l; 0x240ca1ccl;
    0x2de92c6fl; 0x4a7484aal; 0x5cb0a9dcl; 0x76f988dal;
    0x983e5152l; 0xa831c66dl; 0xb00327c8l; 0xbf597fc7l;
    0xc6e00bf3l; 0xd5a79147l; 0x06ca6351l; 0x14292967l;
    0x27b70a85l; 0x2e1b2138l; 0x4d2c6dfcl; 0x53380d13l;
    0x650a7354l; 0x766a0abbl; 0x81c2c92el; 0x92722c85l;
    0xa2bfe8a1l; 0xa81a664bl; 0xc24b8b70l; 0xc76c51a3l;
    0xd192e819l; 0xd6990624l; 0xf40e3585l; 0x106aa070l;
    0x19a4c116l; 0x1e376c08l; 0x2748774cl; 0x34b0bcb5l;
    0x391c0cb3l; 0x4ed8aa4al; 0x5b9cca4fl; 0x682e6ff3l;
    0x748f82eel; 0x78a5636fl; 0x84c87814l; 0x8cc70208l;
    0x90befffal; 0xa4506cebl; 0xbef9a3f7l; 0xc67178f2l
  |]
  let hash s =
    let add_int32 x y = Int32.add x y in

    let left_int32 x n = Int32.shift_left x n in
    let right_int32 x n = Int32.shift_right_logical x n in
    let or_int32 x y = Int32.logor x y in
    let xor_int32 x y = Int32.logxor x y in
    let and_int32 x y = Int32.logand x y in
    let not_int32 x = Int32.lognot x in

    let rotate x n = (or_int32 (right_int32 x n) (left_int32 x (32 - n))) in
    let shift x n = right_int32 x n in
    let ch x y z = xor_int32 (and_int32 x y) (and_int32 (not_int32 x) z) in
    let maj x y z = (xor_int32 (and_int32 x y) (xor_int32 (and_int32 x z) (and_int32 y z))) in
    let sum0 x = (xor_int32 (rotate x  2) (xor_int32 (rotate x 13) (rotate x 22))) in
    let sum1 x = (xor_int32 (rotate x  6) (xor_int32 (rotate x 11) (rotate x 25))) in
    let rh00 x = (xor_int32 (rotate x  7) (xor_int32 (rotate x 18) (shift  x  3))) in
    let rh01 x = (xor_int32 (rotate x 17) (xor_int32 (rotate x 19) (shift  x 10))) in

    let as_bytes bits =
      match (bits mod 8) with
        | 0 -> (bits / 8)
        | _ -> failwith "as_bytes: bits must be multiple of 8"
    in
    let as_bits bytes = bytes * 8 in
    let sha = [|
      0x6a09e667l;
      0xbb67ae85l;
      0x3c6ef372l;
      0xa54ff53al;
      0x510e527fl;
      0x9b05688cl;
      0x1f83d9abl;
      0x5be0cd19l
    |]
    in
    let message = Buffer.create (as_bytes 512) in (* smallest possible buffer is at least 512 bits *)
      begin
        Buffer.add_string message s;
        let original_length = as_bits (Buffer.length message) in 
        Buffer.add_char message '\x80'; (* append '1' bit *)
          let pad_start = as_bits (Buffer.length message) in
          let pad_blocks = if (original_length mod 512) < 448 then 1 else 2 in
          let message_length = ((original_length / 512) + pad_blocks) * 512 in
            begin (* appending k bits of 0 (where message_length-64 is our k) *)
              for i = as_bytes pad_start to (as_bytes (message_length - (as_bytes  64)))-8 do
                Buffer.add_char message '\x00'
              done;
              Buffer.add_buffer message (Bin.pack64 (Int64.of_int original_length))
            end
      end;
      let rec process_block i blocks =
        let array_of_block i = 
          let boff = i*(as_bytes 512) in
          let to_int32 x = (Int32.of_int (int_of_char x)) in
          let w = Array.make (as_bytes 512) 0l in
            begin
              for t = 0 to 15 do
                w.(t) <- (or_int32 (left_int32 (to_int32 (Buffer.nth message (boff + (t*4  )))) 24)
                         (or_int32 (left_int32 (to_int32 (Buffer.nth message (boff + (t*4+1)))) 16)
                         (or_int32 (left_int32 (to_int32 (Buffer.nth message (boff + (t*4+2))))  8)
                                               (to_int32 (Buffer.nth message (boff + (t*4+3))))   )));
              done;
              for t = 16 to 63 do
                w.(t) <- add_int32 (add_int32 (rh01 w.(t-2)) w.(t-7)) (add_int32 (rh00 w.(t-15)) w.(t-16))
              done;
              w
            end
        in
          if i = blocks then 
            let sha256 = Buffer.create (as_bytes 256) in
            let rec pack_sha256 i =
              match i with
                | 8 -> Buffer.contents sha256
                | _ ->
                    begin
                      Buffer.add_buffer sha256 (Bin.pack32 sha.(i));
                      pack_sha256 (i+1)
                    end
            in pack_sha256 0
          else
            begin
              let w = array_of_block i in
              let tem = [| 0l; 0l |] in
                begin
                  let a = ref sha.(0) in 
                  let b = ref sha.(1) in
                  let c = ref sha.(2) in
                  let d = ref sha.(3) in 
                  let e = ref sha.(4) in
                  let f = ref sha.(5) in
                  let g = ref sha.(6) in
                  let h = ref sha.(7) in
                    for t = 0 to 63 do
                      begin
                        tem.(0) <- add_int32 (add_int32 !h (sum1 !e)) (add_int32 (ch !e !f !g) (add_int32 k.(t) w.(t)));
                        tem.(1) <- add_int32 (sum0 !a) (maj !a !b !c);
                        h := !g;
                        g := !f;
                        f := !e;
                        e := add_int32 !d tem.(0);
                        d := !c;
                        c := !b;
                        b := !a;
                        a := add_int32 tem.(0) tem.(1);
                       end
                    done;
                    sha.(0) <- add_int32 sha.(0) !a;
                    sha.(1) <- add_int32 sha.(1) !b;
                    sha.(2) <- add_int32 sha.(2) !c;
                    sha.(3) <- add_int32 sha.(3) !d;
                    sha.(4) <- add_int32 sha.(4) !e;
                    sha.(5) <- add_int32 sha.(5) !f;
                    sha.(6) <- add_int32 sha.(6) !g;
                    sha.(7) <- add_int32 sha.(7) !h;

                    (* good faith attempt to clear memory *)
                    for i = 0 to 63 do w.(t) <- 0 done;
                    tem.(0) <- 0; tem.(1) <- 0;
                    a := 0; b := 0; c := 0; d := 0; e := 0; f := 0; g := 0; h := 0;
                end;
            process_block (i+1) blocks
          end
  in process_block 0 ((Buffer.length message) / (as_bytes 512))

  let hexdigits s =
    let rec hexdigits_inner hx i =
      match i with
        | 32 -> hx
        | _ -> hexdigits_inner (hx ^ (Printf.sprintf "%02x" (int_of_char s.[i]))) (i+1)
    in
      hexdigits_inner "" 0

在单独的文件中定义的 pack 函数如下:

let pack64 x = 
  let b = Buffer.create 8 in 
    for i = 0 to 7 do
      let shft = (7-i)*8 in
        Buffer.add_char b (char_of_int (Int64.to_int (Int64.logand (Int64.shift_right x shft) 0xFFL)));
    done;
    b

let pack x n = 
  if (n mod 8) = 0 then
    let n' = n/8 in
    let b = Buffer.create n' in 
      for i = 0 to n'-1 do
        let shft = ((n'-1)-i)*8 in
          Buffer.add_char b (char_of_int (Int32.to_int (Int32.logand (Int32.shift_right x shft) 0xFFl)));
      done;
      b
  else
    raise (Invalid_argument ("pack: " ^ (string_of_int n) ^ " is not a multiple of 8"))

let pack32 x = pack x 32
let pack16 x = pack x 16
let pack8 x = pack x 8

I'm a novice OCaml programmer and thought I would throw myself into the deep end by attempting to implement a very tricky algorithm. I'm open to all criticisms great and small be they stylistic or security or performance related.

One criticism that I'm already aware of is that the algorithm requires the entire message to fit in memory (whereas the reference implementation of SHA256 can process a block at a time).

I'm especially concerned about if any of the recursive-functions are not tail-recursive.

I have tested the code and it does produce proper message digests on x86_64 Linux.

Thanks in advance for your consideration.

Edit:

Should nothing jump out at you please don't spend too much time here. I'm looking for obviously broken behavior, not rewrites.

let as_bytes bits =
  match (bits mod 8) with
    | 0 -> (bits / 8)
    | _ -> failwith "as_bytes: bits must be multiple of 8"
let as_bits bytes = bytes * 8
let k = [|
    0x428a2f98l; 0x71374491l; 0xb5c0fbcfl; 0xe9b5dba5l;
    0x3956c25bl; 0x59f111f1l; 0x923f82a4l; 0xab1c5ed5l;
    0xd807aa98l; 0x12835b01l; 0x243185bel; 0x550c7dc3l;
    0x72be5d74l; 0x80deb1fel; 0x9bdc06a7l; 0xc19bf174l;
    0xe49b69c1l; 0xefbe4786l; 0x0fc19dc6l; 0x240ca1ccl;
    0x2de92c6fl; 0x4a7484aal; 0x5cb0a9dcl; 0x76f988dal;
    0x983e5152l; 0xa831c66dl; 0xb00327c8l; 0xbf597fc7l;
    0xc6e00bf3l; 0xd5a79147l; 0x06ca6351l; 0x14292967l;
    0x27b70a85l; 0x2e1b2138l; 0x4d2c6dfcl; 0x53380d13l;
    0x650a7354l; 0x766a0abbl; 0x81c2c92el; 0x92722c85l;
    0xa2bfe8a1l; 0xa81a664bl; 0xc24b8b70l; 0xc76c51a3l;
    0xd192e819l; 0xd6990624l; 0xf40e3585l; 0x106aa070l;
    0x19a4c116l; 0x1e376c08l; 0x2748774cl; 0x34b0bcb5l;
    0x391c0cb3l; 0x4ed8aa4al; 0x5b9cca4fl; 0x682e6ff3l;
    0x748f82eel; 0x78a5636fl; 0x84c87814l; 0x8cc70208l;
    0x90befffal; 0xa4506cebl; 0xbef9a3f7l; 0xc67178f2l
  |]
  let hash s =
    let add_int32 x y = Int32.add x y in

    let left_int32 x n = Int32.shift_left x n in
    let right_int32 x n = Int32.shift_right_logical x n in
    let or_int32 x y = Int32.logor x y in
    let xor_int32 x y = Int32.logxor x y in
    let and_int32 x y = Int32.logand x y in
    let not_int32 x = Int32.lognot x in

    let rotate x n = (or_int32 (right_int32 x n) (left_int32 x (32 - n))) in
    let shift x n = right_int32 x n in
    let ch x y z = xor_int32 (and_int32 x y) (and_int32 (not_int32 x) z) in
    let maj x y z = (xor_int32 (and_int32 x y) (xor_int32 (and_int32 x z) (and_int32 y z))) in
    let sum0 x = (xor_int32 (rotate x  2) (xor_int32 (rotate x 13) (rotate x 22))) in
    let sum1 x = (xor_int32 (rotate x  6) (xor_int32 (rotate x 11) (rotate x 25))) in
    let rh00 x = (xor_int32 (rotate x  7) (xor_int32 (rotate x 18) (shift  x  3))) in
    let rh01 x = (xor_int32 (rotate x 17) (xor_int32 (rotate x 19) (shift  x 10))) in

    let as_bytes bits =
      match (bits mod 8) with
        | 0 -> (bits / 8)
        | _ -> failwith "as_bytes: bits must be multiple of 8"
    in
    let as_bits bytes = bytes * 8 in
    let sha = [|
      0x6a09e667l;
      0xbb67ae85l;
      0x3c6ef372l;
      0xa54ff53al;
      0x510e527fl;
      0x9b05688cl;
      0x1f83d9abl;
      0x5be0cd19l
    |]
    in
    let message = Buffer.create (as_bytes 512) in (* smallest possible buffer is at least 512 bits *)
      begin
        Buffer.add_string message s;
        let original_length = as_bits (Buffer.length message) in 
        Buffer.add_char message '\x80'; (* append '1' bit *)
          let pad_start = as_bits (Buffer.length message) in
          let pad_blocks = if (original_length mod 512) < 448 then 1 else 2 in
          let message_length = ((original_length / 512) + pad_blocks) * 512 in
            begin (* appending k bits of 0 (where message_length-64 is our k) *)
              for i = as_bytes pad_start to (as_bytes (message_length - (as_bytes  64)))-8 do
                Buffer.add_char message '\x00'
              done;
              Buffer.add_buffer message (Bin.pack64 (Int64.of_int original_length))
            end
      end;
      let rec process_block i blocks =
        let array_of_block i = 
          let boff = i*(as_bytes 512) in
          let to_int32 x = (Int32.of_int (int_of_char x)) in
          let w = Array.make (as_bytes 512) 0l in
            begin
              for t = 0 to 15 do
                w.(t) <- (or_int32 (left_int32 (to_int32 (Buffer.nth message (boff + (t*4  )))) 24)
                         (or_int32 (left_int32 (to_int32 (Buffer.nth message (boff + (t*4+1)))) 16)
                         (or_int32 (left_int32 (to_int32 (Buffer.nth message (boff + (t*4+2))))  8)
                                               (to_int32 (Buffer.nth message (boff + (t*4+3))))   )));
              done;
              for t = 16 to 63 do
                w.(t) <- add_int32 (add_int32 (rh01 w.(t-2)) w.(t-7)) (add_int32 (rh00 w.(t-15)) w.(t-16))
              done;
              w
            end
        in
          if i = blocks then 
            let sha256 = Buffer.create (as_bytes 256) in
            let rec pack_sha256 i =
              match i with
                | 8 -> Buffer.contents sha256
                | _ ->
                    begin
                      Buffer.add_buffer sha256 (Bin.pack32 sha.(i));
                      pack_sha256 (i+1)
                    end
            in pack_sha256 0
          else
            begin
              let w = array_of_block i in
              let tem = [| 0l; 0l |] in
                begin
                  let a = ref sha.(0) in 
                  let b = ref sha.(1) in
                  let c = ref sha.(2) in
                  let d = ref sha.(3) in 
                  let e = ref sha.(4) in
                  let f = ref sha.(5) in
                  let g = ref sha.(6) in
                  let h = ref sha.(7) in
                    for t = 0 to 63 do
                      begin
                        tem.(0) <- add_int32 (add_int32 !h (sum1 !e)) (add_int32 (ch !e !f !g) (add_int32 k.(t) w.(t)));
                        tem.(1) <- add_int32 (sum0 !a) (maj !a !b !c);
                        h := !g;
                        g := !f;
                        f := !e;
                        e := add_int32 !d tem.(0);
                        d := !c;
                        c := !b;
                        b := !a;
                        a := add_int32 tem.(0) tem.(1);
                       end
                    done;
                    sha.(0) <- add_int32 sha.(0) !a;
                    sha.(1) <- add_int32 sha.(1) !b;
                    sha.(2) <- add_int32 sha.(2) !c;
                    sha.(3) <- add_int32 sha.(3) !d;
                    sha.(4) <- add_int32 sha.(4) !e;
                    sha.(5) <- add_int32 sha.(5) !f;
                    sha.(6) <- add_int32 sha.(6) !g;
                    sha.(7) <- add_int32 sha.(7) !h;

                    (* good faith attempt to clear memory *)
                    for i = 0 to 63 do w.(t) <- 0 done;
                    tem.(0) <- 0; tem.(1) <- 0;
                    a := 0; b := 0; c := 0; d := 0; e := 0; f := 0; g := 0; h := 0;
                end;
            process_block (i+1) blocks
          end
  in process_block 0 ((Buffer.length message) / (as_bytes 512))

  let hexdigits s =
    let rec hexdigits_inner hx i =
      match i with
        | 32 -> hx
        | _ -> hexdigits_inner (hx ^ (Printf.sprintf "%02x" (int_of_char s.[i]))) (i+1)
    in
      hexdigits_inner "" 0

The pack functions, defined in a separate file, are thus:

let pack64 x = 
  let b = Buffer.create 8 in 
    for i = 0 to 7 do
      let shft = (7-i)*8 in
        Buffer.add_char b (char_of_int (Int64.to_int (Int64.logand (Int64.shift_right x shft) 0xFFL)));
    done;
    b

let pack x n = 
  if (n mod 8) = 0 then
    let n' = n/8 in
    let b = Buffer.create n' in 
      for i = 0 to n'-1 do
        let shft = ((n'-1)-i)*8 in
          Buffer.add_char b (char_of_int (Int32.to_int (Int32.logand (Int32.shift_right x shft) 0xFFl)));
      done;
      b
  else
    raise (Invalid_argument ("pack: " ^ (string_of_int n) ^ " is not a multiple of 8"))

let pack32 x = pack x 32
let pack16 x = pack x 16
let pack8 x = pack x 8

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(1

身边 2024-07-14 12:03:52

您应该做的第一件事是从标准中获取测试向量,并验证您的实现是否生成完全相同的结果。 如果没有,那就坏了。

如果您有一个“已知的健全”实现(openssl 命令可能有一个),您还可以生成其他测试向量。

最后,使用逐渐变大的文件运行一些性能测试,并与已知的快速实现进行比较(openssl 应该相当快)。 如果失败(耗尽所有内存或速度太慢),您必须对其进行修复。

如果它通过了所有这些测试,那么它应该已经足够好了。 哈希算法不应该有太多安全问题(除非您正在对敏感数据进行哈希处理,否则您必须非常小心地覆盖您使用的所有内存)。

The first thing you should do is get the test vectors from the standard and verify if your implementation generates the exact same result. If it does not, it's broken.

You can also generate other test vectors if you have a "known sane" implementation (the openssl command probably has one).

Finally, run some performance tests with progressively larger files, and compare with a known fast implementation (the openssl one should be quite fast). If it fails (exausting all memory or being too slow), you have to fix something on it.

If it passes all these tests, it should be good enough. There shouldn't be many security concerns with a hashing algorithm (unless you are hashing sensitive data, where you would have to be very careful to overwrite all the memory you used).

~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文