高效地从 byte[] 数组中提取任意长度的位序列

发布于 2024-09-26 00:26:27 字数 7080 浏览 5 评论 0原文

我正在寻找在任意位置提取任意长度（0 <= length <= 16）的（无符号）位序列的最有效方法。骨架类显示了我当前的实现本质上如何处理问题：

public abstract class BitArray {

byte[] bytes = new byte[2048];
int bitGet;

public BitArray() {
}

public void readNextBlock(int initialBitGet, int count) {
    // substitute for reading from an input stream 
    for (int i=(initialBitGet>>3); i<=count; ++i) {
        bytes[i] = (byte) i;
    }
    prepareBitGet(initialBitGet, count);
}

public abstract void prepareBitGet(int initialBitGet, int count);

public abstract int getBits(int count);

static class Version0 extends BitArray {
    public void prepareBitGet(int initialBitGet, int count) {
        bitGet = initialBitGet;
    }

    public int getBits(int len) {
        // intentionally gives meaningless result
        bitGet += len;
        return 0;
    }
}

static class Version1 extends BitArray {
    public void prepareBitGet(int initialBitGet, int count) {
        bitGet = initialBitGet - 1;
    }

    public int getBits(int len) {
        int byteIndex = bitGet;
        bitGet = byteIndex + len;
        int shift = 23 - (byteIndex & 7) - len;
        int mask = (1 << len) - 1;
        byteIndex >>= 3;
        return (((bytes[byteIndex] << 16) | 
               ((bytes[++byteIndex] & 0xFF) <<  8) |
                (bytes[++byteIndex] & 0xFF)) >> shift) & mask;
    }
}

static class Version2 extends BitArray {
    static final int[] mask = { 0x0, 0x1, 0x3, 0x7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF,
                0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF };

    public void prepareBitGet(int initialBitGet, int count) {
        bitGet = initialBitGet;
    }

    public int getBits(int len) {
        int offset = bitGet;
        bitGet = offset + len;
        int byteIndex = offset >> 3; // originally used /8
        int bitIndex = offset & 7;   // originally used %8
        if ((bitIndex + len) > 16) {
            return ((bytes[byteIndex] << 16 |
                    (bytes[byteIndex + 1] & 0xFF) << 8 |
                    (bytes[byteIndex + 2] & 0xFF)) >> (24 - bitIndex - len)) & mask[len];
        } else if ((offset + len) > 8) {
            return ((bytes[byteIndex] << 8 |
                    (bytes[byteIndex + 1] & 0xFF)) >> (16 - bitIndex - len)) & mask[len];
        } else {
            return (bytes[byteIndex] >> (8 - offset - len)) & mask[len];
        }
    }
}

static class Version3 extends BitArray {
    int[] ints = new int[2048];

    public void prepareBitGet(int initialBitGet, int count) {
        bitGet = initialBitGet;
        int put_i = (initialBitGet >> 3) - 1;
        int get_i = put_i;
        int buf;
        buf = ((bytes[++get_i] & 0xFF) << 16) |
              ((bytes[++get_i] & 0xFF) <<  8) |
               (bytes[++get_i] & 0xFF);
        do {
            buf = (buf << 8) | (bytes[++get_i] & 0xFF);
            ints[++put_i] = buf;
        } while (get_i < count);
    }

    public int getBits(int len) {
        int bit_idx = bitGet;
        bitGet = bit_idx + len;
        int shift = 32 - (bit_idx & 7) - len;
        int mask = (1 << len) - 1;
        int int_idx = bit_idx >> 3;
        return (ints[int_idx] >> shift) & mask;
    }
}

static class Version4 extends BitArray {
    int[] ints = new int[1024];

    public void prepareBitGet(int initialBitGet, int count) {
        bitGet = initialBitGet;
        int g = initialBitGet >> 3;
        int p = (initialBitGet >> 4) - 1;
        final byte[] b = bytes;
        int t = (b[g]  <<  8) | (b[++g] & 0xFF);
        final int[] i = ints;
        do {
            i[++p] = (t = (t << 16) | ((b[++g] & 0xFF) <<8) | (b[++g] & 0xFF));
        } while (g < count);
    }

    public int getBits(final int len) {
        final int i;
        bitGet = (i = bitGet) + len;
        return (ints[i >> 4] >> (32 - len - (i & 15))) & ((1 << len) - 1);
    }
}

public void benchmark(String label) {
    int checksum = 0;
    readNextBlock(32, 1927);
    long time = System.nanoTime();
    for (int pass=1<<18; pass>0; --pass) {
        prepareBitGet(32, 1927);
        for (int i=2047; i>=0; --i) {
            checksum += getBits(i & 15);
        }
    }
    time = System.nanoTime() - time;
    System.out.println(label+" took "+Math.round(time/1E6D)+" ms, checksum="+checksum);
    try { // avoid having the console interfere with our next measurement
        Thread.sleep(369);
    } catch (InterruptedException e) {}
}

public static void main(String[] argv) {
    BitArray test;
    // for the sake of getting a little less influence from the OS for stable measurement
    Thread.currentThread().setPriority(Thread.MAX_PRIORITY);
    while (true) {
        test = new Version0();
        test.benchmark("no implementaion");
        test = new Version1();
        test.benchmark("Durandal's (original)");
        test = new Version2();
        test.benchmark("blitzpasta's (adapted)");
        test = new Version3();
        test.benchmark("MSN's (posted)");
        test = new Version4();
        test.benchmark("MSN's (half-buffer modification)");
        System.out.println("--- next pass ---");
    }
}
}

这可行，但我正在寻找一个更有效的解决方案（性能方面）。字节数组保证相对较小，在几个字节到最大 ~1800 个字节之间。在每次调用 read 方法之间，该数组仅被读取一次（完整）。 getBits() 中不需要进行任何错误检查，例如超出数组等。

看来我上面的最初问题还不够清楚。 N 位的“位序列”形成 N 位的整数，我需要以最小的开销提取这些整数。我没有使用字符串，因为这些值要么用作查找索引，要么直接输入到某些计算中。所以基本上，上面显示的骨架是一个真正的类， getBits() 签名显示了其余代码如何与其交互。

将示例代码扩展为微基准测试，包括 blitzpasta 的解决方案（修复了缺失字节掩码）。在我的旧 AMD 机器上，结果为 ~11400ms 与 ~38000ms。仅供参考：除法和模运算会破坏性能。如果将 /8 替换为 >>3，将 %8 替换为 &7，则两种解决方案都是彼此非常接近（jdk1.7.0ea104）。

对于如何工作以及做什么似乎有些困惑。示例代码的第一篇原始文章包含一个 read() 方法，用于指示填充字节缓冲区的位置和时间。当代码变成微基准时，这个问题就消失了。我重新介绍它是为了让这一点更清楚一些。这个想法是通过添加 BitArray 的另一个子类来击败所有现有版本，该子类需要实现 getBits() 和prepareBitGet()，后者可能是空的。 不要改变基准测试来为您的解决方案提供优势，所有现有解决方案都可以做同样的事情，这使得这是一个完全没有实际意义的优化！（真的！！）

我添加了一个 Version0，它除了增加 bitGet 状态之外什么也不做。它总是返回 0 以粗略地了解基准测试开销有多大。它只是用于比较。

此外，还添加了对 MSN 想法的改编（版本 3）。为了让所有竞争对手保持公平和可比性，字节数组填充现在是基准测试的一部分，也是一个准备步骤（见上文）。最初MSN的解决方案做得不太好，在准备int[]缓冲区方面有很多开销。我冒昧地稍微优化了这个步骤，这使它成为一个激烈的竞争对手:) 您可能还会发现我对您的代码进行了一些简化。您的 getBit() 可以压缩为 3 行代码，可能会减少百分之一或百分之二。我故意这样做是为了保持代码的可读性，并且因为其他版本也没有尽可能简洁（同样是为了可读性）。

结论（上面的代码示例更新以包含基于所有适用贡献的版本）。在我的旧 AMD 机器（Sun JRE 1.6.0_21）上，它们显示为：

V0 no Implementaion take 5384 ms
V1 Durandal（原始）花费了 10283 毫秒
V2 blitzpasta（改编）花费了 12212 毫秒
V3 MSN（已发布）花费了 11030 毫秒
V4 MSN（半缓冲区修改）花费了 9700 毫秒

注意：在此基准测试中，每次调用 getBits() 平均获取 7.5 位，并且每个位仅读取一次。由于 V3/V4 必须支付较高的初始化成本，因此它们往往会通过更多、更短的获取来表现出更好的运行时行为（因此平均获取大小越接近最大值 16 越糟糕）。尽管如此，V4 在所有场景中仍略微领先于所有其他产品。在实际应用中，必须考虑缓存争用，因为 V3/v4 所需的额外空间可能会增加缓存未命中率，以至于 V0 是更好的选择。如果要多次遍历数组，则应优先考虑 V4，因为它的获取速度比其他数组更快，并且在第一次遍历后可以分摊昂贵的初始化费用。

原文

I'm looking for the most efficient way of extracting (unsigned) bit sequences of arbitrary length (0 <= length <= 16) at arbitrary position. The skeleton class show how my current implementation essentially handles the problem:

public abstract class BitArray {

byte[] bytes = new byte[2048];
int bitGet;

public BitArray() {
}

public void readNextBlock(int initialBitGet, int count) {
    // substitute for reading from an input stream 
    for (int i=(initialBitGet>>3); i<=count; ++i) {
        bytes[i] = (byte) i;
    }
    prepareBitGet(initialBitGet, count);
}

public abstract void prepareBitGet(int initialBitGet, int count);

public abstract int getBits(int count);

static class Version0 extends BitArray {
    public void prepareBitGet(int initialBitGet, int count) {
        bitGet = initialBitGet;
    }

    public int getBits(int len) {
        // intentionally gives meaningless result
        bitGet += len;
        return 0;
    }
}

static class Version1 extends BitArray {
    public void prepareBitGet(int initialBitGet, int count) {
        bitGet = initialBitGet - 1;
    }

    public int getBits(int len) {
        int byteIndex = bitGet;
        bitGet = byteIndex + len;
        int shift = 23 - (byteIndex & 7) - len;
        int mask = (1 << len) - 1;
        byteIndex >>= 3;
        return (((bytes[byteIndex] << 16) | 
               ((bytes[++byteIndex] & 0xFF) <<  8) |
                (bytes[++byteIndex] & 0xFF)) >> shift) & mask;
    }
}

static class Version2 extends BitArray {
    static final int[] mask = { 0x0, 0x1, 0x3, 0x7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF,
                0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF };

    public void prepareBitGet(int initialBitGet, int count) {
        bitGet = initialBitGet;
    }

    public int getBits(int len) {
        int offset = bitGet;
        bitGet = offset + len;
        int byteIndex = offset >> 3; // originally used /8
        int bitIndex = offset & 7;   // originally used %8
        if ((bitIndex + len) > 16) {
            return ((bytes[byteIndex] << 16 |
                    (bytes[byteIndex + 1] & 0xFF) << 8 |
                    (bytes[byteIndex + 2] & 0xFF)) >> (24 - bitIndex - len)) & mask[len];
        } else if ((offset + len) > 8) {
            return ((bytes[byteIndex] << 8 |
                    (bytes[byteIndex + 1] & 0xFF)) >> (16 - bitIndex - len)) & mask[len];
        } else {
            return (bytes[byteIndex] >> (8 - offset - len)) & mask[len];
        }
    }
}

static class Version3 extends BitArray {
    int[] ints = new int[2048];

    public void prepareBitGet(int initialBitGet, int count) {
        bitGet = initialBitGet;
        int put_i = (initialBitGet >> 3) - 1;
        int get_i = put_i;
        int buf;
        buf = ((bytes[++get_i] & 0xFF) << 16) |
              ((bytes[++get_i] & 0xFF) <<  8) |
               (bytes[++get_i] & 0xFF);
        do {
            buf = (buf << 8) | (bytes[++get_i] & 0xFF);
            ints[++put_i] = buf;
        } while (get_i < count);
    }

    public int getBits(int len) {
        int bit_idx = bitGet;
        bitGet = bit_idx + len;
        int shift = 32 - (bit_idx & 7) - len;
        int mask = (1 << len) - 1;
        int int_idx = bit_idx >> 3;
        return (ints[int_idx] >> shift) & mask;
    }
}

static class Version4 extends BitArray {
    int[] ints = new int[1024];

    public void prepareBitGet(int initialBitGet, int count) {
        bitGet = initialBitGet;
        int g = initialBitGet >> 3;
        int p = (initialBitGet >> 4) - 1;
        final byte[] b = bytes;
        int t = (b[g]  <<  8) | (b[++g] & 0xFF);
        final int[] i = ints;
        do {
            i[++p] = (t = (t << 16) | ((b[++g] & 0xFF) <<8) | (b[++g] & 0xFF));
        } while (g < count);
    }

    public int getBits(final int len) {
        final int i;
        bitGet = (i = bitGet) + len;
        return (ints[i >> 4] >> (32 - len - (i & 15))) & ((1 << len) - 1);
    }
}

public void benchmark(String label) {
    int checksum = 0;
    readNextBlock(32, 1927);
    long time = System.nanoTime();
    for (int pass=1<<18; pass>0; --pass) {
        prepareBitGet(32, 1927);
        for (int i=2047; i>=0; --i) {
            checksum += getBits(i & 15);
        }
    }
    time = System.nanoTime() - time;
    System.out.println(label+" took "+Math.round(time/1E6D)+" ms, checksum="+checksum);
    try { // avoid having the console interfere with our next measurement
        Thread.sleep(369);
    } catch (InterruptedException e) {}
}

public static void main(String[] argv) {
    BitArray test;
    // for the sake of getting a little less influence from the OS for stable measurement
    Thread.currentThread().setPriority(Thread.MAX_PRIORITY);
    while (true) {
        test = new Version0();
        test.benchmark("no implementaion");
        test = new Version1();
        test.benchmark("Durandal's (original)");
        test = new Version2();
        test.benchmark("blitzpasta's (adapted)");
        test = new Version3();
        test.benchmark("MSN's (posted)");
        test = new Version4();
        test.benchmark("MSN's (half-buffer modification)");
        System.out.println("--- next pass ---");
    }
}
}

This works, but I'm looking for a more efficient solution (performance wise). The byte array is guaranteed to be relatively small, between a few bytes up to a max of ~1800 bytes. The array is read exactly once (completely) between each call to the read method. There is no need for any error checking in getBits(), such as exceeding the array etc.

It seems my initial question above isn't clear enough. A "bit sequence" of N bits forms an integer of N bits, and I need to extract those integers with minimal overhead. I have no use for strings, as the values are either used as lookup indices or are directly fed into some computation. So basically, the skeleton shown above is a real class and getBits() signature shows how the rest of the code interacts with it.

Extendet the example code into a microbenchmark, included blitzpasta's solution (fixed missing byte masking). On my old AMD box it turns out as ~11400ms vs ~38000ms. FYI: Its the divide and modulo operations that kill the performance. If you replace /8 with >>3 and %8 with &7, both solutions are pretty close to each other (jdk1.7.0ea104).

There seemed to be a bit confusion about how and what to work on. The first, original post of the example code included a read() method to indicate where and when the byte buffer was filled. This got lost when the code was turned into the microbench. I re-introduced it to make this a little clearer.
The idea is to beat all existing versions by adding another subclass of BitArray which need to implement getBits() and prepareBitGet(), the latter may be empty. Do not change the benchmarking to give your solution an advantage, the same could be done for all the existing solutions, making this a completely moot optimization! (really!!)

I added a Version0, which does nothing but increment the bitGet state. It always returns 0 to get a rough idea how big the benchmark overhead is. Its only there for comparison.

Also, an adaption on MSN's idea was added (Version3). To keep things fair and comparable for all competitors, the byte array filling is now part of the benchmark, as well as a preparatory step (see above). Originally MSN's solution did not do so well, there was lots of overhead in preparing the int[] buffer. I took the liberty of optimizing the step a little, which turned it into a fierce competitor :)
You might also find that I de-convoluted your code a little. Your getBit() could be condensed into a 3-liner, probably shaving off one or two percent. I deliberately did this to keep the code readable and because the other versions aren't as condensed as possible either (again for readability).

Conclusion (code example above update to include versions based on all applicable contributions). On my old AMD box (Sun JRE 1.6.0_21), they come out as:

V0 no implementaion took 5384 ms
V1 Durandal's (original) took 10283 ms
V2 blitzpasta's (adapted) took 12212 ms
V3 MSN's (posted) took 11030 ms
V4 MSN's (half-buffer modification) took 9700 ms

Notes: In this benchmark an average of 7.5 bits is fetched per call to getBits(), and each bit is only read once. Since V3/V4 have to pay a high initialization cost, they tend to show better runtime behavior with more, shorter fetches (and consequently worse the closer to the maximum of 16 the average fetch size gets). Still, V4 stays slightly ahead of all others in all scenarios.
In an actual application, the cache contention must be taken into account, since the extra space needed for V3/v4 may increase cache misses to a point where V0 would be a better choice. If the array is to be traversed more than once, V4 should be favored, since it fetches faster than every other and the costly initialization is amortized after the fist pass.

分享到QQ

分享到微博

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

需要登录才能够评论，你可以免费注册一个本站的账号。

神仙妹妹 2024-10-03 00:26:27

如果您只想将无符号位序列作为 int。

static final int[] lookup = {0x0, 0x1, 0x3, 0x7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF };

/*
 * bytes: byte array, with the bits indexed from 0 (MSB) to (bytes.length * 8 - 1) (LSB)
 * offset: index of the MSB of the bit sequence.
 * len: length of bit sequence, must from range [0,16].
 * Not checked for overflow
 */
static int getBitSeqAsInt(byte[] bytes, int offset, int len){

    int byteIndex = offset / 8;
    int bitIndex = offset % 8;
    int val;

    if ((bitIndex + len) > 16) {
        val = ((bytes[byteIndex] << 16 | bytes[byteIndex + 1] << 8 | bytes[byteIndex + 2]) >> (24 - bitIndex - len)) & lookup[len];
    } else if ((offset + len) > 8) {
        val = ((bytes[byteIndex] << 8 | bytes[byteIndex + 1]) >> (16 - bitIndex - len)) & lookup[len];
    } else {
        val = (bytes[byteIndex] >> (8 - offset - len)) & lookup[len];
    }

    return val;
}

如果你想要它作为字符串（马格斯答案的修改）。

static String getBitSequence(byte[] bytes, int offset, int len){

    int byteIndex = offset / 8;
    int bitIndex = offset % 8;
    int count = 0;
    StringBuilder result = new StringBuilder();        

    outer:
    for(int i = byteIndex; i < bytes.length; ++i) {
        for(int j = (1 << (7 - bitIndex)); j > 0; j >>= 1) {
            if(count == len) {
                break outer;
            }                
            if((bytes[byteIndex] & j) == 0) {
                result.append('0');
            } else {
                result.append('1');
            }
            ++count;
        }
        bitIndex = 0;
    }
    return  result.toString();
}

If you just want the unsigned bit sequence as an int.

static final int[] lookup = {0x0, 0x1, 0x3, 0x7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF };

/*
 * bytes: byte array, with the bits indexed from 0 (MSB) to (bytes.length * 8 - 1) (LSB)
 * offset: index of the MSB of the bit sequence.
 * len: length of bit sequence, must from range [0,16].
 * Not checked for overflow
 */
static int getBitSeqAsInt(byte[] bytes, int offset, int len){

    int byteIndex = offset / 8;
    int bitIndex = offset % 8;
    int val;

    if ((bitIndex + len) > 16) {
        val = ((bytes[byteIndex] << 16 | bytes[byteIndex + 1] << 8 | bytes[byteIndex + 2]) >> (24 - bitIndex - len)) & lookup[len];
    } else if ((offset + len) > 8) {
        val = ((bytes[byteIndex] << 8 | bytes[byteIndex + 1]) >> (16 - bitIndex - len)) & lookup[len];
    } else {
        val = (bytes[byteIndex] >> (8 - offset - len)) & lookup[len];
    }

    return val;
}

If you want it as a String (modification of Margus' answer).

static String getBitSequence(byte[] bytes, int offset, int len){

    int byteIndex = offset / 8;
    int bitIndex = offset % 8;
    int count = 0;
    StringBuilder result = new StringBuilder();        

    outer:
    for(int i = byteIndex; i < bytes.length; ++i) {
        for(int j = (1 << (7 - bitIndex)); j > 0; j >>= 1) {
            if(count == len) {
                break outer;
            }                
            if((bytes[byteIndex] & j) == 0) {
                result.append('0');
            } else {
                result.append('1');
            }
            ++count;
        }
        bitIndex = 0;
    }
    return  result.toString();
}

回复收藏 0 原文

情话墙 2024-10-03 00:26:27

好吧，根据您想要将时间与内存拉锯的时间缩短多远，您可以在每个 16 位偏移量处分配一个每 32 位的边表，然后根据 16 位进行掩码和移位offset：

byte[] bytes = new byte[2048];   
int bitGet;   
unsigned int dwords[] = new unsigned int[2046];

public BitArray() {   
    for (int i=0; i<bytes.length; ++i) {   
        bytes[i] = (byte) i;   
    }   

    for (int i= 0; i<dwords.length; ++i) {
        dwords[i]= 
            (bytes[i    ] << 24) | 
            (bytes[i + 1] << 16) | 
            (bytes[i + 2] <<  8) | 
            (bytes[i + 3]);
    }
}   

int getBits(int len)
{
    int offset= bitGet;
    int offset_index= offset>>4;
    int offset_offset= offset & 15;

    return (dwords[offset_index] >> offset_offset) & ((1 << len) - 1);
}

避免分支（代价是内存占用量增加四倍）。查找掩码真的比 (1 << len) - 1 快得多吗？

Well, depending on how far you want to go down the time vs. memory see-saw, you can allocate a side table of every 32-bits at every 16-bit offset and then do a mask and shift based on the 16-bit offset:

byte[] bytes = new byte[2048];   
int bitGet;   
unsigned int dwords[] = new unsigned int[2046];

public BitArray() {   
    for (int i=0; i<bytes.length; ++i) {   
        bytes[i] = (byte) i;   
    }   

    for (int i= 0; i<dwords.length; ++i) {
        dwords[i]= 
            (bytes[i    ] << 24) | 
            (bytes[i + 1] << 16) | 
            (bytes[i + 2] <<  8) | 
            (bytes[i + 3]);
    }
}   

int getBits(int len)
{
    int offset= bitGet;
    int offset_index= offset>>4;
    int offset_offset= offset & 15;

    return (dwords[offset_index] >> offset_offset) & ((1 << len) - 1);
}

You avoid the branching (at the cost of quadrupling your memory footprint). And is looking up the mask really that much faster than (1 << len) - 1?

回复收藏 0 原文

清君侧 2024-10-03 00:26:27

~~只是想知道为什么你不能使用 java.util.BitSet;~~

基本上你能做的就是将整个数据读取为 byte[]，将其转换为 string 格式的二进制文件，并使用 .substring() 等字符串实用程序来完成这项工作。这也将起作用位序列> 16..

假设您有 3 个字节：1, 2, 3 并且您想要提取从第 5 位到第 16 位的位序列。

数字二进制

1      00000001
2      00000010
3      00000011

代码示例：

public static String getRealBinary(byte[] input){
    StringBuilder sb = new StringBuilder();

    for (byte c : input) {
        for (int n =  128; n > 0; n >>= 1){
            if ((c & n) == 0)
                sb.append('0');
            else sb.append('1');
        }
    }

    return sb.toString();
}
public static void main(String[] args) {
    byte bytes[] = new byte[]{1,2,3};
    String sbytes = getRealBinary(bytes);
    System.out.println(sbytes);
    System.out.println(sbytes.substring(5,16));
}

输出：

000000010000001000000011
00100000010

速度：

我进行了1m的测试运行次，在我的计算机上花费了0.995s，所以它相当快：

自己重复测试的代码：

public static void main(String[] args) {
    Random r = new Random();
    byte bytes[] = new byte[4];
    long start, time, total=0;

    for (int i = 0; i < 1000000; i++) {
        r.nextBytes(bytes);
        start = System.currentTimeMillis();
        getRealBinary(bytes).substring(5,16);
        time = System.currentTimeMillis() - start;
        total+=time;
    }
    System.out.println("It took " +total + "ms");
}

~~Just wondering why can't you use java.util.BitSet;~~

Basically what you can do, is to read the whole data as byte[], convert it to binary in string format and use string utilities like .substring() to do the work. This will also work bit sequences > 16.

Lets say you have 3 bytes: 1, 2, 3 and you want to extract bit sequence from 5th to 16th bit.

Number Binary

1      00000001
2      00000010
3      00000011

Code example:

public static String getRealBinary(byte[] input){
    StringBuilder sb = new StringBuilder();

    for (byte c : input) {
        for (int n =  128; n > 0; n >>= 1){
            if ((c & n) == 0)
                sb.append('0');
            else sb.append('1');
        }
    }

    return sb.toString();
}
public static void main(String[] args) {
    byte bytes[] = new byte[]{1,2,3};
    String sbytes = getRealBinary(bytes);
    System.out.println(sbytes);
    System.out.println(sbytes.substring(5,16));
}

Output:

000000010000001000000011
00100000010

Speed:

I did a testrun for 1m times and on my computer it took 0.995s, so its reasonably very fast:

Code to repeat the test yourself:

public static void main(String[] args) {
    Random r = new Random();
    byte bytes[] = new byte[4];
    long start, time, total=0;

    for (int i = 0; i < 1000000; i++) {
        r.nextBytes(bytes);
        start = System.currentTimeMillis();
        getRealBinary(bytes).substring(5,16);
        time = System.currentTimeMillis() - start;
        total+=time;
    }
    System.out.println("It took " +total + "ms");
}

回复收藏 0 原文

烦人精 2024-10-03 00:26:27

您最多需要 16 位，取自字节数组。 16 位最多可以跨越 3 个字节。
这是一个可能的解决方案：

    int GetBits(int bit_index, int bit_length) {
          int byte_offset = bit_index >> 3;
          return ((((((byte_array[byte_offset]<<8)
                    +byte_array[byte_offset+1])<<8)
                    +byte_array[byte_offset+2]))
                   >>(24-(bit_index&7)+bit_length))))
                  &((1<<bit_length)-1);
         }

[未经测试]

如果您经常调用此方法，您应该预先计算 3 个连接字节的 24 位值，并将它们存储到 int 数组中。

我会观察到，如果您在 x86 上用 C 语言进行编码，您甚至不需要预先计算 24 位数组；只需以 32 位值的形式在所需偏移处访问 by te 数组即可。 x86 可以很好地执行未对齐的提取。 [评论者指出，endianess 搞砸了这个问题，所以这不是一个答案，好吧，使用 24 位版本。]

You want at most 16 bits, taken from an array of bytes. 16 bits can span at most 3 bytes.
Here's a possible solution:

    int GetBits(int bit_index, int bit_length) {
          int byte_offset = bit_index >> 3;
          return ((((((byte_array[byte_offset]<<8)
                    +byte_array[byte_offset+1])<<8)
                    +byte_array[byte_offset+2]))
                   >>(24-(bit_index&7)+bit_length))))
                  &((1<<bit_length)-1);
         }

[Untested]

If you call this a lot you should precompute the 24-bit values for the 3 concatenated bytes, and store those into an int array.

I'll observe that if you are coding this in C on an x86, you don't even need to precompute the 24 bit array; simply access the by te array at the desire offset as a 32 bit value. The x86 will do unaligned fetches just fine. [commenter noted that endianess mucks this up, so it isn't an answer, OK, do the 24 bit version.]

回复收藏 0 原文

爱的故事 2024-10-03 00:26:27

由于 Java 7 BitSet 具有 toLongArray 方法，我相信它会完全满足问题的要求：

int subBits = (int) bitSet.get(lowBit, highBit).toLongArray()[0];

它的优点是它适用于大于整数或长整数的序列。它的性能劣势是必须分配一个新的 BitSet 对象和一个新的数组对象来保存结果。

看看它与基准测试中的其他方法相比如何，这真的很有趣。

Since Java 7 BitSet has the toLongArray method, which I believe will do exactly what the question asks for:

int subBits = (int) bitSet.get(lowBit, highBit).toLongArray()[0];

This has the advantage that it works with sequences larger than ints or longs. It has the performance disadvantage that a new BitSet object must be allocated, and a new array object to hold the result.

It would be really interesting to see how this compares with the other methods in the benchmark.

回复收藏 0 原文

~没有更多了~