Compressing a byte array in C#

Posted on 2024-09-10 10:05:46


I do not know much about compression algorithms. I am looking for a simple compression algorithm (or code snippet) which can reduce the size of a byte[,,] or byte[]. I cannot make use of System.IO.Compression. Also, the data has lots of repetition.

I tried implementing the RLE algorithm (posted below for your inspection). However, it produces arrays 1.2 to 1.8 times larger than the input.

using System.Collections.Generic;

public static class RLE
{
    // Encodes the input as a flat sequence of (runLength, value) pairs.
    public static byte[] Encode(byte[] source)
    {
        List<byte> dest = new List<byte>();
        byte runLength;

        for (int i = 0; i < source.Length; i++)
        {
            // Count how many times source[i] repeats (capped at 255).
            runLength = 1;
            while (runLength < byte.MaxValue 
                && i + 1 < source.Length 
                && source[i] == source[i + 1])
            {
                runLength++;
                i++;
            }
            dest.Add(runLength);
            dest.Add(source[i]);
        }

        return dest.ToArray();
    }

    public static byte[] Decode(byte[] source)
    {
        List<byte> dest = new List<byte>();
        byte runLength; 

        // Read (runLength, value) pairs and expand each run.
        for (int i = 1; i < source.Length; i += 2)
        {
            runLength = source[i - 1];

            while (runLength > 0)
            {
                dest.Add(source[i]);
                runLength--;
            }
        }
        return dest.ToArray();
    }
}
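The blow-up reported above is easy to reproduce: on data with no repeats, the Encode method emits two bytes (count plus value) per input byte. A quick self-contained check (restating the same Encode logic as above):

```csharp
using System.Collections.Generic;

public static class RleDemo
{
    // Same (runLength, value) pair scheme as the Encode method above,
    // repeated here so this snippet is self-contained.
    public static byte[] Encode(byte[] source)
    {
        var dest = new List<byte>();
        for (int i = 0; i < source.Length; i++)
        {
            byte runLength = 1;
            while (runLength < byte.MaxValue
                && i + 1 < source.Length
                && source[i] == source[i + 1])
            {
                runLength++;
                i++;
            }
            dest.Add(runLength);   // how many times...
            dest.Add(source[i]);   // ...this value repeats
        }
        return dest.ToArray();
    }
}
```

Feeding it 256 distinct bytes yields 512 output bytes (every byte becomes a (1, value) pair), while 256 identical bytes compress to just 4 bytes (a run of 255 plus a run of 1).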

I have also found a Java LZW implementation based on strings and integers. I have converted it to C# and the results look good (code posted below). However, I am not sure how it works, nor how to make it work with bytes instead of strings and integers.

using System;
using System.Collections.Generic;

public class LZW
{
    /* Compress a string to a list of output symbols. */
    public static int[] compress(string uncompressed)
    {
        // Build the dictionary: every single character maps to its own code.
        int dictSize = 256;
        Dictionary<string, int> dictionary = new Dictionary<string, int>();
        for (int i = 0; i < dictSize; i++)
            dictionary.Add("" + (char)i, i);

        string w = "";
        List<int> result = new List<int>();

        for (int i = 0; i < uncompressed.Length; i++)
        {
            char c = uncompressed[i];
            string wc = w + c;
            if (dictionary.ContainsKey(wc))
            {
                // Keep extending the current match.
                w = wc;
            }
            else
            {
                // Emit the code for the longest known prefix...
                result.Add(dictionary[w]);
                // ...and add the new sequence wc to the dictionary.
                dictionary.Add(wc, dictSize++);
                w = "" + c;
            }
        }

        // Output the code for the final w.
        if (w != "")
            result.Add(dictionary[w]);
        return result.ToArray();
    }

    /* Decompress a list of output codes to a string. */
    public static string decompress(int[] compressed)
    {
        // Rebuild the same initial dictionary, inverted.
        int dictSize = 256;
        Dictionary<int, string> dictionary = new Dictionary<int, string>();
        for (int i = 0; i < dictSize; i++)
            dictionary.Add(i, "" + (char)i);

        string w = "" + (char)compressed[0];
        string result = w;
        for (int i = 1; i < compressed.Length; i++)
        {
            int k = compressed[i];
            string entry;
            if (dictionary.ContainsKey(k))
                entry = dictionary[k];
            else if (k == dictSize)
                entry = w + w[0];   // special case: code not yet in dictionary
            else
                throw new ArgumentException("Bad compressed k: " + k);

            result += entry;

            // Mirror the compressor: add w + entry[0] to the dictionary.
            dictionary.Add(dictSize++, w + entry[0]);

            w = entry;
        }

        return result;
    }
}
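To run the LZW above over bytes, one common trick (an assumption here, not the only option) is to map each input byte to a char with the same value, so the string-keyed dictionary works unchanged; packing the resulting int codes into fewer bits would be a separate step. A minimal sketch:

```csharp
using System;
using System.Collections.Generic;
using System.Text;

public static class LzwBytes
{
    // Compress a byte array to LZW codes by treating each byte as a
    // char in the range 0..255 (same scheme as the string version).
    public static int[] Compress(byte[] data)
    {
        int dictSize = 256;
        var dictionary = new Dictionary<string, int>();
        for (int i = 0; i < dictSize; i++)
            dictionary.Add(((char)i).ToString(), i);

        string w = "";
        var result = new List<int>();
        foreach (byte b in data)
        {
            char c = (char)b;          // byte -> char, value preserved
            string wc = w + c;
            if (dictionary.ContainsKey(wc))
                w = wc;
            else
            {
                result.Add(dictionary[w]);
                dictionary.Add(wc, dictSize++);
                w = c.ToString();
            }
        }
        if (w != "")
            result.Add(dictionary[w]);
        return result.ToArray();
    }

    // Inverse of Compress: rebuild the byte array from the codes.
    public static byte[] Decompress(int[] codes)
    {
        int dictSize = 256;
        var dictionary = new Dictionary<int, string>();
        for (int i = 0; i < dictSize; i++)
            dictionary.Add(i, ((char)i).ToString());

        string w = ((char)codes[0]).ToString();
        var sb = new StringBuilder(w);
        for (int i = 1; i < codes.Length; i++)
        {
            int k = codes[i];
            string entry = dictionary.ContainsKey(k)
                ? dictionary[k]
                : w + w[0];            // code not yet in the dictionary
            sb.Append(entry);
            dictionary.Add(dictSize++, w + entry[0]);
            w = entry;
        }

        var bytes = new byte[sb.Length];
        for (int i = 0; i < sb.Length; i++)
            bytes[i] = (byte)sb[i];    // char -> byte, all values < 256
        return bytes;
    }
}
```

A round trip (`Decompress(Compress(data))`) should reproduce the input exactly, since every char used stays below 256.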

Comments (3)

烦人精 2024-09-17 10:05:46


Have a look here. I used this code as a basis for compression in one of my work projects. Not sure how much of the .NET Framework is accessible in the Xbox 360 SDK, so not sure how well this will work for you.

阳光的暖冬 2024-09-17 10:05:46


The problem with that RLE algorithm is that it is too simple. It prefixes every byte with how many times it is repeated, but that does mean that in long ranges of non-repeating bytes, each single byte is prefixed with a "1". On data without any repetitions this will double the file size.

This can be avoided by using Code-type RLE instead; the 'Code' (also called 'Token') will be a byte that can have two meanings; either it indicates how many times the single following byte is repeated, or it indicates how many non-repeating bytes follow that should be copied as they are. The difference between those two codes is made by enabling the highest bit, meaning there are still 7 bits available for the value, meaning the amount to copy or repeat per such code can be up to 127.

This means that even in worst-case scenarios, the final size can only be about 1/127th larger than the original file size.

A good explanation of the whole concept, plus full working (and, in fact, heavily optimised) C# code, can be found here:

http://www.shikadi.net/moddingwiki/RLE_Compression

Note that sometimes, the data will end up larger than the original anyway, simply because there are not enough repeating bytes in it for RLE to work. A good way to deal with such compression failures is to add a header to your final data. If you simply add an extra byte at the start that is 0 for uncompressed data and 1 for RLE-compressed data, then, when RLE fails to give a smaller result, you just save it uncompressed, with the 0 in front, and your final data will be exactly one byte larger than the original. The system at the other side can then read that starting byte and use it to determine whether the following data should be decompressed or used as-is.
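The code/token scheme described above can be sketched roughly like this (a simplified illustration, not the optimised code from the linked page): the high bit of each token selects "repeat" vs "copy", and the low 7 bits carry the count.

```csharp
using System.Collections.Generic;

public static class TokenRle
{
    // Token layout (assumed for this sketch):
    //   0x80 | n : repeat the next single byte n times (1..127)
    //   n        : copy the next n bytes literally     (1..127)
    public static byte[] Encode(byte[] src)
    {
        var dest = new List<byte>();
        int i = 0;
        while (i < src.Length)
        {
            // Measure the run starting at i (capped at 127).
            int run = 1;
            while (run < 127 && i + run < src.Length && src[i + run] == src[i])
                run++;

            if (run >= 3)              // runs of 3+ pay for their token
            {
                dest.Add((byte)(0x80 | run));
                dest.Add(src[i]);
                i += run;
            }
            else
            {
                // Collect literals until a run of 3+ starts (or 127 reached).
                int start = i, lit = 0;
                while (lit < 127 && i < src.Length)
                {
                    int r = 1;
                    while (r < 3 && i + r < src.Length && src[i + r] == src[i])
                        r++;
                    if (r >= 3) break; // a worthwhile run begins here
                    i++;
                    lit++;
                }
                dest.Add((byte)lit);
                for (int j = 0; j < lit; j++)
                    dest.Add(src[start + j]);
            }
        }
        return dest.ToArray();
    }

    public static byte[] Decode(byte[] src)
    {
        var dest = new List<byte>();
        int i = 0;
        while (i < src.Length)
        {
            byte token = src[i++];
            int count = token & 0x7F;
            if ((token & 0x80) != 0)   // repeat token
            {
                byte value = src[i++];
                for (int j = 0; j < count; j++) dest.Add(value);
            }
            else                       // literal token
            {
                for (int j = 0; j < count; j++) dest.Add(src[i++]);
            }
        }
        return dest.ToArray();
    }
}
```

Because a literal token covers up to 127 bytes, the worst-case overhead is one byte per 127, which is where the 1/127 figure above comes from.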

南风起 2024-09-17 10:05:46


Look into Huffman codes; it's a pretty simple algorithm. Basically, use fewer bits for patterns that show up more often, and keep a table of how each one is encoded. And your codewords must account for the fact that there are no separators to help you decode.
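As a concrete illustration of that idea (a minimal sketch, not a full encoder): build the tree by repeatedly merging the two least frequent nodes, then read each symbol's codeword off its path from the root. Bit-packing the output is left out for brevity.

```csharp
using System.Collections.Generic;
using System.Linq;

public static class HuffmanSketch
{
    class Node
    {
        public int Freq;
        public byte Symbol;
        public Node Left, Right;
    }

    // Returns a table mapping each distinct byte to its Huffman code
    // as a string of '0'/'1' characters (bit-packing omitted).
    public static Dictionary<byte, string> BuildCodes(byte[] data)
    {
        // Count symbol frequencies.
        var freq = new Dictionary<byte, int>();
        foreach (byte b in data)
            freq[b] = freq.TryGetValue(b, out int f) ? f + 1 : 1;

        // Repeatedly merge the two least frequent nodes.
        var queue = new List<Node>(
            freq.Select(kv => new Node { Symbol = kv.Key, Freq = kv.Value }));
        while (queue.Count > 1)
        {
            queue.Sort((a, b) => a.Freq - b.Freq);
            Node lo = queue[0], hi = queue[1];
            queue.RemoveRange(0, 2);
            queue.Add(new Node { Freq = lo.Freq + hi.Freq, Left = lo, Right = hi });
        }

        var codes = new Dictionary<byte, string>();
        Walk(queue[0], "", codes);
        return codes;
    }

    static void Walk(Node n, string prefix, Dictionary<byte, string> codes)
    {
        if (n.Left == null)            // leaf: record this symbol's codeword
            codes[n.Symbol] = prefix.Length > 0 ? prefix : "0";
        else
        {
            Walk(n.Left, prefix + "0", codes);
            Walk(n.Right, prefix + "1", codes);
        }
    }
}
```

The resulting codes are prefix-free (no codeword is a prefix of another), which is exactly why no separators are needed when decoding the concatenated bit stream.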
