哈夫曼树编码

发布于 2024-09-11 04:07:33 字数 3822 浏览 11 评论 0原文

我之前问过的哈夫曼树还有另一个问题！这是代码：

package huffman;

import java.io.FileNotFoundException;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.PriorityQueue;
import java.util.Scanner;

public class Huffman {

    public ArrayList<Frequency> fileReader(String file)
    {
        ArrayList<Frequency> al = new ArrayList<Frequency>();
        Scanner s;
        try {

            s = new Scanner(new FileReader(file)).useDelimiter("");
            while (s.hasNext())
            {
                boolean found = false;
                int i = 0;
                String temp = s.next();
                while(!found)
                {


                    if(al.size() == i && !found)
                    {
                        found = true;
                        al.add(new Frequency(temp, 1));
                    }
                    else if(temp.equals(al.get(i).getString()))
                    {
                        int tempNum = al.get(i).getFreq() + 1;
                        al.get(i).setFreq(tempNum);
                        found = true;
                    }
                    i++;

                }



            }
        } catch (FileNotFoundException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
        return al;
    }
    public Frequency buildTree(ArrayList<Frequency> al)
    {
        Frequency r = al.get(1);
        PriorityQueue<Frequency> pq = new PriorityQueue<Frequency>();
        for(int i = 0; i < al.size(); i++)
        {
            pq.add(al.get(i));          
        }
        /*while(pq.size() > 0)
        {
            System.out.println(pq.remove().getString());
        }*/

        for(int i = 0; i < al.size() - 1; i++)
        {   
           Frequency p = pq.remove(); 
           Frequency q = pq.remove();
           int temp = p.getFreq() + q.getFreq();
           r = new Frequency(null, temp);
           r.left = p; 
           r.right = q;
           pq.add(r);  // put in the correct place in the priority queue
        }
        pq.remove(); // leave the priority queue empty
        return(r); // this is the root of the tree built

    }

    public void inOrder(Frequency top)
    {
        if(top == null)
        {
            return;
        }
        else
        {
            inOrder(top.left);
            System.out.print(top.getString() +", ");
            inOrder(top.right);
            return;
        }
    }
    public void printFreq(ArrayList<Frequency> al)
    {
        for(int i = 0; i < al.size(); i++)
        {
            System.out.println(al.get(i).getString() + "; " + al.get(i).getFreq());
        }
    }

}

现在需要做的是我需要创建一个方法，该方法将搜索树以查找特定字符的二进制代码（011001 等）。最好的方法是什么？我想也许我会在树中进行正常搜索，就好像它是一个 AVL 树，如果它较大，则向右，如果较小，则向左。

但是因为节点不使用整数双精度等，而是仅使用包含字符作为字符串或 null 的对象来表示它不是叶子而只是根。另一种选择是按顺序运行以找到我正在寻找的叶子，但同时我如何确定我是向右走很多次还是向左走很多次才能获得角色。

package huffman;

public class Frequency implements Comparable  {
    private String s;
    private int n;
    public Frequency left;
    public Frequency right;

    Frequency(String s, int n)
    {
        this.s = s;
        this.n = n;
    }
    public String getString()
    {
        return s;
    }
    public int getFreq()
    {
        return n;
    }
    public void setFreq(int n)
    {
        this.n = n;
    }
    @Override
    public int compareTo(Object arg0) {
        Frequency other = (Frequency)arg0;

        return n < other.n ? -1 : (n == other.n ? 0 : 1);
    }
}

我想做的是找到实际到达每个字符的二进制代码。因此，如果我尝试对 aabbbcccc 进行编码，我将如何创建一个字符串来保存二进制代码，左转为 0，右转为 1。

令我困惑的是，因为您无法确定任何内容在哪里是因为树明显不平衡，无法确定角色是在你所在位置的右侧还是左侧。因此，您必须搜索整个树，但如果到达的节点不是您要查找的节点，则必须回溯到另一个根以到达其他叶子。

原文

My Huffman tree which I had asked about earlier has another problem! Here is the code:

package huffman;

import java.io.FileNotFoundException;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.PriorityQueue;
import java.util.Scanner;

public class Huffman {

    public ArrayList<Frequency> fileReader(String file)
    {
        ArrayList<Frequency> al = new ArrayList<Frequency>();
        Scanner s;
        try {

            s = new Scanner(new FileReader(file)).useDelimiter("");
            while (s.hasNext())
            {
                boolean found = false;
                int i = 0;
                String temp = s.next();
                while(!found)
                {


                    if(al.size() == i && !found)
                    {
                        found = true;
                        al.add(new Frequency(temp, 1));
                    }
                    else if(temp.equals(al.get(i).getString()))
                    {
                        int tempNum = al.get(i).getFreq() + 1;
                        al.get(i).setFreq(tempNum);
                        found = true;
                    }
                    i++;

                }



            }
        } catch (FileNotFoundException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
        return al;
    }
    public Frequency buildTree(ArrayList<Frequency> al)
    {
        Frequency r = al.get(1);
        PriorityQueue<Frequency> pq = new PriorityQueue<Frequency>();
        for(int i = 0; i < al.size(); i++)
        {
            pq.add(al.get(i));          
        }
        /*while(pq.size() > 0)
        {
            System.out.println(pq.remove().getString());
        }*/

        for(int i = 0; i < al.size() - 1; i++)
        {   
           Frequency p = pq.remove(); 
           Frequency q = pq.remove();
           int temp = p.getFreq() + q.getFreq();
           r = new Frequency(null, temp);
           r.left = p; 
           r.right = q;
           pq.add(r);  // put in the correct place in the priority queue
        }
        pq.remove(); // leave the priority queue empty
        return(r); // this is the root of the tree built

    }

    public void inOrder(Frequency top)
    {
        if(top == null)
        {
            return;
        }
        else
        {
            inOrder(top.left);
            System.out.print(top.getString() +", ");
            inOrder(top.right);
            return;
        }
    }
    public void printFreq(ArrayList<Frequency> al)
    {
        for(int i = 0; i < al.size(); i++)
        {
            System.out.println(al.get(i).getString() + "; " + al.get(i).getFreq());
        }
    }

}

What needs to be done now is I need to create a method that will search through the tree to find the binary code (011001 etc) to the specific character. What is the best way to do this? I thought maybe I would do a normal search through the tree as if it were an AVL tree going to the right if its bigger or left if it's smaller.

But because the nodes don't use ints doubles etc. but only using objects that contain characters as strings or null to signify its not a leaf but only a root. The other option would be to do an in-order run through to find the leaf that I'm looking for but at the same time how would I determine if I went right so many times or left so many times to get the character.

package huffman;

public class Frequency implements Comparable  {
    private String s;
    private int n;
    public Frequency left;
    public Frequency right;

    Frequency(String s, int n)
    {
        this.s = s;
        this.n = n;
    }
    public String getString()
    {
        return s;
    }
    public int getFreq()
    {
        return n;
    }
    public void setFreq(int n)
    {
        this.n = n;
    }
    @Override
    public int compareTo(Object arg0) {
        Frequency other = (Frequency)arg0;

        return n < other.n ? -1 : (n == other.n ? 0 : 1);
    }
}

What I'm trying to do is find the binary code to actually get to each character. So if I were trying to encode aabbbcccc how would I create a string holding the binary code for a going left is 0 and going right is 1.

What has me confused is because you can't determine where anything is because the tree is obviously unbalanced and there is no determining if a character is right or left of where you are. So you have to search through the whole tree but if you get to a node that isn't what you are looking for, you have backtrack to another root to get to the other leaves.

分享到QQ

分享到微博

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

需要登录才能够评论，你可以免费注册一个本站的账号。

固执像三岁 2024-09-18 04:07:33

遍历哈夫曼树节点，得到一个像 {'a': "1001", 'b': "10001"} 等映射。您可以使用这个映射来获取特定字符的二进制代码。

如果您需要反向执行，只需将其作为状态机处理即可：

state = huffman_root
for each bit
    if (state.type == 'leaf')
        output(state.data);
        state = huffman_root
    state = state.leaves[bit]

老实说，我没有查看您的代码。如何处理这棵奇特的树应该是很明显的。

Traverse through the huffman tree nodes to get a map like {'a': "1001", 'b': "10001"} etc. You can use this map to get the binary code to a specific character.

If you need to do in reverse, just handle it as a state machine:

state = huffman_root
for each bit
    if (state.type == 'leaf')
        output(state.data);
        state = huffman_root
    state = state.leaves[bit]

Honestly said, I didn't look into your code. It ought be pretty obvious what to do with the fancy tree.

回复收藏 0 原文

诠释孤独 2024-09-18 04:07:33

请记住，如果你有 1001，你永远不会有 10010 或 10011。所以你的基本方法看起来像这样（伪代码）：

if(input == thisNode.key) return thisNode.value
if(input.endsWith(1)) return search(thisNode.left)
else return search(thisNode.right)

我没有阅读你的程序来弄清楚如何集成它，但这是霍夫曼的关键要素简而言之编码

尝试这样的事情 - 你正在尝试找到令牌。因此，如果您想查找“10010”的字符串，您可以执行 search(root,"10010")

String search(Frequency top, String token) {
    return search(top,token,0);
}

// depending on your tree, you may have to switch top.left and top.right
String search(Frequency top, String token, int depth) {
    if(token.length() == depth) return "NOT FOUND";
    if(token.length() == depth - 1) return top.getString();
    if(token.charAt(depth) == '0') return search(top.left,token,depth+1);
    else return search(top.right,token,depth+1);

}

Remember, if you have 1001, you will never have a 10010 or 10011. So your basic method looks like this (in pseudocode):

if(input == thisNode.key) return thisNode.value
if(input.endsWith(1)) return search(thisNode.left)
else return search(thisNode.right)

I didn't read your program to figure out how to integrate it, but that's a key element of huffman encoding in a nutshell

Try something like this - you're trying to find token. So if you wanted to find the String for "10010", you'd do search(root,"10010")

String search(Frequency top, String token) {
    return search(top,token,0);
}

// depending on your tree, you may have to switch top.left and top.right
String search(Frequency top, String token, int depth) {
    if(token.length() == depth) return "NOT FOUND";
    if(token.length() == depth - 1) return top.getString();
    if(token.charAt(depth) == '0') return search(top.left,token,depth+1);
    else return search(top.right,token,depth+1);

}

回复收藏 0 原文

把回忆走一遍 2024-09-18 04:07:33

当我尝试霍夫曼编码编码树时，我考虑了两个选择。

选项 1：使用基于指针的二叉树。我对其中的大部分内容进行了编码，然后觉得，要从叶子追踪树以找到编码，我需要父指针。否则，就像本文中提到的那样，您对树进行搜索，这并不是立即找到编码的解决方案。基于指针的树的缺点是，树中的每个节点都必须有 3 个指针，我认为这太多了。遵循指针的代码很简单，但比选项 2 中的代码更复杂。

选项 2：使用基于数组的树来表示将在运行中用于编码和解码的编码树。所以如果你想要一个字符的编码，你可以在数组中找到该字符。非常简单，我用了一张桌子，就在那里我得到了叶子。现在我追踪到数组中索引 1 处的根。我为父级执行 (current_index / 2) 。如果子索引是父索引/2，则为左索引，否则为右索引。

选项 2 很容易编写代码，尽管数组可以有空格。我认为它的性能比基于指针的树更好。除了识别根和叶之外，现在是索引问题而不是对象类型问题。 ;) 如果您必须发送您的树，这也将非常有用！？

另外，在解码霍夫曼代码时，您不会搜索 (root, 10110) 。您只需在编码比特流中遍历树，根据您的比特向左或向右移动，当您到达叶子时，就输出字符。

希望这有帮助。

Harisankar Krishna Swamy（示例)

回复收藏 0 原文