优化 Trie 实现

发布于 2024-09-11 03:00:37 字数 2460 浏览 2 评论 0原文

除了好玩之外,我今天实现了一个 Trie 。目前它支持 add() 和 search(),remove() 也应该实现,但我认为这相当简单。

它功能齐全,但是用数据填充 Trie 有点太不符合我的口味了。我使用此列表作为数据源: http://www.isc.ro/lists/twl06 .zip (在 SO 的其他地方找到)。加载大约需要 11 秒。我最初的实现花了大约 15 秒,所以我已经给了它一个很好的性能提升,但我仍然不满意:)

我的问题是:还有什么可以给我带来(实质性的)性能提升?我不受这个设计的约束,彻底修改是可以接受的。

class Trie
{
    private $trie;
    public function __construct(TrieNode $trie = null)
    {
        if($trie !== null) $this->trie = $trie;
        else $this->trie = new TrieNode();
        $this->counter = 0;
    }
    public function add($value, $val = null)
    {
        $str = '';
        $trie_ref = $this->trie;
        foreach(str_split($value) as $char)
        {
            $str .= $char;
            $trie_ref = $trie_ref->addNode($str);
        }
        $trie_ref->value = $val;
        return true;
    }
    public function search($value, $only_words = false)
    {
        if($value === '') return $this->trie;
        $trie_ref = $this->trie;
        $str = '';
        foreach(str_split($value) as $char)
        {
            $str .= $char;
            if($trie_ref = $trie_ref->getNode($str))
            {
                if($str === $value) return ($only_words ? $this->extractWords($trie_ref) : new self($trie_ref));
                continue;
            }
            return false;
        }
        return false;
    }
    public function extractWords(TrieNode $trie)
    {
        $res = array();
        foreach($trie->getChildren() as $child)
        {
            if($child->value !== null) $res[] = $child->value;
            if($child->hasChildren()) $res = array_merge($res, $this->extractWords($child));
        }
        return $res;
    }
}
class TrieNode
{
    public $value;
    protected $children = array();
    public function addNode($index)
    {
        if(isset($this->children[$index])) return $this->children[$index];
        return $this->children[$index] = new self();
    }
    public function getNode($index)
    {
        return (isset($this->children[$index]) ? $this->children[$index] : false);
    }
    public function getChildren()
    {
        return $this->children;
    }
    public function hasChildren()
    {
        return count($this->children)>0;
    }
}

For no reason other than fun I implemented a Trie today. At the moment it supports add() and search(), remove() should also be implemented but I think that's fairly straight forward.

It is fully functional, but filling the Trie with data takes a little too much for my taste. I'm using this list as datasource: http://www.isc.ro/lists/twl06.zip (found somewhere else on SO). It takes ~11s to load. My initial implementation took ~15s so I already gave it a nice performance boost, but I'm still not satisfied :)

My question is: what else could give me a (substantial) performance boost? I'm not bound by this design, a complete overhaul is acceptable.

class Trie
{
    private $trie;
    public function __construct(TrieNode $trie = null)
    {
        if($trie !== null) $this->trie = $trie;
        else $this->trie = new TrieNode();
        $this->counter = 0;
    }
    public function add($value, $val = null)
    {
        $str = '';
        $trie_ref = $this->trie;
        foreach(str_split($value) as $char)
        {
            $str .= $char;
            $trie_ref = $trie_ref->addNode($str);
        }
        $trie_ref->value = $val;
        return true;
    }
    public function search($value, $only_words = false)
    {
        if($value === '') return $this->trie;
        $trie_ref = $this->trie;
        $str = '';
        foreach(str_split($value) as $char)
        {
            $str .= $char;
            if($trie_ref = $trie_ref->getNode($str))
            {
                if($str === $value) return ($only_words ? $this->extractWords($trie_ref) : new self($trie_ref));
                continue;
            }
            return false;
        }
        return false;
    }
    public function extractWords(TrieNode $trie)
    {
        $res = array();
        foreach($trie->getChildren() as $child)
        {
            if($child->value !== null) $res[] = $child->value;
            if($child->hasChildren()) $res = array_merge($res, $this->extractWords($child));
        }
        return $res;
    }
}
class TrieNode
{
    public $value;
    protected $children = array();
    public function addNode($index)
    {
        if(isset($this->children[$index])) return $this->children[$index];
        return $this->children[$index] = new self();
    }
    public function getNode($index)
    {
        return (isset($this->children[$index]) ? $this->children[$index] : false);
    }
    public function getChildren()
    {
        return $this->children;
    }
    public function hasChildren()
    {
        return count($this->children)>0;
    }
}

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(2

内心旳酸楚 2024-09-18 03:00:37

不知道 php,但是

在以下方法中:

   public function add($value, $val = null) 
    { 
        $str = ''; 
        $trie_ref = $this->trie; 
        foreach(str_split($value) as $char) 
        { 
            $str .= $char; 
            $trie_ref = $trie_ref->addNode($str); 
        } 
        $trie_ref->value = $val; 
        return true; 
    } 
    public function search($value, $only_words = false) 
    { 
        if($value === '') return $this->trie; 
        $trie_ref = $this->trie; 
        $str = ''; 
        foreach(str_split($value) as $char) 
        { 
            $str .= $char; 
            if($trie_ref = $trie_ref->getNode($str)) 
            { 
                if($str === $value) return ($only_words ? $this->extractWords($trie_ref) : new self($trie_ref)); 
                continue; 
            } 
            return false; 
        } 
        return false; 
    } 

为什么你甚至需要 $str .= $char (我认为是追加)?这本身会将您的 O(n) 时间加法/搜索更改为 Omega(n^2)(n 是 $value 的长度)而不是 O(n)。

在 trie 中,您通常在遍历字符串的同时遍历 trie,即根据当前字符而不是当前前缀查找下一个节点。

Don't know php but,

in the following methods:

   public function add($value, $val = null) 
    { 
        $str = ''; 
        $trie_ref = $this->trie; 
        foreach(str_split($value) as $char) 
        { 
            $str .= $char; 
            $trie_ref = $trie_ref->addNode($str); 
        } 
        $trie_ref->value = $val; 
        return true; 
    } 
    public function search($value, $only_words = false) 
    { 
        if($value === '') return $this->trie; 
        $trie_ref = $this->trie; 
        $str = ''; 
        foreach(str_split($value) as $char) 
        { 
            $str .= $char; 
            if($trie_ref = $trie_ref->getNode($str)) 
            { 
                if($str === $value) return ($only_words ? $this->extractWords($trie_ref) : new self($trie_ref)); 
                continue; 
            } 
            return false; 
        } 
        return false; 
    } 

Why do you even need the $str .= $char (which I suppose is append)? This itself changes your O(n) time addition/searching to Omega(n^2) (n is length of $value) instead of O(n).

In a trie, you usually walk the trie while walking the string i.e you find the next node based on the current character, rather than the current prefix.

幻梦 2024-09-18 03:00:37

我想这个实现是针对 Key|value 类型的插入和查找?这是处理[英语]单词的一个。

class Trie {


static function insert_word(Node $root, $text) 
{
    $v = $root;
    foreach(str_split($text) as $char) {
    $next = $v->children[$char];
        if ($next === null)
        {
            $v->children[$char] = $next = new Node();
        }
        $v = $next;
    }

    $v->leaf = true;
}


static function get_words_sorted(Node $node, $text) 
{

    $res = array();  
    for($ch = 0; $ch < 128; $ch++) {
    $child = $node->children[chr($ch)];

        if ($child !== null)
        {
            $res = array_merge($res, Trie::get_words_sorted($child, $text . chr($ch)));

        }
    }
    if ($node->leaf === true) 
    {
        $res[] = $text;
    }
    return $res;

}

static function search(Node $root, $text) 
{
    $v = $root;
    while($v !== null)
    {
        foreach(str_split($text) as $char) {
            $next = $v->children[$char];
            if ($next === null)
            {
                return false;
            }
            else
            {
                $v = $next;
            }
        }

        if($v->leaf === true)
        {
            return true;
        }
        else
        {
            return false;
        }

    }
    return false;

}

}


class Node {

    public $children;
    public $leaf;


    function __construct()
    {
        $children = Array();

    }
}

用法示例

    $root = new Node();
    $words = Array("an", "ant", "all", "allot", "alloy", "aloe", "are", "ate", "be");


    for ($i = 0; $i < sizeof($words); $i++)
    {

        Trie::insert_word($root, $words[$i]);
    }

    $search_words = array("alloy", "ant", "bee", "aren't", "allot");

    foreach($search_words as $word)
    {
        if(Trie::search($root, $word) === true)
        {
            echo $word . " IS in my dictionary<br/>";
        }
        else
        {
            echo $word . " is NOT in my dictionary <br/>";
        }
    }

I suppose this implementation is for a Key|value type of insertion and lookup? Here is one that handles [English] words.

class Trie {


static function insert_word(Node $root, $text) 
{
    $v = $root;
    foreach(str_split($text) as $char) {
    $next = $v->children[$char];
        if ($next === null)
        {
            $v->children[$char] = $next = new Node();
        }
        $v = $next;
    }

    $v->leaf = true;
}


static function get_words_sorted(Node $node, $text) 
{

    $res = array();  
    for($ch = 0; $ch < 128; $ch++) {
    $child = $node->children[chr($ch)];

        if ($child !== null)
        {
            $res = array_merge($res, Trie::get_words_sorted($child, $text . chr($ch)));

        }
    }
    if ($node->leaf === true) 
    {
        $res[] = $text;
    }
    return $res;

}

static function search(Node $root, $text) 
{
    $v = $root;
    while($v !== null)
    {
        foreach(str_split($text) as $char) {
            $next = $v->children[$char];
            if ($next === null)
            {
                return false;
            }
            else
            {
                $v = $next;
            }
        }

        if($v->leaf === true)
        {
            return true;
        }
        else
        {
            return false;
        }

    }
    return false;

}

}


class Node {

    public $children;
    public $leaf;


    function __construct()
    {
        $children = Array();

    }
}

Example usage

    $root = new Node();
    $words = Array("an", "ant", "all", "allot", "alloy", "aloe", "are", "ate", "be");


    for ($i = 0; $i < sizeof($words); $i++)
    {

        Trie::insert_word($root, $words[$i]);
    }

    $search_words = array("alloy", "ant", "bee", "aren't", "allot");

    foreach($search_words as $word)
    {
        if(Trie::search($root, $word) === true)
        {
            echo $word . " IS in my dictionary<br/>";
        }
        else
        {
            echo $word . " is NOT in my dictionary <br/>";
        }
    }
~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文