使用 trie 的 Python 拼写检查器

发布于 2024-12-10 19:45:26 字数 1136 浏览 0 评论 0原文

我正在尝试使用 trie 数据结构实现拼写检查器。我目前对 Node 有以下概要:

class Node:
    def __init__(self):
        self.next = {}
        self.word_marker = False

    def add_item(self, string):
        #if the length of the string is 0 then return. When the end of the word 
        #comes set the word_marker to true
        if len(string) == 0:
            self.word_marker = True
            return
        #set the key to the first letter of the string and reformat the string to reflect the first letter taken out
        #ultimately going to store the letters for each node as the key
        key = string[0]
        string = string[1:]

        #if there is a key in the dictionary, then recursively call add_item with new string
        if key in self.next:
            self.next[key].add_item(string)

        else:
            node = Node()
            self.next[key] = node
            node.add_item(string)

我想做的下一件事是编写函数来搜索 string,然后返回建议的拼写。 (def Correct(self, string))。我将如何通过这个 trie 来实现搜索?假设我已经通过定义根节点 root 将单词列表添加到 trie,然后对列表中的每个单词使用 add_item

I am trying to implement a spell checker using a trie data structure. I currently have the following outline for a Node:

class Node:
    def __init__(self):
        self.next = {}
        self.word_marker = False

    def add_item(self, string):
        #if the length of the string is 0 then return. When the end of the word 
        #comes set the word_marker to true
        if len(string) == 0:
            self.word_marker = True
            return
        #set the key to the first letter of the string and reformat the string to reflect the first letter taken out
        #ultimately going to store the letters for each node as the key
        key = string[0]
        string = string[1:]

        #if there is a key in the dictionary, then recursively call add_item with new string
        if key in self.next:
            self.next[key].add_item(string)

        else:
            node = Node()
            self.next[key] = node
            node.add_item(string)

The next thing that I want to do is write the function to search for a string and then return a suggested spelling. (def correct(self, string)). How would I go through this trie to implement the search? Assume that I already added a list of words to the trie by defining a root node root, then using add_item for each of the words in the list.

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(3

梦幻之岛 2024-12-17 19:45:26

如果您还没有阅读过,您可能需要查看 Norvig 的“如何编写拼写校正器” '

If you haven't already, you might want to check out Norvig's 'How to Write a Spelling Corrector'

白况 2024-12-17 19:45:26

There's an answer to related to your question here: https://stackoverflow.com/a/11016430/793956 .

Also consider using libraries such as
https://github.com/kmike/marisa-trie#readme or
https://github.com/kmike/datrie#readme

源来凯始玺欢你 2024-12-17 19:45:26

虽然不是直接答案,但您可能希望从更开发的 Trie 实现开始,如下所示:

class Trie:

    def __init__(self):
        self.__final = False
        self.__nodes = {}

    def __repr__(self):
        return 'Trie<len={}, final={}>'.format(len(self), self.__final)

    def __getstate__(self):
        return self.__final, self.__nodes

    def __setstate__(self, state):
        self.__final, self.__nodes = state

    def __len__(self):
        return len(self.__nodes)

    def __bool__(self):
        return self.__final

    def __contains__(self, array):
        try:
            return self[array]
        except KeyError:
            return False

    def __iter__(self):
        yield self
        for node in self.__nodes.values():
            yield from node

    def __getitem__(self, array):
        return self.__get(array, False)

    def create(self, array):
        self.__get(array, True).__final = True

    def read(self):
        yield from self.__read([])

    def update(self, array):
        self[array].__final = True

    def delete(self, array):
        self[array].__final = False

    def prune(self):
        for key, value in tuple(self.__nodes.items()):
            if not value.prune():
                del self.__nodes[key]
        if not len(self):
            self.delete([])
        return self

    def __get(self, array, create):
        if array:
            head, *tail = array
            if create and head not in self.__nodes:
                self.__nodes[head] = Trie()
            return self.__nodes[head].__get(tail, create)
        return self

    def __read(self, name):
        if self.__final:
            yield name
        for key, value in self.__nodes.items():
            yield from value.__read(name + [key])

Though not a direct answer, you might want to start with a more developed Trie implementation like this:

class Trie:

    def __init__(self):
        self.__final = False
        self.__nodes = {}

    def __repr__(self):
        return 'Trie<len={}, final={}>'.format(len(self), self.__final)

    def __getstate__(self):
        return self.__final, self.__nodes

    def __setstate__(self, state):
        self.__final, self.__nodes = state

    def __len__(self):
        return len(self.__nodes)

    def __bool__(self):
        return self.__final

    def __contains__(self, array):
        try:
            return self[array]
        except KeyError:
            return False

    def __iter__(self):
        yield self
        for node in self.__nodes.values():
            yield from node

    def __getitem__(self, array):
        return self.__get(array, False)

    def create(self, array):
        self.__get(array, True).__final = True

    def read(self):
        yield from self.__read([])

    def update(self, array):
        self[array].__final = True

    def delete(self, array):
        self[array].__final = False

    def prune(self):
        for key, value in tuple(self.__nodes.items()):
            if not value.prune():
                del self.__nodes[key]
        if not len(self):
            self.delete([])
        return self

    def __get(self, array, create):
        if array:
            head, *tail = array
            if create and head not in self.__nodes:
                self.__nodes[head] = Trie()
            return self.__nodes[head].__get(tail, create)
        return self

    def __read(self, name):
        if self.__final:
            yield name
        for key, value in self.__nodes.items():
            yield from value.__read(name + [key])
~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文