计算编辑距离

发布于 2024-12-13 09:57:31 字数 1755 浏览 2 评论 0原文

我正在尝试创建一个类,它使用 Levenshtein 距离函数来比较指定文档中的文本与目录中的所有其他文档。

我心里有基本的想法,但我不知道如何用 PHP 编写它。我有 C# 背景,因此我将提供尽可能多的细节。

// Sketch of a class meant to compare one document against every document in a
// directory using levenshtein().
// NOTE(review): this is pseudo-code that mixes C#-style constructs with PHP;
// it does not parse as PHP as written. The notes below mark the spots that
// need real PHP syntax.
class ComputeLevenshtein
{
   // NOTE(review): PHP has no `new Array()`; an empty array literal is `array()`.
   public $filePathList = new Array(); //The array that stores the absolute path of all documents within a specified directory
   public $directory;
   public $filePath; //This is the document that will be compared for each document in a directory

    // NOTE(review): the assignment below has no right-hand side yet (the rest
    // of the line is a comment), so the constructor is unfinished.
    public function __construct() {
        $this->directory = //;  
       /* I'm stuck here, once a user registers, a separate directory is 
          named after the user. I need to be able to read the username 
          from the Session Variable once the user logs in. 
          I'll just have to pass it in as a parameter. 
          Do I have to create a session wrapper? 
          If it's too complex, 
          then I'll just start off with a static directory */
    }

        // Returns the array containing each filePath for every document in a directory.
        // NOTE(review): nothing is returned here; the paths are only appended to
        // $this->filePathList. `for each ... in` and `.add()` are not PHP syntax,
        // and `$filepath` is assigned while `$filePath` (different case) is added.
        function computeFilePathList($directory) 
        {
           for each file in Directory
           {
             $filepath = file.FilePath(); //store the filepath in a variable
             $this->filePathList.add($filePath) //add the filepath to the array
           }

        }  

        // NOTE(review): this function has no name. `$lev` is overwritten on every
        // iteration, so only the distance for the last path is returned.
        // `readDoc()` is called as a plain function, not as `$this->readDoc()`.
        function ($docFilePath) // returns the Levenshtein Distance
        {

            for each path in filePathList
            {
              $input= readDoc($docFilePath);
              $lev = levenshtein($input, readDoc($path));
            }

            return $lev;
        }

    // NOTE(review): `$text` is never assigned inside this function; the actual
    // reading code is said to live elsewhere.
    function readDoc($docFilePath) // Returns the raw text of that doc
    {
      //I have the code for reading the doc in a separate function
      return $text;
    }
}

I'm trying to make a class that uses the Levenshtein distance function to compare the text of a specified document against all the other documents in a directory.

I have the basic idea in mind, but I don't know how to code it in PHP. I come from a C# background, so I'll provide as much detail as possible.

// Sketch of a class meant to compare one document against every document in a
// directory using levenshtein().
// NOTE(review): this is pseudo-code that mixes C#-style constructs with PHP;
// it does not parse as PHP as written. The notes below mark the spots that
// need real PHP syntax.
class ComputeLevenshtein
{
   // NOTE(review): PHP has no `new Array()`; an empty array literal is `array()`.
   public $filePathList = new Array(); //The array that stores the absolute path of all documents within a specified directory
   public $directory;
   public $filePath; //This is the document that will be compared for each document in a directory

    // NOTE(review): the assignment below has no right-hand side yet (the rest
    // of the line is a comment), so the constructor is unfinished.
    public function __construct() {
        $this->directory = //;  
       /* I'm stuck here, once a user registers, a separate directory is 
          named after the user. I need to be able to read the username 
          from the Session Variable once the user logs in. 
          I'll just have to pass it in as a parameter. 
          Do I have to create a session wrapper? 
          If it's too complex, 
          then I'll just start off with a static directory */
    }

        // Returns the array containing each filePath for every document in a directory.
        // NOTE(review): nothing is returned here; the paths are only appended to
        // $this->filePathList. `for each ... in` and `.add()` are not PHP syntax,
        // and `$filepath` is assigned while `$filePath` (different case) is added.
        function computeFilePathList($directory) 
        {
           for each file in Directory
           {
             $filepath = file.FilePath(); //store the filepath in a variable
             $this->filePathList.add($filePath) //add the filepath to the array
           }

        }  

        // NOTE(review): this function has no name. `$lev` is overwritten on every
        // iteration, so only the distance for the last path is returned.
        // `readDoc()` is called as a plain function, not as `$this->readDoc()`.
        function ($docFilePath) // returns the Levenshtein Distance
        {

            for each path in filePathList
            {
              $input= readDoc($docFilePath);
              $lev = levenshtein($input, readDoc($path));
            }

            return $lev;
        }

    // NOTE(review): `$text` is never assigned inside this function; the actual
    // reading code is said to live elsewhere.
    function readDoc($docFilePath) // Returns the raw text of that doc
    {
      //I have the code for reading the doc in a separate function
      return $text;
    }
}

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(1)

紫﹏色ふ单纯 2024-12-20 09:57:31

怎么样:

/**
 * Pair of strings whose Levenshtein distance is computed on demand.
 *
 * The strings are readable through the magic properties `input` and
 * `compare`. The distance is readable through `dist`; it is calculated the
 * first time it is read and served from the stored value afterwards.
 */
class Levenshtein
{
    /** Backing store for the magic properties ('input', 'compare', later 'dist'). */
    private $_values = array();

    /**
     * @param string $input   Text being examined.
     * @param string $compare Text to check $input against.
     */
    public function __construct($input, $compare)
    {
        $this->_values['input'] = $input;
        $this->_values['compare'] = $compare;
    }

    /**
     * Magic read accessor.
     *
     * @param string $property 'input', 'compare' or 'dist'.
     *
     * @return mixed The stored value, or null for any other property name.
     */
    public function __get($property)
    {
        if (!array_key_exists($property, $this->_values)) {
            if ($property !== 'dist') {
                return null;
            }

            // First read of 'dist': compute it once and keep the result.
            $this->_values['dist'] = levenshtein(
                $this->_values['input'],
                $this->_values['compare']
            );
        }

        return $this->_values[$property];
    }
}

/**
 * Compares one reference file against the files of a directory using the
 * Levenshtein distance.
 *
 * Reading a file name as a property (for example `$dirLev->$name` or
 * `$dirLev->{'notes.txt'}`) yields a Levenshtein object built from the
 * reference file's contents and that file's contents. Results are cached per
 * file name.
 */
class DirectoryLevenshtein
{
    /** Directory being scanned; always stored with a trailing slash. */
    private $_directory;

    /** Path of the reference file, or null while none has been set. */
    private $_filePath;

    /** Cache of Levenshtein objects keyed by file name. */
    private $_distances = array();

    /**
     * @param string      $directoryPath Directory whose files are compared.
     * @param string|null $filePath      Optional reference file to compare against.
     *
     * @throws Exception If the directory does not exist or the reference file
     *                   cannot be read.
     */
    public function __construct($directoryPath, $filePath = null)
    {
        if (!is_dir($directoryPath)) {
            throw new Exception("Path '$directoryPath' does not exist");
        }

        if (substr($directoryPath, -1) !== '/') {
            $directoryPath .= '/';
        }

        $this->_directory = $directoryPath;

        if ($filePath !== null && !$this->setFilePath($filePath)) {
            throw new Exception("File '$filePath' is not readable");
        }
    }

    /**
     * Returns the comparison between the reference file and $file.
     *
     * @param string $file File name relative to the directory.
     *
     * @return Levenshtein|null Null when no reference file is set, or when
     *                          either file is not a readable regular file.
     */
    public function __get($file)
    {
        if (array_key_exists($file, $this->_distances)) {
            return $this->_distances[$file];
        }

        if ($this->_filePath === null) {
            return null;
        }

        // is_readable() alone also accepts directories, which
        // file_get_contents() cannot read, so require a regular file.
        $path = $this->_directory . $file;
        if (!is_file($path) || !is_readable($path)) {
            return null;
        }

        $input   = file_get_contents($this->_filePath);
        $compare = file_get_contents($path);
        if ($input === false || $compare === false) {
            return null;
        }

        $this->_distances[$file] = new Levenshtein($input, $compare);

        return $this->_distances[$file];
    }

    /**
     * Lists the entries of the directory without the '.' and '..' entries.
     *
     * Subdirectories are included in the listing, in scandir() order.
     *
     * @return array List of entry names, indexed from 0.
     *
     * @throws Exception If the directory can no longer be read.
     */
    public function getDirectoryContents()
    {
        $entries = scandir($this->_directory);
        if ($entries === false) {
            throw new Exception("Path '{$this->_directory}' could not be read");
        }

        // The dot entries are not guaranteed to come first: names beginning
        // with characters such as '-' or '!' sort before '.'. Remove them by
        // value instead of by position, then reindex.
        return array_values(array_diff($entries, array('.', '..')));
    }

    /**
     * Sets the reference file. It can only be set once; later calls return
     * false (presumably so cached distances never refer to a different
     * reference file).
     *
     * @param string $filePath Path of the reference file.
     *
     * @return bool True if the path was stored, false if one was already set
     *              or the path is not a readable regular file.
     */
    public function setFilePath($filePath)
    {
        // Compare against null rather than using empty(), which would treat a
        // file literally named "0" as "not set".
        if ($this->_filePath === null && is_file($filePath) && is_readable($filePath)) {
            $this->_filePath = $filePath;
            return true;
        }

        return false;
    }
}

要使用它,请执行以下操作:

// Could use a session wrapper instead. Assumes the session has already been
// started and $_SESSION['user'] holds the name of the user's directory.
$userDir = '/path/to/user/dirs/' . $_SESSION['user'];
// File to compare all files with
$filePath = '/path/to/file.txt';

$dirLev = new DirectoryLevenshtein($userDir, $filePath);

// Files in directory
$files = $dirLev->getDirectoryContents();

// Distances
foreach ($files as $file) {
    // Use the loop variable as the property name; `$dirLev->file` would look
    // up an entry literally called "file" on every iteration.
    $comparison = $dirLev->$file;
    if ($comparison === null) {
        continue; // entry could not be compared, nothing to report
    }

    echo "$file: {$comparison->dist}\n";
}

How about this:

/**
 * Pair of strings whose Levenshtein distance is computed on demand.
 *
 * The strings are readable through the magic properties `input` and
 * `compare`. The distance is readable through `dist`; it is calculated the
 * first time it is read and served from the stored value afterwards.
 */
class Levenshtein
{
    /** Backing store for the magic properties ('input', 'compare', later 'dist'). */
    private $_values = array();

    /**
     * @param string $input   Text being examined.
     * @param string $compare Text to check $input against.
     */
    public function __construct($input, $compare)
    {
        $this->_values['input'] = $input;
        $this->_values['compare'] = $compare;
    }

    /**
     * Magic read accessor.
     *
     * @param string $property 'input', 'compare' or 'dist'.
     *
     * @return mixed The stored value, or null for any other property name.
     */
    public function __get($property)
    {
        if (!array_key_exists($property, $this->_values)) {
            if ($property !== 'dist') {
                return null;
            }

            // First read of 'dist': compute it once and keep the result.
            $this->_values['dist'] = levenshtein(
                $this->_values['input'],
                $this->_values['compare']
            );
        }

        return $this->_values[$property];
    }
}

/**
 * Compares one reference file against the files of a directory using the
 * Levenshtein distance.
 *
 * Reading a file name as a property (for example `$dirLev->$name` or
 * `$dirLev->{'notes.txt'}`) yields a Levenshtein object built from the
 * reference file's contents and that file's contents. Results are cached per
 * file name.
 */
class DirectoryLevenshtein
{
    /** Directory being scanned; always stored with a trailing slash. */
    private $_directory;

    /** Path of the reference file, or null while none has been set. */
    private $_filePath;

    /** Cache of Levenshtein objects keyed by file name. */
    private $_distances = array();

    /**
     * @param string      $directoryPath Directory whose files are compared.
     * @param string|null $filePath      Optional reference file to compare against.
     *
     * @throws Exception If the directory does not exist or the reference file
     *                   cannot be read.
     */
    public function __construct($directoryPath, $filePath = null)
    {
        if (!is_dir($directoryPath)) {
            throw new Exception("Path '$directoryPath' does not exist");
        }

        if (substr($directoryPath, -1) !== '/') {
            $directoryPath .= '/';
        }

        $this->_directory = $directoryPath;

        if ($filePath !== null && !$this->setFilePath($filePath)) {
            throw new Exception("File '$filePath' is not readable");
        }
    }

    /**
     * Returns the comparison between the reference file and $file.
     *
     * @param string $file File name relative to the directory.
     *
     * @return Levenshtein|null Null when no reference file is set, or when
     *                          either file is not a readable regular file.
     */
    public function __get($file)
    {
        if (array_key_exists($file, $this->_distances)) {
            return $this->_distances[$file];
        }

        if ($this->_filePath === null) {
            return null;
        }

        // is_readable() alone also accepts directories, which
        // file_get_contents() cannot read, so require a regular file.
        $path = $this->_directory . $file;
        if (!is_file($path) || !is_readable($path)) {
            return null;
        }

        $input   = file_get_contents($this->_filePath);
        $compare = file_get_contents($path);
        if ($input === false || $compare === false) {
            return null;
        }

        $this->_distances[$file] = new Levenshtein($input, $compare);

        return $this->_distances[$file];
    }

    /**
     * Lists the entries of the directory without the '.' and '..' entries.
     *
     * Subdirectories are included in the listing, in scandir() order.
     *
     * @return array List of entry names, indexed from 0.
     *
     * @throws Exception If the directory can no longer be read.
     */
    public function getDirectoryContents()
    {
        $entries = scandir($this->_directory);
        if ($entries === false) {
            throw new Exception("Path '{$this->_directory}' could not be read");
        }

        // The dot entries are not guaranteed to come first: names beginning
        // with characters such as '-' or '!' sort before '.'. Remove them by
        // value instead of by position, then reindex.
        return array_values(array_diff($entries, array('.', '..')));
    }

    /**
     * Sets the reference file. It can only be set once; later calls return
     * false (presumably so cached distances never refer to a different
     * reference file).
     *
     * @param string $filePath Path of the reference file.
     *
     * @return bool True if the path was stored, false if one was already set
     *              or the path is not a readable regular file.
     */
    public function setFilePath($filePath)
    {
        // Compare against null rather than using empty(), which would treat a
        // file literally named "0" as "not set".
        if ($this->_filePath === null && is_file($filePath) && is_readable($filePath)) {
            $this->_filePath = $filePath;
            return true;
        }

        return false;
    }
}

To use it do something like the following:

// Could use a session wrapper instead. Assumes the session has already been
// started and $_SESSION['user'] holds the name of the user's directory.
$userDir = '/path/to/user/dirs/' . $_SESSION['user'];
// File to compare all files with
$filePath = '/path/to/file.txt';

$dirLev = new DirectoryLevenshtein($userDir, $filePath);

// Files in directory
$files = $dirLev->getDirectoryContents();

// Distances
foreach ($files as $file) {
    // Use the loop variable as the property name; `$dirLev->file` would look
    // up an entry literally called "file" on every iteration.
    $comparison = $dirLev->$file;
    if ($comparison === null) {
        continue; // entry could not be compared, nothing to report
    }

    echo "$file: {$comparison->dist}\n";
}
~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文