Word Break

发布于 2025-02-22 13:01:32 字数 4459 浏览 0 评论 0 收藏 0

tags: [ DP_Sequence ]

Source

leetcode: Word Break | LeetCode OJ
lintcode: (107) Word Break

Given a string s and a dictionary of words dict, determine if s can be
segmented into a space-separated sequence of one or more dictionary words.

For example, given
s = "leetcode",
dict = ["leet", "code"].

Return true because "leetcode" can be segmented as "leet code".

题解

单序列( DP_Sequence ) DP 题，由单序列动态规划的四要素可大致写出：

State: f[i] 表示前 i 个字符能否根据词典中的词被成功分词。
Function: f[i] = or{f[j] && s[j+1..i] in dict, 0 <= j < i} , 含义为：只要存在某个小于 i 的索引 j，使得 f[j] 为真且第 j+1 到第 i 个字符组成的子串能在词典中找到， f[i] 即为真，否则为假。具体实现可分为自顶向下或者自底向上。
Initialization: f[0] = true , 数组长度为字符串长度 + 1，便于处理。
Answer: f[s.length]

考虑到单词长度通常不会太长，内层循环只需向前回溯至多“词典中最长单词长度”个字符即可，故在 s 较长时使用自底向上效率更高。

Python

class Solution:
  def wordBreak(self, s, wordDict):
    """Decide whether ``s`` can be split into a sequence of dictionary words.

    Bottom-up DP: ``can_break[k]`` is True when the first ``k`` characters
    of ``s`` can be segmented into words from ``wordDict``.

    Args:
      s: the string to segment.
      wordDict: a collection of words (a set, per the problem statement).

    Returns:
      True if ``s`` is empty or can be fully segmented, False otherwise
      (including when ``wordDict`` is empty and ``s`` is not).
    """
    if not s:
      return True
    if not wordDict:
      return False

    max_word_len = max(len(w) for w in wordDict)
    can_break = [False] * (len(s) + 1)
    can_break[0] = True  # the empty prefix is trivially segmentable
    # `range` (not the Python 2-only `xrange`) keeps this runnable on Python 3.
    for end in range(1, len(s) + 1):
      # No dictionary word is longer than max_word_len, so only start
      # positions within that distance of `end` can match.
      lowest_start = max(end - max_word_len, 0)
      for start in range(end - 1, lowest_start - 1, -1):
        if can_break[start] and s[start:end] in wordDict:
          can_break[end] = True
          break
    return can_break[-1]

C++

class Solution {
public:
  /// @brief Decide whether `s` can be split into a sequence of dictionary words.
  ///
  /// Bottom-up DP: `can_break[k]` is true when the first `k` characters of
  /// `s` can be segmented into words from `wordDict`.
  ///
  /// @param s        The string to segment.
  /// @param wordDict The set of allowed words.
  /// @return true if `s` is empty or can be fully segmented; false otherwise
  ///         (including when `wordDict` is empty and `s` is not).
  bool wordBreak(std::string s, std::unordered_set<std::string>& wordDict) {
    if (s.empty()) return true;
    if (wordDict.empty()) return false;

    // Longest dictionary word. Kept as size_t so std::max sees two arguments
    // of the same type (mixing int and size_t fails template deduction).
    std::size_t max_word_len = 0;
    for (const std::string& word : wordDict) {
      max_word_len = std::max(max_word_len, word.size());
    }

    std::vector<bool> can_break(s.size() + 1, false);
    can_break[0] = true;  // the empty prefix is trivially segmentable
    for (std::size_t end = 1; end <= s.size(); ++end) {
      // A candidate last word ends at `end` and can be no longer than the
      // longest dictionary word (nor longer than the prefix itself).
      const std::size_t longest = std::min(end, max_word_len);
      // Iterating by length avoids counting an unsigned index down past zero.
      for (std::size_t len = 1; len <= longest; ++len) {
        const std::size_t start = end - len;
        if (can_break[start] && wordDict.count(s.substr(start, len)) > 0) {
          can_break[end] = true;
          break;
        }
      }
    }

    return can_break[s.size()];
  }
};

Java

public class Solution {
  public boolean wordBreak(String s, Set<String> wordDict) {
    if (s == null || s.length() == 0) return true;
    if (wordDict == null || wordDict.isEmpty()) return false;

    // get the max word length of wordDict
    int max_word_len = 0;
    for (String word : wordDict) {
      max_word_len = Math.max(max_word_len, word.length());
    }

    boolean[] can_break = new boolean[s.length() + 1];
    can_break[0] = true;
    for (int i = 1; i <= s.length(); i++) {
      for (int j = i - 1; j >= 0; j--) {
        // optimize for too long interval
        if (i - j > max_word_len) break;

        String word = s.substring(j, i);
        if (can_break[j] && wordDict.contains(word)) {
          can_break[i] = true;
          break;
        }
      }
    }

    return can_break[s.length()];
  }
}