在单词中找到重叠的字母

发布于 2025-02-13 07:04:11 字数 910 浏览 1 评论 0原文

我只有一个只有三个单词的字符串：

first_string <- c("self", "funny", "nymph")

您可以看到该向量的单词都可以将一个单词放在一个单词上，因为字母有一些重叠，即我们得到SEL f un <强> ny mph。让我们称其为单词火车。

此外，我还有另一个矢量，带有许多单词。让第二个向量为：

second_string <- c("house", "garden", "duck", "evil", "fluff")

我想知道第二个字符串的哪个单词可以添加到train一词中。在这种情况下，这是house和fluff（house可以在Sel f 的末尾添加> ny mph和fluff可以放在self和funny之间。因此，此处的预期输出将是：

expected <- data.frame(word= c("house", "fluff"), word_train= c("selfunnymphouse", "selfluffunnymph"))

重叠可以有任何长度，即自我和有趣的重叠仅与一个字符重叠，但有趣而若虫则重叠，分为两个字符。

编辑

新单词可以更改第一个单词火车的单词顺序。例如，如果第二个向量包含hugs单词> unny，它在nymph self和funny之前。

原文

I have a string with only three words like this:

first_string <- c("self", "funny", "nymph")

As you can see the words of this vector can all be put together to one word because there is some overlap in letters, i.e. we get selfunnymph. Let`s call this a word train.

Besides, I have another vector with many words. Let the second vector be:

second_string <- c("house", "garden", "duck", "evil", "fluff")

I want to know what words of the second string can be added to the word train. In this case this is house and fluff (house can be added in the end of selfunnymph and fluff can be put between self and funny). So the expected output here would be:

expected <- data.frame(word= c("house", "fluff"), word_train= c("selfunnymphouse", "selfluffunnymph"))

The overlap can be of any length, i.e. self and funny overlap only with one character but funny and nymph overlap in two characters.

EDIT

The new word can change the word order of the first word train. For example, if the second vector contains the word hugs we can make the word train nymphugselfunny, which puts nymph before self and funny.

分享到QQ

分享到微博

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

需要登录才能够评论，你可以免费注册一个本站的账号。

茶花眉 2025-02-20 07:04:12

我想知道您为什么问这个，但是无论如何，这都是一个有趣的练习。这是我的实现：

library('dplyr')


# define cars -------------------------------------------------------------

original_cars <- c("self", "funny", "nymph")
new_cars <- c("house", "garden", "duck", "evil", "fluff")
cars <- c(original_cars, new_cars)


# get all possible connections ('parts') per car --------------------------

car_parts <- lapply(seq_along(cars), \(car_id) {
  
  car = cars[car_id]
  n = nchar(car)
  
  ids <- rep(car_id, n)
  names <- rep(car, n)
  left <- vapply(seq_len(n), \(i) substr(car, 1, i), "")
  right <- vapply(seq_len(n), \(i) substr(car, n-i+1, n), "")
  overlap <- nchar(left)
  
  data.frame(car.id = ids, car.name = names, left = left, right = right, overlap = overlap)
  
}) |> do.call(rbind, args=_)

# > car_parts
#    car.id car.name   left  right overlap
# 1       1     self      s      f       1
# 2       1     self     se     lf       2
# 3       1     self    sel    elf       3
# 4       1     self   self   self       4
# 5       2    funny      f      y       1
# 6       2    funny     fu     ny       2
# 7       2    funny    fun    nny       3
# 8       2    funny   funn   unny       4
# 9       2    funny  funny  funny       5
# 10      3    nymph      n      h       1
# [...]


# get all possible connections between two cars ---------------------------

connections <- inner_join(car_parts |> select(-left),
           car_parts |> select(-right),
           by = c('overlap', 'right' = 'left'),
           suffix = c('.left', '.right')) |>
  filter(car.id.left != car.id.right) |>
  mutate(connection.id = row_number()) |>
  select(connection.id, car.id.left, car.id.right, car.name.left, car.name.right, coupling = right)

rm(car_parts)

# > connections
#   connection.id car.id.left car.id.right car.name.left car.name.right coupling
# 1             1           1            2          self          funny        f
# 2             2           1            8          self          fluff        f
# 3             3           2            3         funny          nymph       ny
# 4             4           3            4         nymph          house        h
# 5             5           4            7         house           evil        e
# 6             6           4            1         house           self       se
# 7             7           5            3        garden          nymph        n
# 8             8           8            2         fluff          funny        f


# function to store valid trains ------------------------------------------

# example:
# valid_trains <- list()
# valid_trains <- add_valid_train( valid_trains, c(1, 8), c(2) )

add_valid_train <- function(valid_trains, train_cars, train_connections) {
  
  names = c(cars[train_cars[1]],
            vapply(train_connections, \(x) connections$car.name.right[x], "") )
  
  couplings = vapply(train_connections, \(x) connections$coupling[x], "")
  
  append(valid_trains, list(list(cars = train_cars, names = names, couplings = couplings)))
  
}


# function to recursively find next cars to add to train ------------------

# example:
# add_car(9, 5, c(1,2,3), c(1,3,5))

add_car <- function(valid_trains, new_car, new_connection = NULL, train_cars = c(), train_connections = c(), depth = 0) {
  
  cat(strrep('   ',depth), cars[new_car],'\n', sep='')
  
  # store current train as valid
  train_cars <- c(train_cars, new_car)
  train_connections <- c(train_connections, new_connection)
  
  # find next possible cars to add; save train if no more options, otherwise add all options
  options <- connections |> filter(car.id.left == new_car, ! car.id.right %in% train_cars)
  if(nrow(options) == 0) valid_trains <- add_valid_train(valid_trains, train_cars, train_connections) # save only the longest options
  for(i in seq_len(nrow(options))) valid_trains <- add_car(valid_trains, options$car.id.right[i], options$connection.id[i], train_cars, train_connections, depth+1)
  
  return(valid_trains)
  
}


# get all valid trains ----------------------------------------------------

valid_trains <- list()
for(i in seq_along(cars)) add_car(valid_trains, i) -> valid_trains

# filter valid trains that have all cars from `original_cars` -------------

mask <- vapply(valid_trains, \(x) all(seq_along(original_cars) %in% x$cars), T)

new_trains <- lapply(valid_trains[mask], \(x) {
  x$newcars <- setdiff(x$cars, seq_along(original_cars))
  x$newnames <- cars[x$newcars]
  x
})

# print names of all trains that contain all 'original' cars:
#
# > sapply(new_trains, \(x) x$names)
# [[1]] "self"  "funny" "nymph" "house" "evil" 
# [[2]] "self"  "fluff" "funny" "nymph" "house" "evil" 
# [[3]] "funny" "nymph" "house" "self"  "fluff"
# [[4]] "nymph" "house" "self"  "funny"
# [[5]] "nymph" "house" "self"  "fluff" "funny"
# [[6]] "house" "self"  "funny" "nymph"
# [[7]] "house" "self"  "fluff" "funny" "nymph"
# [[8]] "garden" "nymph"  "house"  "self"   "funny" 
# [[9]] "garden" "nymph"  "house"  "self"   "fluff"  "funny" 
# [[10]] "fluff" "funny" "nymph" "house" "self" 

## All possible trains are in `valid_trains`, all of those where *all* the original cars are used are in `new_trains`.
## 
## It is possible that some trains are subsets of others.

编辑：当我查看您自己的实施时，我认为您对最长的火车感兴趣。现在，您解释了目的，我调整了算法以采用原始汽车，然后查看可以将哪些新车单独添加到原始组合中。有了以前的代码，一长串潜在的新名称将创建一些巨大的火车，这些火车对于命名家庭而言是非常不可行的。

library('dplyr')


# define cars -------------------------------------------------------------

original_cars <- c("self", "funny", "nymph")
new_cars <- c("house", "garden", "duck", "evil", "fluff")


# function to get all possible connections between a set of cars ----------

# example:
# cars <- c("self", "funny", "nymph", "house")
# get_connections(cars)
#
# > get_connections(c("self", "funny", "nymph", "house"))
#   connection.id car.id.left car.id.right car.name.left car.name.right coupling
# 1             1           1            2          self          funny        f
# 2             2           2            3         funny          nymph       ny
# 3             3           3            4         nymph          house        h
# 4             4           4            1         house           self       se

get_connections <- function(cars) {
  
  # get all connections the cars can make
  car_parts <- lapply(seq_along(cars), \(car_id) {
    
    car = cars[car_id]
    n = nchar(car)
    
    ids <- rep(car_id, n)
    names <- rep(car, n)
    left <- vapply(seq_len(n), \(i) substr(car, 1, i), "")
    right <- vapply(seq_len(n), \(i) substr(car, n-i+1, n), "")
    overlap <- nchar(left)
    
    data.frame(car.id = ids, car.name = names, left = left, right = right, overlap = overlap)
    
  }) |> do.call(rbind, args=_)
  
  # > car_parts
  #    car.id car.name   left  right overlap
  # 1       1     self      s      f       1
  # 2       1     self     se     lf       2
  # 3       1     self    sel    elf       3
  # 4       1     self   self   self       4
  # 5       2    funny      f      y       1
  # 6       2    funny     fu     ny       2
  # [...]
  
  # return all possible connections between two cars
  
  inner_join(car_parts |> select(-left),
                            car_parts |> select(-right),
                            by = c('overlap', 'right' = 'left'),
                            suffix = c('.left', '.right')) |>
    filter(car.id.left != car.id.right) |>
    mutate(connection.id = row_number()) |>
    select(connection.id, car.id.left, car.id.right, car.name.left, car.name.right, coupling = right)
  
}


# function to store valid trains ------------------------------------------

# example:
# cars <- c("self", "funny", "nymph", "house")
# connections <- get_connections(cars)
# valid_trains <- list()
# valid_trains <- add_valid_train( cars, connections, valid_trains, c(2, 3), c(2) )

add_valid_train <- function(cars, connections, valid_trains, train_cars, train_connections) {
  
  names = c(cars[train_cars[1]],
            vapply(train_connections, \(x) connections$car.name.right[x], "") )
  
  couplings = vapply(train_connections, \(x) connections$coupling[x], "")
  
  append(valid_trains, list(list(cars = train_cars, names = names, couplings = couplings)))
  
}


# function to recursively find next cars to add to train ------------------

# example:
# cars <- c("self", "funny", "nymph", "house")
# connections <- get_connections(cars)
# valid_trains <- list()
# add_car(cars, connections, valid_trains, 2)

add_car <- function(cars, connections, valid_trains, new_car, new_connection = NULL, train_cars = c(), train_connections = c(), depth = 0) {
  
  cat(strrep('   ',depth), cars[new_car], '\n', sep='')
  
  # store current train as valid
  train_cars <- c(train_cars, new_car)
  train_connections <- c(train_connections, new_connection)
  
  # find next possible cars to add
  options <- connections |> filter(car.id.left == new_car, ! car.id.right %in% train_cars)
  for(i in seq_len(nrow(options))) valid_trains <- add_car(cars, connections, valid_trains, options$car.id.right[i], options$connection.id[i], train_cars, train_connections, depth+1)
  
  # save train if no more options
  if(nrow(options) == 0) valid_trains <- add_valid_train(cars, connections, valid_trains, train_cars, train_connections)
  
  return(valid_trains)
  
}


# find individual new cars that can be added to existing cars --------------

results <- lapply(new_cars, function(new_car) {
  
  cat('adding "',new_car,'":\n', sep='')
  cars <- c(original_cars, new_car)
  connections <- get_connections(cars)
  
  # get all possible trains
  valid_trains <- list()
  for(i in seq_along(cars)) add_car(cars, connections, valid_trains, i) -> valid_trains
  
  cat('\n')
  
  # return only trains where all cars are used
  valid_trains <- valid_trains[ sapply(valid_trains, \(x) length(x$cars)) == length(cars) ]
  return(list(new_car = new_car, options = length(valid_trains), trains = valid_trains))
})

for(result in results) {
  cat('\n', result$new_car, ': ', result$options, ' options ', sep='')
  for(train in result$trains) {
    cat('[',train$names,'] ')
  }
}
# detailed results are in `results`

house: 4 options [ self funny nymph house ] [ funny nymph house self ] [ nymph house self funny ] [ house self funny nymph ] 
garden: 0 options 
duck: 0 options 
evil: 0 options 
fluff: 1 options [ self fluff funny nymph ]

I'm wondering why you asked this, but it was a fun exercise regardless. Here's my implementation:

library('dplyr')


# define cars -------------------------------------------------------------

original_cars <- c("self", "funny", "nymph")
new_cars <- c("house", "garden", "duck", "evil", "fluff")
cars <- c(original_cars, new_cars)


# get all possible connections ('parts') per car --------------------------

car_parts <- lapply(seq_along(cars), \(car_id) {
  
  car = cars[car_id]
  n = nchar(car)
  
  ids <- rep(car_id, n)
  names <- rep(car, n)
  left <- vapply(seq_len(n), \(i) substr(car, 1, i), "")
  right <- vapply(seq_len(n), \(i) substr(car, n-i+1, n), "")
  overlap <- nchar(left)
  
  data.frame(car.id = ids, car.name = names, left = left, right = right, overlap = overlap)
  
}) |> do.call(rbind, args=_)

# > car_parts
#    car.id car.name   left  right overlap
# 1       1     self      s      f       1
# 2       1     self     se     lf       2
# 3       1     self    sel    elf       3
# 4       1     self   self   self       4
# 5       2    funny      f      y       1
# 6       2    funny     fu     ny       2
# 7       2    funny    fun    nny       3
# 8       2    funny   funn   unny       4
# 9       2    funny  funny  funny       5
# 10      3    nymph      n      h       1
# [...]


# get all possible connections between two cars ---------------------------

connections <- inner_join(car_parts |> select(-left),
           car_parts |> select(-right),
           by = c('overlap', 'right' = 'left'),
           suffix = c('.left', '.right')) |>
  filter(car.id.left != car.id.right) |>
  mutate(connection.id = row_number()) |>
  select(connection.id, car.id.left, car.id.right, car.name.left, car.name.right, coupling = right)

rm(car_parts)

# > connections
#   connection.id car.id.left car.id.right car.name.left car.name.right coupling
# 1             1           1            2          self          funny        f
# 2             2           1            8          self          fluff        f
# 3             3           2            3         funny          nymph       ny
# 4             4           3            4         nymph          house        h
# 5             5           4            7         house           evil        e
# 6             6           4            1         house           self       se
# 7             7           5            3        garden          nymph        n
# 8             8           8            2         fluff          funny        f


# function to store valid trains ------------------------------------------

# example:
# valid_trains <- list()
# valid_trains <- add_valid_train( valid_trains, c(1, 8), c(2) )

add_valid_train <- function(valid_trains, train_cars, train_connections) {
  
  names = c(cars[train_cars[1]],
            vapply(train_connections, \(x) connections$car.name.right[x], "") )
  
  couplings = vapply(train_connections, \(x) connections$coupling[x], "")
  
  append(valid_trains, list(list(cars = train_cars, names = names, couplings = couplings)))
  
}


# function to recursively find next cars to add to train ------------------

# example:
# add_car(9, 5, c(1,2,3), c(1,3,5))

add_car <- function(valid_trains, new_car, new_connection = NULL, train_cars = c(), train_connections = c(), depth = 0) {
  
  cat(strrep('   ',depth), cars[new_car],'\n', sep='')
  
  # store current train as valid
  train_cars <- c(train_cars, new_car)
  train_connections <- c(train_connections, new_connection)
  
  # find next possible cars to add; save train if no more options, otherwise add all options
  options <- connections |> filter(car.id.left == new_car, ! car.id.right %in% train_cars)
  if(nrow(options) == 0) valid_trains <- add_valid_train(valid_trains, train_cars, train_connections) # save only the longest options
  for(i in seq_len(nrow(options))) valid_trains <- add_car(valid_trains, options$car.id.right[i], options$connection.id[i], train_cars, train_connections, depth+1)
  
  return(valid_trains)
  
}


# get all valid trains ----------------------------------------------------

valid_trains <- list()
for(i in seq_along(cars)) add_car(valid_trains, i) -> valid_trains

# filter valid trains that have all cars from `original_cars` -------------

mask <- vapply(valid_trains, \(x) all(seq_along(original_cars) %in% x$cars), T)

new_trains <- lapply(valid_trains[mask], \(x) {
  x$newcars <- setdiff(x$cars, seq_along(original_cars))
  x$newnames <- cars[x$newcars]
  x
})

# print names of all trains that contain all 'original' cars:
#
# > sapply(new_trains, \(x) x$names)
# [[1]] "self"  "funny" "nymph" "house" "evil" 
# [[2]] "self"  "fluff" "funny" "nymph" "house" "evil" 
# [[3]] "funny" "nymph" "house" "self"  "fluff"
# [[4]] "nymph" "house" "self"  "funny"
# [[5]] "nymph" "house" "self"  "fluff" "funny"
# [[6]] "house" "self"  "funny" "nymph"
# [[7]] "house" "self"  "fluff" "funny" "nymph"
# [[8]] "garden" "nymph"  "house"  "self"   "funny" 
# [[9]] "garden" "nymph"  "house"  "self"   "fluff"  "funny" 
# [[10]] "fluff" "funny" "nymph" "house" "self" 

## All possible trains are in `valid_trains`, all of those where *all* the original cars are used are in `new_trains`.
## 
## It is possible that some trains are subsets of others.

edit: When I looked at your own implementation, I thought you were interested in the longest possible trains. Now you explained the purpose, I adapted the algorithm to take the original cars, and see which of the new cars could be added individually to the original set. With the previous code, a long list of potential new names would have created some huge trains that would be very unfeasible for naming a family.

library('dplyr')


# define cars -------------------------------------------------------------

original_cars <- c("self", "funny", "nymph")
new_cars <- c("house", "garden", "duck", "evil", "fluff")


# function to get all possible connections between a set of cars ----------

# example:
# cars <- c("self", "funny", "nymph", "house")
# get_connections(cars)
#
# > get_connections(c("self", "funny", "nymph", "house"))
#   connection.id car.id.left car.id.right car.name.left car.name.right coupling
# 1             1           1            2          self          funny        f
# 2             2           2            3         funny          nymph       ny
# 3             3           3            4         nymph          house        h
# 4             4           4            1         house           self       se

get_connections <- function(cars) {
  
  # get all connections the cars can make
  car_parts <- lapply(seq_along(cars), \(car_id) {
    
    car = cars[car_id]
    n = nchar(car)
    
    ids <- rep(car_id, n)
    names <- rep(car, n)
    left <- vapply(seq_len(n), \(i) substr(car, 1, i), "")
    right <- vapply(seq_len(n), \(i) substr(car, n-i+1, n), "")
    overlap <- nchar(left)
    
    data.frame(car.id = ids, car.name = names, left = left, right = right, overlap = overlap)
    
  }) |> do.call(rbind, args=_)
  
  # > car_parts
  #    car.id car.name   left  right overlap
  # 1       1     self      s      f       1
  # 2       1     self     se     lf       2
  # 3       1     self    sel    elf       3
  # 4       1     self   self   self       4
  # 5       2    funny      f      y       1
  # 6       2    funny     fu     ny       2
  # [...]
  
  # return all possible connections between two cars
  
  inner_join(car_parts |> select(-left),
                            car_parts |> select(-right),
                            by = c('overlap', 'right' = 'left'),
                            suffix = c('.left', '.right')) |>
    filter(car.id.left != car.id.right) |>
    mutate(connection.id = row_number()) |>
    select(connection.id, car.id.left, car.id.right, car.name.left, car.name.right, coupling = right)
  
}


# function to store valid trains ------------------------------------------

# example:
# cars <- c("self", "funny", "nymph", "house")
# connections <- get_connections(cars)
# valid_trains <- list()
# valid_trains <- add_valid_train( cars, connections, valid_trains, c(2, 3), c(2) )

add_valid_train <- function(cars, connections, valid_trains, train_cars, train_connections) {
  
  names = c(cars[train_cars[1]],
            vapply(train_connections, \(x) connections$car.name.right[x], "") )
  
  couplings = vapply(train_connections, \(x) connections$coupling[x], "")
  
  append(valid_trains, list(list(cars = train_cars, names = names, couplings = couplings)))
  
}


# function to recursively find next cars to add to train ------------------

# example:
# cars <- c("self", "funny", "nymph", "house")
# connections <- get_connections(cars)
# valid_trains <- list()
# add_car(cars, connections, valid_trains, 2)

add_car <- function(cars, connections, valid_trains, new_car, new_connection = NULL, train_cars = c(), train_connections = c(), depth = 0) {
  
  cat(strrep('   ',depth), cars[new_car], '\n', sep='')
  
  # store current train as valid
  train_cars <- c(train_cars, new_car)
  train_connections <- c(train_connections, new_connection)
  
  # find next possible cars to add
  options <- connections |> filter(car.id.left == new_car, ! car.id.right %in% train_cars)
  for(i in seq_len(nrow(options))) valid_trains <- add_car(cars, connections, valid_trains, options$car.id.right[i], options$connection.id[i], train_cars, train_connections, depth+1)
  
  # save train if no more options
  if(nrow(options) == 0) valid_trains <- add_valid_train(cars, connections, valid_trains, train_cars, train_connections)
  
  return(valid_trains)
  
}


# find individual new cars that can be added to existing cars --------------

results <- lapply(new_cars, function(new_car) {
  
  cat('adding "',new_car,'":\n', sep='')
  cars <- c(original_cars, new_car)
  connections <- get_connections(cars)
  
  # get all possible trains
  valid_trains <- list()
  for(i in seq_along(cars)) add_car(cars, connections, valid_trains, i) -> valid_trains
  
  cat('\n')
  
  # return only trains where all cars are used
  valid_trains <- valid_trains[ sapply(valid_trains, \(x) length(x$cars)) == length(cars) ]
  return(list(new_car = new_car, options = length(valid_trains), trains = valid_trains))
})

for(result in results) {
  cat('\n', result$new_car, ': ', result$options, ' options ', sep='')
  for(train in result$trains) {
    cat('[',train$names,'] ')
  }
}
# detailed results are in `results`

house: 4 options [ self funny nymph house ] [ funny nymph house self ] [ nymph house self funny ] [ house self funny nymph ] 
garden: 0 options 
duck: 0 options 
evil: 0 options 
fluff: 1 options [ self fluff funny nymph ]

回复收藏 0 原文

四叶草在未来唯美盛开 2025-02-20 07:04:12

事实证明，这比我想象的要难得多，但这是我最终要做的：

制作一个矩阵，用每个单词的前n个字母和另一个单词的最后一个n个字母
进行比较两个矩阵的最后一个字母，显示了两个单词与哪些单词重叠的
粘贴重叠单词火车的单词
重复上面的步骤，直到没有新的重叠

来为我的数据运行代码，这使我像我在编写问题时没有期望的那样长的单词火车，而最长的单词火车是gardenymphouselfluffunny和selfluffunnymphousevil（都包含6个单词）。输出数据是：

                               wagons                    train
fluffunnymphouself       fluff, f....       fluffunnymphouself
funnymphouselfluff       funny, n....       funnymphouselfluff
gardenymphouselfluffunny garden, .... gardenymphouselfluffunny
gardenymphouselfunny     garden, ....     gardenymphouselfunny
houselfluffunnymph       house, s....       houselfluffunnymph
houselfunnymph           house, s....           houselfunnymph
selfluffunnymphousevil   fluff, f....   selfluffunnymphousevil
selfunnymphousevil       funny, n....       selfunnymphousevil
# The column wagons is a list of different length, depending on the words that are in the word train.

虽然代码很长。

# Vectors from question.
first_string <- c("self", "funny", "nymph")
second_string <- c("house", "garden", "duck", "evil", "fluff")

# Prepating the while loop which only runs while there are any new_wagons to add to the train.
all_wagons <- tolower(c(first_string, second_string))
new_wagons <- TRUE
results <- data.frame(wagons= I(list("")), train= "")

# Start the while loop.
while(any(new_wagons, na.rm= TRUE)){
# Going though every train that has been made so far..
  all_results <- by(results, list(results$train), function(train_i){
# What wagons have been used for this train?
    used_wagons <- unique(unlist(train_i[ , "wagons"]))
    used_wagons <- used_wagons[used_wagons != ""]
# What wagons can be used to extend the train?
    wagons_to_use_from <- unique(c(all_wagons[!all_wagons %in% used_wagons], train_i[ , "train"]))

# Get the first n letters of every word.
    wagon_start <- as.data.frame(sapply(wagons_to_use_from, function(wagon_i){
      sapply(1:max(nchar(wagons_to_use_from)), function(length_i){
        substr(wagon_i, 1, length_i)
      })}))
# Get the last n letters of every word.
    wagon_end <- as.data.frame(sapply(wagons_to_use_from, function(wagon_i){
      sapply(0:(max(nchar(wagons_to_use_from)-1)), function(length_i){
        substr(wagon_i, nchar(wagon_i)-length_i, nchar(wagon_i))
      })}))
# Find the overlap in letters.
    find_overlap <- data.frame(word= rep(names(wagon_end), each= nrow(wagon_end)))
    find_overlap$word_end <- unlist(wagon_end[ , unique(find_overlap$word)])
    find_overlap$without_word <- wagon_start[rep(1:nrow(wagon_start), ncol(wagon_end)), , drop= FALSE]
    find_overlap$without_word[matrix(c(1:nrow(find_overlap),
                                   rep(1:ncol(wagon_start), each= nrow(wagon_end))),
                                 ncol= 2)] <- NA
    new_wagons <- find_overlap$word_end == find_overlap$without_word

# If there is no new overlap then return the data as it was.
    if(!any(new_wagons, na.rm= TRUE)){
      results <- train_i
    } else{
# If there is an overlap then save the relevant words.
      word_i <- find_overlap$word[sort(which(new_wagons == TRUE, arr.ind = TRUE)[ , "row"])]
      word_overlap <- find_overlap$word_end[sort(which(new_wagons == TRUE, arr.ind = TRUE)[ , "row"])]
      word_after_i <- colnames(new_wagons)[which(t(new_wagons) == TRUE, arr.ind = TRUE)[, "row"]]
  
      word_trains <- data.frame(word_i, word_overlap, word_after_i, word_train= paste0(substr(word_i, 1, nchar(word_i)- nchar(word_overlap)),
                                                                                   word_after_i))
# Avoid former word trains as wagon names for next round:
      if(train_i$train != ""){
        word_trains <- word_trains[word_trains$word_i == train_i$train | word_trains$word_after_i == train_i$train, ]
      }
  # Output results where the former and new used words as well as the word train is.
      results <- do.call("rbind.data.frame", lapply(as.data.frame(t(word_trains)), function(word_trains_i){
        used_wagons_old <- used_wagons
        used_wagons_new <- c(word_trains_i[1], word_trains_i[3])
        wagons <- c(used_wagons_old, used_wagons_new)
        wagons <- wagons[wagons != train_i$train]
        wagons <- wagons[wagons != ""]
    
    
        data.frame(wagons= I(list(wagons)),
                   train= word_trains_i[4]
               
        )
    
      }))
    }


    list(results, new_wagons)

  })
# Make two dataframes, one with the word results, one with logicals whether there is any overlap.
  results <- do.call(rbind, lapply(all_results, `[[`, 1))
  results <- results[!duplicated(results$train), ]
  new_wagons <- unlist(do.call(list, lapply(all_results, `[[`, 2)))

}

It turned out to be much harded than I thought but this is what I ended up doing:

Make an matrix with the first n letters of each word and another matrix with last n letters of each word
Comparing the two matrices shows which words overlap
Paste overlaping words to a word train
Repeating the steps above until there is no new overlap

Running the code for my data from question gave me such long word trains as I did not expect while writing the question, with the longest word trains being gardenymphouselfluffunny and selfluffunnymphousevil (both contain 6 words). The output data is:

                               wagons                    train
fluffunnymphouself       fluff, f....       fluffunnymphouself
funnymphouselfluff       funny, n....       funnymphouselfluff
gardenymphouselfluffunny garden, .... gardenymphouselfluffunny
gardenymphouselfunny     garden, ....     gardenymphouselfunny
houselfluffunnymph       house, s....       houselfluffunnymph
houselfunnymph           house, s....           houselfunnymph
selfluffunnymphousevil   fluff, f....   selfluffunnymphousevil
selfunnymphousevil       funny, n....       selfunnymphousevil
# The column wagons is a list of different length, depending on the words that are in the word train.

The code is quite long though..

# Vectors from question.
first_string <- c("self", "funny", "nymph")
second_string <- c("house", "garden", "duck", "evil", "fluff")

# Prepating the while loop which only runs while there are any new_wagons to add to the train.
all_wagons <- tolower(c(first_string, second_string))
new_wagons <- TRUE
results <- data.frame(wagons= I(list("")), train= "")

# Start the while loop.
while(any(new_wagons, na.rm= TRUE)){
# Going though every train that has been made so far..
  all_results <- by(results, list(results$train), function(train_i){
# What wagons have been used for this train?
    used_wagons <- unique(unlist(train_i[ , "wagons"]))
    used_wagons <- used_wagons[used_wagons != ""]
# What wagons can be used to extend the train?
    wagons_to_use_from <- unique(c(all_wagons[!all_wagons %in% used_wagons], train_i[ , "train"]))

# Get the first n letters of every word.
    wagon_start <- as.data.frame(sapply(wagons_to_use_from, function(wagon_i){
      sapply(1:max(nchar(wagons_to_use_from)), function(length_i){
        substr(wagon_i, 1, length_i)
      })}))
# Get the last n letters of every word.
    wagon_end <- as.data.frame(sapply(wagons_to_use_from, function(wagon_i){
      sapply(0:(max(nchar(wagons_to_use_from)-1)), function(length_i){
        substr(wagon_i, nchar(wagon_i)-length_i, nchar(wagon_i))
      })}))
# Find the overlap in letters.
    find_overlap <- data.frame(word= rep(names(wagon_end), each= nrow(wagon_end)))
    find_overlap$word_end <- unlist(wagon_end[ , unique(find_overlap$word)])
    find_overlap$without_word <- wagon_start[rep(1:nrow(wagon_start), ncol(wagon_end)), , drop= FALSE]
    find_overlap$without_word[matrix(c(1:nrow(find_overlap),
                                   rep(1:ncol(wagon_start), each= nrow(wagon_end))),
                                 ncol= 2)] <- NA
    new_wagons <- find_overlap$word_end == find_overlap$without_word

# If there is no new overlap then return the data as it was.
    if(!any(new_wagons, na.rm= TRUE)){
      results <- train_i
    } else{
# If there is an overlap then save the relevant words.
      word_i <- find_overlap$word[sort(which(new_wagons == TRUE, arr.ind = TRUE)[ , "row"])]
      word_overlap <- find_overlap$word_end[sort(which(new_wagons == TRUE, arr.ind = TRUE)[ , "row"])]
      word_after_i <- colnames(new_wagons)[which(t(new_wagons) == TRUE, arr.ind = TRUE)[, "row"]]
  
      word_trains <- data.frame(word_i, word_overlap, word_after_i, word_train= paste0(substr(word_i, 1, nchar(word_i)- nchar(word_overlap)),
                                                                                   word_after_i))
# Avoid former word trains as wagon names for next round:
      if(train_i$train != ""){
        word_trains <- word_trains[word_trains$word_i == train_i$train | word_trains$word_after_i == train_i$train, ]
      }
  # Output results where the former and new used words as well as the word train is.
      results <- do.call("rbind.data.frame", lapply(as.data.frame(t(word_trains)), function(word_trains_i){
        used_wagons_old <- used_wagons
        used_wagons_new <- c(word_trains_i[1], word_trains_i[3])
        wagons <- c(used_wagons_old, used_wagons_new)
        wagons <- wagons[wagons != train_i$train]
        wagons <- wagons[wagons != ""]
    
    
        data.frame(wagons= I(list(wagons)),
                   train= word_trains_i[4]
               
        )
    
      }))
    }


    list(results, new_wagons)

  })
# Make two dataframes, one with the word results, one with logicals whether there is any overlap.
  results <- do.call(rbind, lapply(all_results, `[[`, 1))
  results <- results[!duplicated(results$train), ]
  new_wagons <- unlist(do.call(list, lapply(all_results, `[[`, 2)))

}

回复收藏 0 原文

相思故 2025-02-20 07:04:12

由于pracma :: perms用于生成所有排列和检查建筑火车的历史，但这可能是一种笨重/效率低下的方法，但我希望它可以为您提供一些线索

library(pracma)

# check if adjacent strings have overlaps
isOverlapped <- function(a, b) {
  for (k in 1:min(nchar(c(a, b)))) {
    if (substr(a, nchar(a) - k + 1, nchar(a)) == substr(b, 1, k)) {
      return(TRUE)
    }
  }
  FALSE
}

# check if a train can be created
checkTrain <- function(v) {
  for (i in 1:(length(v) - 1)) {
    if (!isOverlapped(v[i], v[i + 1])) {
      return(FALSE)
    }
  }
  TRUE
}

# produce all possible trains (based on first_string) with additional words from second_string
lapply(
  second_string ,
  function(x) {
    lst <- asplit(perms(c(first_string, x)), 1)
    lst[sapply(lst,checkTrain)]
  }
)

，您将获得列表

[[1]]
[[1]][[1]]
[1] "house" "self"  "funny" "nymph"

[[1]][[2]]
[1] "nymph" "house" "self"  "funny"

[[1]][[3]]
[1] "funny" "nymph" "house" "self"

[[1]][[4]]
[1] "self"  "funny" "nymph" "house"


[[2]]
list()

[[3]]
list()

[[4]]
list()

[[5]]
[[5]][[1]]
[1] "self"  "fluff" "funny" "nymph"

其中house给出4可能的火车和fluff给出1 train，而其他单词在second_string无法根据first_string构建任何火车。

This might be a bulky/inefficient approach, due to pracma::perms for generating all permutations and checking the vadility of building trains, but I hope it could provide you with some clues

library(pracma)

# check if adjacent strings have overlaps
isOverlapped <- function(a, b) {
  for (k in 1:min(nchar(c(a, b)))) {
    if (substr(a, nchar(a) - k + 1, nchar(a)) == substr(b, 1, k)) {
      return(TRUE)
    }
  }
  FALSE
}

# check if a train can be created
checkTrain <- function(v) {
  for (i in 1:(length(v) - 1)) {
    if (!isOverlapped(v[i], v[i + 1])) {
      return(FALSE)
    }
  }
  TRUE
}

# produce all possible trains (based on first_string) with additional words from second_string
lapply(
  second_string ,
  function(x) {
    lst <- asplit(perms(c(first_string, x)), 1)
    lst[sapply(lst,checkTrain)]
  }
)

and you will obtain a list

[[1]]
[[1]][[1]]
[1] "house" "self"  "funny" "nymph"

[[1]][[2]]
[1] "nymph" "house" "self"  "funny"

[[1]][[3]]
[1] "funny" "nymph" "house" "self"

[[1]][[4]]
[1] "self"  "funny" "nymph" "house"


[[2]]
list()

[[3]]
list()

[[4]]
list()

[[5]]
[[5]][[1]]
[1] "self"  "fluff" "funny" "nymph"

where house gives 4 possible trains and fluff gives 1 train, while other words in second_string cannot contribute to building any trains based on first_string.

回复收藏 0 原文

~没有更多了~