您可能会发现向量化很有帮助:
# Build a large character vector: the iris species labels repeated 10 times.
names1<-as.character(rep(iris$Species,10))
# Baseline: lapply over every index, comparing names1[x] with all the other
# elements (names1[-x]), then flatten, scale to percent, round to one decimal
# and append "%". Note that the rounding and paste() formatting are inside
# system.time(), so they count toward the measured time.
# levenshteinSim() is not defined in this snippet; presumably it comes from
# the RecordLinkage package -- confirm it is loaded before running.
system.time(Percent <- paste(round(unlist(lapply(1:length(names1), function(x) {
levenshteinSim(names1[x], names1[-x])}))*100, 1), "%", sep=""))
#Create Vectorized Function
# Similarity of one element against all the others.
#   names: character vector of strings to compare.
#   x:     index of the element to compare against the rest of `names`.
# Returns whatever levenshteinSim() yields for names[x] versus names[-x].
fun1 <- function(names, x) {
  target <- names[x]
  others <- names[-x]
  levenshteinSim(target, others)
}
# Wrap fun1 with Vectorize() so that it accepts a whole vector of indices in
# `x`; only `x` is listed in vectorize.args, so `names` is passed through
# unchanged on every call.
vecFun1<-Vectorize(fun1,vectorize.args = "x")
# Time the wrapped function over every index of names1. Unlike the lapply
# timing earlier in the script, the percentage formatting is done afterwards,
# outside system.time().
system.time(percentVec<-vecFun1(names1,c(1:length(names1))))
# c() flattens the result into a plain vector before it is scaled to a
# percentage, rounded to one decimal place and suffixed with "%".
percentVec<-paste(as.character(round(c(percentVec)*100,1)),"%",sep="")
下面是代码的执行结果:向量化版本的耗时不到 lapply 版本的 1/3。
> names1<-as.character(rep(iris$Species,10))
> system.time(Percent <- paste(round(unlist(lapply(1:length(names1), function(x) {
+ levenshteinSim(names1[x], names1[-x])}))*100, 1), "%", sep=""))
user system elapsed
5.07 0.02 5.09
>
> fun1<-function(names,x) {
+ return(levenshteinSim(names[x],names[-x]))
+ }
>
> vecFun1<-Vectorize(fun1,vectorize.args = "x")
>
> system.time(percentVec<-vecFun1(names1,c(1:length(names1))))
user system elapsed
1.62 0.00 1.62
我还用您示例中包含 3 个元素的字符向量进行了测试,两种方法得到的结果相同:
> names2<-c("Adam Shaw","Justin Bose","Cydney Clide")
> names2 <- as.character(names2)
> system.time(Percent <- paste(round(unlist(lapply(1:length(names2), function(x) {
+ levenshteinSim(names2[x], names2[-x])}))*100, 1), "%", sep=""))
user system elapsed
0 0 0
>
> fun1<-function(names,x) {
+ return(levenshteinSim(names[x],names[-x]))
+ }
>
> vecFun1<-Vectorize(fun1,vectorize.args = "x")
>
> system.time(percentVec<-vecFun1(names2,c(1:length(names2))))
user system elapsed
0 0 0
>
> percentVec<-paste(as.character(round(c(percentVec)*100,1)),"%",sep="")
>
> Percent
[1] "9.1%" "16.7%" "9.1%" "16.7%" "16.7%" "16.7%"
> percentVec
[1] "9.1%" "16.7%" "9.1%" "16.7%" "16.7%" "16.7%"