我的问题陈述：我有一组与纽约相关的健康数据集。文件结构如下：
纬度 经度 日 病例
40.00 -73.25 1 1
我据此创建了一个文件，其中约有 17699997 条记录。因此我必须使用 stream 包创建一个数据流，每次从文件中提取 1000 个数据点。然后我将应用 DenStream 算法进行在线聚类，并对取到的同一批 1000 个点应用离线算法，例如 K-均值、DBSCAN。
我想每次从文件中取 1000 个点，对其进行在线和离线聚类；然后再取下一批 1000 个点，如此重复，直到流结束。
所以我编写了下面的代码，但我无法检测到流的结尾。
而我希望在流结束时通知用户。代码包含两个过程：
在线过程（微聚类）和离线过程（宏聚类）。
#' Cluster a data stream batch by batch: DenStream online, DBSCAN offline.
#'
#' For every batch a fresh DenStream micro-clusterer is fed `n` points from
#' the stream, its micro-clusters are reclustered with DBSCAN, and one JPEG
#' plot per phase is written to the current working directory. Batches are
#' processed until a step that reads from the stream raises an error; that
#' is reported to the user as the end of the stream and the function returns
#' instead of looping forever.
#'
#' NOTE(review): this relies on the stream source signalling an error once it
#' cannot supply the requested points -- confirm for the stream class and
#' package version in use.
#'
#' @param Wrapper_Data_Stream Stream object handed to `cluster()` and
#'   `plot()` (presumably a `DSD` from the stream package -- confirm against
#'   the caller).
#' @param n Number of points read from the stream per batch. Defaults to
#'   1000, the value that used to be hard-coded.
#' @return Invisibly, the number of batches that were processed completely.
Denstream_Dbscan <- function(Wrapper_Data_Stream, n = 1000) {
  # Evaluate a step that reads from the stream. Any error it raises is
  # re-signalled with class "stream_exhausted", so only failures of
  # stream-reading steps end the loop; all other errors still propagate.
  from_stream <- function(step) {
    tryCatch(
      step,
      error = function(e) {
        stop(structure(
          class = c("stream_exhausted", "error", "condition"),
          list(message = conditionMessage(e), call = conditionCall(e))
        ))
      }
    )
  }

  # Draw one plot into a JPEG file; the device is closed even when the
  # drawing step fails, so no graphics device is leaked.
  save_jpeg <- function(file, drawing) {
    jpeg(file)
    on.exit(dev.off(), add = TRUE)
    force(drawing)
    invisible(NULL)
  }

  # Seconds elapsed since `started`, rounded for display in plot titles.
  seconds_since <- function(started) {
    round(as.numeric(difftime(Sys.time(), started, units = "secs")), 3)
  }

  # One complete online + offline pass over the next `n` points.
  run_batch <- function(i) {
    # online process (micro cluster)
    print(paste("Start online Process ", i, " iteration", Sys.time()))
    dstream_micro <- DSC_DenStream(
      epsilon = 0.01, initPoints = 200, minPoints = 500
    )
    started <- Sys.time()
    from_stream(
      cluster(dstream_micro, Wrapper_Data_Stream, n = n, verbose = TRUE)
    )
    online_secs <- seconds_since(started)
    print(paste(
      "no of micro cluster=", nclusters(dstream_micro, type = "auto")
    ))
    # plot() is also given the stream, so it may read further points from
    # it (TODO confirm); it is therefore treated as a stream-reading step.
    save_jpeg(
      paste("Micro Cluster(Online)_", i, ".jpeg"),
      from_stream(plot(
        dstream_micro, dsd = Wrapper_Data_Stream, pch = c(1, 2),
        col_points = "red", col_clusters = "green",
        xlab = "latitude", ylab = "longitude",
        main = paste("TimeTaken=", online_secs, "secs"),
        sub = paste("micro cluster:", Sys.time()),
        col.main = "red", col.sub = "black"
      ))
    )
    print(paste("End online Process for ", i, " iteration", Sys.time()))

    # offline process (macro cluster)
    print(paste("Start offline Process ", i, " iteration", Sys.time()))
    dbscan_macro <- DSC_DBSCAN(eps = 0.1)
    started <- Sys.time()
    recluster(dbscan_macro, dstream_micro)
    offline_secs <- seconds_since(started)
    print(paste("no of macro cluster=", nclusters(dbscan_macro)))
    save_jpeg(
      paste("Macro Cluster(offline)_", i, ".jpeg"),
      from_stream(plot(
        dbscan_macro, Wrapper_Data_Stream, pch = c(1, 2),
        col_points = "grey", col_clusters = "blue",
        xlab = "latitude", ylab = "longitude",
        main = paste("TimeTaken=", offline_secs, "secs"),
        sub = paste("macro cluster:", Sys.time()),
        col.main = "red", col.sub = "black"
      ))
    )
    print(paste("End Offline Process for ", i, " iteration", Sys.time()))
    invisible(NULL)
  }

  completed <- 0L
  repeat {
    outcome <- tryCatch(
      {
        run_batch(completed + 1L)
        NULL
      },
      stream_exhausted = function(e) e
    )
    if (!is.null(outcome)) {
      # Tell the user why the loop stops, including the underlying error
      # text so a failure that is not an exhausted stream stays visible.
      message(
        "End of stream: stopped in iteration ", completed + 1L,
        " because the stream could not be read (",
        conditionMessage(outcome), ")."
      )
      break
    }
    completed <- completed + 1L
  }
  print(paste(
    "Stream finished after", completed, "complete iteration(s)", Sys.time()
  ))
  invisible(completed)
}