我有一个名为 ftrl_proximal() 的类,它适合数据模型。它是一个自行编写的分类器(不是 sklearn 的)。
当我只使用一个 CPU 运行时,该算法运行良好,但是一旦我尝试在多处理(交叉验证)中执行它,我就会收到如下所述的错误。代码是:
from FTRL import ftrl_proximal
from sklearn.externals import joblib
from sklearn.base import clone
import multiprocessing
from sklearn.cross_validation import StratifiedKFold
def ftrl_train(estimator, X, y, train_index, test_index):
y_ftrl_pred_test = []
y_ftrl_true = []
# Split the data to train and test
X_train, X_test = X[train_index], X[test_index]
y_train, y_test = y[train_index], y[test_index]
# Fit a model on sample of the data
for idx, x in enumerate(X_train):
# predict
_p = estimator.predict(x.indices)
# update
estimator.update(x.indices, _p, y_train[idx])
for idx, x in enumerate(X_test):
_v = estimator.predict(x.indices)
y_ftrl_pred_test.append(_v) # Predicted
y_ftrl_true.append(y_test[idx]) # True
return y_ftrl_pred_test, y_ftrl_true
cv_fold = 3 # determines the number of folds.
skf = StratifiedKFold(y, n_folds=cv_fold, random_state=0)
ftrl = ftrl_proximal(alpha, beta, L1, L2, D, interaction) # initialize a learner
parallel = joblib.Parallel(n_jobs=num_cores, verbose=0, pre_dispatch='2*n_jobs')
preds_blocks = parallel(joblib.delayed(ftrl_train)(clone(ftrl), X, y,
train_index, test_index, verbose=0, fit_params=None)
for train_index, test_index in skf)
错误:
Traceback (most recent call last):
File "/home/workspace/Predictor/modelSelection.py", line 61, in <module>
class Main():
File "/home/workspace/Predictor/modelSelection.py", line 199, in Main
for train_index, test_index in skf)
File "/home/anaconda2/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 658, in __call__
for function, args, kwargs in iterable:
File "/home/anaconda2/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 184, in next
return next(self._it)
File "/home/workspace/Predictor/modelSelection.py", line 199, in <genexpr>
for train_index, test_index in skf)
NameError: global name '_ftrl' is not defined