我正在尝试使用 ML.NET 进行基本的情感分析,参照的是这篇教程:https://docs.microsoft.com/en-us/dotnet/machine-learning/tutorials/sentiment-analysis。
我一步一步地按照教程操作,并使用教程中提供的相同文件进行训练。我还查看了另一位用户在下面这个类似问题中得到的所有评论和回答:
ml.net 关于格式错误和错误值的情绪分析警告
但仍然出现以下错误。大部分训练数据都被报告为错误值,如下所示(例如:已处理 860 行,其中包含 818 个错误值)。按理说不应该出现这种情况,因为数据和代码都来自 Microsoft 官方网站(即上面给出的第一个链接)。代码和错误信息粘贴在下面。
是否是微软网站上的数据有了变动,而他们尚未同步更新?
Not adding a normalizer.
Making per-feature arrays
Changing data from row-wise to column-wise
Bad value at line 8 in column Label
Bad value at line 112 in column Label
Bad value at line 187 in column Label
Bad value at line 9 in column Label
Bad value at line 10 in column Label
Bad value at line 11 in column Label
Bad value at line 12 in column Label
Bad value at line 188 in column Label
Bad value at line 190 in column Label
Bad value at line 113 in column Label
Suppressing further bad value messages
Processed 1773 rows with 1731 bad values and 0 format errors
Processed 42 instances
Binning and forming Feature objects
Reserved memory for tree learner: 1188 bytes
Starting to train ...
Warning: 50 of the boosting iterations failed to grow a tree. This is commonly because the minimum documents in leaf hyperparameter was set too high for this dataset.
Not training a calibrator because it is not needed.
Bad value at line 7 in column Label
Bad value at line 186 in column Label
Bad value at line 111 in column Label
Bad value at line 8 in column Label
Bad value at line 9 in column Label
Bad value at line 10 in column Label
Bad value at line 11 in column Label
Bad value at line 12 in column Label
Suppressing further bad value messages
Bad value at line 112 in column Label
Bad value at line 187 in column Label
Processed 860 rows with 818 bad values and 0 format errors
下面是代码:
/// <summary>
/// Input row for the sentiment model: one label value and the text it belongs to.
/// Both members are mapped to loader columns by their <c>Column</c> attributes.
/// </summary>
public class SentimentData
{
    /// <summary>
    /// Value read from column 0 and exposed to the pipeline under the name "Label".
    /// </summary>
    [Column(ordinal: "0", name: "Label")]
    public float Sentiment;

    /// <summary>
    /// Text read from column 1; the training pipeline featurizes the "SentimentText" column.
    /// </summary>
    [Column(ordinal: "1")]
    public string SentimentText;
}
/// <summary>
/// Output of the sentiment model for a single input row.
/// </summary>
public class SentimentPrediction
{
    /// <summary>
    /// Bound to the model's "PredictedLabel" output column.
    /// The console loop in <c>Main</c> reports <c>true</c> as "Negative" and <c>false</c> as "Positive".
    /// </summary>
    [ColumnName("PredictedLabel")]
    public bool Sentiment;
}
class Program
{
// Tutorial this program follows:
// https://docs.microsoft.com/en-us/dotnet/machine-learning/tutorials/sentiment-analysis
//
// All paths are resolved against the process's current directory, inside a "Data" sub-folder.
// Training data file, loaded by Train().
static readonly string _dataPath = Path.Combine(Environment.CurrentDirectory, "Data", "wikipedia-detox-250-line-data.tsv");
// Test data file, loaded by Evaluate().
static readonly string _testDataPath = Path.Combine(Environment.CurrentDirectory, "Data", "wikipedia-detox-250-line-test.tsv");
// Destination the trained model is written to at the end of Train().
static readonly string _modelpath = Path.Combine(Environment.CurrentDirectory, "Data", "Model.zip");
/// <summary>
/// Entry point: trains the model, prints evaluation metrics, then classifies
/// lines typed on the console until the user enters END (any casing) or the
/// input stream is closed.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static async Task Main(string[] args)
{
    Console.WriteLine("---------------Training ------------------------------------");
    var model = await Train();
    Evaluate(model);
    Console.WriteLine("---------------Training Over------------------------------------");
    Console.WriteLine("Type END to exit");

    while (true)
    {
        string s = Console.ReadLine();

        // ReadLine returns null when the input stream ends (e.g. redirected stdin or Ctrl+Z);
        // treat that like END instead of dereferencing null.
        // The sentinel is checked BEFORE predicting so that "END" itself is never classified.
        if (s == null || s.Equals("end", StringComparison.OrdinalIgnoreCase))
        {
            break;
        }

        // The model's boolean output is reported as Negative (true) / Positive (false).
        Console.WriteLine("Sentiment: {0}", Predict(model, s).Sentiment ? "Negative" : "Positive");
    }
}
/// <summary>
/// Builds the learning pipeline (text loader, text featurizer, logistic-regression
/// binary classifier), trains it on the file at <c>_dataPath</c>, writes the
/// resulting model to <c>_modelpath</c> and returns it.
/// </summary>
/// <returns>The trained prediction model.</returns>
public static async Task<PredictionModel<SentimentData, SentimentPrediction>> Train()
{
    var pipeline = new LearningPipeline();

    // Step 1: read the training file; the first line is treated as a header.
    pipeline.Add(
        new TextLoader(_dataPath).CreateFrom<SentimentData>(
            useHeader: true,
            allowQuotedStrings: true,
            supportSparse: true,
            trimWhitespace: true));

    // Step 2: turn the "SentimentText" column into the "Features" column.
    pipeline.Add(new TextFeaturizer("Features", "SentimentText"));

    // Step 3: the learner. Alternative that was tried previously:
    //   new FastTreeBinaryClassifier() { NumLeaves = 50, NumTrees = 50, MinDocumentsInLeafs = 20 }
    pipeline.Add(new LogisticRegressionBinaryClassifier());

    var trained = pipeline.Train<SentimentData, SentimentPrediction>();

    // Persist the model before handing it back to the caller.
    await trained.WriteAsync(_modelpath);
    return trained;
}
/// <summary>
/// Evaluates <paramref name="model"/> against the file at <c>_testDataPath</c>
/// and prints accuracy, AUC and F1 score to the console.
/// </summary>
/// <param name="model">The trained model to evaluate.</param>
public static void Evaluate(PredictionModel<SentimentData, SentimentPrediction> model)
{
    // Use the same loader options as Train(). With the defaults the first line
    // of the file is parsed as data, so a header row would be reported as a bad
    // Label value.
    // NOTE(review): assumes the test file has the same layout (header row
    // included) as the training file; confirm against the actual file.
    var testData = new TextLoader(_testDataPath).CreateFrom<SentimentData>(
        useHeader: true,
        allowQuotedStrings: true,
        supportSparse: true,
        trimWhitespace: true);

    var evaluator = new BinaryClassificationEvaluator();
    BinaryClassificationMetrics metrics = evaluator.Evaluate(model, testData);

    Console.WriteLine();
    Console.WriteLine("PredictionModel quality metrics evaluation");
    Console.WriteLine("------------------------------------------");
    Console.WriteLine($"Accuracy: {metrics.Accuracy:P2}");
    Console.WriteLine($"Auc: {metrics.Auc:P2}");
    Console.WriteLine($"F1Score: {metrics.F1Score:P2}");
}
/// <summary>
/// Runs the model on a single piece of text.
/// </summary>
/// <param name="model">The trained model to query.</param>
/// <param name="sentence">The text to classify.</param>
/// <returns>The model's prediction for <paramref name="sentence"/>.</returns>
public static SentimentPrediction Predict(PredictionModel<SentimentData, SentimentPrediction> model, string sentence)
{
    // Only the text is supplied; the label field keeps its default value.
    var input = new SentimentData { SentimentText = sentence };
    return model.Predict(input);
}