代码:
namespace MySparkAppML.ConsoleApp
{
    /// <summary>
    /// Console application that reads reviews from a CSV file with Spark and scores
    /// each review with an ML.NET model exposed to Spark SQL as a UDF.
    /// </summary>
    public class Program
    {
        // One prediction engine per thread. PredictionEngine is documented by ML.NET as
        // not thread-safe, so it is cached thread-locally instead of being shared.
        // Caching avoids reloading "MLModel.zip" and rebuilding the engine for every row.
        [ThreadStatic]
        private static PredictionEngine<ModelInput, ModelOutput> t_engine;

        /// <summary>
        /// Loads <c>yelptest.csv</c>, runs two local sanity-check predictions, registers
        /// <see cref="predict"/> as the Spark SQL UDF <c>MLudf</c>, and shows the scored reviews.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        public static void Main(string[] args)
        {
            SparkSession spark = SparkSession
                .Builder()
                .AppName(".NET for Apache Spark Sentiment Analysis")
                .GetOrCreate();

            DataFrame df = spark
                .Read()
                .Option("header", true)
                .Option("inferSchema", true)
                .Csv("yelptest.csv");
            df.Show();

            // Direct calls run inside the driver process, independent of the Spark worker.
            Console.WriteLine(predict("aaa"));
            Console.WriteLine(predict("bbb"));

            // NOTE(review): once registered, predict is invoked through the Spark UDF
            // machinery rather than directly. That process presumably has to resolve the
            // same Microsoft.ML / Microsoft.ML.DataView assemblies as this application;
            // confirm the deployed assembly versions if the UDF fails with a TypeLoadException.
            spark.Udf().Register<string, float>("MLudf", predict);

            df.CreateOrReplaceTempView("Reviews");
            DataFrame sqlDf = spark.Sql("SELECT ReviewText, MLudf(ReviewText) FROM Reviews");
            sqlDf.Show();

            // Keep the console window open until the user presses Enter.
            Console.ReadLine();
        }

        /// <summary>
        /// Scores a single review text with the model stored in <c>MLModel.zip</c>.
        /// </summary>
        /// <param name="text">Review text to score.</param>
        /// <returns>The <c>Score</c> field of the model output for <paramref name="text"/>.</returns>
        static float predict(string text)
        {
            // Build the engine lazily on first use for the current thread, then reuse it.
            if (t_engine == null)
            {
                MLContext mlContext = new MLContext();
                ITransformer model = mlContext.Model.Load("MLModel.zip", out _);
                t_engine = mlContext.Model.CreatePredictionEngine<ModelInput, ModelOutput>(model);
            }

            return t_engine.Predict(new ModelInput() { ReviewText = text }).Score;
        }
    }
}
报错:
在 Register 之外直接调用 predict 可以正常执行;
通过 Register 注册为 UDF 之后再执行则会报错。
主要的报错信息是:
Could not load type 'Microsoft.ML.Data.DataViewTypeAttribute' from assembly 'Microsoft.ML.DataView, Version=1.0.0.0, Culture=neutral, PublicKeyToken=cc7b13ffcd2ddd51'.
at Microsoft.ML.Data.SchemaDefinition.GetNameAndCustomAttributes(MemberInfo memberInfo, Type userType,
堆栈信息
[2021-01-09T09:25:26.2725688Z] [LAPTOP-8R49BD47] [Error] [TaskRunner] [0] ProcessStream() failed with exception: System.TypeLoadException: Could not load type 'Microsoft.ML.Data.DataViewTypeAttribute' from assembly 'Microsoft.ML.DataView, Version=1.0.0.0, Culture=neutral, PublicKeyToken=cc7b13ffcd2ddd51'.
at Microsoft.ML.Data.SchemaDefinition.GetNameAndCustomAttributes(MemberInfo memberInfo, Type userType, HashSet`1 colNames, String& name, IEnumerable`1& customAttributes)
at Microsoft.ML.Data.SchemaDefinition.Create(Type userType, Direction direction)
at Microsoft.ML.Data.InternalSchemaDefinition.Create(Type userType, Direction direction)
at Microsoft.ML.Data.DataViewConstructionUtils.CreateInputRow[TRow](IHostEnvironment env, SchemaDefinition schemaDefinition)
at Microsoft.ML.PredictionEngineBase`2..ctor(IHostEnvironment env, ITransformer transformer, Boolean ignoreMissingColumns, SchemaDefinition inputSchemaDefinition, SchemaDefinition outputSchemaDefinition)
at Microsoft.ML.Predicti
21/01/09 17:25:26 ERROR Executor: Exception in task 0.0 in stage 3.0 (TID 3) org.apache.spark.api.python.PythonException: System.TypeLoadException: Could not load type 'Microsoft.ML.Data.DataViewTypeAttribute' from assembly 'Microsoft.ML.DataView, Version=1.0.0.0, Culture=neutral, PublicKeyToken=cc7b13ffcd2ddd51'.
at Microsoft.ML.Data.SchemaDefinition.GetNameAndCustomAttributes(MemberInfo memberInfo, Type userType, HashSet`1 colNames, String& name, IEnumerable`1& customAttributes)
at Microsoft.ML.Data.SchemaDefinition.Create(Type userType, Direction direction)
at Microsoft.ML.Data.InternalSchemaDefinition.Create(Type userType, Direction direction)
at Microsoft.ML.Data.DataViewConstructionUtils.CreateInputRow[TRow](IHostEnvironment env, SchemaDefinition schemaDefinition)
at Microsoft.ML.PredictionEngineBase`2..ctor(IHostEnvironment env, ITransformer transformer, Boolean ignoreMissingColumns, SchemaDefinition inputSchemaDefinition, SchemaDefinition outputSchemaDefinition)
at Microsoft.ML.PredictionEngine`2..ctor(IHostEnvironment env, ITransformer transformer, Boolean ignoreMissingColumns, SchemaDefinition inputSchemaDefinition, SchemaDefinition outputSchemaDefinition)
at Microsoft.ML.PredictionEngineExtensions.CreatePredictionEngine[TSrc,TDst](ITransformer transformer, IHostEnvironment env, Boolean ignoreMissingColumns, SchemaDefinition inputSchemaDefinition, SchemaDefinition outputSchemaDefinition)
at Microsoft.ML.ModelOperationsCatalog.CreatePredictionEngine[TSrc,TDst](ITransformer transformer, Boolean ignoreMissingColumns, SchemaDefinition inputSchemaDefinition, SchemaDefinition outputSchemaDefinition)
at MySparkAppML.ConsoleApp.Program.predict(String text) in C:\Users\YD\source\repos\MySparkAppML.ConsoleApp\Program.cs:line 35
at Microsoft.Spark.Sql.PicklingUdfWrapper`2.Execute(Int32 splitIndex, Object[] input, Int32[] argOffsets) in /_/src/csharp/Microsoft.Spark/Sql/PicklingUdfWrapper.cs:line 51
我的环境已经全部装好,环境变量也配置好了,用 Spark 跑其他批量数据处理都没有问题,
只有这个机器学习的 predict 有问题。去国外网站搜索也没有找到答案,代码和文档中的一模一样,环境也检查过无数次。