注意
前往結尾以下載完整的範例程式碼
當自訂模型既非分類器亦非迴歸器 (替代方案)¶
注意
此範例改寫自〈當自訂模型既非分類器亦非迴歸器〉,改用範例〈玩轉 ONNX 運算子〉中建議的語法,來編寫自訂轉換器、形狀計算器和剖析器。
scikit-learn 的 API 指定迴歸器產生一個輸出,而分類器產生兩個輸出:預測標籤和機率。此處的目標是新增第三個結果,以告知機率是否高於給定的臨界值。這是在 validate 方法中實作的。
鳶尾花與評分¶
我們建立一個新的類別,它會訓練任意分類器,並實作上述的 validate 方法。
import inspect
import numpy as np
import skl2onnx
import onnx
import sklearn
from sklearn.base import ClassifierMixin, BaseEstimator, clone
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from skl2onnx import update_registered_converter
import os
from onnx.tools.net_drawer import GetPydotGraph, GetOpNodeProducer
import onnxruntime as rt
from skl2onnx import to_onnx, get_model_alias
from skl2onnx.proto import onnx_proto
from skl2onnx.common.data_types import FloatTensorType, Int64TensorType
from skl2onnx.algebra.onnx_ops import (
OnnxGreater,
OnnxCast,
OnnxReduceMaxApi18,
OnnxIdentity,
)
from skl2onnx.algebra.onnx_operator import OnnxSubEstimator
import matplotlib.pyplot as plt
class ValidatorClassifier(BaseEstimator, ClassifierMixin):
    """Classifier wrapper that adds a third result next to label and probabilities.

    The wrapped estimator is cloned and trained in ``fit``; ``predict`` and
    ``predict_proba`` delegate to the fitted clone.  ``validate`` reports, for
    every row, whether the highest predicted probability reaches ``threshold``.

    Parameters
    ----------
    estimator : estimator or None
        Classifier to wrap.  When ``None``, a
        ``LogisticRegression(solver="liblinear")`` is used.
    threshold : float
        Minimum value the highest class probability must reach for a row to
        be flagged ``1`` by ``validate``.
    """

    def __init__(self, estimator=None, threshold=0.75):
        ClassifierMixin.__init__(self)
        BaseEstimator.__init__(self)
        self.estimator = (
            LogisticRegression(solver="liblinear") if estimator is None else estimator
        )
        self.threshold = threshold

    def fit(self, X, y, sample_weight=None):
        """Train a clone of ``self.estimator`` and store it as ``self.estimator_``.

        ``sample_weight`` is forwarded only when the wrapped estimator's
        ``fit`` signature declares a ``sample_weight`` parameter.
        Returns ``self``.
        """
        fit_parameters = inspect.signature(self.estimator.fit).parameters
        fresh = clone(self.estimator)
        if "sample_weight" not in fit_parameters:
            self.estimator_ = fresh.fit(X, y)
        else:
            self.estimator_ = fresh.fit(X, y, sample_weight=sample_weight)
        return self

    def predict(self, X):
        """Return the labels predicted by the fitted estimator."""
        return self.estimator_.predict(X)

    def predict_proba(self, X):
        """Return the class probabilities predicted by the fitted estimator."""
        return self.estimator_.predict_proba(X)

    def validate(self, X):
        """Return 1 for each row whose highest probability is >= ``threshold``, else 0."""
        highest = self.predict_proba(X).max(axis=1)
        reached = highest >= self.threshold
        # Multiplying the boolean mask by 1 turns it into integers.
        return reached * 1
# Load the iris dataset, hold out a test split, and train the wrapper with
# its default settings.
data = load_iris()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y)
# fit() returns the estimator itself, so construction and training can be chained.
model = ValidatorClassifier().fit(X_train, y_train)
現在讓我們測量指標,該指標會告知預測的機率是否高於臨界值。
# Print the 0/1 flags returned by validate() for the held-out samples.
print(model.validate(X_test))
[0 1 0 1 0 1 1 1 0 1 1 0 1 1 1 1 0 0 1 0 1 0 0 1 1 0 0 0 0 0 0 1 0 0 0 1 0
0]
轉換為 ONNX¶
轉換會因為程式庫不知道與此新模型相關聯的任何轉換器而失敗。
# Attempt a conversion before any converter is registered for the class and
# print the RuntimeError raised by the library instead of aborting the script.
first_train_row = X_train[:1].astype(np.float32)
try:
    to_onnx(model, first_train_row, target_opset=12)
except RuntimeError as err:
    print(err)
Unable to find a shape calculator for type '<class '__main__.ValidatorClassifier'>'.
It usually means the pipeline being converted contains a
transformer or a predictor with no corresponding converter
implemented in sklearn-onnx. If the converted is implemented
in another library, you need to register
the converted so that it can be used by sklearn-onnx (function
update_registered_converter). If the model is not yet covered
by sklearn-onnx, you may raise an issue to
https://github.com/onnx/sklearn-onnx/issues
to get the converter implemented or even contribute to the
project. If the model is a custom model, a new converter must
be implemented. Examples can be found in the gallery.
自訂轉換器¶
我們重複使用範例〈為自己的模型編寫自己的轉換器〉中的部分程式碼。形狀計算器會定義已轉換模型每個輸出的形狀。
def validator_classifier_shape_calculator(operator):
    """Set the type and shape of the three outputs of a converted ValidatorClassifier.

    Parameters
    ----------
    operator
        Operator being converted.  Its first input provides the number of
        observations, ``raw_operator`` is the fitted scikit-learn model, and
        ``outputs`` must hold exactly three variables.

    Raises
    ------
    RuntimeError
        If the operator does not declare exactly three outputs.
    """
    input0 = operator.inputs[0]  # first input in ONNX graph
    outputs = operator.outputs  # outputs in ONNX graph
    op = operator.raw_operator  # scikit-learn model (must be fitted)
    if len(outputs) != 3:
        raise RuntimeError(f"3 outputs expected not {len(outputs)}.")

    N = input0.type.shape[0]  # number of observations
    C = op.estimator_.classes_.shape[0]  # number of classes

    outputs[0].type = Int64TensorType([N])  # label
    outputs[1].type = FloatTensorType([N, C])  # probabilities
    # The validation flag is computed once per observation (max over the
    # class axis, then a comparison), so its length is N, not C.
    outputs[2].type = Int64TensorType([N])  # validation
然後是轉換器。
def validator_classifier_converter(scope, operator, container):
    """Add the ONNX nodes implementing a fitted ValidatorClassifier to ``container``.

    The wrapped estimator is converted through ``OnnxSubEstimator``; its label
    and probability outputs are copied to the first two operator outputs, and
    a third output holds the thresholded maximum probability cast to int64.
    """
    graph_input = operator.inputs[0]  # first input in ONNX graph
    outputs = operator.outputs  # outputs in ONNX graph
    wrapper = operator.raw_operator  # scikit-learn model (must be fitted)
    target = container.target_opset

    # The model calls another one. The class `OnnxSubEstimator`
    # calls the converter for the wrapped estimator.
    sub_estimator = OnnxSubEstimator(
        wrapper.estimator_,
        graph_input,
        op_version=target,
        options={"zipmap": False},
    )

    # Highest probability per row, compared against the threshold.
    # NOTE(review): the scikit-learn `validate` method compares with `>=`
    # while this graph uses the Greater operator; results may differ when
    # the maximum equals the threshold exactly -- confirm this is acceptable.
    threshold = np.array([wrapper.threshold], dtype=np.float32)
    row_max = OnnxReduceMaxApi18(
        sub_estimator[1], axes=[1], keepdims=0, op_version=target
    )
    above = OnnxGreater(row_max, threshold, op_version=target)
    flag = OnnxCast(above, to=onnx_proto.TensorProto.INT64, op_version=target)

    # One Identity node per declared output, bound to the expected output name.
    final_nodes = (
        OnnxIdentity(
            sub_estimator[0], output_names=[outputs[0].full_name], op_version=target
        ),
        OnnxIdentity(
            sub_estimator[1], output_names=[outputs[1].full_name], op_version=target
        ),
        OnnxIdentity(flag, output_names=[outputs[2].full_name], op_version=target),
    )
    for node in final_nodes:
        node.add_to(scope, container)
然後是註冊。
# Register the shape calculator and the converter for ValidatorClassifier
# under the alias "CustomValidatorClassifier". No parser is passed here.
update_registered_converter(
    ValidatorClassifier,
    "CustomValidatorClassifier",
    validator_classifier_shape_calculator,
    validator_classifier_converter,
)
以及轉換…
# Attempt the conversion with the converter registered but no custom parser,
# and print the RuntimeError instead of aborting the script.
first_test_row = X_test[:1].astype(np.float32)
try:
    to_onnx(model, first_test_row, target_opset=12)
except RuntimeError as err:
    print(err)
3 outputs expected not 2.
它會失敗,因為程式庫預期模型的作用方式會像產生兩個輸出的分類器一樣。我們需要新增自訂剖析器,以告知程式庫此模型會產生三個輸出。
自訂剖析器¶
def validator_classifier_parser(scope, model, inputs, custom_parsers=None):
    """Declare the operator for ``model`` with one input and three outputs.

    The outputs are, in order, ``val_label`` (int64), ``val_prob`` (float)
    and ``val_val`` (int64).  ``custom_parsers`` is accepted but not used.
    Returns the operator's list of outputs.
    """
    this_operator = scope.declare_local_operator(get_model_alias(type(model)), model)

    # inputs: only the first one is consumed
    this_operator.inputs.append(inputs[0])

    # outputs: declare every variable first, then attach them in order
    declared = [
        scope.declare_local_variable(name, tensor_type)
        for name, tensor_type in (
            ("val_label", Int64TensorType()),
            ("val_prob", FloatTensorType()),
            ("val_val", Int64TensorType()),
        )
    ]
    for variable in declared:
        this_operator.outputs.append(variable)

    return this_operator.outputs
註冊。
# Register the same shape calculator and converter again, this time also
# passing the custom parser that declares three outputs.
update_registered_converter(
    ValidatorClassifier,
    "CustomValidatorClassifier",
    validator_classifier_shape_calculator,
    validator_classifier_converter,
    parser=validator_classifier_parser,
)
再次轉換。
# Convert once more now that the parser is registered; a single float32 row
# of X_test is passed as the sample input.
model_onnx = to_onnx(model, X_test[:1].astype(np.float32), target_opset=12)
最終測試¶
我們現在需要檢查 ONNX 的結果是否相同。
# Run the converted model with onnxruntime on five test rows and print each
# ONNX output next to the matching scikit-learn result.
X32 = X_test[:5].astype(np.float32)
sess = rt.InferenceSession(
    model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
)
results = sess.run(None, {"X": X32})

# (section title, scikit-learn method, ONNX output); the method is called
# inside the loop so each value is computed right before it is printed.
comparisons = (
    ("--labels--", model.predict, results[0]),
    ("--probabilities--", model.predict_proba, results[1]),
    ("--validation--", model.validate, results[2]),
)
for title, sklearn_method, onnx_value in comparisons:
    print(title)
    print("sklearn", sklearn_method(X32))
    print("onnx", onnx_value)
--labels--
sklearn [2 0 2 0 1]
onnx [2 0 2 0 1]
--probabilities--
sklearn [[1.16320340e-03 3.22427021e-01 6.76409775e-01]
[9.26436260e-01 7.35320104e-02 3.17295932e-05]
[3.76762740e-04 2.92398099e-01 7.07225139e-01]
[8.90886038e-01 1.09066293e-01 4.76695821e-05]
[1.10966901e-02 6.00581713e-01 3.88321597e-01]]
onnx [[1.16317812e-03 3.22427094e-01 6.76409781e-01]
[9.26436305e-01 7.35319778e-02 3.17567901e-05]
[3.76794109e-04 2.92398095e-01 7.07225144e-01]
[8.90886009e-01 1.09066285e-01 4.76959940e-05]
[1.10967318e-02 6.00581765e-01 3.88321519e-01]]
--validation--
sklearn [0 1 0 1 0]
onnx [0 1 0 1 0]
看起來不錯。
顯示 ONNX 圖形¶
# Render the ONNX graph to a PNG with Graphviz and display it with matplotlib.
import subprocess  # needed only for this rendering step

pydot_graph = GetPydotGraph(
    model_onnx.graph,
    name=model_onnx.graph.name,
    rankdir="TB",
    node_producer=GetOpNodeProducer(
        "docstring", color="yellow", fillcolor="yellow", style="filled"
    ),
)
pydot_graph.write_dot("validator_classifier.dot")

# Call `dot` with an argument list (no shell involved) and fail right away
# if it exits with an error, instead of ignoring the exit status and
# failing later while reading the image file.
subprocess.run(
    ["dot", "-O", "-Gdpi=300", "-Tpng", "validator_classifier.dot"],
    check=True,
)

image = plt.imread("validator_classifier.dot.png")
fig, ax = plt.subplots(figsize=(40, 20))
ax.imshow(image)
ax.axis("off")

(-0.5, 3557.5, 4934.5, -0.5)
此範例使用的版本
# Report the version of every package used by this example.
for label, package in (
    ("numpy:", np),
    ("scikit-learn:", sklearn),
    ("onnx: ", onnx),
    ("onnxruntime: ", rt),
    ("skl2onnx: ", skl2onnx),
):
    print(label, package.__version__)
numpy: 1.23.5
scikit-learn: 1.4.dev0
onnx: 1.15.0
onnxruntime: 1.16.0+cu118
skl2onnx: 1.16.0
腳本的總執行時間: (0 分 2.643 秒)