轉換模型的不同方法

此範例利用 sklearn-onnx 中新增的一些程式碼，示範如何以簡便的方式實作自訂轉換器。

使用 onnxruntime 進行預測

檢查轉換後的模型是否正常運作的簡單函式。

import onnxruntime
import onnx
import numpy
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.cluster import KMeans
from sklearn.pipeline import make_pipeline
from onnxruntime import InferenceSession
from skl2onnx import convert_sklearn, to_onnx, wrap_as_onnx_mixin
from skl2onnx.common.data_types import FloatTensorType
from skl2onnx.algebra.onnx_ops import OnnxSub, OnnxDiv
from skl2onnx.algebra.onnx_operator_mixin import OnnxOperatorMixin


def predict_with_onnxruntime(onx, X):
    """Run a converted ONNX model with onnxruntime and return its first output.

    The model is serialized and loaded into an ``InferenceSession`` restricted
    to the CPU execution provider. ``X`` is cast to ``float32`` and fed under
    the name of the model's first declared input.
    """
    session = InferenceSession(
        onx.SerializeToString(), providers=["CPUExecutionProvider"]
    )
    first_input = session.get_inputs()[0]
    feeds = {first_input.name: X.astype(np.float32)}
    outputs = session.run(None, feeds)
    return outputs[0]

簡單的 KMeans

第一種方式:convert_sklearn()

# Fit a two-cluster KMeans on a small (10, 2) integer matrix.
X = np.arange(20).reshape((10, 2))
tr = KMeans(n_clusters=2).fit(X)

# convert_sklearn() is given the input name and its tensor type explicitly.
initial_types = [("X", FloatTensorType((None, X.shape[1])))]
onx = convert_sklearn(tr, initial_types=initial_types, target_opset=12)
print(predict_with_onnxruntime(onx, X))
[1 1 1 1 1 0 0 0 0 0]

第二種方式:to_onnx():不再需要處理 FloatTensorType

# Fit a two-cluster KMeans on a small (10, 2) integer matrix.
X = np.arange(20).reshape((10, 2))
tr = KMeans(n_clusters=2).fit(X)

# to_onnx() receives a float32 copy of the data instead of declared types.
sample = X.astype(np.float32)
onx = to_onnx(tr, sample, target_opset=12)
print(predict_with_onnxruntime(onx, X))
[0 0 0 0 0 1 1 1 1 1]

第三種方式:wrap_as_onnx_mixin():將機器學習模型包裝到繼承自 OnnxOperatorMixin 的新類別中。

# Fit a two-cluster KMeans on a small (10, 2) integer matrix.
X = np.arange(20).reshape((10, 2))
tr = KMeans(n_clusters=2).fit(X)

# Wrap the already-fitted estimator, then convert through the wrapper.
tr_mixin = wrap_as_onnx_mixin(tr, target_opset=12)
sample = X.astype(np.float32)
onx = tr_mixin.to_onnx(sample)
print(predict_with_onnxruntime(onx, X))
[0 0 0 0 0 1 1 1 1 1]

第四種方式:wrap_as_onnx_mixin():可以在擬合模型之前呼叫。

X = np.arange(20).reshape((10, 2))

# Wrap the estimator first; fitting happens on the wrapped object.
unfitted = KMeans(n_clusters=2)
tr = wrap_as_onnx_mixin(unfitted, target_opset=12)
tr.fit(X)

sample = X.astype(np.float32)
onx = tr.to_onnx(sample)
print(predict_with_onnxruntime(onx, X))
[1 1 1 1 1 0 0 0 0 0]

管線和自訂物件

這是一個簡單的縮放器。

class CustomOpTransformer(BaseEstimator, TransformerMixin, OnnxOperatorMixin):
    """Column-wise scaler that can describe itself as an ONNX sub-graph.

    ``fit`` stores the per-column mean (``W_``) and standard deviation
    (``S_``) of the training data; ``transform`` returns ``(X - W_) / S_``.
    ``to_onnx_operator`` expresses the same computation with ``OnnxSub``
    followed by ``OnnxDiv``.
    """

    def __init__(self):
        BaseEstimator.__init__(self)
        TransformerMixin.__init__(self)
        # Opset used by to_onnx_operator when the caller gives no target_opset.
        self.op_version = 12

    def fit(self, X, y=None):
        """Compute the column means and standard deviations of ``X``.

        ``y`` is accepted but not used. Returns ``self``.
        """
        self.W_ = np.mean(X, axis=0)
        self.S_ = np.std(X, axis=0)
        return self

    def transform(self, X):
        """Return ``X`` centered by ``W_`` and divided by ``S_``."""
        return (X - self.W_) / self.S_

    def onnx_shape_calculator(self):
        """Return a shape calculator that copies the input type to the output."""

        def shape_calculator(operator):
            operator.outputs[0].type = operator.inputs[0].type

        return shape_calculator

    def to_onnx_operator(
        self, inputs=None, outputs=("Y",), target_opset=None, **kwargs
    ):
        """Build the ONNX operators equivalent to :meth:`transform`.

        Parameters
        ----------
        inputs
            Inputs of the operator; the first one is used. Must not be None.
        outputs
            Output names given to the final ``OnnxDiv`` node.
        target_opset
            Opset for the generated nodes; falls back to ``self.op_version``
            when falsy.
        **kwargs
            Accepted but not used here.

        Raises
        ------
        RuntimeError
            If ``inputs`` is None.
        """
        if inputs is None:
            raise RuntimeError("Parameter inputs should contain at least one name.")
        opv = target_opset or self.op_version
        i0 = self.get_inputs(inputs, 0)
        # Constants are cast to float32 before being embedded in the graph.
        W = self.W_.astype(np.float32)
        S = self.S_.astype(np.float32)
        # Both nodes use the resolved opset so the requested target_opset is
        # honoured consistently (OnnxSub was previously pinned to 12).
        centered = OnnxSub(i0, W, op_version=opv)
        return OnnxDiv(centered, S, output_names=outputs, op_version=opv)

方式 1

# Scale with the custom transformer, then cluster with KMeans.
X = np.arange(20).reshape((10, 2))
tr = make_pipeline(CustomOpTransformer(), KMeans(n_clusters=2)).fit(X)

# convert_sklearn() is given the input name and its tensor type explicitly.
initial_types = [("X", FloatTensorType((None, X.shape[1])))]
onx = convert_sklearn(tr, initial_types=initial_types, target_opset=12)
print(predict_with_onnxruntime(onx, X))
[0 0 0 0 0 1 1 1 1 1]

方式 2

# Scale with the custom transformer, then cluster with KMeans.
X = np.arange(20).reshape((10, 2))
tr = make_pipeline(CustomOpTransformer(), KMeans(n_clusters=2)).fit(X)

# to_onnx() receives a float32 copy of the data instead of declared types.
sample = X.astype(np.float32)
onx = to_onnx(tr, sample, target_opset=12)
print(predict_with_onnxruntime(onx, X))
[1 1 1 1 1 0 0 0 0 0]

方式 3

# Scale with the custom transformer, then cluster with KMeans.
X = np.arange(20).reshape(10, 2)
tr = make_pipeline(CustomOpTransformer(), KMeans(n_clusters=2))
tr.fit(X)

# Wrap the fitted pipeline and convert it through the wrapper. The result must
# be bound to ``onx``; otherwise the prediction below would run whatever model
# ``onx`` referred to before this section.
tr_mixin = wrap_as_onnx_mixin(tr, target_opset=12)
onx = tr_mixin.to_onnx(X.astype(np.float32))

print(predict_with_onnxruntime(onx, X))
[1 1 1 1 1 0 0 0 0 0]

方式 4

X = np.arange(20).reshape((10, 2))

# Wrap the unfitted pipeline first; fitting happens on the wrapped object.
pipeline = make_pipeline(CustomOpTransformer(), KMeans(n_clusters=2))
tr = wrap_as_onnx_mixin(pipeline, target_opset=12)
tr.fit(X)

sample = X.astype(np.float32)
onx = tr.to_onnx(sample)
print(predict_with_onnxruntime(onx, X))
[0 0 0 0 0 1 1 1 1 1]

顯示 ONNX 圖

最後，讓我們看看以 sklearn-onnx 轉換後所得到的 ONNX 圖。

from onnx.tools.net_drawer import GetPydotGraph, GetOpNodeProducer  # noqa

# Node styling passed to the graph builder: filled yellow nodes.
node_producer = GetOpNodeProducer(
    "docstring", color="yellow", fillcolor="yellow", style="filled"
)

# Build a top-to-bottom graph of the last converted model and write it as DOT.
graph = onx.graph
pydot_graph = GetPydotGraph(
    graph, name=graph.name, rankdir="TB", node_producer=node_producer
)
pydot_graph.write_dot("pipeline_onnx_mixin.dot")

import os  # noqa

# Render the DOT file to PNG with Graphviz. The exit status is checked so that
# a missing or failing ``dot`` executable is reported here rather than later,
# when the PNG is read.
dot_status = os.system("dot -O -Gdpi=300 -Tpng pipeline_onnx_mixin.dot")
if dot_status != 0:
    raise RuntimeError(
        "Graphviz 'dot' failed to render pipeline_onnx_mixin.dot "
        "(exit status %r)." % dot_status
    )

import matplotlib.pyplot as plt  # noqa

# Load the rendered PNG and display it on a large figure with the axes hidden.
png_path = "pipeline_onnx_mixin.dot.png"
image = plt.imread(png_path)
figure_size = (40, 20)
fig, ax = plt.subplots(figsize=figure_size)
ax.imshow(image)
ax.axis("off")
plot convert syntax
(-0.5, 3103.5, 6900.5, -0.5)

此範例使用的版本

import sklearn  # noqa
import skl2onnx  # noqa

# Report the version of each package used above. Labels keep their exact
# spacing so the printed lines are unchanged.
for label, module in (
    ("numpy:", numpy),
    ("scikit-learn:", sklearn),
    ("onnx: ", onnx),
    ("onnxruntime: ", onnxruntime),
    ("skl2onnx: ", skl2onnx),
):
    print(label, module.__version__)
numpy: 1.23.5
scikit-learn: 1.4.dev0
onnx:  1.15.0
onnxruntime:  1.16.0+cu118
skl2onnx:  1.16.0

腳本的總執行時間:(0 分鐘 3.140 秒)

範例圖庫由 Sphinx-Gallery 產生