diff --git a/backends/xnnpack/README.md b/backends/xnnpack/README.md index 02664557664..5ef923f3321 100644 --- a/backends/xnnpack/README.md +++ b/backends/xnnpack/README.md @@ -46,6 +46,7 @@ import torchvision.models as models from torch.export import export, ExportedProgram from torchvision.models.mobilenetv2 import MobileNet_V2_Weights from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner +from executorch.backends.xnnpack.utils.configs import get_transform_passes from executorch.exir import EdgeProgramManager, ExecutorchProgramManager, to_edge from executorch.exir.backend.backend_api import to_backend @@ -56,6 +57,8 @@ sample_inputs = (torch.randn(1, 3, 224, 224), ) exported_program: ExportedProgram = export(mobilenet_v2, sample_inputs) edge: EdgeProgramManager = to_edge(exported_program) +# Optional in the generic API, but recommended for XNNPACK before partitioning. +edge = edge.transform(get_transform_passes()) edge = edge.to_backend(XnnpackPartitioner()) ``` diff --git a/docs/source/backends/xnnpack/xnnpack-overview.md b/docs/source/backends/xnnpack/xnnpack-overview.md index 5ef92c81126..fd74c4cf79c 100644 --- a/docs/source/backends/xnnpack/xnnpack-overview.md +++ b/docs/source/backends/xnnpack/xnnpack-overview.md @@ -32,6 +32,7 @@ import torch import torchvision.models as models from torchvision.models.mobilenetv2 import MobileNet_V2_Weights from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner +from executorch.backends.xnnpack.utils.configs import get_transform_passes from executorch.exir import to_edge_transform_and_lower mobilenet_v2 = models.mobilenetv2.mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT).eval() @@ -40,12 +41,18 @@ sample_inputs = (torch.randn(1, 3, 224, 224), ) et_program = to_edge_transform_and_lower( torch.export.export(mobilenet_v2, sample_inputs), partitioner=[XnnpackPartitioner()], + transform_passes=get_transform_passes(), ).to_executorch() with open("mv2_xnnpack.pte", "wb") as file: et_program.write_to_file(file) ``` +The `transform_passes` stage is optional in the generic lowering API. For +XNNPACK, `get_transform_passes()` is recommended in general because it runs +pre-partition graph transforms that expose supported patterns to the +partitioner. + See [Partitioner API](/backends/xnnpack/xnnpack-partitioner) for a reference on available partitioner options. ---- diff --git a/docs/source/backends/xnnpack/xnnpack-quantization.md b/docs/source/backends/xnnpack/xnnpack-quantization.md index 74d8eafba72..5082e8dcf7b 100644 --- a/docs/source/backends/xnnpack/xnnpack-quantization.md +++ b/docs/source/backends/xnnpack/xnnpack-quantization.md @@ -31,6 +31,7 @@ import torchvision.models as models from torchvision.models.mobilenetv2 import MobileNet_V2_Weights from executorch.backends.xnnpack.quantizer.xnnpack_quantizer import XNNPACKQuantizer, get_symmetric_quantization_config from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner +from executorch.backends.xnnpack.utils.configs import get_transform_passes from executorch.exir import to_edge_transform_and_lower from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e @@ -52,9 +53,15 @@ quantized_model = convert_pt2e(prepared_model) # (5) et_program = to_edge_transform_and_lower( # (6) torch.export.export(quantized_model, sample_inputs), partitioner=[XnnpackPartitioner()], + transform_passes=get_transform_passes(), ).to_executorch() ``` +The `transform_passes` stage is optional in the generic lowering API. For +XNNPACK, `get_transform_passes()` is recommended in general because it runs +pre-partition graph transforms that expose supported patterns to the +partitioner. + See [PyTorch 2 Export Post Training Quantization](https://docs.pytorch.org/ao/main/tutorials_source/pt2e_quant_ptq.html) for more information. ### LLM quantization with quantize_ diff --git a/examples/xnnpack/README.md b/examples/xnnpack/README.md index fb15d89522d..718c0a6a6c2 100644 --- a/examples/xnnpack/README.md +++ b/examples/xnnpack/README.md @@ -18,6 +18,9 @@ examples/xnnpack ## Delegating a Floating-point Model The following command will produce a floating-point XNNPACK delegated model `mv2_xnnpack_fp32.pte` that can be run using XNNPACK's operators. It will also print out the lowered graph, showing what parts of the models have been lowered to XNNPACK via `executorch_call_delegate`. +The compiler uses the optional pre-partition transform stage. For XNNPACK this +is recommended in general because it exposes supported patterns to the +partitioner before delegation decisions are made. ```bash # For MobileNet V2 diff --git a/examples/xnnpack/aot_compiler.py b/examples/xnnpack/aot_compiler.py index 9a78138adf3..2f558c01ea4 100644 --- a/examples/xnnpack/aot_compiler.py +++ b/examples/xnnpack/aot_compiler.py @@ -1,5 +1,6 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. +# Copyright 2026 Arm Limited and/or its affiliates. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. @@ -13,6 +14,7 @@ import torch from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner +from executorch.backends.xnnpack.utils.configs import get_transform_passes from executorch.exir import ( EdgeCompileConfig, ExecutorchBackendConfig, @@ -106,6 +108,7 @@ edge = to_edge_transform_and_lower( ep, partitioner=[XnnpackPartitioner()], + transform_passes=get_transform_passes(), compile_config=EdgeCompileConfig( _check_ir_validity=False if args.quantize else True, _skip_dim_order=True, # TODO(T182187531): enable dim order in xnnpack