Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions backends/xnnpack/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ import torchvision.models as models
from torch.export import export, ExportedProgram
from torchvision.models.mobilenetv2 import MobileNet_V2_Weights
from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner
from executorch.backends.xnnpack.utils.configs import get_transform_passes
from executorch.exir import EdgeProgramManager, ExecutorchProgramManager, to_edge
from executorch.exir.backend.backend_api import to_backend

Expand All @@ -56,6 +57,8 @@ sample_inputs = (torch.randn(1, 3, 224, 224), )
exported_program: ExportedProgram = export(mobilenet_v2, sample_inputs)
edge: EdgeProgramManager = to_edge(exported_program)

# Optional in the generic API, but recommended for XNNPACK before partitioning.
edge = edge.transform(get_transform_passes())
edge = edge.to_backend(XnnpackPartitioner())
```

Expand Down
7 changes: 7 additions & 0 deletions docs/source/backends/xnnpack/xnnpack-overview.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import torch
import torchvision.models as models
from torchvision.models.mobilenetv2 import MobileNet_V2_Weights
from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner
from executorch.backends.xnnpack.utils.configs import get_transform_passes
from executorch.exir import to_edge_transform_and_lower

mobilenet_v2 = models.mobilenetv2.mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT).eval()
Expand All @@ -40,12 +41,18 @@ sample_inputs = (torch.randn(1, 3, 224, 224), )
et_program = to_edge_transform_and_lower(
torch.export.export(mobilenet_v2, sample_inputs),
partitioner=[XnnpackPartitioner()],
transform_passes=get_transform_passes(),
).to_executorch()

with open("mv2_xnnpack.pte", "wb") as file:
et_program.write_to_file(file)
```

The `transform_passes` argument is optional in the generic lowering API. For
XNNPACK, passing `get_transform_passes()` is generally recommended because it
runs pre-partition graph transforms that expose supported patterns to the
partitioner.

See [Partitioner API](/backends/xnnpack/xnnpack-partitioner) for a reference on available partitioner options. <!-- @lint-ignore -->

----
Expand Down
7 changes: 7 additions & 0 deletions docs/source/backends/xnnpack/xnnpack-quantization.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import torchvision.models as models
from torchvision.models.mobilenetv2 import MobileNet_V2_Weights
from executorch.backends.xnnpack.quantizer.xnnpack_quantizer import XNNPACKQuantizer, get_symmetric_quantization_config
from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner
from executorch.backends.xnnpack.utils.configs import get_transform_passes
from executorch.exir import to_edge_transform_and_lower
from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e

Expand All @@ -52,9 +53,15 @@ quantized_model = convert_pt2e(prepared_model) # (5)
et_program = to_edge_transform_and_lower( # (6)
torch.export.export(quantized_model, sample_inputs),
partitioner=[XnnpackPartitioner()],
transform_passes=get_transform_passes(),
).to_executorch()
```

The `transform_passes` argument is optional in the generic lowering API. For
XNNPACK, passing `get_transform_passes()` is generally recommended because it
runs pre-partition graph transforms that expose supported patterns to the
partitioner.

See [PyTorch 2 Export Post Training Quantization](https://docs.pytorch.org/ao/main/tutorials_source/pt2e_quant_ptq.html) for more information.

### LLM quantization with quantize_
Expand Down
3 changes: 3 additions & 0 deletions examples/xnnpack/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ examples/xnnpack
## Delegating a Floating-point Model

The following command will produce a floating-point XNNPACK delegated model `mv2_xnnpack_fp32.pte` that can be run using XNNPACK's operators. It will also print out the lowered graph, showing what parts of the models have been lowered to XNNPACK via `executorch_call_delegate`.
The compiler also runs the optional pre-partition transform stage with
XNNPACK's `get_transform_passes()`. For XNNPACK, this stage is generally
recommended because it exposes supported patterns to the partitioner before
delegation decisions are made.

```bash
# For MobileNet V2
Expand Down
3 changes: 3 additions & 0 deletions examples/xnnpack/aot_compiler.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
# Copyright 2026 Arm Limited and/or its affiliates.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
Expand All @@ -13,6 +14,7 @@

import torch
from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner
from executorch.backends.xnnpack.utils.configs import get_transform_passes
from executorch.exir import (
EdgeCompileConfig,
ExecutorchBackendConfig,
Expand Down Expand Up @@ -106,6 +108,7 @@
edge = to_edge_transform_and_lower(
ep,
partitioner=[XnnpackPartitioner()],
transform_passes=get_transform_passes(),
compile_config=EdgeCompileConfig(
_check_ir_validity=False if args.quantize else True,
_skip_dim_order=True, # TODO(T182187531): enable dim order in xnnpack
Expand Down
Loading