From 2de89bc355ea478b7d9b0d3f9185876e7047931d Mon Sep 17 00:00:00 2001
From: Navan Chauhan
Date: Fri, 28 Mar 2025 12:23:11 -0600
Subject: add instructions for quantization

---
 README.md | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/README.md b/README.md
index 0e5b25f..bf94169 100644
--- a/README.md
+++ b/README.md
@@ -25,3 +25,37 @@ Works with handwritten formulae as well!
 - [ ] Image Export
 - [ ] UI Overhaul
 - [ ] Optimizations
+
+## Misc
+
+### Quantization
+
+#### Encoder Model
+
+```bash
+python -m onnxruntime.quantization.preprocess --input iTexSnip/models/encoder_model.onnx --output encoder-infer.onnx
+```
+
+```python
+import onnx
+from onnxruntime.quantization import quantize_dynamic, QuantType
+og = "encoder-infer.onnx"
+quant = "encoder-quant.onnx"
+quantized_model = quantize_dynamic(og, quant, nodes_to_exclude=['/embeddings/patch_embeddings/projection/Conv'])
+```
+
+It might be better to quantize the encoder using static quantization.
+
+#### Decoder Model
+
+```bash
+python -m onnxruntime.quantization.preprocess --input iTexSnip/models/decoder_model.onnx --output decoder-infer.onnx
+```
+
+```python
+import onnx
+from onnxruntime.quantization import quantize_dynamic, QuantType
+og = "decoder-infer.onnx"
+quant = "decoder-quant.onnx"
+quantized_model = quantize_dynamic(og, quant)
+```
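The patch leaves static quantization of the encoder as an open idea. A minimal sketch of what that could look like with onnxruntime's `quantize_static` follows; the `EncoderDataReader` class, the random calibration tensors, and the output name `encoder-quant-static.onnx` are illustrative assumptions, and real preprocessed screenshots would give far more representative activation ranges than random data:

```python
import numpy as np
import onnxruntime as ort
from onnxruntime.quantization import (
    CalibrationDataReader,
    QuantFormat,
    QuantType,
    quantize_static,
)


class EncoderDataReader(CalibrationDataReader):
    """Yields a few calibration batches; the quantizer calls get_next()
    until it returns None."""

    def __init__(self, model_path, num_samples=16):
        sess = ort.InferenceSession(model_path, providers=["CPUExecutionProvider"])
        inp = sess.get_inputs()[0]
        # Replace symbolic dims (e.g. batch size) with 1.
        shape = [d if isinstance(d, int) else 1 for d in inp.shape]
        # Placeholder: random tensors stand in for real preprocessed
        # screenshots, which would calibrate activation ranges far better.
        self._batches = iter(
            [{inp.name: np.random.rand(*shape).astype(np.float32)}
             for _ in range(num_samples)]
        )

    def get_next(self):
        return next(self._batches, None)


reader = EncoderDataReader("encoder-infer.onnx")
quantize_static(
    "encoder-infer.onnx",
    "encoder-quant-static.onnx",
    reader,
    quant_format=QuantFormat.QDQ,
    activation_type=QuantType.QInt8,
    weight_type=QuantType.QInt8,
    # Mirrors the exclusion used in the dynamic-quantization call above.
    nodes_to_exclude=["/embeddings/patch_embeddings/projection/Conv"],
)
```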
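Whichever route is taken, a quick sanity check of the resulting artifacts is cheap. A rough sketch, assuming the filenames produced above: it compares on-disk sizes and runs the quantized encoder once on a dummy input whose name and shape are read from the model rather than hard-coded:

```python
import os

import numpy as np
import onnxruntime as ort

# Compare file sizes before and after quantization.
for path in ("encoder-infer.onnx", "encoder-quant.onnx"):
    print(f"{path}: {os.path.getsize(path) / 1e6:.1f} MB")

# Make sure the quantized encoder still loads and runs.
sess = ort.InferenceSession("encoder-quant.onnx", providers=["CPUExecutionProvider"])
inp = sess.get_inputs()[0]
shape = [d if isinstance(d, int) else 1 for d in inp.shape]
dummy = np.random.rand(*shape).astype(np.float32)
outputs = sess.run(None, {inp.name: dummy})
print("output shape:", outputs[0].shape)
```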