Skip to content
Open
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ This file contains the changelog for the Deeploy project. The changelog is divid
- Deeploy-GAP9 Platform [#143](https://github.com/pulp-platform/Deeploy/pull/143)
- Update CLI interface Across Project, Fix Tutorial, and Remove Legacy Test [#157](https://github.com/pulp-platform/Deeploy/pull/157)
- Fix Python error when using Python 3.12.11 [#189](https://github.com/pulp-platform/Deeploy/pull/189)
- Add Generic-target support for the operators needed by MAGIA [#193](https://github.com/pulp-platform/Deeploy/pull/193)

### Added
- Add many missing docstrings
Expand All @@ -26,6 +27,7 @@ This file contains the changelog for the Deeploy project. The changelog is divid
- Add integer MaxPool1D for Generic platform and RQSConv1D support for PULPOpen, with corresponding kernel tests.
- Added GAP9 Platform Support: Deployer, Bindings, Templates, Tiler, DMA (L3Dma/MchanDma), target library, CI workflows
- Per-layer microbenchmarking on PULPOpen via `--profileMicrobenchmark`: new `PULPMicrobenchmark` code-transformation pass + `perf_utils.h` helpers report cycles, instructions, stalls and cache misses per layer in `RunNetwork`
- Add support for the Generic target for the following operators: [Ceil](https://onnx.ai/onnx/operators/onnx__Ceil.html), [Floor](https://onnx.ai/onnx/operators/onnx__Floor.html), [Clip](https://onnx.ai/onnx/operators/onnx__Clip.html), [Sub](https://onnx.ai/onnx/operators/onnx__Sub.html), [Exp](https://onnx.ai/onnx/operators/onnx__Exp.html), [Sigmoid](https://onnx.ai/onnx/operators/onnx__Sigmoid.html), [Swish](https://onnx.ai/onnx/operators/onnx__Swish.html), [HardSigmoid](https://onnx.ai/onnx/operators/onnx__HardSigmoid.html), [HardSwish](https://onnx.ai/onnx/operators/onnx__HardSwish.html), [InstanceNormalization](https://onnx.ai/onnx/operators/onnx__InstanceNormalization.html), [GroupNormalization](https://onnx.ai/onnx/operators/onnx__GroupNormalization.html), [AveragePool](https://onnx.ai/onnx/operators/onnx__AveragePool.html), [GlobalAveragePool](https://onnx.ai/onnx/operators/onnx__GlobalAveragePool.html), [GlobalMaxPool](https://onnx.ai/onnx/operators/onnx__GlobalMaxPool.html).

### Changed
- Use by default `devel` container for GAP9 CI
Expand Down
107 changes: 100 additions & 7 deletions Deeploy/Targets/Generic/Bindings.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,16 @@
from Deeploy.FutureExtension.CodeTransformationPasses.FutureCodeTransformation import FutureGeneration
from Deeploy.Targets.Generic.Templates import AddTemplate, BatchNormalizationTemplate, ConcatTemplate, ConvTemplate, \
ConvTransposeTemplate, DebugPrintTemplate, DequantTemplate, DummyTemplate, DWConvTemplate, FloatAddTemplate, \
FloatConvTemplate, FloatDivTemplate, FloatDWConvTemplate, FloatGELUTemplate, FloatGemmTemplate, \
FloatLayernormTemplate, FloatMatMulTemplate, FloatMaxPoolTemplate, FloatMulTemplate, FloatPadTemplate, \
FloatPowTemplate, FloatReduceMeanTemplate, FloatReluTemplate, FloatSoftmaxTemplate, FloatSqrtTemplate, \
GatherTemplate, GemmTemplate, IntegerDivTemplate, ITAMaxTemplate, ITAPartialMaxTemplate, MatMulTemplate, \
MaxPoolTemplate, MulTemplate, PadTemplate, QuantTemplate, ReduceMeanTemplate, ReduceSumTemplate, \
RequantShiftTemplate, ReshapeTemplate, RQIntegerDivTemplate, RQSiGELUTemplate, SliceTemplate, TransposeTemplate, \
iGELUTemplate, iLayernormTemplate, iRMSNormTemplate, iSoftmaxTemplate
FloatAveragePoolTemplate, FloatCeilTemplate, FloatClipTemplate, FloatConvTemplate, FloatDivTemplate, \
FloatDWConvTemplate, FloatExpTemplate, FloatFloorTemplate, FloatGELUTemplate, FloatGemmTemplate, \
FloatGlobalAveragePoolTemplate, FloatGlobalMaxPoolTemplate, FloatGroupNormTemplate, FloatHardSigmoidTemplate, \
FloatHardSwishTemplate, FloatInstanceNormTemplate, FloatLayernormTemplate, FloatMatMulTemplate, \
FloatMaxPoolTemplate, FloatMulTemplate, FloatPadTemplate, FloatPowTemplate, FloatReduceMeanTemplate, \
FloatReluTemplate, FloatSigmoidTemplate, FloatSoftmaxTemplate, FloatSqrtTemplate, FloatSubTemplate, \
FloatSwishTemplate, GatherTemplate, GemmTemplate, IntegerDivTemplate, ITAMaxTemplate, ITAPartialMaxTemplate, \
MatMulTemplate, MaxPoolTemplate, MulTemplate, PadTemplate, QuantTemplate, ReduceMeanTemplate, ReduceSumTemplate, \
RequantShiftTemplate, ReshapeTemplate, RQIntegerDivTemplate, RQSiGELUTemplate, SliceTemplate, SubTemplate, \
TransposeTemplate, iGELUTemplate, iLayernormTemplate, iRMSNormTemplate, iSoftmaxTemplate
from Deeploy.Targets.Generic.TypeCheckers import AddChecker, BatchNormChecker, ConcatChecker, ConvChecker, \
DebugPrintChecker, DequantChecker, DivChecker, DummyChecker, GatherChecker, GELUChecker, GEMMChecker, \
LayerNormChecker, MatMulChecker, MaxPoolChecker, MulChecker, PadChecker, QuantChecker, ReduceMeanChecker, \
Expand Down Expand Up @@ -54,6 +57,17 @@
FloatAddTemplate.referenceTemplate, BasicTransformer)
]

# Sub bindings reuse AddChecker for type checking (the original author notes the two are identical).
# Integer variant: every ordered pair of integer input types, always producing an int32 output pointer.
# Float variant: a single float32 x float32 -> float32 binding, appended after the integer ones.
BasicSubBindings = [
    *(NodeBinding(
        AddChecker([PointerClass(lhsType), PointerClass(rhsType)], [PointerClass(int32_t)]),
        SubTemplate.referenceTemplate,
        BasicTransformer,
    ) for lhsType in IntegerDataTypes for rhsType in IntegerDataTypes),
    NodeBinding(
        AddChecker([PointerClass(float32_t), PointerClass(float32_t)], [PointerClass(float32_t)]),
        FloatSubTemplate.referenceTemplate,
        BasicTransformer,
    ),
]

BasicConv1DBindings = [
NodeBinding(ConvChecker(
[PointerClass(type), PointerClass(type), PointerClass(type)], [PointerClass(type)]),
Expand Down Expand Up @@ -327,3 +341,82 @@
ConvTransposeTemplate.referenceTemplate,
BasicTransformer) for type in FloatDataTypes
]

# Ceil: one float32 input pointer, one float32 output pointer, emitted via FloatCeilTemplate.
BasicCeilBindings = [
    NodeBinding(
        DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]),
        FloatCeilTemplate.referenceTemplate,
        BasicTransformer,
    ),
]

# Floor: one float32 input pointer, one float32 output pointer, emitted via FloatFloorTemplate.
BasicFloorBindings = [
    NodeBinding(
        DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]),
        FloatFloorTemplate.referenceTemplate,
        BasicTransformer,
    ),
]

# Clip: three float32 input pointers and one float32 output pointer, emitted via FloatClipTemplate.
# NOTE(review): presumably the three inputs are data, min and max as in ONNX Clip -- confirm against the parser.
BasicClipBindings = [
    NodeBinding(
        DummyChecker(
            [PointerClass(float32_t), PointerClass(float32_t), PointerClass(float32_t)],
            [PointerClass(float32_t)],
        ),
        FloatClipTemplate.referenceTemplate,
        BasicTransformer,
    ),
]

# Exp: one float32 input pointer, one float32 output pointer, emitted via FloatExpTemplate.
BasicExpBindings = [
    NodeBinding(
        DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]),
        FloatExpTemplate.referenceTemplate,
        BasicTransformer,
    ),
]

# Sigmoid: one float32 input pointer, one float32 output pointer, emitted via FloatSigmoidTemplate.
BasicSigmoidBindings = [
    NodeBinding(
        DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]),
        FloatSigmoidTemplate.referenceTemplate,
        BasicTransformer,
    ),
]

# Swish: one float32 input pointer, one float32 output pointer, emitted via FloatSwishTemplate.
BasicSwishBindings = [
    NodeBinding(
        DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]),
        FloatSwishTemplate.referenceTemplate,
        BasicTransformer,
    ),
]

# HardSigmoid: one float32 input pointer, one float32 output pointer, emitted via FloatHardSigmoidTemplate.
BasicHardSigmoidBindings = [
    NodeBinding(
        DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]),
        FloatHardSigmoidTemplate.referenceTemplate,
        BasicTransformer,
    ),
]

# HardSwish: one float32 input pointer, one float32 output pointer, emitted via FloatHardSwishTemplate.
BasicHardSwishBindings = [
    NodeBinding(
        DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]),
        FloatHardSwishTemplate.referenceTemplate,
        BasicTransformer,
    ),
]

# InstanceNormalization: three float32 input pointers and one float32 output pointer,
# emitted via FloatInstanceNormTemplate.
# NOTE(review): presumably the inputs are data, scale and bias as in the ONNX operator -- confirm against the parser.
BasicInstanceNormBindings = [
    NodeBinding(
        DummyChecker(
            [PointerClass(float32_t), PointerClass(float32_t), PointerClass(float32_t)],
            [PointerClass(float32_t)],
        ),
        FloatInstanceNormTemplate.referenceTemplate,
        BasicTransformer,
    ),
]

# GroupNormalization: three float32 input pointers and one float32 output pointer,
# emitted via FloatGroupNormTemplate.
# NOTE(review): presumably the inputs are data, scale and bias as in the ONNX operator -- confirm against the parser.
BasicGroupNormBindings = [
    NodeBinding(
        DummyChecker(
            [PointerClass(float32_t), PointerClass(float32_t), PointerClass(float32_t)],
            [PointerClass(float32_t)],
        ),
        FloatGroupNormTemplate.referenceTemplate,
        BasicTransformer,
    ),
]

# AveragePool (1D): float32 pointer in, float32 pointer out, using the 1D reference template.
BasicAveragePool1DBindings = [
    NodeBinding(
        DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]),
        FloatAveragePoolTemplate.referenceTemplate1d,
        BasicTransformer,
    ),
]

# AveragePool (2D): float32 pointer in, float32 pointer out, using the 2D reference template.
BasicAveragePool2DBindings = [
    NodeBinding(
        DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]),
        FloatAveragePoolTemplate.referenceTemplate2d,
        BasicTransformer,
    ),
]

# GlobalAveragePool: float32 pointer in, float32 pointer out, emitted via FloatGlobalAveragePoolTemplate.
BasicGlobalAveragePoolBindings = [
    NodeBinding(
        DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]),
        FloatGlobalAveragePoolTemplate.referenceTemplate,
        BasicTransformer,
    ),
]

# GlobalMaxPool: float32 pointer in, float32 pointer out, emitted via FloatGlobalMaxPoolTemplate.
BasicGlobalMaxPoolBindings = [
    NodeBinding(
        DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]),
        FloatGlobalMaxPoolTemplate.referenceTemplate,
        BasicTransformer,
    ),
]
123 changes: 103 additions & 20 deletions Deeploy/Targets/Generic/Layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@
from Deeploy.DeeployTypes import NodeMapper, ONNXLayer, OperatorRepresentation, Shape


class SingleOperationPerElementLayer(ONNXLayer):
    """Shared base for layers whose cost is counted as one operation per element."""

    def computeOps(self):
        """Return the ``'size'`` entry of the mapped parser's operator representation."""
        opRep = self.mapper.parser.operatorRepresentation
        return opRep['size']


class ConcatLayer(ONNXLayer):

def __init__(self, maps: List[NodeMapper]):
Expand Down Expand Up @@ -168,10 +174,7 @@ def computeOps(self):
return self.mapper.parser.operatorRepresentation['size'] * 3 # One add, one mul, one div


class AddLayer(ONNXLayer):

def __init__(self, maps: List[NodeMapper]):
super().__init__(maps)
class AddLayer(SingleOperationPerElementLayer):

def computeShapes(self, inputShapes: Shape, outputShapes: Shape, operatorRepresentation,
channels_first) -> Tuple[Shape, Shape]:
Expand All @@ -184,8 +187,8 @@ def computeShapes(self, inputShapes: Shape, outputShapes: Shape, operatorReprese
outputShapes = [inputShapes[0]]
return (inputShapes, outputShapes)

def computeOps(self):
return self.mapper.parser.operatorRepresentation['size']

# SubLayer is a second name bound to the AddLayer class itself (no subclass is created),
# so Sub nodes get exactly AddLayer's shape and operation-count handling.
SubLayer = AddLayer


class MatMulLayer(ONNXLayer):
Expand Down Expand Up @@ -329,10 +332,7 @@ def computeOps(self):
return gemm + rqs


class MulLayer(ONNXLayer):

def __init__(self, maps: List[NodeMapper]):
super().__init__(maps)
class MulLayer(SingleOperationPerElementLayer):

def computeShapes(self, inputShapes: Shape, outputShapes: Shape, operatorRepresentation,
channels_first) -> Tuple[Shape, Shape]:
Expand All @@ -346,9 +346,6 @@ def computeShapes(self, inputShapes: Shape, outputShapes: Shape, operatorReprese
inputShapes[0] = inputShapes[1]
return (inputShapes, outputShapes)

def computeOps(self):
return self.mapper.parser.operatorRepresentation['size']


class ConvLayer(ONNXLayer):

Expand Down Expand Up @@ -438,13 +435,8 @@ def computeShapes(self, inputShapes: Shape, outputShapes: Shape, operatorReprese
return (inputShapes, outputShapes)


class ReluLayer(ONNXLayer):

def __init__(self, maps: List[NodeMapper]):
super().__init__(maps)

def computeOps(self):
return self.mapper.parser.operatorRepresentation['size']
class ReluLayer(SingleOperationPerElementLayer):
    """ReLU layer; adds nothing on top of SingleOperationPerElementLayer."""


class LayerNormLayer(ONNXLayer):
Expand Down Expand Up @@ -709,3 +701,94 @@ def computeOps(self):
numPx = opRep['dim_im_out_x']

return numPx * opsPerPx


class CeilLayer(SingleOperationPerElementLayer):
    """Ceil layer; adds nothing on top of SingleOperationPerElementLayer."""


class FloorLayer(SingleOperationPerElementLayer):
    """Floor layer; adds nothing on top of SingleOperationPerElementLayer."""


class ClipLayer(ONNXLayer):
    """Layer for the Clip operator."""

    def computeOps(self):
        """Return the operation estimate: two comparisons (against min and max) per element."""
        numElements = self.mapper.parser.operatorRepresentation['size']
        return numElements * 2


class ExpLayer(SingleOperationPerElementLayer):
    """Exp layer; adds nothing on top of SingleOperationPerElementLayer."""


class SigmoidLayer(ONNXLayer):
    """Layer for the Sigmoid operator."""

    def computeOps(self):
        """Return the operation estimate: four operations per element.

        sigmoid(x) = 1 / (1 + exp(-x)) is counted as negate, exp, add and divide.
        """
        numElements = self.mapper.parser.operatorRepresentation['size']
        return numElements * 4


class SwishLayer(ONNXLayer):
    """Layer for the Swish operator."""

    def computeOps(self):
        """Return the operation estimate: five operations per element.

        x * sigmoid(x) is counted as the four sigmoid operations plus one multiply.
        """
        numElements = self.mapper.parser.operatorRepresentation['size']
        return numElements * 5


class HardSigmoidLayer(ONNXLayer):
    """Layer for the HardSigmoid operator."""

    def computeOps(self):
        """Return the operation estimate: four operations per element.

        max(0, min(1, alpha * x + beta)) is counted as multiply, add, min-clip and max-clip.
        """
        numElements = self.mapper.parser.operatorRepresentation['size']
        return numElements * 4


class HardSwishLayer(ONNXLayer):
    """Layer for the HardSwish operator."""

    def computeOps(self):
        """Return the operation estimate: five operations per element.

        x * HardSigmoid(x) is counted as the four HardSigmoid operations plus one multiply.
        """
        numElements = self.mapper.parser.operatorRepresentation['size']
        return numElements * 5


class InstanceNormLayer(ONNXLayer):
    """Layer for the InstanceNormalization operator."""

    def computeOps(self):
        """Return the operation estimate ``B * C * (S * 8 + 3)``.

        Per element (8): mean accumulation (1), variance (sub, square, add = 3),
        normalisation (sub, div = 2) and affine transform (mul, add = 2).
        Per (batch, channel) pair (3): mean division (1) and variance sqrt + div (2).
        """
        rep = self.mapper.parser.operatorRepresentation
        batch = int(rep['batch_size'])
        channels = int(rep['num_channels'])
        spatial = int(rep['spatial'])

        perElementOps = 8
        perChannelOps = 3
        return batch * channels * (spatial * perElementOps + perChannelOps)


class GroupNormLayer(ONNXLayer):
    """Layer for the GroupNormalization operator."""

    def computeOps(self):
        """Return the operation estimate ``B * C * (S * 8 + 3)``.

        The count mirrors InstanceNormLayer: eight operations per element plus
        three operations per (batch, channel) pair.
        """
        # NOTE(review): group statistics are usually shared per (batch, group), not per channel,
        # so the "+ 3" term may overcount slightly -- confirm against the kernel implementation.
        rep = self.mapper.parser.operatorRepresentation
        batch = int(rep['batch_size'])
        channels = int(rep['num_channels'])
        spatial = int(rep['spatial'])

        perElementOps = 8
        perChannelOps = 3
        return batch * channels * (spatial * perElementOps + perChannelOps)


class AveragePoolLayer(ONNXLayer):
    """Layer for the AveragePool operator."""

    def computeOps(self):
        """Return the operation estimate: one operation per kernel element for every output element.

        Each output element costs (kernel_elements - 1) additions plus one division.
        """
        rep = self.mapper.parser.operatorRepresentation
        windowSize = int(np.prod(rep['kernel_shape']))
        return rep['data_out_size'] * windowSize


class GlobalAveragePoolLayer(ONNXLayer):
    """Layer for the GlobalAveragePool operator."""

    def computeOps(self):
        """Return the operation estimate ``batch_size * num_channels * spatial_size`` as an int.

        Each output channel costs (spatial_size - 1) additions plus one division.
        """
        rep = self.mapper.parser.operatorRepresentation
        numPlanes = rep['batch_size'] * rep['num_channels']
        return int(numPlanes * rep['spatial_size'])


class GlobalMaxPoolLayer(ONNXLayer):
    """Layer for the GlobalMaxPool operator."""

    def computeOps(self):
        """Return the operation estimate ``batch_size * num_channels * (spatial_size - 1)`` as an int.

        Each output channel costs (spatial_size - 1) comparisons.
        """
        rep = self.mapper.parser.operatorRepresentation
        numPlanes = rep['batch_size'] * rep['num_channels']
        comparisonsPerPlane = rep['spatial_size'] - 1
        return int(numPlanes * comparisonsPerPlane)
Loading
Loading