SHARK: bf16 result mismatch for Conv2D op
- Following is the Conv2d pytorch module.
import torch
import torch.nn as nn


class op_conv2d(nn.Module):
    """Single-layer Conv2d module used to reproduce the bf16 result mismatch."""

    def __init__(self):
        super().__init__()
        # Conv2d with asymmetric kernel/stride/padding/dilation:
        # in_channels=8, out_channels=10, kernel=(3, 5), stride=(2, 1),
        # padding=(4, 2), dilation=(3, 1).
        self.layers = nn.Sequential(
            nn.Conv2d(8, 10, (3, 5), stride=(2, 1), padding=(4, 2), dilation=(3, 1))
        )

    def forward(self, x):
        return self.layers(x)


# Seed RNG so the reproducer (weights and input) is deterministic across runs.
torch.manual_seed(0)

model = op_conv2d()
model_bf16 = model.to(torch.bfloat16)
# Input is NCHW (2, 8, 12, 16); with this conv config the output is (2, 10, 7, 16).
test_input_bf16 = torch.randn(2, 8, 12, 16).to(torch.bfloat16)
test_output_bf16 = model_bf16(test_input_bf16)
print("Input:", test_input_bf16)
print("Output:", test_output_bf16)
- This is the linalg IR of the above pytorch module:
// #map broadcasts the 1-D bias (indexed by output channel d1) into the 4-D output;
// #map1 is the identity map over the 4-D output tensor.
#map = affine_map<(d0, d1, d2, d3) -> (d1)>
#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
module {
// Mutable global seed emitted by the lowering; not referenced by @main_graph below.
ml_program.global private mutable @global_seed(dense<0> : tensor<i64>) : tensor<i64>
// Conv2d forward: NCHW input 2x8x12x16 -> output 2x10x7x16, all bf16.
func.func @main_graph(%arg0: tensor<2x8x12x16xbf16>) -> tensor<2x10x7x16xbf16> {
// Convolution weights (out=10, in=8, kH=3, kW=5) as a raw bf16 hex payload.
%cst = arith.constant dense<"0x863DAA3D53BDADBBB93DF13C8BBD91BD94BDB03D66BD9DBD9C3D923B8ABD883BA43DADBDBABC953C253D6CBD98BD8CBD5D3D21BCB4BDA53D743D15BD1EBD953D5CBC6F3DA8BD3C3C61BD24BBCC3AD8BCB13D44BDE43C73BD303C1CBC663C5EBDA63DB03DD7BC523D82BDB93D563D77BD35BD523D78BD46BBAD3DA03DA5BDF93C68BCE5BC563D04BDB8BDB73D6D3D05BD9CBDB13CAEBC15BD89BC47BD4A3D75BDE6BC51BDA7BCAFBC8BBD4B3D5B3D2DBD513D88BCA93DC7BC18BC49BB27BDCABCB5BD253DADBD7E3D94BCBDBC343D4BBDA33DB8BB143D2CBCD23C213D16BD8BBC80BCD83C8C3D44BC3C3D37BD38BD8C3D373D46BC1CBDCC3C41BB743D7FBD15BB7BBD983B8CBD9E3D73BD033D8B3D533CAABDA63C853D2E3DB4BB83BD9B3CF5BC08BD49BD773D5FBD8D3C703D7FBD9D3D133D0E3D8C3DB0BD9E3D87BC74BDB5BD283CC5BB843D863D84BDDDBC9FBDABBC633D8A3D20BDD53C13BC453C5CBD4B3D94BDB83C463D6EBBEBBC89BDAD3D273D05BDAB3CD83B823DE4BBB73D69BCB1BC81BD9B3D573A5BBD32BD6F3DE43CEBBB95BD22BDB33B8E3DF7BC863C133D893C9ABC6BBD8C3D6B3D92BD983D22BB893C173D803D7C3C30BD2BBD07BDFFBCA8BD68BDB0BD653C91BDA0BCB93D9BBD973D56BDCC3CB8BB523B8D3DC4BB6FBDEB3C48BDA5BD753D443D79BCC83CAFBD273C423CD83C8B3D7FBD3ABDAA3D293D89BD2CBDF7BB1BBCEE3C2B3D67BD09BD50BDA73CB43D44BD95BDCABBA93AE03C913D8E3D503D97BC81BC45BB863C0C3D88BD9E3D333DC0BB8CBD9B3DF43CB93D2B3D0A3DF1BC32BC1ABD8BBD71BDA2BDB63D933DF8BCA7BD993D6E3D92BCA43D6BBD8DBDA0BD75BD86BB29BDAB3D8EBDA13DF5BC9B3C98BD143D93BDE23CB1BC753DAA3D693D2C3D1BBCB33D64BDC4BC27BD13BDA13CA4BD8FBC6B3C3ABD2A3DA23D323DD8BC3C3C70BDA23B673D9DBD84BD553D11BD1ABDA73D99BC5DBC9B3DBFBBF9BC6C3D9ABD8A3D45BD72BC0D3CD4BB9DBD1C3D933DAABD68BBF23C623DA83C26BD6EBC33BD943DBABD393C8C3D773D5CBD9A3DABBD82BDB13DB4BC91BD1A3D58BD233C053DD1BC963DA0BD4DBC45BD663DA83D4D3CA2BD5ABDCC3C933DB6BD4DBD23BC44BC7D3D45BC953D9ABD8CBD9F3BBA3D033D1D3D3DBD70BD7E3DB53C82BA153BF93C31BDAB3D543D843DA7BD743D24BDB8BC32BA903DAFBD83BD343C4CBC2F3CC9BCB63B29BD53BD3A3D23BD44BC2BBD893D87BC8BBDB13D643D7B3CA43CC73CFA3B16BC173C6DBDAC3D383C45BD7D3D8E3D49BC07BD903D65BD7C3D653DA7BDE83CB9BDB8BD7CBC96BC83BD8B3C5CBD813DA53D51BD94BD7E3CFC3CB03D95BD9B3D6E3B553D223D7DBC2B3D923DB83D8C3DB83D7
B3CB0BD13BC7FBD7DBDB23C89BD8F3D26BD0D3C073D33BD193C01BD96BC213D2C3DA13D61BA56BDB23DBC3CA1BDB4BD643CBD3CF1BCA23B273D97BDA5BCAE3CB6BD543C943D97BD5DBC803D2DBC44BDEABBB13DB13A6CBD72BD7B3C0D3D4D3D7D3DA2BC883D433D48BD8D3C773D4DBD143D98BD77BDCBBC8F3D90BDA5BDE83BAF3D7F3D71BC01BD5DBD9F3D5FBDCA3CEC3CD73CA53D9C3D363D9B3C4F3DB73CF8BCAD3D97BD56BCBFBCAB3D73BD8B3D1ABDB93CAF3D2B3DD3BC9B3D2A3DB63D963DA1BD9CBD20BC2EBDED3C3CBDAC3D1B3DB83D1CBD043D073D78BD96BD84BD8E3D9CBC503DD43CFA3BA63D4EBDB73CA5BDAD3D81BD3D3D213D83BD11BD863A453C97BDC5BBDCBC103DA6BDB1BD14BDA83D7BBD57BC79BD273D8DBD253D863DB93B9ABD8C3DF63C48BD80BD7A3D953BB83DE9BCA53C2F3DA1BD513D04BD5A3DB0BC11BD343D513D9DBCBA3D233D6ABDA03C8CBD6C3DA63D803CCFBC4ABD3B3D8FBD3BBDB4BD983C9E3D823D303D49BD313C60BD653D4E3C8E3D44BDC7BCA5BD0FBD023D5A3C903B8BBDCABB713D6D3CA3BD06BD71BDD73C763DA1BCD7BC813D9FBDFBBC84BD3F3C803C55BD8CBDB13C8D3D8A3C8F3D45BD22BCA23C50BC423D933D9E3DAABC893D44BD4FBCC33CB63D7C3D153BA8BD91BD8B3DCB3C3BBDECBC95BDB03D51BC53BC913D623DA8BBABBDAA3B343DB2BD293BD6BCF93B8B3DA0BD5CBD073D9BBDB0BDD4BCBDBC0CBD8B3DA83D643D873DA9BDAF3DA4BD703C92BD1EBD213DBBBBACBDAD3D013DF23C933D71BDBF3CA63DC4BC183DAE3D17BD7D3DCDBC343D1F3D43BCABBD66BDB7BD5C3C3ABDA3BC193D8EBDA1BC983DA8BD8D3DC139363C88BD88BD97BD35BD833CD53C6BBD9FBDB83D28BD88BD5CBD92BD283DB7BD96BDB83D3F3D20BD683DA4BD313D02BDE13AB03DE73C47BD65BD3C3D523C5B3D853D29BC81BD45BCAE3D22BAB43DB9BD34BDB6BD9B3D36BDA73D5DBCFB3C42BD913D0EBC98BD8C3D76BC6DBC3ABD493D963D4EBD253DADBD88BDA83D69BD9B3CA7BBBABDD13C453D073D70BDDDBC623B80BD173DA23D48BCBC3C88BDD7BC803DF8BCB3BD0F3DB0BD16BD653D963D313DFEBA443C0EBD22BCA8BDDB3C1E3C9CBC3F3D7B3C8D3B153D503D973D8DBD683D28BD2BBD163D35BD3CBCEC3B483D30BD353DCB3C75BC7CBC5E3D5A3D633DA53DB23D90BD243DB33D643DA93B66BD623D5DBDAB3D85BDB73D133DE93C20BD8A3D343DD23C07BD403D9A3D663D8BBC8DBD973D673DFC3B873D523D24BC19BC95BBF53C22BD9CBDABBDC83CA63D9D3C25BC13BC193D52BD61BD403D94BD0EBC763C513BB9BD9ABC793D29BC4DBDB7BD2A3D653CA1BDAA3DA83DA03D22BD953D36BD44BD7F3D7C3DB8BD1C3D8FBD63BDA0BDB73DB3BD86BD9
D3D703D22BCD4BC6CBD6B3DA7BD023CAABC59BD953C043DC03BACBDF6BCBA3DA93D9A3C83BDB53DA53D99BD813D203B8F3D41BDAD3D32BDAF3C0FBCD73C2EBD1A3D85BDB0BCE83C6D3D0C3DA73D13BD30BDA6BC9F3D0BBDB93DDB3CBE3CF43C323D0BBCAB3DB53CB83D023B52BD433D1C3D013DEFBC8C3DC3BC54BDA2BD65BD393D7ABD933D073D15BD91BD813D743CB5BB763D9A3D5D3C733DB23D9F3DA73D9D3D753D72BD953C1B3D9D3D953D603D7ABC47BD75BDB9BD99BD393DDA3CA33D5EBDB4BD783D88BDA1BC0DBC8E3C84BD89BC353C25BD0A3DFC3C9D3DC9BC633D93BB6D3C7C3DA7BD1EBDAF3DC4BBBA3D513D903D9B3DCB3C9BBDA83D5BBCAEBC323CABBA533C29BDB83D573D153B94BA8B3CF63B783D84BD07BB723D463C4D3C12BD8D3D04BDB8BAB3BD68BDA43D56BC623DB63D903D023D32BDB9BD8E3D4E3CB8BDB93D6C3DFC3C64BDA6BC35BD3EBD813D6ABD4F3D543DA33D1B3D4D3D8D3C873D813ADE3C4ABDFE3B193C5C3D1C3A95BDB13D3CBA503D8D3DE4BB893D453D18BD1ABD9F3CDEBBA0BD843D81BCAC3D473C4B3D90BD65BD2C3C94BCBE3C833D"> : tensor<10x8x3x5xbf16>
// Per-output-channel bias (10 bf16 values).
%cst_0 = arith.constant dense<[8.056640e-02, 6.738280e-02, -3.637700e-02, -2.111820e-02, 7.568360e-02, 7.519530e-02, -3.112790e-02, 4.663090e-02, -4.589840e-02, 5.908200e-02]> : tensor<10xbf16>
%cst_1 = arith.constant 0.000000e+00 : bf16
// Zero-pad H by 4 and W by 2 on both sides: 12x16 -> 20x20 spatial dims
// (corresponds to padding=(4, 2) on the PyTorch Conv2d).
%padded = tensor.pad %arg0 low[0, 0, 4, 2] high[0, 0, 4, 2] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst_1 : bf16
} : tensor<2x8x12x16xbf16> to tensor<2x8x20x20xbf16>
%0 = tensor.empty() : tensor<2x10x7x16xbf16>
// Broadcast the bias across batch and spatial dims to initialize the conv accumulator.
%1 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_0 : tensor<10xbf16>) outs(%0 : tensor<2x10x7x16xbf16>) {
^bb0(%in: bf16, %out: bf16):
linalg.yield %in : bf16
} -> tensor<2x10x7x16xbf16>
// NCHW/FCHW convolution with dilation (3, 1) and stride (2, 1), accumulating
// into the bias-initialized tensor. NOTE(review): accumulation happens in bf16
// here, whereas PyTorch accumulates in higher precision — likely the source of
// the reported mismatch; confirm against the backend's bf16 handling.
%2 = linalg.conv_2d_nchw_fchw {dilations = dense<[3, 1]> : vector<2xi64>, strides = dense<[2, 1]> : vector<2xi64>} ins(%padded, %cst : tensor<2x8x20x20xbf16>, tensor<10x8x3x5xbf16>) outs(%1 : tensor<2x10x7x16xbf16>) -> tensor<2x10x7x16xbf16>
return %2 : tensor<2x10x7x16xbf16>
}
}
Running the above module through the IREE CPU backend generates incorrect results with respect to the PyTorch output.
About this issue
- Original URL
- State: open
- Created 4 months ago
- Comments: 30 (26 by maintainers)
It’s OK — I think I’ll have the patch ready soon.
https://github.com/llvm/llvm-project/pull/83180 is merged, so you’ll get it in the next integrate or can cherry-pick it locally until then to verify it fixed your issue.
I think we should not close this until we can reach a conclusion on handling bf16 on CPU — specifically, how to verify that the model produces correct outputs through the ONNX pipeline.