bad left good right
9 removals
Words removed | 9 |
Total words | 919 |
Words removed (%) | 0.98 |
137 lines
9 additions
Words added | 9 |
Total words | 919 |
Words added (%) | 0.98 |
137 lines
#blocked = #ttg.blocked<{sizePerThread = [4], threadsPerWarp = [32], warpsPerCTA = [4], order = [0]}>
#blocked = #triton_gpu.blocked<{sizePerThread = [4], threadsPerWarp = [32], warpsPerCTA = [4], order = [0]}>
#blocked1 = #ttg.blocked<{sizePerThread = [8], threadsPerWarp = [32], warpsPerCTA = [4], order = [0]}>
#blocked1 = #triton_gpu.blocked<{sizePerThread = [8], threadsPerWarp = [32], warpsPerCTA = [4], order = [0]}>
#blocked2 = #ttg.blocked<{sizePerThread = [4, 2], threadsPerWarp = [32, 1], warpsPerCTA = [4, 1], order = [1, 0]}>
#blocked2 = #triton_gpu.blocked<{sizePerThread = [4, 2], threadsPerWarp = [32, 1], warpsPerCTA = [4, 1], order = [1, 0]}>
#loc = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":178:0)
#loc = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":178:0)
module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32, ttg.target = "cuda:90", "ttg.threads-per-warp" = 32 : i32} {
module attributes {"triton_gpu.num-ctas" = 1 : i32, "triton_gpu.num-warps" = 4 : i32, triton_gpu.target = "cuda:90", "triton_gpu.threads-per-warp" = 32 : i32} {
tt.func public @triton_f4_to_bf16_kernel(%arg0: !tt.ptr<i8> {tt.divisibility = 16 : i32} loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":178:0), %arg1: !tt.ptr<bf16> {tt.divisibility = 16 : i32} loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":178:0), %arg2: i32 loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":178:0)) attributes {noinline = false} {
tt.func public @triton_f4_to_bf16_kernel(%arg0: !tt.ptr<i8> {tt.divisibility = 16 : i32} loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":178:0), %arg1: !tt.ptr<bf16> {tt.divisibility = 16 : i32} loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":178:0), %arg2: i32 loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":178:0)) attributes {noinline = false} {
%c2_i32 = arith.constant 2 : i32 loc(#loc1)
%c2_i32 = arith.constant 2 : i32 loc(#loc1)
%c512_i32 = arith.constant 512 : i32 loc(#loc1)
%c512_i32 = arith.constant 512 : i32 loc(#loc1)
%c1024_i32 = arith.constant 1024 : i32 loc(#loc1)
%c1024_i32 = arith.constant 1024 : i32 loc(#loc1)
%cst = arith.constant dense<4> : tensor<512xi8, #blocked> loc(#loc1)
%cst = arith.constant dense<4> : tensor<512xi8, #blocked> loc(#loc1)
%cst_0 = arith.constant dense<15> : tensor<512xi8, #blocked> loc(#loc1)
%cst_0 = arith.constant dense<15> : tensor<512xi8, #blocked> loc(#loc1)
%cst_1 = arith.constant dense<8> : tensor<1024xi8, #blocked1> loc(#loc1)
%cst_1 = arith.constant dense<8> : tensor<1024xi8, #blocked1> loc(#loc1)
%cst_2 = arith.constant dense<0> : tensor<1024xi32, #blocked1> loc(#loc1)
%cst_2 = arith.constant dense<0> : tensor<1024xi32, #blocked1> loc(#loc1)
%cst_3 = arith.constant dense<1> : tensor<1024xi32, #blocked1> loc(#loc1)
%cst_3 = arith.constant dense<1> : tensor<1024xi32, #blocked1> loc(#loc1)
%cst_4 = arith.constant dense<1> : tensor<1024xi8, #blocked1> loc(#loc1)
%cst_4 = arith.constant dense<1> : tensor<1024xi8, #blocked1> loc(#loc1)
%cst_5 = arith.constant dense<127> : tensor<1024xi8, #blocked1> loc(#loc1)
%cst_5 = arith.constant dense<127> : tensor<1024xi8, #blocked1> loc(#loc1)
%cst_6 = arith.constant dense<23> : tensor<1024xi32, #blocked1> loc(#loc1)
%cst_6 = arith.constant dense<23> : tensor<1024xi32, #blocked1> loc(#loc1)
%cst_7 = arith.constant dense<22> : tensor<1024xi32, #blocked1> loc(#loc1)
%cst_7 = arith.constant dense<22> : tensor<1024xi32, #blocked1> loc(#loc1)
%cst_8 = arith.constant dense<1056964608> : tensor<1024xi32, #blocked1> loc(#loc1)
%cst_8 = arith.constant dense<1056964608> : tensor<1024xi32, #blocked1> loc(#loc1)
%cst_9 = arith.constant dense<28> : tensor<1024xi32, #blocked1> loc(#loc1)
%cst_9 = arith.constant dense<28> : tensor<1024xi32, #blocked1> loc(#loc1)
%0 = tt.get_program_id x : i32 loc(#loc2)
%0 = tt.get_program_id x : i32 loc(#loc2)
%1 = arith.muli %arg2, %c2_i32 : i32 loc(#loc3)
%1 = arith.muli %arg2, %c2_i32 : i32 loc(#loc3)
%2 = arith.muli %0, %c512_i32 : i32 loc(#loc4)
%2 = arith.muli %0, %c512_i32 : i32 loc(#loc4)
%3 = tt.make_range {end = 512 : i32, start = 0 : i32} : tensor<512xi32, #blocked> loc(#loc5)
%3 = tt.make_range {end = 512 : i32, start = 0 : i32} : tensor<512xi32, #blocked> loc(#loc5)
%4 = tt.splat %2 : i32 -> tensor<512xi32, #blocked> loc(#loc6)
%4 = tt.splat %2 : i32 -> tensor<512xi32, #blocked> loc(#loc6)
%5 = arith.addi %4, %3 : tensor<512xi32, #blocked> loc(#loc6)
%5 = arith.addi %4, %3 : tensor<512xi32, #blocked> loc(#loc6)
%6 = tt.splat %arg2 : i32 -> tensor<512xi32, #blocked> loc(#loc7)
%6 = tt.splat %arg2 : i32 -> tensor<512xi32, #blocked> loc(#loc7)
%7 = arith.cmpi slt, %5, %6 : tensor<512xi32, #blocked> loc(#loc7)
%7 = arith.cmpi slt, %5, %6 : tensor<512xi32, #blocked> loc(#loc7)
%8 = tt.splat %arg0 : !tt.ptr<i8> -> tensor<512x!tt.ptr<i8>, #blocked> loc(#loc8)
%8 = tt.splat %arg0 : !tt.ptr<i8> -> tensor<512x!tt.ptr<i8>, #blocked> loc(#loc8)
%9 = tt.addptr %8, %5 : tensor<512x!tt.ptr<i8>, #blocked>, tensor<512xi32, #blocked> loc(#loc8)
%9 = tt.addptr %8, %5 : tensor<512x!tt.ptr<i8>, #blocked>, tensor<512xi32, #blocked> loc(#loc8)
%10 = tt.load %9, %7 : tensor<512x!tt.ptr<i8>, #blocked> loc(#loc9)
%10 = tt.load %9, %7 : tensor<512x!tt.ptr<i8>, #blocked> loc(#loc9)
%11 = arith.shrui %10, %cst : tensor<512xi8, #blocked> loc(#loc43)
%11 = arith.shrui %10, %cst : tensor<512xi8, #blocked> loc(#loc43)
%12 = arith.andi %10, %cst_0 : tensor<512xi8, #blocked> loc(#loc44)
%12 = arith.andi %10, %cst_0 : tensor<512xi8, #blocked> loc(#loc44)
%13 = tt.join %11, %12 : tensor<512xi8, #blocked> -> tensor<512x2xi8, #blocked2> loc(#loc67)
%13 = tt.join %11, %12 : tensor<512xi8, #blocked> -> tensor<512x2xi8, #blocked2> loc(#loc67)
%14 = tt.reshape %13 : tensor<512x2xi8, #blocked2> -> tensor<1024xi8, #blocked1> loc(#loc68)
%14 = tt.reshape %13 : tensor<512x2xi8, #blocked2> -> tensor<1024xi8, #blocked1> loc(#loc68)
%15 = arith.andi %14, %cst_1 : tensor<1024xi8, #blocked1> loc(#loc47)
%15 = arith.andi %14, %cst_1 : tensor<1024xi8, #blocked1> loc(#loc47)
%16 = arith.xori %14, %15 : tensor<1024xi8, #blocked1> loc(#loc48)
%16 = arith.xori %14, %15 : tensor<1024xi8, #blocked1> loc(#loc48)
%17 = arith.extui %16 : tensor<1024xi8, #blocked1> to tensor<1024xi32, #blocked1> loc(#loc49)
%17 = arith.extui %16 : tensor<1024xi8, #blocked1> to tensor<1024xi32, #blocked1> loc(#loc49)
%18 = arith.cmpi eq, %17, %cst_2 : tensor<1024xi32, #blocked1> loc(#loc49)
%18 = arith.cmpi eq, %17, %cst_2 : tensor<1024xi32, #blocked1> loc(#loc49)
%19 = arith.cmpi eq, %17, %cst_3 : tensor<1024xi32, #blocked1> loc(#loc50)
%19 = arith.cmpi eq, %17, %cst_3 : tensor<1024xi32, #blocked1> loc(#loc50)
%20 = arith.shrui %16, %cst_4 : tensor<1024xi8, #blocked1> loc(#loc51)
%20 = arith.shrui %16, %cst_4 : tensor<1024xi8, #blocked1> loc(#loc51)
%21 = arith.subi %20, %cst_4 : tensor<1024xi8, #blocked1> loc(#loc52)
%21 = arith.subi %20, %cst_4 : tensor<1024xi8, #blocked1> loc(#loc52)
%22 = arith.addi %21, %cst_5 : tensor<1024xi8, #blocked1> loc(#loc53)
%22 = arith.addi %21, %cst_5 : tensor<1024xi8, #blocked1> loc(#loc53)
%23 = arith.extui %22 : tensor<1024xi8, #blocked1> to tensor<1024xi32, #blocked1> loc(#loc54)
%23 = arith.extui %22 : tensor<1024xi8, #blocked1> to tensor<1024xi32, #blocked1> loc(#loc54)
%24 = arith.shli %23, %cst_6 : tensor<1024xi32, #blocked1> loc(#loc55)
%24 = arith.shli %23, %cst_6 : tensor<1024xi32, #blocked1> loc(#loc55)
%25 = arith.andi %16, %cst_4 : tensor<1024xi8, #blocked1> loc(#loc56)
%25 = arith.andi %16, %cst_4 : tensor<1024xi8, #blocked1> loc(#loc56)
%26 = arith.extui %25 : tensor<1024xi8, #blocked1> to tensor<1024xi32, #blocked1> loc(#loc57)
%26 = arith.extui %25 : tensor<1024xi8, #blocked1> to tensor<1024xi32, #blocked1> loc(#loc57)
%27 = arith.shli %26, %cst_7 : tensor<1024xi32, #blocked1> loc(#loc58)
%27 = arith.shli %26, %cst_7 : tensor<1024xi32, #blocked1> loc(#loc58)
%28 = arith.ori %24, %27 : tensor<1024xi32, #blocked1> loc(#loc59)
%28 = arith.ori %24, %27 : tensor<1024xi32, #blocked1> loc(#loc59)
%29 = arith.select %18, %cst_2, %28 : tensor<1024xi1, #blocked1>, tensor<1024xi32, #blocked1> loc(#loc60)
%29 = arith.select %18, %cst_2, %28 : tensor<1024xi1, #blocked1>, tensor<1024xi32, #blocked1> loc(#loc60)
%30 = arith.select %19, %cst_8, %29 : tensor<1024xi1, #blocked1>, tensor<1024xi32, #blocked1> loc(#loc61)
%30 = arith.select %19, %cst_8, %29 : tensor<1024xi1, #blocked1>, tensor<1024xi32, #blocked1> loc(#loc61)
%31 = arith.extui %15 : tensor<1024xi8, #blocked1> to tensor<1024xi32, #blocked1> loc(#loc62)
%31 = arith.extui %15 : tensor<1024xi8, #blocked1> to tensor<1024xi32, #blocked1> loc(#loc62)
%32 = arith.shli %31, %cst_9 : tensor<1024xi32, #blocked1> loc(#loc63)
%32 = arith.shli %31, %cst_9 : tensor<1024xi32, #blocked1> loc(#loc63)
%33 = arith.ori %30, %32 : tensor<1024xi32, #blocked1> loc(#loc64)
%33 = arith.ori %30, %32 : tensor<1024xi32, #blocked1> loc(#loc64)
%34 = tt.bitcast %33 : tensor<1024xi32, #blocked1> -> tensor<1024xf32, #blocked1> loc(#loc65)
%34 = tt.bitcast %33 : tensor<1024xi32, #blocked1> -> tensor<1024xf32, #blocked1> loc(#loc65)
%35 = arith.truncf %34 : tensor<1024xf32, #blocked1> to tensor<1024xbf16, #blocked1> loc(#loc66)
%35 = arith.truncf %34 : tensor<1024xf32, #blocked1> to tensor<1024xbf16, #blocked1> loc(#loc66)
%36 = arith.muli %0, %c1024_i32 : i32 loc(#loc36)
%36 = arith.muli %0, %c1024_i32 : i32 loc(#loc36)
%37 = tt.make_range {end = 1024 : i32, start = 0 : i32} : tensor<1024xi32, #blocked1> loc(#loc37)
%37 = tt.make_range {end = 1024 : i32, start = 0 : i32} : tensor<1024xi32, #blocked1> loc(#loc37)
%38 = tt.splat %36 : i32 -> tensor<1024xi32, #blocked1> loc(#loc38)
%38 = tt.splat %36 : i32 -> tensor<1024xi32, #blocked1> loc(#loc38)
%39 = arith.addi %38, %37 : tensor<1024xi32, #blocked1> loc(#loc38)
%39 = arith.addi %38, %37 : tensor<1024xi32, #blocked1> loc(#loc38)
%40 = tt.splat %1 : i32 -> tensor<1024xi32, #blocked1> loc(#loc39)
%40 = tt.splat %1 : i32 -> tensor<1024xi32, #blocked1> loc(#loc39)
%41 = arith.cmpi slt, %39, %40 : tensor<1024xi32, #blocked1> loc(#loc39)
%41 = arith.cmpi slt, %39, %40 : tensor<1024xi32, #blocked1> loc(#loc39)
%42 = tt.splat %arg1 : !tt.ptr<bf16> -> tensor<1024x!tt.ptr<bf16>, #blocked1> loc(#loc40)
%42 = tt.splat %arg1 : !tt.ptr<bf16> -> tensor<1024x!tt.ptr<bf16>, #blocked1> loc(#loc40)
%43 = tt.addptr %42, %39 : tensor<1024x!tt.ptr<bf16>, #blocked1>, tensor<1024xi32, #blocked1> loc(#loc40)
%43 = tt.addptr %42, %39 : tensor<1024x!tt.ptr<bf16>, #blocked1>, tensor<1024xi32, #blocked1> loc(#loc40)
tt.store %43, %35, %41 : tensor<1024x!tt.ptr<bf16>, #blocked1> loc(#loc41)
tt.store %43, %35, %41 : tensor<1024x!tt.ptr<bf16>, #blocked1> loc(#loc41)
tt.return loc(#loc42)
tt.return loc(#loc42)
} loc(#loc)
} loc(#loc)
} loc(#loc)
} loc(#loc)
#loc1 = loc(unknown)
#loc1 = loc(unknown)
#loc2 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":194:24)
#loc2 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":194:24)
#loc3 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":195:37)
#loc3 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":195:37)
#loc4 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":198:27)
#loc4 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":198:27)
#loc5 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":199:47)
#loc5 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":199:47)
#loc6 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":199:34)
#loc6 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":199:34)
#loc7 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":201:27)
#loc7 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":201:27)
#loc8 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":204:31)
#loc8 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":204:31)
#loc9 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":204:23)
#loc9 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":204:23)
#loc10 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":123:29)
#loc10 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":123:29)
#loc11 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":216:8)
#loc11 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":216:8)
#loc12 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":124:29)
#loc12 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":124:29)
#loc13 = loc("/home/drisspg/meta/triton/python/triton/language/standard.py":460:21)
#loc13 = loc("/home/drisspg/.conda/envs/dev-triton-main/lib/python3.12/site-packages/triton/language/standard.py":443:21)
#loc14 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":125:34)
#loc14 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":125:34)
#loc15 = loc("/home/drisspg/meta/triton/python/triton/language/standard.py":469:31)
#loc15 = loc("/home/drisspg/.conda/envs/dev-triton-main/lib/python3.12/site-packages/triton/language/standard.py":452:31)
#loc16 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":131:18)
#loc16 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":131:18)
#loc17 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":134:16)
#loc17 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":134:16)
#loc18 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":137:25)
#loc18 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":137:25)
#loc19 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":143:29)
#loc19 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":143:29)
#loc20 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":146:29)
#loc20 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":146:29)
#loc21 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":147:37)
#loc21 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":147:37)
#loc22 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":147:56)
#loc22 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":147:56)
#loc23 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":148:39)
#loc23 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":148:39)
#loc24 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":148:52)
#loc24 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":148:52)
#loc25 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":151:26)
#loc25 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":151:26)
#loc26 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":152:34)
#loc26 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":152:34)
#loc27 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":152:48)
#loc27 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":152:48)
#loc28 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":156:30)
#loc28 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":156:30)
#loc29 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":158:48)
#loc29 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":158:48)
#loc30 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":160:63)
#loc30 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":160:63)
#loc31 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":163:26)
#loc31 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":163:26)
#loc32 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":164:8)
#loc32 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":164:8)
#loc33 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":166:22)
#loc33 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":166:22)
#loc34 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":173:23)
#loc34 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":173:23)
#loc35 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":174:23)
#loc35 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":174:23)
#loc36 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":220:28)
#loc36 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":220:28)
#loc37 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":221:49)
#loc37 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":221:49)
#loc38 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":221:36)
#loc38 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":221:36)
#loc39 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":222:29)
#loc39 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":222:29)
#loc40 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":224:26)
#loc40 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":224:26)
#loc41 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":224:39)
#loc41 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":224:39)
#loc42 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":224:4)
#loc42 = loc("/home/drisspg/meta/ao/torchao/prototype/mx_formats/custom_cast.py":224:4)
#loc43 = loc(callsite(#loc10 at #loc11))
#loc43 = loc(callsite(#loc10 at #loc11))
#loc44 = loc(callsite(#loc12 at #loc11))
#loc44 = loc(callsite(#loc12 at #loc11))
#loc45 = loc(callsite(#loc13 at #loc14))
#loc45 = loc(callsite(#loc13 at #loc14))
#loc46 = loc(callsite(#loc15 at #loc14))
#loc46 = loc(callsite(#loc15 at #loc14))
#loc47 = loc(callsite(#loc16 at #loc11))
#loc47 = loc(callsite(#loc16 at #loc11))
#loc48 = loc(callsite(#loc17 at #loc11))
#loc48 = loc(callsite(#loc17 at #loc11))
#loc49 = loc(callsite(#loc18 at #loc11))
#loc49 = loc(callsite(#loc18 at #loc11))
#loc50 = loc(callsite(#loc19 at #loc11))
#loc50 = loc(callsite(#loc19 at #loc11))
#loc51 = loc(callsite(#loc20 at #loc11))
#loc51 = loc(callsite(#loc20 at #loc11))
#loc52 = loc(callsite(#loc21 at #loc11))
#loc52 = loc(callsite(#loc21 at #loc11))
#loc53 = loc(callsite(#loc22 at #loc11))
#loc53 = loc(callsite(#loc22 at #loc11))
#loc54 = loc(callsite(#loc23 at #loc11))
#loc54 = loc(callsite(#loc23 at #loc11))
#loc55 = loc(callsite(#loc24 at #loc11))
#loc55 = loc(callsite(#loc24 at #loc11))
#loc56 = loc(callsite(#loc25 at #loc11))
#loc56 = loc(callsite(#loc25 at #loc11))
#loc57 = loc(callsite(#loc26 at #loc11))
#loc57 = loc(callsite(#loc26 at #loc11))
#loc58 = loc(callsite(#loc27 at #loc11))
#loc58 = loc(callsite(#loc27 at #loc11))
#loc59 = loc(callsite(#loc28 at #loc11))
#loc59 = loc(callsite(#loc28 at #loc11))
#loc60 = loc(callsite(#loc29 at #loc11))
#loc60 = loc(callsite(#loc29 at #loc11))
#loc61 = loc(callsite(#loc30 at #loc11))
#loc61 = loc(callsite(#loc30 at #loc11))
#loc62 = loc(callsite(#loc31 at #loc11))
#loc62 = loc(callsite(#loc31 at #loc11))
#loc63 = loc(callsite(#loc32 at #loc11))
#loc63 = loc(callsite(#loc32 at #loc11))
#loc64 = loc(callsite(#loc33 at #loc11))
#loc64 = loc(callsite(#loc33 at #loc11))
#loc65 = loc(callsite(#loc34 at #loc11))
#loc65 = loc(callsite(#loc34 at #loc11))
#loc66 = loc(callsite(#loc35 at #loc11))
#loc66 = loc(callsite(#loc35 at #loc11))
#loc67 = loc(callsite(#loc45 at #loc11))
#loc67 = loc(callsite(#loc45 at #loc11))
#loc68 = loc(callsite(#loc46 at #loc11))
#loc68 = loc(callsite(#loc46 at #loc11))