Total (samples)  Self
    23%  2,675  2,675
    19%  2,195  2,195
    11%  1,260  1,260
    9.9%  1,136  1,136
    7.9%  913  913
    6.2%  714  714
    4.4%  507  507
    4.2%  482  482
    4.1%  470  470
    3.4%  391  391
    1.2%  143  143
    1.2%  139  139
    1.2%  139  139
    0.7%  83  83
    0.7%  80  80
    0.5%  61  61
    0.4%  48  48
    0.2%  21  21
    0.1%  15  15
    0.1%  14  14
    0.1%  7  7
    0.0%  2  2
    0.0%  1  1
    0.0%  1  1

    cdef_filter4_pri_edged_8bpc_neon

    Total  Self
    adrp x8, $+0xb6000
    add x8, x8, #0x18c
    and w9, w3, #0x1
    add x8, x8, w9, lsl #1
    adrp x9, $+0xb6000
    add x9, x9, #0x170
    add x5, x9, w5, lsl #1
    movi v30.8b, #0x7
    dup v28.8b, w6
    dup v25.16b, w3
    trn1 v24.8b, v25.8b, v27.8b
    clz v24.8b, v24.8b
    sub v24.8b, v30.8b, v24.8b
    uqsub v24.8b, v28.8b, v24.8b
    neg v24.8b, v24.8b
    mov v24.16b, v24.b[0]
    add x12, x2, #0x8
    add x13, x2, #0x10
    add x14, x2, #0x18
    ld1 {v0.s}[0], [x2]
    ld1 {v0.s}[1], [x12]
    ld1 {v0.s}[2], [x13]
    ld1 {v0.s}[3], [x14]
    movi v1.16b, #0xff
    movi v2.16b, #0x0
    mov w11, #0x2
    ldrb w9, [x5]
    add x6, x2, w9, sxtb
    sub x9, x2, w9, sxtb
    ld1 {v5.s}[0], [x6]
    add x6, x6, #0x8
    ld1 {v6.s}[0], [x9]
    add x9, x9, #0x8
    ld1 {v5.s}[1], [x6]
    add x6, x6, #0x8
    ld1 {v6.s}[1], [x9]
    add x9, x9, #0x8
    ld1 {v5.s}[2], [x6]
    add x6, x6, #0x8
    ld1 {v6.s}[2], [x9]
    add x9, x9, #0x8
    ld1 {v5.s}[3], [x6]
    ld1 {v6.s}[3], [x9]
    ldrb w10, [x8]
    uabd v16.16b, v0.16b, v5.16b
    uabd v20.16b, v0.16b, v6.16b
    ushl v17.16b, v16.16b, v24.16b
    ushl v21.16b, v20.16b, v24.16b
    uqsub v17.16b, v25.16b, v17.16b
    uqsub v21.16b, v25.16b, v21.16b
    cmhi v18.16b, v0.16b, v5.16b
    cmhi v22.16b, v0.16b, v6.16b
    umin v17.16b, v17.16b, v16.16b
    umin v21.16b, v21.16b, v20.16b
    dup v19.16b, w10
    neg v16.16b, v17.16b
    neg v20.16b, v21.16b
    bsl v18.16b, v16.16b, v17.16b
    bsl v22.16b, v20.16b, v21.16b
    mla v1.16b, v18.16b, v19.16b
    mla v2.16b, v22.16b, v19.16b
    add x5, x5, #0x1
    subs w11, w11, #0x1
    add x8, x8, #0x1
    b.ne $-0x98
    srhadd v5.16b, v1.16b, v2.16b
    shadd v6.16b, v1.16b, v2.16b
    cmlt v1.16b, v5.16b, #0x0
    bsl v1.16b, v6.16b, v5.16b
    srshr v1.16b, v1.16b, #0x3
    usqadd v0.16b, v1.16b
    st1 {v0.s}[0], [x0], x1
    add x2, x2, #0x20
    st1 {v0.s}[1], [x0], x1
    subs w7, w7, #0x4
    Drop a saved profile here