1//hadd = (x+y)>>1
2//This can be simplified to x>>1 + y>>1 + (1 if both x and y have the 1s bit set)
3//This saves us having to do any checks for overflow in the addition sum
4_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE hadd(__CLC_GENTYPE x, __CLC_GENTYPE y) {
5    return (x>>(__CLC_GENTYPE)1)+(y>>(__CLC_GENTYPE)1)+(x&y&(__CLC_GENTYPE)1);
6}
7