git: 78217ad1c063 - main - databases/caterva: fix build on powerpc64*
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Fri, 24 Mar 2023 00:42:58 UTC
The branch main has been updated by pkubaj: URL: https://cgit.FreeBSD.org/ports/commit/?id=78217ad1c063bc0cebec56b49e842ca5300d68c3 commit 78217ad1c063bc0cebec56b49e842ca5300d68c3 Author: Piotr Kubaj <pkubaj@FreeBSD.org> AuthorDate: 2023-03-23 20:36:08 +0000 Commit: Piotr Kubaj <pkubaj@FreeBSD.org> CommitDate: 2023-03-23 23:39:16 +0000 databases/caterva: fix build on powerpc64* Use non-typedef types when in conjunction with vector: /wrkdirs/usr/ports/databases/caterva/work/caterva-2db4e9c/contribs/c-blosc2/blosc/transpose-altivec.h:18:23: error: redefinition of 'uint8_t' as different kind of symbol /usr/include/sys/_stdint.h:56:20: note: previous definition is here typedef __uint8_t uint8_t; --- ...ch-contribs_c-blosc2_blosc_bitshuffle-altivec.c | 221 +++++++++++++++++++++ ...patch-contribs_c-blosc2_blosc_shuffle-altivec.c | 134 +++++++++++++ ...tch-contribs_c-blosc2_blosc_transpose-altivec.h | 58 ++++++ 3 files changed, 413 insertions(+) diff --git a/databases/caterva/files/patch-contribs_c-blosc2_blosc_bitshuffle-altivec.c b/databases/caterva/files/patch-contribs_c-blosc2_blosc_bitshuffle-altivec.c new file mode 100644 index 000000000000..a5f4cc517c65 --- /dev/null +++ b/databases/caterva/files/patch-contribs_c-blosc2_blosc_bitshuffle-altivec.c @@ -0,0 +1,221 @@ +--- contribs/c-blosc2/blosc/bitshuffle-altivec.c.orig 2023-03-23 20:29:41 UTC ++++ contribs/c-blosc2/blosc/bitshuffle-altivec.c +@@ -37,15 +37,15 @@ + #include <stdio.h> + #include <string.h> + +-static void helper_print(__vector uint8_t v, char* txt){ ++static void helper_print(__vector unsigned char v, char* txt){ + printf("%s %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x\n",txt, + v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7], v[8], v[9], v[10], v[11], v[12], v[13], v[14], v[15]); + } + #endif + + +-static inline __vector uint8_t gen_save_mask(size_t offset){ +- __vector uint8_t mask; ++static inline __vector unsigned char gen_save_mask(size_t offset){ ++ __vector unsigned char mask; + size_t k; + for (k = 0; k < 16; k++) + mask[k] = (k<offset)?0:0xFF; +@@ -54,8 +54,8 @@ static inline __vector uint8_t gen_save_mask(size_t of + + + // Build and return a bit-permutation mask +-static __vector uint8_t make_bitperm_mask(int type_size, int bit) { +- __vector uint8_t result; ++static __vector unsigned char make_bitperm_mask(int type_size, int bit) { ++ __vector unsigned char result; + if (type_size == 1) { + // data_type is 8 bits long + for (int i = 0; i < 16; i++) +@@ -90,29 +90,29 @@ bitunshuffle1_altivec(void* _src, void* dest, const si + size_t nbyte_row = size / 8; + + // working vectors +- __vector uint8_t xmm0[8], xmm1[8], masks[8]; ++ __vector unsigned char xmm0[8], xmm1[8], masks[8]; + // Vector masks +- static const __vector uint8_t lo01 = (const __vector uint8_t) { ++ static const __vector unsigned char lo01 = (const __vector unsigned char) { + 0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0c, 0x0d, + 0x10, 0x11, 0x14, 0x15, 0x18, 0x19, 0x1c, 0x1d}; +- static const __vector uint8_t hi01 = (const __vector uint8_t) { ++ static const __vector unsigned char hi01 = (const __vector unsigned char) { + 0x02, 0x03, 0x06, 0x07, 0x0a, 0x0b, 0x0e, 0x0f, + 0x12, 0x13, 0x16, 0x17, 0x1a, 0x1b, 0x1e, 0x1f}; +- static const __vector uint8_t lo02 = (const __vector uint8_t) { ++ static const __vector unsigned char lo02 = (const __vector unsigned char) { + 0x00, 0x01, 0x08, 0x09, 0x10, 0x11, 0x18, 0x19, + 0x02, 0x03, 0x0a, 0x0b, 0x12, 0x13, 0x1a, 0x1b}; +- static const __vector uint8_t hi02 = (const __vector uint8_t) { ++ static const __vector unsigned char hi02 = (const __vector unsigned char) { + 0x04, 0x05, 0x0c, 0x0d, 0x14, 0x15, 0x1c, 0x1d, + 0x06, 0x07, 0x0e, 0x0f, 0x16, 0x17, 0x1e, 0x1f}; +- static const __vector uint8_t epi64_low = (const __vector uint8_t) { ++ static const __vector unsigned char epi64_low = (const __vector unsigned char) { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17}; +- static const __vector uint8_t epi64_hi = (const __vector uint8_t) { ++ static const __vector unsigned char epi64_hi = (const __vector unsigned char) { + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f}; + + for (kk = 0; kk < 8; kk++){ +- __vector uint8_t msk; ++ __vector unsigned char msk; + for (ii = 0; ii < 8; ii++){ + msk[ii] = 127-(16*ii+2*kk); + msk[ii+8] = 127-(16*ii+2*kk+1); +@@ -161,13 +161,13 @@ bitunshuffle1_altivec(void* _src, void* dest, const si + + // At this stage each vector xmm1 contains the data from 16 adjacent bytes + for (int ll = 0; ll < 8; ll++){ +- __vector uint8_t xmm = xmm1[ll]; ++ __vector unsigned char xmm = xmm1[ll]; + //helper_print(xmm, "vector transposed"); + for (kk = 0; kk < 8; kk++) { +- __vector uint16_t tmp; +- tmp = (__vector uint16_t) vec_bperm(xmm, masks[kk]); ++ __vector unsigned short tmp; ++ tmp = (__vector unsigned short) vec_bperm(xmm, masks[kk]); + //printf("%d %d\n", vp, tmp[4]); +- //helper_print((__vector uint8_t)tmp, "tmp"); ++ //helper_print((__vector unsigned char)tmp, "tmp"); + out_s[vp++] = tmp[4]; + } + } +@@ -179,7 +179,7 @@ int64_t bshuf_trans_byte_elem_16(void* in, void* out, + /* Transpose bytes within elements for 16 bit elements. */ + int64_t bshuf_trans_byte_elem_16(void* in, void* out, const size_t size) { + static const uint8_t bytesoftype = 2; +- __vector uint8_t xmm0[2]; ++ __vector unsigned char xmm0[2]; + + for (size_t i = 0; i + 15 < size; i += 16) { + for (int j = 0; j < bytesoftype; j++) +@@ -199,7 +199,7 @@ int64_t bshuf_trans_byte_elem_32(void* in, void* out, + /* Transpose bytes within elements for 32 bit elements. */ + int64_t bshuf_trans_byte_elem_32(void* in, void* out, const size_t size) { + static const uint8_t bytesoftype = 4; +- __vector uint8_t xmm0[4]; ++ __vector unsigned char xmm0[4]; + + for (size_t i = 0; i + 15 < size; i += 16) { + for (int j = 0; j < bytesoftype; j++) +@@ -219,7 +219,7 @@ int64_t bshuf_trans_byte_elem_64(void* in, void* out, + /* Transpose bytes within elements for 64 bit elements. */ + int64_t bshuf_trans_byte_elem_64(void* in, void* out, const size_t size) { + static const uint8_t bytesoftype = 8; +- __vector uint8_t xmm0[8]; ++ __vector unsigned char xmm0[8]; + + for (size_t i = 0; i + 15 < size; i += 16) { + for (int j = 0; j < bytesoftype; j++) +@@ -239,7 +239,7 @@ int64_t bshuf_trans_byte_elem_128(void* in, void* out, + /* Transpose bytes within elements for 128 bit elements. */ + int64_t bshuf_trans_byte_elem_128(void* in, void* out, const size_t size) { + static const uint8_t bytesoftype = 16; +- __vector uint8_t xmm0[16]; ++ __vector unsigned char xmm0[16]; + + for (size_t i = 0; i + 15 < size; i += 16) { + for (int j = 0; j < bytesoftype; j++) +@@ -306,7 +306,7 @@ int64_t bshuf_trans_byte_elem_altivec(void* in, void* + + if ((elem_size % 16) == 0) { + nchunk_elem = elem_size / 16; +- TRANS_ELEM_TYPE(in, out, size, nchunk_elem, __vector uint8_t); ++ TRANS_ELEM_TYPE(in, out, size, nchunk_elem, __vector unsigned char); + count = bshuf_trans_byte_elem_128(out, tmp_buf, + size * nchunk_elem); + bshuf_trans_elem(tmp_buf, out, 16, nchunk_elem, size); +@@ -343,7 +343,7 @@ int64_t bshuf_trans_bit_byte_altivec(void* in, void* o + uint8_t* out_b = (uint8_t*)out; + int64_t count; + size_t nbyte = elem_size * size; +- __vector uint8_t data, masks[8]; ++ __vector unsigned char data, masks[8]; + size_t ii, kk; + + CHECK_MULT_EIGHT(nbyte); +@@ -356,9 +356,9 @@ int64_t bshuf_trans_bit_byte_altivec(void* in, void* o + for (ii = 0; ii + 15 < nbyte; ii += 16) { + data = vec_xl(ii, in_b); + for (kk = 0; kk < 8; kk++) { +- __vector uint16_t tmp; ++ __vector unsigned short tmp; + uint16_t* oui16; +- tmp = (__vector uint16_t) vec_bperm(data, masks[kk]); ++ tmp = (__vector unsigned short) vec_bperm(data, masks[kk]); + oui16 = (uint16_t*)&out_b[(ii + kk*nbyte) >> 3]; + *oui16 = tmp[4]; + } +@@ -390,28 +390,28 @@ int64_t bshuf_trans_byte_bitrow_altivec(void* in, void + * the bytes. */ + int64_t bshuf_trans_byte_bitrow_altivec(void* in, void* out, const size_t size, + const size_t elem_size) { +- static const __vector uint8_t epi8_low = (const __vector uint8_t) { ++ static const __vector unsigned char epi8_low = (const __vector unsigned char) { + 0x00, 0x10, 0x01, 0x11, 0x02, 0x12, 0x03, 0x13, + 0x04, 0x14, 0x05, 0x15, 0x06, 0x16, 0x07, 0x17}; +- static const __vector uint8_t epi8_hi = (const __vector uint8_t) { ++ static const __vector unsigned char epi8_hi = (const __vector unsigned char) { + 0x08, 0x18, 0x09, 0x19, 0x0a, 0x1a, 0x0b, 0x1b, + 0x0c, 0x1c, 0x0d, 0x1d, 0x0e, 0x1e, 0x0f, 0x1f}; +- static const __vector uint8_t epi16_low = (const __vector uint8_t) { ++ static const __vector unsigned char epi16_low = (const __vector unsigned char) { + 0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, + 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17}; +- static const __vector uint8_t epi16_hi = (const __vector uint8_t) { ++ static const __vector unsigned char epi16_hi = (const __vector unsigned char) { + 0x08, 0x09, 0x18, 0x19, 0x0a, 0x0b, 0x1a, 0x1b, + 0x0c, 0x0d, 0x1c, 0x1d, 0x0e, 0x0f, 0x1e, 0x1f}; +- static const __vector uint8_t epi32_low = (const __vector uint8_t) { ++ static const __vector unsigned char epi32_low = (const __vector unsigned char) { + 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, + 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17}; +- static const __vector uint8_t epi32_hi = (const __vector uint8_t) { ++ static const __vector unsigned char epi32_hi = (const __vector unsigned char) { + 0x08, 0x09, 0x0a, 0x0b, 0x18, 0x19, 0x1a, 0x1b, + 0x0c, 0x0d, 0x0e, 0x0f, 0x1c, 0x1d, 0x1e, 0x1f}; +- static const __vector uint8_t epi64_low = (const __vector uint8_t) { ++ static const __vector unsigned char epi64_low = (const __vector unsigned char) { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17}; +- static const __vector uint8_t epi64_hi = (const __vector uint8_t) { ++ static const __vector unsigned char epi64_hi = (const __vector unsigned char) { + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f}; + +@@ -419,7 +419,7 @@ int64_t bshuf_trans_byte_bitrow_altivec(void* in, void + uint8_t* out_b = (uint8_t*)out; + size_t nrows = 8 * elem_size; + size_t nbyte_row = size / 8; +- __vector uint8_t xmm0[16], xmm1[16]; ++ __vector unsigned char xmm0[16], xmm1[16]; + + CHECK_MULT_EIGHT(size); + +@@ -546,7 +546,7 @@ int64_t bshuf_shuffle_bit_eightelem_altivec(void* in, + const uint8_t* in_b = (const uint8_t*)in; + uint8_t* out_b = (uint8_t*)out; + size_t nbyte = elem_size * size; +- __vector uint8_t masks[8], data; ++ __vector unsigned char masks[8], data; + + CHECK_MULT_EIGHT(size); + +@@ -563,9 +563,9 @@ int64_t bshuf_shuffle_bit_eightelem_altivec(void* in, + for (size_t jj = 0; jj + 15 < 8 * elem_size; jj += 16) { + data = vec_xl(ii + jj, in_b); + for (size_t kk = 0; kk < 8; kk++) { +- __vector uint16_t tmp; ++ __vector unsigned short tmp; + uint16_t* oui16; +- tmp = (__vector uint16_t) vec_bperm(data, masks[kk]); ++ tmp = (__vector unsigned short) vec_bperm(data, masks[kk]); + oui16 = (uint16_t*)&out_b[ii + (jj>>3) + kk * elem_size]; + *oui16 = tmp[4]; + } diff --git a/databases/caterva/files/patch-contribs_c-blosc2_blosc_shuffle-altivec.c b/databases/caterva/files/patch-contribs_c-blosc2_blosc_shuffle-altivec.c new file mode 100644 index 000000000000..69a9ed1dbe69 --- /dev/null +++ b/databases/caterva/files/patch-contribs_c-blosc2_blosc_shuffle-altivec.c @@ -0,0 +1,134 @@ +--- contribs/c-blosc2/blosc/shuffle-altivec.c.orig 2023-03-23 20:25:30 UTC ++++ contribs/c-blosc2/blosc/shuffle-altivec.c +@@ -25,7 +25,7 @@ shuffle2_altivec(uint8_t* const dest, const uint8_t* c + const int32_t vectorizable_elements, const int32_t total_elements){ + static const int32_t bytesoftype = 2; + uint32_t i, j; +- __vector uint8_t xmm0[2]; ++ __vector unsigned char xmm0[2]; + + for (j = 0; j < vectorizable_elements; j += 16){ + /* Fetch 16 elements (32 bytes) */ +@@ -47,7 +47,7 @@ shuffle4_altivec(uint8_t* const dest, const uint8_t* c + const int32_t vectorizable_elements, const int32_t total_elements){ + static const int32_t bytesoftype = 4; + int32_t i, j; +- __vector uint8_t xmm0[4]; ++ __vector unsigned char xmm0[4]; + + for (j = 0; j < vectorizable_elements; j += 16) + { +@@ -73,7 +73,7 @@ shuffle8_altivec(uint8_t* const dest, const uint8_t* c + const int32_t vectorizable_elements, const int32_t total_elements) { + static const uint8_t bytesoftype = 8; + int32_t i, j; +- __vector uint8_t xmm0[8]; ++ __vector unsigned char xmm0[8]; + + for (j = 0; j < vectorizable_elements; j += 16) + { +@@ -96,7 +96,7 @@ shuffle16_altivec(uint8_t* const dest, const uint8_t* + const int32_t vectorizable_elements, const int32_t total_elements) { + static const int32_t bytesoftype = 16; + int32_t i, j; +- __vector uint8_t xmm0[16]; ++ __vector unsigned char xmm0[16]; + + for (j = 0; j < vectorizable_elements; j += 16) + { +@@ -121,7 +121,7 @@ shuffle16_tiled_altivec(uint8_t* const dest, const uin + const int32_t bytesoftype) { + int32_t j, k; + const int32_t vecs_per_el_rem = bytesoftype & 0xF; +- __vector uint8_t xmm[16]; ++ __vector unsigned char xmm[16]; + + for (j = 0; j < vectorizable_elements; j += 16) { + /* Advance the offset into the type by the vector size (in bytes), unless this is +@@ -152,7 +152,7 @@ unshuffle2_altivec(uint8_t* const dest, const uint8_t* + const int32_t vectorizable_elements, const int32_t total_elements) { + static const int32_t bytesoftype = 2; + uint32_t i, j; +- __vector uint8_t xmm0[2], xmm1[2]; ++ __vector unsigned char xmm0[2], xmm1[2]; + + for (j = 0; j < vectorizable_elements; j += 16) { + /* Load 16 elements (32 bytes) into 2 vectors registers. */ +@@ -176,7 +176,7 @@ unshuffle4_altivec(uint8_t* const dest, const uint8_t* + const int32_t vectorizable_elements, const int32_t total_elements) { + static const int32_t bytesoftype = 4; + uint32_t i, j; +- __vector uint8_t xmm0[4], xmm1[4]; ++ __vector unsigned char xmm0[4], xmm1[4]; + + for (j = 0; j < vectorizable_elements; j += 16) { + /* Load 16 elements (64 bytes) into 4 vectors registers. */ +@@ -191,11 +191,11 @@ unshuffle4_altivec(uint8_t* const dest, const uint8_t* + /* Shuffle 2-byte words */ + for (i = 0; i < 2; i++) { + /* Compute the low 32 bytes */ +- xmm0[i] = (__vector uint8_t) vec_vmrghh((__vector uint16_t)xmm1[i * 2], +- (__vector uint16_t) xmm1[i * 2 + 1]); ++ xmm0[i] = (__vector unsigned char) vec_vmrghh((__vector unsigned short)xmm1[i * 2], ++ (__vector unsigned short) xmm1[i * 2 + 1]); + /* Compute the hi 32 bytes */ +- xmm0[i+2] = (__vector uint8_t) vec_vmrglh((__vector uint16_t)xmm1[i * 2], +- (__vector uint16_t)xmm1[i * 2 + 1]); ++ xmm0[i+2] = (__vector unsigned char) vec_vmrglh((__vector unsigned short)xmm1[i * 2], ++ (__vector unsigned short)xmm1[i * 2 + 1]); + } + /* Store the result vectors in proper order */ + vec_xst(xmm0[0], bytesoftype * j, dest); +@@ -211,7 +211,7 @@ unshuffle8_altivec(uint8_t* const dest, const uint8_t* + const int32_t vectorizable_elements, const int32_t total_elements) { + static const uint8_t bytesoftype = 8; + uint32_t i, j; +- __vector uint8_t xmm0[8], xmm1[8]; ++ __vector unsigned char xmm0[8], xmm1[8]; + + // Initialize permutations for writing + for (j = 0; j < vectorizable_elements; j += 16) { +@@ -225,17 +225,17 @@ unshuffle8_altivec(uint8_t* const dest, const uint8_t* + } + /* Shuffle 2-byte words */ + for (i = 0; i < 4; i++) { +- xmm0[i] = (__vector uint8_t)vec_vmrghh((__vector uint16_t)xmm1[i * 2], +- (__vector uint16_t)xmm1[i * 2 + 1]); +- xmm0[4 + i] = (__vector uint8_t)vec_vmrglh((__vector uint16_t)xmm1[i * 2], +- (__vector uint16_t)xmm1[i * 2 + 1]); ++ xmm0[i] = (__vector unsigned char)vec_vmrghh((__vector unsigned short)xmm1[i * 2], ++ (__vector unsigned short)xmm1[i * 2 + 1]); ++ xmm0[4 + i] = (__vector unsigned char)vec_vmrglh((__vector unsigned short)xmm1[i * 2], ++ (__vector unsigned short)xmm1[i * 2 + 1]); + } + /* Shuffle 4-byte dwords */ + for (i = 0; i < 4; i++) { +- xmm1[i] = (__vector uint8_t)vec_vmrghw((__vector uint32_t)xmm0[i * 2], +- (__vector uint32_t)xmm0[i * 2 + 1]); +- xmm1[4 + i] = (__vector uint8_t)vec_vmrglw((__vector uint32_t)xmm0[i * 2], +- (__vector uint32_t)xmm0[i * 2 + 1]); ++ xmm1[i] = (__vector unsigned char)vec_vmrghw((__vector unsigned int)xmm0[i * 2], ++ (__vector unsigned int)xmm0[i * 2 + 1]); ++ xmm1[4 + i] = (__vector unsigned char)vec_vmrglw((__vector unsigned int)xmm0[i * 2], ++ (__vector unsigned int)xmm0[i * 2 + 1]); + } + /* Store the result vectors in proper order */ + vec_xst(xmm1[0], bytesoftype * j, dest); +@@ -256,7 +256,7 @@ unshuffle16_altivec(uint8_t* const dest, const uint8_t + const int32_t vectorizable_elements, const int32_t total_elements) { + static const int32_t bytesoftype = 16; + uint32_t i, j; +- __vector uint8_t xmm0[16]; ++ __vector unsigned char xmm0[16]; + + for (j = 0; j < vectorizable_elements; j += 16) { + /* Load 16 elements (64 bytes) into 4 vectors registers. */ +@@ -280,7 +280,7 @@ unshuffle16_tiled_altivec(uint8_t* const dest, const u + const int32_t bytesoftype) { + int32_t i, j, offset_into_type; + const int32_t vecs_per_el_rem = bytesoftype & 0xF; +- __vector uint8_t xmm[16]; ++ __vector unsigned char xmm[16]; + + + /* Advance the offset into the type by the vector size (in bytes), unless this is diff --git a/databases/caterva/files/patch-contribs_c-blosc2_blosc_transpose-altivec.h b/databases/caterva/files/patch-contribs_c-blosc2_blosc_transpose-altivec.h new file mode 100644 index 000000000000..060dceb6dde1 --- /dev/null +++ b/databases/caterva/files/patch-contribs_c-blosc2_blosc_transpose-altivec.h @@ -0,0 +1,58 @@ +--- contribs/c-blosc2/blosc/transpose-altivec.h.orig 2023-03-23 20:13:07 UTC ++++ contribs/c-blosc2/blosc/transpose-altivec.h +@@ -15,18 +15,18 @@ extern "C" { + extern "C" { + #endif + +-static const __vector uint8_t even = (const __vector uint8_t) { ++static const __vector unsigned char even = (const __vector unsigned char) { + 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, + 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e}; + +-static const __vector uint8_t odd = (const __vector uint8_t) { ++static const __vector unsigned char odd = (const __vector unsigned char) { + 0x01, 0x03, 0x05, 0x07, 0x09, 0x0b, 0x0d, 0x0f, + 0x11, 0x13, 0x15, 0x17, 0x19, 0x1b, 0x1d, 0x1f}; + + + /* Transpose inplace 2 vectors of 16 bytes in src into dst. */ +-static void transpose2x16(__vector uint8_t *xmm0) { +- __vector uint8_t xmm1[2]; ++static void transpose2x16(__vector unsigned char *xmm0) { ++ __vector unsigned char xmm1[2]; + xmm1[0] = vec_perm(xmm0[0], xmm0[1], even); + xmm1[1] = vec_perm(xmm0[0], xmm0[1], odd); + +@@ -38,8 +38,8 @@ static void transpose2x16(__vector uint8_t *xmm0) { + + /* Transpose inplace 4 vectors of 16 bytes in src into dst. + * Total cost: 8 calls to vec_perm. */ +-static void transpose4x16(__vector uint8_t *xmm0) { +- __vector uint8_t xmm1[4]; ++static void transpose4x16(__vector unsigned char *xmm0) { ++ __vector unsigned char xmm1[4]; + + /* Transpose vectors 0-1*/ + xmm1[0] = vec_perm(xmm0[0], xmm0[1], even); +@@ -56,8 +56,8 @@ static void transpose4x16(__vector uint8_t *xmm0) { + + /* Transpose inplace 8 vectors of 16 bytes in src into dst. + * Total cost: 24 calls to vec_perm. */ +-static void transpose8x16(__vector uint8_t *xmm0) { +- __vector uint8_t xmm1[8]; ++static void transpose8x16(__vector unsigned char *xmm0) { ++ __vector unsigned char xmm1[8]; + + /* Transpose vectors 0-1*/ + for (int i = 0; i < 8; i += 2){ +@@ -85,8 +85,8 @@ static void transpose8x16(__vector uint8_t *xmm0) { + + /* Transpose inplace 16 vectors of 16 bytes in src into dst. + * Total cost: 64 calls to vec_perm. */ +-static void transpose16x16(__vector uint8_t * xmm0){ +- __vector uint8_t xmm1[16]; ++static void transpose16x16(__vector unsigned char * xmm0){ ++ __vector unsigned char xmm1[16]; + /* Transpose vectors 0-1*/ + for (int i = 0; i < 16; i += 2){ + xmm1[i] = vec_perm(xmm0[i], xmm0[i+1], even);