1 /* Generic conversion to and from 8bit charsets - S390 version.
2    Copyright (C) 2016-2021 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4 
5    The GNU C Library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 2.1 of the License, or (at your option) any later version.
9 
10    The GNU C Library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14 
15    You should have received a copy of the GNU Lesser General Public
16    License along with the GNU C Library; if not, see
17    <https://www.gnu.org/licenses/>.  */
18 
19 #if defined HAVE_S390_VX_ASM_SUPPORT
20 
21 # if defined HAVE_S390_VX_GCC_SUPPORT /* ASM_CLOBBER_VR (NR) expands to ", NR", i.e. one more entry for an inline-asm clobber list naming vector register NR.  */
22 #  define ASM_CLOBBER_VR(NR) , NR
23 # else /* Otherwise the macro expands to nothing (presumably because the compiler does not accept vector register names as clobbers then - TODO confirm).  */
24 #  define ASM_CLOBBER_VR(NR)
25 # endif
26 
27 /* Generate the conversion loop routines without vector instructions as
28    fallback, if vector instructions aren't available at runtime.  The names from_generic and to_generic are mapped to __from_generic_c and __to_generic_c for the duration of the include; those two names are the fallbacks selected by the ifunc definitions at the end of this file.  */
29 # define IGNORE_ICONV_SKELETON /* Presumably keeps the included file from including iconv/skeleton.c itself; this file includes the skeleton at its end - TODO confirm.  */
30 # define from_generic __from_generic_c
31 # define to_generic __to_generic_c
32 # include "iconvdata/8bit-generic.c"
33 # undef IGNORE_ICONV_SKELETON
34 # undef from_generic
35 # undef to_generic
36 
37 /* Generate the conversion routines with vector instructions. The vector
38    routines can only be used with charsets where the maximum UCS4 value
39    fits in 1 byte size. Then the hardware translate-instruction is used
40    to translate between multiple generic characters and "1 byte UCS4"
41    characters at once. The vector instructions are used to convert between
42    the "1 byte UCS4" and UCS4.  */
43 # include <ifunc-resolve.h>
44 
45 # undef FROM_LOOP /* Drop any earlier definitions (possibly made by the include above) before naming the vector loops.  */
46 # undef TO_LOOP
47 # define FROM_LOOP		__from_generic_vx /* Vector loop: 8bit charset to UCS4.  */
48 # define TO_LOOP		__to_generic_vx /* Vector loop: UCS4 to 8bit charset.  */
49 
50 # define MIN_NEEDED_FROM	1 /* Bytes per character on the 8bit charset side.  */
51 # define MIN_NEEDED_TO		4 /* Bytes per character on the UCS4 side.  */
52 # define ONE_DIRECTION		0 /* Both directions are defined below; the macro itself is consumed outside this file.  */
53 
54 /* First define the conversion function from the 8bit charset to UCS4.  BODY_FROM_ORIG converts exactly one character with plain C code; the vector BODY defined after it uses it when there is too little input or output space for a vector block.  */
55 # define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
56 # define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
57 # define LOOPFCT		FROM_LOOP
58 # define BODY_FROM_ORIG /* Look up the byte at inptr in to_ucs4 and store the result as 4 bytes at outptr.  */ \
59   {									      \
60     uint32_t ch = to_ucs4[*inptr];					      \
61     /* A zero table entry for a nonzero input byte means the byte has no mapping; only checked if HAS_HOLES.  */ \
62     if (HAS_HOLES && __builtin_expect (ch == L'\0', 0) && *inptr != '\0')     \
63       {									      \
64 	/* This is an illegal character.  The argument 1 is presumably the number of input bytes to skip - confirm in iconv/loop.c.  */ \
65 	STANDARD_FROM_LOOP_ERR_HANDLER (1);				      \
66       }									      \
67     /* Store the UCS4 value and advance: 4 output bytes, 1 input byte.  */ \
68     put32 (outptr, ch);							      \
69     outptr += 4;							      \
70     ++inptr;								      \
71   }
72 
73 # define BODY /* 8bit charset to UCS4: convert up to 16 blocks of 16 input bytes per pass with tr and vector unpack instructions.  */ \
74   {									\
75     if (__builtin_expect (inend - inptr < 16, 1)			\
76 	|| outend - outptr < 64)					\
77       /* Less than 16 input bytes or less than 64 bytes of output space: convert one character with c code.  */ \
78       BODY_FROM_ORIG							\
79     else								\
80        {								\
81 	 /* Convert 16 ... 256 bytes at once with tr-instruction.  buf receives the copied and translated bytes, so loop_count (number of 16 byte blocks) is capped at 16.  */ \
82 	 size_t index;							\
83 	 char buf[256];							\
84 	 size_t loop_count = (inend - inptr) / 16;			\
85 	 if (loop_count > (outend - outptr) / 64)			\
86 	   loop_count = (outend - outptr) / 64;				\
87 	 if (loop_count > 16)						\
88 	   loop_count = 16;						\
89 	 __asm__ volatile (".machine push\n\t"				\
90 			   ".machine \"z13\"\n\t"			\
91 			   ".machinemode \"zarch_nohighgprs\"\n\t"	\
92 			   "    sllk %[R_I],%[R_LI],4\n\t"		\
93 			   "    ahi %[R_I],-1\n\t"			\
94 			   /* Execute mvc and tr with correct len (R_I = 16 * loop_count - 1).  */ \
95 			   "    exrl %[R_I],21f\n\t"			\
96 			   "    exrl %[R_I],22f\n\t"			\
97 			   /* Post-processing.  */			\
98 			   "    lghi %[R_I],0\n\t"			\
99 			   "    vzero %%v0\n\t"				\
100 			   "0:  \n\t"					\
101 			   /* Find invalid character - value is zero.  */ \
102 			   "    vl %%v16,0(%[R_I],%[R_BUF])\n\t"	\
103 			   "    vceqbs %%v23,%%v0,%%v16\n\t"		\
104 			   "    jle 10f\n\t"				\
105 			   "1:  \n\t"					\
106 			   /* Enlarge to UCS4.  */			\
107 			   "    vuplhb %%v17,%%v16\n\t"			\
108 			   "    vupllb %%v18,%%v16\n\t"			\
109 			   "    vuplhh %%v19,%%v17\n\t"			\
110 			   "    vupllh %%v20,%%v17\n\t"			\
111 			   "    vuplhh %%v21,%%v18\n\t"			\
112 			   "    vupllh %%v22,%%v18\n\t"			\
113 			   /* Store 64bytes to buf_out.  */		\
114 			   "    vstm %%v19,%%v22,0(%[R_OUT])\n\t"	\
115 			   "    aghi %[R_I],16\n\t"			\
116 			   "    la %[R_OUT],64(%[R_OUT])\n\t"		\
117 			   "    brct %[R_LI],0b\n\t"			\
118 			   "    la %[R_IN],0(%[R_I],%[R_IN])\n\t"	\
119 			   "    j 20f\n\t"				\
120 			   "21: mvc 0(1,%[R_BUF]),0(%[R_IN])\n\t"	\
121 			   "22: tr 0(1,%[R_BUF]),0(%[R_TBL])\n\t"	\
122 			   /* Possibly invalid character found.  */	\
123 			   "10: \n\t"					\
124 			   /* Test if input was zero, too.  */		\
125 			   "    vl %%v24,0(%[R_I],%[R_IN])\n\t"		\
126 			   "    vceqb %%v24,%%v0,%%v24\n\t"		\
127 			   /* Zeros in buf (v23) and inptr (v24) are marked \
128 			      with one bits. After xor, invalid characters \
129 			      are marked as one bits. Proceed, if no	\
130 			      invalid characters are found.  */		\
131 			   "    vx %%v24,%%v23,%%v24\n\t"		\
132 			   "    vfenebs %%v24,%%v24,%%v0\n\t"		\
133 			   "    jo 1b\n\t"				\
134 			   /* Found an invalid translation.		\
135 			      Store the preceding chars.  */		\
136 			   "    la %[R_IN],0(%[R_I],%[R_IN])\n\t"	\
137 			   "    vlgvb %[R_I],%%v24,7\n\t"		\
138 			   "    la %[R_IN],0(%[R_I],%[R_IN])\n\t"	\
139 			   "    sll %[R_I],2\n\t"			\
140 			   "    ahi %[R_I],-1\n\t"			\
141 			   "    jl 20f\n\t"				\
142 			   "    lgr %[R_LI],%[R_I]\n\t"			\
143 			   "    vuplhb %%v17,%%v16\n\t"			\
144 			   "    vuplhh %%v19,%%v17\n\t"			\
145 			   "    vstl %%v19,%[R_I],0(%[R_OUT])\n\t"	\
146 			   "    ahi %[R_I],-16\n\t"			\
147 			   "    jl 11f\n\t"				\
148 			   "    vupllh %%v20,%%v17\n\t"			\
149 			   "    vstl %%v20,%[R_I],16(%[R_OUT])\n\t"	\
150 			   "    ahi %[R_I],-16\n\t"			\
151 			   "    jl 11f\n\t"				\
152 			   "    vupllb %%v18,%%v16\n\t"			\
153 			   "    vuplhh %%v21,%%v18\n\t"			\
154 			   "    vstl %%v21,%[R_I],32(%[R_OUT])\n\t"	\
155 			   "    ahi %[R_I],-16\n\t"			\
156 			   "    jl 11f\n\t"				\
157 			   "    vupllh %%v22,%%v18\n\t"			\
158 			   "    vstl %%v22,%[R_I],48(%[R_OUT])\n\t"	\
159 			   "11: \n\t"					\
160 			   "    la %[R_OUT],1(%[R_LI],%[R_OUT])\n\t"	\
161 			   "20: \n\t"					\
162 			   ".machine pop"				\
163 			   : /* outputs */ [R_IN] "+a" (inptr)		\
164 			     , [R_OUT] "+a" (outptr), [R_I] "=&a" (index) \
165 			     , [R_LI] "+a" (loop_count)			\
166 			   : /* inputs */ [R_BUF] "a" (buf)		\
167 			     , [R_TBL] "a" (to_ucs1)			\
168 			   : /* clobber list*/ "memory", "cc"		\
169 			     ASM_CLOBBER_VR ("v0")  ASM_CLOBBER_VR ("v16") \
170 			     ASM_CLOBBER_VR ("v17") ASM_CLOBBER_VR ("v18") \
171 			     ASM_CLOBBER_VR ("v19") ASM_CLOBBER_VR ("v20") \
172 			     ASM_CLOBBER_VR ("v21") ASM_CLOBBER_VR ("v22") \
173 			     ASM_CLOBBER_VR ("v23") ASM_CLOBBER_VR ("v24") \
174 			   );						\
175 	 /* Error occurred?  The asm counts loop_count down to zero only if every block was converted; on an invalid character it is left nonzero and inptr/outptr point behind the last converted character.  */ \
176 	 if (loop_count != 0)						\
177 	   {								\
178 	     /* Found an invalid character!  */				\
179 	    STANDARD_FROM_LOOP_ERR_HANDLER (1);				\
180 	  }								\
181       }									\
182     }
183 
184 # define LOOP_NEED_FLAGS
185 # include <iconv/loop.c> /* Presumably instantiates LOOPFCT (here FROM_LOOP) from the BODY defined above - confirm in iconv/loop.c.  */
186 
187 /* Next, define the other direction - from UCS4 to 8bit charset.  BODY_TO_ORIG converts exactly one character with plain C code; the vector BODY defined after it uses it when there is too little input or output space for a vector block.  */
188 # define MIN_NEEDED_INPUT	MIN_NEEDED_TO
189 # define MIN_NEEDED_OUTPUT	MIN_NEEDED_FROM
190 # define LOOPFCT		TO_LOOP
191 # define BODY_TO_ORIG /* Read one UCS4 value at inptr and store its from_ucs4 entry as one byte at outptr.  */ \
192   {									      \
193     uint32_t ch = get32 (inptr);					      \
194     /* Reject values beyond the table and nonzero values whose table entry is zero.  */ \
195     if (__builtin_expect (ch >= sizeof (from_ucs4) / sizeof (from_ucs4[0]), 0)\
196 	|| (__builtin_expect (from_ucs4[ch], '\1') == '\0' && ch != 0))	      \
197       {									      \
198 	UNICODE_TAG_HANDLER (ch, 4);					      \
199 									      \
200 	/* This is an illegal character.  The argument 4 is presumably the number of input bytes to skip - confirm in iconv/loop.c.  */ \
201 	STANDARD_TO_LOOP_ERR_HANDLER (4);				      \
202       }									      \
203     /* Store the translated byte and advance: 1 output byte, 4 input bytes.  */ \
204     *outptr++ = from_ucs4[ch];						      \
205     inptr += 4;								      \
206   }
207 # define BODY /* UCS4 to 8bit charset: convert up to 16 blocks of 16 UCS4 characters per pass with vector pack and tr instructions.  */ \
208   {									\
209     if (__builtin_expect (inend - inptr < 64, 1)			\
210 	|| outend - outptr < 16)					\
211       /* Less than 64 input bytes or less than 16 bytes of output space: convert one character with c code.  */ \
212       BODY_TO_ORIG							\
213     else								\
214       {									\
215 	/* Convert 64 ... 1024 bytes at once with tr-instruction.  buf collects the characters shortened to bytes, so loop_count (number of 64 byte blocks) is capped at 16.  */ \
216 	size_t index, tmp;						\
217 	char buf[256];							\
218 	size_t loop_count = (inend - inptr) / 64;			\
219 	uint32_t max = sizeof (from_ucs4) / sizeof (from_ucs4[0]);	\
220 	if (loop_count > (outend - outptr) / 16)			\
221 	  loop_count = (outend - outptr) / 16;				\
222 	if (loop_count > 16)						\
223 	  loop_count = 16;						\
224 	size_t remaining_loop_count = loop_count;			\
225 	/* Step 1: Check for ch>=max, ch == 0 and shorten to bytes.	\
226 	   (ch == 0 is no error, but is handled differently)  */	\
227 	__asm__ volatile (".machine push\n\t"				\
228 			  ".machine \"z13\"\n\t"			\
229 			  ".machinemode \"zarch_nohighgprs\"\n\t"	\
230 			  /* Setup to check for ch >= max.  */		\
231 			  "    vzero %%v21\n\t"				\
232 			  "    vleih %%v21,-24576,0\n\t" /* element 0:   >  */ \
233 			  "    vleih %%v21,-8192,2\n\t"  /* element 1: =<>  */ \
234 			  "    vlvgf %%v20,%[R_MAX],0\n\t" /* element 0: val  */ \
235 			  /* Process in 64byte - 16 characters blocks.  */ \
236 			  "    lghi %[R_I],0\n\t"			\
237 			  "    lghi %[R_TMP],0\n\t"			\
238 			  "0:  \n\t"					\
239 			  "    vlm %%v16,%%v19,0(%[R_IN])\n\t"		\
240 			  /* Test for ch >= max and ch == 0.  */	\
241 			  "    vstrczfs %%v22,%%v16,%%v20,%%v21\n\t"	\
242 			  "    jno 10f\n\t"				\
243 			  "    vstrczfs %%v22,%%v17,%%v20,%%v21\n\t"	\
244 			  "    jno 11f\n\t"				\
245 			  "    vstrczfs %%v22,%%v18,%%v20,%%v21\n\t"	\
246 			  "    jno 12f\n\t"				\
247 			  "    vstrczfs %%v22,%%v19,%%v20,%%v21\n\t"	\
248 			  "    jno 13f\n\t"				\
249 			  /* Shorten to byte values.  */		\
250 			  "    vpkf %%v16,%%v16,%%v17\n\t"		\
251 			  "    vpkf %%v18,%%v18,%%v19\n\t"		\
252 			  "    vpkh %%v16,%%v16,%%v18\n\t"		\
253 			  /* Store 16bytes to buf.  */			\
254 			  "    vst %%v16,0(%[R_I],%[R_BUF])\n\t"	\
255 			  /* Loop until all blocks are processed.  */	\
256 			  "    la %[R_IN],64(%[R_IN])\n\t"		\
257 			  "    aghi %[R_I],16\n\t"			\
258 			  "    brct %[R_LI],0b\n\t"			\
259 			  "    j 20f\n\t"				\
260 			  /* Found error ch >= max or ch == 0. */	\
261 			  "13: aghi %[R_TMP],4\n\t"			\
262 			  "12: aghi %[R_TMP],4\n\t"			\
263 			  "11: aghi %[R_TMP],4\n\t"			\
264 			  "10: vlgvb %[R_I],%%v22,7\n\t"		\
265 			  "    srlg %[R_I],%[R_I],2\n\t"		\
266 			  "    agr %[R_I],%[R_TMP]\n\t"			\
267 			  "20: \n\t"					\
268 			  ".machine pop"				\
269 			  : /* outputs */ [R_IN] "+a" (inptr)		\
270 			    , [R_I] "=&a" (index)			\
271 			    , [R_TMP] "=&d" (tmp) /* Early-clobber: R_TMP is zeroed before R_BUF is last read, so it must not share a register with an input.  */ \
272 			    , [R_LI] "+d" (remaining_loop_count)	\
273 			  : /* inputs */ [R_BUF] "a" (buf)		\
274 			    , [R_MAX] "d" (max)				\
275 			  : /* clobber list*/ "memory", "cc"		\
276 			    ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \
277 			    ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \
278 			    ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21") \
279 			    ASM_CLOBBER_VR ("v22")			\
280 			  );						\
281 	/* Error occurred in step 1? An error (ch >= max || ch == 0)	\
282 	   occurred, if remaining_loop_count > 0. The error occurred	\
283 	   at character-index (index) after already processed blocks.  */ \
284 	loop_count -= remaining_loop_count;				\
285 	if (loop_count > 0)						\
286 	  {								\
287 	    /* Step 2: Translate already processed blocks in buf and	\
288 	       check for errors (from_ucs4[ch] == 0).  */		\
289 	    __asm__ volatile (".machine push\n\t"			\
290 			      ".machine \"z13\"\n\t"			\
291 			      ".machinemode \"zarch_nohighgprs\"\n\t"	\
292 			      "    sllk %[R_I],%[R_LI],4\n\t"		\
293 			      "    ahi %[R_I],-1\n\t"			\
294 			      /* Execute tr with correct len (R_I = 16 * loop_count - 1).  */ \
295 			      "    exrl %[R_I],21f\n\t"			\
296 			      /* Post-processing.  */			\
297 			      "    lghi %[R_I],0\n\t"			\
298 			      "0:  \n\t"				\
299 			      /* Find invalid character - value == 0.  */ \
300 			      "    vl %%v16,0(%[R_I],%[R_BUF])\n\t"	\
301 			      "    vfenezbs %%v17,%%v16,%%v16\n\t"	\
302 			      "    je 10f\n\t"				\
303 			      /* Store 16bytes to buf_out.  */		\
304 			      "    vst %%v16,0(%[R_I],%[R_OUT])\n\t"	\
305 			      "    aghi %[R_I],16\n\t"			\
306 			      "    brct %[R_LI],0b\n\t"			\
307 			      "    la %[R_OUT],0(%[R_I],%[R_OUT])\n\t"	\
308 			      "    j 20f\n\t"				\
309 			      "21: tr 0(1,%[R_BUF]),0(%[R_TBL])\n\t"	\
310 			      /* Found an error: from_ucs4[ch] == 0.  */ \
311 			      "10: la %[R_OUT],0(%[R_I],%[R_OUT])\n\t"	\
312 			      "    vlgvb %[R_I],%%v17,7\n\t"		\
313 			      "20: \n\t"				\
314 			      ".machine pop"				\
315 			      : /* outputs */ [R_OUT] "+a" (outptr)	\
316 				, [R_I] "=&a" (tmp)			\
317 				, [R_LI] "+d" (loop_count)		\
318 			      : /* inputs */ [R_BUF] "a" (buf)		\
319 				, [R_TBL] "a" (from_ucs4)		\
320 			      : /* clobber list*/ "memory", "cc"	\
321 				ASM_CLOBBER_VR ("v16")			\
322 				ASM_CLOBBER_VR ("v17")			\
323 			      );					\
324 	    /* Error occurred in processed bytes of step 2?		\
325 	       Thus possible error in step 1 is obsolete. tmp is then the index within the failing block and loop_count the number of blocks not yet stored, so inptr is moved back to the start of the failing block.  */ \
326 	    if (tmp < 16)						\
327 	      {								\
328 		index = tmp;						\
329 		inptr -= loop_count * 64;				\
330 	      }								\
331 	  }								\
332 	/* Error occurred in step 1/2?  Without an error index is a multiple of 16 that is at least 16.  */ \
333 	if (index < 16)							\
334 	  {								\
335 	    /* Found an invalid character (see step 2) or zero		\
336 	       (see step 1) at index! Convert the chars before index	\
337 	       manually. If there is a zero at index detected by step 1, \
338 	       there could be invalid characters before this zero.  */	\
339 	    int i;							\
340 	    uint32_t ch;						\
341 	    for (i = 0; i < index; i++)					\
342 	      {								\
343 		ch = get32 (inptr);					\
344 		if (__builtin_expect (from_ucs4[ch], '\1') == '\0')     \
345 		  break;						\
346 		*outptr++ = from_ucs4[ch];				\
347 		inptr += 4;						\
348 	      }								\
349 	    if (i == index)						\
350 	      {								\
351 		ch = get32 (inptr);					\
352 		if (ch == 0)						\
353 		  {							\
354 		    /* This is no error, but handled differently.  */	\
355 		    *outptr++ = from_ucs4[ch];				\
356 		    inptr += 4;						\
357 		    continue;						\
358 		  }							\
359 	      }								\
360 									\
361 	    /* iconv/loop.c disables -Wmaybe-uninitialized for a false	\
362 	       positive warning in this code with -Os and has a		\
363 	       comment referencing this code accordingly.  Updates in	\
364 	       one place may require updates in the other.  */		\
365 	    UNICODE_TAG_HANDLER (ch, 4);				\
366 									\
367 	    /* This is an illegal character.  */			\
368 	    STANDARD_TO_LOOP_ERR_HANDLER (4);				\
369 	  }								\
370       }									\
371   }
372 
373 # define LOOP_NEED_FLAGS
374 # include <iconv/loop.c> /* Presumably instantiates LOOPFCT (here TO_LOOP) from the BODY defined above - confirm in iconv/loop.c.  */
375 
376 
377 /* Generate ifunc'ed loop function.  __from_generic uses the vector loop only if the from_ucs4 table has at most 256 entries and hwcap reports HWCAP_S390_VX; otherwise it uses the C loop.  */
378 s390_libc_ifunc_expr (__from_generic_c, __from_generic,
379 		      (sizeof (from_ucs4) / sizeof (from_ucs4[0]) <= 256
380 		       && hwcap & HWCAP_S390_VX)
381 		      ? __from_generic_vx
382 		      : __from_generic_c);
383 
384 s390_libc_ifunc_expr (__to_generic_c, __to_generic,
385 		      (sizeof (from_ucs4) / sizeof (from_ucs4[0]) <= 256
386 		       && hwcap & HWCAP_S390_VX)
387 		      ? __to_generic_vx
388 		      : __to_generic_c); /* Same selection rule as for __from_generic: table size <= 256 and HWCAP_S390_VX, else the C loop.  */
389 
390 strong_alias (__to_generic_c_single, __to_generic_single) /* Only the C variant is aliased here; __to_generic_c_single is presumably generated by the fallback include at the top of this file - confirm.  */
391 
392 # undef FROM_LOOP /* Switch the loop names from the vector variants to the ifunc'ed symbols before the skeleton is included.  */
393 # undef TO_LOOP
394 # define FROM_LOOP		__from_generic
395 # define TO_LOOP		__to_generic
396 # include <iconv/skeleton.c>
397 
398 #else
399 /* Generate this module without ifunc if build environment lacks vector
400    support.  Instead the common 8bit-generic.c is used.  */
401 # include "iconvdata/8bit-generic.c"
402 #endif /* !defined HAVE_S390_VX_ASM_SUPPORT */
403