1#!/usr/bin/env python3
2
3# OpenCL built-in library: type conversion functions
4#
5# Copyright (c) 2013 Victor Oliveira <[email protected]>
6# Copyright (c) 2013 Jesse Towner <[email protected]>
7#
8# Permission is hereby granted, free of charge, to any person obtaining a copy
9# of this software and associated documentation files (the "Software"), to deal
10# in the Software without restriction, including without limitation the rights
11# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12# copies of the Software, and to permit persons to whom the Software is
13# furnished to do so, subject to the following conditions:
14#
15# The above copyright notice and this permission notice shall be included in
16# all copies or substantial portions of the Software.
17#
18# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24# THE SOFTWARE.
25
26# This script generates the file convert_type.cl, which contains all of the
27# OpenCL functions in the form:
28#
29# convert_<destTypen><_sat><_roundingMode>(<sourceTypen>)
30
# Scalar type families.  Every table below is keyed on these names.
int_types = ['char', 'uchar', 'short', 'ushort', 'int', 'uint', 'long', 'ulong']
float_types = ['float', 'double']
types = int_types + float_types
unsigned_types = [name for name in int_types if name.startswith('u')]

# Types that need a preprocessor guard in the generated file
# (see conditional_guard).
int64_types = ['long', 'ulong']
float64_types = ['double']

# Vector width suffixes ('' is the scalar case), and (width, half width)
# pairs for the widths that are converted by splitting into .lo/.hi halves.
vector_sizes = ['', '2', '3', '4', '8', '16']
half_sizes = [('2', ''), ('4', '2'), ('8', '4'), ('16', '8')]

# Function-name suffixes.
saturation = ['', '_sat']
rounding_modes = ['_rtz', '_rte', '_rtp', '_rtn']

# Per floating-point type strings; not referenced by the generator code
# below.
float_prefix = {'float': 'FLT_', 'double': 'DBL_'}
float_suffix = {'float': 'f', 'double': ''}

# Signed integer type with the same size as the key; the generators use it
# as the type of the condition argument passed to select().
bool_type = {name: (name[1:] if name in unsigned_types else name)
             for name in int_types}
bool_type.update({'float': 'int', 'double': 'long'})

# Unsigned counterpart of each integer type (identity for unsigned types).
unsigned_type = {name: (name if name in unsigned_types else 'u' + name)
                 for name in int_types}

# Size of each scalar type in bytes.
sizeof_type = {
  'char': 1, 'uchar': 1,
  'short': 2, 'ushort': 2,
  'int': 4, 'uint': 4,
  'long': 8, 'ulong': 8,
  'float': 4, 'double': 8,
}

# Names of the limit macros giving the largest value of each integer type.
limit_max = {
  'char': 'CHAR_MAX',
  'uchar': 'UCHAR_MAX',
  'short': 'SHRT_MAX',
  'ushort': 'USHRT_MAX',
  'int': 'INT_MAX',
  'uint': 'UINT_MAX',
  'long': 'LONG_MAX',
  'ulong': 'ULONG_MAX',
}

# Smallest value of each integer type: the literal 0 for unsigned types,
# otherwise the *_MIN macro matching the *_MAX one.
limit_min = {name: ('0' if name in unsigned_types
                    else limit_max[name].replace('_MAX', '_MIN'))
             for name in int_types}
88
def conditional_guard(src, dst):
  """Print the opening preprocessor guard a src -> dst conversion needs.

  A conversion that touches a 64-bit floating-point type is wrapped in
  ``#ifdef cl_khr_fp64``.  That guard also covers 64-bit integers: in the
  embedded profile, a target that supports cl_khr_fp64 has to support
  cles_khr_int64 as well (the generated preamble enforces this with an
  #error).  A conversion that only touches a 64-bit integer type gets the
  cles_khr_int64 / non-embedded-profile guard instead.

  Returns True when a guard line was printed, in which case the caller must
  print the matching ``#endif``; returns False when nothing was printed.
  """
  operands = (src, dst)

  if any(name in float64_types for name in operands):
    print("#ifdef cl_khr_fp64")
    return True

  if any(name in int64_types for name in operands):
    print("#if defined cles_khr_int64 || !defined(__EMBEDDED_PROFILE__)")
    return True

  return False
108
109
# Fixed preamble of the generated file: the "autogenerated" banner and
# license, the clc include, and the pragmas enabling the 64-bit extensions
# when the target defines them.
_generated_preamble = """/* !!!! AUTOGENERATED FILE generated by convert_type.py !!!!!

   DON'T CHANGE THIS FILE. MAKE YOUR CHANGES TO convert_type.py AND RUN:
   $ ./generate-conversion-type-cl.sh

   OpenCL type conversion functions

   Copyright (c) 2013 Victor Oliveira <[email protected]>
   Copyright (c) 2013 Jesse Towner <[email protected]>

   Permission is hereby granted, free of charge, to any person obtaining a copy
   of this software and associated documentation files (the "Software"), to deal
   in the Software without restriction, including without limitation the rights
   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   copies of the Software, and to permit persons to whom the Software is
   furnished to do so, subject to the following conditions:

   The above copyright notice and this permission notice shall be included in
   all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
   THE SOFTWARE.
*/

#include <clc/clc.h>

#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable

#if defined(__EMBEDDED_PROFILE__) && !defined(cles_khr_int64)
#error Embedded profile that supports cl_khr_fp64 also has to support cles_khr_int64
#endif

#endif

#ifdef cles_khr_int64
#pragma OPENCL EXTENSION cles_khr_int64 : enable
#endif

"""
print(_generated_preamble)
155
156#
157# Default Conversions
158#
159# All conversions are in accordance with the OpenCL specification,
160# which cites the C99 conversion rules.
161#
162# Casting from floating point to integer results in conversions
163# with truncation, so it should be suitable for the default convert
164# functions.
165#
# Conversions from integer to floating-point, and floating-point to
# floating-point through casting are done with the default rounding
168# mode. While C99 allows dynamically changing the rounding mode
169# during runtime, it is not a supported feature in OpenCL according
170# to Section 7.1 - Rounding Modes in the OpenCL 1.2 specification.
171#
172# Therefore, we can assume for optimization purposes that the
173# rounding mode is fixed to round-to-nearest-even. Platform target
174# authors should ensure that the rounding-control registers remain
175# in this state, and that this invariant holds.
176#
177# Also note, even though the OpenCL specification isn't entirely
178# clear on this matter, we implement all rounding mode combinations
179# even for integer-to-integer conversions. When such a conversion
180# is used, the rounding mode is ignored.
181#
182
def generate_default_conversion(src, dst, mode):
  """Print the unsaturated convert_<dst>[n]<mode>(<src>[n] x) overloads.

  src, dst -- scalar type names of the source and destination.
  mode     -- rounding-mode suffix appended to the function names ('' for
              the unsuffixed functions).

  One overload is printed for the scalar case and one for each vector
  width.  The scalar overload is a plain cast; the vector overloads are
  assembled from the narrower unsuffixed conversions.  The whole group is
  wrapped in a preprocessor guard when conditional_guard() requests one.
  """
  needs_endif = conditional_guard(src, dst)
  names = dict(SRC=src, DST=dst, M=mode)
  attributes = "_CLC_DEF _CLC_OVERLOAD\n"

  # Scalar overload: a plain cast.
  scalar_template = attributes + (
    "{DST} convert_{DST}{M}({SRC} x)\n"
    "{{\n"
    "  return ({DST})x;\n"
    "}}\n")
  print(scalar_template.format(**names))

  # Widths 2, 4, 8 and 16: convert the .lo and .hi halves separately.
  halved_template = attributes + (
    "{DST}{N} convert_{DST}{N}{M}({SRC}{N} x)\n"
    "{{\n"
    "  return ({DST}{N})(convert_{DST}{H}(x.lo), convert_{DST}{H}(x.hi));\n"
    "}}\n")
  for width, half_width in half_sizes:
    print(halved_template.format(N=width, H=half_width, **names))

  # Width 3: a 2-component conversion plus a scalar one.
  triple_template = attributes + (
    "{DST}3 convert_{DST}3{M}({SRC}3 x)\n"
    "{{\n"
    "  return ({DST}3)(convert_{DST}2(x.s01), convert_{DST}(x.s2));\n"
    "}}")
  print(triple_template.format(**names))

  if needs_endif:
    print("#endif")
213
# Unsuffixed conversions between every pair of types.
for src, dst in [(s, d) for s in types for d in types]:
  generate_default_conversion(src, dst, '')

# Integer-to-integer conversions also get every rounding-mode suffix; the
# mode has no effect on them.
for src, dst, mode in [(s, d, m)
                       for s in int_types
                       for d in int_types
                       for m in rounding_modes]:
  generate_default_conversion(src, dst, mode)
222
223#
224# Saturated Conversions To Integers
225#
226# These functions are dependent on the unsaturated conversion functions
227# generated above, and use clamp, max, min, and select to eliminate
228# branching and vectorize the conversions.
229#
230# Again, as above, we allow all rounding modes for integer-to-integer
231# conversions with saturation.
232#
233
def generate_saturated_conversion(src, dst, size):
  """Print convert_<dst><size>_sat(<src><size> x).

  src  -- scalar source type name (integer or floating-point).
  dst  -- scalar integer destination type name.
  size -- vector width suffix ('' for scalar).

  The generated function limits x to the range of the destination type and
  then calls the unsaturated conversion.  It is wrapped in a preprocessor
  guard when conditional_guard() requests one.
  """
  needs_endif = conditional_guard(src, dst)

  # Signature and opening brace.
  print("_CLC_DEF _CLC_OVERLOAD")
  print("{DST}{N} convert_{DST}{N}_sat({SRC}{N} x)".format(DST=dst, SRC=src, N=size))
  print("{")

  # FIXME: This is a work around for lack of select function with
  # signed third argument when the first two arguments are unsigned types.
  # We cast to the signed type for sign-extension, then do a bitcast to
  # the unsigned type.
  mask_open = "convert_{BOOL}{N}".format(BOOL=bool_type[dst], N=size)
  mask_close = ""
  if dst in unsigned_types:
    mask_open = "as_{DST}{N}(".format(DST=dst, N=size) + mask_open
    mask_close = ")"

  if src == dst:
    # Same type: nothing to saturate.
    print("  return x;")

  elif src in float_types:
    # Floating-point to integer: convert first, then overwrite the lanes
    # whose input lies below the minimum or above the maximum.
    print("  {DST}{N} y = convert_{DST}{N}(x);".format(DST=dst, N=size))
    for compare, bound in (('<', limit_min[dst]), ('>', limit_max[dst])):
      print("  y = select(y, ({DST}{N}){LIMIT}, {BP}(x {OP} ({SRC}{N}){LIMIT}){BS});"
        .format(SRC=src, DST=dst, N=size, LIMIT=bound, OP=compare,
                BP=mask_open, BS=mask_close))
    print("  return y;")

  else:
    # Integer to integer conversion: choose the limiting statement (if any)
    # from the signedness of the source and the relative sizes.
    src_bytes = sizeof_type[src]
    dst_bytes = sizeof_type[dst]
    limiter = None

    if src in unsigned_types:
      # An unsigned source can only exceed the maximum, and only when the
      # destination is not wider.
      if src_bytes >= dst_bytes:
        limiter = "  x = min(x, ({SRC}){DST_MAX});".format(SRC=src, DST_MAX=limit_max[dst])
    elif src_bytes > dst_bytes:
      # Signed source, narrower destination: limit at both ends.
      limiter = ("  x = clamp(x, ({SRC}){DST_MIN}, ({SRC}){DST_MAX});"
        .format(SRC=src, DST_MIN=limit_min[dst], DST_MAX=limit_max[dst]))
    elif src_bytes == dst_bytes or dst in unsigned_types:
      # Signed source, destination of equal size or wider and unsigned:
      # only negative values need limiting.
      limiter = "  x = max(x, ({SRC})0);".format(SRC=src)

    if limiter is not None:
      print(limiter)
    print("  return convert_{DST}{N}(x);".format(DST=dst, N=size))

  # Closing brace and guard.
  print("}")
  if needs_endif:
    print("#endif")
295
296
# Saturated conversions from every type to every integer type, at every
# vector width.
for src, dst, size in [(s, d, n)
                       for s in types
                       for d in int_types
                       for n in vector_sizes]:
  generate_saturated_conversion(src, dst, size)
301
302
def generate_saturated_conversion_with_rounding(src, dst, size, mode):
  """Print convert_<dst><size>_sat<mode>(<src><size> x).

  src, dst -- scalar type names of the source and destination.
  size     -- vector width suffix ('' for scalar).
  mode     -- rounding-mode suffix of the generated function name.

  The generated function simply forwards to convert_<dst><size>_sat, so the
  rounding mode only affects the name.  It is wrapped in a preprocessor
  guard when conditional_guard() requests one.
  """
  needs_endif = conditional_guard(src, dst)

  dst_vec = "{}{}".format(dst, size)
  src_vec = "{}{}".format(src, size)
  body = [
    "_CLC_DEF _CLC_OVERLOAD",
    "{0} convert_{0}_sat{1}({2} x)".format(dst_vec, mode, src_vec),
    "{",
    "  return convert_{0}_sat(x);".format(dst_vec),
    "}",
    "",  # keeps the blank line after the closing brace
  ]
  print("\n".join(body))

  if needs_endif:
    print("#endif")
318
319
# Integer-to-integer saturated conversions for every vector width and
# rounding-mode suffix.
for src, dst, size, mode in [(s, d, n, m)
                             for s in int_types
                             for d in int_types
                             for n in vector_sizes
                             for m in rounding_modes]:
  generate_saturated_conversion_with_rounding(src, dst, size, mode)
325
326#
327# Conversions To/From Floating-Point With Rounding
328#
329# Note that we assume as above that casts from floating-point to
330# integer are done with truncation, and that the default rounding
331# mode is fixed to round-to-nearest-even, as per C99 and OpenCL
332# rounding rules.
333#
334# These functions rely on the use of abs, ceil, fabs, floor,
335# nextafter, sign, rint and the above generated conversion functions.
336#
337# Only conversions to integers can have saturation.
338#
339
def generate_float_conversion(src, dst, size, mode, sat):
  """Print convert_<dst><size><sat><mode>(<src><size> x) for conversions
  to or from a floating-point type with an explicit rounding mode.

  src, dst -- scalar type names of the source and destination.
  size     -- vector width suffix ('' for scalar).
  mode     -- rounding-mode suffix: '_rtz', '_rte', '_rtp' or '_rtn'.
  sat      -- '' or '_sat'; only used when dst is an integer type.

  Integer destination: x is rounded in the source type (rint, ceil or
  floor; nothing for '_rtz' because the cast truncates) and then passed to
  the unsuffixed, optionally saturated, conversion.

  Floating-point destination: '_rte' is the default conversion.  For the
  other modes the default result r is converted back to the source type
  (y) and compared with x; where the default rounding moved the value in
  the wrong direction for the requested mode, r is stepped to the adjacent
  representable value with nextafter.

  The function is wrapped in a preprocessor guard when conditional_guard()
  requests one.
  """
  # Header
  close_conditional = conditional_guard(src, dst)
  print("""_CLC_DEF _CLC_OVERLOAD
{DST}{N} convert_{DST}{N}{S}{M}({SRC}{N} x)
{{""".format(SRC=src, DST=dst, N=size, M=mode, S=sat))

  # Perform conversion
  if dst in int_types:
    if mode == '_rte':
      print("  x = rint(x);")
    elif mode == '_rtp':
      print("  x = ceil(x);")
    elif mode == '_rtn':
      print("  x = floor(x);")
    print("  return convert_{DST}{N}{S}(x);".format(DST=dst, N=size, S=sat))
  elif mode == '_rte':
    print("  return convert_{DST}{N}(x);".format(DST=dst, N=size))
  else:
    print("  {DST}{N} r = convert_{DST}{N}(x);".format(DST=dst, N=size))
    # y is the round trip of r back to the source type.  It has to be
    # computed from r: the comparisons below use it to tell in which
    # direction the default conversion rounded x.
    print("  {SRC}{N} y = convert_{SRC}{N}(r);".format(SRC=src, N=size))
    if mode == '_rtz':
      # Compare magnitudes; abs() on an integer type yields the unsigned type.
      if src in int_types:
        print("  {USRC}{N} abs_x = abs(x);".format(USRC=unsigned_type[src], N=size))
        print("  {USRC}{N} abs_y = abs(y);".format(USRC=unsigned_type[src], N=size))
      else:
        print("  {SRC}{N} abs_x = fabs(x);".format(SRC=src, N=size))
        print("  {SRC}{N} abs_y = fabs(y);".format(SRC=src, N=size))
      # Rounded away from zero: step r back towards zero.
      print("  return select(r, nextafter(r, sign(r) * ({DST}{N})-INFINITY), convert_{BOOL}{N}(abs_y > abs_x));"
        .format(DST=dst, N=size, BOOL=bool_type[dst]))
    elif mode == '_rtp':
      # Rounded down: step r up.
      print("  return select(r, nextafter(r, ({DST}{N})INFINITY), convert_{BOOL}{N}(y < x));"
        .format(DST=dst, N=size, BOOL=bool_type[dst]))
    elif mode == '_rtn':
      # Rounded up: step r down.
      print("  return select(r, nextafter(r, ({DST}{N})-INFINITY), convert_{BOOL}{N}(y > x));"
        .format(DST=dst, N=size, BOOL=bool_type[dst]))

  # Footer
  print("}")
  if close_conditional:
    print("#endif")
381
382
# Floating-point to integer: every vector width and rounding mode, with and
# without saturation.
for src, dst, size, mode, sat in ((s, d, n, m, q)
                                  for s in float_types
                                  for d in int_types
                                  for n in vector_sizes
                                  for m in rounding_modes
                                  for q in saturation):
  generate_float_conversion(src, dst, size, mode, sat)


# Any type to floating-point: every vector width and rounding mode, never
# saturated.
for src, dst, size, mode in ((s, d, n, m)
                             for s in types
                             for d in float_types
                             for n in vector_sizes
                             for m in rounding_modes):
  generate_float_conversion(src, dst, size, mode, '')
396