The OpenD Programming Language

1 /*
2 Boost Software License - Version 1.0 - August 17th, 2003
3 Permission is hereby granted, free of charge, to any person or organization
4 obtaining a copy of the software and accompanying documentation covered by
5 this license ( the "Software" ) to use, reproduce, display, distribute,
6 execute, and transmit the Software, and to prepare derivative works of the
7 Software, and to permit third-parties to whom the Software is furnished to
8 do so, all subject to the following:
9 The copyright notices in the Software and this entire statement, including
10 the above license grant, this restriction and the following disclaimer,
11 must be included in all copies of the Software, in whole or in part, and
12 all derivative works of the Software, unless such copies or derivative
13 works are solely in the form of machine-executable object code generated by
14 a source language processor.
15 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
18 SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
19 FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
20 ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 DEALINGS IN THE SOFTWARE.
22 */
23 module derelict.cuda.runtimeapi;
24 
25 /*
26  * CUDA Runtime API
27  * Translation of cuda_runtime_api.h and headers included by it
28  */
29 
30 // Current API version supported by DerelictCUDA is 10.0
31 enum CUDART_VERSION = 10000;
32 
33 import derelict.util.loader;
34 
35 private
36 {
37   import derelict.util.system;
38 
39   // Comma-separated candidate shared-library names for the CUDA 10.0
40   // runtime, selected at compile time for the target OS/architecture.
41   // Presumably consumed by the Derelict loader (derelict.util.loader),
42   // which tries each name in order -- confirm against the loader class.
43   static if(Derelict_OS_Windows)
44     enum libNames = "cudart32_100.dll,cudart64_100.dll";
45   else static if (Derelict_OS_Mac)
46     enum libNames = "libcudart.dylib,/usr/local/lib/libcudart.dylib";
47   else static if (Derelict_OS_Linux)
48   {
49     // Linux installs place the runtime in lib/ (32-bit) or lib64/ and the
50     // distro multiarch directory (64-bit), so the fallback paths differ.
51     version(X86)
52       enum libNames = "libcudart.so,libcudart.so.10.0,/opt/cuda/lib/libcudart.so";
53     else version(X86_64)
54       enum libNames = "libcudart.so,libcudart.so.10.0,/opt/cuda/lib64/libcudart.so,/usr/lib/x86_64-linux-gnu/libcudart.so.10.0";
55     else
56       static assert(0, "Need to implement CUDA libNames for this arch.");
57   }
58   else
59     static assert(0, "Need to implement CUDA libNames for this operating system.");
60 }
55 
56 
57 // library_types.h
58 
59 
/// Scalar/complex data-type tags shared by the CUDA libraries (library_types.h).
alias cudaDataType = int;
enum : cudaDataType {
	CUDA_R_32F = 0,  /// real, 32-bit float
	CUDA_R_64F = 1,  /// real, 64-bit double
	CUDA_R_16F = 2,  /// real, 16-bit half
	CUDA_R_8I  = 3,  /// real, 8-bit signed integer
	CUDA_C_32F = 4,  /// complex, pair of 32-bit floats
	CUDA_C_64F = 5,  /// complex, pair of 64-bit doubles
	CUDA_C_16F = 6,  /// complex, pair of 16-bit halves
	CUDA_C_8I  = 7,  /// complex, pair of 8-bit signed integers
	CUDA_R_8U  = 8,  /// real, 8-bit unsigned integer
	CUDA_C_8U  = 9,  /// complex, pair of 8-bit unsigned integers
	CUDA_R_32I = 10, /// real, 32-bit signed integer
	CUDA_C_32I = 11, /// complex, pair of 32-bit signed integers
	CUDA_R_32U = 12, /// real, 32-bit unsigned integer
	CUDA_C_32U = 13  /// complex, pair of 32-bit unsigned integers
}
77 
78 
/// Version-property selectors for the CUDA library property query APIs (library_types.h).
alias libraryPropertyType = int;
enum : libraryPropertyType {
	MAJOR_VERSION = 0, /// Major version number.
	MINOR_VERSION = 1, /// Minor version number.
	PATCH_LEVEL   = 2  /// Patch level.
}
85 
86 
87 
88 
89 //struct cuComplex;
90 //struct cuDoubleComplex;
91 
92 
93 
94 
95 
96 // vector_types.h
97 // only dim3 translated
98 // TODO(naetherm): Are the other required as well?
/// Launch-dimension triple (vector_types.h); every component defaults to 1
/// so an unspecified axis contributes a single block/thread.
struct dim3 {
  uint x = 1; /// extent along x
  uint y = 1; /// extent along y
  uint z = 1; /// extent along z
}
104 
/// Pair of floats, layout-matching CUDA's float2 vector type.
struct float2 {
  float x; /// first component
  float y; /// second component
}
108 
/// Pair of doubles, layout-matching CUDA's double2 vector type.
struct double2 {
  double x; /// first component
  double y; /// second component
}
112 
113 // device_types.h
114 
/// Floating-point rounding modes (device_types.h).
alias cudaRoundMode = int;
enum : cudaRoundMode
{
  cudaRoundNearest = 0, /// round to nearest
  cudaRoundZero    = 1, /// round toward zero
  cudaRoundPosInf  = 2, /// round toward positive infinity
  cudaRoundMinInf  = 3  /// round toward negative infinity
}
123 
124 // driver_types.h
125 
// Flags for cudaHostAlloc().
enum cudaHostAllocDefault           = 0x00; /// default page-locked allocation
enum cudaHostAllocPortable          = 0x01; /// pinned memory accessible by all CUDA contexts
enum cudaHostAllocMapped            = 0x02; /// map allocation into device address space
enum cudaHostAllocWriteCombined     = 0x04; /// write-combined memory

// Flags for cudaHostRegister().
enum cudaHostRegisterDefault        = 0x00; /// default host memory registration
enum cudaHostRegisterPortable       = 0x01; /// pinned memory accessible by all CUDA contexts
enum cudaHostRegisterMapped         = 0x02; /// map registered memory into device address space
enum cudaHostRegisterIoMemory       = 0x04; /// memory-mapped I/O space (matches the C header's name)
/// Misspelled legacy name retained for backward compatibility.
deprecated("Misspelled; use cudaHostRegisterIoMemory instead.")
enum cudaHosatRegisterIoMemory      = 0x04;

enum cudaPeerAccessDefault          = 0x00; /// default peer-access flag

// Flags for cudaStreamCreateWithFlags().
enum cudaStreamDefault              = 0x00; /// default stream creation flag
enum cudaStreamNonBlocking          = 0x01; /// stream does not synchronize with stream 0

//#define cudaStreamLegacy                    ((cudaStream_t)0x1)
//#define cudaStreamPerThread                 ((cudaStream_t)0x2)

// Flags for cudaEventCreateWithFlags().
enum cudaEventDefault               = 0x00; /// default event creation flag
enum cudaEventBlockingSync          = 0x01; /// event uses blocking synchronization
enum cudaEventDisableTiming         = 0x02; /// event does not record timing data
enum cudaEventInterprocess          = 0x04; /// event suitable for interprocess use

// Flags for cudaSetDeviceFlags().
enum cudaDeviceScheduleAuto         = 0x00; /// let CUDA choose the scheduling policy
enum cudaDeviceScheduleSpin         = 0x01; /// spin while waiting for results
enum cudaDeviceScheduleYield        = 0x02; /// yield the CPU while waiting
enum cudaDeviceScheduleBlockingSync = 0x04; /// block the CPU thread on synchronization
enum cudaDeviceBlockingSync         = 0x04; /// legacy alias of cudaDeviceScheduleBlockingSync

enum cudaDeviceScheduleMask         = 0x07; /// mask of valid scheduling flags
enum cudaDeviceMapHost              = 0x08; /// allow mapping host memory into device space
enum cudaDeviceLmemResizeToMax      = 0x10; /// keep local memory allocation after launch
enum cudaDeviceMask                 = 0x1f; /// mask of all valid device flags

// Flags for CUDA array creation.
enum cudaArrayDefault               = 0x00; /// default array allocation
enum cudaArrayLayered               = 0x01; /// layered array
enum cudaArraySurfaceLoadStore      = 0x02; /// array usable for surface load/store
enum cudaArrayCubemap               = 0x04; /// cubemap array
enum cudaArrayTextureGather         = 0x08; /// array usable for texture gather
enum cudaArrayColorAttachment       = 0x20; /// array usable as a color attachment

enum cudaIpcMemLazyEnablePeerAccess = 0x01; /// enable peer access lazily for IPC memory

// Flags for cudaStreamAttachMemAsync().
enum cudaMemAttachGlobal            = 0x01; /// memory accessible from any stream on any device
enum cudaMemAttachHost              = 0x02; /// memory accessible only by the host
enum cudaMemAttachSingle            = 0x04; /// memory accessible by a single stream

// NOTE(review): the C header declares these as ((int)-1)/((int)-2); the bit
// pattern is identical but the D type here is uint -- confirm before changing,
// since narrowing to int could break existing callers.
enum cudaCpuDeviceId               = (cast(uint)-1); /// device id sentinel meaning "the CPU"
enum cudaInvalidDeviceId           = (cast(uint)-2); /// device id sentinel meaning "invalid device"

// Flags for cudaLaunchCooperativeKernelMultiDevice().
enum cudaCooperativeLaunchMultiDeviceNoPreSync  = 0x01; /// skip pre-launch sync
enum cudaCooperativeLaunchMultiDeviceNoPostSync = 0x02; /// skip post-launch sync
177 
178 
/// Error codes returned by the CUDA Runtime API (driver_types.h).
/// Values marked "deprecated" are retained for compatibility with old
/// CUDA releases (mostly the removed device-emulation mode).
alias cudaError = int;
enum : cudaError
{
  cudaSuccess                           =      0, /// No error; for query calls, the queried operation is complete.
  cudaErrorMissingConfiguration         =      1, /// Device function launched without a prior ::cudaConfigureCall().
  cudaErrorMemoryAllocation             =      2, /// Unable to allocate enough memory for the operation.
  cudaErrorInitializationError          =      3, /// CUDA driver and runtime could not be initialized.
  cudaErrorLaunchFailure                =      4, /// Exception on the device while executing a kernel; process must restart to continue.
  cudaErrorPriorLaunchFailure           =      5, /// Deprecated (CUDA 3.1): a previous emulated kernel launch failed.
  cudaErrorLaunchTimeout                =      6, /// Kernel exceeded the execution timeout; process must restart to continue.
  cudaErrorLaunchOutOfResources         =      7, /// Launch lacked resources (too many arguments/threads for register count).
  cudaErrorInvalidDeviceFunction        =      8, /// Device function missing or not compiled for this architecture.
  cudaErrorInvalidConfiguration         =      9, /// Launch requests resources the device can never satisfy.
  cudaErrorInvalidDevice                =     10, /// Device ordinal does not correspond to a valid CUDA device.
  cudaErrorInvalidValue                 =     11, /// One or more parameters outside the acceptable range.
  cudaErrorInvalidPitchValue            =     12, /// Pitch-related parameter outside the acceptable range.
  cudaErrorInvalidSymbol                =     13, /// Symbol name/identifier is not valid.
  cudaErrorMapBufferObjectFailed        =     14, /// Buffer object could not be mapped.
  cudaErrorUnmapBufferObjectFailed      =     15, /// Buffer object could not be unmapped.
  cudaErrorInvalidHostPointer           =     16, /// A host pointer argument is not valid.
  cudaErrorInvalidDevicePointer         =     17, /// A device pointer argument is not valid.
  cudaErrorInvalidTexture               =     18, /// Texture argument is not valid.
  cudaErrorInvalidTextureBinding        =     19, /// Texture binding not valid (e.g. offset query on unbound texture).
  cudaErrorInvalidChannelDescriptor     =     20, /// Channel descriptor format or dimensions invalid.
  cudaErrorInvalidMemcpyDirection       =     21, /// Memcpy direction not a ::cudaMemcpyKind value.
  cudaErrorAddressOfConstant            =     22, /// Deprecated (CUDA 3.1): address taken of a constant variable.
  cudaErrorTextureFetchFailed           =     23, /// Deprecated (CUDA 3.1): emulated texture fetch failed.
  cudaErrorTextureNotBound              =     24, /// Deprecated (CUDA 3.1): emulated texture was not bound.
  cudaErrorSynchronizationError         =     25, /// Deprecated (CUDA 3.1): emulated synchronization failed.
  cudaErrorInvalidFilterSetting         =     26, /// Linear filtering on a non-float texture (unsupported).
  cudaErrorInvalidNormSetting           =     27, /// Reading a non-float texture as normalized float (unsupported).
  cudaErrorMixedDeviceExecution         =     28, /// Deprecated (CUDA 3.1): mixed device/emulation code.
  cudaErrorCudartUnloading              =     29, /// Runtime call made during process shutdown after driver unload.
  cudaErrorUnknown                      =     30, /// Unknown internal error.
  cudaErrorNotYetImplemented            =     31, /// Deprecated (CUDA 4.1): API call not yet implemented.
  cudaErrorMemoryValueTooLarge          =     32, /// Deprecated (CUDA 3.1): emulated pointer exceeded 32-bit range.
  cudaErrorInvalidResourceHandle        =     33, /// Invalid resource handle (e.g. ::cudaStream_t, ::cudaEvent_t).
  cudaErrorNotReady                     =     34, /// Async operations not complete yet (not a failure; distinct from ::cudaSuccess).
  cudaErrorInsufficientDriver           =     35, /// Installed NVIDIA driver is older than the CUDA runtime library.
  cudaErrorSetOnActiveProcess           =     36, /// Device-management call made after the runtime was already initialized.
  cudaErrorInvalidSurface               =     37, /// Surface argument is not valid.
  cudaErrorNoDevice                     =     38, /// No CUDA-capable devices detected.
  cudaErrorECCUncorrectable             =     39, /// Uncorrectable ECC error detected.
  cudaErrorSharedObjectSymbolNotFound   =     40, /// Link to a shared object failed to resolve.
  cudaErrorSharedObjectInitFailed       =     41, /// Initialization of a shared object failed.
  cudaErrorUnsupportedLimit             =     42, /// ::cudaLimit not supported by the active device.
  cudaErrorDuplicateVariableName        =     43, /// Multiple global/constant variables share the same name.
  cudaErrorDuplicateTextureName         =     44, /// Multiple textures share the same name.
  cudaErrorDuplicateSurfaceName         =     45, /// Multiple surfaces share the same name.
  cudaErrorDevicesUnavailable           =     46, /// All CUDA devices busy or unavailable.
  cudaErrorInvalidKernelImage           =     47, /// Device kernel image is invalid.
  cudaErrorNoKernelImageForDevice       =     48, /// No kernel image suitable for this device.
  cudaErrorIncompatibleDriverContext    =     49, /// Existing driver context incompatible with the runtime.
  cudaErrorPeerAccessAlreadyEnabled     =     50, /// Peer addressing already enabled for this context pair.
  cudaErrorPeerAccessNotEnabled         =     51, /// Peer addressing was never enabled via ::cudaDeviceEnablePeerAccess().
  cudaErrorDeviceAlreadyInUse           =     54, /// Exclusive-thread device already in use by another thread.
  cudaErrorProfilerDisabled             =     55, /// Profiler not initialized for this run (e.g. external profiling tool active).
  cudaErrorProfilerNotInitialized       =     56, /// Deprecated (CUDA 5.0): profiler start/stop without initialization.
  cudaErrorProfilerAlreadyStarted       =     57, /// Deprecated (CUDA 5.0): cudaProfilerStart() while already enabled.
  cudaErrorProfilerAlreadyStopped       =     58, /// Deprecated (CUDA 5.0): cudaProfilerStop() while already disabled.
  cudaErrorAssert                       =     59, /// Device-side assert triggered; process must restart to continue.
  cudaErrorTooManyPeers                 =     60, /// Hardware resources for peer access exhausted.
  cudaErrorHostMemoryAlreadyRegistered  =     61, /// Memory range already registered via ::cudaHostRegister().
  cudaErrorHostMemoryNotRegistered      =     62, /// Pointer does not match any registered memory region.
  cudaErrorOperatingSystem              =     63, /// An OS call failed.
  cudaErrorPeerAccessUnsupported        =     64, /// P2P access unsupported across the given devices.
  cudaErrorLaunchMaxDepthExceeded       =     65, /// Device-runtime launch exceeded maximum nested grid depth.
  cudaErrorLaunchFileScopedTex          =     66, /// Device-runtime launch uses unsupported file-scoped textures.
  cudaErrorLaunchFileScopedSurf         =     67, /// Device-runtime launch uses unsupported file-scoped surfaces.
  cudaErrorSyncDepthExceeded            =     68, /// Device-side ::cudaDeviceSynchronize beyond ::cudaLimitDevRuntimeSyncDepth.
  cudaErrorLaunchPendingCountExceeded   =     69, /// Launch exceeded ::cudaLimitDevRuntimePendingLaunchCount.
  cudaErrorNotPermitted                 =     70, /// Attempted operation is not permitted.
  cudaErrorNotSupported                 =     71, /// Attempted operation not supported on this system or device.
  cudaErrorHardwareStackError           =     72, /// Device call-stack error during kernel execution; process must restart.
  cudaErrorIllegalInstruction           =     73, /// Illegal instruction during kernel execution; process must restart.
  cudaErrorMisalignedAddress            =     74, /// Misaligned load/store during kernel execution; process must restart.
  cudaErrorInvalidAddressSpace          =     75, /// Instruction given an address outside its allowed address space; process must restart.
  cudaErrorInvalidPc                    =     76, /// Invalid program counter on the device; process must restart.
  cudaErrorIllegalAddress               =     77, /// Load/store to an invalid memory address; process must restart.
  cudaErrorInvalidPtx                   =     78, /// PTX JIT compilation failed.
  cudaErrorInvalidGraphicsContext       =     79, /// Error with the OpenGL or DirectX context.
  cudaErrorNvlinkUncorrectable          =     80, /// Uncorrectable NVLink error detected.
  cudaErrorJitCompilerNotFound          =     81, /// PTX JIT compiler library not found.
  cudaErrorCooperativeLaunchTooLarge    =     82, /// Cooperative launch exceeds maximum resident block count.
  cudaErrorSystemNotReady               =     83, /// System not yet ready to start CUDA work.
  cudaErrorIllegalState                 =     84, /// Required resource is not in a valid state for the operation.
  cudaErrorStartupFailure               =    127, /// Internal startup failure in the CUDA runtime.
  cudaErrorStreamCaptureUnsupported     =    900, /// Operation not permitted while the stream is capturing.
  cudaErrorStreamCaptureInvalidated     =    901, /// Current capture sequence invalidated by a previous error.
  cudaErrorStreamCaptureMerge           =    902, /// Operation would merge two independent capture sequences.
  cudaErrorStreamCaptureUnmatched       =    903, /// Capture was not initiated in this stream.
  cudaErrorStreamCaptureUnjoined        =    904, /// Capture sequence contains a fork not joined to the primary stream.
  cudaErrorStreamCaptureIsolation       =    905, /// Dependency would cross the capture sequence boundary.
  cudaErrorStreamCaptureImplicit        =    906, /// Disallowed implicit dependency on a capture from cudaStreamLegacy.
  cudaErrorCapturedEvent                =    907, /// Operation not permitted on an event last recorded in a capturing stream.
  cudaErrorApiFailureBase               =  10000  /// Deprecated (CUDA 4.1): base for unhandled driver errors.
}
846 
847 /**
848  * Channel format kind
849  */
850 alias cudaChannelFormatKind = int;
851 enum : cudaChannelFormatKind
852 {
853   cudaChannelFormatKindSigned           =   0,
854   cudaChannelFormatKindUnsigned         =   1,
855   cudaChannelFormatKindFloat            =   2,
856   cudaChannelFormatKindNone             =   3
857 }
858 
859 /**
860  * CUDA Channel format descriptor
861  */
862 struct cudaChannelFormatDesc
863 {
864   int                   x;
865   int                   y;
866   int                   z;
867   int                   w;
868   cudaChannelFormatKind f;
869 }
870 
871 /**
872  * CUDA array
873  */
874 struct cudaArray;
875 alias cudaArray_t = void*;
876 alias cudaArray_const_t = const(void)*;
877 alias cudaMipmappedArray_t = void*;
878 alias cudaMipmappedArray_const_t = const(void)*;
879 
/**
 * CUDA memory types (physical location of an allocation)
 */
alias cudaMemoryType = int;
enum : cudaMemoryType
{
  cudaMemoryTypeUnregistered = 0, /**< Unregistered memory */
  cudaMemoryTypeHost         = 1, /**< Host memory */
  cudaMemoryTypeDevice       = 2, /**< Device memory */
  cudaMemoryTypeManaged      = 3  /**< Managed memory */
}
888 
/**
 * CUDA memory copy kinds
 */
alias cudaMemcpyKind = int;
enum : cudaMemcpyKind
{
  cudaMemcpyHostToHost          =   0, /**< Host   -> Host */
  cudaMemcpyHostToDevice        =   1, /**< Host   -> Device */
  cudaMemcpyDeviceToHost        =   2, /**< Device -> Host */
  cudaMemcpyDeviceToDevice      =   3, /**< Device -> Device */
  cudaMemcpyDefault             =   4  /**< Direction inferred from the pointer values; requires unified virtual addressing */
}
898 
899 /**
900  * CUDA Pitched memory pointer
901  *
902  * \sa ::make_cudaPitchedPtr
903  */
904 struct cudaPitchedPtr
905 {
906   void   *ptr;
907   size_t  pitch;
908   size_t  xsize;
909   size_t  ysize;
910 }
911 
912 /**
913  * CUDA extent
914  *
915  * \sa ::make_cudaExtent
916  */
917 struct cudaExtent
918 {
919   size_t width;
920   size_t height;
921   size_t depth;
922 }
923 
924 /**
925  * CUDA 3D position
926  *
927  * \sa ::make_cudaPos
928  */
929 struct cudaPos
930 {
931   size_t x;
932   size_t y;
933   size_t z;
934 }
935 
936 /**
937  * CUDA 3D memory copying parameters
938  */
939 struct cudaMemcpy3DParms
940 {
941   cudaArray_t     srcArray;
942   cudaPos         srcPos;
943   cudaPitchedPtr  srcPtr;
944 
945   cudaArray_t     dstArray;
946   cudaPos         dstPos;
947   cudaPitchedPtr  dstPtr;
948 
949   cudaExtent      extent;
950   cudaMemcpyKind  kind;
951 }
952 
953 /**
954  * CUDA 3D cross-device memory copying parameters
955  */
956 struct cudaMemcpy3DPeerParms
957 {
958   cudaArray_t     srcArray;
959   cudaPos         srcPos;
960   cudaPitchedPtr  srcPtr;
961   int             srcDevice;
962   cudaArray_t     dstArray;
963   cudaPos         dstPos;
964   cudaPitchedPtr  dstPtr;
965   int             dstDevice;
966   cudaExtent      extent;
967 }
968 
969 /**
970  * CUDA Memset node parameters
971  */
972 struct cudaMemsetParams {
973   void *dst;                              /**< Destination device pointer */
974   size_t pitch;                           /**< Pitch of destination device pointer. Unused if height is 1 */
975   uint value;                     /**< Value to be set */
976   uint elementSize;               /**< Size of each element in bytes. Must be 1, 2, or 4. */
977   size_t width;                           /**< Width in bytes, of the row */
978   size_t height;                          /**< Number of rows */
979 }
980 
981 
982 /**
983  * CUDA host function
984  * \param userData Argument value passed to the function
985  */
986 extern(System) nothrow
987 {
988   alias cudaHostFn_t = void function(void *userData);
989 }
990 
991 
992 /**
993  * CUDA host node parameters
994  */
995 struct cudaHostNodeParams {
996   cudaHostFn_t fn;    /**< The function to call when the node executes */
997   void* userData; /**< Argument to pass to the function */
998 }
999 
1000 /**
1001  * Possible stream capture statuses returned by ::cudaStreamIsCapturing
1002  */
1003 alias cudaStreamCaptureStatus = int;
1004 enum : cudaStreamCaptureStatus {
1005   cudaStreamCaptureStatusNone        = 0, /**< Stream is not capturing */
1006   cudaStreamCaptureStatusActive      = 1, /**< Stream is actively capturing */
1007   cudaStreamCaptureStatusInvalidated = 2  /**< Stream is part of a capture sequence that
1008                                                    has been invalidated, but not terminated */
1009 }
1010 
1011 struct cudaGraphicsResource;
1012 
/**
 * CUDA graphics interop register flags
 */
alias cudaGraphicsRegisterFlags = int;
enum : cudaGraphicsRegisterFlags{
  cudaGraphicsRegisterFlagsNone             = 0, /**< Default */
  cudaGraphicsRegisterFlagsReadOnly         = 1, /**< CUDA will not write to this resource */
  cudaGraphicsRegisterFlagsWriteDiscard     = 2, /**< CUDA will only write to and will not read from this resource */
  cudaGraphicsRegisterFlagsSurfaceLoadStore = 4, /**< CUDA will bind this resource to a surface reference */
  cudaGraphicsRegisterFlagsTextureGather    = 8  /**< CUDA will perform texture gather operations on this resource */
}
1021 
/**
 * CUDA graphics interop map flags
 */
alias cudaGraphicsMapFlags = int;
enum : cudaGraphicsMapFlags{
  cudaGraphicsMapFlagsNone         = 0, /**< Default; assume resource can be read/written */
  cudaGraphicsMapFlagsReadOnly     = 1, /**< CUDA will not write to this resource */
  cudaGraphicsMapFlagsWriteDiscard = 2  /**< CUDA will only write to and will not read from this resource */
}
1028 
/**
 * CUDA graphics interop array indices for cube maps
 */
alias cudaGraphicsCubeFace = int;
enum : cudaGraphicsCubeFace{
  cudaGraphicsCubeFacePositiveX = 0x00, /**< Positive X face of cubemap */
  cudaGraphicsCubeFaceNegativeX = 0x01, /**< Negative X face of cubemap */
  cudaGraphicsCubeFacePositiveY = 0x02, /**< Positive Y face of cubemap */
  cudaGraphicsCubeFaceNegativeY = 0x03, /**< Negative Y face of cubemap */
  cudaGraphicsCubeFacePositiveZ = 0x04, /**< Positive Z face of cubemap */
  cudaGraphicsCubeFaceNegativeZ = 0x05  /**< Negative Z face of cubemap */
}
1038 
/**
 * CUDA resource types (discriminant for ::cudaResourceDesc)
 */
alias cudaResourceType = int;
enum : cudaResourceType{
  cudaResourceTypeArray          = 0x00, /**< Array resource */
  cudaResourceTypeMipmappedArray = 0x01, /**< Mipmapped array resource */
  cudaResourceTypeLinear         = 0x02, /**< Linear resource */
  cudaResourceTypePitch2D        = 0x03  /**< Pitch 2D resource */
}
1046 
/**
 * CUDA texture resource view formats (element type + component count;
 * the BC* entries are block-compressed formats)
 */
alias cudaResourceViewFormat = int;
enum : cudaResourceViewFormat{
  cudaResViewFormatNone                      = 0x00,
  cudaResViewFormatUnsignedChar1             = 0x01,
  cudaResViewFormatUnsignedChar2             = 0x02,
  cudaResViewFormatUnsignedChar4             = 0x03,
  cudaResViewFormatSignedChar1               = 0x04,
  cudaResViewFormatSignedChar2               = 0x05,
  cudaResViewFormatSignedChar4               = 0x06,
  cudaResViewFormatUnsignedShort1            = 0x07,
  cudaResViewFormatUnsignedShort2            = 0x08,
  cudaResViewFormatUnsignedShort4            = 0x09,
  cudaResViewFormatSignedShort1              = 0x0a,
  cudaResViewFormatSignedShort2              = 0x0b,
  cudaResViewFormatSignedShort4              = 0x0c,
  cudaResViewFormatUnsignedInt1              = 0x0d,
  cudaResViewFormatUnsignedInt2              = 0x0e,
  cudaResViewFormatUnsignedInt4              = 0x0f,
  cudaResViewFormatSignedInt1                = 0x10,
  cudaResViewFormatSignedInt2                = 0x11,
  cudaResViewFormatSignedInt4                = 0x12,
  cudaResViewFormatHalf1                     = 0x13,
  cudaResViewFormatHalf2                     = 0x14,
  cudaResViewFormatHalf4                     = 0x15,
  cudaResViewFormatFloat1                    = 0x16,
  cudaResViewFormatFloat2                    = 0x17,
  cudaResViewFormatFloat4                    = 0x18,
  cudaResViewFormatUnsignedBlockCompressed1  = 0x19,
  cudaResViewFormatUnsignedBlockCompressed2  = 0x1a,
  cudaResViewFormatUnsignedBlockCompressed3  = 0x1b,
  cudaResViewFormatUnsignedBlockCompressed4  = 0x1c,
  cudaResViewFormatSignedBlockCompressed4    = 0x1d,
  cudaResViewFormatUnsignedBlockCompressed5  = 0x1e,
  cudaResViewFormatSignedBlockCompressed5    = 0x1f,
  cudaResViewFormatUnsignedBlockCompressed6H = 0x20,
  cudaResViewFormatSignedBlockCompressed6H   = 0x21,
  cudaResViewFormatUnsignedBlockCompressed7  = 0x22
}
1085 
1086 /**
1087  * CUDA resource descriptor
1088  */
1089 struct cudaResourceDesc
1090 {
1091   cudaResourceType resType;
1092 
1093   union res_st
1094   {
1095     struct array_st
1096     {
1097       cudaArray_t array;
1098     }
1099     array_st array;
1100 
1101     struct mipmap_st
1102     {
1103       cudaMipmappedArray_t mipmap;
1104     }
1105     mipmap_st mipmap;
1106 
1107     struct linear_st
1108     {
1109       void *devPtr;
1110       cudaChannelFormatDesc desc;
1111       size_t sizeInBytes;
1112     }
1113     linear_st linear;
1114 
1115     struct pitch2D_st
1116     {
1117       void *devPtr;
1118       cudaChannelFormatDesc desc;
1119       size_t width;
1120       size_t height;
1121       size_t pitchInBytes;
1122     }
1123     pitch2D_st pitch2D;
1124   }
1125   res_st res;
1126 }
1127 
1128 /**
1129  * CUDA resource view descriptor
1130  */
1131 struct cudaResourceViewDesc {
1132   cudaResourceViewFormat format;
1133   size_t                 width;
1134   size_t                 height;
1135   size_t                 depth;
1136   uint                   firstMipmapLevel;
1137   uint                   lastMipmapLevel;
1138   uint                   firstLayer;
1139   uint                   lastLayer;
1140 }
1141 
1142 /**
1143  * CUDA pointer attributes
1144  */
1145 struct cudaPointerAttributes {
1146   cudaMemoryType memoryType;
1147   cudaMemoryType type;
1148   int device;
1149   void *devicePointer;
1150   void *hostPointer;
1151   int isManaged;
1152 }
1153 
1154 /**
1155  * CUDA function attributes
1156  */
1157 struct cudaFuncAttributes {
1158  size_t sharedSizeBytes;
1159  size_t constSizeBytes;
1160  size_t localSizeBytes;
1161  int maxThreadsPerBlock;
1162  int numRegs;
1163  int ptxVersion;
1164  int binaryVersion;
1165  int cacheModeCA;
1166  int maxDynamicSharedSizeBytes;
1167  int preferredShmemCarveout;
1168 }
1169 
/**
 * CUDA function attributes that can be set using ::cudaFuncSetAttribute
 */
alias cudaFuncAttribute = int;
enum : cudaFuncAttribute {
  cudaFuncAttributeMaxDynamicSharedMemorySize = 8, /**< Maximum dynamic shared memory size */
  cudaFuncAttributePreferredSharedMemoryCarveout = 9, /**< Preferred shared memory-L1 cache split ratio */
  cudaFuncAttributeMax /**< Implicitly 10 (sentinel, one past the last attribute) */
}
1176 
1177 /**
1178  * CUDA function attributes that can be set using cudaFuncSetAttribute
1179  */
1180 alias cudaFuncCache = int;
1181 enum : cudaFuncCache {
1182   cudaFuncCachePreferNone   = 0,
1183   cudaFuncCachePreferShared = 1,
1184   cudaFuncCachePreferL1     = 2,
1185   cudaFuncCachePreferEqual  = 3
1186 }
1187 
1188 /**
1189  * CUDA shared memory configuration
1190  */
1191 alias cudaSharedMemConfig = int;
1192 enum : cudaSharedMemConfig {
1193   cudaSharedMemBankSizeDefault   = 0,
1194   cudaSharedMemBankSizeFourByte  = 1,
1195   cudaSharedMemBankSizeEightByte = 2
1196 }
1197 
1198 /**
1199  * Shared memory carveout configurations
1200  */
1201 alias cudaSharedCarveout = int;
1202 enum : cudaSharedCarveout {
1203   cudaSharedmemCarveoutDefault      = -1,  /* * < no preference for shared memory or L1 (default) */
1204   cudaSharedmemCarveoutMaxShared    = 100, /* * < prefer maximum available shared memory, minimum L1 cache */
1205   cudaSharedmemCarveoutMaxL1        = 0    /* * < prefer maximum available L1 cache, minimum shared memory */
1206 }
1207 
1208 /**
1209  * CUDA device compute modes
1210  */
1211 alias cudaComputeMode = int;
1212 enum : cudaComputeMode {
1213   cudaComputeModeDefault          = 0,
1214   cudaComputeModeExclusive        = 1,
1215   cudaComputeModeProhibited       = 2,
1216   cudaComputeModeExclusiveProcess = 3
1217 }
1218 
1219 
1220 /**
1221  * CUDA Limits
1222  */
1223 alias cudaLimit = int;
1224 enum : cudaLimit {
1225   cudaLimitStackSize                    = 0x00,
1226   cudaLimitPrintfFifoSize               = 0x01,
1227   cudaLimitMallocHeapSize               = 0x02,
1228   cudaLimitDevRuntimeSyncDepth          = 0x03,
1229   cudaLimitDevRuntimePendingLaunchCount = 0x04,
1230   cudaLimitMaxL2FetchGranularity        = 0x05
1231 }
1232 
1233 /**
1234  * CUDA Memory Advise values
1235  */
1236 alias cudaMemoryAdvise = int;
1237 enum : cudaMemoryAdvise {
1238   cudaMemAdviseSetReadMostly          = 1, /**< Data will mostly be read and only occassionally be written to */
1239   cudaMemAdviseUnsetReadMostly        = 2, /**< Undo the effect of ::cudaMemAdviseSetReadMostly */
1240   cudaMemAdviseSetPreferredLocation   = 3, /**< Set the preferred location for the data as the specified device */
1241   cudaMemAdviseUnsetPreferredLocation = 4, /**< Clear the preferred location for the data */
1242   cudaMemAdviseSetAccessedBy          = 5, /**< Data will be accessed by the specified device, so prevent page faults as much as possible */
1243   cudaMemAdviseUnsetAccessedBy        = 6  /**< Let the Unified Memory subsystem decide on the page faulting policy for the specified device */
1244 }
1245 
1246 /**
1247  * CUDA range attributes
1248  */
1249 alias cudaMemRangeAttribute = int;
1250 enum : cudaMemRangeAttribute {
1251   cudaMemRangeAttributeReadMostly           = 1, /**< Whether the range will mostly be read and only occassionally be written to */
1252   cudaMemRangeAttributePreferredLocation    = 2, /**< The preferred location of the range */
1253   cudaMemRangeAttributeAccessedBy           = 3, /**< Memory range has ::cudaMemAdviseSetAccessedBy set for specified device */
1254   cudaMemRangeAttributeLastPrefetchLocation = 4  /**< The last location to which the range was prefetched */
1255 }
1256 
1257 /**
1258  * CUDA Profiler Output modes
1259  */
1260 alias cudaOutputMode = int;
1261 enum : cudaOutputMode {
1262   cudaKeyValuePair    = 0x00,
1263   cudaCSV             = 0x01
1264 }
1265 
1266 /**
1267  * CUDA device attributes
1268  */
1269 alias cudaDeviceAttr = int;
1270 enum : cudaDeviceAttr {
1271   cudaDevAttrMaxThreadsPerBlock             = 1,  /**< Maximum number of threads per block */
1272   cudaDevAttrMaxBlockDimX                   = 2,  /**< Maximum block dimension X */
1273   cudaDevAttrMaxBlockDimY                   = 3,  /**< Maximum block dimension Y */
1274   cudaDevAttrMaxBlockDimZ                   = 4,  /**< Maximum block dimension Z */
1275   cudaDevAttrMaxGridDimX                    = 5,  /**< Maximum grid dimension X */
1276   cudaDevAttrMaxGridDimY                    = 6,  /**< Maximum grid dimension Y */
1277   cudaDevAttrMaxGridDimZ                    = 7,  /**< Maximum grid dimension Z */
1278   cudaDevAttrMaxSharedMemoryPerBlock        = 8,  /**< Maximum shared memory available per block in bytes */
1279   cudaDevAttrTotalConstantMemory            = 9,  /**< Memory available on device for __constant__ variables in a CUDA C kernel in bytes */
1280   cudaDevAttrWarpSize                       = 10, /**< Warp size in threads */
1281   cudaDevAttrMaxPitch                       = 11, /**< Maximum pitch in bytes allowed by memory copies */
1282   cudaDevAttrMaxRegistersPerBlock           = 12, /**< Maximum number of 32-bit registers available per block */
1283   cudaDevAttrClockRate                      = 13, /**< Peak clock frequency in kilohertz */
1284   cudaDevAttrTextureAlignment               = 14, /**< Alignment requirement for textures */
1285   cudaDevAttrGpuOverlap                     = 15, /**< Device can possibly copy memory and execute a kernel concurrently */
1286   cudaDevAttrMultiProcessorCount            = 16, /**< Number of multiprocessors on device */
1287   cudaDevAttrKernelExecTimeout              = 17, /**< Specifies whether there is a run time limit on kernels */
1288   cudaDevAttrIntegrated                     = 18, /**< Device is integrated with host memory */
1289   cudaDevAttrCanMapHostMemory               = 19, /**< Device can map host memory into CUDA address space */
1290   cudaDevAttrComputeMode                    = 20, /**< Compute mode (See ::cudaComputeMode for details) */
1291   cudaDevAttrMaxTexture1DWidth              = 21, /**< Maximum 1D texture width */
1292   cudaDevAttrMaxTexture2DWidth              = 22, /**< Maximum 2D texture width */
1293   cudaDevAttrMaxTexture2DHeight             = 23, /**< Maximum 2D texture height */
1294   cudaDevAttrMaxTexture3DWidth              = 24, /**< Maximum 3D texture width */
1295   cudaDevAttrMaxTexture3DHeight             = 25, /**< Maximum 3D texture height */
1296   cudaDevAttrMaxTexture3DDepth              = 26, /**< Maximum 3D texture depth */
1297   cudaDevAttrMaxTexture2DLayeredWidth       = 27, /**< Maximum 2D layered texture width */
1298   cudaDevAttrMaxTexture2DLayeredHeight      = 28, /**< Maximum 2D layered texture height */
1299   cudaDevAttrMaxTexture2DLayeredLayers      = 29, /**< Maximum layers in a 2D layered texture */
1300   cudaDevAttrSurfaceAlignment               = 30, /**< Alignment requirement for surfaces */
1301   cudaDevAttrConcurrentKernels              = 31, /**< Device can possibly execute multiple kernels concurrently */
1302   cudaDevAttrEccEnabled                     = 32, /**< Device has ECC support enabled */
1303   cudaDevAttrPciBusId                       = 33, /**< PCI bus ID of the device */
1304   cudaDevAttrPciDeviceId                    = 34, /**< PCI device ID of the device */
1305   cudaDevAttrTccDriver                      = 35, /**< Device is using TCC driver model */
1306   cudaDevAttrMemoryClockRate                = 36, /**< Peak memory clock frequency in kilohertz */
1307   cudaDevAttrGlobalMemoryBusWidth           = 37, /**< Global memory bus width in bits */
1308   cudaDevAttrL2CacheSize                    = 38, /**< Size of L2 cache in bytes */
1309   cudaDevAttrMaxThreadsPerMultiProcessor    = 39, /**< Maximum resident threads per multiprocessor */
1310   cudaDevAttrAsyncEngineCount               = 40, /**< Number of asynchronous engines */
1311   cudaDevAttrUnifiedAddressing              = 41, /**< Device shares a unified address space with the host */
1312   cudaDevAttrMaxTexture1DLayeredWidth       = 42, /**< Maximum 1D layered texture width */
1313   cudaDevAttrMaxTexture1DLayeredLayers      = 43, /**< Maximum layers in a 1D layered texture */
1314   cudaDevAttrMaxTexture2DGatherWidth        = 45, /**< Maximum 2D texture width if cudaArrayTextureGather is set */
1315   cudaDevAttrMaxTexture2DGatherHeight       = 46, /**< Maximum 2D texture height if cudaArrayTextureGather is set */
1316   cudaDevAttrMaxTexture3DWidthAlt           = 47, /**< Alternate maximum 3D texture width */
1317   cudaDevAttrMaxTexture3DHeightAlt          = 48, /**< Alternate maximum 3D texture height */
1318   cudaDevAttrMaxTexture3DDepthAlt           = 49, /**< Alternate maximum 3D texture depth */
1319   cudaDevAttrPciDomainId                    = 50, /**< PCI domain ID of the device */
1320   cudaDevAttrTexturePitchAlignment          = 51, /**< Pitch alignment requirement for textures */
1321   cudaDevAttrMaxTextureCubemapWidth         = 52, /**< Maximum cubemap texture width/height */
1322   cudaDevAttrMaxTextureCubemapLayeredWidth  = 53, /**< Maximum cubemap layered texture width/height */
1323   cudaDevAttrMaxTextureCubemapLayeredLayers = 54, /**< Maximum layers in a cubemap layered texture */
1324   cudaDevAttrMaxSurface1DWidth              = 55, /**< Maximum 1D surface width */
1325   cudaDevAttrMaxSurface2DWidth              = 56, /**< Maximum 2D surface width */
1326   cudaDevAttrMaxSurface2DHeight             = 57, /**< Maximum 2D surface height */
1327   cudaDevAttrMaxSurface3DWidth              = 58, /**< Maximum 3D surface width */
1328   cudaDevAttrMaxSurface3DHeight             = 59, /**< Maximum 3D surface height */
1329   cudaDevAttrMaxSurface3DDepth              = 60, /**< Maximum 3D surface depth */
1330   cudaDevAttrMaxSurface1DLayeredWidth       = 61, /**< Maximum 1D layered surface width */
1331   cudaDevAttrMaxSurface1DLayeredLayers      = 62, /**< Maximum layers in a 1D layered surface */
1332   cudaDevAttrMaxSurface2DLayeredWidth       = 63, /**< Maximum 2D layered surface width */
1333   cudaDevAttrMaxSurface2DLayeredHeight      = 64, /**< Maximum 2D layered surface height */
1334   cudaDevAttrMaxSurface2DLayeredLayers      = 65, /**< Maximum layers in a 2D layered surface */
1335   cudaDevAttrMaxSurfaceCubemapWidth         = 66, /**< Maximum cubemap surface width */
1336   cudaDevAttrMaxSurfaceCubemapLayeredWidth  = 67, /**< Maximum cubemap layered surface width */
1337   cudaDevAttrMaxSurfaceCubemapLayeredLayers = 68, /**< Maximum layers in a cubemap layered surface */
1338   cudaDevAttrMaxTexture1DLinearWidth        = 69, /**< Maximum 1D linear texture width */
1339   cudaDevAttrMaxTexture2DLinearWidth        = 70, /**< Maximum 2D linear texture width */
1340   cudaDevAttrMaxTexture2DLinearHeight       = 71, /**< Maximum 2D linear texture height */
1341   cudaDevAttrMaxTexture2DLinearPitch        = 72, /**< Maximum 2D linear texture pitch in bytes */
1342   cudaDevAttrMaxTexture2DMipmappedWidth     = 73, /**< Maximum mipmapped 2D texture width */
1343   cudaDevAttrMaxTexture2DMipmappedHeight    = 74, /**< Maximum mipmapped 2D texture height */
1344   cudaDevAttrComputeCapabilityMajor         = 75, /**< Major compute capability version number */
1345   cudaDevAttrComputeCapabilityMinor         = 76, /**< Minor compute capability version number */
1346   cudaDevAttrMaxTexture1DMipmappedWidth     = 77, /**< Maximum mipmapped 1D texture width */
1347   cudaDevAttrStreamPrioritiesSupported      = 78, /**< Device supports stream priorities */
1348   cudaDevAttrGlobalL1CacheSupported         = 79, /**< Device supports caching globals in L1 */
1349   cudaDevAttrLocalL1CacheSupported          = 80, /**< Device supports caching locals in L1 */
1350   cudaDevAttrMaxSharedMemoryPerMultiprocessor = 81, /**< Maximum shared memory available per multiprocessor in bytes */
1351   cudaDevAttrMaxRegistersPerMultiprocessor  = 82, /**< Maximum number of 32-bit registers available per multiprocessor */
1352   cudaDevAttrManagedMemory                  = 83, /**< Device can allocate managed memory on this system */
1353   cudaDevAttrIsMultiGpuBoard                = 84, /**< Device is on a multi-GPU board */
1354   cudaDevAttrMultiGpuBoardGroupID           = 85, /**< Unique identifier for a group of devices on the same multi-GPU board */
1355   cudaDevAttrHostNativeAtomicSupported      = 86, /**< Link between the device and the host supports native atomic operations */
1356   cudaDevAttrSingleToDoublePrecisionPerfRatio = 87, /**< Ratio of single precision performance (in floating-point operations per second) to double precision performance */
1357   cudaDevAttrPageableMemoryAccess           = 88, /**< Device supports coherently accessing pageable memory without calling cudaHostRegister on it */
1358   cudaDevAttrConcurrentManagedAccess        = 89, /**< Device can coherently access managed memory concurrently with the CPU */
1359   cudaDevAttrComputePreemptionSupported     = 90, /**< Device supports Compute Preemption */
1360   cudaDevAttrCanUseHostPointerForRegisteredMem = 91, /**< Device can access host registered memory at the same virtual address as the CPU */
1361   cudaDevAttrReserved92                     = 92,
1362   cudaDevAttrReserved93                     = 93,
1363   cudaDevAttrReserved94                     = 94,
1364   cudaDevAttrCooperativeLaunch              = 95, /**< Device supports launching cooperative kernels via ::cudaLaunchCooperativeKernel*/
1365   cudaDevAttrCooperativeMultiDeviceLaunch   = 96, /**< Device can participate in cooperative kernels launched via ::cudaLaunchCooperativeKernelMultiDevice */
1366   cudaDevAttrMaxSharedMemoryPerBlockOptin   = 97, /**< The maximum optin shared memory per block. This value may vary by chip. See ::cudaFuncSetAttribute */
1367   cudaDevAttrCanFlushRemoteWrites           = 98, /**< Device supports flushing of outstanding remote writes. */
1368   cudaDevAttrHostRegisterSupported          = 99, /**< Device supports host memory registration via ::cudaHostRegister. */
1369   cudaDevAttrPageableMemoryAccessUsesHostPageTables = 100, /**< Device accesses pageable memory via the host's page tables. */
1370   cudaDevAttrDirectManagedMemAccessFromHost = 101 /**< Host can directly access managed memory on the device without migration. */
1371 }
1372 
1373 /**
1374  * CUDA device P2P attributes
1375  */
1376 alias cudaDeviceP2PAttr = int;
1377 enum : cudaDeviceP2PAttr {
1378   cudaDevP2PAttrPerformanceRank              = 1, /**< A relative value indicating the performance of the link between two devices */
1379   cudaDevP2PAttrAccessSupported              = 2, /**< Peer access is enabled */
1380   cudaDevP2PAttrNativeAtomicSupported        = 3, /**< Native atomic operation over the link supported */
1381   cudaDevP2PAttrCudaArrayAccessSupported     = 4  /**< Accessing CUDA arrays over the link supported */
1382 }
1383 
1384 /**
1385  * CUDA UUID types
1386  */
1387 struct CUuuid_st {
1388   char[16] bytes;
1389 }
1390 alias cudaUUID_t = CUuuid_st;
1391 
1392 /**
1393  * CUDA device properties
1394  */
1395 struct cudaDeviceProp
1396 {
1397   char[256]    name;                  /**< ASCII string identifying device */
1398   cudaUUID_t   uuid;                       /**< 16-byte unique identifier */
1399   char[8]      luid;                    /**< 8-byte locally unique identifier. Value is undefined on TCC and non-Windows platforms */
1400   uint         luidDeviceNodeMask;         /**< LUID device node mask. Value is undefined on TCC and non-Windows platforms */
1401   size_t       totalGlobalMem;             /**< Global memory available on device in bytes */
1402   size_t       sharedMemPerBlock;          /**< Shared memory available per block in bytes */
1403   int          regsPerBlock;               /**< 32-bit registers available per block */
1404   int          warpSize;                   /**< Warp size in threads */
1405   size_t       memPitch;                   /**< Maximum pitch in bytes allowed by memory copies */
1406   int          maxThreadsPerBlock;         /**< Maximum number of threads per block */
1407   int[3]       maxThreadsDim;           /**< Maximum size of each dimension of a block */
1408   int[3]          maxGridSize;             /**< Maximum size of each dimension of a grid */
1409   int          clockRate;                  /**< Clock frequency in kilohertz */
1410   size_t       totalConstMem;              /**< Constant memory available on device in bytes */
1411   int          major;                      /**< Major compute capability */
1412   int          minor;                      /**< Minor compute capability */
1413   size_t       textureAlignment;           /**< Alignment requirement for textures */
1414   size_t       texturePitchAlignment;      /**< Pitch alignment requirement for texture references bound to pitched memory */
1415   int          deviceOverlap;              /**< Device can concurrently copy memory and execute a kernel. Deprecated. Use instead asyncEngineCount. */
1416   int          multiProcessorCount;        /**< Number of multiprocessors on device */
1417   int          kernelExecTimeoutEnabled;   /**< Specified whether there is a run time limit on kernels */
1418   int          integrated;                 /**< Device is integrated as opposed to discrete */
1419   int          canMapHostMemory;           /**< Device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer */
1420   int          computeMode;                /**< Compute mode (See ::cudaComputeMode) */
1421   int          maxTexture1D;               /**< Maximum 1D texture size */
1422   int          maxTexture1DMipmap;         /**< Maximum 1D mipmapped texture size */
1423   int          maxTexture1DLinear;         /**< Maximum size for 1D textures bound to linear memory */
1424   int[2]          maxTexture2D;            /**< Maximum 2D texture dimensions */
1425   int[2]          maxTexture2DMipmap;      /**< Maximum 2D mipmapped texture dimensions */
1426   int[3]          maxTexture2DLinear;      /**< Maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory */
1427   int[2]          maxTexture2DGather;      /**< Maximum 2D texture dimensions if texture gather operations have to be performed */
1428   int[3]          maxTexture3D;            /**< Maximum 3D texture dimensions */
1429   int[3]          maxTexture3DAlt;         /**< Maximum alternate 3D texture dimensions */
1430   int          maxTextureCubemap;          /**< Maximum Cubemap texture dimensions */
1431   int[2]          maxTexture1DLayered;     /**< Maximum 1D layered texture dimensions */
1432   int[3]          maxTexture2DLayered;     /**< Maximum 2D layered texture dimensions */
1433   int[2]          maxTextureCubemapLayered;/**< Maximum Cubemap layered texture dimensions */
1434   int          maxSurface1D;               /**< Maximum 1D surface size */
1435   int[2]          maxSurface2D;            /**< Maximum 2D surface dimensions */
1436   int[3]          maxSurface3D;            /**< Maximum 3D surface dimensions */
1437   int[2]          maxSurface1DLayered;     /**< Maximum 1D layered surface dimensions */
1438   int[3]          maxSurface2DLayered;     /**< Maximum 2D layered surface dimensions */
1439   int          maxSurfaceCubemap;          /**< Maximum Cubemap surface dimensions */
1440   int[2]          maxSurfaceCubemapLayered;/**< Maximum Cubemap layered surface dimensions */
1441   size_t       surfaceAlignment;           /**< Alignment requirements for surfaces */
1442   int          concurrentKernels;          /**< Device can possibly execute multiple kernels concurrently */
1443   int          ECCEnabled;                 /**< Device has ECC support enabled */
1444   int          pciBusID;                   /**< PCI bus ID of the device */
1445   int          pciDeviceID;                /**< PCI device ID of the device */
1446   int          pciDomainID;                /**< PCI domain ID of the device */
1447   int          tccDriver;                  /**< 1 if device is a Tesla device using TCC driver, 0 otherwise */
1448   int          asyncEngineCount;           /**< Number of asynchronous engines */
1449   int          unifiedAddressing;          /**< Device shares a unified address space with the host */
1450   int          memoryClockRate;            /**< Peak memory clock frequency in kilohertz */
1451   int          memoryBusWidth;             /**< Global memory bus width in bits */
1452   int          l2CacheSize;                /**< Size of L2 cache in bytes */
1453   int          maxThreadsPerMultiProcessor;/**< Maximum resident threads per multiprocessor */
1454   int          streamPrioritiesSupported;  /**< Device supports stream priorities */
1455   int          globalL1CacheSupported;     /**< Device supports caching globals in L1 */
1456   int          localL1CacheSupported;      /**< Device supports caching locals in L1 */
1457   size_t       sharedMemPerMultiprocessor; /**< Shared memory available per multiprocessor in bytes */
1458   int          regsPerMultiprocessor;      /**< 32-bit registers available per multiprocessor */
1459   int          managedMemory;              /**< Device supports allocating managed memory on this system */
1460   int          isMultiGpuBoard;            /**< Device is on a multi-GPU board */
1461   int          multiGpuBoardGroupID;       /**< Unique identifier for a group of devices on the same multi-GPU board */
1462   int          hostNativeAtomicSupported;  /**< Link between the device and the host supports native atomic operations */
1463   int          singleToDoublePrecisionPerfRatio; /**< Ratio of single precision performance (in floating-point operations per second) to double precision performance */
1464   int          pageableMemoryAccess;       /**< Device supports coherently accessing pageable memory without calling cudaHostRegister on it */
1465   int          concurrentManagedAccess;    /**< Device can coherently access managed memory concurrently with the CPU */
1466   int          computePreemptionSupported; /**< Device supports Compute Preemption */
1467   int          canUseHostPointerForRegisteredMem; /**< Device can access host registered memory at the same virtual address as the CPU */
1468   int          cooperativeLaunch;          /**< Device supports launching cooperative kernels via ::cudaLaunchCooperativeKernel */
1469   int          cooperativeMultiDeviceLaunch; /**< Device can participate in cooperative kernels launched via ::cudaLaunchCooperativeKernelMultiDevice */
1470   size_t       sharedMemPerBlockOptin;     /**< Per device maximum shared memory per block usable by special opt in */
1471   int          pageableMemoryAccessUsesHostPageTables; /**< Device accesses pageable memory via the host's page tables */
1472   int          directManagedMemAccessFromHost; /**< Host can directly access managed memory on the device without migration. */
1473 }
1474 
/**
 * "Don't care" sentinel property set, mirroring cudaDevicePropDontCare
 * from driver_types.h. Intended for use with ::cudaChooseDevice: fields
 * left at these values (zeros; -1 for major/minor/deviceOverlap) place
 * no constraint on device selection.
 *
 * NOTE: this is a positional struct literal — the argument order must
 * exactly match the field order of ::cudaDeviceProp above.
 */
static immutable cudaDeviceProp cudaDevicePropDontCare = cudaDeviceProp(
  "\0",    /* char         name[256];               */
  cudaUUID_t(),     /* cudaUUID_t   uuid;                    */
  "\0",    /* char         luid[8];                 */
  0,         /* uint luidDeviceNodeMask       */
  0,         /* size_t       totalGlobalMem;          */
  0,         /* size_t       sharedMemPerBlock;       */
  0,         /* int          regsPerBlock;            */
  0,         /* int          warpSize;                */
  0,         /* size_t       memPitch;                */
  0,         /* int          maxThreadsPerBlock;      */
  [0, 0, 0], /* int          maxThreadsDim[3];        */
  [0, 0, 0], /* int          maxGridSize[3];          */
  0,         /* int          clockRate;               */
  0,         /* size_t       totalConstMem;           */
  -1,        /* int          major;                   */
  -1,        /* int          minor;                   */
  0,         /* size_t       textureAlignment;        */
  0,         /* size_t       texturePitchAlignment    */
  -1,        /* int          deviceOverlap;           */
  0,         /* int          multiProcessorCount;     */
  0,         /* int          kernelExecTimeoutEnabled */
  0,         /* int          integrated               */
  0,         /* int          canMapHostMemory         */
  0,         /* int          computeMode              */
  0,         /* int          maxTexture1D             */
  0,         /* int          maxTexture1DMipmap       */
  0,         /* int          maxTexture1DLinear       */
  [0, 0],    /* int          maxTexture2D[2]          */
  [0, 0],    /* int          maxTexture2DMipmap[2]    */
  [0, 0, 0], /* int          maxTexture2DLinear[3]    */
  [0, 0],    /* int          maxTexture2DGather[2]    */
  [0, 0, 0], /* int          maxTexture3D[3]          */
  [0, 0, 0], /* int          maxTexture3DAlt[3]       */
  0,         /* int          maxTextureCubemap        */
  [0, 0],    /* int          maxTexture1DLayered[2]   */
  [0, 0, 0], /* int          maxTexture2DLayered[3]   */
  [0, 0],    /* int          maxTextureCubemapLayered[2] */
  0,         /* int          maxSurface1D             */
  [0, 0],    /* int          maxSurface2D[2]          */
  [0, 0, 0], /* int          maxSurface3D[3]          */
  [0, 0],    /* int          maxSurface1DLayered[2]   */
  [0, 0, 0], /* int          maxSurface2DLayered[3]   */
  0,         /* int          maxSurfaceCubemap        */
  [0, 0],    /* int          maxSurfaceCubemapLayered[2] */
  0,         /* size_t       surfaceAlignment         */
  0,         /* int          concurrentKernels        */
  0,         /* int          ECCEnabled               */
  0,         /* int          pciBusID                 */
  0,         /* int          pciDeviceID              */
  0,         /* int          pciDomainID              */
  0,         /* int          tccDriver                */
  0,         /* int          asyncEngineCount         */
  0,         /* int          unifiedAddressing        */
  0,         /* int          memoryClockRate          */
  0,         /* int          memoryBusWidth           */
  0,         /* int          l2CacheSize              */
  0,         /* int          maxThreadsPerMultiProcessor */
  0,         /* int          streamPrioritiesSupported */
  0,         /* int          globalL1CacheSupported   */
  0,         /* int          localL1CacheSupported    */
  0,         /* size_t       sharedMemPerMultiprocessor; */
  0,         /* int          regsPerMultiprocessor;   */
  0,         /* int          managedMemory            */
  0,         /* int          isMultiGpuBoard          */
  0,         /* int          multiGpuBoardGroupID     */
  0,         /* int          hostNativeAtomicSupported */
  0,         /* int          singleToDoublePrecisionPerfRatio */
  0,         /* int          pageableMemoryAccess     */
  0,         /* int          concurrentManagedAccess  */
  0,         /* int          computePreemptionSupported */
  0,         /* int          canUseHostPointerForRegisteredMem */
  0,         /* int          cooperativeLaunch */
  0,         /* int          cooperativeMultiDeviceLaunch */
  0,         /* size_t       sharedMemPerBlockOptin */
  0,         /* int          pageableMemoryAccessUsesHostPageTables */
  0,         /* int          directManagedMemAccessFromHost */
);
1553 
/**
 * Size in bytes of the opaque inter-process handle payloads below
 * (CUDA_IPC_HANDLE_SIZE in driver_types.h).
 */
enum CUDA_IPC_HANDLE_SIZE = 64;

/**
 * Opaque handle for sharing a CUDA event between processes
 * (see ::cudaIpcGetEventHandle / ::cudaIpcOpenEventHandle).
 */
struct cudaIpcEventHandle_t
{
  char[CUDA_IPC_HANDLE_SIZE] reserved;
}

/**
 * Opaque handle for sharing a device memory allocation between processes
 * (see ::cudaIpcGetMemHandle / ::cudaIpcOpenMemHandle).
 */
struct cudaIpcMemHandle_t
{
  char[CUDA_IPC_HANDLE_SIZE] reserved;
}
1565 
1566 
1567 /**
1568  * External memory handle types
1569  */
1570 alias cudaExternalMemoryHandleType = int;
1571 enum : cudaExternalMemoryHandleType {
1572   /**
1573    * Handle is an opaque file descriptor
1574    */
1575   cudaExternalMemoryHandleTypeOpaqueFd       = 1,
1576   /**
1577    * Handle is an opaque shared NT handle
1578    */
1579   cudaExternalMemoryHandleTypeOpaqueWin32    = 2,
1580   /**
1581    * Handle is an opaque, globally shared handle
1582    */
1583   cudaExternalMemoryHandleTypeOpaqueWin32Kmt = 3,
1584   /**
1585    * Handle is a D3D12 heap object
1586    */
1587   cudaExternalMemoryHandleTypeD3D12Heap      = 4,
1588   /**
1589    * Handle is a D3D12 committed resource
1590    */
1591   cudaExternalMemoryHandleTypeD3D12Resource  = 5
1592 }
1593 
1594 enum cudaExternalMemoryDedicated = 0x1;
1595 
// TODO(naetherm): driver_types.h:1747
/**
 * External memory handle descriptor, consumed by ::cudaImportExternalMemory.
 */
struct cudaExternalMemoryHandleDesc {
  /**
   * Type of the handle; selects which member of `handle` is valid.
   */
  cudaExternalMemoryHandleType type;

  union handle_st {
    /**
     * File descriptor referencing the memory object. Valid when
     * type is ::cudaExternalMemoryHandleTypeOpaqueFd.
     */
    int fd;

    /**
     * Win32 handle/name pair referencing the memory object; used for the
     * Win32 handle types. NOTE(review): presumably exactly one of
     * 'handle' and 'name' must be non-null, as documented for the
     * analogous ::cudaExternalSemaphoreHandleDesc below — confirm
     * against driver_types.h.
     */
    struct win32_st {
      void * handle;
      const void * name;
    }

    win32_st win32;
  }

  handle_st handle;
  /**
   * Size of the memory allocation
   */
  ulong size;
  /**
   * Flags must either be zero or ::cudaExternalMemoryDedicated
   */
  uint flags;
}
1621 
1622 
1623 /**
1624  * External memory buffer descriptor
1625  */
1626 struct cudaExternalMemoryBufferDesc {
1627   /**
1628    * Offset into the memory object where the buffer's base is
1629    */
1630   ulong offset;
1631   /**
1632    * Size of the buffer
1633    */
1634   ulong size;
1635   /**
1636    * Flags reserved for future use. Must be zero.
1637    */
1638   uint flags;
1639 }
1640 
1641 /**
1642  * External memory mipmap descriptor
1643  */
1644 struct cudaExternalMemoryMipmappedArrayDesc {
1645   /**
1646    * Offset into the memory object where the base level of the
1647    * mipmap chain is.
1648    */
1649   long offset;
1650   /**
1651    * Format of base level of the mipmap chain
1652    */
1653   cudaChannelFormatDesc formatDesc;
1654   /**
1655    * Dimensions of base level of the mipmap chain
1656    */
1657   cudaExtent extent;
1658   /**
1659    * Flags associated with CUDA mipmapped arrays.
1660    * See ::cudaMallocMipmappedArray
1661    */
1662   uint flags;
1663   /**
1664    * Total number of levels in the mipmap chain
1665    */
1666   uint numLevels;
1667 }
1668 
1669 /**
1670  * External semaphore handle types
1671  */
1672 alias cudaExternalSemaphoreHandleType = int;
1673 enum : cudaExternalSemaphoreHandleType {
1674   /**
1675    * Handle is an opaque file descriptor
1676    */
1677   cudaExternalSemaphoreHandleTypeOpaqueFd       = 1,
1678   /**
1679    * Handle is an opaque shared NT handle
1680    */
1681   cudaExternalSemaphoreHandleTypeOpaqueWin32    = 2,
1682   /**
1683    * Handle is an opaque, globally shared handle
1684    */
1685   cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt = 3,
1686   /**
1687    * Handle is a shared NT handle referencing a D3D12 fence object
1688    */
1689   cudaExternalSemaphoreHandleTypeD3D12Fence     = 4
1690 }
1691 
1692 
1693 
1694 /**
1695  * External semaphore handle descriptor
1696  */
1697 struct cudaExternalSemaphoreHandleDesc {
1698   /**
1699    * Type of the handle
1700    */
1701   cudaExternalSemaphoreHandleType type;
1702   union handle_st {
1703     /**
1704      * File descriptor referencing the semaphore object. Valid
1705      * when type is ::cudaExternalSemaphoreHandleTypeOpaqueFd
1706      */
1707     int fd;
1708     /**
1709      * Win32 handle referencing the semaphore object. Valid when
1710      * type is one of the following:
1711      * - ::cudaExternalSemaphoreHandleTypeOpaqueWin32
1712      * - ::cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt
1713      * - ::cudaExternalSemaphoreHandleTypeD3D12Fence
1714      * Exactly one of 'handle' and 'name' must be non-NULL. If
1715      * type is ::cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt
1716      * then 'name' must be NULL.
1717      */
1718     struct win32_st {
1719       /**
1720        * Valid NT handle. Must be NULL if 'name' is non-NULL
1721        */
1722       void *handle;
1723       /**
1724        * Name of a valid synchronization primitive.
1725        * Must be NULL if 'handle' is non-NULL.
1726        */
1727       const void *name;
1728     }
1729 
1730     win32_st win32;
1731   }
1732 
1733   handle_st handle;
1734   /**
1735    * Flags reserved for the future. Must be zero.
1736    */
1737   uint flags;
1738 }
1739 
1740 /**
1741  * External semaphore  signal parameters
1742  */
1743 struct cudaExternalSemaphoreSignalParams {
1744   union params_st {
1745     /**
1746      * Parameters for fence objects
1747      */
1748     struct fence_st {
1749       /**
1750        * Value of fence to be signaled
1751        */
1752       ulong value;
1753     }
1754 
1755     fence_st fence;
1756   }
1757 
1758   params_st params;
1759   /**
1760    * Flags reserved for the future. Must be zero.
1761    */
1762   uint flags;
1763 }
1764 
1765 /**
1766 * External semaphore wait parameters
1767 */
1768 struct cudaExternalSemaphoreWaitParams {
1769   union params_st {
1770     /**
1771     * Parameters for fence objects
1772     */
1773     struct fence_st {
1774       /**
1775       * Value of fence to be waited on
1776       */
1777       ulong value;
1778     }
1779     fence_st fence;
1780   }
1781   params_st params;
1782   /**
1783   * Flags reserved for the future. Must be zero.
1784   */
1785   uint flags;
1786 }
1787 
/// Error code type returned by most runtime API entry points.
alias cudaError_t = cudaError;
/// Opaque stream handle (CUstream_st* in the C headers; void* here).
alias cudaStream_t = void*;
/// Opaque event handle.
alias cudaEvent_t = void*;
/// Opaque graphics-interop resource handle.
alias cudaGraphicsResource_t = void*;
/// Profiler output mode (see ::cudaOutputMode).
alias cudaOutputMode_t = cudaOutputMode;
/// Opaque handle to memory imported via ::cudaImportExternalMemory.
alias cudaExternalMemory_t = void*;
/// Opaque handle to a semaphore imported via ::cudaImportExternalSemaphore.
alias cudaExternalSemaphore_t = void*;
/// Opaque graph handle.
alias cudaGraph_t = void*;
/// Opaque graph node handle.
alias cudaGraphNode_t = void*;
1797 
1798 
1799 /**
1800  * CUDA cooperative group scope
1801  */
1802 alias cudaCGScope = int;
1803 enum : cudaCGScope {
1804   cudaCGScopeInvalid   = 0, /**< Invalid cooperative group scope */
1805   cudaCGScopeGrid      = 1, /**< Scope represented by a grid_group */
1806   cudaCGScopeMultiGrid = 2  /**< Scope represented by a multi_grid_group */
1807 }
1808 
1809 /**
1810  * CUDA launch parameters
1811  */
1812 struct cudaLaunchParams {
1813   void *func;          /**< Device function symbol */
1814   dim3 gridDim;        /**< Grid dimentions */
1815   dim3 blockDim;       /**< Block dimentions */
1816   void **args;         /**< Arguments */
1817   size_t sharedMem;    /**< Shared memory */
1818   cudaStream_t stream; /**< Stream identifier */
1819 }
1820 
1821 /**
1822  * CUDA GPU kernel node parameters
1823  */
1824 struct cudaKernelNodeParams {
1825   void* func;                     /**< Kernel to launch */
1826   dim3 gridDim;                   /**< Grid dimensions */
1827   dim3 blockDim;                  /**< Block dimensions */
1828   uint sharedMemBytes;    /**< Dynamic shared-memory size per thread block in bytes */
1829   void **kernelParams;            /**< Array of pointers to individual kernel arguments*/
1830   void **extra;                   /**< Pointer to kernel arguments in the "extra" format */
1831 }
1832 
1833 /**
1834 * CUDA Graph node types
1835 */
1836 alias cudaGraphNodeType = int;
1837 enum : cudaGraphNodeType {
1838   cudaGraphNodeTypeKernel  = 0x00, /**< GPU kernel node */
1839   cudaGraphNodeTypeMemcpy  = 0x01, /**< Memcpy node */
1840   cudaGraphNodeTypeMemset  = 0x02, /**< Memset node */
1841   cudaGraphNodeTypeHost    = 0x03, /**< Host (executable) node */
1842   cudaGraphNodeTypeGraph   = 0x04, /**< Node which executes an embedded graph */
1843   cudaGraphNodeTypeEmpty   = 0x05, /**< Empty (no-op) node */
1844   cudaGraphNodeTypeCount
1845 }
1846 
1847 /**
1848  * CUDA executable (launchable) graph
1849  */
1850 struct CUgraphExec_st;
1851 alias cudaGraphExec_t = CUgraphExec_st*;
1852 
1853 
// surface_types.h

/// Surface dimensionality selectors (cudaSurfaceType* in surface_types.h).
enum cudaSurfaceType1D              = 0x01;
enum cudaSurfaceType2D              = 0x02;
enum cudaSurfaceType3D              = 0x03;
enum cudaSurfaceTypeCubemap         = 0x0C;
enum cudaSurfaceType1DLayered       = 0xF1;
enum cudaSurfaceType2DLayered       = 0xF2;
enum cudaSurfaceTypeCubemapLayered  = 0xFC;

/**
 * CUDA Surface boundary modes (behavior for out-of-range accesses)
 */
alias cudaSurfaceBoundaryMode = int;
enum : cudaSurfaceBoundaryMode {
  cudaBoundaryModeZero  = 0, /**< Zero boundary mode */
  cudaBoundaryModeClamp = 1, /**< Clamp boundary mode */
  cudaBoundaryModeTrap  = 2  /**< Trap boundary mode */
}

/**
 * CUDA Surface format modes
 */
alias cudaSurfaceFormatMode = int;
enum : cudaSurfaceFormatMode {
  cudaFormatModeForced = 0, /**< Forced format mode */
  cudaFormatModeAuto = 1    /**< Auto format mode */
}

/**
 * CUDA Surface reference
 */
struct surfaceReference {
  cudaChannelFormatDesc channelDesc; /**< Channel descriptor for surface reference */
}

/**
 * An opaque value that represents a CUDA Surface object
 */
alias cudaSurfaceObject_t = ulong;
1894 
1895 
// texture_types.h

/// Texture dimensionality selectors (cudaTextureType* in texture_types.h).
enum cudaTextureType1D              = 0x01;
enum cudaTextureType2D              = 0x02;
enum cudaTextureType3D              = 0x03;
enum cudaTextureTypeCubemap         = 0x0C;
enum cudaTextureType1DLayered       = 0xF1;
enum cudaTextureType2DLayered       = 0xF2;
enum cudaTextureTypeCubemapLayered  = 0xFC;

/**
 * CUDA texture address modes (behavior for out-of-range coordinates)
 */
alias cudaTextureAddressMode = int;
enum : cudaTextureAddressMode {
  cudaAddressModeWrap   = 0, /**< Wrapping address mode */
  cudaAddressModeClamp  = 1, /**< Clamp-to-edge address mode */
  cudaAddressModeMirror = 2, /**< Mirror address mode */
  cudaAddressModeBorder = 3  /**< Border address mode */
}

/**
 * CUDA texture filter modes
 */
alias cudaTextureFilterMode = int;
enum : cudaTextureFilterMode {
  cudaFilterModePoint  = 0, /**< Point (nearest) filter mode */
  cudaFilterModeLinear = 1  /**< Linear filter mode */
}

/**
 * CUDA texture read modes
 */
alias cudaTextureReadMode = int;
enum : cudaTextureReadMode {
  cudaReadModeElementType     = 0, /**< Read texture as specified element type */
  cudaReadModeNormalizedFloat = 1  /**< Read texture as normalized float */
}
1934 
1935 /**
1936  * CUDA texture reference
1937  */
1938 struct textureReference {
1939   int                          normalized;
1940   cudaTextureFilterMode   filterMode;
1941   cudaTextureAddressMode[3]  addressMode;
1942   cudaChannelFormatDesc channelDesc;
1943   int                          sRGB;
1944   uint                 maxAnisotropy;
1945   cudaTextureFilterMode   mipmapFilterMode;
1946   float                        mipmapLevelBias;
1947   float                        minMipmapLevelClamp;
1948   float                        maxMipmapLevelClamp;
1949   int[15]                          __cudaReserved;
1950 }
1951 
1952 /**
1953  * CUDA texture descriptor
1954  */
1955 struct cudaTextureDesc {
1956   cudaTextureAddressMode[3] addressMode;
1957   cudaTextureFilterMode  filterMode;
1958   cudaTextureReadMode    readMode;
1959   int                         sRGB;
1960   float[4]                       borderColor;
1961   int                         normalizedCoords;
1962   uint                maxAnisotropy;
1963   cudaTextureFilterMode  mipmapFilterMode;
1964   float                       mipmapLevelBias;
1965   float                       minMipmapLevelClamp;
1966   float                       maxMipmapLevelClamp;
1967 }
1968 
1969 /**
1970  * An opaque value that represents a CUDA texture object
1971  */
1972 alias cudaTextureObject_t = ulong;
1973 
1974 alias cuFloatComplex = float2;
1975 alias cuDoubleComplex = double2;
1976 alias cuComplex = cuFloatComplex;
1977 
1978 
1979 // cuda_runtime_api.h
1980 
extern(System) nothrow {
  /// Host callback signature for ::cudaStreamAddCallback: receives the
  /// stream it was enqueued on, an error status, and the user-supplied
  /// pointer. (Not @nogc — user callbacks may allocate.)
  alias cudaStreamCallback_t = void function(cudaStream_t stream, cudaError_t status, void *userData);
}
1984 
1985 
1986 extern(System) @nogc nothrow {
1987   alias da_cudaDeviceReset = cudaError_t function();
1988   alias da_cudaDeviceSynchronize = cudaError_t function();
1989   alias da_cudaDeviceSetLimit = cudaError_t function(cudaLimit limit, size_t value);
1990   alias da_cudaDeviceGetLimit = cudaError_t function(size_t *pValue, cudaLimit limit);
1991   alias da_cudaDeviceGetCacheConfig = cudaError_t function(cudaFuncCache *pCacheConfig);
1992   alias da_cudaDeviceGetStreamPriorityRange = cudaError_t function(int *leastPriority, int *greatestPriority);
1993   alias da_cudaDeviceSetCacheConfig = cudaError_t function(cudaFuncCache cacheConfig);
1994   alias da_cudaDeviceGetSharedMemConfig = cudaError_t function(cudaSharedMemConfig *pConfig);
1995   alias da_cudaDeviceSetSharedMemConfig = cudaError_t function(cudaSharedMemConfig config);
1996   alias da_cudaDeviceGetByPCIBusId = cudaError_t function(int *device, const char *pciBusId);
1997   alias da_cudaDeviceGetPCIBusId = cudaError_t function(char *pciBusId, int len, int device);
1998   alias da_cudaIpcGetEventHandle = cudaError_t function(cudaIpcEventHandle_t *handle, cudaEvent_t event);
1999   alias da_cudaIpcOpenEventHandle = cudaError_t function(cudaEvent_t *event, cudaIpcEventHandle_t handle);
2000   alias da_cudaIpcGetMemHandle = cudaError_t function(cudaIpcMemHandle_t *handle, void *devPtr);
2001   alias da_cudaIpcOpenMemHandle = cudaError_t function(void **devPtr, cudaIpcMemHandle_t handle, uint flags);
2002   alias da_cudaIpcCloseMemHandle = cudaError_t function(void *devPtr);
2003   alias da_cudaThreadExit = cudaError_t function();
2004   alias da_cudaThreadSynchronize = cudaError_t function();
2005   alias da_cudaThreadSetLimit = cudaError_t function(cudaLimit limit, size_t value);
2006   alias da_cudaThreadGetLimit = cudaError_t function(size_t *pValue, cudaLimit limit);
2007   alias da_cudaThreadGetCacheConfig = cudaError_t function(cudaFuncCache *pCacheConfig);
2008   alias da_cudaThreadSetCacheConfig = cudaError_t function(cudaFuncCache cacheConfig);
2009   alias da_cudaGetLastError = cudaError_t function();
2010   alias da_cudaPeekAtLastError = cudaError_t function();
2011   alias da_cudaGetErrorName = const char* function(cudaError_t error);
2012   alias da_cudaGetErrorString = const char* function(cudaError_t error);
2013   alias da_cudaGetDeviceCount = cudaError_t function(int *count);
2014   alias da_cudaGetDeviceProperties = cudaError_t function(cudaDeviceProp *prop, int device);
2015   alias da_cudaDeviceGetAttribute = cudaError_t function(int *value, cudaDeviceAttr attr, int device);
2016   alias da_cudaDeviceGetP2PAttribute = cudaError_t function(int *value, cudaDeviceP2PAttr attr, int srcDevice, int dstDevice);
2017   alias da_cudaChooseDevice = cudaError_t function(int *device, const cudaDeviceProp *prop);
2018   alias da_cudaSetDevice = cudaError_t function(int device);
2019   alias da_cudaGetDevice = cudaError_t function(int *device);
2020   alias da_cudaSetValidDevices = cudaError_t function(int *device_arr, int len);
2021   alias da_cudaSetDeviceFlags = cudaError_t function( uint flags );
2022   alias da_cudaGetDeviceFlags = cudaError_t function( uint *flags );
2023   alias da_cudaStreamCreate = cudaError_t function(cudaStream_t *pStream);
2024   alias da_cudaStreamCreateWithFlags = cudaError_t function(cudaStream_t *pStream, uint flags);
2025   alias da_cudaStreamCreateWithPriority = cudaError_t function(cudaStream_t *pStream, uint flags, int priority);
2026   alias da_cudaStreamGetPriority = cudaError_t function(cudaStream_t hStream, int *priority);
2027   alias da_cudaStreamGetFlags = cudaError_t function(cudaStream_t hStream, uint *flags);
2028   alias da_cudaStreamDestroy = cudaError_t function(cudaStream_t stream);
2029   alias da_cudaStreamWaitEvent = cudaError_t function(cudaStream_t stream, cudaEvent_t event, uint flags);
2030   alias da_cudaStreamAddCallback = cudaError_t function(cudaStream_t stream, cudaStreamCallback_t callback, void *userData, uint flags);
2031   alias da_cudaStreamSynchronize = cudaError_t function(cudaStream_t stream);
2032   alias da_cudaStreamQuery = cudaError_t function(cudaStream_t stream);
2033   alias da_cudaStreamAttachMemAsync = cudaError_t function(cudaStream_t stream, void *devPtr, size_t length = 0, uint flags = cudaMemAttachSingle);
2034   alias da_cudaStreamBeginCapture = cudaError_t function(cudaStream_t stream);
2035   alias da_cudaStreamEndCapture = cudaError_t function(cudaStream_t stream, cudaGraph_t *pGraph);
2036   alias da_cudaStreamIsCapturing = cudaError_t function(cudaStream_t stream, cudaStreamCaptureStatus *pCaptureStatus);
2037   alias da_cudaEventCreate = cudaError_t function(cudaEvent_t *event);
2038   alias da_cudaEventCreateWithFlags = cudaError_t function(cudaEvent_t *event, uint flags);
2039   alias da_cudaEventRecord = cudaError_t function(cudaEvent_t event, cudaStream_t stream = null);
2040   alias da_cudaEventQuery = cudaError_t function(cudaEvent_t event);
2041   alias da_cudaEventSynchronize = cudaError_t function(cudaEvent_t event);
2042   alias da_cudaEventDestroy = cudaError_t function(cudaEvent_t event);
2043   alias da_cudaEventElapsedTime = cudaError_t function(float *ms, cudaEvent_t start, cudaEvent_t end);
2044   alias da_cudaImportExternalMemory = cudaError_t function(cudaExternalMemory_t *extMem_out, const cudaExternalMemoryHandleDesc *memHandleDesc);
2045   alias da_cudaExternalMemoryGetMappedBuffer = cudaError_t function(void **devPtr, cudaExternalMemory_t extMem, const cudaExternalMemoryBufferDesc *bufferDesc);
2046   alias da_cudaExternalMemoryGetMappedMipmappedArray = cudaError_t function(cudaMipmappedArray_t *mipmap, cudaExternalMemory_t extMem, const cudaExternalMemoryMipmappedArrayDesc *mipmapDesc);
2047   alias da_cudaDestroyExternalMemory = cudaError_t function(cudaExternalMemory_t extMem);
2048   alias da_cudaImportExternalSemaphore = cudaError_t function(cudaExternalSemaphore_t *extSem_out, const cudaExternalSemaphoreHandleDesc *semHandleDesc);
2049   alias da_cudaSignalExternalSemaphoresAsync = cudaError_t function(const cudaExternalSemaphore_t *extSemArray, const cudaExternalSemaphoreSignalParams *paramsArray, uint numExtSems, cudaStream_t stream = null);
2050   alias da_cudaWaitExternalSemaphoresAsync = cudaError_t function(const cudaExternalSemaphore_t *extSemArray, const cudaExternalSemaphoreWaitParams *paramsArray, uint numExtSems, cudaStream_t stream = null);
2051   alias da_cudaDestroyExternalSemaphore = cudaError_t function(cudaExternalSemaphore_t extSem);
2052   alias da_cudaLaunchKernel = cudaError_t function(const void *func, dim3 gridDim, dim3 blockDim, void **args, size_t sharedMem, cudaStream_t stream);
2053   alias da_cudaLaunchCooperativeKernel = cudaError_t function(const void *func, dim3 gridDim, dim3 blockDim, void **args, size_t sharedMem, cudaStream_t stream);
2054   alias da_cudaLaunchCooperativeKernelMultiDevice = cudaError_t function(cudaLaunchParams *launchParamsList, uint numDevices, uint flags  = 0);
2055   alias da_cudaFuncSetCacheConfig = cudaError_t function(const void *func, cudaFuncCache cacheConfig);
2056   alias da_cudaFuncSetSharedMemConfig = cudaError_t function(const void *func, cudaSharedMemConfig config);
2057   alias da_cudaFuncGetAttributes = cudaError_t function(cudaFuncAttributes *attr, const void *func);
2058   alias da_cudaFuncSetAttribute = cudaError_t function(const void *func, cudaFuncAttribute attr, int value);
2059   alias da_cudaSetDoubleForDevice = cudaError_t function(double *d);
2060   alias da_cudaSetDoubleForHost = cudaError_t function(double *d);
2061   alias da_cudaLaunchHostFunc = cudaError_t function(cudaStream_t stream, cudaHostFn_t fn, void *userData);
2062   alias da_cudaOccupancyMaxActiveBlocksPerMultiprocessor = cudaError_t function(int *numBlocks, const void *func, int blockSize, size_t dynamicSMemSize);
2063   alias da_cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags = cudaError_t function(int *numBlocks, const void *func, int blockSize, size_t dynamicSMemSize, uint flags);
2064   alias da_cudaConfigureCall = cudaError_t function(dim3 gridDim, dim3 blockDim, size_t sharedMem = 0, cudaStream_t stream = null);
2065   alias da_cudaSetupArgument = cudaError_t function(const void *arg, size_t size, size_t offset);
2066   alias da_cudaLaunch = cudaError_t function(const void *func);
2067   alias da_cudaMallocManaged = cudaError_t function(void **devPtr, size_t size, uint flags = cudaMemAttachGlobal);
2068   alias da_cudaMalloc = cudaError_t function(void **devPtr, size_t size);
2069   alias da_cudaMallocHost = cudaError_t function(void **ptr, size_t size);
2070   alias da_cudaMallocPitch = cudaError_t function(void **devPtr, size_t *pitch, size_t width, size_t height);
2071   alias da_cudaMallocArray = cudaError_t function(cudaArray_t *array, const cudaChannelFormatDesc *desc, size_t width, size_t height = 0, uint flags = 0);
2072   alias da_cudaFree = cudaError_t function(void *devPtr);
2073   alias da_cudaFreeHost = cudaError_t function(void *ptr);
2074   alias da_cudaFreeArray = cudaError_t function(cudaArray_t array);
2075   alias da_cudaFreeMipmappedArray = cudaError_t function(cudaMipmappedArray_t mipmappedArray);
2076   alias da_cudaHostAlloc = cudaError_t function(void **pHost, size_t size, uint flags);
2077   alias da_cudaHostRegister = cudaError_t function(void *ptr, size_t size, uint flags);
2078   alias da_cudaHostUnregister = cudaError_t function(void *ptr);
2079   alias da_cudaHostGetDevicePointer = cudaError_t function(void **pDevice, void *pHost, uint flags);
2080   alias da_cudaHostGetFlags = cudaError_t function(uint *pFlags, void *pHost);
2081   alias da_cudaMalloc3D = cudaError_t function(cudaPitchedPtr* pitchedDevPtr, cudaExtent extent);
2082   alias da_cudaMalloc3DArray = cudaError_t function(cudaArray_t *array, const cudaChannelFormatDesc* desc, cudaExtent extent, uint flags = 0);
2083   alias da_cudaMallocMipmappedArray = cudaError_t function(cudaMipmappedArray_t *mipmappedArray, const cudaChannelFormatDesc* desc, cudaExtent extent, uint numLevels, uint flags = 0);
2084   alias da_cudaGetMipmappedArrayLevel = cudaError_t function(cudaArray_t *levelArray, cudaMipmappedArray_const_t mipmappedArray, uint level);
2085   alias da_cudaMemcpy3D = cudaError_t function(const cudaMemcpy3DParms *p);
2086   alias da_cudaMemcpy3DPeer = cudaError_t function(const cudaMemcpy3DPeerParms *p);
2087   alias da_cudaMemcpy3DAsync = cudaError_t function(const cudaMemcpy3DParms *p, cudaStream_t stream = null);
2088   alias da_cudaMemcpy3DPeerAsync = cudaError_t function(const cudaMemcpy3DPeerParms *p, cudaStream_t stream = null);
2089   alias da_cudaMemGetInfo = cudaError_t function(size_t *free, size_t *total);
2090   alias da_cudaArrayGetInfo = cudaError_t function(cudaChannelFormatDesc *desc, cudaExtent *extent, uint *flags, cudaArray_t array);
2091   alias da_cudaMemcpy = cudaError_t function(void *dst, const void *src, size_t count, cudaMemcpyKind kind);
2092   alias da_cudaMemcpyPeer = cudaError_t function(void *dst, int dstDevice, const void *src, int srcDevice, size_t count);
2093   alias da_cudaMemcpyToArray = cudaError_t function(cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, size_t count, cudaMemcpyKind kind);
2094   alias da_cudaMemcpyFromArray = cudaError_t function(void *dst, cudaArray_const_t src, size_t wOffset, size_t hOffset, size_t count, cudaMemcpyKind kind);
2095   alias da_cudaMemcpyArrayToArray = cudaError_t function(cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t count, cudaMemcpyKind kind = cudaMemcpyDeviceToDevice);
2096   alias da_cudaMemcpy2D = cudaError_t function(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, cudaMemcpyKind kind);
2097   alias da_cudaMemcpy2DToArray = cudaError_t function(cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, size_t spitch, size_t width, size_t height, cudaMemcpyKind kind);
2098   alias da_cudaMemcpy2DFromArray = cudaError_t function(void *dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, size_t hOffset, size_t width, size_t height, cudaMemcpyKind kind);
2099   alias da_cudaMemcpy2DArrayToArray = cudaError_t function(cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, size_t height, cudaMemcpyKind kind = cudaMemcpyDeviceToDevice);
2100   alias da_cudaMemcpyToSymbol = cudaError_t function(const void *symbol, const void *src, size_t count, size_t offset = 0, cudaMemcpyKind kind = cudaMemcpyHostToDevice);
2101   alias da_cudaMemcpyFromSymbol = cudaError_t function(void *dst, const void *symbol, size_t count, size_t offset = 0, cudaMemcpyKind kind = cudaMemcpyDeviceToHost);
2102   alias da_cudaMemcpyAsync = cudaError_t function(void *dst, const void *src, size_t count, cudaMemcpyKind kind, cudaStream_t stream = null);
2103   alias da_cudaMemcpyPeerAsync = cudaError_t function(void *dst, int dstDevice, const void *src, int srcDevice, size_t count, cudaStream_t stream = null);
2104   alias da_cudaMemcpyToArrayAsync = cudaError_t function(cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, size_t count, cudaMemcpyKind kind, cudaStream_t stream = null);
2105   alias da_cudaMemcpyFromArrayAsync = cudaError_t function(void *dst, cudaArray_const_t src, size_t wOffset, size_t hOffset, size_t count, cudaMemcpyKind kind, cudaStream_t stream = null);
2106   alias da_cudaMemcpy2DAsync = cudaError_t function(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, cudaMemcpyKind kind, cudaStream_t stream = null);
2107   alias da_cudaMemcpy2DToArrayAsync = cudaError_t function(cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, size_t spitch, size_t width, size_t height, cudaMemcpyKind kind, cudaStream_t stream = null);
2108   alias da_cudaMemcpy2DFromArrayAsync = cudaError_t function(void *dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, size_t hOffset, size_t width, size_t height, cudaMemcpyKind kind, cudaStream_t stream = null);
2109   alias da_cudaMemcpyToSymbolAsync = cudaError_t function(const void *symbol, const void *src, size_t count, size_t offset, cudaMemcpyKind kind, cudaStream_t stream = null);
2110   alias da_cudaMemcpyFromSymbolAsync = cudaError_t function(void *dst, const void *symbol, size_t count, size_t offset, cudaMemcpyKind kind, cudaStream_t stream = null);
2111   alias da_cudaMemset = cudaError_t function(void *devPtr, int value, size_t count);
2112   alias da_cudaMemset2D = cudaError_t function(void *devPtr, size_t pitch, int value, size_t width, size_t height);
2113   alias da_cudaMemset3D = cudaError_t function(cudaPitchedPtr pitchedDevPtr, int value, cudaExtent extent);
2114   alias da_cudaMemsetAsync = cudaError_t function(void *devPtr, int value, size_t count, cudaStream_t stream = null);
2115   alias da_cudaMemset2DAsync = cudaError_t function(void *devPtr, size_t pitch, int value, size_t width, size_t height, cudaStream_t stream = null);
2116   alias da_cudaMemset3DAsync = cudaError_t function(cudaPitchedPtr pitchedDevPtr, int value, cudaExtent extent, cudaStream_t stream = null);
2117   alias da_cudaGetSymbolAddress = cudaError_t function(void **devPtr, const void *symbol);
2118   alias da_cudaGetSymbolSize = cudaError_t function(size_t *size, const void *symbol);
2119   alias da_cudaMemPrefetchAsync = cudaError_t function(const void *devPtr, size_t count, int dstDevice, cudaStream_t stream = null);
2120   alias da_cudaMemAdvise = cudaError_t function(const void *devPtr, size_t count, cudaMemoryAdvise advice, int device);
2121   alias da_cudaMemRangeGetAttribute = cudaError_t function(void *data, size_t dataSize, cudaMemRangeAttribute attribute, const void *devPtr, size_t count);
2122   alias da_cudaMemRangeGetAttributes = cudaError_t function(void **data, size_t *dataSizes, cudaMemRangeAttribute *attributes, size_t numAttributes, const void *devPtr, size_t count);
2123   alias da_cudaPointerGetAttributes = cudaError_t function(cudaPointerAttributes *attributes, const void *ptr);
2124   alias da_cudaDeviceCanAccessPeer = cudaError_t function(int *canAccessPeer, int device, int peerDevice);
2125   alias da_cudaDeviceEnablePeerAccess = cudaError_t function(int peerDevice, uint flags);
2126   alias da_cudaDeviceDisablePeerAccess = cudaError_t function(int peerDevice);
2127   alias da_cudaGraphicsUnregisterResource = cudaError_t function(cudaGraphicsResource_t resource);
2128   alias da_cudaGraphicsResourceSetMapFlags = cudaError_t function(cudaGraphicsResource_t resource, uint flags);
2129   alias da_cudaGraphicsMapResources = cudaError_t function(int count, cudaGraphicsResource_t *resources, cudaStream_t stream = null);
2130   alias da_cudaGraphicsUnmapResources = cudaError_t function(int count, cudaGraphicsResource_t *resources, cudaStream_t stream = null);
2131   alias da_cudaGraphicsResourceGetMappedPointer = cudaError_t function(void **devPtr, size_t *size, cudaGraphicsResource_t resource);
2132   alias da_cudaGraphicsSubResourceGetMappedArray = cudaError_t function(cudaArray_t *array, cudaGraphicsResource_t resource, uint arrayIndex, uint mipLevel);
2133   alias da_cudaGraphicsResourceGetMappedMipmappedArray = cudaError_t function(cudaMipmappedArray_t *mipmappedArray, cudaGraphicsResource_t resource);
2134   alias da_cudaGetChannelDesc = cudaError_t function(cudaChannelFormatDesc *desc, cudaArray_const_t array);
2135   alias da_cudaBindTexture = cudaError_t function(size_t *offset, const textureReference *texref, const void *devPtr, const cudaChannelFormatDesc *desc, size_t size = uint.max);
2136   alias da_cudaBindTexture2D = cudaError_t function(size_t *offset, const textureReference *texref, const void *devPtr, const cudaChannelFormatDesc *desc, size_t width, size_t height, size_t pitch);
2137   alias da_cudaBindTextureToArray = cudaError_t function(const textureReference *texref, cudaArray_const_t array, const cudaChannelFormatDesc *desc);
2138   alias da_cudaBindTextureToMipmappedArray = cudaError_t function(const textureReference *texref, cudaMipmappedArray_const_t mipmappedArray, const cudaChannelFormatDesc *desc);
2139   alias da_cudaUnbindTexture = cudaError_t function(const textureReference *texref);
2140   alias da_cudaGetTextureAlignmentOffset = cudaError_t function(size_t *offset, const textureReference *texref);
2141   alias da_cudaGetTextureReference = cudaError_t function(const textureReference **texref, const void *symbol);
2142   alias da_cudaBindSurfaceToArray = cudaError_t function(const surfaceReference *surfref, cudaArray_const_t array, const cudaChannelFormatDesc *desc);
2143   alias da_cudaGetSurfaceReference = cudaError_t function(const surfaceReference **surfref, const void *symbol);
2144   alias da_cudaCreateTextureObject = cudaError_t function(cudaTextureObject_t *pTexObject, const cudaResourceDesc *pResDesc, const cudaTextureDesc *pTexDesc, const cudaResourceViewDesc *pResViewDesc);
2145   alias da_cudaDestroyTextureObject = cudaError_t function(cudaTextureObject_t texObject);
2146   alias da_cudaGetTextureObjectResourceDesc = cudaError_t function(cudaResourceDesc *pResDesc, cudaTextureObject_t texObject);
2147   alias da_cudaGetTextureObjectTextureDesc = cudaError_t function(cudaTextureDesc *pTexDesc, cudaTextureObject_t texObject);
2148   alias da_cudaGetTextureObjectResourceViewDesc = cudaError_t function(cudaResourceViewDesc *pResViewDesc, cudaTextureObject_t texObject);
2149   alias da_cudaCreateSurfaceObject = cudaError_t function(cudaSurfaceObject_t *pSurfObject, const cudaResourceDesc *pResDesc);
2150   alias da_cudaDestroySurfaceObject = cudaError_t function(cudaSurfaceObject_t surfObject);
2151   alias da_cudaGetSurfaceObjectResourceDesc = cudaError_t function(cudaResourceDesc *pResDesc, cudaSurfaceObject_t surfObject);
2152   alias da_cudaDriverGetVersion = cudaError_t function(int *driverVersion);
2153   alias da_cudaRuntimeGetVersion = cudaError_t function(int *runtimeVersion);
2154   alias da_cudaGraphCreate = cudaError_t function(cudaGraph_t *pGraph, uint flags);
2155   alias da_cudaGraphAddKernelNode = cudaError_t function(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, cudaGraphNode_t *pDependencies, size_t numDependencies, const cudaKernelNodeParams *pNodeParams);
2156   alias da_cudaGraphKernelNodeGetParams = cudaError_t function(cudaGraphNode_t node, cudaKernelNodeParams *pNodeParams);
2157   alias da_cudaGraphKernelNodeSetParams = cudaError_t function(cudaGraphNode_t node, const cudaKernelNodeParams *pNodeParams);
2158   alias da_cudaGraphAddMemcpyNode = cudaError_t function(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, cudaGraphNode_t *pDependencies, size_t numDependencies, const cudaMemcpy3DParms *pCopyParams);
2159   alias da_cudaGraphMemcpyNodeGetParams = cudaError_t function(cudaGraphNode_t node, cudaMemcpy3DParms *pNodeParams);
2160   alias da_cudaGraphMemcpyNodeSetParams = cudaError_t function(cudaGraphNode_t node, const cudaMemcpy3DParms *pNodeParams);
2161   alias da_cudaGraphAddMemsetNode = cudaError_t function(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, cudaGraphNode_t *pDependencies, size_t numDependencies, const cudaMemsetParams *pMemsetParams);
2162   alias da_cudaGraphMemsetNodeGetParams = cudaError_t function(cudaGraphNode_t node, cudaMemsetParams *pNodeParams);
2163   alias da_cudaGraphMemsetNodeSetParams = cudaError_t function(cudaGraphNode_t node, const cudaMemsetParams *pNodeParams);
2164   alias da_cudaGraphAddHostNode = cudaError_t function(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, cudaGraphNode_t *pDependencies, size_t numDependencies, const cudaHostNodeParams *pNodeParams);
2165   alias da_cudaGraphHostNodeGetParams = cudaError_t function(cudaGraphNode_t node, cudaHostNodeParams *pNodeParams);
2166   alias da_cudaGraphHostNodeSetParams = cudaError_t function(cudaGraphNode_t node, const cudaHostNodeParams *pNodeParams);
2167   alias da_cudaGraphAddChildGraphNode = cudaError_t function(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, cudaGraphNode_t *pDependencies, size_t numDependencies, cudaGraph_t childGraph);
2168   alias da_cudaGraphChildGraphNodeGetGraph = cudaError_t function(cudaGraphNode_t node, cudaGraph_t *pGraph);
2169   alias da_cudaGraphAddEmptyNode = cudaError_t function(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, cudaGraphNode_t *pDependencies, size_t numDependencies);
2170   alias da_cudaGraphClone = cudaError_t function(cudaGraph_t *pGraphClone, cudaGraph_t originalGraph);
2171   alias da_cudaGraphNodeFindInClone = cudaError_t function(cudaGraphNode_t *pNode, cudaGraphNode_t originalNode, cudaGraph_t clonedGraph);
2172   alias da_cudaGraphNodeGetType = cudaError_t function(cudaGraphNode_t node, cudaGraphNodeType *pType);
2173   alias da_cudaGraphGetNodes = cudaError_t function(cudaGraph_t graph, cudaGraphNode_t *nodes, size_t *numNodes);
2174   alias da_cudaGraphGetRootNodes = cudaError_t function(cudaGraph_t graph, cudaGraphNode_t *pRootNodes, size_t *pNumRootNodes);
2175   alias da_cudaGraphGetEdges = cudaError_t function(cudaGraph_t graph, cudaGraphNode_t *from, cudaGraphNode_t *to, size_t *numEdges);
2176   alias da_cudaGraphNodeGetDependencies = cudaError_t function(cudaGraphNode_t node, cudaGraphNode_t *pDependencies, size_t *pNumDependencies);
2177   alias da_cudaGraphNodeGetDependentNodes = cudaError_t function(cudaGraphNode_t node, cudaGraphNode_t *pDependentNodes, size_t *pNumDependentNodes);
2178   alias da_cudaGraphAddDependencies = cudaError_t function(cudaGraph_t graph, cudaGraphNode_t *from, cudaGraphNode_t *to, size_t numDependencies);
2179   alias da_cudaGraphRemoveDependencies = cudaError_t function(cudaGraph_t graph, cudaGraphNode_t *from, cudaGraphNode_t *to, size_t numDependencies);
2180   alias da_cudaGraphDestroyNode = cudaError_t function(cudaGraphNode_t node);
2181   alias da_cudaGraphInstantiate = cudaError_t function(cudaGraphExec_t *pGraphExec, cudaGraph_t graph, cudaGraphNode_t *pErrorNode, char *pLogBuffer, size_t bufferSize);
2182   alias da_cudaGraphLaunch = cudaError_t function(cudaGraphExec_t graphExec, cudaStream_t stream);
2183   alias da_cudaGraphExecDestroy = cudaError_t function(cudaGraphExec_t graphExec);
2184   alias da_cudaGraphDestroy = cudaError_t function(cudaGraph_t graph);
2185   alias da_cudaGetExportTable = cudaError_t function(const void **ppExportTable, const cudaUUID_t *pExportTableId);
2186 
2187 }
2188 
2189 __gshared
2190 {
2191   da_cudaDeviceReset cudaDeviceReset;
2192   da_cudaDeviceSynchronize cudaDeviceSynchronize;
2193   da_cudaDeviceSetLimit cudaDeviceSetLimit;
2194   da_cudaDeviceGetLimit cudaDeviceGetLimit;
2195   da_cudaDeviceGetCacheConfig cudaDeviceGetCacheConfig;
2196   da_cudaDeviceGetStreamPriorityRange cudaDeviceGetStreamPriorityRange;
2197   da_cudaDeviceSetCacheConfig cudaDeviceSetCacheConfig;
2198   da_cudaDeviceGetSharedMemConfig cudaDeviceGetSharedMemConfig;
2199   da_cudaDeviceSetSharedMemConfig cudaDeviceSetSharedMemConfig;
2200   da_cudaDeviceGetByPCIBusId cudaDeviceGetByPCIBusId;
2201   da_cudaDeviceGetPCIBusId cudaDeviceGetPCIBusId;
2202   da_cudaIpcGetEventHandle cudaIpcGetEventHandle;
2203   da_cudaIpcOpenEventHandle cudaIpcOpenEventHandle;
2204   da_cudaIpcGetMemHandle cudaIpcGetMemHandle;
2205   da_cudaIpcOpenMemHandle cudaIpcOpenMemHandle;
2206   da_cudaIpcCloseMemHandle cudaIpcCloseMemHandle;
2207   da_cudaThreadExit cudaThreadExit;
2208   da_cudaThreadSynchronize cudaThreadSynchronize;
2209   da_cudaThreadSetLimit cudaThreadSetLimit;
2210   da_cudaThreadGetLimit cudaThreadGetLimit;
2211   da_cudaThreadGetCacheConfig cudaThreadGetCacheConfig;
2212   da_cudaThreadSetCacheConfig cudaThreadSetCacheConfig;
2213   da_cudaGetLastError cudaGetLastError;
2214   da_cudaPeekAtLastError cudaPeekAtLastError;
2215   da_cudaGetErrorName cudaGetErrorName;
2216   da_cudaGetErrorString cudaGetErrorString;
2217   da_cudaGetDeviceCount cudaGetDeviceCount;
2218   da_cudaGetDeviceProperties cudaGetDeviceProperties;
2219   da_cudaDeviceGetAttribute cudaDeviceGetAttribute;
2220   da_cudaDeviceGetP2PAttribute cudaDeviceGetP2PAttribute;
2221   da_cudaChooseDevice cudaChooseDevice;
2222   da_cudaSetDevice cudaSetDevice;
2223   da_cudaGetDevice cudaGetDevice;
2224   da_cudaSetValidDevices cudaSetValidDevices;
2225   da_cudaSetDeviceFlags cudaSetDeviceFlags;
2226   da_cudaGetDeviceFlags cudaGetDeviceFlags;
2227   da_cudaStreamCreate cudaStreamCreate;
2228   da_cudaStreamCreateWithFlags cudaStreamCreateWithFlags;
2229   da_cudaStreamCreateWithPriority cudaStreamCreateWithPriority;
2230   da_cudaStreamGetPriority cudaStreamGetPriority;
2231   da_cudaStreamGetFlags cudaStreamGetFlags;
2232   da_cudaStreamDestroy cudaStreamDestroy;
2233   da_cudaStreamWaitEvent cudaStreamWaitEvent;
2234   da_cudaStreamAddCallback cudaStreamAddCallback;
2235   da_cudaStreamSynchronize cudaStreamSynchronize;
2236   da_cudaStreamQuery cudaStreamQuery;
2237   da_cudaStreamAttachMemAsync cudaStreamAttachMemAsync;
2238   da_cudaStreamBeginCapture cudaStreamBeginCapture;
2239   da_cudaStreamEndCapture cudaStreamEndCapture;
2240   da_cudaStreamIsCapturing cudaStreamIsCapturing;
2241   da_cudaEventCreate cudaEventCreate;
2242   da_cudaEventCreateWithFlags cudaEventCreateWithFlags;
2243   da_cudaEventRecord cudaEventRecord;
2244   da_cudaEventQuery cudaEventQuery;
2245   da_cudaEventSynchronize cudaEventSynchronize;
2246   da_cudaEventDestroy cudaEventDestroy;
2247   da_cudaEventElapsedTime cudaEventElapsedTime;
2248   da_cudaImportExternalMemory cudaImportExternalMemory;
2249   da_cudaExternalMemoryGetMappedBuffer cudaExternalMemoryGetMappedBuffer;
2250   da_cudaExternalMemoryGetMappedMipmappedArray cudaExternalMemoryGetMappedMipmappedArray;
2251   da_cudaDestroyExternalMemory cudaDestroyExternalMemory;
2252   da_cudaImportExternalSemaphore cudaImportExternalSemaphore;
2253   da_cudaSignalExternalSemaphoresAsync cudaSignalExternalSemaphoresAsync;
2254   da_cudaWaitExternalSemaphoresAsync cudaWaitExternalSemaphoresAsync;
2255   da_cudaDestroyExternalSemaphore cudaDestroyExternalSemaphore;
2256   da_cudaLaunchKernel cudaLaunchKernel;
2257   da_cudaLaunchCooperativeKernel cudaLaunchCooperativeKernel;
2258   da_cudaLaunchCooperativeKernelMultiDevice cudaLaunchCooperativeKernelMultiDevice;
2259   da_cudaFuncSetCacheConfig cudaFuncSetCacheConfig;
2260   da_cudaFuncSetSharedMemConfig cudaFuncSetSharedMemConfig;
2261   da_cudaFuncGetAttributes cudaFuncGetAttributes;
2262   da_cudaFuncSetAttribute cudaFuncSetAttribute;
2263   da_cudaSetDoubleForDevice cudaSetDoubleForDevice;
2264   da_cudaSetDoubleForHost cudaSetDoubleForHost;
2265   da_cudaLaunchHostFunc cudaLaunchHostFunc;
2266   da_cudaOccupancyMaxActiveBlocksPerMultiprocessor cudaOccupancyMaxActiveBlocksPerMultiprocessor;
2267   da_cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags;
2268   da_cudaConfigureCall cudaConfigureCall;
2269   da_cudaSetupArgument cudaSetupArgument;
2270   da_cudaLaunch cudaLaunch;
2271   da_cudaMallocManaged cudaMallocManaged;
2272   da_cudaMalloc cudaMalloc;
2273   da_cudaMallocHost cudaMallocHost;
2274   da_cudaMallocPitch cudaMallocPitch;
2275   da_cudaMallocArray cudaMallocArray;
2276   da_cudaFree cudaFree;
2277   da_cudaFreeHost cudaFreeHost;
2278   da_cudaFreeArray cudaFreeArray;
2279   da_cudaFreeMipmappedArray cudaFreeMipmappedArray;
2280   da_cudaHostAlloc cudaHostAlloc;
2281   da_cudaHostRegister cudaHostRegister;
2282   da_cudaHostUnregister cudaHostUnregister;
2283   da_cudaHostGetDevicePointer cudaHostGetDevicePointer;
2284   da_cudaHostGetFlags cudaHostGetFlags;
2285   da_cudaMalloc3D cudaMalloc3D;
2286   da_cudaMalloc3DArray cudaMalloc3DArray;
2287   da_cudaMallocMipmappedArray cudaMallocMipmappedArray;
2288   da_cudaGetMipmappedArrayLevel cudaGetMipmappedArrayLevel;
2289   da_cudaMemcpy3D cudaMemcpy3D;
2290   da_cudaMemcpy3DPeer cudaMemcpy3DPeer;
2291   da_cudaMemcpy3DAsync cudaMemcpy3DAsync;
2292   da_cudaMemcpy3DPeerAsync cudaMemcpy3DPeerAsync;
2293   da_cudaMemGetInfo cudaMemGetInfo;
2294   da_cudaArrayGetInfo cudaArrayGetInfo;
2295   da_cudaMemcpy cudaMemcpy;
2296   da_cudaMemcpyPeer cudaMemcpyPeer;
2297   da_cudaMemcpyToArray cudaMemcpyToArray;
2298   da_cudaMemcpyFromArray cudaMemcpyFromArray;
2299   da_cudaMemcpyArrayToArray cudaMemcpyArrayToArray;
2300   da_cudaMemcpy2D cudaMemcpy2D;
2301   da_cudaMemcpy2DToArray cudaMemcpy2DToArray;
2302   da_cudaMemcpy2DFromArray cudaMemcpy2DFromArray;
2303   da_cudaMemcpy2DArrayToArray cudaMemcpy2DArrayToArray;
2304   da_cudaMemcpyToSymbol cudaMemcpyToSymbol;
2305   da_cudaMemcpyFromSymbol cudaMemcpyFromSymbol;
2306   da_cudaMemcpyAsync cudaMemcpyAsync;
2307   da_cudaMemcpyPeerAsync cudaMemcpyPeerAsync;
2308   da_cudaMemcpyToArrayAsync cudaMemcpyToArrayAsync;
2309   da_cudaMemcpyFromArrayAsync cudaMemcpyFromArrayAsync;
2310   da_cudaMemcpy2DAsync cudaMemcpy2DAsync;
2311   da_cudaMemcpy2DToArrayAsync cudaMemcpy2DToArrayAsync;
2312   da_cudaMemcpy2DFromArrayAsync cudaMemcpy2DFromArrayAsync;
2313   da_cudaMemcpyToSymbolAsync cudaMemcpyToSymbolAsync;
2314   da_cudaMemcpyFromSymbolAsync cudaMemcpyFromSymbolAsync;
2315   da_cudaMemset cudaMemset;
2316   da_cudaMemset2D cudaMemset2D;
2317   da_cudaMemset3D cudaMemset3D;
2318   da_cudaMemsetAsync cudaMemsetAsync;
2319   da_cudaMemset2DAsync cudaMemset2DAsync;
2320   da_cudaMemset3DAsync cudaMemset3DAsync;
2321   da_cudaGetSymbolAddress cudaGetSymbolAddress;
2322   da_cudaGetSymbolSize cudaGetSymbolSize;
2323   da_cudaMemPrefetchAsync cudaMemPrefetchAsync;
2324   da_cudaMemAdvise cudaMemAdvise;
2325   da_cudaMemRangeGetAttribute cudaMemRangeGetAttribute;
2326   da_cudaMemRangeGetAttributes cudaMemRangeGetAttributes;
2327   da_cudaPointerGetAttributes cudaPointerGetAttributes;
2328   da_cudaDeviceCanAccessPeer cudaDeviceCanAccessPeer;
2329   da_cudaDeviceEnablePeerAccess cudaDeviceEnablePeerAccess;
2330   da_cudaDeviceDisablePeerAccess cudaDeviceDisablePeerAccess;
2331   da_cudaGraphicsUnregisterResource cudaGraphicsUnregisterResource;
2332   da_cudaGraphicsResourceSetMapFlags cudaGraphicsResourceSetMapFlags;
2333   da_cudaGraphicsMapResources cudaGraphicsMapResources;
2334   da_cudaGraphicsUnmapResources cudaGraphicsUnmapResources;
2335   da_cudaGraphicsResourceGetMappedPointer cudaGraphicsResourceGetMappedPointer;
2336   da_cudaGraphicsSubResourceGetMappedArray cudaGraphicsSubResourceGetMappedArray;
2337   da_cudaGraphicsResourceGetMappedMipmappedArray cudaGraphicsResourceGetMappedMipmappedArray;
2338   da_cudaGetChannelDesc cudaGetChannelDesc;
2339   da_cudaBindTexture cudaBindTexture;
2340   da_cudaBindTexture2D cudaBindTexture2D;
2341   da_cudaBindTextureToArray cudaBindTextureToArray;
2342   da_cudaBindTextureToMipmappedArray cudaBindTextureToMipmappedArray;
2343   da_cudaUnbindTexture cudaUnbindTexture;
2344   da_cudaGetTextureAlignmentOffset cudaGetTextureAlignmentOffset;
2345   da_cudaGetTextureReference cudaGetTextureReference;
2346   da_cudaBindSurfaceToArray cudaBindSurfaceToArray;
2347   da_cudaGetSurfaceReference cudaGetSurfaceReference;
2348   da_cudaCreateTextureObject cudaCreateTextureObject;
2349   da_cudaDestroyTextureObject cudaDestroyTextureObject;
2350   da_cudaGetTextureObjectResourceDesc cudaGetTextureObjectResourceDesc;
2351   da_cudaGetTextureObjectTextureDesc cudaGetTextureObjectTextureDesc;
2352   da_cudaGetTextureObjectResourceViewDesc cudaGetTextureObjectResourceViewDesc;
2353   da_cudaCreateSurfaceObject cudaCreateSurfaceObject;
2354   da_cudaDestroySurfaceObject cudaDestroySurfaceObject;
2355   da_cudaGetSurfaceObjectResourceDesc cudaGetSurfaceObjectResourceDesc;
2356   da_cudaDriverGetVersion cudaDriverGetVersion;
2357   da_cudaRuntimeGetVersion cudaRuntimeGetVersion;
2358   da_cudaGraphCreate cudaGraphCreate;
2359   da_cudaGraphAddKernelNode cudaGraphAddKernelNode;
2360   da_cudaGraphKernelNodeGetParams cudaGraphKernelNodeGetParams;
2361   da_cudaGraphKernelNodeSetParams cudaGraphKernelNodeSetParams;
2362   da_cudaGraphAddMemcpyNode cudaGraphAddMemcpyNode;
2363   da_cudaGraphMemcpyNodeGetParams cudaGraphMemcpyNodeGetParams;
2364   da_cudaGraphMemcpyNodeSetParams cudaGraphMemcpyNodeSetParams;
2365   da_cudaGraphAddMemsetNode cudaGraphAddMemsetNode;
2366   da_cudaGraphMemsetNodeGetParams cudaGraphMemsetNodeGetParams;
2367   da_cudaGraphMemsetNodeSetParams cudaGraphMemsetNodeSetParams;
2368   da_cudaGraphAddHostNode cudaGraphAddHostNode;
2369   da_cudaGraphHostNodeGetParams cudaGraphHostNodeGetParams;
2370   da_cudaGraphHostNodeSetParams cudaGraphHostNodeSetParams;
2371   da_cudaGraphAddChildGraphNode cudaGraphAddChildGraphNode;
2372   da_cudaGraphChildGraphNodeGetGraph cudaGraphChildGraphNodeGetGraph;
2373   da_cudaGraphAddEmptyNode cudaGraphAddEmptyNode;
2374   da_cudaGraphClone cudaGraphClone;
2375   da_cudaGraphNodeFindInClone cudaGraphNodeFindInClone;
2376   da_cudaGraphNodeGetType cudaGraphNodeGetType;
2377   da_cudaGraphGetNodes cudaGraphGetNodes;
2378   da_cudaGraphGetRootNodes cudaGraphGetRootNodes;
2379   da_cudaGraphGetEdges cudaGraphGetEdges;
2380   da_cudaGraphNodeGetDependencies cudaGraphNodeGetDependencies;
2381   da_cudaGraphNodeGetDependentNodes cudaGraphNodeGetDependentNodes;
2382   da_cudaGraphAddDependencies cudaGraphAddDependencies;
2383   da_cudaGraphRemoveDependencies cudaGraphRemoveDependencies;
2384   da_cudaGraphDestroyNode cudaGraphDestroyNode;
2385   da_cudaGraphInstantiate cudaGraphInstantiate;
2386   da_cudaGraphLaunch cudaGraphLaunch;
2387   da_cudaGraphExecDestroy cudaGraphExecDestroy;
2388   da_cudaGraphDestroy cudaGraphDestroy;
2389   da_cudaGetExportTable cudaGetExportTable;
2390 }
2391 
2392 // Runtime API loader
2393 class DerelictCUDARuntimeLoader : SharedLibLoader
2394 {
2395   protected
2396   {
2397     override void loadSymbols()
2398     {
2399       bindFunc(cast(void**)&cudaDeviceReset, "cudaDeviceReset");
2400       bindFunc(cast(void**)&cudaDeviceSynchronize, "cudaDeviceSynchronize");
2401       bindFunc(cast(void**)&cudaDeviceSetLimit, "cudaDeviceSetLimit");
2402       bindFunc(cast(void**)&cudaDeviceGetLimit, "cudaDeviceGetLimit");
2403       bindFunc(cast(void**)&cudaDeviceGetCacheConfig, "cudaDeviceGetCacheConfig");
2404       bindFunc(cast(void**)&cudaDeviceGetStreamPriorityRange, "cudaDeviceGetStreamPriorityRange");
2405       bindFunc(cast(void**)&cudaDeviceSetCacheConfig, "cudaDeviceSetCacheConfig");
2406       bindFunc(cast(void**)&cudaDeviceGetSharedMemConfig, "cudaDeviceGetSharedMemConfig");
2407       bindFunc(cast(void**)&cudaDeviceSetSharedMemConfig, "cudaDeviceSetSharedMemConfig");
2408       bindFunc(cast(void**)&cudaDeviceGetByPCIBusId, "cudaDeviceGetByPCIBusId");
2409       bindFunc(cast(void**)&cudaDeviceGetPCIBusId, "cudaDeviceGetPCIBusId");
2410       bindFunc(cast(void**)&cudaIpcGetEventHandle, "cudaIpcGetEventHandle");
2411       bindFunc(cast(void**)&cudaIpcOpenEventHandle, "cudaIpcOpenEventHandle");
2412       bindFunc(cast(void**)&cudaIpcGetMemHandle, "cudaIpcGetMemHandle");
2413       bindFunc(cast(void**)&cudaIpcOpenMemHandle, "cudaIpcOpenMemHandle");
2414       bindFunc(cast(void**)&cudaIpcCloseMemHandle, "cudaIpcCloseMemHandle");
2415       bindFunc(cast(void**)&cudaThreadExit, "cudaThreadExit");
2416       bindFunc(cast(void**)&cudaThreadSynchronize, "cudaThreadSynchronize");
2417       bindFunc(cast(void**)&cudaThreadSetLimit, "cudaThreadSetLimit");
2418       bindFunc(cast(void**)&cudaThreadGetLimit, "cudaThreadGetLimit");
2419       bindFunc(cast(void**)&cudaThreadGetCacheConfig, "cudaThreadGetCacheConfig");
2420       bindFunc(cast(void**)&cudaThreadSetCacheConfig, "cudaThreadSetCacheConfig");
2421       bindFunc(cast(void**)&cudaGetLastError, "cudaGetLastError");
2422       bindFunc(cast(void**)&cudaPeekAtLastError, "cudaPeekAtLastError");
2423       bindFunc(cast(void**)&cudaGetErrorName, "cudaGetErrorName");
2424       bindFunc(cast(void**)&cudaGetErrorString, "cudaGetErrorString");
2425       bindFunc(cast(void**)&cudaGetDeviceCount, "cudaGetDeviceCount");
2426       bindFunc(cast(void**)&cudaGetDeviceProperties, "cudaGetDeviceProperties");
2427       bindFunc(cast(void**)&cudaDeviceGetAttribute, "cudaDeviceGetAttribute");
2428       bindFunc(cast(void**)&cudaDeviceGetP2PAttribute, "cudaDeviceGetP2PAttribute");
2429       bindFunc(cast(void**)&cudaChooseDevice, "cudaChooseDevice");
2430       bindFunc(cast(void**)&cudaSetDevice, "cudaSetDevice");
2431       bindFunc(cast(void**)&cudaGetDevice, "cudaGetDevice");
2432       bindFunc(cast(void**)&cudaSetValidDevices, "cudaSetValidDevices");
2433       bindFunc(cast(void**)&cudaSetDeviceFlags, "cudaSetDeviceFlags");
2434       bindFunc(cast(void**)&cudaGetDeviceFlags, "cudaGetDeviceFlags");
2435       bindFunc(cast(void**)&cudaStreamCreate, "cudaStreamCreate");
2436       bindFunc(cast(void**)&cudaStreamCreateWithFlags, "cudaStreamCreateWithFlags");
2437       bindFunc(cast(void**)&cudaStreamCreateWithPriority, "cudaStreamCreateWithPriority");
2438       bindFunc(cast(void**)&cudaStreamGetPriority, "cudaStreamGetPriority");
2439       bindFunc(cast(void**)&cudaStreamGetFlags, "cudaStreamGetFlags");
2440       bindFunc(cast(void**)&cudaStreamDestroy, "cudaStreamDestroy");
2441       bindFunc(cast(void**)&cudaStreamWaitEvent, "cudaStreamWaitEvent");
2442       bindFunc(cast(void**)&cudaStreamAddCallback, "cudaStreamAddCallback");
2443       bindFunc(cast(void**)&cudaStreamSynchronize, "cudaStreamSynchronize");
2444       bindFunc(cast(void**)&cudaStreamQuery, "cudaStreamQuery");
2445       bindFunc(cast(void**)&cudaStreamAttachMemAsync, "cudaStreamAttachMemAsync");
2446       bindFunc(cast(void**)&cudaStreamBeginCapture, "cudaStreamBeginCapture");
2447       bindFunc(cast(void**)&cudaStreamEndCapture, "cudaStreamEndCapture");
2448       bindFunc(cast(void**)&cudaStreamIsCapturing, "cudaStreamIsCapturing");
2449       bindFunc(cast(void**)&cudaEventCreate, "cudaEventCreate");
2450       bindFunc(cast(void**)&cudaEventCreateWithFlags, "cudaEventCreateWithFlags");
2451       bindFunc(cast(void**)&cudaEventRecord, "cudaEventRecord");
2452       bindFunc(cast(void**)&cudaEventQuery, "cudaEventQuery");
2453       bindFunc(cast(void**)&cudaEventSynchronize, "cudaEventSynchronize");
2454       bindFunc(cast(void**)&cudaEventDestroy, "cudaEventDestroy");
2455       bindFunc(cast(void**)&cudaEventElapsedTime, "cudaEventElapsedTime");
2456       bindFunc(cast(void**)&cudaImportExternalMemory, "cudaImportExternalMemory");
2457       bindFunc(cast(void**)&cudaExternalMemoryGetMappedBuffer, "cudaExternalMemoryGetMappedBuffer");
2458       bindFunc(cast(void**)&cudaExternalMemoryGetMappedMipmappedArray, "cudaExternalMemoryGetMappedMipmappedArray");
2459       bindFunc(cast(void**)&cudaDestroyExternalMemory, "cudaDestroyExternalMemory");
2460       bindFunc(cast(void**)&cudaImportExternalSemaphore, "cudaImportExternalSemaphore");
2461       bindFunc(cast(void**)&cudaSignalExternalSemaphoresAsync, "cudaSignalExternalSemaphoresAsync");
2462       bindFunc(cast(void**)&cudaWaitExternalSemaphoresAsync, "cudaWaitExternalSemaphoresAsync");
2463       bindFunc(cast(void**)&cudaDestroyExternalSemaphore, "cudaDestroyExternalSemaphore");
2464       bindFunc(cast(void**)&cudaLaunchKernel, "cudaLaunchKernel");
2465       bindFunc(cast(void**)&cudaLaunchCooperativeKernel, "cudaLaunchCooperativeKernel");
2466       bindFunc(cast(void**)&cudaLaunchCooperativeKernelMultiDevice, "cudaLaunchCooperativeKernelMultiDevice");
2467       bindFunc(cast(void**)&cudaFuncSetCacheConfig, "cudaFuncSetCacheConfig");
2468       bindFunc(cast(void**)&cudaFuncSetSharedMemConfig, "cudaFuncSetSharedMemConfig");
2469       bindFunc(cast(void**)&cudaFuncGetAttributes, "cudaFuncGetAttributes");
2470       bindFunc(cast(void**)&cudaFuncSetAttribute, "cudaFuncSetAttribute");
2471       bindFunc(cast(void**)&cudaSetDoubleForDevice, "cudaSetDoubleForDevice");
2472       bindFunc(cast(void**)&cudaSetDoubleForHost, "cudaSetDoubleForHost");
2473       bindFunc(cast(void**)&cudaLaunchHostFunc, "cudaLaunchHostFunc");
2474       bindFunc(cast(void**)&cudaOccupancyMaxActiveBlocksPerMultiprocessor, "cudaOccupancyMaxActiveBlocksPerMultiprocessor");
2475       bindFunc(cast(void**)&cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags");
2476       bindFunc(cast(void**)&cudaConfigureCall, "cudaConfigureCall");
2477       bindFunc(cast(void**)&cudaSetupArgument, "cudaSetupArgument");
2478       bindFunc(cast(void**)&cudaLaunch, "cudaLaunch");
2479       bindFunc(cast(void**)&cudaMallocManaged, "cudaMallocManaged");
2480       bindFunc(cast(void**)&cudaMalloc, "cudaMalloc");
2481       bindFunc(cast(void**)&cudaMallocHost, "cudaMallocHost");
2482       bindFunc(cast(void**)&cudaMallocPitch, "cudaMallocPitch");
2483       bindFunc(cast(void**)&cudaMallocArray, "cudaMallocArray");
2484       bindFunc(cast(void**)&cudaFree, "cudaFree");
2485       bindFunc(cast(void**)&cudaFreeHost, "cudaFreeHost");
2486       bindFunc(cast(void**)&cudaFreeArray, "cudaFreeArray");
2487       bindFunc(cast(void**)&cudaFreeMipmappedArray, "cudaFreeMipmappedArray");
2488       bindFunc(cast(void**)&cudaHostAlloc, "cudaHostAlloc");
2489       bindFunc(cast(void**)&cudaHostRegister, "cudaHostRegister");
2490       bindFunc(cast(void**)&cudaHostUnregister, "cudaHostUnregister");
2491       bindFunc(cast(void**)&cudaHostGetDevicePointer, "cudaHostGetDevicePointer");
2492       bindFunc(cast(void**)&cudaHostGetFlags, "cudaHostGetFlags");
2493       bindFunc(cast(void**)&cudaMalloc3D, "cudaMalloc3D");
2494       bindFunc(cast(void**)&cudaMalloc3DArray, "cudaMalloc3DArray");
2495       bindFunc(cast(void**)&cudaMallocMipmappedArray, "cudaMallocMipmappedArray");
2496       bindFunc(cast(void**)&cudaGetMipmappedArrayLevel, "cudaGetMipmappedArrayLevel");
2497       bindFunc(cast(void**)&cudaMemcpy3D, "cudaMemcpy3D");
2498       bindFunc(cast(void**)&cudaMemcpy3DPeer, "cudaMemcpy3DPeer");
2499       bindFunc(cast(void**)&cudaMemcpy3DAsync, "cudaMemcpy3DAsync");
2500       bindFunc(cast(void**)&cudaMemcpy3DPeerAsync, "cudaMemcpy3DPeerAsync");
2501       bindFunc(cast(void**)&cudaMemGetInfo, "cudaMemGetInfo");
2502       bindFunc(cast(void**)&cudaArrayGetInfo, "cudaArrayGetInfo");
2503       bindFunc(cast(void**)&cudaMemcpy, "cudaMemcpy");
2504       bindFunc(cast(void**)&cudaMemcpyPeer, "cudaMemcpyPeer");
2505       bindFunc(cast(void**)&cudaMemcpyToArray, "cudaMemcpyToArray");
2506       bindFunc(cast(void**)&cudaMemcpyFromArray, "cudaMemcpyFromArray");
2507       bindFunc(cast(void**)&cudaMemcpyArrayToArray, "cudaMemcpyArrayToArray");
2508       bindFunc(cast(void**)&cudaMemcpy2D, "cudaMemcpy2D");
2509       bindFunc(cast(void**)&cudaMemcpy2DToArray, "cudaMemcpy2DToArray");
2510       bindFunc(cast(void**)&cudaMemcpy2DFromArray, "cudaMemcpy2DFromArray");
2511       bindFunc(cast(void**)&cudaMemcpy2DArrayToArray, "cudaMemcpy2DArrayToArray");
2512       bindFunc(cast(void**)&cudaMemcpyToSymbol, "cudaMemcpyToSymbol");
2513       bindFunc(cast(void**)&cudaMemcpyFromSymbol, "cudaMemcpyFromSymbol");
2514       bindFunc(cast(void**)&cudaMemcpyAsync, "cudaMemcpyAsync");
2515       bindFunc(cast(void**)&cudaMemcpyPeerAsync, "cudaMemcpyPeerAsync");
2516       bindFunc(cast(void**)&cudaMemcpyToArrayAsync, "cudaMemcpyToArrayAsync");
2517       bindFunc(cast(void**)&cudaMemcpyFromArrayAsync, "cudaMemcpyFromArrayAsync");
2518       bindFunc(cast(void**)&cudaMemcpy2DAsync, "cudaMemcpy2DAsync");
2519       bindFunc(cast(void**)&cudaMemcpy2DToArrayAsync, "cudaMemcpy2DToArrayAsync");
2520       bindFunc(cast(void**)&cudaMemcpy2DFromArrayAsync, "cudaMemcpy2DFromArrayAsync");
2521       bindFunc(cast(void**)&cudaMemcpyToSymbolAsync, "cudaMemcpyToSymbolAsync");
2522       bindFunc(cast(void**)&cudaMemcpyFromSymbolAsync, "cudaMemcpyFromSymbolAsync");
2523       bindFunc(cast(void**)&cudaMemset, "cudaMemset");
2524       bindFunc(cast(void**)&cudaMemset2D, "cudaMemset2D");
2525       bindFunc(cast(void**)&cudaMemset3D, "cudaMemset3D");
2526       bindFunc(cast(void**)&cudaMemsetAsync, "cudaMemsetAsync");
2527       bindFunc(cast(void**)&cudaMemset2DAsync, "cudaMemset2DAsync");
2528       bindFunc(cast(void**)&cudaMemset3DAsync, "cudaMemset3DAsync");
2529       bindFunc(cast(void**)&cudaGetSymbolAddress, "cudaGetSymbolAddress");
2530       bindFunc(cast(void**)&cudaGetSymbolSize, "cudaGetSymbolSize");
2531       bindFunc(cast(void**)&cudaMemPrefetchAsync, "cudaMemPrefetchAsync");
2532       bindFunc(cast(void**)&cudaMemAdvise, "cudaMemAdvise");
2533       bindFunc(cast(void**)&cudaMemRangeGetAttribute, "cudaMemRangeGetAttribute");
2534       bindFunc(cast(void**)&cudaMemRangeGetAttributes, "cudaMemRangeGetAttributes");
2535       bindFunc(cast(void**)&cudaPointerGetAttributes, "cudaPointerGetAttributes");
2536       bindFunc(cast(void**)&cudaDeviceCanAccessPeer, "cudaDeviceCanAccessPeer");
2537       bindFunc(cast(void**)&cudaDeviceEnablePeerAccess, "cudaDeviceEnablePeerAccess");
2538       bindFunc(cast(void**)&cudaDeviceDisablePeerAccess, "cudaDeviceDisablePeerAccess");
2539       bindFunc(cast(void**)&cudaGraphicsUnregisterResource, "cudaGraphicsUnregisterResource");
2540       bindFunc(cast(void**)&cudaGraphicsResourceSetMapFlags, "cudaGraphicsResourceSetMapFlags");
2541       bindFunc(cast(void**)&cudaGraphicsMapResources, "cudaGraphicsMapResources");
2542       bindFunc(cast(void**)&cudaGraphicsUnmapResources, "cudaGraphicsUnmapResources");
2543       bindFunc(cast(void**)&cudaGraphicsResourceGetMappedPointer, "cudaGraphicsResourceGetMappedPointer");
2544       bindFunc(cast(void**)&cudaGraphicsSubResourceGetMappedArray, "cudaGraphicsSubResourceGetMappedArray");
2545       bindFunc(cast(void**)&cudaGraphicsResourceGetMappedMipmappedArray, "cudaGraphicsResourceGetMappedMipmappedArray");
2546       bindFunc(cast(void**)&cudaGetChannelDesc, "cudaGetChannelDesc");
2547       bindFunc(cast(void**)&cudaBindTexture, "cudaBindTexture");
2548       bindFunc(cast(void**)&cudaBindTexture2D, "cudaBindTexture2D");
2549       bindFunc(cast(void**)&cudaBindTextureToArray, "cudaBindTextureToArray");
2550       bindFunc(cast(void**)&cudaBindTextureToMipmappedArray, "cudaBindTextureToMipmappedArray");
2551       bindFunc(cast(void**)&cudaUnbindTexture, "cudaUnbindTexture");
2552       bindFunc(cast(void**)&cudaGetTextureAlignmentOffset, "cudaGetTextureAlignmentOffset");
2553       bindFunc(cast(void**)&cudaGetTextureReference, "cudaGetTextureReference");
2554       bindFunc(cast(void**)&cudaBindSurfaceToArray, "cudaBindSurfaceToArray");
2555       bindFunc(cast(void**)&cudaGetSurfaceReference, "cudaGetSurfaceReference");
2556       bindFunc(cast(void**)&cudaCreateTextureObject, "cudaCreateTextureObject");
2557       bindFunc(cast(void**)&cudaDestroyTextureObject, "cudaDestroyTextureObject");
2558       bindFunc(cast(void**)&cudaGetTextureObjectResourceDesc, "cudaGetTextureObjectResourceDesc");
2559       bindFunc(cast(void**)&cudaGetTextureObjectTextureDesc, "cudaGetTextureObjectTextureDesc");
2560       bindFunc(cast(void**)&cudaGetTextureObjectResourceViewDesc, "cudaGetTextureObjectResourceViewDesc");
2561       bindFunc(cast(void**)&cudaCreateSurfaceObject, "cudaCreateSurfaceObject");
2562       bindFunc(cast(void**)&cudaDestroySurfaceObject, "cudaDestroySurfaceObject");
2563       bindFunc(cast(void**)&cudaGetSurfaceObjectResourceDesc, "cudaGetSurfaceObjectResourceDesc");
2564       bindFunc(cast(void**)&cudaDriverGetVersion, "cudaDriverGetVersion");
2565       bindFunc(cast(void**)&cudaRuntimeGetVersion, "cudaRuntimeGetVersion");
2566       bindFunc(cast(void**)&cudaGraphCreate, "cudaGraphCreate");
2567       bindFunc(cast(void**)&cudaGraphAddKernelNode, "cudaGraphAddKernelNode");
2568       bindFunc(cast(void**)&cudaGraphKernelNodeGetParams, "cudaGraphKernelNodeGetParams");
2569       bindFunc(cast(void**)&cudaGraphKernelNodeSetParams, "cudaGraphKernelNodeSetParams");
2570       bindFunc(cast(void**)&cudaGraphAddMemcpyNode, "cudaGraphAddMemcpyNode");
2571       bindFunc(cast(void**)&cudaGraphMemcpyNodeGetParams, "cudaGraphMemcpyNodeGetParams");
2572       bindFunc(cast(void**)&cudaGraphMemcpyNodeSetParams, "cudaGraphMemcpyNodeSetParams");
2573       bindFunc(cast(void**)&cudaGraphAddMemsetNode, "cudaGraphAddMemsetNode");
2574       bindFunc(cast(void**)&cudaGraphMemsetNodeGetParams, "cudaGraphMemsetNodeGetParams");
2575       bindFunc(cast(void**)&cudaGraphMemsetNodeSetParams, "cudaGraphMemsetNodeSetParams");
2576       bindFunc(cast(void**)&cudaGraphAddHostNode, "cudaGraphAddHostNode");
2577       bindFunc(cast(void**)&cudaGraphHostNodeGetParams, "cudaGraphHostNodeGetParams");
2578       bindFunc(cast(void**)&cudaGraphHostNodeSetParams, "cudaGraphHostNodeSetParams");
2579       bindFunc(cast(void**)&cudaGraphAddChildGraphNode, "cudaGraphAddChildGraphNode");
2580       bindFunc(cast(void**)&cudaGraphChildGraphNodeGetGraph, "cudaGraphChildGraphNodeGetGraph");
2581       bindFunc(cast(void**)&cudaGraphAddEmptyNode, "cudaGraphAddEmptyNode");
2582       bindFunc(cast(void**)&cudaGraphClone, "cudaGraphClone");
2583       bindFunc(cast(void**)&cudaGraphNodeFindInClone, "cudaGraphNodeFindInClone");
2584       bindFunc(cast(void**)&cudaGraphNodeGetType, "cudaGraphNodeGetType");
2585       bindFunc(cast(void**)&cudaGraphGetNodes, "cudaGraphGetNodes");
2586       bindFunc(cast(void**)&cudaGraphGetRootNodes, "cudaGraphGetRootNodes");
2587       bindFunc(cast(void**)&cudaGraphGetEdges, "cudaGraphGetEdges");
2588       bindFunc(cast(void**)&cudaGraphNodeGetDependencies, "cudaGraphNodeGetDependencies");
2589       bindFunc(cast(void**)&cudaGraphNodeGetDependentNodes, "cudaGraphNodeGetDependentNodes");
2590       bindFunc(cast(void**)&cudaGraphAddDependencies, "cudaGraphAddDependencies");
2591       bindFunc(cast(void**)&cudaGraphRemoveDependencies, "cudaGraphRemoveDependencies");
2592       bindFunc(cast(void**)&cudaGraphDestroyNode, "cudaGraphDestroyNode");
2593       bindFunc(cast(void**)&cudaGraphInstantiate, "cudaGraphInstantiate");
2594       bindFunc(cast(void**)&cudaGraphLaunch, "cudaGraphLaunch");
2595       bindFunc(cast(void**)&cudaGraphExecDestroy, "cudaGraphExecDestroy");
2596       bindFunc(cast(void**)&cudaGraphDestroy, "cudaGraphDestroy");
2597       bindFunc(cast(void**)&cudaGetExportTable, "cudaGetExportTable");
2598     }
2599   }
2600 
  public
  {
    /// Constructs the loader, forwarding the platform-specific,
    /// comma-separated list of candidate CUDA runtime library names
    /// (`libNames`, selected at compile time per OS) to the base
    /// loader class. NOTE(review): this does not load the shared
    /// library itself — presumably the base class's load() must be
    /// called afterwards, per the usual Derelict convention; confirm
    /// against derelict.util.loader.
    this()
    {
      super(libNames);
    }
  }
2608 }
2609 
/// Global loader instance for the CUDA runtime bindings.
/// `__gshared` makes it a single process-wide variable shared across
/// threads (no per-thread TLS copy); it is assigned once by the
/// `shared static this` module constructor in this file.
__gshared DerelictCUDARuntimeLoader DerelictCUDARuntime;
2611 
/// Module constructor: runs exactly once at program startup (before
/// `main`) and instantiates the shared loader object. Only the loader
/// object is created here; the CUDA runtime shared library is not
/// opened at this point — NOTE(review): callers presumably trigger
/// that explicitly (e.g. `DerelictCUDARuntime.load()`); verify against
/// the base loader in derelict.util.loader.
shared static this()
{
    DerelictCUDARuntime = new DerelictCUDARuntimeLoader();
}