The OpenD Programming Language

1 /++
2 $(H2 High level abstraction on top of all architectures.)
3 
4 $(GREEN This module is compatible with betterC compilation mode.)
5 
6 
7 License:   $(WEB www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
8 
9 Authors:   Ilia Ki
10 +/
11 module cpuid.unified;
12 
///
unittest
{
    // Example: print a short summary of what the unified API detected.
    void smallReport()
    {
        import cpuid.unified;
        import std.stdio: writefln;

        // Right-aligned 14-character label followed by the value.
        enum rowFormat = "%14s: %s";

        // Core topology
        writefln(rowFormat, "cores", cores);
        writefln(rowFormat, "threads", threads);

        // Number of detected caches of each kind
        writefln(rowFormat, "data caches", dCache.length);
        writefln(rowFormat, "code caches", iCache.length);
        writefln(rowFormat, "unified caches", uCache.length);

        // Number of detected TLBs of each kind
        writefln(rowFormat, "data TLBs", dTlb.length);
        writefln(rowFormat, "code TLBs", iTlb.length);
        writefln(rowFormat, "unified TLBs", uTlb.length);
    }
}
35 
36 public import cpuid.common;
37 
// Define a single `X86_Any` version identifier that is set for both the
// 32-bit and the 64-bit x86 targets, so later code needs only one check.
version (X86)
{
    version = X86_Any;
}
version (X86_64)
{
    version = X86_Any;
}
42 
// Compile-time capacities of the static tables that hold the detected
// caches and TLBs. The `version(all)` branch is always taken; the `else`
// branch exists only as a compile-time trap.
version(all)
{
    enum : uint
    {
        // Cache tables
        _dCache_max_length = 1,
        _iCache_max_length = 1,
        _uCache_max_length = 3,

        // TLB tables
        _dTlb_max_length = 2,
        _iTlb_max_length = 2,
        _uTlb_max_length = 1,
    }
}
else
static assert(0);
55 
// Storage for the detection results. Everything is declared `immutable`
// and left default-initialized here; `mir_cpuid_init` fills the values in
// through the `_mut` helper.
private __gshared
{
    // Topology counters
    immutable uint _cpus;
    immutable uint _cores;
    immutable uint _threads;

    // Caches: number of valid entries followed by the fixed-capacity table
    immutable uint _iCache_length;
    immutable Cache[_iCache_max_length] _iCache;
    immutable uint _dCache_length;
    immutable Cache[_dCache_max_length] _dCache;
    immutable uint _uCache_length;
    immutable Cache[_uCache_max_length] _uCache;

    // TLBs: number of valid entries followed by the fixed-capacity table
    immutable uint _iTlb_length;
    immutable Tlb[_iTlb_max_length] _iTlb;
    immutable uint _dTlb_length;
    immutable Tlb[_dTlb_max_length] _dTlb;
    immutable uint _uTlb_length;
    immutable Tlb[_uTlb_max_length] _uTlb;
}
68 
// Converts an associativity value from type `T1` to type `T2`.
// When the two types differ (ignoring qualifiers), the source type's maximum
// value is mapped onto the destination type's maximum value instead of being
// converted numerically; every other value goes through a plain cast.
private T2 assocCopy(T2, T1)(T1 from)
{
    import std.traits: Unqual;

    static if (is(Unqual!T1 == Unqual!T2))
    {
        // Same underlying type: nothing to remap.
        Unqual!T2 result = cast(T2) from;
        return result;
    }
    else
    {
        Unqual!T2 result = (from == T1.max) ? T2.max : cast(T2) from;
        return result;
    }
}
82 
// Returns a mutable reference to the storage of an `immutable` value by
// casting the qualifier away. Used by the initialization code to fill in
// the module's `immutable` globals.
package ref T _mut(T)(return ref immutable T value)
{
    T* writable = cast(T*) &value;
    return *writable;
}
87 
// All declarations from here to the end of the module are exported with
// C linkage and are `nothrow` and `@nogc`.
export extern(C) nothrow @nogc:
91 
/++
Initialize basic CPU information including basic architecture.
It is safe to call this function multiple times.
On X86 machines it first runs the architecture-level initialization
(`mir_cpuid_x86_any_init`) and then fills in the core and thread counts and
the cache and TLB tables. On other targets it only reports one CPU, one core
and one thread.
+/
version(X86_Any)
pragma(crt_constructor)
void mir_cpuid_init()
{
    static if (__VERSION__ >= 2068)
        pragma(inline, false);

    // `_cpus` is set to a non-zero value at the end of a completed run.
    if (_cpus != 0)
        return; // already initialized

    import cpuid.x86_any;

    mir_cpuid_x86_any_init();

    static import cpuid.intel;
    static import cpuid.amd;

    // Baseline for old CPUs: take the logical processor count as the thread
    // count and half of it as the core count. Later leaves may override both.
    if (htt)
    {
        const uint logicalCount = maxLogicalProcessors;
        _threads._mut = logicalCount;
        _cores._mut = logicalCount / 2;
    }

    // These vendors are queried through the extended (0x8000_00xx) leaves.
    const detectedVendor = vendorIndex;
    const bool amdStyleLeaves =
           detectedVendor == VendorIndex.amd
        || detectedVendor == VendorIndex.amd_old
        || detectedVendor == VendorIndex.centaur
        || detectedVendor == VendorIndex.hygon;

    if (amdStyleLeaves)
    {
        // Caches and TLB
        if (maxExtendedLeaf >= 0x8000_0005)
        {
            alias CacheAssoc = typeof(Cache.associative);
            alias TlbAssoc = typeof(Tlb.associative);

            // Level 1
            auto l1Info = cpuid.amd.LeafExt5Information(_cpuid(0x8000_0005));

            if (l1Info.L1DTlb4KSize)
            {
                auto slot = &_dTlb._mut[0];
                slot.page = 4;
                slot.entries = l1Info.L1DTlb4KSize;
                slot.associative = assocCopy!TlbAssoc(l1Info.L1DTlb4KAssoc);
                _dTlb_length._mut = 1;
            }
            if (l1Info.L1ITlb4KSize)
            {
                auto slot = &_iTlb._mut[0];
                slot.page = 4;
                slot.entries = l1Info.L1ITlb4KSize;
                slot.associative = assocCopy!TlbAssoc(l1Info.L1ITlb4KAssoc);
                _iTlb_length._mut = 1;
            }
            if (l1Info.L1DcSize)
            {
                auto slot = &_dCache._mut[0];
                slot.size = l1Info.L1DcSize;
                slot.line = l1Info.L1DcLineSize;
                slot.associative = assocCopy!CacheAssoc(l1Info.L1DcAssoc);
                _dCache_length._mut = 1;
            }
            if (l1Info.L1IcSize)
            {
                auto slot = &_iCache._mut[0];
                slot.size = l1Info.L1IcSize;
                slot.line = l1Info.L1IcLineSize;
                slot.associative = assocCopy!CacheAssoc(l1Info.L1IcAssoc);
                _iCache_length._mut = 1;
            }

            // Levels 2 and 3: entries are appended after whatever level 1 stored.
            if (maxExtendedLeaf >= 0x8000_0006)
            {
                import cpuid.amd: decodeL2or3Assoc;
                auto l23Info = cpuid.amd.LeafExt6Information(_cpuid(0x8000_0006));

                if (l23Info.L2DTlb4KSize)
                {
                    auto slot = &_dTlb._mut[_dTlb_length];
                    slot.page = 4;
                    slot.entries = l23Info.L2DTlb4KSize;
                    slot.associative = decodeL2or3Assoc!TlbAssoc(l23Info.L2DTlb4KAssoc);
                    _dTlb_length._mut++;
                }
                if (l23Info.L2ITlb4KSize)
                {
                    auto slot = &_iTlb._mut[_iTlb_length];
                    slot.page = 4;
                    slot.entries = l23Info.L2ITlb4KSize;
                    slot.associative = decodeL2or3Assoc!TlbAssoc(l23Info.L2ITlb4KAssoc);
                    _iTlb_length._mut++;
                }
                if (l23Info.L2Size)
                {
                    auto slot = &_uCache._mut[_uCache_length];
                    slot.size = l23Info.L2Size;
                    slot.line = cast(typeof(Cache.line)) l23Info.L2LineSize;
                    slot.associative = decodeL2or3Assoc!CacheAssoc(l23Info.L2Assoc);
                    _uCache_length._mut++;
                }
                if (l23Info.L3Size)
                {
                    auto slot = &_uCache._mut[_uCache_length];
                    // NOTE(review): the factor 512 presumably converts the reported
                    // L3 units into the units used by L2Size; confirm in cpuid.amd.
                    slot.size = l23Info.L3Size * 512;
                    slot.line = cast(typeof(Cache.line)) l23Info.L3LineSize;
                    slot.associative = decodeL2or3Assoc!CacheAssoc(l23Info.L3Assoc);
                    _uCache_length._mut++;
                }

                if (maxExtendedLeaf >= 0x8000_0008)
                {
                    auto sizeInfo = cpuid.amd.LeafExt8Information(_cpuid(0x8000_0008));
                    _threads._mut = sizeInfo.NC + 1;

                    if (maxExtendedLeaf >= 0x8000_001E)
                    {
                        auto topoInfo = cpuid.amd.LeafExt1EInformation(_cpuid(0x8000_001E));
                        _cores._mut = _threads / (topoInfo.ThreadsPerCore + 1);
                    }
                }
            }
        }
    }
    else if (maxBasicLeaf >= 0x2)
    {
        // Other vendors: TLB and cache descriptors come from basic leaf 2.
        auto legacyInfo = cpuid.intel.Leaf2Information(_cpuid(2));

        // Fill TLB info
        if (legacyInfo.dtlb.size)
        {
            _dTlb._mut[0] = legacyInfo.dtlb;
            _dTlb_length._mut = 1;
        }
        if (legacyInfo.dtlb1.size)
        {
            _dTlb._mut[_dTlb_length] = legacyInfo.dtlb1;
            _dTlb_length._mut++;
        }
        if (legacyInfo.itlb.size)
        {
            _iTlb._mut[0] = legacyInfo.itlb;
            _iTlb_length._mut = 1;
        }
        if (legacyInfo.utlb.size)
        {
            _uTlb._mut[0] = legacyInfo.utlb;
            _uTlb_length._mut = 1;
        }

        if (maxBasicLeaf >= 0x4)
        {
            // Fill cache info from leaf 4, one sub-leaf per cache.
            alias CacheKind = cpuid.intel.Leaf4Information.Type;

            cpuid.intel.Leaf4Information detailed = void;
            Cache entry;
            for (uint subleaf = 0; subleaf < 12; ++subleaf)
            {
                detailed.info = _cpuid(4, subleaf);
                detailed.fill(entry);

                // Store the cache in the table matching its kind; caches
                // beyond a table's capacity are dropped. Any other kind
                // ends the enumeration.
                const kind = detailed.type;
                if (kind == CacheKind.data)
                {
                    if (_dCache_length < _dCache.length)
                        _dCache._mut[_dCache_length._mut++] = entry;
                }
                else if (kind == CacheKind.instruction)
                {
                    if (_iCache_length < _iCache.length)
                        _iCache._mut[_iCache_length._mut++] = entry;
                }
                else if (kind == CacheKind.unified)
                {
                    if (_uCache_length < _uCache.length)
                        _uCache._mut[_uCache_length._mut++] = entry;
                }
                else
                {
                    break;
                }

                // Fill core number for old CPUs
                _cores._mut = detailed.maxCorePerCPU;
            }

            if (maxBasicLeaf >= 0xB)
            {
                // Leaf 0xB, sub-leaf 1: overrides the thread count when non-zero.
                auto packageThreads = cast(ushort) _cpuid(0xB, 1).b;
                if (packageThreads > 0)
                    _threads._mut = packageThreads;

                // Leaf 0xB, sub-leaf 0: threads per core, used to derive the core count.
                auto smtWidth = cast(ushort) _cpuid(0xB, 0).b;
                if (smtWidth)
                {
                    _cores._mut = _threads / smtWidth;
                }
            }
        }
        else
        {
            // Leaf 4 is unavailable: fill cache info from leaf 2.
            if (legacyInfo.l1.size)
            {
                _dCache._mut[0] = legacyInfo.l1;
                _dCache_length._mut = 1;
            }
            if (legacyInfo.il1.size)
            {
                _iCache._mut[0] = legacyInfo.il1;
                _iCache_length._mut = 1;
            }
            if (legacyInfo.l2.size)
            {
                _uCache._mut[0] = legacyInfo.l2;
                _uCache_length._mut = 1;
            }
            if (legacyInfo.l3.size)
            {
                _uCache._mut[_uCache_length] = legacyInfo.l3;
                _uCache_length._mut++;
            }
        }
    }

    // Every counter ends up at least 1, and there are never fewer threads than cores.
    if (_cpus == 0)
        _cpus._mut = 1;
    if (_cores == 0)
        _cores._mut = 1;
    if (_threads == 0)
        _threads._mut = 1;
    if (_cores > _threads)
        _threads._mut = _cores;

    // Fill the `cores` field of the caches: the first instruction and data
    // caches get 1. A lone unified cache gets the core count; with several
    // unified caches the first gets 1 and the rest get the core count.
    if (_iCache_length != 0)
        _iCache._mut[0].cores = 1;
    if (_dCache_length != 0)
        _dCache._mut[0].cores = 1;

    if (_uCache_length == 1)
    {
        _uCache._mut[0].cores = cast(typeof(Cache.cores)) _cores;
    }
    else if (_uCache_length > 1)
    {
        _uCache._mut[0].cores = 1;
        foreach (i; 1 .. _uCache_length)
            _uCache._mut[i].cores = cast(typeof(Cache.cores)) _cores;
    }
}
else
pragma(crt_constructor)
void mir_cpuid_init()
{
    // No detection on this target: report one package, one core, one thread.
    _threads._mut = 1;
    _cores._mut = 1;
    _cpus._mut = 1;
}
/// ditto
alias cpuid_init = mir_cpuid_init;

// The initializer is declared as a CRT constructor, so this explicit call is
// a repeated invocation; it must complete without failing.
unittest
{
    mir_cpuid_init();
}
348 
// The accessors below are additionally `pure` and `@trusted`.
pure @trusted:

/++
Total number of CPU packages.
Note: package detection is not implemented; the initializer in this module
only ever stores 1.
+/
uint mir_cpuid_cpus()
{
    return _cpus;
}
/// ditto
alias cpus = mir_cpuid_cpus;
358 
/++
Total number of cores per CPU.

Returns:
    The core count stored by `mir_cpuid_init`.
+/
uint mir_cpuid_cores()
{
    return _cores;
}
/// ditto
alias cores = mir_cpuid_cores;
365 
/++
Total number of threads per CPU.

Returns:
    The thread count stored by `mir_cpuid_init`.
+/
uint mir_cpuid_threads()
{
    return _threads;
}
/// ditto
alias threads = mir_cpuid_threads;
372 
/++
Data Caches

Returns:
    Slice over the detected data caches (only the filled part of the
    internal table). Array is sorted in ascending order.
+/
immutable(Cache)[] mir_cpuid_dCache()
{
    const count = _dCache_length;
    return _dCache[0 .. count];
}
/// ditto
alias dCache = mir_cpuid_dCache;
382 
/++
Instruction Caches

Returns:
    Slice over the detected instruction caches (only the filled part of the
    internal table). Array is sorted in ascending order.
+/
immutable(Cache)[] mir_cpuid_iCache()
{
    const count = _iCache_length;
    return _iCache[0 .. count];
}
/// ditto
alias iCache = mir_cpuid_iCache;
392 
/++
Unified Caches

Returns:
    Slice over the detected unified caches (only the filled part of the
    internal table). Array is sorted in ascending order.
+/
immutable(Cache)[] mir_cpuid_uCache()
{
    const count = _uCache_length;
    return _uCache[0 .. count];
}
/// ditto
alias uCache = mir_cpuid_uCache;
402 
/++
Data Translation Lookaside Buffers

Returns:
    Slice over the detected data translation lookaside buffers (only the
    filled part of the internal table). Array is sorted in ascending order.
+/
immutable(Tlb)[] mir_cpuid_dTlb()
{
    const count = _dTlb_length;
    return _dTlb[0 .. count];
}
/// ditto
alias dTlb = mir_cpuid_dTlb;
412 
/++
Instruction Translation Lookaside Buffers

Returns:
    Slice over the detected instruction translation lookaside buffers (only
    the filled part of the internal table). Array is sorted in ascending order.
+/
immutable(Tlb)[] mir_cpuid_iTlb()
{
    const count = _iTlb_length;
    return _iTlb[0 .. count];
}
/// ditto
alias iTlb = mir_cpuid_iTlb;
422 
/++
Unified Translation Lookaside Buffers

Returns:
    Slice over the detected unified translation lookaside buffers (only the
    filled part of the internal table). Array is sorted in ascending order.
+/
immutable(Tlb)[] mir_cpuid_uTlb()
{
    const count = _uTlb_length;
    return _uTlb[0 .. count];
}
/// ditto
alias uTlb = mir_cpuid_uTlb;