cpuid.go 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030
  1. // Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
  2. // Package cpuid provides information about the CPU running the current program.
  3. //
  4. // CPU features are detected on startup, and kept for fast access through the life of the application.
  5. // Currently x86 / x64 (AMD64) is supported.
  6. //
  7. // You can access the CPU information by accessing the shared CPU variable of the cpuid library.
  8. //
  9. // Package home: https://github.com/klauspost/cpuid
  10. package cpuid
  11. import "strings"
// Vendor is a representation of a CPU vendor.
type Vendor int

// Known CPU (or hypervisor) vendors. Other is the zero value and is what
// vendorID returns for any vendor string that is not listed in vendorMapping.
const (
	Other     Vendor = iota // Unrecognized vendor string
	Intel                   // "GenuineIntel"
	AMD                     // "AuthenticAMD" / "AMDisbetter!"
	VIA                     // "CentaurHauls" / "VIA VIA VIA "
	Transmeta               // "TransmetaCPU" / "GenuineTMx86"
	NSC                     // National Semiconductor ("Geode by NSC")
	KVM                     // Kernel-based Virtual Machine
	MSVM                    // Microsoft Hyper-V or Windows Virtual PC
	VMware                  // "VMwareVMware"
	XenHVM                  // "XenVMMXenVMM"
)
// CPU feature flags. Every constant is a distinct single bit (1 << iota) that
// can be tested against CPUInfo.Features. The constants are untyped, which is
// why support can OR them into a plain uint64 before converting to Flags.
const (
	CMOV        = 1 << iota // i686 CMOV
	NX                      // NX (No-Execute) bit
	AMD3DNOW                // AMD 3DNOW
	AMD3DNOWEXT             // AMD 3DNowExt
	MMX                     // standard MMX
	MMXEXT                  // SSE integer functions or AMD MMX ext
	SSE                     // SSE functions
	SSE2                    // P4 SSE2 functions
	SSE3                    // Prescott SSE3 functions
	SSSE3                   // Conroe SSSE3 functions
	SSE4                    // Penryn SSE4.1 functions
	SSE4A                   // AMD Barcelona microarchitecture SSE4a instructions
	SSE42                   // Nehalem SSE4.2 functions
	AVX                     // AVX functions
	AVX2                    // AVX2 functions
	FMA3                    // Intel FMA 3
	FMA4                    // Bulldozer FMA4 functions
	XOP                     // Bulldozer XOP functions
	F16C                    // Half-precision floating-point conversion
	BMI1                    // Bit Manipulation Instruction Set 1
	BMI2                    // Bit Manipulation Instruction Set 2
	TBM                     // AMD Trailing Bit Manipulation
	LZCNT                   // LZCNT instruction
	POPCNT                  // POPCNT instruction
	AESNI                   // Advanced Encryption Standard New Instructions
	CLMUL                   // Carry-less Multiplication
	HTT                     // Hyperthreading (enabled)
	HLE                     // Hardware Lock Elision
	RTM                     // Restricted Transactional Memory
	RDRAND                  // RDRAND instruction is available
	RDSEED                  // RDSEED instruction is available
	ADX                     // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	SHA                     // Intel SHA Extensions
	AVX512F                 // AVX-512 Foundation
	AVX512DQ                // AVX-512 Doubleword and Quadword Instructions
	AVX512IFMA              // AVX-512 Integer Fused Multiply-Add Instructions
	AVX512PF                // AVX-512 Prefetch Instructions
	AVX512ER                // AVX-512 Exponential and Reciprocal Instructions
	AVX512CD                // AVX-512 Conflict Detection Instructions
	AVX512BW                // AVX-512 Byte and Word Instructions
	AVX512VL                // AVX-512 Vector Length Extensions
	AVX512VBMI              // AVX-512 Vector Bit Manipulation Instructions
	MPX                     // Intel MPX (Memory Protection Extensions)
	ERMS                    // Enhanced REP MOVSB/STOSB
	RDTSCP                  // RDTSCP Instruction
	CX16                    // CMPXCHG16B Instruction
	SGX                     // Software Guard Extensions

	// Performance indicators
	SSE2SLOW // SSE2 is supported, but usually not faster
	SSE3SLOW // SSE3 is supported, but usually not faster
	ATOM     // Atom processor, some SSSE3 instructions are slower
)
// flagNames maps each single-bit feature flag to the human-readable name
// that Flags.Strings emits for it.
var flagNames = map[Flags]string{
	CMOV:        "CMOV",        // i686 CMOV
	NX:          "NX",          // NX (No-Execute) bit
	AMD3DNOW:    "AMD3DNOW",    // AMD 3DNOW
	AMD3DNOWEXT: "AMD3DNOWEXT", // AMD 3DNowExt
	MMX:         "MMX",         // Standard MMX
	MMXEXT:      "MMXEXT",      // SSE integer functions or AMD MMX ext
	SSE:         "SSE",         // SSE functions
	SSE2:        "SSE2",        // P4 SSE2 functions
	SSE3:        "SSE3",        // Prescott SSE3 functions
	SSSE3:       "SSSE3",       // Conroe SSSE3 functions
	SSE4:        "SSE4.1",      // Penryn SSE4.1 functions
	SSE4A:       "SSE4A",       // AMD Barcelona microarchitecture SSE4a instructions
	SSE42:       "SSE4.2",      // Nehalem SSE4.2 functions
	AVX:         "AVX",         // AVX functions
	AVX2:        "AVX2",        // AVX2 functions
	FMA3:        "FMA3",        // Intel FMA 3
	FMA4:        "FMA4",        // Bulldozer FMA4 functions
	XOP:         "XOP",         // Bulldozer XOP functions
	F16C:        "F16C",        // Half-precision floating-point conversion
	BMI1:        "BMI1",        // Bit Manipulation Instruction Set 1
	BMI2:        "BMI2",        // Bit Manipulation Instruction Set 2
	TBM:         "TBM",         // AMD Trailing Bit Manipulation
	LZCNT:       "LZCNT",       // LZCNT instruction
	POPCNT:      "POPCNT",      // POPCNT instruction
	AESNI:       "AESNI",       // Advanced Encryption Standard New Instructions
	CLMUL:       "CLMUL",       // Carry-less Multiplication
	HTT:         "HTT",         // Hyperthreading (enabled)
	HLE:         "HLE",         // Hardware Lock Elision
	RTM:         "RTM",         // Restricted Transactional Memory
	RDRAND:      "RDRAND",      // RDRAND instruction is available
	RDSEED:      "RDSEED",      // RDSEED instruction is available
	ADX:         "ADX",         // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	SHA:         "SHA",         // Intel SHA Extensions
	AVX512F:     "AVX512F",     // AVX-512 Foundation
	AVX512DQ:    "AVX512DQ",    // AVX-512 Doubleword and Quadword Instructions
	AVX512IFMA:  "AVX512IFMA",  // AVX-512 Integer Fused Multiply-Add Instructions
	AVX512PF:    "AVX512PF",    // AVX-512 Prefetch Instructions
	AVX512ER:    "AVX512ER",    // AVX-512 Exponential and Reciprocal Instructions
	AVX512CD:    "AVX512CD",    // AVX-512 Conflict Detection Instructions
	AVX512BW:    "AVX512BW",    // AVX-512 Byte and Word Instructions
	AVX512VL:    "AVX512VL",    // AVX-512 Vector Length Extensions
	AVX512VBMI:  "AVX512VBMI",  // AVX-512 Vector Bit Manipulation Instructions
	MPX:         "MPX",         // Intel MPX (Memory Protection Extensions)
	ERMS:        "ERMS",        // Enhanced REP MOVSB/STOSB
	RDTSCP:      "RDTSCP",      // RDTSCP Instruction
	CX16:        "CX16",        // CMPXCHG16B Instruction
	SGX:         "SGX",         // Software Guard Extensions

	// Performance indicators
	SSE2SLOW: "SSE2SLOW", // SSE2 supported, but usually not faster
	SSE3SLOW: "SSE3SLOW", // SSE3 supported, but usually not faster
	ATOM:     "ATOM",     // Atom processor, some SSSE3 instructions are slower
}
// CPUInfo contains information about the detected system CPU.
// All fields are filled in by Detect.
type CPUInfo struct {
	BrandName      string // Brand name reported by the CPU ("unknown" if the CPU does not report one)
	VendorID       Vendor // Comparable CPU vendor ID
	Features       Flags  // Features of the CPU
	PhysicalCores  int    // Number of physical processor cores in your CPU. Will be 0 if undetectable.
	ThreadsPerCore int    // Number of threads per physical core. Will be 1 if undetectable.
	LogicalCores   int    // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
	Family         int    // CPU family number
	Model          int    // CPU model number
	CacheLine      int    // Cache line size in bytes. Will be 0 if undetectable.
	Cache          struct {
		L1I int // L1 Instruction Cache size in bytes (per core or shared). Will be -1 if undetected
		L1D int // L1 Data Cache size in bytes (per core or shared). Will be -1 if undetected
		L2  int // L2 Cache size in bytes (per core or shared). Will be -1 if undetected
		L3  int // L3 Cache size in bytes (per core or shared). Will be -1 if undetected
	}
	SGX       SGXSupport // Intel SGX details; only populated when the SGX feature flag is set
	maxFunc   uint32     // Highest supported standard CPUID function (EAX of CPUID leaf 0)
	maxExFunc uint32     // Highest supported extended CPUID function (EAX of CPUID leaf 0x80000000)
}
// Low-level hooks used by every detection routine in this file. They are nil
// here and must be assigned before Detect runs.
// NOTE(review): presumably initCPU (defined outside this file) installs the
// assembly implementations — confirm.

// cpuid queries CPUID function op and returns the four result registers.
var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)

// cpuidex queries CPUID function op with sub-function op2.
var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32)

// xgetbv reads the extended control register selected by index.
var xgetbv func(index uint32) (eax, edx uint32)

// rdtscpAsm executes RDTSCP; callers in this file use eax/edx as the low/high
// halves of the counter and ecx as IA32_TSC_AUX.
var rdtscpAsm func() (eax, ebx, ecx, edx uint32)
// CPU contains information about the CPU as detected on startup,
// or when Detect last was called.
//
// Use this as the primary entry point to your data; the values are detected
// once and kept, so queries against it are fast.
var CPU CPUInfo
// init prepares the package at program start: initCPU runs first, then Detect
// fills in the exported CPU variable.
// NOTE(review): initCPU is defined outside this file; presumably it installs
// the cpuid/cpuidex/xgetbv/rdtscpAsm hooks that Detect relies on — confirm.
func init() {
	initCPU()
	Detect()
}
  167. // Detect will re-detect current CPU info.
  168. // This will replace the content of the exported CPU variable.
  169. //
  170. // Unless you expect the CPU to change while you are running your program
  171. // you should not need to call this function.
  172. // If you call this, you must ensure that no other goroutine is accessing the
  173. // exported CPU variable.
  174. func Detect() {
  175. CPU.maxFunc = maxFunctionID()
  176. CPU.maxExFunc = maxExtendedFunction()
  177. CPU.BrandName = brandName()
  178. CPU.CacheLine = cacheLine()
  179. CPU.Family, CPU.Model = familyModel()
  180. CPU.Features = support()
  181. CPU.SGX = hasSGX(CPU.Features&SGX != 0)
  182. CPU.ThreadsPerCore = threadsPerCore()
  183. CPU.LogicalCores = logicalCores()
  184. CPU.PhysicalCores = physicalCores()
  185. CPU.VendorID = vendorID()
  186. CPU.cacheSize()
  187. }
  188. // Generated here: http://play.golang.org/p/BxFH2Gdc0G
  189. // Cmov indicates support of CMOV instructions
  190. func (c CPUInfo) Cmov() bool {
  191. return c.Features&CMOV != 0
  192. }
  193. // Amd3dnow indicates support of AMD 3DNOW! instructions
  194. func (c CPUInfo) Amd3dnow() bool {
  195. return c.Features&AMD3DNOW != 0
  196. }
  197. // Amd3dnowExt indicates support of AMD 3DNOW! Extended instructions
  198. func (c CPUInfo) Amd3dnowExt() bool {
  199. return c.Features&AMD3DNOWEXT != 0
  200. }
  201. // MMX indicates support of MMX instructions
  202. func (c CPUInfo) MMX() bool {
  203. return c.Features&MMX != 0
  204. }
  205. // MMXExt indicates support of MMXEXT instructions
  206. // (SSE integer functions or AMD MMX ext)
  207. func (c CPUInfo) MMXExt() bool {
  208. return c.Features&MMXEXT != 0
  209. }
  210. // SSE indicates support of SSE instructions
  211. func (c CPUInfo) SSE() bool {
  212. return c.Features&SSE != 0
  213. }
  214. // SSE2 indicates support of SSE 2 instructions
  215. func (c CPUInfo) SSE2() bool {
  216. return c.Features&SSE2 != 0
  217. }
  218. // SSE3 indicates support of SSE 3 instructions
  219. func (c CPUInfo) SSE3() bool {
  220. return c.Features&SSE3 != 0
  221. }
  222. // SSSE3 indicates support of SSSE 3 instructions
  223. func (c CPUInfo) SSSE3() bool {
  224. return c.Features&SSSE3 != 0
  225. }
  226. // SSE4 indicates support of SSE 4 (also called SSE 4.1) instructions
  227. func (c CPUInfo) SSE4() bool {
  228. return c.Features&SSE4 != 0
  229. }
  230. // SSE42 indicates support of SSE4.2 instructions
  231. func (c CPUInfo) SSE42() bool {
  232. return c.Features&SSE42 != 0
  233. }
  234. // AVX indicates support of AVX instructions
  235. // and operating system support of AVX instructions
  236. func (c CPUInfo) AVX() bool {
  237. return c.Features&AVX != 0
  238. }
  239. // AVX2 indicates support of AVX2 instructions
  240. func (c CPUInfo) AVX2() bool {
  241. return c.Features&AVX2 != 0
  242. }
  243. // FMA3 indicates support of FMA3 instructions
  244. func (c CPUInfo) FMA3() bool {
  245. return c.Features&FMA3 != 0
  246. }
  247. // FMA4 indicates support of FMA4 instructions
  248. func (c CPUInfo) FMA4() bool {
  249. return c.Features&FMA4 != 0
  250. }
  251. // XOP indicates support of XOP instructions
  252. func (c CPUInfo) XOP() bool {
  253. return c.Features&XOP != 0
  254. }
  255. // F16C indicates support of F16C instructions
  256. func (c CPUInfo) F16C() bool {
  257. return c.Features&F16C != 0
  258. }
  259. // BMI1 indicates support of BMI1 instructions
  260. func (c CPUInfo) BMI1() bool {
  261. return c.Features&BMI1 != 0
  262. }
  263. // BMI2 indicates support of BMI2 instructions
  264. func (c CPUInfo) BMI2() bool {
  265. return c.Features&BMI2 != 0
  266. }
  267. // TBM indicates support of TBM instructions
  268. // (AMD Trailing Bit Manipulation)
  269. func (c CPUInfo) TBM() bool {
  270. return c.Features&TBM != 0
  271. }
  272. // Lzcnt indicates support of LZCNT instruction
  273. func (c CPUInfo) Lzcnt() bool {
  274. return c.Features&LZCNT != 0
  275. }
  276. // Popcnt indicates support of POPCNT instruction
  277. func (c CPUInfo) Popcnt() bool {
  278. return c.Features&POPCNT != 0
  279. }
  280. // HTT indicates the processor has Hyperthreading enabled
  281. func (c CPUInfo) HTT() bool {
  282. return c.Features&HTT != 0
  283. }
  284. // SSE2Slow indicates that SSE2 may be slow on this processor
  285. func (c CPUInfo) SSE2Slow() bool {
  286. return c.Features&SSE2SLOW != 0
  287. }
  288. // SSE3Slow indicates that SSE3 may be slow on this processor
  289. func (c CPUInfo) SSE3Slow() bool {
  290. return c.Features&SSE3SLOW != 0
  291. }
  292. // AesNi indicates support of AES-NI instructions
  293. // (Advanced Encryption Standard New Instructions)
  294. func (c CPUInfo) AesNi() bool {
  295. return c.Features&AESNI != 0
  296. }
  297. // Clmul indicates support of CLMUL instructions
  298. // (Carry-less Multiplication)
  299. func (c CPUInfo) Clmul() bool {
  300. return c.Features&CLMUL != 0
  301. }
  302. // NX indicates support of NX (No-Execute) bit
  303. func (c CPUInfo) NX() bool {
  304. return c.Features&NX != 0
  305. }
  306. // SSE4A indicates support of AMD Barcelona microarchitecture SSE4a instructions
  307. func (c CPUInfo) SSE4A() bool {
  308. return c.Features&SSE4A != 0
  309. }
  310. // HLE indicates support of Hardware Lock Elision
  311. func (c CPUInfo) HLE() bool {
  312. return c.Features&HLE != 0
  313. }
  314. // RTM indicates support of Restricted Transactional Memory
  315. func (c CPUInfo) RTM() bool {
  316. return c.Features&RTM != 0
  317. }
  318. // Rdrand indicates support of RDRAND instruction is available
  319. func (c CPUInfo) Rdrand() bool {
  320. return c.Features&RDRAND != 0
  321. }
  322. // Rdseed indicates support of RDSEED instruction is available
  323. func (c CPUInfo) Rdseed() bool {
  324. return c.Features&RDSEED != 0
  325. }
  326. // ADX indicates support of Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
  327. func (c CPUInfo) ADX() bool {
  328. return c.Features&ADX != 0
  329. }
  330. // SHA indicates support of Intel SHA Extensions
  331. func (c CPUInfo) SHA() bool {
  332. return c.Features&SHA != 0
  333. }
  334. // AVX512F indicates support of AVX-512 Foundation
  335. func (c CPUInfo) AVX512F() bool {
  336. return c.Features&AVX512F != 0
  337. }
  338. // AVX512DQ indicates support of AVX-512 Doubleword and Quadword Instructions
  339. func (c CPUInfo) AVX512DQ() bool {
  340. return c.Features&AVX512DQ != 0
  341. }
  342. // AVX512IFMA indicates support of AVX-512 Integer Fused Multiply-Add Instructions
  343. func (c CPUInfo) AVX512IFMA() bool {
  344. return c.Features&AVX512IFMA != 0
  345. }
  346. // AVX512PF indicates support of AVX-512 Prefetch Instructions
  347. func (c CPUInfo) AVX512PF() bool {
  348. return c.Features&AVX512PF != 0
  349. }
  350. // AVX512ER indicates support of AVX-512 Exponential and Reciprocal Instructions
  351. func (c CPUInfo) AVX512ER() bool {
  352. return c.Features&AVX512ER != 0
  353. }
  354. // AVX512CD indicates support of AVX-512 Conflict Detection Instructions
  355. func (c CPUInfo) AVX512CD() bool {
  356. return c.Features&AVX512CD != 0
  357. }
  358. // AVX512BW indicates support of AVX-512 Byte and Word Instructions
  359. func (c CPUInfo) AVX512BW() bool {
  360. return c.Features&AVX512BW != 0
  361. }
  362. // AVX512VL indicates support of AVX-512 Vector Length Extensions
  363. func (c CPUInfo) AVX512VL() bool {
  364. return c.Features&AVX512VL != 0
  365. }
  366. // AVX512VBMI indicates support of AVX-512 Vector Bit Manipulation Instructions
  367. func (c CPUInfo) AVX512VBMI() bool {
  368. return c.Features&AVX512VBMI != 0
  369. }
  370. // MPX indicates support of Intel MPX (Memory Protection Extensions)
  371. func (c CPUInfo) MPX() bool {
  372. return c.Features&MPX != 0
  373. }
  374. // ERMS indicates support of Enhanced REP MOVSB/STOSB
  375. func (c CPUInfo) ERMS() bool {
  376. return c.Features&ERMS != 0
  377. }
  378. // RDTSCP Instruction is available.
  379. func (c CPUInfo) RDTSCP() bool {
  380. return c.Features&RDTSCP != 0
  381. }
  382. // CX16 indicates if CMPXCHG16B instruction is available.
  383. func (c CPUInfo) CX16() bool {
  384. return c.Features&CX16 != 0
  385. }
  386. // TSX is split into HLE (Hardware Lock Elision) and RTM (Restricted Transactional Memory) detection.
  387. // So TSX simply checks that.
  388. func (c CPUInfo) TSX() bool {
  389. return c.Features&(MPX|RTM) == MPX|RTM
  390. }
  391. // Atom indicates an Atom processor
  392. func (c CPUInfo) Atom() bool {
  393. return c.Features&ATOM != 0
  394. }
  395. // Intel returns true if vendor is recognized as Intel
  396. func (c CPUInfo) Intel() bool {
  397. return c.VendorID == Intel
  398. }
  399. // AMD returns true if vendor is recognized as AMD
  400. func (c CPUInfo) AMD() bool {
  401. return c.VendorID == AMD
  402. }
  403. // Transmeta returns true if vendor is recognized as Transmeta
  404. func (c CPUInfo) Transmeta() bool {
  405. return c.VendorID == Transmeta
  406. }
  407. // NSC returns true if vendor is recognized as National Semiconductor
  408. func (c CPUInfo) NSC() bool {
  409. return c.VendorID == NSC
  410. }
  411. // VIA returns true if vendor is recognized as VIA
  412. func (c CPUInfo) VIA() bool {
  413. return c.VendorID == VIA
  414. }
  415. // RTCounter returns the 64-bit time-stamp counter
  416. // Uses the RDTSCP instruction. The value 0 is returned
  417. // if the CPU does not support the instruction.
  418. func (c CPUInfo) RTCounter() uint64 {
  419. if !c.RDTSCP() {
  420. return 0
  421. }
  422. a, _, _, d := rdtscpAsm()
  423. return uint64(a) | (uint64(d) << 32)
  424. }
  425. // Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP.
  426. // This variable is OS dependent, but on Linux contains information
  427. // about the current cpu/core the code is running on.
  428. // If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned.
  429. func (c CPUInfo) Ia32TscAux() uint32 {
  430. if !c.RDTSCP() {
  431. return 0
  432. }
  433. _, _, ecx, _ := rdtscpAsm()
  434. return ecx
  435. }
  436. // LogicalCPU will return the Logical CPU the code is currently executing on.
  437. // This is likely to change when the OS re-schedules the running thread
  438. // to another CPU.
  439. // If the current core cannot be detected, -1 will be returned.
  440. func (c CPUInfo) LogicalCPU() int {
  441. if c.maxFunc < 1 {
  442. return -1
  443. }
  444. _, ebx, _, _ := cpuid(1)
  445. return int(ebx >> 24)
  446. }
  447. // VM Will return true if the cpu id indicates we are in
  448. // a virtual machine. This is only a hint, and will very likely
  449. // have many false negatives.
  450. func (c CPUInfo) VM() bool {
  451. switch c.VendorID {
  452. case MSVM, KVM, VMware, XenHVM:
  453. return true
  454. }
  455. return false
  456. }
// Flags contains detected CPU features and characteristics as a bit set;
// each bit corresponds to one of the feature flag constants (CMOV, NX, ...).
type Flags uint64
  459. // String returns a string representation of the detected
  460. // CPU features.
  461. func (f Flags) String() string {
  462. return strings.Join(f.Strings(), ",")
  463. }
  464. // Strings returns and array of the detected features.
  465. func (f Flags) Strings() []string {
  466. s := support()
  467. r := make([]string, 0, 20)
  468. for i := uint(0); i < 64; i++ {
  469. key := Flags(1 << i)
  470. val := flagNames[key]
  471. if s&key != 0 {
  472. r = append(r, val)
  473. }
  474. }
  475. return r
  476. }
  477. func maxExtendedFunction() uint32 {
  478. eax, _, _, _ := cpuid(0x80000000)
  479. return eax
  480. }
  481. func maxFunctionID() uint32 {
  482. a, _, _, _ := cpuid(0)
  483. return a
  484. }
  485. func brandName() string {
  486. if maxExtendedFunction() >= 0x80000004 {
  487. v := make([]uint32, 0, 48)
  488. for i := uint32(0); i < 3; i++ {
  489. a, b, c, d := cpuid(0x80000002 + i)
  490. v = append(v, a, b, c, d)
  491. }
  492. return strings.Trim(string(valAsString(v...)), " ")
  493. }
  494. return "unknown"
  495. }
  496. func threadsPerCore() int {
  497. mfi := maxFunctionID()
  498. if mfi < 0x4 || vendorID() != Intel {
  499. return 1
  500. }
  501. if mfi < 0xb {
  502. _, b, _, d := cpuid(1)
  503. if (d & (1 << 28)) != 0 {
  504. // v will contain logical core count
  505. v := (b >> 16) & 255
  506. if v > 1 {
  507. a4, _, _, _ := cpuid(4)
  508. // physical cores
  509. v2 := (a4 >> 26) + 1
  510. if v2 > 0 {
  511. return int(v) / int(v2)
  512. }
  513. }
  514. }
  515. return 1
  516. }
  517. _, b, _, _ := cpuidex(0xb, 0)
  518. if b&0xffff == 0 {
  519. return 1
  520. }
  521. return int(b & 0xffff)
  522. }
  523. func logicalCores() int {
  524. mfi := maxFunctionID()
  525. switch vendorID() {
  526. case Intel:
  527. // Use this on old Intel processors
  528. if mfi < 0xb {
  529. if mfi < 1 {
  530. return 0
  531. }
  532. // CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID)
  533. // that can be assigned to logical processors in a physical package.
  534. // The value may not be the same as the number of logical processors that are present in the hardware of a physical package.
  535. _, ebx, _, _ := cpuid(1)
  536. logical := (ebx >> 16) & 0xff
  537. return int(logical)
  538. }
  539. _, b, _, _ := cpuidex(0xb, 1)
  540. return int(b & 0xffff)
  541. case AMD:
  542. _, b, _, _ := cpuid(1)
  543. return int((b >> 16) & 0xff)
  544. default:
  545. return 0
  546. }
  547. }
  548. func familyModel() (int, int) {
  549. if maxFunctionID() < 0x1 {
  550. return 0, 0
  551. }
  552. eax, _, _, _ := cpuid(1)
  553. family := ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff)
  554. model := ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0)
  555. return int(family), int(model)
  556. }
  557. func physicalCores() int {
  558. switch vendorID() {
  559. case Intel:
  560. return logicalCores() / threadsPerCore()
  561. case AMD:
  562. if maxExtendedFunction() >= 0x80000008 {
  563. _, _, c, _ := cpuid(0x80000008)
  564. return int(c&0xff) + 1
  565. }
  566. }
  567. return 0
  568. }
// vendorMapping translates the 12-character CPUID vendor string into a Vendor.
// Excerpt from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID
var vendorMapping = map[string]Vendor{
	"AMDisbetter!": AMD,
	"AuthenticAMD": AMD,
	"CentaurHauls": VIA,
	"GenuineIntel": Intel,
	"TransmetaCPU": Transmeta,
	"GenuineTMx86": Transmeta,
	"Geode by NSC": NSC,
	"VIA VIA VIA ": VIA,
	"KVMKVMKVMKVM": KVM,
	"Microsoft Hv": MSVM,
	"VMwareVMware": VMware,
	"XenVMMXenVMM": XenHVM,
}
  584. func vendorID() Vendor {
  585. _, b, c, d := cpuid(0)
  586. v := valAsString(b, d, c)
  587. vend, ok := vendorMapping[string(v)]
  588. if !ok {
  589. return Other
  590. }
  591. return vend
  592. }
  593. func cacheLine() int {
  594. if maxFunctionID() < 0x1 {
  595. return 0
  596. }
  597. _, ebx, _, _ := cpuid(1)
  598. cache := (ebx & 0xff00) >> 5 // cflush size
  599. if cache == 0 && maxExtendedFunction() >= 0x80000006 {
  600. _, _, ecx, _ := cpuid(0x80000006)
  601. cache = ecx & 0xff // cacheline size
  602. }
  603. // TODO: Read from Cache and TLB Information
  604. return int(cache)
  605. }
  606. func (c *CPUInfo) cacheSize() {
  607. c.Cache.L1D = -1
  608. c.Cache.L1I = -1
  609. c.Cache.L2 = -1
  610. c.Cache.L3 = -1
  611. vendor := vendorID()
  612. switch vendor {
  613. case Intel:
  614. if maxFunctionID() < 4 {
  615. return
  616. }
  617. for i := uint32(0); ; i++ {
  618. eax, ebx, ecx, _ := cpuidex(4, i)
  619. cacheType := eax & 15
  620. if cacheType == 0 {
  621. break
  622. }
  623. cacheLevel := (eax >> 5) & 7
  624. coherency := int(ebx&0xfff) + 1
  625. partitions := int((ebx>>12)&0x3ff) + 1
  626. associativity := int((ebx>>22)&0x3ff) + 1
  627. sets := int(ecx) + 1
  628. size := associativity * partitions * coherency * sets
  629. switch cacheLevel {
  630. case 1:
  631. if cacheType == 1 {
  632. // 1 = Data Cache
  633. c.Cache.L1D = size
  634. } else if cacheType == 2 {
  635. // 2 = Instruction Cache
  636. c.Cache.L1I = size
  637. } else {
  638. if c.Cache.L1D < 0 {
  639. c.Cache.L1I = size
  640. }
  641. if c.Cache.L1I < 0 {
  642. c.Cache.L1I = size
  643. }
  644. }
  645. case 2:
  646. c.Cache.L2 = size
  647. case 3:
  648. c.Cache.L3 = size
  649. }
  650. }
  651. case AMD:
  652. // Untested.
  653. if maxExtendedFunction() < 0x80000005 {
  654. return
  655. }
  656. _, _, ecx, edx := cpuid(0x80000005)
  657. c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024)
  658. c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024)
  659. if maxExtendedFunction() < 0x80000006 {
  660. return
  661. }
  662. _, _, ecx, _ = cpuid(0x80000006)
  663. c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024)
  664. }
  665. return
  666. }
// SGXSupport describes the Intel SGX (Software Guard Extensions) capabilities
// reported by the CPU. It is filled in by hasSGX; when Available is false all
// other fields are left at their zero values.
type SGXSupport struct {
	Available           bool  // The SGX feature flag is set
	SGX1Supported       bool  // Bit 0 of EAX from CPUID function 0x12, sub-function 0
	SGX2Supported       bool  // Bit 1 of EAX from CPUID function 0x12, sub-function 0
	MaxEnclaveSizeNot64 int64 // 2^EDX[7:0] of the same function; presumably the limit outside 64-bit mode — TODO confirm
	MaxEnclaveSize64    int64 // 2^EDX[15:8] of the same function; presumably the limit in 64-bit mode — TODO confirm
}
  674. func hasSGX(available bool) (rval SGXSupport) {
  675. rval.Available = available
  676. if !available {
  677. return
  678. }
  679. a, _, _, d := cpuidex(0x12, 0)
  680. rval.SGX1Supported = a&0x01 != 0
  681. rval.SGX2Supported = a&0x02 != 0
  682. rval.MaxEnclaveSizeNot64 = 1 << (d & 0xFF) // pow 2
  683. rval.MaxEnclaveSize64 = 1 << ((d >> 8) & 0xFF) // pow 2
  684. return
  685. }
  686. func support() Flags {
  687. mfi := maxFunctionID()
  688. vend := vendorID()
  689. if mfi < 0x1 {
  690. return 0
  691. }
  692. rval := uint64(0)
  693. _, _, c, d := cpuid(1)
  694. if (d & (1 << 15)) != 0 {
  695. rval |= CMOV
  696. }
  697. if (d & (1 << 23)) != 0 {
  698. rval |= MMX
  699. }
  700. if (d & (1 << 25)) != 0 {
  701. rval |= MMXEXT
  702. }
  703. if (d & (1 << 25)) != 0 {
  704. rval |= SSE
  705. }
  706. if (d & (1 << 26)) != 0 {
  707. rval |= SSE2
  708. }
  709. if (c & 1) != 0 {
  710. rval |= SSE3
  711. }
  712. if (c & 0x00000200) != 0 {
  713. rval |= SSSE3
  714. }
  715. if (c & 0x00080000) != 0 {
  716. rval |= SSE4
  717. }
  718. if (c & 0x00100000) != 0 {
  719. rval |= SSE42
  720. }
  721. if (c & (1 << 25)) != 0 {
  722. rval |= AESNI
  723. }
  724. if (c & (1 << 1)) != 0 {
  725. rval |= CLMUL
  726. }
  727. if c&(1<<23) != 0 {
  728. rval |= POPCNT
  729. }
  730. if c&(1<<30) != 0 {
  731. rval |= RDRAND
  732. }
  733. if c&(1<<29) != 0 {
  734. rval |= F16C
  735. }
  736. if c&(1<<13) != 0 {
  737. rval |= CX16
  738. }
  739. if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 {
  740. if threadsPerCore() > 1 {
  741. rval |= HTT
  742. }
  743. }
  744. // Check XGETBV, OXSAVE and AVX bits
  745. if c&(1<<26) != 0 && c&(1<<27) != 0 && c&(1<<28) != 0 {
  746. // Check for OS support
  747. eax, _ := xgetbv(0)
  748. if (eax & 0x6) == 0x6 {
  749. rval |= AVX
  750. if (c & 0x00001000) != 0 {
  751. rval |= FMA3
  752. }
  753. }
  754. }
  755. // Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
  756. if mfi >= 7 {
  757. _, ebx, ecx, _ := cpuidex(7, 0)
  758. if (rval&AVX) != 0 && (ebx&0x00000020) != 0 {
  759. rval |= AVX2
  760. }
  761. if (ebx & 0x00000008) != 0 {
  762. rval |= BMI1
  763. if (ebx & 0x00000100) != 0 {
  764. rval |= BMI2
  765. }
  766. }
  767. if ebx&(1<<2) != 0 {
  768. rval |= SGX
  769. }
  770. if ebx&(1<<4) != 0 {
  771. rval |= HLE
  772. }
  773. if ebx&(1<<9) != 0 {
  774. rval |= ERMS
  775. }
  776. if ebx&(1<<11) != 0 {
  777. rval |= RTM
  778. }
  779. if ebx&(1<<14) != 0 {
  780. rval |= MPX
  781. }
  782. if ebx&(1<<18) != 0 {
  783. rval |= RDSEED
  784. }
  785. if ebx&(1<<19) != 0 {
  786. rval |= ADX
  787. }
  788. if ebx&(1<<29) != 0 {
  789. rval |= SHA
  790. }
  791. // Only detect AVX-512 features if XGETBV is supported
  792. if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
  793. // Check for OS support
  794. eax, _ := xgetbv(0)
  795. // Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and
  796. // ZMM16-ZMM31 state are enabled by OS)
  797. /// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS).
  798. if (eax>>5)&7 == 7 && (eax>>1)&3 == 3 {
  799. if ebx&(1<<16) != 0 {
  800. rval |= AVX512F
  801. }
  802. if ebx&(1<<17) != 0 {
  803. rval |= AVX512DQ
  804. }
  805. if ebx&(1<<21) != 0 {
  806. rval |= AVX512IFMA
  807. }
  808. if ebx&(1<<26) != 0 {
  809. rval |= AVX512PF
  810. }
  811. if ebx&(1<<27) != 0 {
  812. rval |= AVX512ER
  813. }
  814. if ebx&(1<<28) != 0 {
  815. rval |= AVX512CD
  816. }
  817. if ebx&(1<<30) != 0 {
  818. rval |= AVX512BW
  819. }
  820. if ebx&(1<<31) != 0 {
  821. rval |= AVX512VL
  822. }
  823. // ecx
  824. if ecx&(1<<1) != 0 {
  825. rval |= AVX512VBMI
  826. }
  827. }
  828. }
  829. }
  830. if maxExtendedFunction() >= 0x80000001 {
  831. _, _, c, d := cpuid(0x80000001)
  832. if (c & (1 << 5)) != 0 {
  833. rval |= LZCNT
  834. rval |= POPCNT
  835. }
  836. if (d & (1 << 31)) != 0 {
  837. rval |= AMD3DNOW
  838. }
  839. if (d & (1 << 30)) != 0 {
  840. rval |= AMD3DNOWEXT
  841. }
  842. if (d & (1 << 23)) != 0 {
  843. rval |= MMX
  844. }
  845. if (d & (1 << 22)) != 0 {
  846. rval |= MMXEXT
  847. }
  848. if (c & (1 << 6)) != 0 {
  849. rval |= SSE4A
  850. }
  851. if d&(1<<20) != 0 {
  852. rval |= NX
  853. }
  854. if d&(1<<27) != 0 {
  855. rval |= RDTSCP
  856. }
  857. /* Allow for selectively disabling SSE2 functions on AMD processors
  858. with SSE2 support but not SSE4a. This includes Athlon64, some
  859. Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster
  860. than SSE2 often enough to utilize this special-case flag.
  861. AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are both set in this case
  862. so that SSE2 is used unless explicitly disabled by checking
  863. AV_CPU_FLAG_SSE2SLOW. */
  864. if vendorID() != Intel &&
  865. rval&SSE2 != 0 && (c&0x00000040) == 0 {
  866. rval |= SSE2SLOW
  867. }
  868. /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
  869. * used unless the OS has AVX support. */
  870. if (rval & AVX) != 0 {
  871. if (c & 0x00000800) != 0 {
  872. rval |= XOP
  873. }
  874. if (c & 0x00010000) != 0 {
  875. rval |= FMA4
  876. }
  877. }
  878. if vendorID() == Intel {
  879. family, model := familyModel()
  880. if family == 6 && (model == 9 || model == 13 || model == 14) {
  881. /* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and
  882. * 6/14 (core1 "yonah") theoretically support sse2, but it's
  883. * usually slower than mmx. */
  884. if (rval & SSE2) != 0 {
  885. rval |= SSE2SLOW
  886. }
  887. if (rval & SSE3) != 0 {
  888. rval |= SSE3SLOW
  889. }
  890. }
  891. /* The Atom processor has SSSE3 support, which is useful in many cases,
  892. * but sometimes the SSSE3 version is slower than the SSE2 equivalent
  893. * on the Atom, but is generally faster on other processors supporting
  894. * SSSE3. This flag allows for selectively disabling certain SSSE3
  895. * functions on the Atom. */
  896. if family == 6 && model == 28 {
  897. rval |= ATOM
  898. }
  899. }
  900. }
  901. return Flags(rval)
  902. }
  903. func valAsString(values ...uint32) []byte {
  904. r := make([]byte, 4*len(values))
  905. for i, v := range values {
  906. dst := r[i*4:]
  907. dst[0] = byte(v & 0xff)
  908. dst[1] = byte((v >> 8) & 0xff)
  909. dst[2] = byte((v >> 16) & 0xff)
  910. dst[3] = byte((v >> 24) & 0xff)
  911. switch {
  912. case dst[0] == 0:
  913. return r[:i*4]
  914. case dst[1] == 0:
  915. return r[:i*4+1]
  916. case dst[2] == 0:
  917. return r[:i*4+2]
  918. case dst[3] == 0:
  919. return r[:i*4+3]
  920. }
  921. }
  922. return r
  923. }