sampler.go 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145
  1. package internal
  2. import (
  3. "runtime"
  4. "time"
  5. "github.com/newrelic/go-agent/internal/logger"
  6. "github.com/newrelic/go-agent/internal/sysinfo"
  7. )
  8. // Sample is a system/runtime snapshot.
  9. type Sample struct {
  10. when time.Time
  11. memStats runtime.MemStats
  12. usage sysinfo.Usage
  13. numGoroutine int
  14. numCPU int
  15. }
  // bytesToMebibytesFloat converts a byte count into mebibytes
  // (1 MiB = 1024 * 1024 bytes), keeping the fractional part.
  func bytesToMebibytesFloat(bts uint64) float64 {
  	const bytesPerMebibyte = 1024 * 1024
  	return float64(bts) / bytesPerMebibyte
  }
  19. // GetSample gathers a new Sample.
  20. func GetSample(now time.Time, lg logger.Logger) *Sample {
  21. s := Sample{
  22. when: now,
  23. numGoroutine: runtime.NumGoroutine(),
  24. numCPU: runtime.NumCPU(),
  25. }
  26. if usage, err := sysinfo.GetUsage(); err == nil {
  27. s.usage = usage
  28. } else {
  29. lg.Warn("unable to usage", map[string]interface{}{
  30. "error": err.Error(),
  31. })
  32. }
  33. runtime.ReadMemStats(&s.memStats)
  34. return &s
  35. }
  // cpuStats describes CPU consumption over a sampling interval.
  type cpuStats struct {
  	// used is the CPU time consumed during the interval.
  	used time.Duration
  	// fraction is used / (elapsed * numCPU).
  	fraction float64
  }
  40. // Stats contains system information for a period of time.
  41. type Stats struct {
  42. numGoroutine int
  43. allocBytes uint64
  44. heapObjects uint64
  45. user cpuStats
  46. system cpuStats
  47. gcPauseFraction float64
  48. deltaNumGC uint32
  49. deltaPauseTotal time.Duration
  50. minPause time.Duration
  51. maxPause time.Duration
  52. }
  53. // Samples is used as the parameter to GetStats to avoid mixing up the previous
  54. // and current sample.
  55. type Samples struct {
  56. Previous *Sample
  57. Current *Sample
  58. }
  59. // GetStats combines two Samples into a Stats.
  60. func GetStats(ss Samples) Stats {
  61. cur := ss.Current
  62. prev := ss.Previous
  63. elapsed := cur.when.Sub(prev.when)
  64. s := Stats{
  65. numGoroutine: cur.numGoroutine,
  66. allocBytes: cur.memStats.Alloc,
  67. heapObjects: cur.memStats.HeapObjects,
  68. }
  69. // CPU Utilization
  70. totalCPUSeconds := elapsed.Seconds() * float64(cur.numCPU)
  71. if prev.usage.User != 0 && cur.usage.User > prev.usage.User {
  72. s.user.used = cur.usage.User - prev.usage.User
  73. s.user.fraction = s.user.used.Seconds() / totalCPUSeconds
  74. }
  75. if prev.usage.System != 0 && cur.usage.System > prev.usage.System {
  76. s.system.used = cur.usage.System - prev.usage.System
  77. s.system.fraction = s.system.used.Seconds() / totalCPUSeconds
  78. }
  79. // GC Pause Fraction
  80. deltaPauseTotalNs := cur.memStats.PauseTotalNs - prev.memStats.PauseTotalNs
  81. frac := float64(deltaPauseTotalNs) / float64(elapsed.Nanoseconds())
  82. s.gcPauseFraction = frac
  83. // GC Pauses
  84. if deltaNumGC := cur.memStats.NumGC - prev.memStats.NumGC; deltaNumGC > 0 {
  85. // In case more than 256 pauses have happened between samples
  86. // and we are examining a subset of the pauses, we ensure that
  87. // the min and max are not on the same side of the average by
  88. // using the average as the starting min and max.
  89. maxPauseNs := deltaPauseTotalNs / uint64(deltaNumGC)
  90. minPauseNs := deltaPauseTotalNs / uint64(deltaNumGC)
  91. for i := prev.memStats.NumGC + 1; i <= cur.memStats.NumGC; i++ {
  92. pause := cur.memStats.PauseNs[(i+255)%256]
  93. if pause > maxPauseNs {
  94. maxPauseNs = pause
  95. }
  96. if pause < minPauseNs {
  97. minPauseNs = pause
  98. }
  99. }
  100. s.deltaPauseTotal = time.Duration(deltaPauseTotalNs) * time.Nanosecond
  101. s.deltaNumGC = deltaNumGC
  102. s.minPause = time.Duration(minPauseNs) * time.Nanosecond
  103. s.maxPause = time.Duration(maxPauseNs) * time.Nanosecond
  104. }
  105. return s
  106. }
  107. // MergeIntoHarvest implements Harvestable.
  108. func (s Stats) MergeIntoHarvest(h *Harvest) {
  109. h.Metrics.addValue(heapObjectsAllocated, "", float64(s.heapObjects), forced)
  110. h.Metrics.addValue(runGoroutine, "", float64(s.numGoroutine), forced)
  111. h.Metrics.addValueExclusive(memoryPhysical, "", bytesToMebibytesFloat(s.allocBytes), 0, forced)
  112. h.Metrics.addValueExclusive(cpuUserUtilization, "", s.user.fraction, 0, forced)
  113. h.Metrics.addValueExclusive(cpuSystemUtilization, "", s.system.fraction, 0, forced)
  114. h.Metrics.addValue(cpuUserTime, "", s.user.used.Seconds(), forced)
  115. h.Metrics.addValue(cpuSystemTime, "", s.system.used.Seconds(), forced)
  116. h.Metrics.addValueExclusive(gcPauseFraction, "", s.gcPauseFraction, 0, forced)
  117. if s.deltaNumGC > 0 {
  118. h.Metrics.add(gcPauses, "", metricData{
  119. countSatisfied: float64(s.deltaNumGC),
  120. totalTolerated: s.deltaPauseTotal.Seconds(),
  121. exclusiveFailed: 0,
  122. min: s.minPause.Seconds(),
  123. max: s.maxPause.Seconds(),
  124. sumSquares: s.deltaPauseTotal.Seconds() * s.deltaPauseTotal.Seconds(),
  125. }, forced)
  126. }
  127. }