tracing.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597
  1. package internal
  2. import (
  3. "bytes"
  4. "errors"
  5. "fmt"
  6. "net/http"
  7. "net/url"
  8. "time"
  9. "github.com/newrelic/go-agent/internal/cat"
  10. "github.com/newrelic/go-agent/internal/sysinfo"
  11. )
  12. // MarshalJSON limits the number of decimals.
  13. func (p *Priority) MarshalJSON() ([]byte, error) {
  14. return []byte(fmt.Sprintf(priorityFormat, *p)), nil
  15. }
  16. // WriteJSON limits the number of decimals.
  17. func (p Priority) WriteJSON(buf *bytes.Buffer) {
  18. fmt.Fprintf(buf, priorityFormat, p)
  19. }
  20. // TxnEvent represents a transaction.
  21. // https://source.datanerd.us/agents/agent-specs/blob/master/Transaction-Events-PORTED.md
  22. // https://newrelic.atlassian.net/wiki/display/eng/Agent+Support+for+Synthetics%3A+Forced+Transaction+Traces+and+Analytic+Events
  23. type TxnEvent struct {
  24. FinalName string
  25. Start time.Time
  26. Duration time.Duration
  27. Queuing time.Duration
  28. Zone ApdexZone
  29. Attrs *Attributes
  30. DatastoreExternalTotals
  31. // CleanURL is not used in txn events, but is used in traced errors which embed TxnEvent.
  32. CleanURL string
  33. CrossProcess TxnCrossProcess
  34. BetterCAT BetterCAT
  35. HasError bool
  36. }
  37. // BetterCAT stores the transaction's priority and all fields related
  38. // to a DistributedTracer's Cross-Application Trace.
  39. type BetterCAT struct {
  40. Enabled bool
  41. Priority Priority
  42. Sampled bool
  43. Inbound *Payload
  44. ID string
  45. }
  46. // TraceID returns the trace id.
  47. func (e BetterCAT) TraceID() string {
  48. if nil != e.Inbound {
  49. return e.Inbound.TracedID
  50. }
  51. return e.ID
  52. }
  53. // TxnData contains the recorded data of a transaction.
  54. type TxnData struct {
  55. TxnEvent
  56. IsWeb bool
  57. Name string // Work in progress name.
  58. Errors TxnErrors // Lazily initialized.
  59. Stop time.Time
  60. ApdexThreshold time.Duration
  61. Exclusive time.Duration
  62. finishedChildren time.Duration
  63. stamp segmentStamp
  64. stack []segmentFrame
  65. SpanEventsEnabled bool
  66. rootSpanID string
  67. spanEvents []*SpanEvent
  68. customSegments map[string]*metricData
  69. datastoreSegments map[DatastoreMetricKey]*metricData
  70. externalSegments map[externalMetricKey]*metricData
  71. TxnTrace
  72. SlowQueriesEnabled bool
  73. SlowQueryThreshold time.Duration
  74. SlowQueries *slowQueries
  75. // These better CAT supportability fields are left outside of
  76. // TxnEvent.BetterCAT to minimize the size of transaction event memory.
  77. DistributedTracingSupport
  78. }
  79. type segmentStamp uint64
  80. type segmentTime struct {
  81. Stamp segmentStamp
  82. Time time.Time
  83. }
  84. // SegmentStartTime is embedded into the top level segments (rather than
  85. // segmentTime) to minimize the structure sizes to minimize allocations.
  86. type SegmentStartTime struct {
  87. Stamp segmentStamp
  88. Depth int
  89. }
  90. type segmentFrame struct {
  91. segmentTime
  92. children time.Duration
  93. spanID string
  94. }
  95. type segmentEnd struct {
  96. start segmentTime
  97. stop segmentTime
  98. duration time.Duration
  99. exclusive time.Duration
  100. SpanID string
  101. ParentID string
  102. }
  103. func (end segmentEnd) spanEvent() *SpanEvent {
  104. if "" == end.SpanID {
  105. return nil
  106. }
  107. return &SpanEvent{
  108. GUID: end.SpanID,
  109. ParentID: end.ParentID,
  110. Timestamp: end.start.Time,
  111. Duration: end.duration,
  112. IsEntrypoint: false,
  113. }
  114. }
  115. const (
  116. datastoreProductUnknown = "Unknown"
  117. datastoreOperationUnknown = "other"
  118. )
  119. // HasErrors indicates whether the transaction had errors.
  120. func (t *TxnData) HasErrors() bool {
  121. return len(t.Errors) > 0
  122. }
  123. func (t *TxnData) time(now time.Time) segmentTime {
  124. // Update the stamp before using it so that a 0 stamp can be special.
  125. t.stamp++
  126. return segmentTime{
  127. Time: now,
  128. Stamp: t.stamp,
  129. }
  130. }
  131. // TracerRootChildren is used to calculate a transaction's exclusive duration.
  132. func TracerRootChildren(t *TxnData) time.Duration {
  133. var lostChildren time.Duration
  134. for i := 0; i < len(t.stack); i++ {
  135. lostChildren += t.stack[i].children
  136. }
  137. return t.finishedChildren + lostChildren
  138. }
  139. // StartSegment begins a segment.
  140. func StartSegment(t *TxnData, now time.Time) SegmentStartTime {
  141. tm := t.time(now)
  142. t.stack = append(t.stack, segmentFrame{
  143. segmentTime: tm,
  144. children: 0,
  145. })
  146. return SegmentStartTime{
  147. Stamp: tm.Stamp,
  148. Depth: len(t.stack) - 1,
  149. }
  150. }
  151. // NewSpanID returns a random identifier in the format used for spans and
  152. // transactions.
  153. func NewSpanID() string {
  154. bits := RandUint64()
  155. return fmt.Sprintf("%016x", bits)
  156. }
  157. func (t *TxnData) getRootSpanID() string {
  158. if "" == t.rootSpanID {
  159. t.rootSpanID = NewSpanID()
  160. }
  161. return t.rootSpanID
  162. }
  163. // CurrentSpanIdentifier returns the identifier of the span at the top of the
  164. // segment stack.
  165. func (t *TxnData) CurrentSpanIdentifier() string {
  166. if 0 == len(t.stack) {
  167. return t.getRootSpanID()
  168. }
  169. if "" == t.stack[len(t.stack)-1].spanID {
  170. t.stack[len(t.stack)-1].spanID = NewSpanID()
  171. }
  172. return t.stack[len(t.stack)-1].spanID
  173. }
  174. func (t *TxnData) saveSpanEvent(e *SpanEvent) {
  175. if len(t.spanEvents) < maxSpanEvents {
  176. t.spanEvents = append(t.spanEvents, e)
  177. }
  178. }
  179. var (
  180. errMalformedSegment = errors.New("segment identifier malformed: perhaps unsafe code has modified it?")
  181. errSegmentOrder = errors.New(`improper segment use: the Transaction must be used ` +
  182. `in a single goroutine and segments must be ended in "last started first ended" order: ` +
  183. `see https://github.com/newrelic/go-agent/blob/master/GUIDE.md#segments`)
  184. )
  185. func endSegment(t *TxnData, start SegmentStartTime, now time.Time) (segmentEnd, error) {
  186. if 0 == start.Stamp {
  187. return segmentEnd{}, errMalformedSegment
  188. }
  189. if start.Depth >= len(t.stack) {
  190. return segmentEnd{}, errSegmentOrder
  191. }
  192. if start.Depth < 0 {
  193. return segmentEnd{}, errMalformedSegment
  194. }
  195. frame := t.stack[start.Depth]
  196. if start.Stamp != frame.Stamp {
  197. return segmentEnd{}, errSegmentOrder
  198. }
  199. var children time.Duration
  200. for i := start.Depth; i < len(t.stack); i++ {
  201. children += t.stack[i].children
  202. }
  203. s := segmentEnd{
  204. stop: t.time(now),
  205. start: frame.segmentTime,
  206. }
  207. if s.stop.Time.After(s.start.Time) {
  208. s.duration = s.stop.Time.Sub(s.start.Time)
  209. }
  210. if s.duration > children {
  211. s.exclusive = s.duration - children
  212. }
  213. // Note that we expect (depth == (len(t.stack) - 1)). However, if
  214. // (depth < (len(t.stack) - 1)), that's ok: could be a panic popped
  215. // some stack frames (and the consumer was not using defer).
  216. if 0 == start.Depth {
  217. t.finishedChildren += s.duration
  218. } else {
  219. t.stack[start.Depth-1].children += s.duration
  220. }
  221. t.stack = t.stack[0:start.Depth]
  222. if t.BetterCAT.Sampled && t.SpanEventsEnabled {
  223. s.SpanID = frame.spanID
  224. if "" == s.SpanID {
  225. s.SpanID = NewSpanID()
  226. }
  227. // Note that the current span identifier is the parent's
  228. // identifier because we've already popped the segment that's
  229. // ending off of the stack.
  230. s.ParentID = t.CurrentSpanIdentifier()
  231. }
  232. return s, nil
  233. }
  234. // EndBasicSegment ends a basic segment.
  235. func EndBasicSegment(t *TxnData, start SegmentStartTime, now time.Time, name string) error {
  236. end, err := endSegment(t, start, now)
  237. if nil != err {
  238. return err
  239. }
  240. if nil == t.customSegments {
  241. t.customSegments = make(map[string]*metricData)
  242. }
  243. m := metricDataFromDuration(end.duration, end.exclusive)
  244. if data, ok := t.customSegments[name]; ok {
  245. data.aggregate(m)
  246. } else {
  247. // Use `new` in place of &m so that m is not
  248. // automatically moved to the heap.
  249. cpy := new(metricData)
  250. *cpy = m
  251. t.customSegments[name] = cpy
  252. }
  253. if t.TxnTrace.considerNode(end) {
  254. t.TxnTrace.witnessNode(end, customSegmentMetric(name), nil)
  255. }
  256. if evt := end.spanEvent(); evt != nil {
  257. evt.Name = customSegmentMetric(name)
  258. evt.Category = spanCategoryGeneric
  259. t.saveSpanEvent(evt)
  260. }
  261. return nil
  262. }
  263. // EndExternalSegment ends an external segment.
  264. func EndExternalSegment(t *TxnData, start SegmentStartTime, now time.Time, u *url.URL, method string, resp *http.Response) error {
  265. end, err := endSegment(t, start, now)
  266. if nil != err {
  267. return err
  268. }
  269. host := HostFromURL(u)
  270. if "" == host {
  271. host = "unknown"
  272. }
  273. var appData *cat.AppDataHeader
  274. if resp != nil {
  275. appData, err = t.CrossProcess.ParseAppData(HTTPHeaderToAppData(resp.Header))
  276. if err != nil {
  277. return err
  278. }
  279. }
  280. var crossProcessID string
  281. var transactionName string
  282. var transactionGUID string
  283. if appData != nil {
  284. crossProcessID = appData.CrossProcessID
  285. transactionName = appData.TransactionName
  286. transactionGUID = appData.TransactionGUID
  287. }
  288. key := externalMetricKey{
  289. Host: host,
  290. ExternalCrossProcessID: crossProcessID,
  291. ExternalTransactionName: transactionName,
  292. }
  293. if nil == t.externalSegments {
  294. t.externalSegments = make(map[externalMetricKey]*metricData)
  295. }
  296. t.externalCallCount++
  297. t.externalDuration += end.duration
  298. m := metricDataFromDuration(end.duration, end.exclusive)
  299. if data, ok := t.externalSegments[key]; ok {
  300. data.aggregate(m)
  301. } else {
  302. // Use `new` in place of &m so that m is not
  303. // automatically moved to the heap.
  304. cpy := new(metricData)
  305. *cpy = m
  306. t.externalSegments[key] = cpy
  307. }
  308. if t.TxnTrace.considerNode(end) {
  309. t.TxnTrace.witnessNode(end, externalHostMetric(key), &traceNodeParams{
  310. CleanURL: SafeURL(u),
  311. TransactionGUID: transactionGUID,
  312. })
  313. }
  314. if evt := end.spanEvent(); evt != nil {
  315. evt.Name = externalHostMetric(key)
  316. evt.Category = spanCategoryHTTP
  317. evt.ExternalExtras = &spanExternalExtras{
  318. URL: SafeURL(u),
  319. Method: method,
  320. }
  321. t.saveSpanEvent(evt)
  322. }
  323. return nil
  324. }
  325. // EndDatastoreParams contains the parameters for EndDatastoreSegment.
  326. type EndDatastoreParams struct {
  327. Tracer *TxnData
  328. Start SegmentStartTime
  329. Now time.Time
  330. Product string
  331. Collection string
  332. Operation string
  333. ParameterizedQuery string
  334. QueryParameters map[string]interface{}
  335. Host string
  336. PortPathOrID string
  337. Database string
  338. }
  339. const (
  340. unknownDatastoreHost = "unknown"
  341. unknownDatastorePortPathOrID = "unknown"
  342. )
  343. var (
  344. // ThisHost is the system hostname.
  345. ThisHost = func() string {
  346. if h, err := sysinfo.Hostname(); nil == err {
  347. return h
  348. }
  349. return unknownDatastoreHost
  350. }()
  351. hostsToReplace = map[string]struct{}{
  352. "localhost": {},
  353. "127.0.0.1": {},
  354. "0.0.0.0": {},
  355. "0:0:0:0:0:0:0:1": {},
  356. "::1": {},
  357. "0:0:0:0:0:0:0:0": {},
  358. "::": {},
  359. }
  360. )
  361. func (t TxnData) slowQueryWorthy(d time.Duration) bool {
  362. return t.SlowQueriesEnabled && (d >= t.SlowQueryThreshold)
  363. }
  364. func datastoreSpanAddress(host, portPathOrID string) string {
  365. if "" != host && "" != portPathOrID {
  366. return host + ":" + portPathOrID
  367. }
  368. if "" != host {
  369. return host
  370. }
  371. return portPathOrID
  372. }
  373. // EndDatastoreSegment ends a datastore segment.
  374. func EndDatastoreSegment(p EndDatastoreParams) error {
  375. end, err := endSegment(p.Tracer, p.Start, p.Now)
  376. if nil != err {
  377. return err
  378. }
  379. if p.Operation == "" {
  380. p.Operation = datastoreOperationUnknown
  381. }
  382. if p.Product == "" {
  383. p.Product = datastoreProductUnknown
  384. }
  385. if p.Host == "" && p.PortPathOrID != "" {
  386. p.Host = unknownDatastoreHost
  387. }
  388. if p.PortPathOrID == "" && p.Host != "" {
  389. p.PortPathOrID = unknownDatastorePortPathOrID
  390. }
  391. if _, ok := hostsToReplace[p.Host]; ok {
  392. p.Host = ThisHost
  393. }
  394. // We still want to create a slowQuery if the consumer has not provided
  395. // a Query string (or it has been removed by LASP) since the stack trace
  396. // has value.
  397. if p.ParameterizedQuery == "" {
  398. collection := p.Collection
  399. if "" == collection {
  400. collection = "unknown"
  401. }
  402. p.ParameterizedQuery = fmt.Sprintf(`'%s' on '%s' using '%s'`,
  403. p.Operation, collection, p.Product)
  404. }
  405. key := DatastoreMetricKey{
  406. Product: p.Product,
  407. Collection: p.Collection,
  408. Operation: p.Operation,
  409. Host: p.Host,
  410. PortPathOrID: p.PortPathOrID,
  411. }
  412. if nil == p.Tracer.datastoreSegments {
  413. p.Tracer.datastoreSegments = make(map[DatastoreMetricKey]*metricData)
  414. }
  415. p.Tracer.datastoreCallCount++
  416. p.Tracer.datastoreDuration += end.duration
  417. m := metricDataFromDuration(end.duration, end.exclusive)
  418. if data, ok := p.Tracer.datastoreSegments[key]; ok {
  419. data.aggregate(m)
  420. } else {
  421. // Use `new` in place of &m so that m is not
  422. // automatically moved to the heap.
  423. cpy := new(metricData)
  424. *cpy = m
  425. p.Tracer.datastoreSegments[key] = cpy
  426. }
  427. scopedMetric := datastoreScopedMetric(key)
  428. queryParams := vetQueryParameters(p.QueryParameters)
  429. if p.Tracer.TxnTrace.considerNode(end) {
  430. p.Tracer.TxnTrace.witnessNode(end, scopedMetric, &traceNodeParams{
  431. Host: p.Host,
  432. PortPathOrID: p.PortPathOrID,
  433. Database: p.Database,
  434. Query: p.ParameterizedQuery,
  435. queryParameters: queryParams,
  436. })
  437. }
  438. if p.Tracer.slowQueryWorthy(end.duration) {
  439. if nil == p.Tracer.SlowQueries {
  440. p.Tracer.SlowQueries = newSlowQueries(maxTxnSlowQueries)
  441. }
  442. // Frames to skip:
  443. // this function
  444. // endDatastore
  445. // DatastoreSegment.End
  446. skipFrames := 3
  447. p.Tracer.SlowQueries.observeInstance(slowQueryInstance{
  448. Duration: end.duration,
  449. DatastoreMetric: scopedMetric,
  450. ParameterizedQuery: p.ParameterizedQuery,
  451. QueryParameters: queryParams,
  452. Host: p.Host,
  453. PortPathOrID: p.PortPathOrID,
  454. DatabaseName: p.Database,
  455. StackTrace: GetStackTrace(skipFrames),
  456. })
  457. }
  458. if evt := end.spanEvent(); evt != nil {
  459. evt.Name = scopedMetric
  460. evt.Category = spanCategoryDatastore
  461. evt.DatastoreExtras = &spanDatastoreExtras{
  462. Component: p.Product,
  463. Statement: p.ParameterizedQuery,
  464. Instance: p.Database,
  465. Address: datastoreSpanAddress(p.Host, p.PortPathOrID),
  466. Hostname: p.Host,
  467. }
  468. p.Tracer.saveSpanEvent(evt)
  469. }
  470. return nil
  471. }
  472. // MergeBreakdownMetrics creates segment metrics.
  473. func MergeBreakdownMetrics(t *TxnData, metrics *metricTable) {
  474. scope := t.FinalName
  475. isWeb := t.IsWeb
  476. // Custom Segment Metrics
  477. for key, data := range t.customSegments {
  478. name := customSegmentMetric(key)
  479. // Unscoped
  480. metrics.add(name, "", *data, unforced)
  481. // Scoped
  482. metrics.add(name, scope, *data, unforced)
  483. }
  484. // External Segment Metrics
  485. for key, data := range t.externalSegments {
  486. metrics.add(externalRollupMetric.all, "", *data, forced)
  487. metrics.add(externalRollupMetric.webOrOther(isWeb), "", *data, forced)
  488. hostMetric := externalHostMetric(key)
  489. metrics.add(hostMetric, "", *data, unforced)
  490. if "" != key.ExternalCrossProcessID && "" != key.ExternalTransactionName {
  491. txnMetric := externalTransactionMetric(key)
  492. // Unscoped CAT metrics
  493. metrics.add(externalAppMetric(key), "", *data, unforced)
  494. metrics.add(txnMetric, "", *data, unforced)
  495. // Scoped External Metric
  496. metrics.add(txnMetric, scope, *data, unforced)
  497. } else {
  498. // Scoped External Metric
  499. metrics.add(hostMetric, scope, *data, unforced)
  500. }
  501. }
  502. // Datastore Segment Metrics
  503. for key, data := range t.datastoreSegments {
  504. metrics.add(datastoreRollupMetric.all, "", *data, forced)
  505. metrics.add(datastoreRollupMetric.webOrOther(isWeb), "", *data, forced)
  506. product := datastoreProductMetric(key)
  507. metrics.add(product.all, "", *data, forced)
  508. metrics.add(product.webOrOther(isWeb), "", *data, forced)
  509. if key.Host != "" && key.PortPathOrID != "" {
  510. instance := datastoreInstanceMetric(key)
  511. metrics.add(instance, "", *data, unforced)
  512. }
  513. operation := datastoreOperationMetric(key)
  514. metrics.add(operation, "", *data, unforced)
  515. if "" != key.Collection {
  516. statement := datastoreStatementMetric(key)
  517. metrics.add(statement, "", *data, unforced)
  518. metrics.add(statement, scope, *data, unforced)
  519. } else {
  520. metrics.add(operation, scope, *data, unforced)
  521. }
  522. }
  523. }