1
0

log_formats.go 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117
  1. package nginx_log
  2. import (
  3. "regexp"
  4. )
  // AccessLogEntry is a single access-log line broken out into typed fields.
  //
  // The struct tags fix the JSON key of every field. RequestTime is left out of
  // the JSON when it is zero and UpstreamTime when it is nil (omitempty); all
  // other fields are always emitted.
  type AccessLogEntry struct {
  	// When the request was logged and who sent it.
  	Timestamp int64  `json:"timestamp"` // Unix timestamp
  	IP        string `json:"ip"`

  	// Client location. How these values are derived is not visible in this
  	// file.
  	RegionCode string `json:"region_code"`
  	Province   string `json:"province"`
  	City       string `json:"city"`

  	// Components of the request line.
  	Method   string `json:"method"`
  	Path     string `json:"path"`
  	Protocol string `json:"protocol"`

  	// Response status code and size.
  	Status    int   `json:"status"`
  	BytesSent int64 `json:"bytes_sent"`

  	// Request headers recorded in the log.
  	Referer   string `json:"referer"`
  	UserAgent string `json:"user_agent"`

  	// Same set of fields as UserAgentInfo.
  	Browser    string `json:"browser"`
  	BrowserVer string `json:"browser_version"`
  	OS         string `json:"os"`
  	OSVersion  string `json:"os_version"`
  	DeviceType string `json:"device_type"`

  	// Optional timings. UpstreamTime is a pointer so that "absent" (nil) can
  	// be told apart from a measured zero.
  	RequestTime  float64  `json:"request_time,omitempty"`
  	UpstreamTime *float64 `json:"upstream_time,omitempty"`

  	// Raw is presumably the unparsed source line -- confirm against the
  	// parser that fills this struct.
  	Raw string `json:"raw"`
  }
  // LogFormat describes one recognizable nginx access-log layout.
  type LogFormat struct {
  	// Name is a short identifier for the layout, for example "combined".
  	Name string

  	// Pattern is the compiled expression a log line is matched against.
  	Pattern *regexp.Regexp

  	// Fields labels the capture groups of Pattern, one name per group and in
  	// group order.
  	Fields []string
  }
  34. // UserAgentParser interface for user agent parsing
  35. type UserAgentParser interface {
  36. Parse(userAgent string) UserAgentInfo
  37. }
  // UserAgentInfo carries the details a UserAgentParser reports for a
  // User-Agent string.
  type UserAgentInfo struct {
  	Browser    string // browser name
  	BrowserVer string // browser version
  	OS         string // operating system name
  	OSVersion  string // operating system version
  	DeviceType string // device category; the set of values is not defined here
  }
  // invalidIPString is a shared "invalid" marker. Judging by its name it stands
  // in for an IP address that could not be parsed; its users are outside this
  // file, so confirm before relying on that.
  const invalidIPString = "invalid"
  // validHTTPMethods is the set of request methods treated as valid: the
  // methods defined by RFC 9110 plus PATCH (RFC 5789). Look-ups are
  // case-sensitive, and any method that is not listed yields false.
  var validHTTPMethods = map[string]bool{
  	"CONNECT": true,
  	"DELETE":  true,
  	"GET":     true,
  	"HEAD":    true,
  	"OPTIONS": true,
  	"PATCH":   true,
  	"POST":    true,
  	"PUT":     true,
  	"TRACE":   true,
  }
  62. // Common nginx log formats
  63. var (
  64. // Standard combined log format
  65. CombinedFormat = &LogFormat{
  66. Name: "combined",
  67. Pattern: regexp.MustCompile(`^(\S+) - (\S+) \[([^]]+)\] "([^"]*)" (\d+) (\d+|-) "([^"]*)" "([^"]*)"(?:\s+(\S+))?(?:\s+(\S+))?`),
  68. Fields: []string{"ip", "remote_user", "timestamp", "request", "status", "bytes_sent", "referer", "user_agent", "request_time", "upstream_time"},
  69. }
  70. // Standard main log format (common log format)
  71. MainFormat = &LogFormat{
  72. Name: "main",
  73. Pattern: regexp.MustCompile(`^(\S+) - (\S+) \[([^]]+)\] "([^"]*)" (\d+) (\d+|-)(?:\s+"([^"]*)")?(?:\s+"([^"]*)")?`),
  74. Fields: []string{"ip", "remote_user", "timestamp", "request", "status", "bytes_sent", "referer", "user_agent"},
  75. }
  76. // Custom format with more details
  77. DetailedFormat = &LogFormat{
  78. Name: "detailed",
  79. Pattern: regexp.MustCompile(`^(\S+) - (\S+) \[([^]]+)\] "([^"]*)" (\d+) (\d+|-) "([^"]*)" "([^"]*)" (\S+) (\S+) "([^"]*)" (\S+)`),
  80. Fields: []string{"ip", "remote_user", "timestamp", "request", "status", "bytes_sent", "referer", "user_agent", "request_time", "upstream_time", "x_forwarded_for", "connection"},
  81. }
  82. // All supported formats
  83. SupportedFormats = []*LogFormat{DetailedFormat, CombinedFormat, MainFormat}
  84. )
  85. // DetectLogFormat tries to detect the log format from sample lines
  86. func DetectLogFormat(lines []string) *LogFormat {
  87. if len(lines) == 0 {
  88. return nil
  89. }
  90. for _, format := range SupportedFormats {
  91. matchCount := 0
  92. for _, line := range lines {
  93. if format.Pattern.MatchString(line) {
  94. matchCount++
  95. }
  96. }
  97. // If more than 50% of lines match, consider it a match
  98. if float64(matchCount)/float64(len(lines)) > 0.5 {
  99. return format
  100. }
  101. }
  102. return nil
  103. }