// log_formats.go (3.4 KB)

  1. package nginx_log
  2. import (
  3. "regexp"
  4. "time"
  5. )
  6. // AccessLogEntry represents a parsed access log entry
  7. type AccessLogEntry struct {
  8. Timestamp time.Time `json:"timestamp"`
  9. IP string `json:"ip"`
  10. RegionCode string `json:"region_code"`
  11. Province string `json:"province"`
  12. City string `json:"city"`
  13. Method string `json:"method"`
  14. Path string `json:"path"`
  15. Protocol string `json:"protocol"`
  16. Status int `json:"status"`
  17. BytesSent int64 `json:"bytes_sent"`
  18. Referer string `json:"referer"`
  19. UserAgent string `json:"user_agent"`
  20. Browser string `json:"browser"`
  21. BrowserVer string `json:"browser_version"`
  22. OS string `json:"os"`
  23. OSVersion string `json:"os_version"`
  24. DeviceType string `json:"device_type"`
  25. RequestTime float64 `json:"request_time,omitempty"`
  26. UpstreamTime *float64 `json:"upstream_time,omitempty"`
  27. Raw string `json:"raw"`
  28. }
  29. // LogFormat represents different nginx log format patterns
  30. type LogFormat struct {
  31. Name string
  32. Pattern *regexp.Regexp
  33. Fields []string
  34. }
  35. // UserAgentParser interface for user agent parsing
  36. type UserAgentParser interface {
  37. Parse(userAgent string) UserAgentInfo
  38. }
  39. // UserAgentInfo represents parsed user agent information
  40. type UserAgentInfo struct {
  41. Browser string
  42. BrowserVer string
  43. OS string
  44. OSVersion string
  45. DeviceType string
  46. }
  47. // Constants for optimization
  48. const (
  49. invalidIPString = "invalid"
  50. )
  51. // Valid HTTP methods according to RFC specifications
  52. var validHTTPMethods = map[string]bool{
  53. "GET": true,
  54. "POST": true,
  55. "PUT": true,
  56. "DELETE": true,
  57. "HEAD": true,
  58. "OPTIONS": true,
  59. "PATCH": true,
  60. "TRACE": true,
  61. "CONNECT": true,
  62. }
  63. // Common nginx log formats
  64. var (
  65. // Standard combined log format
  66. CombinedFormat = &LogFormat{
  67. Name: "combined",
  68. Pattern: regexp.MustCompile(`^(\S+) - (\S+) \[([^]]+)\] "([^"]*)" (\d+) (\d+|-) "([^"]*)" "([^"]*)"(?:\s+(\S+))?(?:\s+(\S+))?`),
  69. Fields: []string{"ip", "remote_user", "timestamp", "request", "status", "bytes_sent", "referer", "user_agent", "request_time", "upstream_time"},
  70. }
  71. // Standard main log format (common log format)
  72. MainFormat = &LogFormat{
  73. Name: "main",
  74. Pattern: regexp.MustCompile(`^(\S+) - (\S+) \[([^]]+)\] "([^"]*)" (\d+) (\d+|-)(?:\s+"([^"]*)")?(?:\s+"([^"]*)")?`),
  75. Fields: []string{"ip", "remote_user", "timestamp", "request", "status", "bytes_sent", "referer", "user_agent"},
  76. }
  77. // Custom format with more details
  78. DetailedFormat = &LogFormat{
  79. Name: "detailed",
  80. Pattern: regexp.MustCompile(`^(\S+) - (\S+) \[([^]]+)\] "([^"]*)" (\d+) (\d+|-) "([^"]*)" "([^"]*)" (\S+) (\S+) "([^"]*)" (\S+)`),
  81. Fields: []string{"ip", "remote_user", "timestamp", "request", "status", "bytes_sent", "referer", "user_agent", "request_time", "upstream_time", "x_forwarded_for", "connection"},
  82. }
  83. // All supported formats
  84. SupportedFormats = []*LogFormat{DetailedFormat, CombinedFormat, MainFormat}
  85. )
  86. // DetectLogFormat tries to detect the log format from sample lines
  87. func DetectLogFormat(lines []string) *LogFormat {
  88. if len(lines) == 0 {
  89. return nil
  90. }
  91. for _, format := range SupportedFormats {
  92. matchCount := 0
  93. for _, line := range lines {
  94. if format.Pattern.MatchString(line) {
  95. matchCount++
  96. }
  97. }
  98. // If more than 50% of lines match, consider it a match
  99. if float64(matchCount)/float64(len(lines)) > 0.5 {
  100. return format
  101. }
  102. }
  103. return nil
  104. }