lqb
/
nginx-ui
zrkadlo https://github.com/0xJacky/nginx-ui.git


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583
							package parser

import ()

// Lookup-table-based string processing for nginx log parsing.
// Despite the "SIMD" naming, the code below is scalar Go: it classifies bytes
// through precomputed tables and scans the input one byte at a time.

// SIMDStringMatcher classifies bytes through precomputed 256-entry lookup
// tables, one table per character class, indexed directly by byte value.
// Use NewSIMDStringMatcher to obtain an instance with the tables populated;
// the zero value has every table empty and therefore matches nothing.
type SIMDStringMatcher struct {
	spaceLookup   [256]bool // ' ', '\t', '\n', '\r'
	quoteLookup   [256]bool // '"' and '\''
	bracketLookup [256]bool // '[', ']', '(', ')', '{', '}'
	digitLookup   [256]bool // '0'-'9'
	hexLookup     [256]bool // '0'-'9', 'A'-'F', 'a'-'f'
}

// NewSIMDStringMatcher allocates a SIMDStringMatcher and fills in all of its
// character-class lookup tables, so the returned matcher is ready for use.
func NewSIMDStringMatcher() *SIMDStringMatcher {
	sm := new(SIMDStringMatcher)
	sm.initLookupTables()
	return sm
}

// initLookupTables marks, in each lookup table, the byte values that belong
// to the corresponding character class. Entries that are not set here keep
// their current value (false for a freshly allocated matcher).
func (sm *SIMDStringMatcher) initLookupTables() {
	// Whitespace: space, tab, line feed, carriage return.
	for _, ch := range []byte(" \t\n\r") {
		sm.spaceLookup[ch] = true
	}

	// Double and single quotes.
	for _, ch := range []byte(`"'`) {
		sm.quoteLookup[ch] = true
	}

	// Square, round and curly brackets.
	for _, ch := range []byte("[](){}") {
		sm.bracketLookup[ch] = true
	}

	// Decimal digits belong to both the digit and the hex class.
	for ch := byte('0'); ch <= '9'; ch++ {
		sm.digitLookup[ch] = true
		sm.hexLookup[ch] = true
	}

	// Hex letters A-F / a-f.
	for off := byte(0); off < 6; off++ {
		sm.hexLookup['A'+off] = true
		sm.hexLookup['a'+off] = true
	}
}

// FindNextSpace returns the index of the first byte at or after start that is
// marked in the matcher's space table. It returns -1 when start is at or past
// the end of data, or when no such byte follows.
func (sm *SIMDStringMatcher) FindNextSpace(data []byte, start int) int {
	if start >= len(data) {
		return -1
	}

	for offset, b := range data[start:] {
		if sm.spaceLookup[b] {
			return start + offset
		}
	}

	return -1
}

// FindNextQuote returns the index of the first byte at or after start that is
// marked in the matcher's quote table. It returns -1 when start is at or past
// the end of data, or when no such byte follows.
func (sm *SIMDStringMatcher) FindNextQuote(data []byte, start int) int {
	if start >= len(data) {
		return -1
	}

	for offset, b := range data[start:] {
		if sm.quoteLookup[b] {
			return start + offset
		}
	}

	return -1
}

// FindNextDigit returns the index of the first byte at or after start that is
// marked in the matcher's digit table. It returns -1 when start is at or past
// the end of data, or when no such byte follows.
func (sm *SIMDStringMatcher) FindNextDigit(data []byte, start int) int {
	if start >= len(data) {
		return -1
	}

	for offset, b := range data[start:] {
		if sm.digitLookup[b] {
			return start + offset
		}
	}

	return -1
}

// ExtractIPAddress looks for a dotted-quad IPv4 address. The candidate begins
// at the first digit at or after start and runs up to the next whitespace
// byte, or to the end of data when none follows. On success it returns the
// address and the index just past it; when no digit is found or the candidate
// fails isValidIPFormat it returns "" and -1.
//
// The string is produced by unsafeBytesToString from a sub-slice of data.
// NOTE(review): presumably that helper shares memory with data instead of
// copying — confirm before reusing or mutating the buffer.
func (sm *SIMDStringMatcher) ExtractIPAddress(data []byte, start int) (string, int) {
	if start >= len(data) {
		return "", -1
	}

	// The candidate starts at the first digit ...
	first := sm.FindNextDigit(data, start)
	if first == -1 {
		return "", -1
	}

	// ... and stops at the next whitespace byte, or at the end of the input.
	last := sm.FindNextSpace(data, first)
	if last == -1 {
		last = len(data)
	}

	candidate := data[first:last]
	if !sm.isValidIPFormat(candidate) {
		return "", -1
	}
	return unsafeBytesToString(candidate), last
}

// isValidIPFormat reports whether data is a dotted-decimal IPv4 address:
// exactly four groups of one to three decimal digits separated by single
// dots, each group with a numeric value of at most 255. Leading zeros inside
// a group are accepted. Any other byte makes the input invalid.
func (sm *SIMDStringMatcher) isValidIPFormat(data []byte) bool {
	// Shortest form is "1.1.1.1" (7 bytes), longest "255.255.255.255" (15).
	if len(data) < 7 || len(data) > 15 {
		return false
	}

	dots := 0   // separators seen so far
	digits := 0 // digits in the current group
	octet := 0  // numeric value of the current group

	for _, b := range data {
		switch {
		case b == '.':
			// A dot must close a non-empty group, and there are only three.
			if digits == 0 || dots == 3 {
				return false
			}
			dots++
			digits, octet = 0, 0
		case sm.digitLookup[b]:
			digits++
			octet = octet*10 + int(b-'0')
			// Checking the value as well as the length rejects groups such
			// as "256" or "999" that merely look like an octet.
			if digits > 3 || octet > 255 {
				return false
			}
		default:
			return false
		}
	}

	// The input must end inside a non-empty fourth group.
	return dots == 3 && digits > 0
}

// ExtractTimestamp returns the text between the first '[' at or after start
// and the first ']' that follows it, together with the index just past that
// ']'. The brackets themselves are not included. When either bracket is
// missing it returns "" and -1.
//
// The string is produced by unsafeBytesToString from a sub-slice of data.
// NOTE(review): presumably that helper shares memory with data instead of
// copying — confirm before reusing or mutating the buffer.
func (sm *SIMDStringMatcher) ExtractTimestamp(data []byte, start int) (string, int) {
	if start >= len(data) {
		return "", -1
	}

	open := sm.findBracket(data, start, '[')
	if open == -1 {
		return "", -1
	}
	contentStart := open + 1

	closeIdx := sm.findBracket(data, contentStart, ']')
	if closeIdx == -1 {
		return "", -1
	}

	return unsafeBytesToString(data[contentStart:closeIdx]), closeIdx + 1
}

// findBracket returns the index of the first byte equal to bracket at or
// after start, or -1 when start is at or past the end of data or the byte
// does not occur.
func (sm *SIMDStringMatcher) findBracket(data []byte, start int, bracket byte) int {
	if start >= len(data) {
		return -1
	}

	for offset, b := range data[start:] {
		if b == bracket {
			return start + offset
		}
	}

	return -1
}

// ExtractQuotedString returns the contents of the first quoted string that
// opens at or after start, together with the index just past its closing
// quote. The quotes themselves are not included. When no opening quote or no
// matching closing quote is found it returns "" and -1.
//
// The string is produced by unsafeBytesToString from a sub-slice of data.
// NOTE(review): presumably that helper shares memory with data instead of
// copying — confirm before reusing or mutating the buffer.
func (sm *SIMDStringMatcher) ExtractQuotedString(data []byte, start int) (string, int) {
	if start >= len(data) {
		return "", -1
	}

	opening := sm.FindNextQuote(data, start)
	if opening == -1 {
		return "", -1
	}
	bodyStart := opening + 1

	// The closing quote must be the same character that opened the string.
	closing := sm.findClosingQuote(data, bodyStart, data[opening])
	if closing == -1 {
		return "", -1
	}

	return unsafeBytesToString(data[bodyStart:closing]), closing + 1
}

// findClosingQuote returns the index of the first unescaped quoteChar at or
// after start, or -1 when there is none. start is expected to be the index
// just past the opening quote.
//
// A quote is escaped when it is preceded by an odd number of consecutive
// backslashes. An even run means the backslashes escape each other, so in
// `a\\"` the quote terminates the string, while in `a\"` it does not.
func (sm *SIMDStringMatcher) findClosingQuote(data []byte, start int, quoteChar byte) int {
	if start >= len(data) {
		return -1
	}

	for i := start; i < len(data); i++ {
		if data[i] != quoteChar {
			continue
		}

		// Count the backslashes immediately before this quote, never looking
		// further back than start (the byte before start is the opening quote).
		backslashes := 0
		for j := i - 1; j >= start && data[j] == '\\'; j-- {
			backslashes++
		}

		if backslashes%2 == 0 {
			return i
		}
	}

	return -1
}

// ExtractStatusCode locates a three-digit HTTP status code at or after start
// (as determined by findStatusCodeStart) and returns its numeric value along
// with the index just past its last digit. It returns 0 and -1 when no status
// code is found.
func (sm *SIMDStringMatcher) ExtractStatusCode(data []byte, start int) (int, int) {
	if start >= len(data) {
		return 0, -1
	}

	// Reject both "not found" and a position too close to the end to hold
	// three bytes.
	first := sm.findStatusCodeStart(data, start)
	if first == -1 || first+2 >= len(data) {
		return 0, -1
	}

	// Fold the three digits into an integer, most significant first.
	code := 0
	for _, d := range data[first : first+3] {
		code = code*10 + int(d-'0')
	}

	return code, first + 3
}

// findStatusCodeStart returns the index of the first three-digit HTTP status
// code at or after start, or -1 when there is none.
//
// A match must satisfy all of the following:
//   - it is preceded by a space or a double quote (so it cannot sit at index 0);
//   - its first digit is between 1 and 5 (status range 100-599);
//   - it consists of exactly three digits, i.e. it is followed by a non-digit
//     or by the end of data. This keeps the leading digits of a longer number
//     such as " 20000 " from being reported as status 200.
func (sm *SIMDStringMatcher) findStatusCodeStart(data []byte, start int) int {
	if start+2 >= len(data) {
		return -1
	}

	for i := start; i+3 <= len(data); i++ {
		// A preceding delimiter is required.
		if i == 0 {
			continue
		}
		if prev := data[i-1]; prev != ' ' && prev != '"' {
			continue
		}

		// First digit 1-5, followed by two more digits.
		if first := data[i]; first < '1' || first > '5' {
			continue
		}
		if !sm.digitLookup[data[i+1]] || !sm.digitLookup[data[i+2]] {
			continue
		}

		// A fourth digit means this is part of a longer number.
		if i+3 < len(data) && sm.digitLookup[data[i+3]] {
			continue
		}

		return i
	}

	return -1
}

// ParseLogLineSIMD parses one access-log line into a new AccessLogEntry. The
// fields are read left to right: client IP, the user fields, a bracketed
// timestamp, the quoted request line, the status code, the response size, and
// the quoted referer and user agent.
//
// It returns nil when data is empty, when no valid IP address is found, or
// when the user fields cannot be skipped. Every later field is optional: if
// one cannot be extracted it is left at its zero value and parsing continues
// from the current position.
//
// The timestamp text is located but not parsed; entry.Timestamp stays 0.
//
// NOTE(review): the string fields come from helpers that use
// unsafeBytesToString; presumably they share memory with data — confirm
// before reusing or mutating the buffer.
func (sm *SIMDStringMatcher) ParseLogLineSIMD(data []byte) *AccessLogEntry {
	if len(data) == 0 {
		return nil
	}

	// The client address is mandatory.
	ip, pos := sm.ExtractIPAddress(data, 0)
	if ip == "" {
		return nil
	}
	entry := &AccessLogEntry{IP: ip}

	// Skip user fields (- -).
	if pos = sm.skipUserFields(data, pos); pos == -1 {
		return nil
	}

	// For the bracketed and quoted fields below, success is judged by the
	// returned position (-1 on failure), not by the extracted text. An empty
	// but present field such as "" must still advance pos; otherwise the next
	// extraction would re-read the same pair of quotes.
	if _, next := sm.ExtractTimestamp(data, pos); next != -1 {
		// The timestamp string is not converted here.
		entry.Timestamp = 0
		pos = next
	}

	// Request line: method, path and protocol are split out of it.
	if request, next := sm.ExtractQuotedString(data, pos); next != -1 {
		sm.parseRequestComponents(request, entry)
		pos = next
	}

	// Status code.
	if status, next := sm.ExtractStatusCode(data, pos); status > 0 {
		entry.Status = status
		pos = next
	}

	// Response size (a number or "-").
	if size, next := sm.extractSize(data, pos); next != -1 {
		entry.BytesSent = size
		pos = next
	}

	// Referer.
	if referer, next := sm.ExtractQuotedString(data, pos); next != -1 {
		entry.Referer = referer
		pos = next
	}

	// User agent is the last field, so pos is not advanced any further.
	if userAgent, next := sm.ExtractQuotedString(data, pos); next != -1 {
		entry.UserAgent = userAgent
	}

	return entry
}

// parseRequestComponents splits a request line on its first two whitespace
// bytes and stores the pieces in entry: the text before the first one becomes
// Method, the text between them Path, and everything after the second one
// Protocol. With a single whitespace byte only Method and Path are set; with
// none, entry is left untouched.
//
// The pieces are sliced from a private []byte copy of request.
func (sm *SIMDStringMatcher) parseRequestComponents(request string, entry *AccessLogEntry) {
	buf := []byte(request)

	// Method ends at the first whitespace byte.
	methodEnd := sm.FindNextSpace(buf, 0)
	if methodEnd == -1 {
		return
	}
	entry.Method = unsafeBytesToString(buf[:methodEnd])

	// Everything after that separator is "path" or "path protocol".
	rest := buf[methodEnd+1:]
	pathEnd := sm.FindNextSpace(rest, 0)
	if pathEnd == -1 {
		// No second separator: the remainder is the path, no protocol.
		entry.Path = unsafeBytesToString(rest)
		return
	}

	entry.Path = unsafeBytesToString(rest[:pathEnd])
	entry.Protocol = unsafeBytesToString(rest[pathEnd+1:])
}

// skipUserFields scans forward from start and returns the index just past the
// second whitespace byte it encounters, or -1 when fewer than two whitespace
// bytes remain. A whitespace byte located at start itself counts as the first.
func (sm *SIMDStringMatcher) skipUserFields(data []byte, start int) int {
	const wanted = 2

	seen := 0
	for i := start; i < len(data); i++ {
		if !sm.spaceLookup[data[i]] {
			continue
		}
		seen++
		if seen == wanted {
			return i + 1
		}
	}

	return -1
}

// extractSize reads the size field that follows any leading whitespace at
// start. A '-' yields size 0 and the index just past the dash. A run of
// decimal digits yields its value and the index just past the last digit.
// It returns 0 and -1 when only whitespace remains or the field starts with
// any other byte.
func (sm *SIMDStringMatcher) extractSize(data []byte, start int) (int64, int) {
	i := start
	for i < len(data) && sm.spaceLookup[data[i]] {
		i++
	}
	if i >= len(data) {
		return 0, -1
	}

	// "-" stands for "no size".
	if data[i] == '-' {
		return 0, i + 1
	}

	// Accumulate the value while walking over the digit run.
	var total int64
	first := i
	for ; i < len(data) && sm.digitLookup[data[i]]; i++ {
		total = total*10 + int64(data[i]-'0')
	}
	if i == first {
		return 0, -1
	}

	return total, i
}

// BatchParseSIMD runs ParseLogLineSIMD on every element of lines, in order,
// and returns the entries that were produced. Lines for which the parser
// returns nil are omitted, so the result may be shorter than the input.
func (sm *SIMDStringMatcher) BatchParseSIMD(lines [][]byte) []*AccessLogEntry {
	parsed := make([]*AccessLogEntry, 0, len(lines))

	for i := range lines {
		entry := sm.ParseLogLineSIMD(lines[i])
		if entry == nil {
			continue
		}
		parsed = append(parsed, entry)
	}

	return parsed
}

// LogLineParser bundles a SIMDStringMatcher with an AccessLogEntryPool.
// Construct it with NewLogLineParser, which populates both fields.
//
// matcher performs the parsing for ParseLine and ParseLines. pool is created
// alongside it; ParseLine and ParseLines do not draw entries from it.
type LogLineParser struct {
	matcher *SIMDStringMatcher
	pool    *AccessLogEntryPool
}

// NewLogLineParser returns a LogLineParser with a freshly initialised matcher
// and a freshly created entry pool.
func NewLogLineParser() *LogLineParser {
	parser := new(LogLineParser)
	parser.matcher = NewSIMDStringMatcher()
	parser.pool = NewAccessLogEntryPool()
	return parser
}

// ParseLine parses one access-log line by delegating to the parser's matcher
// (ParseLogLineSIMD) and returns whatever that call yields, which may be nil.
func (olp *LogLineParser) ParseLine(data []byte) *AccessLogEntry {
	entry := olp.matcher.ParseLogLineSIMD(data)
	return entry
}

// ParseLines parses a batch of access-log lines by delegating to the parser's
// matcher (BatchParseSIMD) and returns the resulting entries.
func (olp *LogLineParser) ParseLines(lines [][]byte) []*AccessLogEntry {
	entries := olp.matcher.BatchParseSIMD(lines)
	return entries
}

// AccessLogEntryPool is a free list of *AccessLogEntry values held in a
// channel; NewAccessLogEntryPool creates that channel with a buffer. Get and
// Put never block: Get allocates a new entry when none is waiting, and Put
// drops the entry when the channel cannot accept it.
type AccessLogEntryPool struct {
	entries chan *AccessLogEntry
}

// NewAccessLogEntryPool returns a pool that can hold up to 1000 idle entries
// and starts out with 100 zero-valued entries already allocated.
func NewAccessLogEntryPool() *AccessLogEntryPool {
	const (
		capacity = 1000 // maximum number of idle entries retained
		warm     = 100  // entries allocated up front
	)

	entries := make(chan *AccessLogEntry, capacity)
	for n := warm; n > 0; n-- {
		entries <- &AccessLogEntry{}
	}

	return &AccessLogEntryPool{entries: entries}
}

// Get returns an idle entry from the pool when one is immediately available
// and otherwise allocates a new zero-valued AccessLogEntry. It never blocks.
func (pool *AccessLogEntryPool) Get() *AccessLogEntry {
	select {
	case pooled := <-pool.entries:
		return pooled
	default:
		// Nothing waiting in the channel; fall through and allocate.
	}
	return &AccessLogEntry{}
}

// Put resets entry to its zero value and offers it back to the pool. It never
// blocks: when the pool cannot accept the entry, the entry is dropped and
// left for the garbage collector. A nil entry is ignored.
//
// The caller must not use entry after calling Put, since a later Get may hand
// the same pointer to another user.
func (pool *AccessLogEntryPool) Put(entry *AccessLogEntry) {
	// A nil entry can be neither reset nor usefully pooled.
	if entry == nil {
		return
	}

	// Clear every field so the next Get never observes stale data.
	*entry = AccessLogEntry{}

	select {
	case pool.entries <- entry:
	default:
		// Pool is full, let GC handle it.
	}
}