ngx_dir_index.go 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132
  1. //go:generate go run .
  2. package main
  3. import (
  4. "encoding/json"
  5. "net/http"
  6. "os"
  7. "runtime"
  8. "strings"
  9. "path/filepath"
  10. "github.com/uozi-tech/cosy/logger"
  11. "golang.org/x/net/html"
  12. )
  13. type Directive struct {
  14. Links []string `json:"links"`
  15. }
  16. const (
  17. targetPath = "internal/nginx/nginx_directives.json"
  18. nginxURL = "https://nginx.org/en/docs/dirindex.html"
  19. )
  20. func main() {
  21. logger.Init("release")
  22. _, file, _, ok := runtime.Caller(0)
  23. if !ok {
  24. logger.Error("Unable to get the current file")
  25. return
  26. }
  27. basePath := filepath.Join(filepath.Dir(file), "../../")
  28. outputPath := filepath.Join(basePath, targetPath)
  29. // Fetch page content
  30. resp, err := http.Get(nginxURL)
  31. if err != nil {
  32. logger.Errorf("fetching page: %v", err)
  33. return
  34. }
  35. defer resp.Body.Close()
  36. // Parse HTML
  37. doc, err := html.Parse(resp.Body)
  38. if err != nil {
  39. logger.Errorf("parsing HTML: %v", err)
  40. return
  41. }
  42. // Change storage structure to map
  43. directives := make(map[string]Directive)
  44. // Find node with id="content"
  45. var content *html.Node
  46. var findContent func(*html.Node)
  47. findContent = func(n *html.Node) {
  48. if n.Type == html.ElementNode && n.Data == "div" {
  49. for _, attr := range n.Attr {
  50. if attr.Key == "id" && attr.Val == "content" {
  51. content = n
  52. return
  53. }
  54. }
  55. }
  56. for c := n.FirstChild; c != nil; c = c.NextSibling {
  57. findContent(c)
  58. }
  59. }
  60. findContent(doc)
  61. // Extract all a tags from content
  62. if content != nil {
  63. var extractLinks func(*html.Node)
  64. extractLinks = func(n *html.Node) {
  65. if n.Type == html.ElementNode && n.Data == "a" {
  66. var href string
  67. for _, attr := range n.Attr {
  68. if attr.Key == "href" {
  69. href = attr.Val
  70. break
  71. }
  72. }
  73. if href != "" && n.FirstChild != nil {
  74. name := strings.TrimSpace(n.FirstChild.Data)
  75. if name != "" {
  76. fullLink := "https://nginx.org/en/docs/" + href
  77. directive, exists := directives[name]
  78. if !exists {
  79. directives[name] = Directive{
  80. Links: []string{fullLink},
  81. }
  82. } else {
  83. // Check if link already exists to avoid duplicates
  84. linkExists := false
  85. for _, existingLink := range directive.Links {
  86. if existingLink == fullLink {
  87. linkExists = true
  88. break
  89. }
  90. }
  91. if !linkExists {
  92. directive.Links = append(directive.Links, fullLink)
  93. directives[name] = directive
  94. }
  95. }
  96. }
  97. }
  98. }
  99. for c := n.FirstChild; c != nil; c = c.NextSibling {
  100. extractLinks(c)
  101. }
  102. }
  103. extractLinks(content)
  104. }
  105. // Write results to JSON file
  106. jsonData, err := json.MarshalIndent(directives, "", " ")
  107. if err != nil {
  108. logger.Errorf("marshaling JSON: %v", err)
  109. return
  110. }
  111. err = os.WriteFile(outputPath, jsonData, 0644)
  112. if err != nil {
  113. logger.Errorf("writing file: %v", err)
  114. return
  115. }
  116. logger.Infof("Successfully parsed %d directives and saved to %s\n", len(directives), targetPath)
  117. }