1
0

ngx_dir_index.go 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117
  1. package main
  2. import (
  3. "encoding/json"
  4. "log"
  5. "net/http"
  6. "os"
  7. "strings"
  8. "golang.org/x/net/html"
  9. )
  10. type Directive struct {
  11. Links []string `json:"links"`
  12. }
  13. func main() {
  14. if len(os.Args) < 2 {
  15. log.Println("Usage: go run . <output_file>")
  16. }
  17. outputPath := os.Args[1]
  18. // Fetch page content
  19. resp, err := http.Get("https://nginx.org/en/docs/dirindex.html")
  20. if err != nil {
  21. log.Println("[Error] fetching page:", err)
  22. return
  23. }
  24. defer resp.Body.Close()
  25. // Parse HTML
  26. doc, err := html.Parse(resp.Body)
  27. if err != nil {
  28. log.Println("[Error] parsing HTML:", err)
  29. return
  30. }
  31. // Change storage structure to map
  32. directives := make(map[string]Directive)
  33. // Find node with id="content"
  34. var content *html.Node
  35. var findContent func(*html.Node)
  36. findContent = func(n *html.Node) {
  37. if n.Type == html.ElementNode && n.Data == "div" {
  38. for _, attr := range n.Attr {
  39. if attr.Key == "id" && attr.Val == "content" {
  40. content = n
  41. return
  42. }
  43. }
  44. }
  45. for c := n.FirstChild; c != nil; c = c.NextSibling {
  46. findContent(c)
  47. }
  48. }
  49. findContent(doc)
  50. // Extract all a tags from content
  51. if content != nil {
  52. var extractLinks func(*html.Node)
  53. extractLinks = func(n *html.Node) {
  54. if n.Type == html.ElementNode && n.Data == "a" {
  55. var href string
  56. for _, attr := range n.Attr {
  57. if attr.Key == "href" {
  58. href = attr.Val
  59. break
  60. }
  61. }
  62. if href != "" && n.FirstChild != nil {
  63. name := strings.TrimSpace(n.FirstChild.Data)
  64. if name != "" {
  65. fullLink := "https://nginx.org/en/docs/" + href
  66. directive, exists := directives[name]
  67. if !exists {
  68. directives[name] = Directive{
  69. Links: []string{fullLink},
  70. }
  71. } else {
  72. // Check if link already exists to avoid duplicates
  73. linkExists := false
  74. for _, existingLink := range directive.Links {
  75. if existingLink == fullLink {
  76. linkExists = true
  77. break
  78. }
  79. }
  80. if !linkExists {
  81. directive.Links = append(directive.Links, fullLink)
  82. directives[name] = directive
  83. }
  84. }
  85. }
  86. }
  87. }
  88. for c := n.FirstChild; c != nil; c = c.NextSibling {
  89. extractLinks(c)
  90. }
  91. }
  92. extractLinks(content)
  93. }
  94. // Write results to JSON file
  95. jsonData, err := json.MarshalIndent(directives, "", " ")
  96. if err != nil {
  97. log.Println("[Error] marshaling JSON:", err)
  98. return
  99. }
  100. err = os.WriteFile(outputPath, jsonData, 0644)
  101. if err != nil {
  102. log.Println("[Error] writing file:", err)
  103. return
  104. }
  105. log.Printf("[OK] Successfully parsed %d directives and saved to %s\n", len(directives), outputPath)
  106. }