powerbi_url.go 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149
  1. package xnet
  2. import (
  3. "fmt"
  4. "github.com/tebeka/selenium"
  5. "github.com/tebeka/selenium/chrome"
  6. "log"
  7. "net/url"
  8. "regexp"
  9. "strings"
  10. "time"
  11. )
  12. // @title GetPBIPageURL
  13. // @description 获取多页PBI的所有页面url
  14. // @param
  15. // @return
  16. func GetPBIPageURL(chromeDriver string, driverPort int, PBIURL string) ([]string, error) {
  17. PBIUrls := []string{}
  18. if strings.Contains(PBIURL, "powerbi.com") == false {
  19. return PBIUrls, fmt.Errorf("请正确的PBIURL.")
  20. }
  21. if strings.Contains(PBIURL, "pageName") == true {
  22. return PBIUrls, fmt.Errorf("请输入PBI的第一页URL.")
  23. }
  24. opts := []selenium.ServiceOption{}
  25. //selenium.SetDebug(true)
  26. service, err := selenium.NewChromeDriverService(chromeDriver, driverPort, opts...)
  27. if nil != err {
  28. fmt.Println("start a chromedriver service falid", err.Error())
  29. return PBIUrls, err
  30. }
  31. //注意这里,server关闭之后,chrome窗口也会关闭
  32. defer service.Stop()
  33. //链接本地的浏览器 chrome
  34. caps := selenium.Capabilities{
  35. "browserName": "chrome",
  36. }
  37. prefCaps := map[string]interface{}{}
  38. chromeCaps := chrome.Capabilities{
  39. Prefs: prefCaps,
  40. Path: "",
  41. Args: []string{
  42. //"--headless", // 设置Chrome无头模式,在linux下运行,需要设置这个参数,否则会报错
  43. "--kiosk", // 加载启动项页面全屏效果,相当于F11
  44. "--start-maximized", // 最大化运行(全屏窗口),不设置,取元素会报错
  45. "--disable-infobars", // 关闭左上方Chrome 正受到自动测试软件的控制的提示
  46. //"--no-sandbox",// 沙盒,linux下要关闭沙盒模式.
  47. "--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36", // 模拟user-agent,防反爬
  48. },
  49. }
  50. //以上是设置浏览器参数
  51. caps.AddChrome(chromeCaps)
  52. // 调起chrome浏览器
  53. wd, err := selenium.NewRemote(caps, fmt.Sprintf("http://localhost:%d/wd/hub", driverPort))
  54. if err != nil {
  55. //启用--user-data-dir参数后,同时只能打开一个chrome程序.
  56. //如果已打开chrome程序,必须先关闭已打开的chrome,再运行程序
  57. fmt.Println("connect to the webDriver faild", err.Error())
  58. return PBIUrls, err
  59. } else {
  60. log.Println("connect to the webDriver success.")
  61. }
  62. //关闭一个webDriver会对应关闭一个chrome窗口
  63. //但是不会导致seleniumServer关闭
  64. defer wd.Quit()
  65. err = wd.Get(PBIURL)
  66. if err != nil {
  67. fmt.Printf("Get url error : %v\n", err)
  68. return PBIUrls, err
  69. }
  70. handle, _ := wd.CurrentWindowHandle()
  71. wd.ResizeWindow(handle, 1920, 1080)
  72. wd.MaximizeWindow(handle)
  73. err = wd.WaitWithTimeout(ShareExists, 30*time.Second)
  74. if err != nil {
  75. return PBIUrls, err
  76. }
  77. pageNames := []string{}
  78. //pageSource, _ := wd.PageSource()
  79. //cp := regexp.MustCompile("pageName%3D(.*?)\"")
  80. //rets := cp.FindAllString(pageSource, -1)
  81. //if len(rets)>0{
  82. // unescape,_:=url.QueryUnescape(rets[0])
  83. // splits:=strings.Split(unescape,"&pageName=")
  84. // for _,i:=range splits{
  85. // if i[len(i)-1]== '"'{
  86. // pageNames=append(pageNames,i[:len(i)-1])
  87. // break
  88. // }
  89. // }
  90. //}
  91. //time.Sleep(3*time.Second)
  92. for i := 0; i < 999; i++ {
  93. err = wd.WaitWithTimeout(NextPageExists, 30*time.Second)
  94. if err != nil {
  95. log.Printf("NextPageExists WaitWithTimeout error : %v\n", err)
  96. break
  97. }
  98. pageSource, _ := wd.PageSource()
  99. cp := regexp.MustCompile("pageName%3D(.*?)\"")
  100. rets := cp.FindAllString(pageSource, -1)
  101. if len(rets) > 0 {
  102. unescape, _ := url.QueryUnescape(rets[0])
  103. splits := strings.Split(unescape, "&pageName=")
  104. for _, i := range splits {
  105. if i[len(i)-1] == '"' {
  106. pageNames = append(pageNames, i[:len(i)-1])
  107. break
  108. }
  109. }
  110. }
  111. nextPage, err := wd.FindElement(selenium.ByCSSSelector, "i[title='下一页']")
  112. if err != nil {
  113. log.Printf("FindElement 下一页 error : %v\n", err)
  114. break
  115. }
  116. nextPageClass, _ := nextPage.GetAttribute("class")
  117. if strings.Contains(nextPageClass, " inactive") == true {
  118. log.Println("nextPageClass inactive.\n")
  119. break
  120. }
  121. }
  122. //data, err := wd.Screenshot()
  123. //
  124. //f, err := os.Create("pbi.png")
  125. //f.Write(data)
  126. //if err != nil {
  127. // log.Printf("pbi.png, Create error : %v\n", err)
  128. // return PBIUrls, err
  129. //}
  130. //
  131. //defer f.Close()
  132. for _, pageName := range pageNames {
  133. PBIUrls = append(PBIUrls, fmt.Sprintf("%s&%s", PBIURL, pageName))
  134. }
  135. fmt.Printf("PBIUrls==========>%v\n", PBIUrls)
  136. return PBIUrls, nil
  137. }