package xnet import ( "fmt" "github.com/tebeka/selenium" "github.com/tebeka/selenium/chrome" "log" "net/url" "regexp" "strings" "time" ) // @title GetPBIPageURL // @description 获取多页PBI的所有页面url // @param // @return func GetPBIPageURL(chromeDriver string, driverPort int, PBIURL string) ([]string, error) { PBIUrls := []string{} if strings.Contains(PBIURL, "powerbi.com") == false { return PBIUrls, fmt.Errorf("请正确的PBIURL.") } if strings.Contains(PBIURL, "pageName") == true { return PBIUrls, fmt.Errorf("请输入PBI的第一页URL.") } opts := []selenium.ServiceOption{} //selenium.SetDebug(true) service, err := selenium.NewChromeDriverService(chromeDriver, driverPort, opts...) if nil != err { fmt.Println("start a chromedriver service falid", err.Error()) return PBIUrls, err } //注意这里,server关闭之后,chrome窗口也会关闭 defer service.Stop() //链接本地的浏览器 chrome caps := selenium.Capabilities{ "browserName": "chrome", } prefCaps := map[string]interface{}{} chromeCaps := chrome.Capabilities{ Prefs: prefCaps, Path: "", Args: []string{ //"--headless", // 设置Chrome无头模式,在linux下运行,需要设置这个参数,否则会报错 "--kiosk", // 加载启动项页面全屏效果,相当于F11 "--start-maximized", // 最大化运行(全屏窗口),不设置,取元素会报错 "--disable-infobars", // 关闭左上方Chrome 正受到自动测试软件的控制的提示 //"--no-sandbox",// 沙盒,linux下要关闭沙盒模式. "--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36", // 模拟user-agent,防反爬 }, } //以上是设置浏览器参数 caps.AddChrome(chromeCaps) // 调起chrome浏览器 wd, err := selenium.NewRemote(caps, fmt.Sprintf("http://localhost:%d/wd/hub", driverPort)) if err != nil { //启用--user-data-dir参数后,同时只能打开一个chrome程序. //如果已打开chrome程序,必须先关闭已打开的chrome,再运行程序 fmt.Println("connect to the webDriver faild", err.Error()) return PBIUrls, err } else { log.Println("connect to the webDriver success.") } //关闭一个webDriver会对应关闭一个chrome窗口 //但是不会导致seleniumServer关闭 defer wd.Quit() err = wd.Get(PBIURL) if err != nil { fmt.Printf("Get url error : %v\n", err) return PBIUrls, err } handle, _ := wd.CurrentWindowHandle() wd.ResizeWindow(handle, 1920, 1080) wd.MaximizeWindow(handle) err = wd.WaitWithTimeout(ShareExists, 30*time.Second) if err != nil { return PBIUrls, err } pageNames := []string{} //pageSource, _ := wd.PageSource() //cp := regexp.MustCompile("pageName%3D(.*?)\"") //rets := cp.FindAllString(pageSource, -1) //if len(rets)>0{ // unescape,_:=url.QueryUnescape(rets[0]) // splits:=strings.Split(unescape,"&pageName=") // for _,i:=range splits{ // if i[len(i)-1]== '"'{ // pageNames=append(pageNames,i[:len(i)-1]) // break // } // } //} //time.Sleep(3*time.Second) for i := 0; i < 999; i++ { err = wd.WaitWithTimeout(NextPageExists, 30*time.Second) if err != nil { log.Printf("NextPageExists WaitWithTimeout error : %v\n", err) break } pageSource, _ := wd.PageSource() cp := regexp.MustCompile("pageName%3D(.*?)\"") rets := cp.FindAllString(pageSource, -1) if len(rets) > 0 { unescape, _ := url.QueryUnescape(rets[0]) splits := strings.Split(unescape, "&pageName=") for _, i := range splits { if i[len(i)-1] == '"' { pageNames = append(pageNames, i[:len(i)-1]) break } } } nextPage, err := wd.FindElement(selenium.ByCSSSelector, "i[title='下一页']") if err != nil { log.Printf("FindElement 下一页 error : %v\n", err) break } nextPageClass, _ := nextPage.GetAttribute("class") if strings.Contains(nextPageClass, " inactive") == true { log.Println("nextPageClass inactive.\n") break } } //data, err := wd.Screenshot() // //f, err := os.Create("pbi.png") //f.Write(data) //if err != nil { // log.Printf("pbi.png, Create error : %v\n", err) // return PBIUrls, err //} // //defer f.Close() for _, pageName := range pageNames { PBIUrls = append(PBIUrls, fmt.Sprintf("%s&%s", PBIURL, pageName)) } fmt.Printf("PBIUrls==========>%v\n", PBIUrls) return PBIUrls, nil }