1 package fff; 2 3 import java.io.IOException; 4 import java.net.MalformedURLException; 5 import java.util.List; 6 7 import com.gargoylesoftware.htmlunit.BrowserVersion; 8 import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException; 9 import com.gargoylesoftware.htmlunit.NicelyResynchronizingAjaxController; 10 import com.gargoylesoftware.htmlunit.WebClient; 11 import com.gargoylesoftware.htmlunit.html.DomElement; 12 import com.gargoylesoftware.htmlunit.html.DomNode; 13 import com.gargoylesoftware.htmlunit.html.DomNodeList; 14 import com.gargoylesoftware.htmlunit.html.HtmlAnchor; 15 import com.gargoylesoftware.htmlunit.html.HtmlButton; 16 import com.gargoylesoftware.htmlunit.html.HtmlElement; 17 import com.gargoylesoftware.htmlunit.html.HtmlForm; 18 import com.gargoylesoftware.htmlunit.html.HtmlInput; 19 import com.gargoylesoftware.htmlunit.html.HtmlPage; 20 import com.gargoylesoftware.htmlunit.html.HtmlPasswordInput; 21 import com.gargoylesoftware.htmlunit.html.HtmlSelect; 22 import com.gargoylesoftware.htmlunit.html.HtmlSubmitInput; 23 import com.gargoylesoftware.htmlunit.html.HtmlTextArea; 24 import com.gargoylesoftware.htmlunit.html.HtmlTextInput; 25 26 27 public class CopyOftest { 28 public static void main(String[] args) throws FailingHttpStatusCodeException, MalformedURLException, IOException, InterruptedException { 29 WebClient client = new WebClient(BrowserVersion.FIREFOX_45); 30 client.getOptions().setJavaScriptEnabled(true); 31 client.getOptions().setCssEnabled(false); 32 client.getOptions().setRedirectEnabled(true); 33 client.getOptions().setThrowExceptionOnScriptError(false); 34 35 HtmlPage page = client.getPage("https://passport.weibo.cn/signin/login?entry=mweibo&res=wel&wm=3349&r=http%3A%2F%2Fm.weibo.cn%2F"); 36 //System.out.println(page.asText()); 37 //登录 38 HtmlInput ln = page.getHtmlElementById("loginName"); 39 HtmlInput pwd = page.getHtmlElementById("loginPassword"); 40 DomElement bbu =page.getElementById("loginAction"); 41 42 ln.setAttribute("value", "18994131***"); 43 pwd.setAttribute("value", "ap19971***"); 44 45 HtmlPage page2 = bbu.click(); 46 //登录完成,现在可以爬取任意你想要的页面了。 47 System.out.println(page2.asXml()); 48 // DomNodeList<DomNode> iList1 = page2.querySelectorAll(".iconf_navbar_compose"); 49 // HtmlPage page3 =((DomElement) iList1.get(0)).click(); HtmlPage page = client.getPage("https://passport.weibo.cn/signin/login?entry=mweibo&res=wel&wm=3349&r=http%3A%2F%2Fm.weibo.cn%2F"); 50 HtmlPage page3 = client.getPage("http://m.weibo.cn/mblog"); 51 52 System.out.println(page3.asXml()); //无内容 53 DomNodeList<DomNode> textlist = page3.querySelectorAll(".txt-publisher");//class获取textare 54 HtmlTextArea hta=(HtmlTextArea) textlist.get(0); 55 56 // HtmlTextArea hta = page3.getHtmlElementById("txt-publisher"); 57 hta.setAttribute("value", "蛋蛋"); 58 hta.setText("哎哟哎哟呵呵呵"); 59 System.out.println("______________________"+hta.asText());//有内容 60 DomNodeList<DomNode> sendlist = page3.querySelectorAll(".txt-link"); 61 HtmlPage page4 =((DomElement) sendlist.get(1)).click(); 62 HtmlPage pages =((DomElement) sendlist.get(1)).click(); 63 System.out.println(sendlist.size()); 64 System.out.println(page4.asText()); 65 System.out.println(pages.asText()); 66 DomNodeList<DomNode> sendlist2 = page3.querySelectorAll(".disable"); 67 HtmlPage page5 =((DomElement) sendlist2.get(0)).click(); 68 System.out.println(page5.asText());//无内容 69 } 70 }
如图,我编写了一个爬虫,登陆成功,并且进入发布页面,也给框框赋了值,获取到了发布的<A>。
图一在浏览器上操作时,点击发布,第一时间它会给个提示,然后跳到首页。
图二三 在java后台操作,可以获得赋给文本的值,不跳页面,在首页也看不到自己在java后台发布的东西,这是为何?