"Thread-14" prio=10 tid=0x00007f09d4024000 nid=0x6021 runnable [0x00007f0a15e68000]
java.lang.Thread.State: RUNNABLE
at java.net.SocketInputStream.socketRead0(Native Method)
at java.net.SocketInputStream.read(SocketInputStream.java:129)
at java.io.BufferedInputStream.fill(BufferedInputStream.java:218)
at java.io.BufferedInputStream.read1(BufferedInputStream.java:258)
at java.io.BufferedInputStream.read(BufferedInputStream.java:317)
- locked <0x000000009a12af98> (a java.io.BufferedInputStream)
at sun.net.www.http.ChunkedInputStream.fastRead(ChunkedInputStream.java:221)
at sun.net.www.http.ChunkedInputStream.read(ChunkedInputStream.java:662)
- locked <0x000000009a12afc0> (a sun.net.www.http.ChunkedInputStream)
at java.io.FilterInputStream.read(FilterInputStream.java:116)
at sun.net.www.protocol.http.HttpURLConnection$HttpInputStream.read(HttpURLConnection.java:2672)
at sun.nio.cs.StreamDecoder.readBytes(StreamDecoder.java:264)
at sun.nio.cs.StreamDecoder.implRead(StreamDecoder.java:306)
at sun.nio.cs.StreamDecoder.read(StreamDecoder.java:158)
- locked <0x000000009a12b060> (a java.io.InputStreamReader)
at java.io.InputStreamReader.read(InputStreamReader.java:167)
at uestc.dmlab.crawler.main.DownLoadPage.getContentFromUrl(DownLoadPage.java:67)
at uestc.dmlab.crawler.main.UrlDataHanding.dataHanding(UrlDataHanding.java:26)
at uestc.dmlab.crawler.main.UrlDataHanding.run(UrlDataHanding.java:51)
at java.lang.Thread.run(Thread.java:662)
"Thread-13" prio=10 tid=0x00007f09d4022000 nid=0x6020 runnable [0x00007f0a15f69000]
java.lang.Thread.State: RUNNABLE
at java.net.SocketInputStream.socketRead0(Native Method)
at java.net.SocketInputStream.read(SocketInputStream.java:129)
at java.io.BufferedInputStream.fill(BufferedInputStream.java:218)
at java.io.BufferedInputStream.read1(BufferedInputStream.java:258)
at java.io.BufferedInputStream.read(BufferedInputStream.java:317)
- locked <0x000000008a0bc710> (a java.io.BufferedInputStream)
at sun.net.www.http.ChunkedInputStream.fastRead(ChunkedInputStream.java:221)
at sun.net.www.http.ChunkedInputStream.read(ChunkedInputStream.java:662)
- locked <0x000000008a0c8e58> (a sun.net.www.http.ChunkedInputStream)
at java.io.FilterInputStream.read(FilterInputStream.java:116)
at sun.net.www.protocol.http.HttpURLConnection$HttpInputStream.read(HttpURLConnection.java:2672)
at sun.nio.cs.StreamDecoder.readBytes(StreamDecoder.java:264)
at sun.nio.cs.StreamDecoder.implRead(StreamDecoder.java:306)
at sun.nio.cs.StreamDecoder.read(StreamDecoder.java:158)
- locked <0x000000008a0c8f28> (a java.io.InputStreamReader)
at java.io.InputStreamReader.read(InputStreamReader.java:167)
at uestc.dmlab.crawler.main.DownLoadPage.getContentFromUrl(DownLoadPage.java:67)
at uestc.dmlab.crawler.main.UrlDataHanding.dataHanding(UrlDataHanding.java:26)
at uestc.dmlab.crawler.main.UrlDataHanding.run(UrlDataHanding.java:48)
at java.lang.Thread.run(Thread.java:662)
"Thread-12" prio=10 tid=0x00007f09d4020000 nid=0x601f runnable [0x00007f0a1606a000]
java.lang.Thread.State: RUNNABLE
at java.net.SocketInputStream.socketRead0(Native Method)
at java.net.SocketInputStream.read(SocketInputStream.java:129)
at java.io.BufferedInputStream.fill(BufferedInputStream.java:218)
at java.io.BufferedInputStream.read1(BufferedInputStream.java:258)
at java.io.BufferedInputStream.read(BufferedInputStream.java:317)
- locked <0x0000000090902d98> (a java.io.BufferedInputStream)
at sun.net.www.http.ChunkedInputStream.fastRead(ChunkedInputStream.java:221)
at sun.net.www.http.ChunkedInputStream.read(ChunkedInputStream.java:662)
- locked <0x0000000090902dc0> (a sun.net.www.http.ChunkedInputStream)
at java.io.FilterInputStream.read(FilterInputStream.java:116)
at sun.net.www.protocol.http.HttpURLConnection$HttpInputStream.read(HttpURLConnection.java:2672)
at sun.nio.cs.StreamDecoder.readBytes(StreamDecoder.java:264)
at sun.nio.cs.StreamDecoder.implRead(StreamDecoder.java:306)
at sun.nio.cs.StreamDecoder.read(StreamDecoder.java:158)
- locked <0x0000000090902e60> (a java.io.InputStreamReader)
at java.io.InputStreamReader.read(InputStreamReader.java:167)
at uestc.dmlab.crawler.main.DownLoadPage.getContentFromUrl(DownLoadPage.java:67)
at uestc.dmlab.crawler.main.UrlDataHanding.dataHanding(UrlDataHanding.java:26)
at uestc.dmlab.crawler.main.UrlDataHanding.run(UrlDataHanding.java:51)
at java.lang.Thread.run(Thread.java:662)
这部分的代码
URL path = new URL(gd.url); String content = ""; HttpURLConnection connection = (HttpURLConnection) path.openConnection(); connection.connect(); InputStream urlStream = connection.getInputStream(); InputStreamReader isr; if (gd.url.contains("suning") || gd.url.contains("amazon")) { isr = new InputStreamReader(urlStream, "UTF-8"); } else { isr = new InputStreamReader(urlStream, "GBK"); } char[] buf = new char[2048]; int n = 0; while ((n = isr.read(buf, 0, 2048)) != -1) { // System.out.println(new String(buf,0,n).toString()); content += new String(buf, 0, n); }
是什么原因呢
这里大部分搞。net吧,关键是代码这么长,有人会回答么,难
多长 10行左右叫长?上面是线程堆栈情况,无语了。
上面这段代码没有问题的,你调用端怎么写的呢?代码贴完整哟。
应该是服务端没有socket.close()原因吧,所以你这边就一直等待读取。你这里应该是抓取页面信息吧,可以增加一个判断,如果最后一次content中存在</html>标签可以认为已经结束了,主动关闭isr; 或者设置下connect的timeout时间