在尝试 用java对 google 的翻译功能做处理的时候,出现了编码的问题
这个是在java中打印的字符
? ???E0mWw/@?儳h5?|掲獟?!DF<?@坠?C#h3癘e嶺?G?X^?oo眉豒嚣?阨?/
此处文字以程序输出为准
以下是实际会返回的字符
[[["Hello","你好","","Nǐ hǎo"]],[["interjection",["hello","hi","hallo"],[["hello",["喂"],[1],0.034756977],["hi",["嗨"],[1],0.0046309186],["hallo",["你好"],,0.00010072414]]],["phrase",["How are you"],[["How are you",["你好"],,0.0020549577]]]],"zh-CN",,[["Hello",[5],0,0,1000,0,1,0]],[["你好",4,,,""],["你好",5,[["Hello",1000,0,0]],[[0,2]],"你好"]],,,[["zh-CN"]],14]
不知道怎么能够将字符转换出来。
附上源码,希望各位解答
package HTTPUtil;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import javax.servlet.ServletException;
/**
*
* @author Silver
*
*/
public class HTTPRequest {
public static void main(String[] args) {
try {
HTTPRequest tHTTPRequest = new HTTPRequest();
// String str = tHTTPRequest.HttpRequestGet("http://www.baidu.com", "");
// System.out.println(str);
String str = tHTTPRequest.sendGoogleTrans("你好","zh-CN","en");
} catch (Exception e) {
e.printStackTrace();
}
}
/**
* The HttpRequestGet method . <br>
*
* This method is calling for a http request test
*
* @param uri
* @param querystring
* @throws IOException if an error occurred
*/
public String HttpRequestGet(String uri, String querystring)
throws IOException {
StringBuffer sbstr = new StringBuffer(256);
if (querystring != null && querystring.length() > 0) {
uri = uri + "?" + querystring;
}
URL url = new URL(uri);
URLConnection conn = url.openConnection();
BufferedReader bufferferreader = new BufferedReader(
new InputStreamReader(conn.getInputStream()));
String line;
while ((line = bufferferreader.readLine()) != null) {
sbstr.append(line);
}
bufferferreader.close();
return sbstr.toString();
}
/**
* The sendGoogleTrans method . <br>
*
* This method is calling for a translate test
*
* @param transString the String you want to trans
* @param from <a>GlobalType</a>
* @param to <a>GlobalType</a>
* @throws IOException if an error occurred
*/
public String sendGoogleTrans(String transString,String from,String to) throws IOException
{
String uri = "http://translate.google.cn/translate_a/t?client=t&text="+transString+"&hl=en&sl="+from+"&tl="+to+"&ie=UTF-8&oe=UTF-8&multires=1&otf=1&pc=1&ssel=0&tsel=0&sc=1";
System.out.println(uri);
URL url = new URL(uri);
StringBuffer sbstr = new StringBuffer(256);
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
conn.setRequestMethod("GET");
conn.setDoOutput(true);
conn.setDoInput(true);
conn.setUseCaches(false);
conn.setAllowUserInteraction(false);
conn.setRequestProperty("Connection", "keep-alive");
conn.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11");
conn.setRequestProperty("Accept", "*/*");
conn.setRequestProperty("Referer", "http://translate.google.cn/?hl=en");
conn.setRequestProperty("Accept-Encoding", "gzip,deflate,sdch");
conn.setRequestProperty("Accept-Language", "zh-CN,zh;q=0.8");
conn.setRequestProperty("Accept-Charset", "GBK,utf-8;q=0.7,*;q=0.3");
File file= new File("D:/1.txt");
BufferedReader bufferferreader = new BufferedReader(
new InputStreamReader(conn.getInputStream()));
String line;
while ((line = bufferferreader.readLine()) != null) {
sbstr.append(line);
}
bufferferreader.close();
String rtn = sbstr.toString();
System.out.println(rtn);
/*Google 返回的编码不知道是什么格式,这里进行处理*/
// String rtndecode1 = java.net.URLDecoder.decode(rtn,"UTF-8");
// System.out.println(rtndecode1);
// String rtndecode2 = new String(rtn.getBytes(rtn),"UTF-8");
// System.out.println(rtndecode2);
// String rtndecode3= java.net.URLDecoder.decode(rtndecode2,"UTF-8");
// System.out.println(rtndecode3);
return rtn;
}
}
现在能确定,是
conn.setRequestProperty("Accept-Charset", "GBK,utf-8;q=0.7,*;q=0.3");
去掉其中的"GBK,"试试
没有效果
@SilverSteven: 问题可能出在下面这部分代码:
BufferedReader bufferferreader = new BufferedReader( new InputStreamReader(conn.getInputStream())); String line; while ((line = bufferferreader.readLine()) != null) { sbstr.append(line); }
@dudu:
已经能够确定,不是代码的问题造成的,而是因为的串中是经过压缩造成的。
但是不知道怎么解压,通过java的gzip解压的话,也不能够正确显示。