/// <summary>
/// Reads the content bytes of every page of a hard-coded PDF file and returns
/// them concatenated into one string, converting each byte to the character
/// with the same numeric value.
/// </summary>
/// <remarks>
/// NOTE(review): the result is the byte-for-byte page content, not decoded text,
/// so non-ASCII text (e.g. Chinese) will look garbled. If readable text is the
/// goal, a text-extraction API such as iTextSharp's PdfTextExtractor is
/// presumably the right tool - confirm against the iTextSharp version in use.
/// Earlier experiments with PdfTextExtractor.GetTextFromPage and with
/// Encoding.UTF8.GetString on the page bytes were removed from this method
/// because they were unreachable commented-out code.
/// </remarks>
/// <returns>
/// The content of all pages, in page order, one character per byte.
/// An empty string when the document reports zero pages.
/// </returns>
private string ReadPpf()
        {
            string fn = @"E:\PDFReaderTest\article\C#从入门到精通.pdf";
            PdfReader p = new PdfReader(fn);
            try
            {
                // Total number of pages in the document.
                int pg = p.NumberOfPages;
                System.Text.StringBuilder sb = new System.Text.StringBuilder();

                // Page numbers are 1-based.
                for (int i = 1; i <= pg; i++)
                {
                    // Content bytes of page i.
                    byte[] b = p.GetPageContent(i);

                    // Research/debug aid only: dump the page bytes to a text file.
                    // The same path is rewritten for every page, so after the loop
                    // the file holds the last page only.
                    System.IO.File.WriteAllBytes(@"E:\PDFReaderTest\article\xct.txt", b);

                    // Convert.ToChar(byte) yields the character whose code point
                    // equals the byte value; no multi-byte decoding takes place.
                    for (int j = 0; j < b.Length; j++)
                    {
                        sb.Append(Convert.ToChar(b[j]));
                    }
                }

                // Build the result once, after all pages have been appended.
                return sb.ToString();
            }
            finally
            {
                // Always release the reader, even when reading a page throws.
                p.Close();
            }
        }
试试将
System.IO.File.WriteAllBytes(@"E:\PDFReaderTest\article\xct.txt", b);
改为
System.IO.File.WriteAllText(@"E:\PDFReaderTest\article\xct.txt", System.Text.Encoding.UTF8.GetString(b));
不可以,还是乱码。
@-Ada-: 试试这里的方法:抽取PDF文本
应该是 PDF 的字符编码与当前默认的编码不一致。
楼主解决了吗?我也遇到同样的问题……