首页新闻找找看学习计划

脚本在Python中运行正常,但在C#中通过IronPython调用时各种报错

0
悬赏园豆:20 [已关闭问题] 关闭于 2017-10-24 09:26

c#

using System;
using System.Collections.Generic;
using IronPython.Hosting;
using System.IO;

/// <summary>
/// Code-behind page that runs the ScrapyCode.py script through an embedded
/// IronPython engine.
/// </summary>
public partial class Report_fspcDetail : System.Web.UI.Page
{
    /// <summary>
    /// Runs the scraper on the initial page request only; post-backs are ignored.
    /// </summary>
    protected void Page_Load(object sender, EventArgs e)
    {
        if (IsPostBack)
        {
            return;
        }

        initDetail();
    }

    /// <summary>
    /// Creates an IronPython engine, extends its module search paths, executes
    /// ScrapyCode.py from the application base directory and invokes the
    /// script's <c>getPlistCode</c> function with the argument 1.
    /// </summary>
    public void initDetail()
    {
        var engine = Python.CreateEngine();
        var scope = engine.CreateScope();

        // Extra module search paths, appended in this order to the engine defaults.
        string[] extraPaths =
        {
            "C:\\Python27\\Lib",
            "C:\\Python27\\Lib\\json",
            "C:\\Python27\\Lib\\site-packages\\bs4",
            "C:\\Python27\\Lib\\site-packages"
        };

        ICollection<string> searchPaths = engine.GetSearchPaths();
        foreach (string extraPath in extraPaths)
        {
            searchPaths.Add(extraPath);
        }
        engine.SetSearchPaths(searchPaths);

        // Load and run the script so that its functions are defined in the scope.
        string scriptPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "ScrapyCode.py");
        var script = engine.CreateScriptSourceFromFile(scriptPath);
        script.Execute(scope);

        // The script's getDetail function can be fetched the same way, e.g.
        // scope.GetVariable<Func<object, object>>("getDetail") called with an
        // enterprise id such as "8a81819857699d6601577aa1222f6c70".
        var getPlistCode = scope.GetVariable<Func<object, object>>("getPlistCode");

        // The returned value is not used yet.
        var result = getPlistCode(1);
    }
}

python

# -*- coding:utf-8 -*-

import urllib
import urllib2
import json
from bs4 import BeautifulSoup
import sys
# Python 2 workaround: sys.setdefaultencoding is removed from the sys module
# after interpreter start-up, and reload(sys) makes it available again.
# Setting the default encoding to UTF-8 changes how implicit str <-> unicode
# conversions in the code below behave (they would otherwise use ASCII).
# NOTE(review): this relies on CPython 2 behaviour -- confirm that it works
# the same way when the script is hosted by IronPython.
reload(sys)
sys.setdefaultencoding('utf-8')


def chineseToUnic(ch):
    """Return *ch* encoded with the ``unicode_escape`` codec.

    Non-ASCII characters come back as backslash escapes (for example
    ``\\uXXXX``), so the result is plain ASCII.

    Args:
        ch: Either a UTF-8 encoded byte string or an already decoded text
            string.

    Returns:
        The ``unicode_escape`` encoded form of the text.
    """
    # Only byte strings need decoding.  Decoding unconditionally only worked
    # for text input because of the process-wide default-encoding override,
    # and fails outright where text objects have no decode() method.
    if isinstance(ch, bytes):
        ch = ch.decode('utf-8')
    return ch.encode('unicode_escape')
# 函数 获取页面数据
def getPlistCode(condition):
    """Search the enterprise list page and return the detail data of the hit.

    The search form is posted with a hard-coded enterprise name.  In the
    returned HTML the last table containing the provider notice is inspected:

    * a table with exactly two rows is treated as the time-out case and the
      result of ``getDetail('timeOut')`` is returned;
    * otherwise the first row that contains an ``openXyhcDetail`` link is
      split on single quotes and the fourth piece is passed to ``getDetail``
      as the enterprise id.

    Args:
        condition: Currently unused; the enterprise name is hard-coded below.

    Returns:
        Whatever ``getDetail`` returns for the selected row, or ``None`` when
        the notice table or a usable detail link is not found.  (Previously
        the ``getDetail`` result was discarded and ``None`` was always
        returned, so hosts calling this function never received any data.)
    """
    url = 'http://xy.fspc.gov.cn/tentbaseinfoAction!getTentbaseinfoList.do?'

    # Form data of the POST request.  The enterprise name is encoded to UTF-8
    # explicitly so that urlencode does not depend on the interpreter's
    # default encoding.
    postdata = urllib.urlencode({
        'page': '1',
        'pageSize': 10,
        'num': '1',
        'leftnum': '1',
        'creditquery.enterpriseName': u'佛山市三能灯饰工程有限公司'.encode('utf-8'),
        'creditquery.businessAddress': '',
        'creditquery.bussRegNo': '',
        'creditquery.tyxyshdm_query': '',
        'validateCode': '',
    })
    postdata = postdata.encode('utf-8')

    response = urllib2.urlopen(url, postdata)
    try:
        html = response.read()
    finally:
        # Always release the connection, even if reading fails.
        response.close()

    soup = BeautifulSoup(html, "html.parser")

    # Keep the last table that carries the provider notice.
    table = None
    for tb in soup.find_all('table'):
        if '以下信息由信用广东网提供' in str(tb):
            table = tb

    if table is None:
        return None

    rows = table.find_all('tr')
    if len(rows) == 2:
        # A two-row table is handled as the time-out page.
        return getDetail('timeOut')

    for row in rows:
        rowText = str(row)
        if 'openXyhcDetail' not in rowText:
            continue
        pieces = rowText.split('\'')
        # The enterprise id is the fourth piece; skip rows that are too
        # short to contain it instead of raising IndexError.
        if len(pieces) > 3:
            return getDetail(pieces[3])

    return None


def getDetail(qyid):
    """Fetch the detail page of one enterprise and return its person data.

    Every table on the detail page that mentions the ID-number label is
    scanned.  From each of those tables, the rows mentioning one of the
    labels for name, ID number, position or natural person contribute a
    ``'label':'value'`` pair built from the text of the first two cells.

    Args:
        qyid: Enterprise id to post to the detail page, or the sentinel
            ``'timeOut'``.

    Returns:
        For ``'timeOut'`` the fixed time-out message.  Otherwise a JSON string
        of a list of lists: slot *i* holds one comma-joined string of pairs
        for the i-th matching table.  The list always has at least 11 slots,
        with unused slots left empty.
    """
    if qyid == 'timeOut':
        return "网络连接超时"

    url = 'http://xy.fspc.gov.cn/tentbaseinfoAction!getDetail.do?'
    # Form data of the POST request.
    postdata = urllib.urlencode({'qyid': qyid})
    postdata = postdata.encode('utf-8')

    response = urllib2.urlopen(url, postdata)
    try:
        html = response.read()
    finally:
        # Always release the connection, even if reading fails.
        response.close()

    soup = BeautifulSoup(html, "html.parser")

    # Tables whose escaped markup contains the ID-number label.
    idMarker = chineseToUnic('身份证件号码')
    tabList = [tb for tb in soup.find_all('table')
               if chineseToUnic(str(tb)).find(idMarker) != -1]

    # Row labels to look for, with every backslash doubled.
    # NOTE(review): presumably str() of the cell list is already
    # backslash-escaped, so chineseToUnic escapes it a second time --
    # confirm against the bs4 version in use.
    labels = ('姓名', '身份证件号码', '职务', '自然人')
    markers = [chineseToUnic(label).replace('\\', '\\\\') for label in labels]

    arrJson = [[] for _ in range(11)]
    for indexNum, tb in enumerate(tabList):
        pairs = []
        for row in tb.find_all('tr'):
            cells = row.find_all('td')
            # A pair needs a label cell and a value cell.
            if len(cells) < 2:
                continue
            # Escape the row once instead of once per label.
            escapedCells = chineseToUnic(str(cells))
            if any(escapedCells.find(marker) != -1 for marker in markers):
                pairs.append('\'' + cells[0].getText() + '\':\'' +
                             cells[1].getText() + '\'')

        # Grow the result beyond the initial 11 slots instead of raising
        # IndexError when more tables match.
        if indexNum >= len(arrJson):
            arrJson.append([])
        arrJson[indexNum].append(','.join(pairs))

    return json.dumps(arrJson, encoding='UTF-8', ensure_ascii=False)

#if __name__ == '__main__':
#    # print('**********************************即将进行抓取**********************************')
#    # startIndex = input('请输入您要搜索起始页(数字):')
#    # pageCount = input('请输入您要搜索的结束页(数字):')
#    # condition = input('请输入您要搜索公司全称:')
#    getPlistCode(1)

python在pycharm中正常运行

然后在C#中利用IronPython调用时各种报错,我都快哭了

上面代码执行直接报错。我也奇了怪了。

求大神帮我调通,不胜感激

 

CTRA王大大的主页 CTRA王大大 | 初学一级 | 园豆:30
提问于:2017-09-20 17:45
< >
分享
所有回答(3)
0

原生python和ironpython底层的实现应该不太一致,这可能是导致你出错的原因之一。

另一个,你python的实现的难点无非就是html的解析,但这个在.net下也有第三方组件可以完成,比如Html Agility Pack,直接用类似linq to xml的方式来完成你对节点的查询,后面组装数据这块相比你的代码而言.net在这块会更简单。

如果你实在无法搞定建议你直接用原生的.net来实现。

Daniel Cai | 园豆:10374 (专家六级) | 2017-09-20 19:19
0

python和ironpython结合完全是坑,写不下去了,直接用C#了。

CTRA王大大 | 园豆:30 (初学一级) | 2017-10-24 09:26
0

确实,用IronPython太麻烦了,各种跟直接运行的差异,调起来太耗时,从入坑到放弃的典型。。

CoderMonkey | 园豆:210 (菜鸟二级) | 2018-10-18 11:17
清除回答草稿
   您需要登录以后才能回答,未注册用户请先注册