import datetime
from scipy.io import savemat


def create_assist_date(datestart=None, dateend=None):
    """Build a helper list of consecutive calendar dates as strings.

    Args:
        datestart: First date as a ``'YYYY-MM-DD'`` string. Defaults to
            ``'2016-01-01'`` when ``None``.
        dateend: Last date as a ``'YYYY-MM-DD'`` string. Defaults to the
            current local date when ``None``.

    Returns:
        A list of ``'YYYY-MM-DD'`` strings, one per day, from ``datestart``
        through ``dateend`` inclusive. The start date is always the first
        element, even when it is later than ``dateend``.

    Raises:
        ValueError: If either argument does not match ``'%Y-%m-%d'``
            (raised by ``datetime.datetime.strptime``).
    """
    if datestart is None:
        datestart = '2016-01-01'
    if dateend is None:
        dateend = datetime.datetime.now().strftime('%Y-%m-%d')

    # Parse both bounds so they can be compared and stepped as dates.
    current = datetime.datetime.strptime(datestart, '%Y-%m-%d')
    last = datetime.datetime.strptime(dateend, '%Y-%m-%d')

    date_list = [current.strftime('%Y-%m-%d')]
    while current < last:
        # Advance one day and store the new date as a string.
        current += datetime.timedelta(days=1)
        date_list.append(current.strftime('%Y-%m-%d'))
    return date_list
date_list就是我说的日期辅助表,
for date in date_list
从这里获取不到
其实我是先想生成日期格式表,
再用第二段程序利用这个表筛选
打开CSV文件并清洗数据
思路如下:
打开csv文件,获取保存的弹幕文本
清除无意义的标点符号,这里可以自己进行相应的设置
利用字典记录弹幕出现的次数
根据弹幕次数排序字典
每次写入次数最多的十条弹幕信息到另外的CSV文件中
原链接代码:https://blog.csdn.net/qq_36178962/article/details/108125794
第二段
import jieba
import re
import string
from zhon.hanzi import punctuation
import os
import csv

# Script: for every date in ``dateLists``, read that day's danmu CSV, strip
# unwanted characters, keep a running (cumulative) count of each cleaned text,
# and append the current top-10 texts to ``danmuku3.csv``.

# Cleaned danmu text -> number of occurrences, accumulated across all dates.
danmuCount = dict()
# Total number of CSV rows read, across all dates.
danmuNum = 0
# Characters removed from every danmu line before counting.
# NOTE(review): this string is interpolated unescaped into a regex character
# class, so each "+-=" is parsed as the range '+'..'=' and also strips the
# digits 0-9 and , . / : ; < -- confirm whether that is intended.
punc = '~`!#$%^&*()_+-=|\';":/.,?><~·!@#¥%……&*()——+-=“:’;、。?》《{} oh1O○〇●哈'
# Compiled once here instead of being rebuilt for every row.
_puncPattern = re.compile(r"[%s]+" % punc)

# ``dateLists`` is not defined in this snippet and must exist before this
# point, e.g. ``dateLists = create_assist_date(start, end)``.
# Presumably each entry is the date string used in the per-day file names
# ``csv3/danmutext_<date>.csv`` -- verify against the files on disk.

# newline='' is required by the csv module; without it the writer emits
# blank rows between records on Windows.
# NOTE: the file is opened in append mode, so the header row is written again
# on every run of the script.
with open('danmuku3.csv', 'a', encoding='utf-8', newline='') as savefile:
    writer = csv.writer(savefile)
    writer.writerow(['name', 'type', 'value', 'date'])
    for date in dateLists:
        with open('csv3/danmutext_' + date + '.csv', 'r',
                  encoding='utf-8', newline='') as csvfile:
            print('---分析日期', date, '弹幕...\n')
            reader = csv.reader(csvfile)
            for line in reader:
                danmuNum = danmuNum + 1
                # Join all columns of the row into one string, then strip
                # the unwanted characters.
                line = "".join(line)
                line = _puncPattern.sub("", line)
                # Only texts of 2 to 15 characters are counted.
                if 2 <= len(line) <= 15:
                    danmuCount[line] = danmuCount.get(line, 0) + 1
        # Rank all texts seen so far, most frequent first.
        sortList = sorted(danmuCount.items(), key=lambda item: item[1],
                          reverse=True)
        # Rows are only written once more than ten distinct texts exist.
        if len(sortList) > 10:
            pltLists = sortList[:10]
            for plttuple in pltLists:
                writer.writerow([plttuple[0], 'Chinese', plttuple[1], date])
怎么获取?执行 create_assist_date 函数后赋值给 date_list,然后遍历?
date_list = create_assist_date(start, end)

for date in date_list:
不是太明白你这意思 😂
应该是,问题是create_assist_date这个函数不知道在哪个模块