python-使用正则快速解析QQ群聊记录
2020-11-22 18:27
标签:style blog class code java javascript color int 2014 rgb http。使用正则表达式对QQ群聊天记录进行解析,用于分析日期、成员等维度的发言情况。
下面依次给出原始文本、解析之后的结果,以及代码。(python-使用正则快速解析QQ群聊记录,搜素材,soscw.com;python-使用正则快速解析QQ群聊记录;标签:style blog class code java javascript color int 2014 rgb http;原文地址:http://www.cnblogs.com/zhangbo2012/p/3700699.html)
2014-03-28 15:04:25 №┽◎Eagle(369029696)
yyyy = 2014
mm = 03
dd = 28
hh = 15
mi = 04
ss = 25
nick = №┽◎Eagle
qq = 369029696
# -*- coding: utf-8 -*-
"""
zhangbo2012
http://www.cnblogs.com/zhangbo2012/
"""
import re
def resolving_by_user(filepath):
    """Count message headers per QQ number in a chat-log file and print the totals.

    The file is read in full and scanned for header lines of the form
    ``YYYY-MM-DD HH:MM:SS nick(qq)``. For every distinct ``qq`` value a record
    ``{"qq": ..., "nick": ..., "worldcnt": ...}`` is kept, where ``nick`` is
    the nickname seen on the most recent matching header and ``worldcnt`` is
    the number of matching headers for that ``qq``.

    One line is printed per ``qq``: ``repr(qq)`` right-justified to 15
    columns followed by ``repr(worldcnt)`` right-justified to 10 columns.

    Args:
        filepath: Path of the chat-log text file to parse.

    Returns:
        None. The result is only printed.

    Raises:
        IOError/OSError: If the file cannot be opened or read.
    """
    # NOTE(review): the file is opened with the platform default encoding;
    # confirm it matches the encoding of the exported chat log.
    with open(filepath, 'r') as rf:
        filecontent = rf.read()

    resolving_result = {}

    # Header line example: 2014-03-28 15:04:25 №┽◎Eagle(369029696)
    # The header must end its line. `\r?$` with MULTILINE (instead of a
    # literal `\n`) also accepts CRLF endings and a header on the final line
    # of a file that has no trailing newline.
    p = re.compile(
        r'(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2}) (.*)\((.*?)\)\r?$',
        re.MULTILINE
    )

    # Only the nickname and the QQ number are used; the date/time groups are
    # captured by the pattern but not needed for the per-user count.
    for _yyyy, _mm, _dd, _hh, _mi, _ss, nick, qq in p.findall(filecontent):
        entry = resolving_result.get(qq)
        if entry is None:
            resolving_result[qq] = {"qq": qq, "nick": nick, "worldcnt": 1}
        else:
            # Keep the latest nickname seen for this QQ number.
            entry["nick"] = nick
            entry["worldcnt"] += 1

    for value in resolving_result.values():
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(str.rjust(repr(value['qq']), 15) + str.rjust(repr(value['worldcnt']), 10))
# Script entry point: parse the chat log "2.txt" from the current working
# directory when this file is run directly (not when it is imported).
if __name__ == '__main__':
    resolving_by_user("2.txt")