分享一个统计文档中不同key的个数的python脚本
2021-03-19 12:24
标签: error: input close 不重复 样本 coding int ace white
前提: 原统计文档中的条目以空格分隔,或只有一列(以便用 awk 筛选出某一列)。
摘要: #!/usr/bin/env python  # -*- coding:utf-8 -*-  import sys  import os  def main(): os.system("awk '{print $10}' count.txt > awkCountitem.txt")
分享一个统计文档中不同key的个数的python脚本
标签: error: input close 不重复 样本 coding int ace white
原文地址: https://www.cnblogs.com/daimadebanyungong/p/14554013.html

#!/usr/bin/env python
# -*- coding:utf-8 -*-
import os
import sys
from collections import Counter

# ANSI SGR escape sequences used to colorize terminal output.
PRINTRED = "\033[1;31m"  # bold/bright red text
PRINTGREEN = "\033[0;32;47m"  # green text on a white background
PRINTGREEN_SIM = "\033[1;32m"  # bold/bright green text, no background color
PRINTBLUE = "\033[1;34m"  # bold/bright blue text
PRINTCOLOR_END = "\033[0m"  # reset all attributes back to the default

def getAllItemList(fileName):
    """Return the stripped, non-blank lines of a text file.

    Args:
        fileName: Path of the file to read.

    Returns:
        A list with one entry per line that is not whitespace-only; leading
        and trailing whitespace is removed from every entry.

    Raises:
        SystemExit: With exit status 1, after printing an error message,
            when the file cannot be opened or read.
    """
    try:
        # The context manager closes the file even if reading fails midway.
        with open(fileName, 'r') as fp:
            stripped = (line.strip() for line in fp)
            return [item for item in stripped if item]
    except IOError:
        print("error: file not found, please check it !!!")
        # A failure must not report success (status 0) to the calling shell.
        sys.exit(1)

def calSameItemCount(itemList):
    """Print the number of distinct keys in *itemList* and each key's count.

    The numeric values are wrapped in PRINTRED / PRINTCOLOR_END so they are
    highlighted on the terminal.

    Args:
        itemList: Iterable of hashable keys (strings read from the file).

    Returns:
        None. The report is written to standard output.
    """
    # Counter replaces the manual dict.has_key() tally; has_key() was
    # removed in Python 3.
    sameCountDict = Counter(itemList)

    distinctLine = "不重复key个数:{cstart}%d{cend}".format(
        cstart=PRINTRED, cend=PRINTCOLOR_END)
    print(distinctLine % len(sameCountDict))
    print("不重复的key如下:")

    # Build the colorized template once instead of on every iteration.
    perKeyLine = "%s 有[{cstart}%d{cend}]个!".format(
        cstart=PRINTRED, cend=PRINTCOLOR_END)
    for key, value in sameCountDict.items():
        print(perKeyLine % (key, value))


def main():
    """Extract column 10 of count.txt with awk and report per-key counts.

    The extracted column is written to awkCountitem.txt, which is then read
    back, its sample count printed, and the per-key statistics printed by
    calSameItemCount().

    Raises:
        SystemExit: With exit status 1 when the awk command fails.
    """
    # The keys to count are in column 10; the input file name is fixed as
    # count.txt. The awk program must be wrapped in ASCII single quotes so
    # the shell passes it through as one argument.
    status = os.system("awk '{print $10}' count.txt > awkCountitem.txt")
    if status != 0:
        # Without a successful extraction the statistics would be meaningless.
        print("error: awk failed to extract column 10 from count.txt")
        sys.exit(1)

    fileName = "awkCountitem.txt"
    itemList = getAllItemList(fileName)
    itemCount = len(itemList)
    print(("%s:样本数据[{cstart}%d{cend}] 个!!!").format(
        cstart=PRINTRED, cend=PRINTCOLOR_END) % (fileName, itemCount))
    calSameItemCount(itemList)

# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
# ANSI SGR escape sequences used to colorize terminal output.
PRINTRED = "\033[1;31m"  # bold/bright red text
PRINTGREEN = "\033[0;32;47m"  # green text on a white background
PRINTGREEN_SIM = "\033[1;32m"  # bold/bright green text, no background color
PRINTBLUE = "\033[1;34m"  # bold/bright blue text
PRINTCOLOR_END = "\033[0m"  # reset all attributes back to the default
def getAllItemList(fileName):
    """Return the stripped, non-blank lines of a text file.

    Args:
        fileName: Path of the file to read.

    Returns:
        A list with one entry per line that is not whitespace-only; leading
        and trailing whitespace is removed from every entry.

    Raises:
        SystemExit: With exit status 1, after printing an error message,
            when the file cannot be opened or read.
    """
    try:
        # The context manager closes the file even if reading fails midway.
        with open(fileName, 'r') as fp:
            stripped = (line.strip() for line in fp)
            return [item for item in stripped if item]
    except IOError:
        print("error: file not found, please check it !!!")
        # A failure must not report success (status 0) to the calling shell.
        sys.exit(1)
def calSameItemCount(itemList):
    """Print the number of distinct keys in *itemList* and each key's count.

    The numeric values are wrapped in PRINTRED / PRINTCOLOR_END so they are
    highlighted on the terminal.

    Args:
        itemList: Iterable of hashable keys (strings read from the file).

    Returns:
        None. The report is written to standard output.
    """
    # Counter replaces the manual dict.has_key() tally; has_key() was
    # removed in Python 3.
    sameCountDict = Counter(itemList)

    distinctLine = "不重复key个数:{cstart}%d{cend}".format(
        cstart=PRINTRED, cend=PRINTCOLOR_END)
    print(distinctLine % len(sameCountDict))
    print("不重复的key如下:")

    # Build the colorized template once instead of on every iteration.
    perKeyLine = "%s 有[{cstart}%d{cend}]个!".format(
        cstart=PRINTRED, cend=PRINTCOLOR_END)
    for key, value in sameCountDict.items():
        print(perKeyLine % (key, value))
#fileName = raw_input("please input fileName: \n") fileName = "awkCountitem.txt" itemList = getAllItemList(fileName) itemCount = len(itemList) print(("%s:样本数据[{cstart}%d{cend}] 个!!!").format(cstart=PRINTRED,cend=PRINTCOLOR_END) % (fileName, itemCount)) calSameItemCount(itemList)
# Run main() only when this file is executed as a script, not on import.
if __name__=="__main__": main()
上一篇:Java泛型