分享一个统计文档中不同key的个数的python脚本

2021-03-19 12:24

阅读:549

标签:error:   input   close   不重复   样本   coding   int   ace   white   

前提:

原统计文档中的条目以空格分隔,或只有一列(以便用 awk 筛选出某一列)

#!/usr/bin/env python
# -*- coding:utf-8 -*-
import os
import sys

 6 PRINTRED = "\033[1;31m"
 7 PRINTGREEN = "\033[0;32;47m"   #green color backgroud is white
 8 PRINTGREEN_SIM = "\033[1;32m"  #no backgroud green color
 9 PRINTBLUE = "\033[1;34m"
10 PRINTCOLOR_END= "\033[0m"
11 
def getAllItemList(fileName):
    """Return the non-blank lines of a text file, stripped of whitespace.

    Args:
        fileName: Path of the text file to read.

    Returns:
        A list holding one entry per line that contains at least one
        non-whitespace character, with leading and trailing whitespace
        removed. Whitespace-only lines are skipped.

    Raises:
        SystemExit: With status 1, after printing an error message, when
            the file cannot be opened or read (IOError).
    """
    try:
        # "with" closes the handle even if reading fails part-way.
        with open(fileName, "r") as fp:
            fLines = fp.readlines()
    except IOError:
        print("error: file not found, please check it !!!")
        # Non-zero status so the calling shell can detect the failure.
        sys.exit(1)

    stripped = (line.strip() for line in fLines)
    return [item for item in stripped if item]

def calSameItemCount(itemList):
    """Print the number of distinct keys in itemList and each key's count.

    Args:
        itemList: Iterable of hashable keys (strings in this script).

    Returns:
        None. The report is written to stdout, with the numbers wrapped
        in the PRINTRED / PRINTCOLOR_END escape sequences.
    """
    sameCountDict = {}
    for item in itemList:
        # dict.get works on Python 2 and 3; dict.has_key is Python 2 only.
        sameCountDict[item] = sameCountDict.get(item, 0) + 1

    print("不重复key个数:{cstart}{count:d}{cend}".format(
        cstart=PRINTRED, count=len(sameCountDict), cend=PRINTCOLOR_END))
    print("不重复的key如下:")
    for key, value in sameCountDict.items():
        print("{key} 有[{cstart}{count:d}{cend}]个!".format(
            key=key, cstart=PRINTRED, count=value, cend=PRINTCOLOR_END))

def main():
    """Extract column 10 of count.txt with awk and print key statistics.

    Runs awk to write the tenth field of every line of count.txt into
    awkCountitem.txt, reads that file back, prints the number of
    non-blank samples, and then prints the per-key counts.
    """
    # The key to count lives in column 10; the input file name is fixed
    # to count.txt. The awk program must be wrapped in plain ASCII single
    # quotes, otherwise the shell expands $10 and splits the program.
    os.system("awk '{print $10}' count.txt > awkCountitem.txt")

    fileName = "awkCountitem.txt"
    itemList = getAllItemList(fileName)
    itemCount = len(itemList)
    print("{name}:样本数据[{cstart}{count:d}{cend}] 个!!!".format(
        name=fileName, cstart=PRINTRED, count=itemCount,
        cend=PRINTCOLOR_END))
    calSameItemCount(itemList)

# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

 

 

#!/usr/bin/env python
# -*- coding:utf-8 -*-
import sys
import os

# ANSI escape sequences used to colorize terminal output.
PRINTRED = "\033[1;31m"         # bold red
PRINTGREEN = "\033[0;32;47m"    # green text on a white background
PRINTGREEN_SIM = "\033[1;32m"   # bold green, no background color
PRINTBLUE = "\033[1;34m"        # bold blue
PRINTCOLOR_END = "\033[0m"      # reset all text attributes

def getAllItemList(fileName):
    """Return the non-blank lines of a text file, stripped of whitespace.

    Args:
        fileName: Path of the text file to read.

    Returns:
        A list holding one entry per line that contains at least one
        non-whitespace character, with leading and trailing whitespace
        removed. Whitespace-only lines are skipped.

    Raises:
        SystemExit: With status 1, after printing an error message, when
            the file cannot be opened or read (IOError).
    """
    try:
        # "with" closes the handle even if reading fails part-way.
        with open(fileName, "r") as fp:
            fLines = fp.readlines()
    except IOError:
        print("error: file not found, please check it !!!")
        # Non-zero status so the calling shell can detect the failure.
        sys.exit(1)

    stripped = (line.strip() for line in fLines)
    return [item for item in stripped if item]

def calSameItemCount(itemList):
    """Print the number of distinct keys in itemList and each key's count.

    Args:
        itemList: Iterable of hashable keys (strings in this script).

    Returns:
        None. The report is written to stdout, with the numbers wrapped
        in the PRINTRED / PRINTCOLOR_END escape sequences.
    """
    sameCountDict = {}
    for item in itemList:
        # dict.get works on Python 2 and 3; dict.has_key is Python 2 only.
        sameCountDict[item] = sameCountDict.get(item, 0) + 1

    print("不重复key个数:{cstart}{count:d}{cend}".format(
        cstart=PRINTRED, count=len(sameCountDict), cend=PRINTCOLOR_END))
    print("不重复的key如下:")
    for key, value in sameCountDict.items():
        print("{key} 有[{cstart}{count:d}{cend}]个!".format(
            key=key, cstart=PRINTRED, count=value, cend=PRINTCOLOR_END))


def main():
    """Extract column 10 of count.txt with awk and print key statistics.

    Runs awk to write the tenth field of every line of count.txt into
    awkCountitem.txt, reads that file back, prints the number of
    non-blank samples, and then prints the per-key counts.
    """
    # The awk program must be wrapped in plain ASCII single quotes,
    # otherwise the shell expands $10 and splits the program.
    os.system("awk '{print $10}' count.txt > awkCountitem.txt")

    fileName = "awkCountitem.txt"
    itemList = getAllItemList(fileName)
    itemCount = len(itemList)
    print("{name}:样本数据[{cstart}{count:d}{cend}] 个!!!".format(
        name=fileName, cstart=PRINTRED, count=itemCount,
        cend=PRINTCOLOR_END))
    calSameItemCount(itemList)

# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

 

分享一个统计文档中不同key的个数的python脚本

标签:error:   input   close   不重复   样本   coding   int   ace   white   

原文地址:https://www.cnblogs.com/daimadebanyungong/p/14554013.html


评论


亲,登录后才可以留言!