Python3自动化_文件批量处理(文本、PDF;读取、筛选、导出)
2021-01-26 13:16
标签:ons 级别 个数 datetime eve 数据库 init 日志处理 sea
利用Python3脚本语言的简练语法和高级语言的丰富类库,快速写了几个用于文件读取、筛选、导出的“脚本”。这里简单总结一下关键功能:
- 检查ini文件是否存在;检查输入的key在ini文件里是否有定义。
- 检查参数个数;检查参数合法性(长度、是否为目录);检查参数是否全部由汉字组成。
- 根据ini文件定义的数据库连接信息尝试连接数据库;执行SQL语句。
- 定义输出日志的级别;日志为异常(ERROR)级别时,结束处理。
- 列表元素批量处理:按第二个下划线字符截取字符串。
- 目录/文件判断;目录的路径分割;从完整路径中取得文件名。
来源:https://www.cnblogs.com/alexzhang92/p/11488949.html
转载请注明原文链接,谢谢。
Python3自动化_文件批量处理(文本、PDF;读取、筛选、导出)
标签:ons 级别 个数 datetime eve 数据库 init 日志处理 sea
原文地址:https://www.cnblogs.com/soulxj/p/12788250.html
读取ini配置文件
1 import configparser
2
def getConfigInfo(_ini_nm):
    """Prompt for a system code and return the matching ini section.

    The file ``<_ini_nm>.ini`` is read from the current working directory.
    The user is then asked for a system code until the input matches one
    of the section names; an empty input ends the program.

    Args:
        _ini_nm: Name of the ini file without the ``.ini`` extension.

    Returns:
        dict: The options of the selected section (option name -> value).

    Raises:
        SystemExit: When the user presses [Enter] without typing a code.
    """
    # Open Ini File
    config = configparser.ConfigParser()
    # ConfigParser.read() returns the list of files it managed to parse,
    # so an empty list means the ini file is missing or unreadable.
    if not config.read(os.path.join(os.getcwd(), _ini_nm + '.ini')):
        printLog('E', 'Read Ini file fail.')

    while True:
        sysCode = input('Please input the system code : (Press [Enter] to quit):').strip()
        if not sysCode:
            raise SystemExit

        # Init ConnectionSettings
        if sysCode in config.sections():
            return dict(config[sysCode])
        print('Ini info of System [%s] is blank.\n' % sysCode)
# --- 多参数输入的获取 (reading multi-argument input) ---
def _main():
    """Ask for a PDF directory and a Chinese key word until both are valid.

    The two values are typed on one line, separated by whitespace.  The
    loop repeats until the path exists and every character of the key
    word lies in the range U+4E00..U+9FA5.  An empty line ends the program.

    Raises:
        SystemExit: When the user presses [Enter] on an empty line.
    """
    path = ''
    keyWord = ''

    while True:
        para = input('Please input the PDF directory and Key Word: (Press [Enter] to quit):').strip().split()

        # An empty line splits into an empty list, so the quit check must
        # run before the argument-count check; otherwise [Enter] would only
        # re-prompt and the user could never leave the loop.
        if not para:
            raise SystemExit

        # NOTE(review): splitting on whitespace means a path containing
        # spaces is seen as extra arguments and re-prompts.
        if len(para) != 2:
            continue

        path, keyWord = para

        if not os.path.exists(path):
            print('input path is not a exists path.' + '\n')
            continue

        # Both ends of the range are inclusive so that U+9FA5 itself is
        # accepted as a Chinese character.
        if not all('\u4e00' <= char <= '\u9fa5' for char in keyWord):
            print('Please input the Chinese Key Word for search.(Such as \'物流\').' + '\n')
            continue

        # path and keyWord are validated at this point.
        break
PostgreSQL数据库处理
1 import psycopg2
4 import traceback
5
def connDB(_cfg):
    """Open a PostgreSQL connection from a settings mapping.

    Args:
        _cfg: Mapping that provides the keys ``servicename`` (database
            name), ``dbuser``, ``dbpw``, ``host`` and ``port``.

    Returns:
        The connection object returned by ``psycopg2.connect``.  If the
        connection attempt raises, the traceback is passed to
        ``printLog('E', ...)`` and nothing is returned explicitly.
    """
    try:
        return psycopg2.connect(
            database=_cfg['servicename'],
            user=_cfg['dbuser'],
            password=_cfg['dbpw'],
            host=_cfg['host'],
            port=_cfg['port'],
        )
    except Exception:
        printLog('E', 'Exception occur at DB Connection.' + '\n' + traceback.format_exc())
16
def executeSql(_cfg, _sql):
    """Run one SQL statement and return the first column of every row.

    Args:
        _cfg: Connection settings, passed unchanged to ``connDB``.
        _sql: SQL text to execute.

    Returns:
        list: The first column of each fetched row.  If execution raises,
        the traceback is passed to ``printLog('E', ...)`` instead.
    """
    # Bind both names up front so the cleanup in ``finally`` never touches
    # an unbound local when connecting or creating the cursor fails; that
    # would replace the real error with an UnboundLocalError.
    conn = None
    cur = None
    try:
        conn = connDB(_cfg)
        cur = conn.cursor()
        cur.execute(_sql)
        return [row[0] for row in cur.fetchall()]
    except Exception:
        printLog('E', 'Exception occur at Execute SQL.' + '\n' + traceback.format_exc())
    finally:
        if cur is not None:
            cur.close()
        if conn is not None:
            # The transaction is always rolled back before closing, so
            # uncommitted changes made by _sql are not kept.
            conn.rollback()
            conn.close()
# --- 日志处理 (log handling) ---
# Log records go to a per-day file named log_YYYYMMDD.txt in the current
# directory.  The threshold is INFO, so 'D' (DEBUG) messages are not written.
logging.basicConfig(filename='log_' + datetime.now().strftime('%Y%m%d') + '.txt',
                    level=logging.INFO,
                    format=' %(asctime)s - %(levelname)s - %(message)s')

# One-letter level codes accepted by printLog().
logLevel = {'D': logging.DEBUG,
            'I': logging.INFO,
            'W': logging.WARNING,
            'E': logging.ERROR,
            'C': logging.CRITICAL}


def printLog(_lvl, _msg):
    """Log a message and stop the program when the level is fatal.

    Args:
        _lvl: One-letter level code, a key of ``logLevel``.
        _msg: Message text to log.

    Raises:
        KeyError: If ``_lvl`` is not a key of ``logLevel``.
        SystemExit: With exit status 1 when the level is ERROR or CRITICAL,
            after the message has also been printed to the console.
    """
    level = logLevel[_lvl]
    logging.log(level, _msg)
    # CRITICAL is more severe than ERROR, so it must terminate as well;
    # a non-zero status tells the calling shell that the run failed.
    if level >= logging.ERROR:
        print(_msg)
        raise SystemExit(1)
16
17
# Usage examples.  They are independent of each other: an 'E' (error) call
# prints the message and ends the program, an 'I' call only writes to the log.
printLog('E', 'srcpath is not a exists path.')
printLog('I', 'Get Src Path : %s' % srcPath)
MAP函数运用
def getPreOfNm(x):
    """Return *x* cut off just before its second underscore.

    Args:
        x: Name to shorten, e.g. ``'tbl_user_detail'``.

    Returns:
        str: Everything before the second ``'_'`` (``'tbl_user'`` for the
        example above), or *x* unchanged when it contains fewer than two
        underscores.
    """
    # Truncation only makes sense when a second underscore exists;
    # otherwise the inner find() would return -1 and drop the last character.
    if x.count('_') > 1:
        first = x.find('_')
        return x[:x.find('_', first + 1)]
    return x
6
# Get prefix of CRUD object name: map every entry of lstTb to its prefix,
# drop duplicates through a set, and keep the result as a sorted list.
prefixObjNm = sorted(set(map(getPreOfNm, lstTb)))
目录处理
# Check the srcPath: collect the path of every file whose name matches
# fileNmReg.  srcPath may be a directory (walked recursively) or one file.
fullFilePaths = []
if os.path.isdir(srcPath):
    for folderName, subFolders, fileNames in os.walk(srcPath):
        # Files directly inside a folder named 'tcs' or 'doc' are ignored.
        # NOTE(review): os.walk still descends into the sub-folders of such
        # a folder; prune subFolders in place if whole subtrees must go.
        if os.path.basename(folderName) in ('tcs', 'doc'):
            continue
        for fn in fileNames:
            # Get src file
            if fileNmReg.search(fn):
                fullFilePaths.append(os.path.join(folderName, fn))
elif os.path.isfile(srcPath):
    # Get src file; realpath resolves symlinks, so the pattern is tested
    # against the name of the real file, while srcPath is stored as given.
    fn = os.path.basename(os.path.realpath(srcPath))
    if fileNmReg.search(fn):
        fullFilePaths.append(srcPath)
PDF文件读取
1 from pdfminer.converter import TextConverter
2 from pdfminer.layout import LAParams
3 from pdfminer.pdfinterp import PDFResourceManager, process_pdf
4 import os
5
6
def read_pdf(pdf):
    """Extract the text of a PDF and return it as a list of lines.

    Args:
        pdf: The PDF source, handed unchanged to pdfminer's ``process_pdf``
            (presumably an open binary file object - TODO confirm).

    Returns:
        list[str]: The extracted text split on newline characters.
    """
    # Local import: StringIO is used here but not imported in the visible
    # part of the file.
    from io import StringIO

    # resource manager
    rsrcmgr = PDFResourceManager()
    with StringIO() as retstr:
        # device: writes the text it converts into retstr
        device = TextConverter(rsrcmgr, retstr, laparams=LAParams())
        try:
            process_pdf(rsrcmgr, device, pdf)
        finally:
            # Close the device even when parsing fails.
            device.close()
        content = retstr.getvalue()

    # Get all lines
    return content.split("\n")
CSV文件导出
# Init result file: "[CRUD]<YYYYmmddHHMMSS>.csv" created inside srcPath.
# NOTE(review): rstFile is not closed in this excerpt; close it after the
# last row has been written.
rstFile = open(
    os.path.join(srcPath, '[CRUD]' + datetime.now().strftime('%Y%m%d%H%M%S') + '.csv'),
    'w',
    newline='',
)
# Columns are separated by tabs even though the file extension is .csv.
rstWtr = csv.writer(rstFile, delimiter='\t', lineterminator='\n')
# Write head
rstWtr.writerow(['TYPE', 'CI', 'ENCODE', 'LINE NUM', 'CRUD', 'TABLE NM', 'FULL PATH'])
文章标题:Python3自动化_文件批量处理(文本、PDF;读取、筛选、导出)
文章链接:http://soscw.com/essay/47293.html