使用python将mdb数据库文件导入postgresql数据库示例

2018-09-22 00:53

阅读:544

  mdb格式文件可以通过mdbtools工具将内中包含的每张表导出到csv格式文件。由于access数据库和postgresQL数据库格式上会存在不通性,所以使用python的文件处理,将所得csv文件修改成正确、能识别的格式。

  导入脚本说明(此脚本运行于linux):

  1.apt-get install mdbtools,安装mdbtools工具

  2.将mdb 文件拷贝到linux虚拟机中,修改脚本中mdb文件目录‘dir

  3.修改服务器及数据库配置

  4.执行脚本

  复制代码 代码如下:
# -*- encoding: utf-8 -*-
import os
import re
import psycopg2
import csv

  #mdb文件目录
dir = r/home/kotaimen/mdb_file/
mdb_tbl_dic = {}

  
def make_create_sql():
if os.path.isfile(dir + create.sql):
os.remove(dir + create.sql)

  
for mdb_file in os.walk(dir):
if len(mdb_file[2]) >0:
for file_p in mdb_file[2]:
if file_p[-3:] == mdb:
print file_p
cmd = mdb-schema %s >>/home/kotaimen/mdb_file/create.sql
cmd = cmd % (dir + file_p)
print cmd
os.system(cmd)
cmd = mdb-tables -1 %s % (dir + file_p)
val = os.popen(cmd).read()
mdb_tbl_dic[file_p] = val.split(\n)
print mdb_tbl_dic

  def modefy_create_sql():
sql_file_name = dir + create.sql
sql_file_name_des = sql_file_name + _new
fobj = open(sql_file_name, r)
fobj_des = open(sql_file_name_des, w)
for eachline in fobj:
#判断表名中是否含有空格
if eachline.find(TABLE ) >= 0:
if eachline.find(;) >= 0:
start_loc = eachline.find(TABLE ) + 6
end_loc = eachline.find(;)
tbl_name = eachline[start_loc:end_loc]
eachline = eachline.replace(tbl_name, + tbl_name + )
else:
start_loc = eachline.find(TABLE ) + 6
end_loc = eachline.find(\n)
tbl_name = eachline[start_loc:end_loc]
eachline = eachline.replace(tbl_name, + tbl_name + )

   if eachline.find(DROP TABLE) >= 0 :
eachline = eachline.replace(DROP TABLE, DROP TABLE IF EXISTS)
if eachline.find(Table) >= 0 :
eachline = eachline.replace(Table, Table)
#create 语句,最后一行没有逗号
if eachline.find(Text ) >= 0 and eachline.find(,) >0:
loc = eachline.find(Text )
eachline = eachline[0:loc] + Text,\n
elif eachline.find(Text ) >= 0 and eachline.find(,) < 0:
loc = eachline.find(Text )
eachline = eachline[0:loc] + Text \n
fobj_des.writelines(eachline)
fobj.close()
fobj_des.close()
os.remove(sql_file_name)
os.rename(sql_file_name_des, sql_file_name)

  def make_insert_csv():
for file_p in mdb_tbl_dic.keys():
for tbl in mdb_tbl_dic[file_p]:
if len(tbl) >0:
cmd = mdb-export %s %s >%s.csv % (dir + file_p, + tbl + , dir + + tbl + )# tbl.replace( , _).replace(, _))
os.system(cmd)

  def modefy_insert_CSV():
for sql_file in os.walk(dir):
if len(sql_file[2]) >0:
for file_p in sql_file[2]:
if file_p[-3:] == csv :
sql_file_name = dir + file_p
sql_file_name_des = sql_file_name + _new
fobj = open(sql_file_name, r)
fobj_des = open(sql_file_name_des, w)
for (num, val) in enumerate(fobj):
eachline = val
if num == 0:
col_list = eachline.split(,)
stat = COPY + + (file_p[0:-4]) + + ( #+ (%s,*len(line))[:-1]+)
for col in col_list:
if col == Table:
col = + Table +
if col.find(\n) >= 0:
col.replace(\n, )
stat = stat + col + ,
stat = stat[:-2] + ) + FROM STDIN WITH CSV ;\n
eachline = stat

   fobj_des.writelines(eachline)
fobj.close()
fobj_des.close()
os.remove(sql_file_name)
os.rename(sql_file_name_des, sql_file_name)

  
def insert_into_database():

   cmd = psql -h 172.26.11.205 -d ap_MapMyIndia_full_Sample -U postgres -f %s 2>>log.txt % (dir + create.sql)
os.system(cmd)

   for sql_file in os.walk(dir):
if len(sql_file[2]) >0:
for file_p in sql_file[2]:
print file_p
if file_p[-3:] == csv :
cmd = psql -h 172.26.11.205 -d ap_MapMyIndia_full_Sample -U postgres -f %s 2>>log.txt % (dir + + file_p + )
os.system(cmd)

  
if __name__ == __main__:
#1.制作mdb文件中所包含TABLE的create脚本
make_create_sql()
#2.修改掉create脚本中的不合法字符
modefy_create_sql()
#3.将mdb中各表导出到csv文件中
make_insert_csv()
#4.修改csv脚本首行,改成copy形式
modefy_insert_CSV()

   insert_into_database()


评论


亲,登录后才可以留言!