# 预处理算法_3_新增序列 (Preprocessing algorithm 3: add a sequence column)
# 2021-06-10 00:04
#!/usr/bin/env python
# -*- coding:utf-8 -*-#
def execute():
    """Read columns from a MySQL table, prepend a 1-based sequence column, and print.

    The job configuration is fixed inside the function:

    * ``inputs["table"]`` -- name of the source table (``test``).
    * ``params["left_columns"]`` -- comma-separated column list to select.
    * ``params["ind"]`` -- name given to the new sequence column.

    The selected rows are loaded into a DataFrame, a column holding
    ``1..n`` (``n`` = number of rows) is placed in front of the selected
    columns, and the resulting DataFrame is printed to stdout.

    Returns:
        None. The result is only printed.

    Example output::

           new_label  id  score
        0          1   1   80.0
        1          2   2   20.0
        2          3   3    NaN
        3          4   4    5.0
        4          5   5    4.0
        5          6   6   20.0
    """
    # Load modules. Kept function-local, as in the original script, so the
    # module can be imported without these packages being installed.
    import numpy as np
    import pandas as pd
    from sqlalchemy import create_engine

    inputs = {"table": "test"}
    params = {
        "left_columns": "id, score",
        "ind": "new_label",
    }

    # Connect to the database.
    # NOTE(review): credentials are hard-coded in the URL; consider moving
    # them to configuration or environment variables.
    engine = create_engine("mysql+pymysql://root:123123qwe@127.0.0.1:3306/analysis")
    try:
        # Select the target data. Column and table names are identifiers and
        # cannot be bound as SQL parameters; they are fixed constants defined
        # above, so plain concatenation is used. Do not feed untrusted input
        # into `inputs` / `params` without validating it first.
        left_sql = "select " + params["left_columns"] + " from " + inputs["table"]
        data_in = pd.read_sql_query(left_sql, engine)
    finally:
        # Release pooled connections even if the query fails.
        engine.dispose()

    # Add the auto-increment sequence: 1..n, one value per input row.
    new = np.arange(1, len(data_in) + 1)
    new = pd.DataFrame({params["ind"]: new})
    # Column-wise concat aligns on the index; both frames use the default
    # 0..n-1 index here, so rows pair up one-to-one.
    data_out = pd.concat([new, data_in], axis=1)

    # Write out the result.
    print(data_out)
# Run the job only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    execute()
# 下一篇:预处理算法_2_类型转换 (Next article: Preprocessing algorithm 2: type conversion)