Python根据csv某列数据操作某一后缀的文件
2021-06-07 12:02
标签:bsp tuples __name__ 数据 完全 not rtu open ext
原文地址:https://www.cnblogs.com/dyc99/p/14584861.html
# coding=gbk
import os
import csv
import pandas as pd
import shutil
def get_svsfile(path_svs):
    """Print and return the base names of the ``.svs`` files in a folder.

    Args:
        path_svs: Directory to scan (non-recursive).

    Returns:
        A list with the file names (extension removed) of every entry whose
        extension is exactly ``.svs``, in ``os.listdir`` order. Each name is
        also printed, as the original helper did.
    """
    stems = []
    for entry in os.listdir(path_svs):
        # os.path.splitext() separates the file name from its extension.
        stem, ext = os.path.splitext(entry)
        if ext == '.svs':
            print(stem)
            stems.append(stem)
    return stems
def create_csv(path_csv):
    """Create a new CSV file that contains only the header row.

    The header is ``case_id, slide_id, label``. An existing file at
    ``path_csv`` is overwritten.

    Args:
        path_csv: Path of the CSV file to create.
    """
    # newline='' lets the csv module control line endings itself; without it
    # every row is followed by a blank line on Windows.
    with open(path_csv, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        # Write the column names first; use writer.writerows(...) to append
        # several data rows at once.
        writer.writerow(["case_id", "slide_id", "label"])
def create_label_csv(path_svs, path_csv):
    """Write a label-table skeleton listing every ``.svs`` file in a folder.

    The output has the columns ``case_id, slide_id, label``. One row is
    written per ``.svs`` file, with ``slide_id`` set to the file name without
    its extension and ``case_id`` / ``label`` left empty so they can be
    filled in later.

    Args:
        path_svs: Directory to scan (non-recursive) for ``.svs`` files.
        path_csv: Path of the CSV file to create; overwritten if it exists.
    """
    # Sort so the row order does not depend on the file system's listing order.
    entries = sorted(os.listdir(path_svs))
    # newline='' lets the csv module control line endings itself; without it
    # every row is followed by a blank line on Windows.
    with open(path_csv, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        # Write the column names first.
        writer.writerow(["case_id", "slide_id", "label"])
        for entry in entries:
            # os.path.splitext() separates the file name from its extension.
            stem, ext = os.path.splitext(entry)
            if ext == '.svs':
                writer.writerow(['', stem, ''])
def excel_to_csv(path_excel, path_csv='meta_yunnan.csv'):
    """Convert an Excel workbook to a CSV file.

    Args:
        path_excel: Path of the Excel file, read with ``pandas.read_excel``
            using its defaults.
        path_csv: Destination CSV path. Defaults to ``meta_yunnan.csv`` in the
            current working directory, the location the original script used.
    """
    file_excel = pd.read_excel(path_excel)
    file_excel.to_csv(path_csv)
def gen_final_csv(path_csv, metadata_csv, output_csv='_final_.csv'):
    """Fill ``case_id`` and ``label`` of a slide table from a metadata table.

    For every ``slide_id`` in ``path_csv`` the metadata rows whose
    ``术前病理号`` column equals that id are looked up:

    * no matching row: the slide row is removed from the result;
    * otherwise ``case_id`` is taken from the ``病理版编号`` column and
      ``label`` is 0 when the tumour-regression column equals 0, else 1.
      If several metadata rows match, the last one wins.

    The result is sorted by ``label`` and written to ``output_csv`` (with the
    DataFrame index as first column). The total number of matching metadata
    rows is printed.

    Args:
        path_csv: CSV with the columns ``case_id``, ``slide_id``, ``label``.
        metadata_csv: CSV holding the metadata columns named above.
        output_csv: Destination path; defaults to ``_final_.csv``.
    """
    key_col = '术前病理号'
    case_col = '病理版编号'
    regression_col = '肿瘤退缩程度(0:完全退缩,1:单个或小灶癌残余,2:部分癌残留,3:大量癌残留;4 不适合评价)'

    final_csv = pd.read_csv(path_csv, engine='python')
    metadata = pd.read_csv(metadata_csv)

    # An empty case_id column is parsed as float64 (all NaN). Cast it to
    # object so that arbitrary ids can be stored without pandas rejecting or
    # silently converting them.
    if 'case_id' in final_csv.columns:
        final_csv['case_id'] = final_csv['case_id'].astype(object)

    count = 0
    unmatched = []
    # Iterate over (index label, slide_id) pairs of the slide table.
    for i, svsname in final_csv[['slide_id']].itertuples():
        matches = metadata[metadata[key_col] == svsname]
        if matches.empty:
            # Remember rows without metadata; they are dropped after the loop.
            unmatched.append(i)
            continue
        count += len(matches)
        # Same outcome as scanning all rows in order: the last match wins.
        last = matches.iloc[-1]
        final_csv.loc[i, 'case_id'] = last[case_col]
        final_csv.loc[i, 'label'] = 0 if last[regression_col] == 0 else 1

    final_csv = final_csv.drop(unmatched)
    # Sort the table by the 'label' column.
    final_csv = final_csv.sort_values('label')
    print(count)
    final_csv.to_csv(output_csv)
def copy_file(path_svs, path_final_csv, dest_root="train_folder"):
    """Copy ``.svs`` slides into per-label folders according to a CSV table.

    For every row of ``path_final_csv`` the file ``<slide_id>.svs`` inside
    ``path_svs`` is copied to ``<dest_root>/pcr`` when ``label`` is 0 and to
    ``<dest_root>/non_pcr`` when ``label`` is 1. Rows with any other label
    are skipped.

    Args:
        path_svs: Directory containing the ``.svs`` files (a trailing
            separator is optional).
        path_final_csv: CSV with at least the columns ``slide_id`` and
            ``label``.
        dest_root: Folder that receives the ``pcr`` / ``non_pcr``
            sub-folders; defaults to ``train_folder`` in the working directory.
    """
    final_csv = pd.read_csv(path_final_csv)
    for slide_id, svslabel in zip(final_csv['slide_id'], final_csv['label']):
        if svslabel == 0:
            subdir = "pcr"
        elif svslabel == 1:
            subdir = "non_pcr"
        else:
            continue
        target_dir = os.path.join(dest_root, subdir)
        # shutil.copy needs an existing directory; otherwise it would fail or
        # write a single file named like the intended folder.
        os.makedirs(target_dir, exist_ok=True)
        shutil.copy(os.path.join(path_svs, str(slide_id) + '.svs'), target_dir)
if __name__ == '__main__':
    # Folder with the .svs slides and the label table read by copy_file().
    path_svs = "F:/数据/附三院直肠癌新辅助肠镜病理/lzhpCR图像/"
    path_final_csv = "_final_.csv"
    # Earlier pipeline steps, left disabled; re-enable as needed:
    # path_csv = "_yunnan_svs_name_.csv"
    # metadata_csv = "meta_yunnan.csv"
    # # get_svsfile(path_svs)
    # # create_csv(path_csv)
    # create_label_csv(path_svs, path_csv)
    # file_csv = pd.read_csv(path_csv, engine='python')
    # print(file_csv)
    # excel_to_csv('entireData.xlsx')
    # gen_final_csv(path_csv, metadata_csv)
    copy_file(path_svs, path_final_csv)