Python根据csv某列数据操作某一后缀的文件

2021-06-07 12:02

阅读:687

标签:bsp   tuples   __name__   数据   完全   not   rtu   open   ext   

# coding=gbk
import os
import csv
import pandas as pd
import shutil

# 获取指定文件夹中后缀.svs的文件
def get_svsfile(path_svs):
    """Print the base name (without extension) of each ``.svs`` file in a folder.

    Only the direct entries of ``path_svs`` are examined, and the extension
    comparison is case-sensitive. Names are printed one per line in sorted
    order, because ``os.listdir`` returns entries in arbitrary order.

    Args:
        path_svs: Directory to scan.
    """
    for entry in sorted(os.listdir(path_svs)):
        # os.path.splitext() splits "name.ext" into ("name", ".ext").
        stem, ext = os.path.splitext(entry)
        if ext == ".svs":
            print(stem)

# 新建CSV文件并指定列名
def create_csv(path_csv):
    """Create (or overwrite) a CSV file containing only the header row.

    The header is ``case_id, slide_id, label``.

    Args:
        path_csv: Path of the CSV file to write.
    """
    # newline="" lets the csv module control line endings; without it an
    # extra blank line appears after every row on Windows.
    with open(path_csv, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        # Write the column names first.
        writer.writerow(["case_id", "slide_id", "label"])
        # Use writer.writerows([...]) to append several rows at once.

# Write the base name of every .svs file in a folder into a new CSV, leaving case_id and label blank
def create_label_csv(path_svs, path_csv):
    """Write a CSV with one row per ``.svs`` file found in a folder.

    The file starts with the header ``case_id, slide_id, label``. Each
    following row holds the slide's base name (file name without the
    ``.svs`` extension) in ``slide_id`` and leaves ``case_id`` and ``label``
    empty. Rows are written in sorted order, because ``os.listdir`` returns
    entries in arbitrary order.

    Args:
        path_svs: Directory whose direct entries are scanned for ``.svs`` files.
        path_csv: Path of the CSV file to create or overwrite.
    """
    # The directory is listed before the output file is opened, as in the
    # original code.
    stems = sorted(
        stem
        for stem, ext in map(os.path.splitext, os.listdir(path_svs))
        if ext == ".svs"
    )
    # newline="" avoids blank rows on Windows; UTF-8 matches the default
    # encoding pandas.read_csv uses when this file is read back.
    with open(path_csv, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["case_id", "slide_id", "label"])
        writer.writerows(["", stem, ""] for stem in stems)

def excel_to_csv(path_excel, path_csv="meta_yunnan.csv"):
    """Convert an Excel workbook to a CSV file.

    The workbook is loaded with ``pandas.read_excel`` using its default
    arguments and written with ``DataFrame.to_csv`` using its default
    arguments, so the DataFrame index is written as the first column.

    Args:
        path_excel: Path of the Excel file to read.
        path_csv: Path of the CSV file to write. Defaults to
            ``"meta_yunnan.csv"``, the name the original code hard-coded.
    """
    file_excel = pd.read_excel(path_excel)
    file_excel.to_csv(path_csv)

# 根据csv文件中的某一列值获取另一csv中的对应列的值
def gen_final_csv(path_csv, metadata_csv, output_csv="_final_.csv"):
    """Fill in ``case_id`` and ``label`` for each slide from a metadata table.

    Each ``slide_id`` in ``path_csv`` is compared with the metadata column
    ``术前病理号``:

    * If one or more metadata rows match, ``case_id`` is taken from the
      ``病理版编号`` column and ``label`` is 0 when the tumour-regression
      column equals 0, otherwise 1. When several rows match, the last one
      wins, as in the original row-by-row scan.
    * If no metadata row matches, the slide's row is removed.

    The result is sorted by ``label`` and written to ``output_csv`` with the
    DataFrame index included. The total number of matching metadata rows is
    printed.

    Args:
        path_csv: CSV with at least a ``slide_id`` column.
        metadata_csv: CSV holding the three metadata columns named above.
        output_csv: Destination path. Defaults to ``"_final_.csv"``, the
            name the original code hard-coded.
    """
    key_column = "术前病理号"
    case_column = "病理版编号"
    # NOTE(review): header reproduced verbatim from the original code;
    # confirm its punctuation against the real spreadsheet header.
    grade_column = "肿瘤退缩程度(0:完全退缩,1:单个或小灶癌残余,2:部分癌残留,3:大量癌残留;4 不适合评价)"

    final_csv = pd.read_csv(path_csv, engine="python")
    metadata = pd.read_csv(metadata_csv)

    # An all-empty case_id column is parsed as float; make it an object
    # column so string case ids can be stored without a dtype conflict.
    if "case_id" in final_csv.columns:
        final_csv["case_id"] = final_csv["case_id"].astype(object)

    pathology_ids = metadata[key_column]
    count = 0
    unmatched = []
    for i, svsname in final_csv["slide_id"].items():
        # One vectorised comparison replaces the nested Python loop.
        matches = metadata[pathology_ids == svsname]
        if matches.empty:
            unmatched.append(i)
            continue
        count += len(matches)
        final_csv.loc[i, "case_id"] = matches[case_column].iloc[-1]
        grade = matches[grade_column].iloc[-1]
        final_csv.loc[i, "label"] = 0 if grade == 0 else 1

    # Remove unmatched slides in one step, after the iteration has finished.
    final_csv = final_csv.drop(index=unmatched)
    final_csv = final_csv.sort_values("label")
    print(count)
    final_csv.to_csv(output_csv)

# Copy each slide listed in the final CSV into a folder chosen by its label
def copy_file(path_svs, path_final_csv, dest_root="train_folder"):
    """Copy each labelled slide into a per-label folder.

    For every row of ``path_final_csv``, the file ``<slide_id>.svs`` inside
    ``path_svs`` is copied to ``<dest_root>/pcr`` when ``label`` equals 0 and
    to ``<dest_root>/non_pcr`` when ``label`` equals 1. Rows with any other
    label are skipped.

    Args:
        path_svs: Directory containing the ``.svs`` files, with or without a
            trailing separator.
        path_final_csv: CSV with ``slide_id`` and ``label`` columns.
        dest_root: Parent of the ``pcr`` / ``non_pcr`` folders. Defaults to
            ``"train_folder"``, the name the original code hard-coded.

    Raises:
        FileNotFoundError: If a listed slide file does not exist (raised by
            ``shutil.copy``).
    """
    label_dirs = {0: "pcr", 1: "non_pcr"}
    final_csv = pd.read_csv(path_final_csv)
    for slide_id, svslabel in zip(final_csv["slide_id"], final_csv["label"]):
        subdir = label_dirs.get(svslabel)
        if subdir is None:
            continue
        dest_dir = os.path.join(dest_root, subdir)
        # If the folder is missing, shutil.copy would create a regular file
        # named like the folder, so make sure the folder exists first.
        os.makedirs(dest_dir, exist_ok=True)
        shutil.copy(os.path.join(path_svs, str(slide_id) + ".svs"), dest_dir)

if __name__ == "__main__":
    # Folder holding the .svs slides and the labelled CSV that copy_file reads.
    path_svs = "F:/数据/附三院直肠癌新辅助肠镜病理/lzhpCR图像/"
    path_final_csv = "_final_.csv"
    # The calls below are the earlier steps that produce _final_.csv. They
    # are left commented out; uncomment them to rebuild it.
    # path_csv = "_yunnan_svs_name_.csv"
    # metadata_csv = "meta_yunnan.csv"
    # # get_svsfile(path_svs)
    # # create_csv(path_csv)
    # create_label_csv(path_svs, path_csv)
    # file_csv = pd.read_csv(path_csv, engine="python")
    # print(file_csv)
    # excel_to_csv("entireData.xlsx")
    # gen_final_csv(path_csv, metadata_csv)
    copy_file(path_svs, path_final_csv)

 

Python根据csv某列数据操作某一后缀的文件

标签:bsp   tuples   __name__   数据   完全   not   rtu   open   ext   

原文地址:https://www.cnblogs.com/dyc99/p/14584861.html


评论


亲,登录后才可以留言!