"""
Delete invalid rows from selected tables in the Doris database.
"""
import mysql.connector
import socket
import socks
import pandas as pd
import pymysql
from sqlalchemy import create_engine, text
import csv
from datetime import datetime


# TiDB database
# NOTE(review): header translated as-is; the function below connects to Doris.
def DorisTableInfo(databaseName, env):
    """Physically delete rows whose status is 'InValid' from the target table.

    Opens a pymysql connection, selects up to 500000 ids of rows with
    ``status = 'InValid'`` (ascending id order) from ``MY_DB.tableName`` and
    removes them with one ``DELETE ... WHERE id IN (...)`` statement per
    batch of 1000 ids, printing each statement as it is executed.

    Args:
        databaseName: Default database passed to the connection.
        env: Environment selector; only ``"online"`` has connection settings.

    Raises:
        ValueError: If ``env`` is not ``"online"``. Previously this case had
            no connection configured and could not do any useful work.
    """
    if env != "online":
        raise ValueError(
            "unsupported env: %r (only 'online' is configured)" % (env,)
        )

    # Doris
    db = pymysql.connect(
        host="yourHost",
        port=6666,
        user="yourUser",
        password="yourPwd",
        database=databaseName,
    )
    try:
        cursor = db.cursor()
        try:
            cursor.execute("set names utf8")
            cursor.execute("SET character_set_connection=utf8;")

            # The table name is fixed here and is independent of databaseName.
            dbName = "MY_DB.tableName"
            sql = "select id from %s where status = 'InValid' order by id asc limit 500000;" % (dbName)
            print("查询要删除的数据语句:", sql)
            cursor.execute(sql)
            data = cursor.fetchall()
            print("查询 %s 需要物理删除的数据量: " % (dbName), len(data))

            # Each fetched row is a 1-tuple holding the id.
            ids = [row[0] for row in data]

            # Delete in batches of 1000 ids to keep each statement bounded.
            batches = group_list(ids, 1000)
            for i, batch in enumerate(batches):
                deleteIds = ','.join(str(item) for item in batch)
                deleteSql = "delete from %s where id in (%s)" % (dbName, deleteIds)
                print("[%s/%s]" % (i, len(batches)), deleteSql)
                cursor.execute(deleteSql)
            # NOTE(review): no explicit commit is issued (unchanged from the
            # original) — confirm the server applies each DELETE on its own.
        finally:
            cursor.close()
    finally:
        # Always release the connection, even if a statement fails midway.
        db.close()


def group_list(lst, group_size):
    """Split *lst* into consecutive chunks of at most *group_size* items.

    Order is preserved; the last chunk may be shorter than the others.

    Args:
        lst: Iterable of items to split.
        group_size: Maximum number of items per chunk; must be >= 1.

    Returns:
        A list of lists; empty when *lst* is empty.

    Raises:
        ValueError: If *group_size* is smaller than 1.
    """
    if group_size < 1:
        raise ValueError("group_size must be a positive integer")
    # Materialize once so any iterable (not only lists) can be sliced.
    items = list(lst)
    return [
        items[start:start + group_size]
        for start in range(0, len(items), group_size)
    ]


# Doris database
# Run configuration: the database to connect to and the target environment.
databaseName, env = "MY_DB", "online"

# Runs on import as well as on direct execution; the banners bracket the job.
print("start-------")
DorisTableInfo(databaseName, env)
print("done-------")