关于大文件导出的优化迭代情况如下:
计算机配置:四核16G内存
初始版本为单线程单文件导出,使用 MyBatis 读数据、opencsv 写文件,耗时将近三小时;
第一轮优化改为多线程单文件,提高读数据效率,时间仅缩减十分钟;
第二轮改为多线程多文件,提高写文件效率,时间缩减一个半小时;
第三轮使用 Mybatis 流式查询,并改用 Map 封装数据,提高内存利用率,时间缩减十分钟;
第四轮弃用 Mybatis ,改用原生 JDBC 获取数据并直接拼接,时间缩减十分钟;
第五轮弃用 opencsv ,改用 BufferedWriter 直接写数据,时间缩减十分钟;
2023-04-23 22:01:30 [main] INFO WriteData - 单线程单文件 total time in 258s
2023-04-23 22:02:44 [main] INFO WriteData - 固定线程单文件 total time in 74s
2023-04-23 22:03:40 [main] INFO WriteData - 固定线程多文件 total time in 55s
2023-04-23 22:04:18 [main] INFO WriteData - concurrentWrite total time in 37s
2023-04-23 22:26:28 [Thread-1] INFO WriteData - query in 42s
2023-04-23 22:26:28 [Thread-3] INFO WriteData - query in 42s
2023-04-23 22:26:28 [Thread-4] INFO WriteData - query in 42s
2023-04-23 22:26:28 [Thread-6] INFO WriteData - query in 42s
2023-04-23 22:26:28 [Thread-7] INFO WriteData - query in 42s
2023-04-23 22:26:28 [Thread-2] INFO WriteData - query in 42s
2023-04-23 22:26:28 [Thread-5] INFO WriteData - query in 42s
2023-04-23 22:26:30 [Thread-0] INFO WriteData - query in 44s
2023-04-23 22:27:00 [Thread-5] INFO WriteData - write in 31s
2023-04-23 22:27:00 [Thread-1] INFO WriteData - write in 31s
2023-04-23 22:27:00 [Thread-7] INFO WriteData - write in 31s
2023-04-23 22:27:00 [Thread-2] INFO WriteData - write in 31s
2023-04-23 22:27:00 [Thread-3] INFO WriteData - write in 32s
2023-04-23 22:27:00 [Thread-6] INFO WriteData - write in 32s
2023-04-23 22:27:00 [Thread-4] INFO WriteData - write in 32s
2023-04-23 22:27:01 [Thread-0] INFO WriteData - write in 31s
2023-04-23 22:27:01 [main] INFO WriteData - 固定线程单文件 total time in 75s
2023-04-23 22:27:24 [Thread-14] INFO WriteData - query in 22s
2023-04-23 22:27:24 [Thread-13] INFO WriteData - query in 22s
2023-04-23 22:27:24 [Thread-12] INFO WriteData - query in 22s
2023-04-23 22:27:24 [Thread-9] INFO WriteData - query in 22s
2023-04-23 22:27:24 [Thread-11] INFO WriteData - query in 22s
2023-04-23 22:27:24 [Thread-10] INFO WriteData - query in 22s
2023-04-23 22:27:24 [Thread-15] INFO WriteData - query in 22s
2023-04-23 22:27:25 [Thread-8] INFO WriteData - query in 23s
2023-04-23 22:27:55 [Thread-12] INFO WriteData - write in 31s
2023-04-23 22:27:55 [Thread-14] INFO WriteData - write in 31s
2023-04-23 22:27:55 [Thread-9] INFO WriteData - write in 31s
2023-04-23 22:27:55 [Thread-11] INFO WriteData - write in 31s
2023-04-23 22:27:55 [Thread-13] INFO WriteData - write in 31s
2023-04-23 22:27:56 [Thread-15] INFO WriteData - write in 31s
2023-04-23 22:27:56 [Thread-10] INFO WriteData - write in 31s
2023-04-23 22:27:56 [Thread-8] INFO WriteData - write in 31s
2023-04-23 22:27:56 [main] INFO WriteData - 固定线程多文件 total time in 54s
import com.zaxxer.hikari.HikariConfig;
import com.zaxxer.hikari.HikariDataSource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;import java.io.*;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.sql.*;
import java.time.Duration;
import java.time.LocalDate;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.LinkedBlockingDeque;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;public class WriteData {static final Logger log = LoggerFactory.getLogger(WriteData.class);public static final String PARENT_PATH = "C:\\Users\\qiu01\\Desktop\\server\\docker\\mysql\\master\\data\\stu_data\\";public static final String URL = "jdbc:mysql://localhost:3307/stu?allowPublicKeyRetrieval=TRUE&useCursorFetch=true";public static final String USERNAME = "root";public static final String PASSWORD = "123456";public static final String SQL = "SELECT * FROM student WHERE id > ? AND id <= ?";public static final int TOTAL = 10000000;public static final ThreadPoolExecutor POOL = new ThreadPoolExecutor(8, 9, 3, TimeUnit.SECONDS, new LinkedBlockingDeque<>());public static final HikariDataSource DS;static {HikariConfig config = new HikariConfig();config.setJdbcUrl(URL);config.setUsername(USERNAME);config.setPassword(PASSWORD);DS = new HikariDataSource(config);}public static void main(String[] args) {// 单线程写文件singleThreadWrite();// 固定线程写同concurrentWriteWithFixedThread(true);concurrentWriteWithFixedThread(false);concurrentWrite();}public static void singleThreadWrite() {String file = PARENT_PATH + "file.csv";long start = System.currentTimeMillis();try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(Files.newOutputStream(Paths.get(file))));Connection connection = DS.getConnection();PreparedStatement stmt = connection.prepareStatement(SQL,ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);) {stmt.setFetchSize(10000);stmt.setFetchDirection(ResultSet.FETCH_REVERSE);stmt.setInt(1, 0);stmt.setInt(2, 10000000);ResultSet rs = stmt.executeQuery();writeToFile(writer, rs);} catch (SQLException | IOException e) {throw new RuntimeException(e);}log.info("单线程单文件 total time in {}s", getSeconds(start));emptyFolder();}private static void concurrentWriteWithFixedThread(boolean writeInOneFile) {int batch_size = 1250000;Thread[] threads = new Thread[TOTAL/batch_size];long start = System.currentTimeMillis();for (int i = 0; i < 
TOTAL; i = i + batch_size) {final int j = i;int no = i / batch_size;Thread t = new Thread(() -> {String file;if (writeInOneFile) {file = PARENT_PATH + "file.csv";} else {file = PARENT_PATH + "file_" + no + ".csv";}try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file, true)));Connection connection = DS.getConnection();PreparedStatement stmt = connection.prepareStatement(SQL, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);) {stmt.setFetchSize(10000);stmt.setFetchDirection(ResultSet.FETCH_REVERSE);stmt.setInt(1, j);stmt.setInt(2, j + batch_size);long queryStart = System.currentTimeMillis();try (ResultSet rs = stmt.executeQuery()) {log.info("query in {}s", getSeconds(queryStart));long writeStart = System.currentTimeMillis();writeToFile(writer, rs);log.info("write in {}s", getSeconds(writeStart));}} catch (SQLException | IOException e) {throw new RuntimeException(e);}});t.start();threads[no] = t;}for (Thread t : threads) {try {t.join();} catch (InterruptedException e) {throw new RuntimeException(e);}}if (writeInOneFile) {log.info("固定线程单文件 total time in {}s", getSeconds(start));} else {log.info("固定线程多文件 total time in {}s", getSeconds(start));}
// emptyFolder();}private static void concurrentWrite() {int batch_size = 10000;CompletableFuture<Void>[] futures = new CompletableFuture[TOTAL/batch_size];long start = System.currentTimeMillis();for (int i = 0; i < TOTAL; i = i + batch_size) {final int j = i;int no = i / batch_size;CompletableFuture<Void> t = CompletableFuture.runAsync(() -> {String file = PARENT_PATH + "file_" + no + ".csv";try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(Files.newOutputStream(Paths.get(file))));Connection connection = DS.getConnection();PreparedStatement stmt = connection.prepareStatement(SQL,ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);) {stmt.setInt(1, j);stmt.setInt(2, j + batch_size);try (ResultSet rs = stmt.executeQuery()){writeToFile(writer, rs);}} catch (SQLException | IOException e) {throw new RuntimeException(e);}},POOL);futures[no] = t;}CompletableFuture.allOf(futures).join();log.info("多线程多文件 total time in {}s", getSeconds(start));POOL.shutdown();emptyFolder();}private static void emptyFolder() {File file = new File(PARENT_PATH);File[] files = file.listFiles();for (File f : files) {f.delete();}}private static void writeToFile(BufferedWriter writer, ResultSet rs) throws SQLException, IOException {StringBuilder builder = new StringBuilder();while (rs.next()) {String firstName = rs.getString("first_name");String lastName = rs.getString("last_name");LocalDate dob = rs.getDate("date_of_birth").toLocalDate();String gender = rs.getString("gender");String email = rs.getString("email");String phone = rs.getString("phone_number");String address = rs.getString("address");String city = rs.getString("city");String state = rs.getString("state");String zip = rs.getString("zip_code");String country = rs.getString("country");String nationality = rs.getString("nationality");String religion = rs.getString("religion");String emergencyContactName = rs.getString("emergency_contact_name");String emergencyContactPhone = 
rs.getString("emergency_contact_phone_number");String guardianName = rs.getString("guardian_name");String guardianPhone = rs.getString("guardian_phone_number");String highSchoolName = rs.getString("high_school_name");double highSchoolGpa = rs.getDouble("high_school_gpa");int highSchoolGradYear = rs.getInt("high_school_graduation_year");String major = rs.getString("major");String degreeLevel = rs.getString("degree_level");String enrollmentStatus = rs.getString("enrollment_status");builder.append(firstName).append("|");builder.append(lastName).append("|");builder.append(dob).append("|");builder.append(gender).append("|");builder.append(email).append("|");builder.append(phone).append("|");builder.append(address).append("|");builder.append(city).append("|");builder.append(state).append("|");builder.append(zip).append("|");builder.append(country).append("|");builder.append(nationality).append("|");builder.append(religion).append("|");builder.append(emergencyContactName).append("|");builder.append(emergencyContactPhone).append("|");builder.append(guardianName).append("|");builder.append(guardianPhone).append("|");builder.append(highSchoolName).append("|");builder.append(highSchoolGpa).append("|");builder.append(highSchoolGradYear).append("|");builder.append(major).append("|");builder.append(degreeLevel).append("|");builder.append(enrollmentStatus).append("\n");writer.write(builder.toString());builder.delete(0, builder.length());}}private static long getSeconds(long start) {return Duration.ofMillis(System.currentTimeMillis() - start).getSeconds();}
}