大文件导出

关于大文件导出的优化迭代情况如下:
计算机配置:四核16G内存
初始版本为单线程单文件导出文件,mybatis读 opencsv写,耗时将近三小时;
第一轮优化改为多线程单文件,提高读数据效率,时间仅缩减十分钟;
第二轮改为多线程多文件,提高写文件效率,时间缩减一个半小时;
第三轮使用 Mybatis 流式查询,并改用 Map 封装数据,提高内存利用率,时间缩减十分钟;
第四轮弃用 Mybatis ,改用原生 JDBC 获取数据并直接拼接,时间缩减十分钟;
第五轮弃用 opencsv ,改用 BufferedWriter 直接写数据,时间缩减十分钟;

2023-04-23 22:01:30 [main] INFO  WriteData - 单线程单文件 total time in 258s
2023-04-23 22:02:44 [main] INFO  WriteData - 固定线程单文件 total time in 74s
2023-04-23 22:03:40 [main] INFO  WriteData - 固定线程多文件 total time in 55s
2023-04-23 22:04:18 [main] INFO  WriteData - concurrentWrite total time in 37s
2023-04-23 22:26:28 [Thread-1] INFO  WriteData - query in 42s
2023-04-23 22:26:28 [Thread-3] INFO  WriteData - query in 42s
2023-04-23 22:26:28 [Thread-4] INFO  WriteData - query in 42s
2023-04-23 22:26:28 [Thread-6] INFO  WriteData - query in 42s
2023-04-23 22:26:28 [Thread-7] INFO  WriteData - query in 42s
2023-04-23 22:26:28 [Thread-2] INFO  WriteData - query in 42s
2023-04-23 22:26:28 [Thread-5] INFO  WriteData - query in 42s
2023-04-23 22:26:30 [Thread-0] INFO  WriteData - query in 44s
2023-04-23 22:27:00 [Thread-5] INFO  WriteData - write in 31s
2023-04-23 22:27:00 [Thread-1] INFO  WriteData - write in 31s
2023-04-23 22:27:00 [Thread-7] INFO  WriteData - write in 31s
2023-04-23 22:27:00 [Thread-2] INFO  WriteData - write in 31s
2023-04-23 22:27:00 [Thread-3] INFO  WriteData - write in 32s
2023-04-23 22:27:00 [Thread-6] INFO  WriteData - write in 32s
2023-04-23 22:27:00 [Thread-4] INFO  WriteData - write in 32s
2023-04-23 22:27:01 [Thread-0] INFO  WriteData - write in 31s
2023-04-23 22:27:01 [main] INFO  WriteData - 固定线程单文件 total time in 75s
2023-04-23 22:27:24 [Thread-14] INFO  WriteData - query in 22s
2023-04-23 22:27:24 [Thread-13] INFO  WriteData - query in 22s
2023-04-23 22:27:24 [Thread-12] INFO  WriteData - query in 22s
2023-04-23 22:27:24 [Thread-9] INFO  WriteData - query in 22s
2023-04-23 22:27:24 [Thread-11] INFO  WriteData - query in 22s
2023-04-23 22:27:24 [Thread-10] INFO  WriteData - query in 22s
2023-04-23 22:27:24 [Thread-15] INFO  WriteData - query in 22s
2023-04-23 22:27:25 [Thread-8] INFO  WriteData - query in 23s
2023-04-23 22:27:55 [Thread-12] INFO  WriteData - write in 31s
2023-04-23 22:27:55 [Thread-14] INFO  WriteData - write in 31s
2023-04-23 22:27:55 [Thread-9] INFO  WriteData - write in 31s
2023-04-23 22:27:55 [Thread-11] INFO  WriteData - write in 31s
2023-04-23 22:27:55 [Thread-13] INFO  WriteData - write in 31s
2023-04-23 22:27:56 [Thread-15] INFO  WriteData - write in 31s
2023-04-23 22:27:56 [Thread-10] INFO  WriteData - write in 31s
2023-04-23 22:27:56 [Thread-8] INFO  WriteData - write in 31s
2023-04-23 22:27:56 [main] INFO  WriteData - 固定线程多文件 total time in 54s
import com.zaxxer.hikari.HikariConfig;
import com.zaxxer.hikari.HikariDataSource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;import java.io.*;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.sql.*;
import java.time.Duration;
import java.time.LocalDate;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.LinkedBlockingDeque;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;public class WriteData {static final Logger log = LoggerFactory.getLogger(WriteData.class);public static final String PARENT_PATH = "C:\\Users\\qiu01\\Desktop\\server\\docker\\mysql\\master\\data\\stu_data\\";public static final String URL = "jdbc:mysql://localhost:3307/stu?allowPublicKeyRetrieval=TRUE&useCursorFetch=true";public static final String USERNAME = "root";public static final String PASSWORD = "123456";public static final String SQL = "SELECT * FROM student WHERE id > ? AND id <= ?";public static final int TOTAL = 10000000;public static final ThreadPoolExecutor POOL = new ThreadPoolExecutor(8, 9, 3, TimeUnit.SECONDS, new LinkedBlockingDeque<>());public static final HikariDataSource DS;static {HikariConfig config = new HikariConfig();config.setJdbcUrl(URL);config.setUsername(USERNAME);config.setPassword(PASSWORD);DS = new HikariDataSource(config);}public static void main(String[] args) {// 单线程写文件singleThreadWrite();// 固定线程写同concurrentWriteWithFixedThread(true);concurrentWriteWithFixedThread(false);concurrentWrite();}public static void singleThreadWrite() {String file = PARENT_PATH + "file.csv";long start = System.currentTimeMillis();try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(Files.newOutputStream(Paths.get(file))));Connection connection = DS.getConnection();PreparedStatement stmt = connection.prepareStatement(SQL,ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);) {stmt.setFetchSize(10000);stmt.setFetchDirection(ResultSet.FETCH_REVERSE);stmt.setInt(1, 0);stmt.setInt(2, 10000000);ResultSet rs = stmt.executeQuery();writeToFile(writer, rs);} catch (SQLException | IOException e) {throw new RuntimeException(e);}log.info("单线程单文件 total time in {}s", getSeconds(start));emptyFolder();}private static void concurrentWriteWithFixedThread(boolean writeInOneFile) {int batch_size = 1250000;Thread[] threads = new Thread[TOTAL/batch_size];long start = System.currentTimeMillis();for (int i = 0; i < 
TOTAL; i = i + batch_size) {final int j = i;int no = i / batch_size;Thread t = new Thread(() -> {String file;if (writeInOneFile) {file = PARENT_PATH + "file.csv";} else {file = PARENT_PATH + "file_" + no + ".csv";}try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file, true)));Connection connection = DS.getConnection();PreparedStatement stmt = connection.prepareStatement(SQL, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);) {stmt.setFetchSize(10000);stmt.setFetchDirection(ResultSet.FETCH_REVERSE);stmt.setInt(1, j);stmt.setInt(2, j + batch_size);long queryStart = System.currentTimeMillis();try (ResultSet rs = stmt.executeQuery()) {log.info("query in {}s", getSeconds(queryStart));long writeStart = System.currentTimeMillis();writeToFile(writer, rs);log.info("write in {}s", getSeconds(writeStart));}} catch (SQLException | IOException e) {throw new RuntimeException(e);}});t.start();threads[no] = t;}for (Thread t : threads) {try {t.join();} catch (InterruptedException e) {throw new RuntimeException(e);}}if (writeInOneFile) {log.info("固定线程单文件 total time in {}s", getSeconds(start));} else {log.info("固定线程多文件 total time in {}s", getSeconds(start));}
//        emptyFolder();}private static void concurrentWrite() {int batch_size = 10000;CompletableFuture<Void>[] futures = new CompletableFuture[TOTAL/batch_size];long start = System.currentTimeMillis();for (int i = 0; i < TOTAL; i = i + batch_size) {final int j = i;int no = i / batch_size;CompletableFuture<Void> t = CompletableFuture.runAsync(() -> {String file = PARENT_PATH + "file_" + no + ".csv";try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(Files.newOutputStream(Paths.get(file))));Connection connection = DS.getConnection();PreparedStatement stmt = connection.prepareStatement(SQL,ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);) {stmt.setInt(1, j);stmt.setInt(2, j + batch_size);try (ResultSet rs = stmt.executeQuery()){writeToFile(writer, rs);}} catch (SQLException | IOException e) {throw new RuntimeException(e);}},POOL);futures[no] = t;}CompletableFuture.allOf(futures).join();log.info("多线程多文件 total time in {}s", getSeconds(start));POOL.shutdown();emptyFolder();}private static void emptyFolder() {File file = new File(PARENT_PATH);File[] files = file.listFiles();for (File f : files) {f.delete();}}private static void writeToFile(BufferedWriter writer, ResultSet rs) throws SQLException, IOException {StringBuilder builder = new StringBuilder();while (rs.next()) {String firstName = rs.getString("first_name");String lastName = rs.getString("last_name");LocalDate dob = rs.getDate("date_of_birth").toLocalDate();String gender = rs.getString("gender");String email = rs.getString("email");String phone = rs.getString("phone_number");String address = rs.getString("address");String city = rs.getString("city");String state = rs.getString("state");String zip = rs.getString("zip_code");String country = rs.getString("country");String nationality = rs.getString("nationality");String religion = rs.getString("religion");String emergencyContactName = rs.getString("emergency_contact_name");String emergencyContactPhone = 
rs.getString("emergency_contact_phone_number");String guardianName = rs.getString("guardian_name");String guardianPhone = rs.getString("guardian_phone_number");String highSchoolName = rs.getString("high_school_name");double highSchoolGpa = rs.getDouble("high_school_gpa");int highSchoolGradYear = rs.getInt("high_school_graduation_year");String major = rs.getString("major");String degreeLevel = rs.getString("degree_level");String enrollmentStatus = rs.getString("enrollment_status");builder.append(firstName).append("|");builder.append(lastName).append("|");builder.append(dob).append("|");builder.append(gender).append("|");builder.append(email).append("|");builder.append(phone).append("|");builder.append(address).append("|");builder.append(city).append("|");builder.append(state).append("|");builder.append(zip).append("|");builder.append(country).append("|");builder.append(nationality).append("|");builder.append(religion).append("|");builder.append(emergencyContactName).append("|");builder.append(emergencyContactPhone).append("|");builder.append(guardianName).append("|");builder.append(guardianPhone).append("|");builder.append(highSchoolName).append("|");builder.append(highSchoolGpa).append("|");builder.append(highSchoolGradYear).append("|");builder.append(major).append("|");builder.append(degreeLevel).append("|");builder.append(enrollmentStatus).append("\n");writer.write(builder.toString());builder.delete(0, builder.length());}}private static long getSeconds(long start) {return Duration.ofMillis(System.currentTimeMillis() - start).getSeconds();}
}

本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若转载,请注明出处:http://www.mzph.cn/news/161321.shtml

如若内容造成侵权/违法违规/事实不符,请联系多彩编程网进行投诉反馈email:809451989@qq.com,一经查实,立即删除!

相关文章

数据分析基础之《matplotlib(1)—介绍》

一、什么是matplotlib 1、专门用于开发2D图表（包括3D图表） 2、使用起来极其简单 3、以渐进、交互方式实现数据可视化 4、matplotlib mat：matrix（矩阵） plot：画图 lib：库 二、为什么要学习m…

记录一次因内存不足而导致hiveserver2和namenode进程宕机的排查

背景 最近发现集群主节点总有进程宕机，定位了大半天才找到原因，分享一下 排查过程 查询hiveserver2和namenode日志，都是正常的，突然日志就不记录了，直到我重启之后又恢复工作了。 排查各种日志都是正常的，…

vue3 + vue-router + keep-alive缓存页面

1.vue-router中增加mate.keepAlive和deepth属性 {path: /,name: home,component: HomeView,meta: {// 当前页面要不要缓存keepAlive: false,// 当前页面层级deepth: 1,}},{path: /list,name: list,component: ListView,meta: {// 当前页面要不要缓存keepAlive: true,// 当前页…

代码规范之-理解ESLint、Prettier、EditorConfig

前言 团队多人协同开发项目&#xff0c;困扰团队管理的一个很大的问题就是&#xff1a;无可避免地会出现每个开发者编码习惯不同、代码风格迥异&#xff0c;为了代码高可用、可维护性&#xff0c;需要从项目管理上尽量统一和规范代码。理想的方式需要在项目工程化方面&#xff…

Kafka官方生产者和消费者脚本简单使用

问题 怎样使用Kafka官方生产者和消费者脚本进行消费生产和消费?这里假设已经下载了kafka官方文件,并已经解压. 生产者配置文件 producer_hr.properties bootstrap.servers10.xx.xx.xxx:9092,10.xx.xx.xxx:9092,10.xx.xx.xxx:9092 compression.typenone security.protocolS…

部署jekins遇到的问题

jdk问题 我用的jdk版本是21的结果版本太新了&#xff0c;启动jekins服务的时候总是报错最后在jekins的安装目录下面的jekinsErr.log查看日志发现是jdk问题最后换了一个17版本的就解决了。 unity和jekins jekins和Git源码管理 jekins和Git联动使用 我想让jekins每次打包的时…

【css/vue】使用css变量,在同一个页面根据不同情况改变字号等样式

解决方法是&#xff1a;将 css 的属性使用 v-bind 与 Vue 组件的属性绑定&#xff0c;当组件的属性变化时&#xff0c;css 对应的属性值也就会随之变化&#xff1b; 具体实现代码&#xff1a; <template><div><span class"navTitle">标题名</s…

3D电路板在线渲染案例

从概念上讲,这是有道理的,因为PCB印制电路板上的走线从一个连接到下一个连接的路线基本上是平面的。 然而,我们生活在一个 3 维世界中,能够以这种方式可视化电路以及相应的组件,对于设计过程很有帮助。本文将介绍KiCad中基本的3D查看功能,以及如何使用NSDT 3DConvert在线…

Day38力扣打卡

打卡记录 网格中的最小路径代价&#xff08;动态规划&#xff09; 链接 class Solution:def minPathCost(self, grid: List[List[int]], moveCost: List[List[int]]) -> int:m, n len(grid), len(grid[0])f [[0x3f3f3f3f3f] * n for _ in range(m)]f[0] grid[0]for i i…

【洛谷 B2010】带余除法 题解(顺序结构+四则运算)

带余除法 题目描述 给定被除数和除数&#xff0c;求整数商及余数。此题中请使用默认的整除和取余运算&#xff0c;无需对结果进行任何特殊处理。 输入格式 一行&#xff0c;包含两个整数&#xff0c;依次为被除数和除数&#xff08;除数非零&#xff09;&#xff0c;中间用…

Sentinel 授权规则 (AuthorityRule)

Sentinel 是面向分布式、多语言异构化服务架构的流量治理组件&#xff0c;主要以流量为切入点&#xff0c;从流量路由、流量控制、流量整形、熔断降级、系统自适应过载保护、热点流量防护等多个维度来帮助开发者保障微服务的稳定性。 SpringbootDubboNacos 集成 Sentinel&…

一分钟快速了解Python3.12新特性

Python 3.12&#xff0c;作为Python编程语言的最新稳定版&#xff0c;引入了一系列对语言和标准库的改变&#xff0c;发布于2023年10月2日。重点变化包括&#xff1a; 新语法特性: PEP 695 引入类型形参语法和 type 语句&#xff0c;允许创建更明确的泛型类和函数。PEP 701 改进…

Unity 三维场景的搭建 软件构造实验报告

实验2&#xff1a;仿真系统功能实现 1.实验目的 &#xff08;1&#xff09;熟悉在Unity中设置仿真场景&#xff1b; &#xff08;2&#xff09;熟悉在Unity中C#语言的使用&#xff1b; &#xff08;3&#xff09;熟悉仿真功能的实现。 2.实验内容 新建一个仿真场景&#x…

SpringBoot_websocket实战

SpringBoot_websocket实战 前言1.websocket入门1.1 websocket最小化配置1.1.1 后端配置1.1.2 前端配置 1.2 websocket使用sockjs1.2.1 后端配置1.2.2 前端配置 1.3 websocket使用stomp协议1.3.1 后端配置1.3.2 前端配置 2.websocket进阶2.1 websocket与stomp有什么区别2.2 webs…

思维模型 重叠效应

本系列文章 主要是 分享 思维模型 &#xff0c;涉及各个领域&#xff0c;重在提升认知。相似内容易被混淆或遗忘。 1 重叠效应的应用 1.1 重叠效应在教育中的应用 1 通过避免重叠效应提升学习效率 为了避免重叠效应&#xff0c;通过对比、归纳等方法来帮助学生更好地理解和掌…

黑马React18: Redux

黑马React: Redux Date: November 19, 2023 Sum: Redux基础、Redux工具、调试、美团案例 Redux介绍 Redux 是React最常用的集中状态管理工具&#xff0c;类似于Vue中的Pinia&#xff08;Vuex&#xff09;&#xff0c;可以独立于框架运行 作用&#xff1a;通过集中管理的方式管…

VPS配置了swap没发挥作用怎么办

1 swap配置了但没用上 我的服务器内存是2G&#xff0c;装多一点东西就不够用&#xff0c;于是我给他分配了2G的swap&#xff0c;等了几小时&#xff0c;swap还是一点都没有使用 Linux中Swap&#xff08;即&#xff1a;交换分区&#xff09;&#xff0c;类似于Windows的虚拟内存…

MongoDB的常用操作以及python连接MongoDB

一,MongoDB的启动 mongod --dbpath..\data\db mongodb注意同时开两个窗口&#xff0c;不要关&#xff01; 二, MongoDB的简单使用 简单介绍一下mongoDB中一些操作 show dbs: 显示所有数据库 show databases: 显示所有数据库 use xxxx: 使用指定数据库/创建数据库&#xff08…

Linux 与大型机 z/OS

大型机 国际商业机器公司&#xff08;International Business Machine Corporation&#xff09;&#xff0c;简称为 IBM&#xff0c;实际上是当今大型机的代名词。作为大型企业技术解决方案提供商&#xff0c;IBM 在其漫长的生命周期中生产了各种产品。 他们的前身是计算、制表…

时序预测 | MATLAB实现基于BiLSTM-AdaBoost双向长短期记忆网络结合AdaBoost时间序列预测

时序预测 | MATLAB实现基于BiLSTM-AdaBoost双向长短期记忆网络结合AdaBoost时间序列预测 目录 时序预测 | MATLAB实现基于BiLSTM-AdaBoost双向长短期记忆网络结合AdaBoost时间序列预测预测效果基本介绍模型描述程序设计参考资料 预测效果 基本介绍 1.Matlab实现BiLSTM-Adaboost…