先从文件中提取指定的几列并去重,然后按这些列的取值去匹配数据,最后把匹配到的多组数据绘制在同一张图上。
import matplotlib.pyplot as plt
import re
from datetime import datetime
from pylab import mpl

# SimHei is selected so the Chinese title and axis labels can be rendered.
mpl.rcParams["font.sans-serif"] = ["SimHei"]
# Keep the minus sign on negative tick labels displayable with that font.
mpl.rcParams["axes.unicode_minus"] = False

# Sample of the input log, kept for reference only (the string is never used).
# After splitting a line on the pattern "[,| ]+" the tokens read by the code
# below are: [0] date, [1] time, [3] sys, [5] prn, [8] f and [20] ionxr.
# Every record appears twice on consecutive lines.
"""
out_los_GDHZ.txt文件内容
2024/03/05 05:50:01 sys: 1,prn: 8, f : 0, rr : 22776258.520521, cdts : 9499.326017, dtrpr : 0.000000, ionxr : 16.994414, dantr : 0.000000, dantrr[findex] : 0.000000, scb : 0.000000,
2024/03/05 05:50:01 sys: 1,prn: 8, f : 0, rr : 22776258.520521, cdts : 9499.326017, dtrpr : 0.000000, ionxr : 16.994414, dantr : 0.000000, dantrr[findex] : 0.000000, scb : 0.000000,
2024/03/05 05:50:01 sys: 1,prn: 8, f : 1, rr : 22776258.520521, cdts : 9499.326017, dtrpr : 0.000000, ionxr : 30.475578, dantr : 0.000000, dantrr[findex] : 0.000000, scb : 0.000000,
2024/03/05 05:50:01 sys: 1,prn: 8, f : 1, rr : 22776258.520521, cdts : 9499.326017, dtrpr : 0.000000, ionxr : 30.475578, dantr : 0.000000, dantrr[findex] : 0.000000, scb : 0.000000,
2024/03/05 05:50:01 sys: 1,prn: 14, f : 0, rr : 23343648.060891, cdts : 102648.741372, dtrpr : 0.000000, ionxr : 19.831918, dantr : 0.000000, dantrr[findex] : 0.000000, scb : 0.000000,
2024/03/05 05:50:01 sys: 1,prn: 14, f : 0, rr : 23343648.060891, cdts : 102648.741372, dtrpr : 0.000000, ionxr : 19.831918, dantr : 0.000000, dantrr[findex] : 0.000000, scb : 0.000000,
2024/03/05 05:50:01 sys: 1,prn: 14, f : 1, rr : 23343648.060891, cdts : 102648.741372, dtrpr : 0.000000, ionxr : 35.563990, dantr : 0.000000, dantrr[findex] : 0.000000, scb : 0.000000,
2024/03/05 05:50:01 sys: 1,prn: 14, f : 1, rr : 23343648.060891, cdts : 102648.741372, dtrpr : 0.000000, ionxr : 35.563990, dantr : 0.000000, dantrr[findex] : 0.000000, scb : 0.000000,
2024/03/05 05:50:01 sys: 1,prn: 30, f : 0, rr : 21635324.825727, cdts : -127128.592610, dtrpr : 0.000000, ionxr : 14.159144, dantr : 0.000000, dantrr[findex] : 0.000000, scb : 0.000000,
2024/03/05 05:50:01 sys: 1,prn: 30, f : 0, rr : 21635324.825727, cdts : -127128.592610, dtrpr : 0.000000, ionxr : 14.159144, dantr : 0.000000, dantrr[findex] : 0.000000, scb : 0.000000,
"""


# Step 1: extract the selected columns and de-duplicate them.
def get_sys(path='out_los_GDHZ_1.txt'):
    """Return the distinct (sys, prn, f) keys found in the log file.

    The log stores every record twice on consecutive lines, so only every
    other line (even 0-based line numbers) is inspected. Each inspected line
    is split on the pattern ``[,| ]+`` and tokens 3, 5 and 8 form the key.

    Args:
        path: Log file to scan. Defaults to the file name the script has
            always used, so existing ``get_sys()`` calls keep working.

    Returns:
        A sorted list of ``(sys, prn, f)`` tuples of strings, one per
        distinct combination. Sorting makes the order reproducible between
        runs, which the previous set-iteration order was not.
    """
    splitter = re.compile("[,| ]+")
    unique_data = set()
    # Iterate over the file object instead of readlines(): only one line is
    # held in memory at a time, which matters for multi-million-line logs.
    with open(path, 'r') as file:
        for line_no, line in enumerate(file):
            if line_no % 2:
                # Odd lines are the duplicate copy of the preceding record.
                continue
            temp = splitter.split(line)
            if len(temp) <= 8:
                # Blank or truncated line: it cannot carry a complete key.
                continue
            unique_data.add((temp[3], temp[5], temp[8]))
    return sorted(unique_data)


# Step 2: match every record against those keys, collect the series and plot.
def static():
    """Plot the ionxr value over time, one line per (sys, prn, f) key.

    The keys come from ``get_sys()``. The log file is then streamed once;
    every other line is skipped because each record is stored twice. For each
    remaining line the timestamp (tokens 0 and 1) and the ionxr value
    (token 20) are appended to the series of the matching key, and all series
    are drawn on a single figure.

    Raises:
        ValueError: If a matched line has a timestamp that does not follow
            ``%Y/%m/%d %H:%M:%S`` or an ionxr token that is not a number.
    """
    # One (times, values) pair per key. A dict lookup replaces comparing each
    # line against every key in turn.
    series = {
        tuple(str(field) for field in key): ([], [])
        for key in get_sys()
    }

    splitter = re.compile("[,| ]+")
    # Iterate over the file object instead of readlines() so that only one
    # line is held in memory at a time.
    with open('out_los_GDHZ_1.txt', 'r') as file:
        for line_no, line in enumerate(file):
            if line_no % 2:
                # Odd lines are the duplicate copy of the preceding record.
                continue
            temp = splitter.split(line)
            if len(temp) <= 20:
                # Blank or truncated line: no ionxr token to read.
                continue
            matched = series.get((temp[3], temp[5], temp[8]))
            if matched is None:
                continue
            times, values = matched
            times.append(
                datetime.strptime(temp[0] + " " + temp[1], "%Y/%m/%d %H:%M:%S")
            )
            # Convert to float: string values would be plotted on a
            # categorical axis instead of a numeric one.
            values.append(float(temp[20]))

    # Draw one line per key on a shared figure.
    plt.title(u'电离层活跃状况')
    for times, values in series.values():
        plt.plot(times, values)
    plt.xlabel(u"日期")
    plt.ylabel(u"电离层系数")
    plt.show()


if __name__ == '__main__':
    static()
由于数据量达到三百多万条,在 16 GB 内存的笔记本上运行时报 MemoryError(内存占用达到 98%);于是重新截取了二十多万条数据再运行,得到的结果图中共有 40 条曲线。