# Code: split one parquet dataset into a train half and a test half.
import pandas as pd
# NOTE(review): `pq` is not used by the statements below, and this is a private
# pyarrow module (the public one is `pyarrow.parquet`) -- confirm before using it.
import pyarrow._parquet as pq

# Load the whole source file into a DataFrame.
data = pd.read_parquet("0000.parquet")

# Integer midpoint of the row count; with an odd number of rows the extra row
# ends up in the second half.
total_rows = len(data)
half_row_num = total_rows // 2
print(half_row_num)

# Slice at the computed midpoint instead of a hard-coded row number. The
# previous `data.iloc[20000:20000]` slice was always empty, so the test file
# never received any rows.
first_half = data.iloc[:half_row_num]
second_half = data.iloc[half_row_num:]

# Optional sanity check (assumes the data has a numeric 'label' column):
# print(first_half['label'].sum())
# print(second_half['label'].sum())

# Write both halves side by side. The train write had been swallowed by a
# comment line and pointed at the filesystem root ('/train.parquet'); it now
# sits next to the test file.
# NOTE(review): presumably the `dataset/` directory must already exist -- confirm.
first_half.to_parquet('dataset/train.parquet', engine='pyarrow')
second_half.to_parquet('dataset/test.parquet', engine='pyarrow')