julia 笔记/论文辅助笔记：T2vec 轨迹处理

deepgtt/harbin/julia/Trip.jl at master · boathit/deepgtt (github.com)

1 导入和声明

using HDF5, CSV, DataFrames, Dates, Sockets  # file formats, tabular data, dates, TCP sockets
using Distances: euclidean                   # Euclidean distance between two points
using StatsBase: rle                         # run-length encoding of consecutive equal values
import JSON                                  # JSON encoding / decoding

# Project-local helpers. Presumably this is where gps2webmercator, webmercator2gps,
# searchrange and linear_interpolate come from -- confirm against util.jl.
include("util.jl")

"""
    Trip{T<:AbstractFloat}

A sampled trajectory.

# Fields
- `lon::Vector{T}`: longitudes of the sample points.
- `lat::Vector{T}`: latitudes of the sample points.
- `tms::Vector{T}`: timestamps of the sample points (`trip2json` interprets them as
  Unix time via `Dates.unix2datetime`).
- `devid`: device identifier (untyped; the convenience constructors default it to `0`).
- `roads`: road information attached to the trip (untyped; defaults to `nothing`).
"""
mutable struct Trip{T<:AbstractFloat}
    lon::Vector{T}
    lat::Vector{T}
    tms::Vector{T}
    devid
    roads
end

# Two convenience outer constructors for Trip are defined right below.
# Convenience constructors: `devid` defaults to 0 and `roads` to `nothing`.
Trip(lon, lat, tms) = Trip(lon, lat, tms, 0, nothing)
Trip(lon, lat, tms, devid) = Trip(lon, lat, tms, devid, nothing)

# Compact display, e.g. "Trip: 42 points" (count taken from the longitude vector).
function Base.show(io::IO, t::Trip)
    print(io, "Trip: $(length(t.lon)) points")
end

# The length of a trip is its number of sample points (length of `lon`).
Base.length(t::Trip) = length(t.lon)

# Reverse the spatial order of the points. The timestamp vector is reused as-is
# (neither reversed nor copied), so the result shares `tms` with the input.
# `devid` is carried over, consistent with the other functions that derive a trip
# from an existing one; `roads` is reset to `nothing`.
Base.reverse(trip::Trip) =
    Trip(reverse(trip.lon), reverse(trip.lat), trip.tms, trip.devid)

2 readtripsh5函数

"""
    readtripsh5(tripfile::String)

Read trips from the HDF5 file `tripfile` and return them as a `Vector{Trip}`.

The number of trips is read from `/meta/ntrips`; trip `i` is built from the datasets
`/trip/i/lon`, `/trip/i/lat` and `/trip/i/tms`. `devid` and `roads` take the
constructor defaults.
"""
function readtripsh5(tripfile::String)
    trips = Trip[]
    # The do-block form of h5open closes the file once the block finishes.
    h5open(tripfile, "r") do f
        ntrips = read(f["/meta/ntrips"])
        for i in 1:ntrips
            lon = read(f["/trip/$i/lon"])
            lat = read(f["/trip/$i/lat"])
            tms = read(f["/trip/$i/tms"])
            push!(trips, Trip(lon, lat, tms))
        end
    end
    return trips
end

3 readtripscsv函数

"""
    readtripscsv(tripfile::String, header=nothing)

Read trips from the CSV file `tripfile` and return them as a `Vector{Trip}`.

Rows are grouped by the `tripid` column and each group is sorted by `tms`; the `lon`,
`lat` and `tms` columns are converted to `Float64` vectors. If a `devid` column exists,
the first value of each group becomes the trip's `devid`, otherwise `devid` is `nothing`.

# Arguments
- `tripfile`: path of the CSV file.
- `header`: forwarded to `CSV.File` when given. When left as `nothing`, the file's own
  header row is used, so the file must provide the columns `tripid`, `tms`, `lon`,
  `lat` (and optionally `devid`).
"""
function readtripscsv(tripfile::String, header=nothing)
    # Do not forward `nothing` to CSV.File; fall back to its default header handling.
    csvfile = header === nothing ? CSV.File(tripfile) : CSV.File(tripfile, header=header)
    df = DataFrame(csvfile)
    # `names(df)` returns strings on current DataFrames, so a Symbol membership test
    # against it is always false; `propertynames` yields Symbols.
    hasdevid = :devid in propertynames(df)
    trips = Trip[]
    for sdf in groupby(df, :tripid)
        # Materialize the group so it can be sorted in place without touching `df`.
        sdf = DataFrame(sdf)
        sort!(sdf, :tms)
        lon = convert(Array{Float64}, sdf.lon)
        lat = convert(Array{Float64}, sdf.lat)
        tms = convert(Array{Float64}, sdf.tms)
        devid = hasdevid ? first(sdf.devid) : nothing
        push!(trips, Trip(lon, lat, tms, devid))
    end
    return trips
end

4 pathdistance 函数

"""
    pathdistance(trip::Trip)

Return the length of `trip`: the sum of Euclidean distances between consecutive points
after projecting them with `gps2webmercator`, divided by 1000.

The result is in kilometres provided `gps2webmercator` returns metres (assumed, the
helper is defined elsewhere). A trip with fewer than two points has length `0.0`.
"""
function pathdistance(trip::Trip)
    s = 0.0
    length(trip) < 2 && return s
    # Project each point once and carry it forward as the "previous" point.
    px, py = gps2webmercator(trip.lon[1], trip.lat[1])
    for i in 2:length(trip)
        cx, cy = gps2webmercator(trip.lon[i], trip.lat[i])
        s += euclidean([px, py], [cx, cy])
        px, py = cx, cy
    end
    return s / 1000.0
end

5 isvalidtrip 函数

"""
    isvalidtrip(trip::Trip, maxspeed=35)

Return `true` if no pair of consecutive points in `trip` implies a speed above
`maxspeed`, and `false` otherwise.

Speed is the Euclidean distance between the `gps2webmercator` projections of two
consecutive points divided by the difference of their timestamps (presumably m/s --
confirm the units of `gps2webmercator` and `tms`).

# Arguments
- `trip`: the trip to check.
- `maxspeed`: largest admissible speed; defaults to `35`.
"""
function isvalidtrip(trip::Trip, maxspeed=35)
    for i in 2:length(trip.tms)
        px, py = gps2webmercator(trip.lon[i-1], trip.lat[i-1])
        cx, cy = gps2webmercator(trip.lon[i], trip.lat[i])
        # With a zero time gap this is Inf (rejected) or NaN (0/0, which compares
        # false and therefore passes).
        speed = euclidean([px, py], [cx, cy]) / (trip.tms[i] - trip.tms[i-1])
        speed > maxspeed && return false
    end
    return true
end

6 timeslotsubtrips函数

"""
    timeslotsubtrips(trips::AbstractVector{<:Trip}, stms::T, etms::T) where T

Return the sub-trips of `trips` that fall into the time slot delimited by `stms` and
`etms`.

For every trip, `searchrange(trip.tms, stms, etms)` supplies an index pair `(a, b)`;
when `a < b` the points `a:b` form a sub-trip that keeps the original `devid`. Trips
for which `a >= b` contribute nothing. The exact boundary handling is determined by
`searchrange`, which is defined elsewhere.
"""
function timeslotsubtrips(trips::AbstractVector{<:Trip}, stms::T, etms::T) where T
    subtrips = Trip[]
    for trip in trips
        a, b = searchrange(trip.tms, stms, etms)
        # Require at least two points inside the slot.
        if a < b
            subtrip = Trip(trip.lon[a:b], trip.lat[a:b], trip.tms[a:b], trip.devid)
            push!(subtrips, subtrip)
        end
    end
    return subtrips
end

7 timeslottrips函数

"""
    timeslottrips(trips::AbstractVector{<:Trip}, stms::T, etms::T, Δmins=5) where T

Return the trips whose first timestamp lies strictly between `stms` and `etms` and
whose last timestamp is earlier than `etms + Δmins*60`.

# Arguments
- `trips`: candidate trips.
- `stms`, `etms`: start and end of the time slot (same units as `Trip.tms`).
- `Δmins`: grace period after `etms` within which a trip must end; it is multiplied by
  60, i.e. minutes if `tms` is in seconds. Defaults to `5`.

Trips without any timestamp are never selected.
"""
function timeslottrips(trips::AbstractVector{<:Trip}, stms::T, etms::T, Δmins=5) where T
    selected = filter(trips) do trip
        # An empty trip has no start time, so it cannot start inside the slot.
        !isempty(trip.tms) &&
            stms < trip.tms[1] < etms &&
            etms + Δmins*60 > trip.tms[end]
    end
    return selected
end

8 trip2finetrip函数

"""
    trip2finetrip(trip::Trip, Δ=200)

Interpolate a coarse trip into a fine-grained one and return it as a new `Trip` with
the same `devid`.

# Arguments
- `trip`: the trip to refine.
- `Δ`: spacing parameter handed to `linear_interpolate`. According to the original
  note it is the minimum Euclidean distance between two consecutive points after
  interpolation; `linear_interpolate` is defined elsewhere, so confirm there.
"""
function trip2finetrip(trip::Trip, Δ=200)
    # Interpolation is done on projected coordinates; the third return value of
    # linear_interpolate is not needed here.
    projected = gps2webmercator.(trip.lon, trip.lat)
    finepoints, tms, _ = linear_interpolate(projected, trip.tms, Δ)
    x, y = map(first, finepoints), map(last, finepoints)
    # Convert the interpolated points back to longitude / latitude.
    gpspoints = webmercator2gps.(x, y)
    lon, lat = map(first, gpspoints), map(last, gpspoints)
    return Trip(lon, lat, tms, trip.devid)
end

9 removeredundantpoints函数

"""
    removeredundantpoints(trip::Trip, δ=10)

Return a copy of `trip` without redundant sample points.

The first point is always kept. Each later point is kept only if its timestamp is at
least `δ` later than the timestamp of the most recently kept point. `devid` is carried
over. An empty trip yields an empty trip.
"""
function removeredundantpoints(trip::Trip, δ=10)
    # Indices of the points to keep; seed with the first point only if there is one.
    ind = length(trip) == 0 ? Int[] : Int[1]
    for i in 2:length(trip)
        if trip.tms[i] - trip.tms[ind[end]] >= δ
            push!(ind, i)
        end
    end
    return Trip(trip.lon[ind], trip.lat[ind], trip.tms[ind], trip.devid)
end

10 trip2geojson函数

"""
    trip2geojson(trip::Trip)

Convert `trip` into a GeoJSON-style `Dict`: a `"FeatureCollection"` whose `"features"`
are one `"Feature"` per sample point, each with a `"Point"` geometry holding
`[lon, lat]`.
"""
function trip2geojson(trip::Trip)
    points = [
        Dict(
            "type" => "Feature",
            "geometry" => Dict(
                "type" => "Point",
                "coordinates" => [trip.lon[i], trip.lat[i]],
            ),
        ) for i in 1:length(trip)
    ]
    return Dict("type" => "FeatureCollection", "features" => points)
end

11 trip2json函数

"""
    trip2json(trip::Trip)

Convert `trip` into the list of records expected by the map matcher.

Returns a `Vector{Dict{String,String}}` with one entry per sample point, containing
- `"point"`: the position as `POINT(lon lat)`,
- `"time"`: the timestamp formatted as `yyyy-mm-dd HH:MM:SS` followed by `+0800`,
- `"id"`: a fixed identifier string.
"""
function trip2json(trip::Trip)
    js = Dict{String, String}[]
    for i in 1:length(trip)
        lon, lat = trip.lon[i], trip.lat[i]
        # NOTE(review): unix2datetime yields a UTC DateTime, yet the string is labelled
        # "+0800" below -- confirm whether `tms` is already shifted to local time.
        tms = Dates.format(Dates.unix2datetime(trip.tms[i]), "yyyy-mm-dd HH:MM:SS")
        push!(js, Dict("point" => "POINT($lon $lat)",
                       "time" => "$(tms)+0800",
                       "id" => "\\x0001"))
    end
    return js
end

12 matchtrip函数

"""
    matchtrip(trip::Trip; host="localhost", port=1234)

Submit `trip` to the map-matching server and return its answer.

The trip is first thinned with `removeredundantpoints`, converted with `trip2json`,
wrapped into a `"slimjson"` request and sent as a single line over a TCP connection.
If the first response line is `"SUCCESS"`, the following line is parsed as JSON and
returned; otherwise an empty array is returned. The connection is always closed.

# Keywords
- `host`: server host name, defaults to `"localhost"`.
- `port`: server port, defaults to `1234`.
"""
function matchtrip(trip::Trip; host="localhost", port=1234)
    js = trip |> removeredundantpoints |> trip2json
    request = Dict("format" => "slimjson", "request" => js) |> JSON.json
    clientside = connect(host, port)
    try
        # The request is sent as one line, terminated by a newline.
        message = request * "\n"
        write(clientside, message)
        response = readline(clientside)
        return response == "SUCCESS" ? JSON.parse(readline(clientside)) : []
    finally
        close(clientside)
    end
end

13 trip2roads函数

"""
    trip2roads(trip::Trip)

Map `trip` to a sequence of road segments.

The trip is matched with `matchtrip`; the `"road"` entry of every returned record is
collected (`-1` when a record has none) and consecutive duplicates are collapsed with
`rle`, whose run lengths are discarded.
"""
function trip2roads(trip::Trip)
    result = matchtrip(trip)
    # rle returns (values, run lengths); only the de-duplicated values are needed.
    roads, _ = rle(map(d -> get(d, "road", -1), result))
    return roads
end