文章目录
- 1. geoHash
- 2. kdTree算法求最近点
- 3.暴力法
- 4.利用elasticsearch或者lucene
1. geoHash
首先对经纬度点进行编码:
- 利用geoHash把经纬转换成32进制的编码字符串
- 将待搜索的坐标转换成编码,与坐标库中的编码串逐一比较,把前缀匹配长度较长的点作为候选放入集合中,再从这些候选点中找出距离最小的点
- 用户输入一个坐标点,找出46458个坐标点中最近的一个
geohash算法测试结果:
前缀匹配长度为2:
(输入:100个坐标点)正确率 92% 无法计算 0% 平均计算时间:17.5ms
(输入:1000个坐标点)正确率 92.6% 无法计算 0% 平均计算时间:16.4ms
前缀匹配长度为3:
(输入:100个坐标点)正确率 51% 无法计算 35% 平均计算时间:3.784313725490196ms
(输入:1000个坐标点)正确率 58% 无法计算 29% 平均计算时间:2.4ms、2.2083333333333335ms
暴力算法测试平均时间:2ms
geoHash算法java源码:
package net.work.geoHash;/**
*
* 2018年11月29日
*/
import java.util.BitSet;
import java.util.HashMap;

/**
 * Encodes latitude/longitude pairs as fixed-length base-32 geohash strings and decodes them back.
 *
 * <p>Each coordinate is quantised to {@code numbits} bits by repeatedly halving its range. The
 * longitude and latitude bits are interleaved (longitude first) and emitted five bits per
 * character, so an encoded hash always has {@code numbits * 2 / 5} characters. Keeping the
 * leading {@code '0'} characters matters: callers compare hashes by common prefix.
 */
public class GeoHash {

    /** Bits of precision kept for each of latitude and longitude. */
    private static final int numbits = 6 * 5;

    /** Base-32 alphabet; the character at index {@code v} encodes the 5-bit value {@code v}. */
    final static char[] digits = { '0', '1', '2', '3', '4', '5', '6', '7', '8',
            '9', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'j', 'k', 'm', 'n', 'p',
            'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z' };

    /** Reverse mapping from alphabet character to its 5-bit value. */
    final static HashMap<Character, Integer> lookup = new HashMap<Character, Integer>();

    static {
        int i = 0;
        for (char c : digits) {
            lookup.put(c, i++);
        }
    }

    /**
     * Decodes a geohash string.
     *
     * <p>At most {@code numbits * 2} bits are consumed; missing bits are treated as zero. The
     * returned coordinates are the lower bounds (south-west corner) of the cell the hash denotes.
     *
     * @param geohash hash made of characters from {@link #digits}
     * @return a two-element array {@code {latitude, longitude}}
     * @throws IllegalArgumentException if the hash contains a character outside the alphabet
     */
    public double[] decode(String geohash) {
        double lonFloor = -180;
        double lonCeiling = 180;
        double latFloor = -90;
        double latCeiling = 90;
        final int maxBits = numbits * 2;
        int bitIndex = 0;

        for (int k = 0; k < geohash.length(); k++) {
            char c = geohash.charAt(k);
            Integer value = lookup.get(c);
            if (value == null) {
                throw new IllegalArgumentException(
                        "Invalid geohash character '" + c + "' in \"" + geohash + "\"");
            }
            // Most significant bit first; even positions refine longitude, odd ones latitude.
            for (int shift = 4; shift >= 0 && bitIndex < maxBits; shift--, bitIndex++) {
                boolean upperHalf = ((value >> shift) & 1) == 1;
                if ((bitIndex & 1) == 0) {
                    double mid = (lonFloor + lonCeiling) / 2;
                    if (upperHalf) {
                        lonFloor = mid;
                    } else {
                        lonCeiling = mid;
                    }
                } else {
                    double mid = (latFloor + latCeiling) / 2;
                    if (upperHalf) {
                        latFloor = mid;
                    } else {
                        latCeiling = mid;
                    }
                }
            }
        }
        return new double[] { latFloor, lonFloor };
    }

    /**
     * Encodes a coordinate as a geohash of {@code numbits * 2 / 5} characters.
     *
     * @param lat latitude, expected within [-90, 90]
     * @param lon longitude, expected within [-180, 180]
     * @return the fixed-length geohash, including any leading {@code '0'} characters
     */
    public String encode(double lat, double lon) {
        boolean[] latBits = getBits(lat, -90, 90);
        boolean[] lonBits = getBits(lon, -180, 180);

        final int totalBits = numbits * 2;
        StringBuilder hash = new StringBuilder(totalBits / 5);
        int chunk = 0;
        int chunkBits = 0;
        for (int b = 0; b < totalBits; b++) {
            boolean bit = (b & 1) == 0 ? lonBits[b / 2] : latBits[b / 2];
            chunk = (chunk << 1) | (bit ? 1 : 0);
            if (++chunkBits == 5) {
                hash.append(digits[chunk]);
                chunk = 0;
                chunkBits = 0;
            }
        }
        return hash.toString();
    }

    /**
     * Quantises a value to {@code numbits} bits by interval halving.
     *
     * @param value   the coordinate to quantise
     * @param floor   lower bound of the coordinate range
     * @param ceiling upper bound of the coordinate range
     * @return bits, most significant first; {@code true} means the value lies in the upper half
     */
    private boolean[] getBits(double value, double floor, double ceiling) {
        boolean[] bits = new boolean[numbits];
        for (int i = 0; i < numbits; i++) {
            double mid = (floor + ceiling) / 2;
            if (value >= mid) {
                bits[i] = true;
                floor = mid;
            } else {
                ceiling = mid;
            }
        }
        return bits;
    }

    /** Small demo: encodes one coordinate, prints the hash, then prints the decoded pair. */
    public static void main(String[] args) throws Exception {
        GeoHash geohash = new GeoHash();
        String s = geohash.encode(39.863644, 116.286964);
        System.out.println(s);
        double[] geo = geohash.decode(s);
        System.out.println(geo[0] + " " + geo[1]);
    }
}
根据编码求最近点:
package net.work.geoHash;import java.io.BufferedReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map.Entry;import java.util.Scanner;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;public class InputCity {public static HashMap<Integer, String> decode_map;public static HashMap<Integer, Location> city_map;public GeoHash geoHash = new GeoHash();public LenCompator compator = new LenCompator();static {decode_map = new HashMap<Integer, String>();city_map = new HashMap<Integer, Location>();}class LenCompator implements Comparator<Common>{@Overridepublic int compare(Common o1, Common o2) {return Integer.compare(o2.len, o1.len);}}class Common{Integer id;int len;public Common(Integer id, int len) {this.id = id;this.len = len;}@Overridepublic String toString() {return "Common [id=" + id + ", len=" + len + "]";}}class Location{int id;String full_name;int pid;String name;double x;double y;public Location() {}public Location(int id, String full_name, int pid, String name, double x, double y) {this.id = id;this.full_name = name;this.pid = pid;this.name = name;this.x = x;this.y = y;}@Overridepublic String toString() {return "Location [id=" + id + ", full_name=" + full_name + ", pid=" + pid + ", name=" + name + ", x=" + x+ ", y=" + y + "]";}}class ReturnBean{Location location;double distance;public ReturnBean(Location location, double distance) {this.location = location;this.distance = distance;}}public void init() throws IOException {String data = util.Directory.GetAppPath("data");String decode = data + "decode.txt";String city = data + "行政区划及经纬.txt";BufferedReader br = util.MyFileTool.GetBufferReader(decode);BufferedReader br1 = util.MyFileTool.GetBufferReader(city);while(br.ready()) {String line = br.readLine();String[] ls = line.split("\t");int id = Integer.parseInt(ls[0]);String code = ls[1];System.out.println("id: " + id + " " + "code: " + code);decode_map.put(id, code);}while(br1.ready()) {String line = br1.readLine();String[] ls = line.split(" ");int id = Integer.parseInt(ls[0]);String full_name = ls[1];int pid = Integer.parseInt(ls[2]);String name = ls[3];double x = Double.parseDouble(ls[4]);double y = 
Double.parseDouble(ls[5]);city_map.put(id, new Location(id, full_name, pid, name, x, y));}br.close();br1.close();}public int getPreCommonLength(String pre, String las) {int len = 0;for(int i = 0; i < pre.length(); i++) {if(len == i && pre.charAt(i) == las.charAt(i)) {len ++;}else {break;}}return len;}public ArrayList<Common> getNearbyLocations(String code, LenCompator compator){ArrayList<Common> common_list = new ArrayList<Common>();Set<Entry<Integer,String>> set = decode_map.entrySet();int limit = 3;for(Entry<Integer,String> elem : set) {int id = elem.getKey();String encode = elem.getValue();int len = getPreCommonLength(encode, code);if(len < limit) {continue;}common_list.add(new Common(id, len));Collections.sort(common_list, compator);}return common_list;}public Double getTwoPointDistanceSquare(double x1, double y1, double x2, double y2) {return (x1 - x2) * (x1 - x2) + (y1 - y2)*(y1 - y2);}//如果距离相同只显示一个地点public ReturnBean getSmallestDistanceLocation(double x, double y, ArrayList<Common> common_set, int count) {int i = 0;Integer id;Location minDistanceLocation = null;double minDistance = Double.MAX_VALUE;for(Common com_bean : common_set) {id = com_bean.id;Location location = city_map.get(id);if(location == null) {continue;}Double distance = getTwoPointDistanceSquare(x, y, location.x, location.y);if(distance < minDistance) {minDistance = distance;minDistanceLocation = location;}if(i == count) {break;}}return new ReturnBean(minDistanceLocation, Math.sqrt(minDistance));}public ReturnBean getSmallestDistanceLocation(double x, double y, ArrayList<Common> common_set) {int i = 0;Integer id;Location minDistanceLocation = new Location();double minDistance = Double.MAX_VALUE;for(Common com_bean : common_set) {id = com_bean.id;Location location = city_map.get(id);if(location == null) {continue;}Double distance = getTwoPointDistanceSquare(x, y, location.x, location.y);if(distance < minDistance) {minDistance = distance;minDistanceLocation = location;}}return new 
ReturnBean(minDistanceLocation, Math.sqrt(minDistance));}public HashMap<String, String> getDistance(double x, double y){long s = System.currentTimeMillis();String encode = geoHash.encode(y, x);
// double[] ds = geoHash.decode(encode);ArrayList<Common> common_list = getNearbyLocations(encode, compator);ReturnBean ans = getSmallestDistanceLocation(x, y, common_list, 10);
// if(ans == null) {
// ans = inputCity.getSmallestDistanceLocation(x, y, common_list);
// }HashMap<String, String> map = new HashMap<String, String>();map.put("status", "ok");if(ans.location == null || ans.location.id == 0) {System.out.println("超出定位范围,无法算出");map.put("status", "error");}else {map.put("id", ans.location.id + "");map.put("path", ans.location.x + "," + ans.location.y);map.put("name", ans.location.name);map.put("len",Math.sqrt(ans.distance) + "");}map.put("input", x + "," + y);map.put("decode", encode);map.put("map_size", common_list.size() + "");long e = System.currentTimeMillis();map.put("time", (e - s) + "");return map;}public static void main(String[] args) throws IOException {GeoHash geoHash = new GeoHash();InputCity inputCity = new InputCity();inputCity.init();LenCompator compator = inputCity.new LenCompator();int count = 10;while(true) {//西城区附近点: 116.368049 39.910508 测试结果:牛街街道//来广营地铁十四号线东路: 116.473489 40.026145 结果://116.498464 39.997745//116.496991 39.999348//116.473489 40.026145//出错例子:116.013857,29.712846 目标地点:九江市 定位:浙江省
// Scanner sc = new Scanner(System.in);
// String[] ss = sc.next().split(",");double x = 56;double y = 78;//double x = Double.parseDouble(ss[0]);//double y = Double.parseDouble(ss[1]);long s = System.currentTimeMillis();//经度、维度String encode = geoHash.encode(y, x);System.out.println("输入坐标: " + x + " " + y);System.out.println("经纬度编码: " + encode);double[] ds = geoHash.decode(encode);System.out.println("解码: " + ds[1] + " " + ds[0]);ArrayList<Common> common_list = inputCity.getNearbyLocations(encode, compator);System.out.println("common_list_size: " + common_list.size());System.out.println("地方编码距离: " + common_list);ReturnBean ans = inputCity.getSmallestDistanceLocation(x, y, common_list, count);
// if(ans == null) {
// ans = inputCity.getSmallestDistanceLocation(x, y, common_list);
// }if(ans.location == null) {System.out.println("超出定位范围,无法算出");}else {System.out.println("最近点: " + ans.location);System.out.println("最近点坐标: " + ans.location.x + "," + ans.location.y);System.out.println("距离: " + ans.distance);long e = System.currentTimeMillis();System.out.println("spend time: " + (e - s) + "ms");}System.exit(-1);}}
}
优秀文章:
https://blog.csdn.net/youhongaa/article/details/78816700
https://blog.csdn.net/u011497262/article/details/81210634
2. kdTree算法求最近点
原理:最近邻点问题
在空间上给出一个点,求解距离该点最近的点。
首先通过二叉树搜索(比较待查询节点和分裂节点的分裂维的值,小于等于就进入左子树分支,大于就进入右子树分支,直到叶子结点),顺着“搜索路径”很快能找到最近邻的近似点,也就是与待查询点处于同一个子空间的叶子结点;
然后再回溯搜索路径,并判断搜索路径上的结点的其他子结点空间中是否可能有距离查询点更近的数据点,如果有可能,则需要跳到其他子结点空间中去搜索(将其他子结点加入到搜索路径)。
重复这个过程直到搜索路径为空。
需要用到一个队列,存储需要回溯的点,在判断其他子节点空间中是否有可能有距离查询点更近的数据点时,做法是以查询点为圆心,以当前的最近距离为半径画圆,这个圆称为候选超球(candidate hypersphere),如果圆与回溯点的轴相交,则需要将轴另一边的节点都放到回溯队列里面来。
KDTree就是超平面都垂直于轴的BSPTree
原文:https://blog.csdn.net/define_us/article/details/79855133
求最近点代码:
package net.work.kdTree;import java.io.BufferedReader;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;public class KDTreeMain {public static int KDTCount = 0; // 统计在kdt 搜索的时候,计算了和几个点的距离public static Node root;public static int deep = 0;public static int count_point = 0;//样本点个数(经纬度点的总个数)public static List<Point> pointList;public static KDTreeMain kdt;public static void init(){int xn = 2; // 样本点维数int deep = 0; // 轴String data = util.Directory.GetAppPath("data") + "行政区划及经纬.txt";BufferedReader br = util.MyFileTool.GetBufferReader(data);// 准备数据pointList = new LinkedList<Point>();try {while (br.ready()) {String line = br.readLine();String[] ls = line.split(" ");int id = Integer.parseInt(ls[0]);String full_name = ls[1];int pid = Integer.parseInt(ls[2]);String name = ls[3];System.out.println(name);double lnt = Double.parseDouble(ls[4]);double lat = Double.parseDouble(ls[5]);double[] d = new double[xn];d[0] = lnt;d[1] = lat;//扩大十倍for(int i = 0; i < 10; i++) {pointList.add(new Point(id, full_name, pid, name, d));count_point ++;}
// pointList.add(new Point(id, full_name, pid, name, d));
// count_point ++;}} catch (NumberFormatException | IOException e) {e.printStackTrace();}try {br.close();} catch (IOException e) {e.printStackTrace();}// build treeSystem.out.println("beging insert...");double t1 = System.currentTimeMillis();kdt = new KDTreeMain();root = new Node();insert(root, pointList, deep);double t2 = System.currentTimeMillis();System.out.println("buld kdt time = " + (t2 - t1));}public static void main(String[] args) throws IOException {init();// 目标点double[] f = new double[2];//87.356426,40.800009f[0] = 87.356426;f[1] = 40.800009;Point p = new Point(f);// KDT搜索double t3 = System.currentTimeMillis();double min_dis = Double.MAX_VALUE;Point result_p;result_p = query(root, p, new Point(min_dis), deep);double t4 = System.currentTimeMillis();System.out.println("查询时间:" + (t4 - t3));System.out.println("最近点 " + result_p);System.out.println("KDTCount = " + KDTCount);// 暴力法double t5 = System.currentTimeMillis();int index = 0;double best2 = Double.MAX_VALUE;for (int i = 0; i < count_point; i++) {double dist = getDist(p, pointList.get(i));if (dist < best2) {best2 = dist;index = i;}}double t6 = System.currentTimeMillis();System.out.println("暴力时间: " + (t6 - t5));System.out.println("最短距离: " + best2);// System.out.println("goal point = " + p.x[0] + " , " + p.x[1]);// System.out.println("neast point = " + pointList.get(index).x[0] + " , " + pointList.get(index).x[1]);}// build kdtreestatic private void insert(Node root, List<Point> pointList, int deep) {System.out.println("构建树....");System.out.println(3);int mid = pointList.size() / 2;// 排序后拿到中位数Point.deep = deep;Collections.sort(pointList);// 类似快排的方法拿到中位数// getMedian(pointList, 0, pointList.size() - 1, mid, deep);// showList(pointList);// System.out.println("=========================");int pl = mid;int pr = mid;while(pl >= 0 && pointList.get(pl).x[deep] == pointList.get(mid).x[deep]) pl--;while(pr < pointList.size() && pointList.get(pr).x[deep] == pointList.get(mid).x[deep]) pr++;List<Point> pointListLeft = 
pointList.subList(0, pl + 1);List<Point> pointListMid = pointList.subList(pl + 1, pr);List<Point> pointListRight = pointList.subList(pr, pointList.size());root.pointList = pointListMid;if (pointListLeft.size() > 0) {root.l = new Node();System.out.println(1);insert(root.l, pointListLeft, (deep + 1) % pointList.get(0).x.length);}System.out.println(1);if (pointListRight.size() > 0) {root.r = new Node();System.out.println(2);insert(root.r, pointListRight, (deep + 1) % pointList.get(0).x.length);}}// search the nearest point to p in KDTreestatic Point query(Node root, Point p, Point best_p, int deep) {if (root == null) return best_p; double dist; if (root.l == null && root.r == null) { for (int i = 0; i < root.pointList.size(); i++) { KDTCount++; dist = getDist(root.pointList.get(i), p); if(dist < best_p.len) {best_p = root.pointList.get(i);best_p.len = dist;}} return best_p; } // left or right if (p.x[deep] <= root.pointList.get(0).x[deep]) { best_p = query(root.l, p, best_p, (deep + 1) % p.x.length);} else { best_p = query(root.r, p, best_p, (deep + 1) % p.x.length);} // cur for (int i = 0; i < root.pointList.size(); i++) { KDTCount++; dist = getDist(root.pointList.get(i), p); if(dist < best_p.len) {best_p = root.pointList.get(i);best_p.len = dist;}} // another side if (best_p.len >= Math.abs(p.x[deep] - root.pointList.get(0).x[deep])) { Point another_p = new Point(Double.MAX_VALUE);if (p.x[deep] <= root.pointList.get(0).x[deep]) { another_p = query(root.r, p, best_p, (deep + 1) % p.x.length);} else { another_p = query(root.l, p, best_p, (deep + 1) % p.x.length);} if (another_p.len < best_p.len) { best_p = another_p;best_p.len = another_p.len;} } return best_p; }// print kdtree@SuppressWarnings("unused")private static void showKDTree(Node root, char[] path, int pi) {if (root == null) return;System.out.print(pi + "# ");for (int i = 0; i < pi; i++) {System.out.print(path[i] + " ");}// midshowList(root.pointList);// leftpath[pi++] = 'L';showKDTree(root.l, path, 
pi);pi--;// rightpath[pi++] = 'R';showKDTree(root.r, path, pi);pi--;}// 欧式距离private static double getDist(Point p1, Point p2) {double sum = 0;for (int i = 0; i < p1.x.length; i++) {sum += (p1.x[i] - p2.x[i]) * (p1.x[i] - p2.x[i]);}if (sum == 0) return Double.MAX_VALUE;return Math.sqrt(sum);}// 类似快排的思想拿到中位数,O(n)时间复杂度@SuppressWarnings("unused")private void getMedian(List<Point> pointList, int l, int r, int k, int deep) {if (l == r && k == 0) return; int pl = l; int pr = r; double[] tmp = pointList.get(l).x; while (pl < pr) { while (pl < pr && pointList.get(pr).x[deep] > tmp[deep]) pr--; if (pl >= pr) break; pointList.get(pl++).x = pointList.get(pr).x; while (pl < pr && pointList.get(pl).x[deep] < tmp[deep]) pl++; if (pl >= pr) break; pointList.get(pr--).x = pointList.get(pl).x;} pointList.get(pl).x = tmp; if(pl - l == k) return; if(pl - l > k) { getMedian(pointList, l, pl - 1, k, deep); } else { getMedian(pointList, pl + 1, r, k - (pl - l + 1), deep); } }// 打印一个点列表private static void showList(List<Point> pointList) {for (int i = 0; i < pointList.size(); i++) {for( int j = 0; j < pointList.get(i).x.length; j++) {System.out.print(pointList.get(i).x[j] + ",");}System.out.print(" / ");}System.out.println();}
}
/**
 * A node of the k-d tree.
 */
class Node {
    /** Left child, or {@code null} when there is none. */
    Node l;

    /** Right child, or {@code null} when there is none. */
    Node r;

    /** Points held by this node; starts out as an empty list. */
    List<Point> pointList = new LinkedList<Point>();
}
// 数据点
class Point implements Comparable<Point>{public static int deep = 0;double[] x;@Overridepublic String toString() {return "Point [x=" + Arrays.toString(x) + ", id=" + id + ", name=" + name + ", full_name=" + full_name+ ", pid=" + pid + ", len=" + len + "]";}int id;String name;String full_name;int pid;double len;public Point() {}public Point(double len) {this.len = len;}public Point(double[] d) {x = new double[d.length];for (int i = 0; i < d.length; i++) {x[i] = d[i];}}public Point(int id, String full_name, int pid, String name, double[] x){this.id = id;this.full_name = full_name;this.pid = pid;this.name = name;this.x = x;}public int compareTo(Point o) {// return (int)(this.x[deep] == other.x[deep]); 出错,因为x的值在0~1之间,那么int都是0了Point other = (Point)o;if (this.x[deep] == other.x[deep]) return 0;if (this.x[deep] > other.x[deep]) return 1;return -1;}
}
3.暴力法
package net.work.baoli;import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Scanner;/**** 2018年11月29日*/
public class baoli {public static ArrayList<Point> list;static {list = new ArrayList<Point>();String t = util.Directory.GetAppPath("data") + "行政区划及经纬.txt";BufferedReader br = util.MyFileTool.GetBufferReader(t);try {while(br.ready()) {String line = br.readLine();String[] ls = line.split(" ");int id = Integer.parseInt(ls[0]);String full_name = ls[1];int pid = Integer.parseInt(ls[2]);String name = ls[3];System.out.println(name);double lnt = Double.parseDouble(ls[4]);double lat = Double.parseDouble(ls[5]);//扩大十倍
// for(int i = 0; i < 10; i ++) {
// list.add(new Point(id, full_name, pid, name, lnt, lat));list.add(new Point(id, full_name, pid, name, lnt, lat));
// }}} catch (NumberFormatException | IOException e) {e.printStackTrace();}}static class Point{int id;String name;String full_name;int pid;double x,y;public Point(){}public Point(int id, String full_name, int pid, String name, double x, double y){this.id = id;this.full_name = full_name;this.pid = pid;this.name = name;this.x = x;this.y = y;}}public static Double getTwoPointDistanceSquare(double x1, double y1, double x2, double y2) {return (x1 - x2) * (x1 - x2) + (y1 - y2)*(y1 - y2);}public static HashMap<String, String> getDistace(double x, double y) throws IOException {long s = System.currentTimeMillis();double min_x = 0, min_y = 0;double min_dis = Double.MAX_VALUE;String min_name = null;int min_id = 0;for(Point p : list) {Double dis = getTwoPointDistanceSquare(p.x, p.y, x, y);if(min_dis > dis) {min_id = p.id;min_dis = dis;min_x = p.x;min_y = p.y;min_name = p.name;}}HashMap<String, String> map = new HashMap<String, String>();map.put("id", min_id + "");map.put("input", x + ","+ y);map.put("path", min_x + "," + min_y);map.put("len", Math.sqrt(min_dis) + "");map.put("name", min_name);long e = System.currentTimeMillis();map.put("time", (e - s) + "");return map;}public static void main(String[] args) throws NumberFormatException, IOException {while(true){Scanner sc = new Scanner(System.in);String[] ss = sc.next().split(",");double x = Double.parseDouble(ss[0]);double y = Double.parseDouble(ss[1]);HashMap<String, String> map = getDistace(x, y);System.out.println(map);}}
}
总结:
暴力法:
(1000)正确率 100% 平均计算时间:0.9ms
如果样本(行政区划及经纬.txt)扩大十倍:平均时间:3ms
geoHash(不适合计算青海、内蒙古、新疆等点分布比较分散的省份;其他省份的准确率接近99%):
前缀匹配长度为2:
(100)正确率 92% 无法计算 0% 平均计算时间:17.5ms
(1000)正确率 92.6% 无法计算 0% 平均计算时间:16.4ms
前缀匹配长度为3:
(100)正确率 51% 无法计算 35% 平均计算时间:3.784313725490196ms
(1000)正确率 58% 无法计算 29% time:2.4ms 2.2083333333333335ms
如果样本(行政区划及经纬.txt)扩大十倍:前缀匹配长度为3:正确率: 54.3% 平均时间:1.8176795580110496ms
优点:当样本的数据量多的时候可以体现geoHash的优势
缺点:准确率不是很高
kdTree:
缺点:初始化构建树的时候花费时间比较长
(1000)正确率 100% 平均计算时间:54ms
样本数量扩大十倍:平均时间 2523ms
knn算法:当k取1时与暴力法原理相同,但时间复杂度更高
4.利用elasticsearch或者lucene
如果数据量特别大,录入到es建立索引,然后利用es提供的求最近距离的API即可