Fast location queries using KDBush.

master
Sander Vocke 6 years ago
parent 5800a274f8
commit caf7b76ccc
  1. 1
      package.json
  2. 51
      src/database.js
  3. 127
      src/geo_store.js
  4. 4
      src/queries.js
  5. 1338
      yarn.lock

@ -15,6 +15,7 @@
"@testing-library/react": "^9.3.2",
"@testing-library/user-event": "^7.1.2",
"@turf/points-within-polygon": "^5.1.5",
"@turf/turf": "^5.1.6",
"alasql": "^0.5.4",
"console": "^0.7.2",
"date-fns": "^2.0.0",

@ -1,9 +1,12 @@
import React, { useEffect, useState, useContext } from 'react';
import NodeEnvironment from 'jest-environment-node';
import { add_geo_area_to_store, get_geo_area_from_store } from './geo_store.js';
import * as turf from '@turf/turf'
import {
add_geo_area_to_store, get_geo_area_from_store,
initialize_image_index, image_in_area
} from './geo_store.js';
import * as turf from '@turf/turf'
import KDBush from 'kdbush';
export async function sqljs_async_queries(sqljs_object, queries) {
@ -52,11 +55,8 @@ export function regexp_match(string, regex) {
return string.match(regex) != null;
}
export function is_in_geo_polygon_from_store(lat, long, polygon_hash) {
const area = get_geo_area_from_store(polygon_hash);
var gju = require('geojson-utils');
return gju.pointInPolygon({ "type": "Point", "coordinates": [long, lat] },
area.geojson);
export function is_in_geo_polygon_from_store(image_id, lat, long, polygon_hash) {
return image_in_area(image_id, polygon_hash);
}
// Digikam stores its tree of tags as individual tags,
@ -109,15 +109,18 @@ export async function add_full_tag_info(db) {
}
function polygons_benchmark(database) {
var img_query = "SELECT Images.id, ImagePositions.latitudeNumber, ImagePositions.longitudeNumber FROM Images "
+ "LEFT JOIN ImagePositions ON ImagePositions.imageid=Images.id WHERE ImagePositions.latitudeNumber NOT NULL GROUP BY Images.id;";
var img_query = "SELECT GROUP_CONCAT(Images.id), ImagePositions.latitudeNumber, ImagePositions.longitudeNumber FROM Images "
+ "LEFT JOIN ImagePositions ON ImagePositions.imageid=Images.id WHERE ImagePositions.latitudeNumber NOT NULL GROUP BY ImagePositions.latitudeNumber, ImagePositions.longitudeNumber;";
sqljs_async_queries(database, [img_query]).then(res => {
console.log("Images query result:", res);
fetch("https://nominatim.openstreetmap.org/search?polygon_geojson=1&polygon_threshold=0.001&format=json&limit=1&q=Australia")
.then(res => res.json())
.then(jsonres => {
var polies;
var points = [];
var points = []; // Contains all unique geometric points as [lat long]
var ids_per_point = []; // Contains a list of ids for each unique geometric point
console.log("Nominatim geo answer:", jsonres);
@ -130,10 +133,12 @@ function polygons_benchmark(database) {
var data = res[0].values;
data.forEach(row => {
points.push([parseFloat(row[cols.indexOf("longitudeNumber")]), parseFloat(row[cols.indexOf("latitudeNumber")])]);
ids_per_point.push(row[cols.indexOf("GROUP_CONCAT(Images.id)")].split(',').map(e => parseInt(e)));
});
}
console.log("Points: ", points);
console.log("IDs per point: ", ids_per_point);
console.log("Nominatim GEOJSON: ", polies);
{
@ -151,6 +156,8 @@ function polygons_benchmark(database) {
const index = new KDBush(points, p => p[0], p => p[1], 16, Float64Array);
console.timeEnd("Build KDBush index");
console.time("Total KDBush search time");
// Get bounding boxes for all subpolygons
var boxes = [];
console.time("Build KDBush poly boxes");
@ -184,21 +191,26 @@ function polygons_benchmark(database) {
// Test hit points exactly
console.time("Get exact hits after KDBush");
var realhits = [];
var hitcache = {};
hits.forEach(hit => {
const point = points[hit];
if(point in hitcache && hitcache[point]) {
realhits.push(hit);
return;
}
const is_real_hit = turf.booleanPointInPolygon(turf.point(points[hit]), polies);
hitcache[point] = is_real_hit;
if (is_real_hit) {
realhits.push(hit);
}
});
console.log("Real: ", realhits);
console.timeEnd("Get exact hits after KDBush");
// Expand to all image IDs
console.time("Expand exact hits to IDs");
var hit_ids = [];
realhits.forEach(idx => {
hit_ids = hit_ids.concat(ids_per_point[idx]);
});
console.log("Real hit IDs: ", hit_ids);
console.timeEnd("Expand exact hits to IDs");
console.timeEnd("Total KDBush search time");
}
});
})
@ -213,14 +225,17 @@ export function ProvideDB(props) {
useEffect(() => {
fetch_sqljs_db_from_sqlite(db_url)
.then(db => {
add_full_tag_info(db).then((newdb) => {
add_full_tag_info(db)
.then((newdb) => {
initialize_image_index(newdb).then(() => {
db.create_function("REGEXP", regexp_match);
db.create_function("IS_IN_GEO", is_in_geo_polygon_from_store);
polygons_benchmark(db);
//polygons_benchmark(db);
setError(false);
setDb(newdb);
})
})
})
.catch(error => { setError(error); });
}, [])

@ -1,4 +1,13 @@
var g_GeoStore = {};
import KDBush from 'kdbush';
import { sqljs_async_queries } from './database.js';
import * as turf from '@turf/turf';
var g_GeoStore = {
areas: {},
area_query_results: {},
image_index: {},
};
export function hash_geo_area(geo_area) {
var hash = require('object-hash');
@ -8,7 +17,7 @@ export function hash_geo_area(geo_area) {
export function add_geo_area_to_store(area) {
var h = hash_geo_area(area);
if (!(h in g_GeoStore)) {
g_GeoStore[h] = area;
g_GeoStore["areas"][h] = area;
}
return h;
@ -16,7 +25,119 @@ export function add_geo_area_to_store(area) {
/**
 * Look up a geo area that was previously registered in the store.
 *
 * Areas are kept under g_GeoStore["areas"], keyed by their hash, so the
 * membership test has to be done against that sub-object and not against
 * the top-level store (which only holds the fixed bookkeeping keys).
 *
 * @param {string} hash - Key under which the area was stored.
 * @returns {*} The stored area object.
 * @throws {Error} If no area is stored under the given hash.
 */
export function get_geo_area_from_store(hash) {
    const areas = g_GeoStore["areas"];
    if (!(hash in areas)) {
        throw new Error("Requested non-existent geo area from store.");
    }
    return areas[hash];
}
/**
 * Query all positioned images from the database and build the image index
 * in g_GeoStore["image_index"].
 *
 * The query groups images by identical (latitude, longitude), so each row
 * yields one unique point plus the comma-separated ids of all images at it.
 * The resulting index holds:
 *   - points:        list of unique [long, lat] pairs,
 *   - ids_per_point: list of image-id lists, same indexing as "points",
 *   - kdbush:        a KDBush index built over "points".
 *
 * @param {*} database - Database handle passed through to sqljs_async_queries.
 * @returns {Promise<void>} Resolves once the index is stored; rejects if the
 *   query or the index construction fails.
 */
export async function initialize_image_index(database) {
    const img_query = "SELECT GROUP_CONCAT(Images.id), ImagePositions.latitudeNumber, ImagePositions.longitudeNumber FROM Images "
        + "LEFT JOIN ImagePositions ON ImagePositions.imageid=Images.id WHERE ImagePositions.latitudeNumber NOT NULL GROUP BY ImagePositions.latitudeNumber, ImagePositions.longitudeNumber;";

    const res = await sqljs_async_queries(database, [img_query]);

    const points = []; // unique [long, lat] points
    const ids_per_point = []; // image ids per point in "points", same indexing
    if (res && Array.isArray(res) && res.length > 0) {
        const cols = res[0].columns;
        // Column positions do not change between rows; look them up once.
        const long_col = cols.indexOf("longitudeNumber");
        const lat_col = cols.indexOf("latitudeNumber");
        const ids_col = cols.indexOf("GROUP_CONCAT(Images.id)");

        for (const row of res[0].values) {
            points.push([parseFloat(row[long_col]), parseFloat(row[lat_col])]);
            // String() guards against the value arriving as a number
            // instead of a string, which would make .split() fail.
            ids_per_point.push(
                String(row[ids_col]).split(',').map(e => parseInt(e, 10))
            );
        }
    }

    // Store all info we will need later into the image index.
    g_GeoStore["image_index"] = {
        points: points,
        ids_per_point: ids_per_point,
        kdbush: new KDBush(points, p => p[0], p => p[1], 16, Float64Array)
    };
}
// Compute the bounding box [minx, miny, maxx, maxy] of one ring of [x, y] positions.
function ring_bounding_box(ring) {
    let minx = Number.POSITIVE_INFINITY;
    let miny = Number.POSITIVE_INFINITY;
    let maxx = Number.NEGATIVE_INFINITY;
    let maxy = Number.NEGATIVE_INFINITY;
    for (const position of ring) {
        minx = Math.min(minx, position[0]);
        miny = Math.min(miny, position[1]);
        maxx = Math.max(maxx, position[0]);
        maxy = Math.max(maxy, position[1]);
    }
    return [minx, miny, maxx, maxy];
}

// Collect one bounding box per (sub-)polygon of a GeoJSON geometry, using
// only the first ring of each polygon. Other geometry types yield no boxes.
function area_bounding_boxes(area) {
    if (area.type === "Polygon") {
        return [ring_bounding_box(area.coordinates[0])];
    }
    if (area.type === "MultiPolygon") {
        return area.coordinates.map(polygon => ring_bounding_box(polygon[0]));
    }
    return [];
}

/**
 * Do a query whether an image is in an area. If not done before, this will
 * calculate all images in said area and cache the result for later calls.
 *
 * The first call for an area does the expensive work in two stages: a coarse
 * bounding-box lookup in the KDBush index to collect candidate points,
 * followed by an exact point-in-polygon test on those candidates only. The
 * ids of all matching images are cached as a Set in
 * g_GeoStore["area_query_results"], so that every later call (this function
 * is invoked once per row by the IS_IN_GEO SQL function) is a constant-time
 * membership test.
 *
 * @param {number|string} image_id - Image id; parsed as a base-10 integer.
 * @param {string} area_hash - Key of the area in g_GeoStore["areas"].
 * @returns {boolean} True if the image lies inside the area.
 * @throws {Error} If the area is unknown, or if the image index has not been
 *   built yet.
 */
export function image_in_area(image_id, area_hash) {
    const id = parseInt(image_id, 10);
    const cached_results = g_GeoStore["area_query_results"];
    if (area_hash in cached_results) {
        return cached_results[area_hash].has(id);
    }

    if (!(area_hash in g_GeoStore["areas"])) {
        throw new Error("Queried for images in an unknown area.");
    }

    const { kdbush, points, ids_per_point } = g_GeoStore["image_index"];
    if (!kdbush) {
        // Fail with a clear message instead of a TypeError on kdbush.range.
        throw new Error("Image index is not initialized; call initialize_image_index() first.");
    }

    const area = g_GeoStore["areas"][area_hash].geojson;

    // Coarse pass: indices of all points inside any sub-polygon bounding box.
    // A Set is used because boxes may overlap and report the same point twice.
    const candidate_idxs = new Set();
    for (const [minx, miny, maxx, maxy] of area_bounding_boxes(area)) {
        for (const idx of kdbush.range(minx, miny, maxx, maxy)) {
            candidate_idxs.add(idx);
        }
    }

    // Exact pass: keep only real hits and expand them to all image ids that
    // share the point.
    const hit_ids = new Set();
    for (const idx of candidate_idxs) {
        if (turf.booleanPointInPolygon(turf.point(points[idx]), area)) {
            for (const hit_id of ids_per_point[idx]) {
                hit_ids.add(hit_id);
            }
        }
    }

    cached_results[area_hash] = hit_ids;
    return hit_ids.has(id);
}

@ -13,7 +13,7 @@ export function do_image_query(query, database, collection_path, collection_thum
var queries = [];
queries.push(query);
sqljs_async_queries(database, queries).then(res => {
//console.log("Query result: ", res);
console.log("Query result: ", res);
var photos = [];
if (res && Array.isArray(res) && res.length > 0) {
var cols = res[0].columns;
@ -320,7 +320,7 @@ export class LocationFilter extends ResultFilter {
// it will have access to said polygon.
const hash = add_geo_area_to_store(this.geo_area);
return '(ImagePositions.latitudeNumber NOT NULL AND ImagePositions.longitudeNumber NOT NULL AND '
+ 'IS_IN_GEO(ImagePositions.latitudeNumber, ImagePositions.longitudeNumber, "' + hash + '"))';
+ 'IS_IN_GEO(Images.id, ImagePositions.latitudeNumber, ImagePositions.longitudeNumber, "' + hash + '"))';
}
simplify() { return this; }

File diff suppressed because it is too large Load Diff
Loading…
Cancel
Save