Merge pull request #12 from delfrrr/remove-recursion
Remove recursion, speed up modulus
diff --git a/.gitignore b/.gitignore
index 7dde577..375f2a1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,4 +14,5 @@
mason_packages
.toolchain
.mason
-local.env
\ No newline at end of file
+local.env
+xcode-project
\ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 903e417..c7c8f72 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -7,6 +7,9 @@
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/mason.cmake)
option(WERROR "Add -Werror flag to build (turns warnings into errors)" ON)
+option(BENCHMARK_BIG_O "Calculate Big O in benchmark" OFF)
+option(BENCHMARK_100M "Run against 100M points" OFF)
+option(BENCHMARK_10M "Run against 100M points" OFF)
# configure optimization
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
@@ -50,6 +53,18 @@
find_package(Threads REQUIRED)
file(GLOB BENCH_SOURCES bench/*.cpp)
add_executable(bench-tests ${BENCH_SOURCES})
+if(BENCHMARK_BIG_O)
+ message("-- BENCHMARK_BIG_O=1")
+ target_compile_definitions(bench-tests PUBLIC BENCHMARK_BIG_O=1)
+endif()
+if(BENCHMARK_100M)
+ message("-- BENCHMARK_100M=1")
+ target_compile_definitions(bench-tests PUBLIC BENCHMARK_100M=1)
+endif()
+if(BENCHMARK_10M)
+ message("-- BENCHMARK_10M=1")
+ target_compile_definitions(bench-tests PUBLIC BENCHMARK_10M=1)
+endif()
#examples
add_executable(triangulate-geojson examples/triangulate_geojson.cpp)
diff --git a/Makefile b/Makefile
index 5abaceb..8a0ac69 100644
--- a/Makefile
+++ b/Makefile
@@ -2,6 +2,7 @@
# Whether to turn compiler warnings into errors
export WERROR ?= true
export BUILD_DIR ?= cmake-build
+export XCODE_PROJ_DIR ?= xcode-project
default: release
@@ -11,6 +12,9 @@
debug:
mkdir -p ./$(BUILD_DIR) && cd ./$(BUILD_DIR) && cmake ../ -DCMAKE_BUILD_TYPE=Debug -DWERROR=$(WERROR) && VERBOSE=1 cmake --build .
+xcode:
+ mkdir -p ./$(XCODE_PROJ_DIR) && cd ./$(XCODE_PROJ_DIR) && cmake -G Xcode ../
+
test:
@if [ -f ./$(BUILD_DIR)/unit-tests ]; then ./$(BUILD_DIR)/unit-tests; else echo "Please run 'make release' or 'make debug' first" && exit 1; fi
diff --git a/README.md b/README.md
index c747122..921ab45 100644
--- a/README.md
+++ b/README.md
@@ -48,16 +48,16 @@
```
Run on (4 X 2300 MHz CPU s)
-2018-09-26 09:28:34
+2018-09-29 09:27:28
------------------------------------------------------------
Benchmark Time CPU Iterations
------------------------------------------------------------
-BM_45K_geojson_nodes 24 ms 24 ms 29
-BM_uniform/2000 1 ms 1 ms 887
-BM_uniform/100000 66 ms 66 ms 9
-BM_uniform/200000 158 ms 155 ms 4
-BM_uniform/500000 441 ms 439 ms 2
-BM_uniform/1000000 1062 ms 1058 ms 1
+BM_45K_geojson_nodes 22 ms 22 ms 32
+BM_uniform/2000 1 ms 1 ms 982
+BM_uniform/100000 63 ms 62 ms 9
+BM_uniform/200000 140 ms 140 ms 4
+BM_uniform/500000 400 ms 399 ms 2
+BM_uniform/1000000 994 ms 993 ms 1
```
Library is ~10% faster then JS version for 1M uniform points ([details](https://github.com/delfrrr/delaunator-cpp/pull/8#issuecomment-422690056))
diff --git a/bench/run.cpp b/bench/run.cpp
index d48f041..9b449b2 100644
--- a/bench/run.cpp
+++ b/bench/run.cpp
@@ -7,6 +7,7 @@
std::vector<double> generate_uniform(std::size_t n) {
std::vector<double> coords;
+ coords.reserve(2 * n);
std::srand(350);
double norm = static_cast<double>(RAND_MAX) / 1e3;
for (size_t i = 0; i < n; i++) {
@@ -31,9 +32,20 @@
while (state.KeepRunning()) {
delaunator::Delaunator delaunator(coords);
}
+ state.SetComplexityN(state.range(0));
}
BENCHMARK(BM_45K_geojson_nodes)->Unit(benchmark::kMillisecond);
BENCHMARK(BM_uniform)->Arg(2000)->Arg(100000)->Arg(200000)->Arg(500000)->Arg(1000000)->Unit(benchmark::kMillisecond);
+#if BENCHMARK_BIG_O
+BENCHMARK(BM_uniform)->RangeMultiplier(2)->Range(1 << 12, 1 << 22)->Unit(benchmark::kMillisecond)->Complexity();
+#endif
+#if BENCHMARK_10M
+BENCHMARK(BM_uniform)->Arg(1000000 * 10)->Unit(benchmark::kMillisecond);
+#endif
+#if BENCHMARK_100M
+BENCHMARK(BM_uniform)->Arg(1000000 * 100)->Unit(benchmark::kMillisecond);
+#endif
+
BENCHMARK_MAIN()
diff --git a/examples/utils.hpp b/examples/utils.hpp
index 02c32df..d22cb42 100644
--- a/examples/utils.hpp
+++ b/examples/utils.hpp
@@ -22,7 +22,7 @@
}
}
-inline std::vector<double> get_geo_json_points(std::string const& json) {
+inline std::vector< double> get_geo_json_points(std::string const& json) {
rapidjson::Document document;
if(document.Parse(json.c_str()).HasParseError()) {
throw std::runtime_error("Cannot parse JSON");
diff --git a/include/delaunator.hpp b/include/delaunator.hpp
index e16e0c3..6bc5221 100644
--- a/include/delaunator.hpp
+++ b/include/delaunator.hpp
@@ -11,6 +11,11 @@
namespace delaunator {
+//@see https://stackoverflow.com/questions/33333363/built-in-mod-vs-custom-mod-function-improve-the-performance-of-modulus-op/33333636#33333636
+inline size_t fast_mod(const size_t i, const size_t c) {
+ return i >= c ? i % c : i;
+}
+
// Kahan and Babuska summation, Neumaier variant; accumulates less FP error
inline double sum(const std::vector<double>& x) {
double sum = x[0];
@@ -93,47 +98,38 @@
return std::make_pair(x, y);
}
-inline double compare(
- std::vector<double> const& coords,
- std::size_t i,
- std::size_t j,
- double cx,
- double cy) {
- const double d1 = dist(coords[2 * i], coords[2 * i + 1], cx, cy);
- const double d2 = dist(coords[2 * j], coords[2 * j + 1], cx, cy);
- const double diff1 = d1 - d2;
- const double diff2 = coords[2 * i] - coords[2 * j];
- const double diff3 = coords[2 * i + 1] - coords[2 * j + 1];
-
- if (diff1 > 0.0 || diff1 < 0.0) {
- return diff1;
- } else if (diff2 > 0.0 || diff2 < 0.0) {
- return diff2;
- } else {
- return diff3;
- }
-}
-
-struct sort_to_center {
+struct compare {
std::vector<double> const& coords;
double cx;
double cy;
bool operator()(std::size_t i, std::size_t j) {
- return compare(coords, i, j, cx, cy) < 0;
+ const double d1 = dist(coords[2 * i], coords[2 * i + 1], cx, cy);
+ const double d2 = dist(coords[2 * j], coords[2 * j + 1], cx, cy);
+ const double diff1 = d1 - d2;
+ const double diff2 = coords[2 * i] - coords[2 * j];
+ const double diff3 = coords[2 * i + 1] - coords[2 * j + 1];
+
+ if (diff1 > 0.0 || diff1 < 0.0) {
+ return diff1 < 0;
+ } else if (diff2 > 0.0 || diff2 < 0.0) {
+ return diff2 < 0;
+ } else {
+ return diff3 < 0;
+ }
}
};
inline bool in_circle(
- double ax,
- double ay,
- double bx,
- double by,
- double cx,
- double cy,
- double px,
- double py) {
+ const double ax,
+ const double ay,
+ const double bx,
+ const double by,
+ const double cx,
+ const double cy,
+ const double px,
+ const double py) {
const double dx = ax - px;
const double dy = ay - py;
const double ex = bx - px;
@@ -159,7 +155,7 @@
}
// monotonically increases with real angle, but doesn't need expensive trigonometry
-inline double pseudo_angle(double dx, double dy) {
+inline double pseudo_angle(const double dx, const double dy) {
const double p = dx / (std::abs(dx) + std::abs(dy));
return (dy > 0.0 ? 3.0 - p : 1.0 + p) / 4.0; // [0..1)
}
@@ -194,9 +190,10 @@
double m_center_x;
double m_center_y;
std::size_t m_hash_size;
+ std::vector<std::size_t> m_edge_stack;
std::size_t legalize(std::size_t a);
- std::size_t hash_key(double x, double y);
+ std::size_t hash_key(double x, double y) const;
std::size_t add_triangle(
std::size_t i0,
std::size_t i1,
@@ -218,7 +215,8 @@
m_hash(),
m_center_x(),
m_center_y(),
- m_hash_size() {
+ m_hash_size(),
+ m_edge_stack() {
std::size_t n = coords.size() >> 1;
double max_x = std::numeric_limits<double>::min();
@@ -305,7 +303,7 @@
std::tie(m_center_x, m_center_y) = circumcenter(i0x, i0y, i1x, i1y, i2x, i2y);
// sort the points by distance from the seed triangle circumcenter
- std::sort(ids.begin(), ids.end(), sort_to_center{ coords, m_center_x, m_center_y });
+ std::sort(ids.begin(), ids.end(), compare{ coords, m_center_x, m_center_y });
// initialize a hash table for storing edges of the advancing convex hull
m_hash_size = static_cast<std::size_t>(std::llround(std::ceil(std::sqrt(n))));
@@ -360,7 +358,7 @@
size_t key = hash_key(x, y);
for (size_t j = 0; j < m_hash_size; j++) {
- start = m_hash[(key + j) % m_hash_size];
+ start = m_hash[fast_mod(key + j, m_hash_size)];
if (start != INVALID_INDEX && start != hull_next[start]) break;
}
@@ -440,84 +438,110 @@
}
std::size_t Delaunator::legalize(std::size_t a) {
- const std::size_t b = halfedges[a];
+ std::size_t i = 0;
+ std::size_t ar = 0;
+ m_edge_stack.clear();
- /* if the pair of triangles doesn't satisfy the Delaunay condition
- * (p1 is inside the circumcircle of [p0, pl, pr]), flip them,
- * then do the same check/flip recursively for the new pair of triangles
- *
- * pl pl
- * /||\ / \
- * al/ || \bl al/ \a
- * / || \ / \
- * / a||b \ flip /___ar___\
- * p0\ || /p1 => p0\---bl---/p1
- * \ || / \ /
- * ar\ || /br b\ /br
- * \||/ \ /
- * pr pr
- */
- const std::size_t a0 = a - a % 3;
- const std::size_t b0 = b - b % 3;
+ // recursion eliminated with a fixed-size stack
+ while (true) {
+ const size_t b = halfedges[a];
- const std::size_t al = a0 + (a + 1) % 3;
- const std::size_t ar = a0 + (a + 2) % 3;
- const std::size_t bl = b0 + (b + 2) % 3;
+ /* if the pair of triangles doesn't satisfy the Delaunay condition
+ * (p1 is inside the circumcircle of [p0, pl, pr]), flip them,
+ * then do the same check/flip recursively for the new pair of triangles
+ *
+ * pl pl
+ * /||\ / \
+ * al/ || \bl al/ \a
+ * / || \ / \
+ * / a||b \ flip /___ar___\
+ * p0\ || /p1 => p0\---bl---/p1
+ * \ || / \ /
+ * ar\ || /br b\ /br
+ * \||/ \ /
+ * pr pr
+ */
+ const size_t a0 = 3 * (a / 3);
+ ar = a0 + (a + 2) % 3;
- const std::size_t p0 = triangles[ar];
- const std::size_t pr = triangles[a];
- const std::size_t pl = triangles[al];
- const std::size_t p1 = triangles[bl];
-
- if (b == INVALID_INDEX) {
- return ar;
- }
-
- const bool illegal = in_circle(
- coords[2 * p0],
- coords[2 * p0 + 1],
- coords[2 * pr],
- coords[2 * pr + 1],
- coords[2 * pl],
- coords[2 * pl + 1],
- coords[2 * p1],
- coords[2 * p1 + 1]);
-
- if (illegal) {
- triangles[a] = p1;
- triangles[b] = p0;
-
- auto hbl = halfedges[bl];
-
- // edge swapped on the other side of the hull (rare); fix the halfedge reference
- if (hbl == INVALID_INDEX) {
- std::size_t e = hull_start;
- do {
- if (hull_tri[e] == bl) {
- hull_tri[e] = a;
- break;
- }
- e = hull_next[e];
- } while (e != hull_start);
+ if (b == INVALID_INDEX) {
+ if (i > 0) {
+ i--;
+ a = m_edge_stack[i];
+ continue;
+ } else {
+ //i = INVALID_INDEX;
+ break;
+ }
}
- link(a, hbl);
- link(b, halfedges[ar]);
- link(ar, bl);
- std::size_t br = b0 + (b + 1) % 3;
+ const size_t b0 = 3 * (b / 3);
+ const size_t al = a0 + (a + 1) % 3;
+ const size_t bl = b0 + (b + 2) % 3;
- legalize(a);
- return legalize(br);
+ const std::size_t p0 = triangles[ar];
+ const std::size_t pr = triangles[a];
+ const std::size_t pl = triangles[al];
+ const std::size_t p1 = triangles[bl];
+
+ const bool illegal = in_circle(
+ coords[2 * p0],
+ coords[2 * p0 + 1],
+ coords[2 * pr],
+ coords[2 * pr + 1],
+ coords[2 * pl],
+ coords[2 * pl + 1],
+ coords[2 * p1],
+ coords[2 * p1 + 1]);
+
+ if (illegal) {
+ triangles[a] = p1;
+ triangles[b] = p0;
+
+ auto hbl = halfedges[bl];
+
+ // edge swapped on the other side of the hull (rare); fix the halfedge reference
+ if (hbl == INVALID_INDEX) {
+ std::size_t e = hull_start;
+ do {
+ if (hull_tri[e] == bl) {
+ hull_tri[e] = a;
+ break;
+ }
+ e = hull_next[e];
+ } while (e != hull_start);
+ }
+ link(a, hbl);
+ link(b, halfedges[ar]);
+ link(ar, bl);
+ std::size_t br = b0 + (b + 1) % 3;
+
+ if (i < m_edge_stack.size()) {
+ m_edge_stack[i] = br;
+ } else {
+ m_edge_stack.push_back(br);
+ }
+ i++;
+
+ } else {
+ if (i > 0) {
+ i--;
+ a = m_edge_stack[i];
+ continue;
+ } else {
+ break;
+ }
+ }
}
return ar;
}
-std::size_t Delaunator::hash_key(double x, double y) {
+inline std::size_t Delaunator::hash_key(const double x, const double y) const {
const double dx = x - m_center_x;
const double dy = y - m_center_y;
- return static_cast<std::size_t>(std::llround(
- std::floor(pseudo_angle(dx, dy) * static_cast<double>(m_hash_size)))) %
- m_hash_size;
+ return fast_mod(
+ static_cast<std::size_t>(std::llround(std::floor(pseudo_angle(dx, dy) * static_cast<double>(m_hash_size)))),
+ m_hash_size);
}
std::size_t Delaunator::add_triangle(
@@ -537,7 +561,7 @@
return t;
}
-void Delaunator::link(std::size_t a, std::size_t b) {
+void Delaunator::link(const std::size_t a, const std::size_t b) {
std::size_t s = halfedges.size();
if (a == s) {
halfedges.push_back(b);