writing

parent c67793cacd
commit b72540ec81

.gitignore (vendored) | 4
@@ -14,3 +14,7 @@ node_modules/
*.tdo
*.synctex.gz
*.out
*.bbl
*.bcf
*.blg
main.run.xml
thesis/bibliography/algorithms.bib | 83 (new file)
@@ -0,0 +1,83 @@
@article{lang1969rules,
  title={Rules for the robot draughtsmen},
  author={Lang, T},
  journal={The Geographical Magazine},
  volume={42},
  number={1},
  pages={50--51},
  year={1969}
}

@article{douglas1973algorithms,
  title={Algorithms for the reduction of the number of points required to represent a digitized line or its caricature},
  author={Douglas, David H and Peucker, Thomas K},
  journal={Cartographica: The International Journal for Geographic Information and Geovisualization},
  volume={10},
  number={2},
  pages={112--122},
  year={1973},
  publisher={University of Toronto Press}
}

@misc{reumann1974optimizing,
  title={Optimizing Curve Segmentation in Computer Graphics},
  author={Reumann, K and Witkam, APM},
  howpublished={International Computing Symposium},
  year={1974},
  publisher={North-Holland, Amsterdam}
}

@article{opheim1982fast,
  title={Fast data reduction of a digitized curve},
  author={Opheim, Harold},
  journal={Geo-Processing},
  volume={2},
  pages={33--40},
  year={1982}
}

@article{clayton1985cartographic,
  title={Cartographic generalization: a review of feature simplification and systematic point algorithms},
  author={Clayton, Victoria H},
  year={1985}
}

@article{visvalingam1993line,
  title={Line generalisation by repeated elimination of points},
  author={Visvalingam, Maheswari and Whyatt, James D},
  journal={The Cartographic Journal},
  volume={30},
  number={1},
  pages={46--51},
  year={1993},
  publisher={Taylor \& Francis}
}

@inproceedings{zhao1997linear,
  title={Linear-time sleeve-fitting polyline simplification algorithms},
  author={Zhao, Zhiyuan and Saalfeld, Alan},
  booktitle={Proceedings of AutoCarto 13},
  year={1997},
  organization={Citeseer}
}

@article{shi2006performance,
  title={Performance evaluation of line simplification algorithms for vector generalization},
  author={Shi, Wenzhong and Cheung, ChuiKwan},
  journal={The Cartographic Journal},
  volume={43},
  number={1},
  pages={27--44},
  year={2006},
  publisher={Taylor \& Francis}
}

@misc{koning2011polyline,
  title={Polyline Simplification},
  author={de Koning, Elmar},
  year={2011}
}
thesis/bibliography/bibliography.bib | 58 (new file)
@@ -0,0 +1,58 @@
@article{theobald2001understanding,
  title={Understanding topology and shapefiles},
  author={Theobald, David M},
  journal={ArcUser (April--June)},
  year={2001},
  url={https://www.esri.com/news/arcuser/0401/topo.html}
}

@article{ai2017envelope,
  title={Envelope generation and simplification of polylines using Delaunay triangulation},
  author={Ai, Tinghua and Ke, Shu and Yang, Min and Li, Jingzhong},
  journal={International Journal of Geographical Information Science},
  volume={31},
  number={2},
  pages={297--319},
  year={2017},
  publisher={Taylor \& Francis}
}

@inproceedings{brophy1973automated,
  title={An automated methodology for linear generalization in thematic cartography},
  author={Brophy, M},
  booktitle={Proceedings of the American Congress on Surveying and Mapping},
  pages={300--314},
  year={1973}
}

@article{brassel1990computergestutzte,
  title={Computergest{\"u}tzte Generalisierung},
  author={Brassel, K},
  journal={Schweizerische Gesellschaft f{\"u}r Kartographie (Ed.), Kartographisches Generalisieren. Z{\"u}rich: Orell F{\"u}ssli Graphische Betriebe},
  pages={37--48},
  year={1990}
}

@online{bynens2010bulletproof,
  author = {Bynens, Mathias and Dalton, John-David},
  title = {Bulletproof JavaScript benchmarks},
  date = {2010-12-23},
  url = {https://calendar.perfplanet.com/2010/bulletproof-javascript-benchmarks/},
  urldate = {2019-08-15}
}

@online{hossain2012benchmark,
  author = {Hossain, Monsur},
  title = {benchmark.js: how it works},
  date = {2012-12-11},
  url = {http://monsur.hossa.in/2012/12/11/benchmarkjs.html},
  urldate = {2019-08-15}
}
thesis/bibliography/specifications.bib | 30 (new file)
@@ -0,0 +1,30 @@
@article{bray2014javascript,
  title={The JavaScript Object Notation (JSON) Data Interchange Format},
  author={Bray, Tim},
  year={2014},
  url={https://tools.ietf.org/html/rfc8259}
}

@article{butler2016geojson,
  title={The GeoJSON Format},
  author={Butler, Howard and Daly, Martin and Doyle, Allan and Gillies, Sean and Hagen, Stefan and Schaub, Tim and others},
  journal={RFC 7946; The Internet Engineering Task Force},
  year={2016},
  url={https://tools.ietf.org/html/rfc7946}
}

@article{open1999opengis,
  title={OpenGIS Simple Features Specification for SQL},
  author={{Open GIS Consortium} and others},
  year={1999},
  url={https://portal.opengeospatial.org/files/?artifact_id=829}
}

@misc{bostock2017topojson,
  title={TopoJSON},
  author={Bostock, Mike},
  year={2017},
  publisher={Online},
  url={https://github.com/topojson/topojson-specification}
}
thesis/bibliography/wasm.bib | 68 (new file)
@@ -0,0 +1,68 @@
@inproceedings{zakai2011emscripten,
  title={Emscripten: an LLVM-to-JavaScript compiler},
  author={Zakai, Alon},
  booktitle={Proceedings of the ACM International Conference Companion on Object Oriented Programming Systems Languages and Applications Companion},
  pages={301--312},
  year={2011},
  organization={ACM}
}

@inproceedings{haas2017bringing,
  title={Bringing the web up to speed with WebAssembly},
  author={Haas, Andreas and Rossberg, Andreas and Schuff, Derek L and Titzer, Ben L and Holman, Michael and Gohman, Dan and Wagner, Luke and Zakai, Alon and Bastien, JF},
  booktitle={ACM SIGPLAN Notices},
  volume={52},
  number={6},
  pages={185--200},
  year={2017},
  organization={ACM}
}

@inproceedings{reiser2017accelerate,
  title={Accelerate JavaScript applications by cross-compiling to WebAssembly},
  author={Reiser, Micha and Bl{\"a}ser, Luc},
  booktitle={Proceedings of the 9th ACM SIGPLAN International Workshop on Virtual Machines and Intermediate Languages},
  pages={10--17},
  year={2017},
  organization={ACM}
}

@online{surma2018emscripting,
  author = {Das Surma},
  title = {Emscripting a C library to Wasm},
  date = {2018-03},
  url = {https://developers.google.com/web/updates/2018/03/emscripting-a-c-library},
  urldate = {2019-08-15}
}

@online{surma2019replacing,
  author = {Das Surma},
  title = {Replacing a hot path in your app's JavaScript with WebAssembly},
  date = {2019-02},
  url = {https://developers.google.com/web/updates/2019/02/hotpath-with-wasm},
  urldate = {2019-08-15}
}

@online{clark2017what,
  author = {Clark, Lin},
  title = {What makes WebAssembly fast?},
  date = {2017-02-28},
  url = {https://hacks.mozilla.org/2017/02/what-makes-webassembly-fast/},
  urldate = {2019-08-15}
}

@online{zakai2019llvmbackend,
  author = {Zakai, Alon},
  title = {Emscripten and the LLVM WebAssembly backend},
  date = {2019-07-01},
  url = {https://v8.dev/blog/emscripten-llvm-wasm},
  urldate = {2019-08-15}
}

@online{wagner2017support,
  author = {Wagner, Luke},
  title = {WebAssembly consensus and end of Browser Preview},
  date = {2017-02-28},
  url = {https://lists.w3.org/Archives/Public/public-webassembly/2017Feb/0002.html},
  urldate = {2019-08-15}
}
@@ -1,11 +1,10 @@
\input{./chapters/01-introduction.tex}
\input{./chapters/02.00-theory.tex}
\input{./chapters/02.01-Generalization.tex}
\input{./chapters/02.02-Dataformats.tex}
\input{./chapters/02.03-Algorithms.tex}
\input{./chapters/02.02-Algorithms.tex}
\input{./chapters/02.03-Dataformats.tex}
\input{./chapters/02.04-webruntime.tex}
\input{./chapters/03.00-methodology.tex}
\input{./chapters/04.00-results.tex}
\input{./chapters/05-discussion.tex}
\input{./chapters/06-conclusion.tex}
\input{./chapters/07-application.tex}
@@ -9,29 +9,31 @@
% Why important, who participants, trends,

Simplification of polygonal data structures is the task of reducing data points while preserving topological characteristics. The simplification often takes the form of removing points that make up the geometry. There are several solutions that tackle the problem in different ways. This thesis aims to compare and classify these solutions by various heuristics. Performance and compression rate are quantitative heuristic used. Positional, length and area errors will also be measured to quantify simplification errors. With the rising trend of moving desktop applications to the web platform also geographic information systems (GIS) have experienced the shift towards web browsers \footnote{\url{https://www.esri.com/about/newsroom/arcnews/implementing-web-gis/}}. Performance is critical in these applications. Since simplification is an important factor to performance the solutions will be tested by constructing a web application using a technology called WebAssembly.
Simplification of polygonal data structures is the task of reducing data points while preserving topological characteristics. The simplification often takes the form of removing points that make up the geometry. There are several solutions that tackle the problem in different ways. With the rising trend of moving desktop applications to the web platform, geographic information systems (GIS) have also experienced a shift towards web browsers\footnote{\url{https://www.esri.com/about/newsroom/arcnews/implementing-web-gis/}}. Performance is critical in these applications. Since simplification is an important factor for performance, the solutions will be tested by constructing a web application using a technology called WebAssembly.

\subsection{Binary instruction sets on the web platform}

The recent development of WebAssembly allows code written in various programming languages to be run natively in web browsers. So far JavaScript was the only native programming language on the web. The goals of WebAssembly are to define a binary instruction format as a compilation target to execute code at native speed and taking advantage of common hardware capabilities \footnote{\url{https://webassembly.org/}}. The integration into the web platform brings portability to a wide range of platforms like mobile and internet of things (IoT). The usage of this technology promises performance gains that will be tested by this thesis. The results can give conclusions to whether WebAssembly is worth a consideration for web applications with geographic computational aspects. Web GIS is an example technology that would benefit greatly of such an advancement. Thus far WebAssembly has been shipped to the stable version of the four most used browser engines \footnote{\url{https://lists.w3.org/Archives/Public/public-webassembly/2017Feb/0002.html}]}. The mainly targeted high-level languages for compilation are C and C++. Also a compiler for Rust and a TypeScript subset has been developed. It will be explored how existing implementations could easily be adopted when using a compiler.
The recent development of WebAssembly allows code written in various programming languages to be run natively in web browsers. So far, JavaScript has been the only native programming language on the web. The goals of WebAssembly are to define a binary instruction format as a compilation target to execute code at native speed and to take advantage of common hardware capabilities \parencite{haas2017bringing}. The integration into the web platform brings portability to a wide range of platforms like mobile and the internet of things (IoT). The usage of this technology promises performance gains that will be tested by this thesis. The results can indicate whether WebAssembly is worth considering for web applications with geographic computational aspects. Web GIS is an example technology that would benefit greatly from such an advancement. Thus far, WebAssembly has been shipped in the stable versions of the four most used browser engines \parencite{wagner2017support}. The main high-level languages targeted for compilation are C and C++. Compilers for Rust and for a TypeScript subset have also been developed.

\subsection{Performance as important factor for web applications}

There has been a rapid growth of complex applications running in web-browsers. These so called progressive web apps (PWA) combine the fast reachability of web pages with the feature richness of locally installed applications. Even though these applications can grow quire complex, the requirement for fast page loads and instant user interaction still remains. One way to cope this need is the use of compression algorithms to reduce the amount of data transmitted and processed. In a way simplification is a form of data compression. Web servers use lossless compression algorithms like gzip to deflate data before transmission. Browsers that implement these algorithms can then fully restore the requested ressources resulting in lower bandwidth usage. The algorithms presented here however remove information from the data in a way that cannot be restored. This is called lossy compression. The most common usage on the web is the compression of image data.
There has been a rapid growth of complex applications running in web browsers. These so-called progressive web apps (PWA) combine the fast reachability of web pages with the feature richness of locally installed applications. Even though these applications can grow quite complex, the requirement for fast page loads and instant user interaction still remains. One way to cope with this need is the use of compression algorithms to reduce the amount of data transmitted and processed. In a way, simplification is a form of data compression. Web servers use lossless compression algorithms like gzip to deflate data before transmission. Browsers that implement these algorithms can then fully restore the requested resources, resulting in lower bandwidth usage. The algorithms presented here, however, remove information from the data in a way that cannot be restored. This is called lossy compression. The most common usage for this on the web is the compression of image data.

\subsection{Topology simplification for rendering performance}

While compression is often used to minimize bandwidth usage, the compression of geospatial data can particularly influence rendering performance. The bottleneck in rendering is often the SVG transformation used to display topology on the web. Implementing simplification algorithms for use on the web platform can lead to a smoother user experience when working with large geodata sets.

\subsection{Related work}
\todo[inline]{Related Work}
There have been previous attempts to speed up applications with WebAssembly. They have all seen great performance benefits when using this technology. Results show that across several source languages the performance is predictably consistent across browsers \parencite{surma2019replacing}. Reiser and Bläser even propose to cross-compile JavaScript to WebAssembly. Through their library Speedy.js one can compile TypeScript, a JavaScript superset, to WebAssembly. The performance gains for critical functions reach up to a factor of four \parencite{reiser2017accelerate}.

Shi and Cheung analyzed several different polyline simplification algorithms in 2006 regarding their performance and quality \parencite{shi2006performance}. In this thesis these algorithms will also be introduced. The performance benchmarking, however, will be limited to only the most effective algorithm that is used on the web.

\subsection{Structure of this thesis}

This thesis is structured into a theoretical and a practical component. First the theoretical principles will be reviewed. Topology of polygonal data will be explained as how to describe geodata on the web. A number of algorithms will be introduced in this section. Each algorithm will be dissected by complexity, characteristics and the possible influence to the heuristics mentioned above. An introduction to WebAssembly will be given here.
This thesis is structured into a theoretical and a practical component. First, the theoretical principles will be reviewed. A number of algorithms will be introduced in this section. Each algorithm will be dissected by complexity and characteristics. The topology of polygonal data will be explained, as well as how to describe geodata on the web. An introduction to WebAssembly will follow.

In the next chapter the practical implementation will be presented. This section is divided in two parts since two web applications are produced in this thesis. The first one is a benchmark comparison of an algorithm implemented in JavaScript and in WebAssembly. It will be used investigate if performance of established implementations can be improved by a new technology. The second part is about several algorithms brought to the web by compiling an existing C++ library. This application can be used for qualitative analysis of the algorithms. It will show live results to see the characteristics and influence of single parameters.
In the next chapter the practical implementation will be presented. A web application will be developed to measure the performance of three related algorithms used for polyline simplification.

The results of the above methods will be shown in chapter 4. After discussion of the results a concluion will finish the thesis.
The results of the above methods will be shown in chapter 4. After a discussion of the results, a conclusion will finish the thesis.
@@ -1,4 +1,4 @@
\section{Theory}
% Principles

In this chapter the theory behind polygon simplification will be explained. The simplification process is part of generalization in cartography. So first a few words about it will be dropped do give a broad overview about the topic. It will be clarified which goals drive the reducing of data quantity, especially in the context of web applications. Then the data formats will be explained that make up the data. From there a closer look can be taken how the simplification can be accomplished.
In this chapter the theory behind polygon simplification will be explained. The simplification process is part of generalization in cartography. It will be clarified which goals drive the reduction of data quantity, especially in the context of web applications. Then several different simplification algorithms will be introduced. The leading data formats that represent geodata on the web will be explained. From there, a closer look can be taken at how the algorithms run on the web platform. For that, the technology WebAssembly will be presented.
@@ -1,8 +1,7 @@

\subsection{Generalization in cartography}

In map generalization one aims to reduce the data presented to what is appropriate for the scale and/or purpose of the map \parencite{brophy1973automated}. This selection has long been a manual process, such that geographic generalization has developed into an art that can only be learned by years of apprenticeship and practice \parencite{brassel1990computergestutzte}. When using automation, one could be concerned about a lower quality of maps. This is why many speak of computer-assisted generalization, where only subprocesses can be fully automated.

\subsubsection{Goals of reducing data}
Polyline simplification is the most basic topic in map generalization \parencite{ai2017envelope}. The problems of geographic cartography also apply here. So a number of algorithms have been developed to describe the work of cartographers as abstract, computer-automatable processes. A selection of these algorithms will be explained in chapter \ref{ch:algorithms}.

\subsubsection{Automated generalization}
Cartography does not halt before digitalization. In the era of big data there is a large volume of map data available. Much of it comes from collaborative projects like OpenStreetMap\footnote{\url{https://www.openstreetmap.org/}} (OSM), where volunteers submit freely available geographic information. To deliver this mass of data over the internet, one can make use of the simplification processes described in this thesis. This is particularly useful as the information provided usually has no scale description. Automated simplification can bring appropriate data sizes while maintaining data usability \parencite{ai2017envelope}.
@@ -1,11 +1,12 @@

\subsection{Polyline simplification}
\label{ch:algorithms}

In this chapter several algorithms for polyline simplification will be explained. For each algorithm a short summary of the routine will be given. At the end a comparison will be drawn to determine the method used for benchmarking.

%In this chapter the history behind polyline simplification is shown. Several algorithm in the chronological order of their creation will be explained. At the end comparison will be drawn to determine the method used for benchmarking.

\paragraph{n-th point algorithm} This algorithm is fairly simplistic. It was described in 1966 by Tobler. The routine is to select every n-th coordinate of the polyline to retain. The larger the value of n is, the greater the simplification will be.
\paragraph{n-th point algorithm} This algorithm is fairly simplistic. It was described in 1966 by Tobler. The routine is to select every n-th coordinate of the polyline to retain. The larger the value of n is, the greater the simplification will be. \parencite{clayton1985cartographic}
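As a rough illustration (an editorial sketch, not taken from the thesis sources or from any of the libraries discussed later), the routine can be expressed in a few lines of JavaScript, assuming a polyline in nested-array form:

\begin{lstlisting}[
    language=javascript,
    caption={Illustrative sketch of the n-th point algorithm}
]
// Keep every n-th point of the polyline; the first point is always retained.
function nthPoint(points, n) {
  const result = [];
  for (let i = 0; i < points.length; i += n) {
    result.push(points[i]);
  }
  // Make sure the end point of the line survives the sampling.
  if (result[result.length - 1] !== points[points.length - 1]) {
    result.push(points[points.length - 1]);
  }
  return result;
}
\end{lstlisting}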

%\begin{figure}
% \centering
@@ -14,9 +15,9 @@ In this chapter several algorithms for polyline simplification will be explained
% \label{fig:algo-np}
%\end{figure}

\paragraph{The Random-point routine} is derived from the n-th point algorithm. It sections the line into parts containing n consecutive positions. From each section a random point is chosen to construct the simplified line.
\paragraph{The Random-point routine} is derived from the n-th point algorithm. It sections the line into parts containing n consecutive positions. From each section a random point is chosen to construct the simplified line. \parencite{shi2006performance}

\paragraph{Radial distance algorithm} Another simple algorithm to reduce points clustered too closely together. The algorithm will sequentially go through the line and eliminate all points whose distance to the current key is shorter than a given tolerance limit. As soon as a point with greater distance is found, it becomes the new key.
\paragraph{Radial distance algorithm} Another simple algorithm to reduce points clustered too closely together. The algorithm sequentially goes through the line and eliminates all points whose distance to the current key is shorter than a given tolerance limit. As soon as a point with a greater distance is found, it becomes the new key. \parencite{koning2011polyline}
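A minimal JavaScript sketch of this routine (again only an illustration under the nested-array assumption) could look like this:

\begin{lstlisting}[
    language=javascript,
    caption={Illustrative sketch of the radial distance algorithm}
]
// Drop every point that lies within `tolerance` of the current key point.
function radialDistance(points, tolerance) {
  const sqTolerance = tolerance * tolerance;
  let key = points[0];
  const result = [key];
  for (let i = 1; i < points.length; i++) {
    const dx = points[i][0] - key[0];
    const dy = points[i][1] - key[1];
    if (dx * dx + dy * dy > sqTolerance) {
      key = points[i];   // far enough away: this point becomes the new key
      result.push(key);
    }
  }
  // Always keep the last point, even if it is close to the last key.
  if (result[result.length - 1] !== points[points.length - 1]) {
    result.push(points[points.length - 1]);
  }
  return result;
}
\end{lstlisting}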

%\begin{figure}
% \centering
@@ -26,7 +27,7 @@ In this chapter several algorithms for polyline simplification will be explained
%\end{figure}

\paragraph{Perpendicular distance algorithm} Again a tolerance limit is given. The measure to check against is the perpendicular distance of a point to the line connecting its two neighbors. All points that exceed this limit are retained.
\paragraph{Perpendicular distance algorithm} Again a tolerance limit is given. The measure to check against is the perpendicular distance of a point to the line connecting its two neighbors. All points whose distance exceeds this limit are retained. \parencite{koning2011polyline}
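The idea can be sketched as follows. The helper \textsf{perpendicularDistanceSq} (an illustrative name, not a library function) computes the squared distance of a point to the infinite line through two neighbors and is reused in later sketches; note that this simplified version always tests against the original neighbors, whereas actual implementations typically skip the successor of a removed point.

\begin{lstlisting}[
    language=javascript,
    caption={Illustrative sketch of the perpendicular distance algorithm}
]
// Squared perpendicular distance of point p to the infinite line through a and b.
function perpendicularDistanceSq(p, a, b) {
  const dx = b[0] - a[0];
  const dy = b[1] - a[1];
  if (dx === 0 && dy === 0) {   // a and b coincide: fall back to point distance
    const ex = p[0] - a[0], ey = p[1] - a[1];
    return ex * ex + ey * ey;
  }
  // Squared cross product (parallelogram area) divided by the squared base length.
  const cross = dx * (a[1] - p[1]) - dy * (a[0] - p[0]);
  return (cross * cross) / (dx * dx + dy * dy);
}

// Keep an interior point only if it deviates enough from the line through its neighbors.
function perpendicularDistance(points, tolerance) {
  const sqTolerance = tolerance * tolerance;
  const result = [points[0]];
  for (let i = 1; i < points.length - 1; i++) {
    if (perpendicularDistanceSq(points[i], points[i - 1], points[i + 1]) > sqTolerance) {
      result.push(points[i]);
    }
  }
  result.push(points[points.length - 1]);
  return result;
}
\end{lstlisting}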

%\begin{figure}
% \centering
@@ -36,9 +37,9 @@ In this chapter several algorithms for polyline simplification will be explained
%\end{figure}

\paragraph{Reumann-Witkam simplification} As the name implies this algorithm was developed by Reumann and Witkam. In 1974 they described the routine that constructs a "corridor/search area" by placing two parallel lines in the direction of its initial tangent. The distance from this segment is user specified. Then the successive points will be checked until a point outside of this area is found. Its predecessor becomes a key and the two points mark the new tangent for the search area. This procedure is repeated until the last point is reached.
\paragraph{Reumann-Witkam simplification} As the name implies, this algorithm was developed by Reumann and Witkam. In 1974 they described a routine that constructs a ``corridor/search area'' by placing two parallel lines in the direction of the line's initial tangent. The distance of these lines from the tangent is user-specified. The successive points are then checked until a point outside of this area is found. Its predecessor becomes a key, and the two points mark the new tangent for the search area. This procedure is repeated until the last point is reached. \parencite{reumann1974optimizing}
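Reusing the distance helper from the perpendicular distance sketch above, the corridor test can be sketched as follows (an illustration only, not the routine of any particular library):

\begin{lstlisting}[
    language=javascript,
    caption={Illustrative sketch of the Reumann-Witkam corridor test}
]
// Walk along the line; a point becomes a key as soon as its successor
// leaves the corridor of width 2 * tolerance around the current tangent.
function reumannWitkam(points, tolerance) {
  const sqTolerance = tolerance * tolerance;
  const result = [points[0]];
  let a = 0, b = 1;                 // the two points defining the current tangent
  for (let i = 2; i < points.length; i++) {
    if (perpendicularDistanceSq(points[i], points[a], points[b]) > sqTolerance) {
      result.push(points[i - 1]);   // the predecessor becomes a key
      a = i - 1;                    // new tangent through the key and the outlier
      b = i;
    }
  }
  result.push(points[points.length - 1]);
  return result;
}
\end{lstlisting}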

\paragraph{Zhao-Saalfeld simplification} This routine, also called the sleeve-fitting polyline simplification, developed in 1997 is similar to the Reumann-Witkam algorithm. Its goal is to fit as many consecutive points in the search area. The corridor is however not aligned to the initial tangent but rather to the last point in the sequence. From the starting point on successors get added as long as all in-between points fit in the sleeve. If the constraint fails a new sleeve will be started from the last point in the previous section.
\paragraph{Zhao-Saalfeld simplification} This routine, also called sleeve-fitting polyline simplification, was developed in 1997 and is similar to the Reumann-Witkam algorithm. Its goal is to fit as many consecutive points as possible into the search area. The corridor is, however, not aligned to the initial tangent but rather to the last point in the sequence. From the starting point on, successors are added as long as all in-between points fit in the sleeve. If the constraint fails, a new sleeve is started from the last point of the previous section. \parencite{zhao1997linear}

%\begin{figure}
% \centering
@@ -48,7 +49,7 @@ In this chapter several algorithms for polyline simplification will be explained
%\end{figure}

\paragraph{The Opheim simplification} Opheim extends the Reumann-Witkam algorithm in 1982 by constraining the search area. To do that two parameters \textsf{dmin} and \textsf{dmax} are given. From the key point on the last point inside a radial distance search region defined by \textsf{dmin} is taken to form the direction of the search corridor. If there is no point inside this region the subsequent point is taken. Then the process from the Reumann-Witkam algorithm is applied with the corridor constrained to a maximum distance of \textsf{dmax}.
\paragraph{The Opheim simplification} Opheim extended the Reumann-Witkam algorithm in 1982 by constraining the search area. To do that, two parameters \textsf{dmin} and \textsf{dmax} are given. From the key point on, the last point inside a radial-distance search region defined by \textsf{dmin} is taken to form the direction of the search corridor. If there is no point inside this region, the subsequent point is taken. Then the process from the Reumann-Witkam algorithm is applied, with the corridor constrained to a maximum distance of \textsf{dmax}. \parencite{opheim1982fast}

%\begin{figure}
% \centering
@@ -58,7 +59,7 @@ In this chapter several algorithms for polyline simplification will be explained
%\end{figure}

\paragraph{Lang simplification} Lang described this algorithm in 1969. The search area is defined by a specified number of points too look ahead of the key point. A line is constructed from the key point to the last point in the search area. If the perpendicular distance of all intermediate points to this line is below a tolerance limit, they will be removed and the last point is the new key. Otherwise the search area is shrunk by excluding this last point until the requirement is met or there are no more intermediate points. All the algorithms before operated on the line sequentially and have a linear time complexity. This one also operates sequentially, but one of the critics about the Lang algorithm is that it requires too much computer time (DP). The complexity of this algorithm is $\mathcal{O}(m^n)$.
\paragraph{Lang simplification} Lang described this algorithm in 1969. The search area is defined by a specified number of points to look ahead of the key point. A line is constructed from the key point to the last point in the search area. If the perpendicular distance of all intermediate points to this line is below a tolerance limit, they are removed and the last point becomes the new key. Otherwise the search area is shrunk by excluding this last point, until the requirement is met or there are no more intermediate points. All the algorithms before operate on the line sequentially and have a linear time complexity. This one also operates sequentially, but one of the criticisms of the Lang algorithm is that it requires too much computer time \parencite{douglas1973algorithms}. The complexity of this algorithm is $\mathcal{O}(m^n)$ with \textsf{m} being the number of positions to look ahead. \parencite{lang1969rules}
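The look-ahead behaviour can be sketched as follows (illustrative only; \textsf{lookAhead} corresponds to the number of positions m mentioned above, and the distance helper from the earlier sketch is reused):

\begin{lstlisting}[
    language=javascript,
    caption={Illustrative sketch of the Lang algorithm}
]
// Try to bridge up to `lookAhead` points at once and shrink the search
// area until every intermediate point lies within the tolerance.
function lang(points, tolerance, lookAhead) {
  const sqTolerance = tolerance * tolerance;
  const result = [points[0]];
  let key = 0;
  while (key < points.length - 1) {
    let end = Math.min(key + lookAhead, points.length - 1);
    while (end > key + 1) {
      let allWithin = true;
      for (let i = key + 1; i < end; i++) {
        if (perpendicularDistanceSq(points[i], points[key], points[end]) > sqTolerance) {
          allWithin = false;
          break;
        }
      }
      if (allWithin) break;
      end--;                      // shrink the search area by one point
    }
    result.push(points[end]);
    key = end;                    // the surviving point becomes the new key
  }
  return result;
}
\end{lstlisting}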

%\begin{figure}
% \centering
@@ -69,7 +70,7 @@ In this chapter several algorithms for polyline simplification will be explained

%\paragraph{Jenks simplification}

\paragraph{Douglas-Peucker simplification} David H. Douglas and Thomas K. Peucker developed this algorithm in 1973 as an improvement to the by then predominant Lang algorithm. It is the first global routine described here. A global routine considers the entire line for the simplification process and comes closest to imitating manual simplification techniques (clayton). The algorithm starts with constructing a line between the first point (anchor) and last point (floating point) of the feature. The perpendicular distance of all points in between those two is calculated. The intermediate point furthest away from the line will become the new floating point on the condition that its perpendicular distance is greater than the specified tolerance. Otherwise the line segment is deemed suitable to represent the whole line. In this case the floating point is considered the new anchor and the last point will serve as floating point again (DP). The worst case complexity of this algorithm is $\mathcal{O}(nm)$ with $\mathcal{O}(n\log{}m)$ being the average complexity (psimpl). The m here is the number of points in the resulting line which is not known beforehand.
\paragraph{Douglas-Peucker simplification} David H. Douglas and Thomas K. Peucker developed this algorithm in 1973 as an improvement to the then predominant Lang algorithm. It is the first global routine described here. A global routine considers the entire line for the simplification process and comes closest to imitating manual simplification techniques \parencite{clayton1985cartographic}. The algorithm starts by constructing a line between the first point (anchor) and the last point (floating point) of the feature. The perpendicular distance of all points in between those two is calculated. The intermediate point furthest away from the line becomes the new floating point, on the condition that its perpendicular distance is greater than the specified tolerance. Otherwise the line segment is deemed suitable to represent the whole line. In this case the floating point is considered the new anchor and the last point serves as floating point again. The worst-case complexity of this algorithm is $\mathcal{O}(nm)$ with $\mathcal{O}(n\log{}m)$ being the average complexity \parencite{koning2011polyline}. The m here is the number of points in the resulting line, which is not known beforehand. \parencite{douglas1973algorithms}
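The recursive structure of the routine can be sketched as follows (again only an illustration; production implementations such as the ones benchmarked later differ in details like explicit stacks and the exact distance measure):

\begin{lstlisting}[
    language=javascript,
    caption={Illustrative sketch of the Douglas-Peucker algorithm}
]
// Keep the point furthest from the anchor-floater line if it exceeds the
// tolerance; otherwise all intermediate points of the segment are dropped.
function douglasPeucker(points, tolerance) {
  const sqTolerance = tolerance * tolerance;
  const keep = [points[0]];

  function simplifySegment(first, last) {
    let maxDist = sqTolerance;
    let index = -1;
    for (let i = first + 1; i < last; i++) {
      const d = perpendicularDistanceSq(points[i], points[first], points[last]);
      if (d > maxDist) {
        maxDist = d;
        index = i;
      }
    }
    if (index !== -1) {           // furthest point exceeds the tolerance
      simplifySegment(first, index);
      keep.push(points[index]);
      simplifySegment(index, last);
    }
  }

  simplifySegment(0, points.length - 1);
  keep.push(points[points.length - 1]);
  return keep;
}
\end{lstlisting}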

%\begin{figure}
% \centering
@@ -80,12 +81,13 @@ In this chapter several algorithms for polyline simplification will be explained

%\paragraph{with reduction parameter} \todo{O(n*m)}

\paragraph{Visvalingam-Whyatt simplification} This is another global point routine. It was developed in 1993 (VW). Visvalingam and Wyatt use a area-based method to rank the points by their significance. To do that the "effective area" of each point has to be calculated. This is the area the point spans up with its adjoining points (Shi). Then the points with the least effective area get iteratively eliminated, and its neighbors effective area recalculated, until there are only two points left. At each elimination the point gets stored in a list alongside with its associated area. This is the effective area of that point or the associated area of the previous point in case the latter one is higher. This way the algorithm can be used for scale dependent and scale-independent generalizations.
\paragraph{Visvalingam-Whyatt simplification} This is another global point routine. It was developed in 1993. Visvalingam and Whyatt use an area-based method to rank the points by their significance. To do that, the ``effective area'' of each point has to be calculated. This is the area the point spans with its adjoining points \parencite{shi2006performance}. Then the points with the least effective area are iteratively eliminated, and their neighbors' effective areas recalculated, until there are only two points left. At each elimination the point is stored in a list along with its associated area. This is the effective area of that point, or the associated area of the previously eliminated point in case the latter is higher. This way the algorithm can be used for scale-dependent and scale-independent generalizations. \parencite{visvalingam1993line}
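The ranking by effective area can be sketched as follows. This naive $\mathcal{O}(n^2)$ version simply removes points down to a target count; real implementations use a priority queue and record the associated areas as described above.

\begin{lstlisting}[
    language=javascript,
    caption={Illustrative sketch of the Visvalingam-Whyatt ranking}
]
// Effective area of point b: the area of the triangle it spans with its neighbors.
function effectiveArea(a, b, c) {
  return Math.abs(
    (b[0] - a[0]) * (c[1] - a[1]) - (c[0] - a[0]) * (b[1] - a[1])
  ) / 2;
}

// Repeatedly eliminate the point with the smallest effective area.
function visvalingamWhyatt(points, minPoints) {
  const pts = points.slice();
  while (pts.length > Math.max(minPoints, 2)) {
    let minArea = Infinity;
    let minIndex = -1;
    for (let i = 1; i < pts.length - 1; i++) {
      const area = effectiveArea(pts[i - 1], pts[i], pts[i + 1]);
      if (area < minArea) {
        minArea = area;
        minIndex = i;
      }
    }
    pts.splice(minIndex, 1);      // eliminate the least significant point
  }
  return pts;
}
\end{lstlisting}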

\subsubsection{Summary}

The algorithms shown here are most common used simplification algorithms in cartography and GIS. The usage of one algorithm stands out however. It is the Douglas-Peucker algorithm. Its complexity however is not ideal for web-based applications. The solution is to preprocess the line with the linear-time radial distance algorithm to reduce point clusters. This solution will be further discussed in section \ref{ch:simplify.js}.
The algorithms shown here are the most commonly used simplification algorithms in cartography and GIS. The usage of one algorithm stands out, however: the Douglas-Peucker algorithm. In \textsf{Performance Evaluation of Line Simplification Algorithms for Vector Generalization}, Shi and Cheung conclude that "the Douglas-Peucker algorithm was the most effective to preserve the shape of the line and the most accurate in terms of position" \parencite{shi2006performance}. Its complexity, however, is not ideal for web-based applications. The solution is to preprocess the line with the linear-time radial distance algorithm to reduce point clusters. This solution will be further discussed in section \ref{ch:simplify.js}.
@@ -1,57 +0,0 @@

\subsection{Geodata formats on the Web}
\label{ch:dataformats}

Here the data formats that are used through this theses will be explained.

\paragraph{The JavaScript Object Notation (JSON) Data Interchange Format} was derived from the ECMAScript Programming Language Standard\footnote{\url{https://tools.ietf.org/html/rfc8259}}. It is a text format for the serialization of structured data. As a text format is suites well for the data exchange between server and client. Also it can easily be consumed by JavaScript. These characteristics are ideal for web based applications. It does however only support a limited number of data types. Four primitive ones (string, number, boolean and null) and two structured ones (objects and array). Objects are an unordered collection of name-value pairs, while arrays are simply ordered lists of values. JSON was meant as a replacement for XML as it provides a more human readable format. Through nesting complex data structures can be created.

\paragraph{The GeoJSON Format} is a geospatial data interchange format\footnote{\url{https://tools.ietf.org/html/rfc7946}}. As the name suggests it is based on JSON and deals with data representing geographic features. There are several geometry types defined to be compatible with the types in the OpenGIS Simple Features Implementation Specification for SQL\footnote{\url{https://portal.opengeospatial.org/files/?artifact_id=829}}. These are Point, MultiPoint, LineString, MultiLineString, Polygon, Multipolygon and the heterogeneous GeometryCollection. Listing \ref{lst:geojson-example} shows a simple example of a GeoJSON object with one point feature. A more complete example can be viewed in the file \path{data/example-7946.geojson}.

\lstinputlisting[
    float=!htb,
    language=javascript,
    caption=An example for a GeoJSON object,
    label=lst:geojson-example
]{../data/example-simple.geojson}

The feature types differ in the format of their coordinates property. A position is an array of at least two elements representing longitude and latitude. An optional third element can be added to specify altitude. All cases in this thesis will only deal with two-dimensional positions. While the coordinates member of a \textsl{Point}-feature is simply a single position, a \textsl{LineString}-feature describes its geometry through an Array of at least two positions. More interesting is the specification for Polygons. It introduces the concept of the \textsl{linear ring} as a closed \textsl{LineString} with at least four positions where the first and last positions are equivalent. The \textsl{Polygon's} coordinates member is an array of linear rings with the first one representing the exterior ring and all others interior rings, also named surface and holes respectively. At last the coordinates member of \textsl{MultiLineStrings} and \textsl{MultiPolygons} is defined as a single array of its singular feature type.

GeoJSON is mainly used for web-based mapping. Since it is based on JSON it inherits its strength. There is for one the enhanced readability through reduced markup overhead compared to XML-based data types like GML. Interoperability with web applications comes for free since the parsing of JSON-objects is integrated in JavaScript. Unlike the Esri Shapefile format a single file is sufficient to store and transmit all relevant data, including feature properties.

To its downsides count that a text based cannot store the geometries as efficiently as it would be possible with a binary format. Also only vector-based data types can be represented. Another disadvantage can be the strictly non-topologic approach. Every feature is completely described by one entry. However when there are features that share common components, like boundaries in neighboring polygons, these data points will be encoded twice in the GeoJSON object. On the one hand this further poses concerns about data size. On the other hand it is more difficult to execute topological analysis on the data set. Luckily there is a related data structure to tackle this problem.

\paragraph{TopoJSON} is an extension of GeoJSON and aims to encode datastructures into a shared topology\footnote{\url{https://github.com/topojson/topojson-specification}}. It supports the same geometry types as GeoJSON. It differs in some additional properties to use and new object types like "Topology" and "GeometryCollection". Its main feature is that LineStrings, Polygons and their multiplicitary equivalents must define line segments in a common property called "arcs". The geometries themselves then reference the arcs from with they are made up. This reduces redundancy of data points. Another feature is the quantization of positions. To use it one can define a "transform" object which specifies a scale and translate point to encode all coordinates. Together with delta-encoding of position arrays one obtains integer values better suited for efficient serialization and reduced file size.

Other than the reduced data duplication topological formats have the benefit of topological analysis and editing. When modifying adjacent Polygons for example by simplification one would prefer TopoJSON over GeoJSON. Figure \ref{fig:topological-editing} shows what this means. When modifying the boundary of one polygon, one can create gaps or overlaps in non-topological representations. With a topological data structure however the topology will preserve. [esri]\footnote{\url{https://www.esri.com/news/arcuser/0401/topo.html}}

\begin{figure}
    \centering
    \includegraphics[width=.3\linewidth]{./images/topological-editing.png}
    \label{fig:topological-editing}
    \caption{Topological editing (top) vs. Non-topological editing (bottom) [esri]}
\end{figure}

\paragraph{Coordinate representation} Both GeoJSON and TopoJSON represent positions as an array of numbers. The elements depict longitude, latitude and optionally altitude in that order. For simplicity this thesis will deal with two-dimensional positions only. A polyline is described by creating an array of these positions as seen in listing \ref{lst:coordinates-array}.

\begin{lstlisting}[
    float=htb,
    label=lst:coordinates-array,
    caption=Polyline coordinates in nested-array form
]
[[102.0, 0.0], [103.0, 1.0], [104.0, 0.0], [105.0, 1.0]]
\end{lstlisting}

There will be however one library in this thesis which expects coordinates in a different format. Listing \ref{lst:coordinates-object} shows a polyline in the sense of this library. Here one location is represented by an object with x and y properties.

\begin{lstlisting}[
    float=htb,
    label=lst:coordinates-object,
    caption=Polyline in array-of-objects form
]
[{x: 102.0, y: 0.0}, {x: 103.0, y: 1.0}, {x: 104.0, y: 0.0}, {x: 105.0, y: 1.0}]
\end{lstlisting}

To distinguish these formats in future references the first first format will be called nested-array format, while the latter will be called array-of-objects format.
thesis/chapters/02.03-Dataformats.tex | 57 (new file)
@@ -0,0 +1,57 @@

\subsection{Geodata formats on the Web}
\label{ch:dataformats}

Here the data formats that are used throughout this thesis will be explained.

\paragraph{The JavaScript Object Notation (JSON) Data Interchange Format} was derived from the ECMAScript Programming Language Standard \parencite{bray2014javascript}. It is a text format for the serialization of structured data. As a text format it suits well for the data exchange between server and client. It can also easily be consumed by JavaScript. These characteristics are ideal for web-based applications. It does, however, only support a limited number of data types: four primitive ones (string, number, boolean and null) and two structured ones (objects and arrays). Objects are an unordered collection of name-value pairs, while arrays are simply ordered lists of values. JSON was meant as a replacement for XML as it provides a more human-readable format. Through nesting, complex data structures can be created.

\paragraph{The GeoJSON Format} is a geospatial data interchange format \parencite{butler2016geojson}. As the name suggests it is based on JSON and deals with data representing geographic features. There are several geometry types defined to be compatible with the types in the OpenGIS Simple Features Implementation Specification for SQL \parencite{open1999opengis}. These are Point, MultiPoint, LineString, MultiLineString, Polygon, MultiPolygon and the heterogeneous GeometryCollection. Listing \ref{lst:geojson-example} shows a simple example of a GeoJSON object with one point feature. A more complete example can be viewed in the file \path{data/example-7946.geojson}.

\lstinputlisting[
    float=!htb,
    language=javascript,
    caption=An example for a GeoJSON object,
    label=lst:geojson-example
]{../data/example-simple.geojson}

The feature types differ in the format of their coordinates property. A position is an array of at least two elements representing longitude and latitude. An optional third element can be added to specify altitude. While the coordinates member of a \textsl{Point}-feature is simply a single position, a \textsl{LineString}-feature describes its geometry through an array of at least two positions. More interesting is the specification for Polygons. It introduces the concept of the \textsl{linear ring} as a closed \textsl{LineString} with at least four positions where the first and last positions are equivalent. The \textsl{Polygon's} coordinates member is an array of linear rings, with the first one representing the exterior ring and all others interior rings, also named surface and holes respectively. Lastly, the coordinates member of \textsl{MultiLineStrings} and \textsl{MultiPolygons} is defined as a single array of its singular feature type.

GeoJSON is mainly used for web-based mapping. Since it is based on JSON it inherits its strengths. For one, there is the enhanced readability through reduced markup overhead compared to XML-based data types like GML. Interoperability with web applications comes for free since the parsing of JSON objects is integrated in JavaScript. Unlike the Esri Shapefile format, a single file is sufficient to store and transmit all relevant data, including feature properties.

Among its downsides is that a text-based format cannot store the geometries as efficiently as would be possible with a binary format. Also, only vector-based data types can be represented. Another disadvantage can be the strictly non-topologic approach. Every feature is completely described by one entry. However, when there are features that share common components, like boundaries of neighboring polygons, these data points will be encoded twice in the GeoJSON object. On the one hand this further poses concerns about data size. On the other hand it is more difficult to execute topological analysis on the data set. Luckily there is a related data structure to tackle this problem.

\paragraph{TopoJSON} is an extension of GeoJSON and aims to encode data structures into a shared topology \parencite{bostock2017topojson}. It supports the same geometry types as GeoJSON. It differs in some additional properties and new object types like "Topology" and "GeometryCollection". Its main feature is that LineStrings, Polygons and their multipart equivalents must define line segments in a common property called "arcs". The geometries themselves then reference the arcs from which they are made up. This reduces the redundancy of data points. Another feature is the quantization of positions. To use it, one can define a "transform" object which specifies a scale and translate point with which all coordinates are encoded. Together with delta-encoding of position arrays one obtains small integer values better suited for efficient serialization and reduced file size.
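The effect of quantization and delta-encoding can be made concrete with a small sketch; the function below is purely illustrative and not taken from the TopoJSON reference implementation:

\begin{lstlisting}[
    language=javascript,
    caption={Illustrative sketch of quantizing and delta-encoding an arc}
]
// Quantize an arc with the given transform and delta-encode it, so that the
// first position is absolute and all following positions are integer offsets.
function encodeArc(arc, transform) {
  const { scale, translate } = transform;
  let prevX = 0, prevY = 0;
  return arc.map(([lon, lat]) => {
    const x = Math.round((lon - translate[0]) / scale[0]);
    const y = Math.round((lat - translate[1]) / scale[1]);
    const delta = [x - prevX, y - prevY];
    prevX = x;
    prevY = y;
    return delta;
  });
}
\end{lstlisting}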

Other than the reduced data duplication, topological formats have the benefit of topological analysis and editing. When modifying adjacent polygons, for example by simplification, one would prefer TopoJSON over GeoJSON. Figure \ref{fig:topological-editing} shows what this means. When modifying the boundary of one polygon, one can create gaps or overlaps in non-topological representations. With a topological data structure, however, the topology will be preserved. \parencite{theobald2001understanding}

\begin{figure}
    \centering
    \includegraphics[width=.3\linewidth]{./images/topological-editing.png}
    \caption{Topological editing (top) vs. non-topological editing (bottom) \parencite{theobald2001understanding}}
    \label{fig:topological-editing}
\end{figure}

\paragraph{Coordinate representation} Both GeoJSON and TopoJSON represent positions as an array of numbers. The elements depict longitude, latitude and optionally altitude, in that order. For simplicity this thesis will deal with two-dimensional positions only. A polyline is described by creating an array of these positions, as seen in listing \ref{lst:coordinates-array}.

\begin{lstlisting}[
    float=htb,
    label=lst:coordinates-array,
    caption=Polyline coordinates in nested-array form
]
[[102.0, 0.0], [103.0, 1.0], [104.0, 0.0], [105.0, 1.0]]
\end{lstlisting}

There will, however, be one library in this thesis which expects coordinates in a different format. Listing \ref{lst:coordinates-object} shows a polyline as expected by this library. Here one location is represented by an object with x and y properties.

\begin{lstlisting}[
    float=htb,
    label=lst:coordinates-object,
    caption=Polyline in array-of-objects form
]
[{x: 102.0, y: 0.0}, {x: 103.0, y: 1.0}, {x: 104.0, y: 0.0}, {x: 105.0, y: 1.0}]
\end{lstlisting}

To distinguish these formats in future references, the first format will be called nested-array format, while the latter will be called array-of-objects format.
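Converting between the two representations is a simple mapping in JavaScript; the helper names below are illustrative:

\begin{lstlisting}[
    language=javascript,
    caption={Converting between nested-array and array-of-objects form}
]
// Nested-array form to array-of-objects form ...
function toObjectForm(points) {
  return points.map(([x, y]) => ({ x, y }));
}

// ... and back again.
function toArrayForm(points) {
  return points.map(({ x, y }) => [x, y]);
}

// Example: toObjectForm([[102.0, 0.0], [103.0, 1.0]])
// yields [{x: 102, y: 0}, {x: 103, y: 1}].
\end{lstlisting}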
|
@ -6,34 +6,33 @@ JavaScript has been the only native programming language of web browsers for a l
|
||||
|
||||
\subsubsection{Introduction to Webassembly}
|
||||
|
||||
WebAssembly started in April 2015 with an W3C Community Group\footnote{\url{https://www.w3.org/community/webassembly/}} and is designed by engineers from the four major browser vendors (Mozilla, Google, Apple and Microsoft). It is a portable low-level bytecode designed as target for compilationof high-level languages. By being an abstraction over modern hardware it is language-, hardware-, and platform-independent. It is intended to be run in a stack-based virtual machine. This way it is not restrained to the Web platform or a JavaScript environment. Some key concepts are the structuring into modules with exported and imported definitions and the linear memory model. Memory is represented as a large array of bytes that can be dynamically grown. Security is ensured by the linear memory being disjoint from code space, the execution stack and the engine's data structures. Another feature of WebAssembly is the possibility of streaming compilation and the parallelization of compilation processes. \footnote{\url{https://people.mpi-sws.org/~rossberg/papers/Haas,\%20Rossberg,
|
||||
\%20Schuff,\%20Titzer,\%20Gohman,\%20Wagner,\%20Zakai,\%20Bastien,\%20Holman\%20-\%20Bringing\%20the\%20Web\%20up\%20to\%20Speed\%20with\%20WebAssembly.pdf}}
|
||||
WebAssembly\footnote{\url{https://webassembly.org/}} started in April 2015 with an W3C Community Group\footnote{\url{https://www.w3.org/community/webassembly/}} and is designed by engineers from the four major browser vendors (Mozilla, Google, Apple and Microsoft). It is a portable low-level bytecode designed as target for compilation of high-level languages. By being an abstraction over modern hardware it is language-, hardware-, and platform-independent. It is intended to be run in a stack-based virtual machine. This way it is not restrained to the Web platform or a JavaScript environment. Some key concepts are the structuring into modules with exported and imported definitions and the linear memory model. Memory is represented as a large array of bytes that can be dynamically grown. Security is ensured by the linear memory being disjoint from code space, the execution stack and the engine's data structures. Another feature of WebAssembly is the possibility of streaming compilation and the parallelization of compilation processes. \parencite{haas2017bringing}
|
||||
|
||||
The goals of WebAssembly have been well defined. It's semantics are intended to be safe and fast to execute and bring portability by language-, hardware- and platform-independence. Furthermore it should be deterministic and have simple interoperability with the web platform. For its representation the following goals are declared. It shall be compact and easy to decode, validate and compile. Parallelization and streamable compilation are also mentioned.
|
||||
The goals of WebAssembly have been well defined. It's semantics are intended to be safe and fast to execute and bring portability by language-, hardware- and platform-independence. Furthermore it should be deterministic and have simple interoperability with the web platform. For its representation the following goals are declared. It shall be compact and easy to decode, validate and compile. Parallelization and streamable compilation are also mentioned. \parencite{haas2017bringing}
|
||||
|
||||
These goals are not specific to WebAssembly. They can be seen as properties that a low-level compilation target for the web should have. In fact there have been previous attempts to run low-level code on the web. Examples are Microsoft's ActiveX, Native Client (NaCl) and Emscripten each having issues complying with the goals. Java and Flash are examples for managed runtime plugins. Their usage is declining however not at least due to falling short on the goals mentioned above.
|
||||
These goals are not specific to WebAssembly. They can be seen as properties that a low-level compilation target for the web should have. In fact there have been previous attempts to run low-level code on the web. Examples are Microsoft's ActiveX, Native Client (NaCl) and Emscripten each having issues complying with the goals. Java and Flash are examples for managed runtime plugins. Their usage is declining however not at least due to falling short on the goals mentioned above. \parencite{haas2017bringing}

It is often stated that WebAssembly can bring performance benefits. It makes sense that statically typed machine code beats scripting languages performance-wise. It has to be observed, however, whether the overhead of switching contexts negates this performance gain. JavaScript has seen a lot of performance improvements over the past years. Not least Google's work on the V8 engine has brought JavaScript to an acceptable speed for extensive calculations. Modern engines observe the execution of running JavaScript code and perform optimizations comparable to those of compilers. \parencite{clark2017what}

The JavaScript ecosystem has evolved rapidly over the past years. Thanks to package managers like bower, npm and yarn it is simple to pull code from external sources into one's codebase. Initially intended for server-side JavaScript execution, the ecosystem has found its way into front-end development via module bundlers like browserify, webpack and rollup. In the course of this growth many algorithms and implementations have been ported to JavaScript for use on the web. With WebAssembly this ecosystem can be broadened even further. By lifting the language barrier, the existing work of many more programmers can be reused on the web. Whole libraries exclusive to native development could be imported with a few simple tweaks. Codecs not supported by browsers can be made available for use in any browser supporting WebAssembly. \parencite{surma2018emscripting}
% In this thesis the C++ library psimpl will be utilized to bring polyline simplification to the web. This library already implements various algorithms for this task. It will be further introduced in chapter \ref{ch:psimpl}.
\paragraph{The Emscripten toolchain}

There are various compilers with WebAssembly as compilation target. In this thesis the Emscripten toolchain is used. Other notable compilers are wasm-pack\footnote{\url{https://rustwasm.github.io/}} for Rust projects and AssemblyScript\footnote{\url{https://github.com/AssemblyScript/assemblyscript}} for a TypeScript subset. The latter compiler is particularly interesting as TypeScript, itself a superset of JavaScript, is a popular choice among web developers. This reduces the friction of WebAssembly integration as it is not necessary to learn a new language.

Emscripten\footnote{\url{https://emscripten.org/}} started with the goal of compiling unmodified C and C++ applications to JavaScript. It does this by acting as a compiler backend for LLVM assembly. High-level languages compile through a frontend into the LLVM intermediate representation. Well known frontends are Clang and LLVM-GCC. From there the code is passed through a backend to generate the architecture-specific machine code. Emscripten hooks in here to generate asm.js, a performant JavaScript subset. In figure \ref{fig:emscripten-chain} one such example chain can be seen. On the left is the original C code which sums up the numbers from 1 to 100. The resulting LLVM assembly can be seen in the middle. It is definitely more verbose, but easier to work on for the backend compiler. Notable are the allocation instructions, the labeled code blocks and the control flow moves. The JavaScript representation on the right is a nearly one-to-one translation of the LLVM assembly. The branching is done via a switch-in-for loop, memory is implemented by a JavaScript array named HEAP and LLVM assembly function calls become normal JavaScript function calls like \textsf{\_printf()}. Through optimizations the code becomes more compact and only then more performant. \parencite{zakai2011emscripten}

\begin{figure}
\centering
\includegraphics[width=.3\linewidth]{./images/emscripten-c.png}
\includegraphics[width=.3\linewidth]{./images/emscripten-llvm.png}
\includegraphics[width=.3\linewidth]{./images/emscripten-js.png}
\caption{Example code when compiling a C program (left) to asm.js (right) through LLVM bytecode (middle) without optimizations. \parencite{zakai2011emscripten}}
\label{fig:emscripten-chain}
\end{figure}

It is in fact this project that inspired the creation of WebAssembly. It was even called the "natural evolution of asm.js"\footnote{\url{https://groups.google.com/forum/\#!topic/emscripten-discuss/k-egXO7AkJY/discussion}}. As of May 2018 Emscripten changed its default output to WebAssembly\footnote{\url{https://github.com/emscripten-core/emscripten/pull/6419}} while still supporting asm.js. Currently the default backend, named \textsf{fastcomp}, generates the WebAssembly bytecode from asm.js. A new backend, however, is about to take its place that compiles directly from LLVM \parencite{zakai2019llvmbackend}.

The compiler is only one part of the Emscripten toolchain. The toolchain also comprises various APIs, for example for file system emulation or network calls, as well as further tools around the compiler.
@ -8,11 +8,11 @@ In this chapter I will explain the approach to improve the performance of a simp
\label{ch:simplify.js}
% Simplify.JS + turf

Simplify.js calls itself a "tiny high-performance JavaScript polyline simplification library"\footnote{\url{https://mourner.github.io/simplify-js/}}. It was extracted from Leaflet, the "leading open-source JavaScript library for mobile-friendly interactive maps"\footnote{\url{https://leafletjs.com/}}. Due to its usage in Leaflet and Turf.js, a geospatial analysis library, it is the most commonly used library for polyline simplification. The library itself currently has 20,066 weekly downloads on the npm platform while the Turf.js derivate @turf/simplify has 30,389. Turf.js maintains an unmodified fork of the library in its own repository. The mentioned mapping library Leaflet is downloaded 189,228 times a week.

The Douglas-Peucker algorithm is implemented with an optional radial distance preprocessing routine. This preprocessing trades quality for performance. Accordingly, the mode that disables this routine is called highest quality.
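The public interface of Simplify.js is a single function call. The following listing is a minimal usage sketch; the coordinate values and the tolerance are arbitrary illustration values.

\begin{lstlisting}[
language=javascript,
caption={A minimal usage sketch of Simplify.js (illustrative values)}
]
// polyline as a list of objects with x and y properties
const points = [
  { x: 0.0, y: 0.0 },
  { x: 1.1, y: 0.1 },
  { x: 2.0, y: -0.2 },
  { x: 3.1, y: 0.1 },
  { x: 4.0, y: 0.0 }
];

// tolerance in the unit of the coordinates (illustrative value);
// the third argument enables the highest quality mode, i.e. it
// skips the radial distance preprocessing
const simplified = simplify(points, 0.5, true);
\end{lstlisting}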

Interestingly the library expects coordinates to be a list of objects with x and y properties. GeoJSON and TopoJSON however store coordinates in nested array form (see chapter \ref{ch:dataformats}). Luckily, since the library is small and written in JavaScript, any skilled web developer can easily fork and modify the code for their own purposes. This is even pointed out in the library's source code. The fact that Turf.js, which can be seen as a convenience wrapper for processing GeoJSON data, decided to keep the library as is might indicate some benefit to this format. Listing \ref{lst:turf-transformation} shows how Turf.js calls Simplify.js. Instead of altering the source code, the data is transformed back and forth between the formats on each call. It is questionable if this practice is advisable at all.

\lstinputlisting[
float=htbp,
@ -36,20 +36,18 @@ Since it is not clear which case is faster, and given how simple the required ch

\subsection{The WebAssembly solution}
\label{sec:benchmark-webassembly}

In the scope of this thesis a library will be created that implements the same procedure as Simplify.js in C code. It will be made available on the web platform through WebAssembly. In the style of the model library it will be called Simplify.wasm. The compiler to use will be Emscripten as it is the standard for porting C code to WebAssembly.

As mentioned, the first step is to port Simplify.js to the C programming language. The file \path{lib/simplify-wasm/simplify.c} shows this port. It is kept as close to the JavaScript library as possible. This may result in a C-untypical coding style but prevents skewed results from unexpected optimizations to the procedure itself. The entry point is not the \texttt{main} function but a function called \texttt{simplify}. This is specified to the compiler as can be seen in listing \ref{lst:simplify-wasm-compiler-call}.

\lstinputlisting[
float=htpb,
language=bash,
% firstline=2, lastline=3,
label=lst:simplify-wasm-compiler-call,
caption={The call to compile the C source code to WebAssembly in a Makefile}
]{../lib/simplify-wasm/Makefile}
\todo{More about the compiler call}

Furthermore the functions \texttt{malloc} and \texttt{free} from the standard library are made available to the host environment. Compiling the code through Emscripten produces a binary file in wasm format and the glue code as JavaScript. These files are called \texttt{simplify.wasm} and \texttt{simplify.js} respectively.

An example usage can be seen in \path{lib/simplify-wasm/example.html}. Even though the memory access is abstracted in this example, the process is still cumbersome and far from a drop-in replacement of Simplify.js. Thus in \path{lib/simplify-wasm/index.js} a further abstraction of the Emscripten-emitted code was written. The exported function \texttt{simplifyWasm} handles module instantiation, memory access and the correct call to the exported wasm function. However, finding the correct path to the wasm binary is not always trivial when the code is imported from another location. The proposed solution is to leave the resolving of the code path to an asset bundler that processes the file in a preprocessing step.
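To illustrate why a wrapper is needed, the following sketch shows roughly what a manual call through the Emscripten \texttt{Module} object involves. The signature of the exported \texttt{\_simplify} function and the flat coordinate layout are assumptions for illustration purposes.

\begin{lstlisting}[
language=javascript,
caption={A sketch of calling the wasm module manually (assumed signature)}
]
// "Module" is the object created by the Emscripten glue code.
function simplifyRaw(flatCoords, tolerance, highQuality) {
  const bytesPerDouble = Float64Array.BYTES_PER_ELEMENT;

  // reserve linear memory and copy the flat coordinates into it
  const ptr = Module._malloc(flatCoords.length * bytesPerDouble);
  Module.HEAPF64.set(flatCoords, ptr / bytesPerDouble);

  // call the exported wasm function (assumed signature)
  const result = Module._simplify(ptr, flatCoords.length, tolerance, highQuality);

  // ... read the simplified coordinates back from Module.HEAPF64 ...

  // free the reserved memory again
  Module._free(ptr);
  return result;
}
\end{lstlisting}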
@ -58,7 +56,8 @@ An example usage can be seen in \path{lib/simplify-wasm/example.html}. Even thro
float=htpb,
language=javascript,
firstline=22, lastline=33,
label=lst:simplify-wasm,
caption={The top level function to invoke the WebAssembly simplification.}
]{../lib/simplify-wasm/index.js}

Listing \ref{lst:simplify-wasm} shows the function \texttt{simplifyWasm}. Further explanation will follow regarding the abstractions \texttt{getModule}, \texttt{storeCoords} and \texttt{loadResultAndFreeMemory}.

@ -83,7 +82,7 @@ caption=The storeCoords function,
label=lst:wasm-util-store-coords
]{../lib/wasm-util/coordinates.js}

\paragraph{To read the result} back from memory we have to look at how the simplification is returned in the C code. Listing \ref{lst:simplify-wasm-entrypoint} shows the entry point of the C code. This is the function that gets called from JavaScript. As expected, arrays are represented as pointers with a corresponding length. The first block of code (lines 2 to 6) is only meant for declaring the needed variables. Lines 8 to 12 mark the radial distance preprocessing. The result of this simplification is stored in an auxiliary array named \texttt{resultRdDistance}. In this case \texttt{points} will have to point to the new array and the length is adjusted. Finally the Douglas-Peucker procedure is invoked after reserving enough memory. The auxiliary array can be freed afterwards. The problem now is to return the result pointer and the array length back to the calling code. The fact that pointers in Emscripten are represented by 32-bit integers will be exploited to return a fixed-size array of two integers containing both values. A hacky solution, but it works. We can now look back at how the JavaScript code reads the result.
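Based on this description, reading the result on the JavaScript side could look roughly like the following sketch. The helper name, the exact memory layout and the ownership of the returned info array are assumptions.

\begin{lstlisting}[
language=javascript,
caption={A sketch of reading the two-integer result from linear memory (assumed layout)}
]
function loadResultAndFreeMemory(module, resultInfoPtr) {
  // the entry point returns a pointer to two 32 bit integers:
  // [pointer to the result coordinates, number of result values]
  const resultPtr = module.HEAPU32[resultInfoPtr / 4];
  const resultLength = module.HEAPU32[resultInfoPtr / 4 + 1];

  // copy the coordinates out of the linear memory
  const flatCoords = module.HEAPF64.slice(
    resultPtr / 8,
    resultPtr / 8 + resultLength
  );

  // the reserved result memory is no longer needed
  module._free(resultPtr);

  return flatCoords;
}
\end{lstlisting}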

\lstinputlisting[
float=tbph,
@ -114,7 +113,7 @@ For JavaScript applications there is also the possibility of reducing filesize b

For the WebAssembly solution two files are required to work with it: the wasm bytecode and the JavaScript glue code. The glue code is already minified by the Emscripten compiler. The binary has a size of 3.8KB while the JavaScript code has a total of 3.1KB. Simplify.js on the other hand merely needs a size of 1.1KB. With minification the size shrinks to 638 bytes.

File size was not the main priority when producing the WebAssembly solution. There are ways to further shrink the size of the wasm bytecode. As of now it contains the logic of the library but also necessary functionality from the C standard library. These parts were added by Emscripten automatically. The bloat comes from using the memory management functions malloc and free. If the goal was to reduce the file size, one would have to get along without memory management at all. This would even be possible in this case as the simplification is a self-contained process and the module has no other usage. The input size is known beforehand, so instead of reserving memory dynamically one could just append the result in memory at the location directly after the input feature. The function would merely need to return the result size. After the call is finished and the result is read by JavaScript the memory is not needed any more. A test build was made that renounced memory management. The size of the wasm bytecode shrunk to 507 bytes and the glue code to 2.8KB. By using the vanilla JavaScript API one could even ditch the glue code altogether \parencite{surma2019replacing}.
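Such a stripped-down module could then be driven by the vanilla WebAssembly JavaScript API alone. The following is a rough sketch under the assumptions just described; the export names and the memory layout are illustrative and not the actual implementation.

\begin{lstlisting}[
language=javascript,
caption={A sketch of using the vanilla WebAssembly API without glue code (assumed exports)}
]
async function loadSimplify(url) {
  // instantiate the module without any Emscripten glue code
  const { instance } = await WebAssembly.instantiateStreaming(fetch(url), {});

  return function simplify(flatCoords, tolerance, highQuality) {
    // write the input at the start of the exported linear memory
    const heap = new Float64Array(instance.exports.memory.buffer);
    heap.set(flatCoords, 0);

    // the module appends the result directly after the input
    // and only returns the number of result values
    const resultLength = instance.exports.simplify(
      flatCoords.length, tolerance, highQuality
    );
    return heap.slice(flatCoords.length, flatCoords.length + resultLength);
  };
}
\end{lstlisting}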

For simplicity the memory management was left in as the optimizations would require more careful engineering to ensure correct functionality. The example above shows however that there is enormous potential to cut the size. Even file sizes below the JavaScript original are possible.

@ -122,11 +121,11 @@ For simplicity the memory management was left in as the optimizations would requ

\subsection{The implementation of a web framework}
\label{ch:benchmark-app}

The performance comparison of the two methods will be realized in a web page. It will be built as a frontend web application that allows the user to specify the input parameters of the benchmark. These parameters are: the polyline to simplify, a range of tolerances to use for simplification and whether the so-called high quality mode shall be used. By building this application it will be possible to test a variety of use cases on multiple devices. Also the behavior of the algorithms can be researched under different preconditions. In the scope of this thesis a few cases will be investigated. The application structure will now be introduced.

\subsubsection{External libraries}

The dynamic aspects of the web page will be built in JavaScript. Webpack\footnote{https://webpack.js.org/} will be used to bundle the application code and to run compilers like Babel\footnote{https://babeljs.io/} on the source code. As mentioned in section \ref{sec:benchmark-webassembly} the bundler is also useful for handling references to the WebAssembly binary as it resolves the filename to the correct download path. There will intentionally be no transpiling of the JavaScript code to older versions of the ECMA standard. This is often done to increase compatibility with older browsers. Luckily this is not a requirement in this case and by refraining from this practice there will also be no unintentional impact on the application performance. Libraries in use are Benchmark.js\footnote{https://benchmarkjs.com/} for statistically significant benchmarking results, React\footnote{https://reactjs.org/} for building the user interface and Chart.js\footnote{https://www.chartjs.org/} for drawing graphs.

\subsubsection{The application logic}
The web page consists of static and dynamic content. The static parts refer to the header and footer with explanations about the project. Those are written directly into the root HTML document. The dynamic parts are injected by JavaScript. Those will be further discussed in this chapter as they make up the main application logic.
@ -145,18 +144,18 @@ The web app is built to test a variety of cases with multiple datapoints. As men

In the upper right corner the different use cases are listed. These cases implement a function \texttt{fn} to benchmark. Additional methods for setting up the function and cleaning up afterwards can be implemented as given by the parent class \texttt{BenchmarkCase}. Concrete cases can be created by instantiating one of the BenchmarkCases with a defined set of parameters; a sketch of such a case follows after the list below. There are three charts that will be rendered using a subset of these cases. These are:
\begin{itemize}
\item \textbf{Simplify.js vs Simplify.wasm} - This chart shows the performance of the simplification by Simplify.js, the altered version of Simplify.js and the newly developed Simplify.wasm.
\item \textbf{Simplify.wasm runtime analysis} - To gain further insights into WebAssembly performance this stacked bar chart shows the runtime of a call to Simplify.wasm. It is partitioned into the time spent for preparing the data (\texttt{storeCoords}), the algorithm itself and the time it took for the coordinates to be restored from memory (\texttt{loadResult}).
\item \textbf{Turf.js method runtime analysis} - The last chart will use a similar structure. This time it analyses the performance impact of the back and forth transformation of data used in Turf.js.
\end{itemize}
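The following sketch illustrates how such a concrete case could be defined. The constructor arguments and field names are assumptions based on the description above, not the actual implementation.

\begin{lstlisting}[
language=javascript,
caption={A sketch of a concrete benchmark case (assumed fields)}
]
class SimplifyWasmCase extends BenchmarkCase {
  constructor({ coordinates, tolerance, highQuality }) {
    super("Simplify.wasm");
    this.coordinates = coordinates;
    this.tolerance = tolerance;
    this.highQuality = highQuality;
  }

  // optional preparation, e.g. instantiating the wasm module
  async setUp() {
    this.module = await getModule();
  }

  // the function whose runtime is measured
  fn() {
    simplifyWasm(this.coordinates, this.tolerance, this.highQuality);
  }
}
\end{lstlisting}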

\subsubsection{The different benchmark types}
On the bottom the different types of benchmarks implemented can be seen. They all implement the abstract \texttt{measure} function to return the mean time to run the function specified in the given BenchmarkCase. The \texttt{IterationsBenchmark} runs the function a specified number of times, while the \texttt{OpsPerTimeBenchmark} always runs for a certain amount of milliseconds to fit in as many iterations as possible. Both methods have their benefits and drawbacks. Using the iterations approach one cannot determine beforehand how long the benchmark will run. With fast devices and a small number of iterations one can even fall into the trap of the duration falling below the accuracy of the timer used. Those results would of course be unusable. It is however a very fast way of determining the speed of a function, and it is valuable for getting a first approximation of how the algorithms perform over the span of datapoints. The second type, the operations per time benchmark, seems to overcome this problem. It is however prone to garbage collection, engine optimizations and other background processes. \parencite{bynens2010bulletproof}
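A minimal sketch of the iterations approach could look like this; it only assumes that a case exposes the function to measure as \texttt{fn}.

\begin{lstlisting}[
language=javascript,
caption={A sketch of the iterations benchmark type}
]
class IterationsBenchmark {
  constructor(iterations) {
    this.iterations = iterations;
  }

  // returns the mean runtime of one call in milliseconds
  measure(benchmarkCase) {
    const start = performance.now();
    for (let i = 0; i < this.iterations; i++) {
      benchmarkCase.fn();
    }
    return (performance.now() - start) / this.iterations;
  }
}
\end{lstlisting}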

Benchmark.js combines these approaches. In a first step it approximates the runtime in a few cycles. From this value it calculates the number of iterations needed to reach an uncertainty of at most 1\%. Then the samples are gathered. \parencite{hossain2012benchmark}
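Used directly, a single measurement with Benchmark.js could look like the following sketch; the benchmarked function and its inputs are placeholders.

\begin{lstlisting}[
language=javascript,
caption={A sketch of measuring one case with Benchmark.js}
]
import Benchmark from "benchmark";

const bench = new Benchmark("simplify example", () => {
  simplify(points, tolerance, true);
});

bench.on("complete", function () {
  // mean runtime per call in seconds and relative margin of error in percent
  console.log(this.stats.mean, this.stats.rme);
});

bench.run();
\end{lstlisting}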

\subsubsection{The benchmark suite}
For running multiple benchmarks the class \texttt{BenchmarkSuite} was created. It takes a list of BenchmarkCases and runs them through a BenchmarkType. The suite manages starting, pausing and stopping of going through the list. It updates the statistics gathered on each cycle. By injecting an \textsl{onCycle} method, the \texttt{Runner} component can give live feedback about the progress.

\begin{figure}[htb]
\centering
@ -165,7 +164,7 @@ For running multiple benchmarks the class \texttt{BenchmarkSuite} was created. I
\caption{The state machine for the benchmark suite}
\end{figure}

Figure \ref{fig:benchmarking-statemachine} shows the state machine of the suite. Based on this diagram the user interface component shows action buttons so the user can interact with the state. While running, the suite checks if a state change was requested and acts accordingly by pausing the benchmarks or resetting all gathered statistics when stopping.

\subsubsection{The user interface}

@ -1,13 +1,13 @@
\section[Results]{Benchmark results}

In this chapter the results are presented. There was a multitude of tests to run. Multiple devices were used to run several benchmarks on different browsers and under various parameters. To organize which benchmarks had to run, first all the problem dimensions were clarified. Devices will be categorized into desktop and mobile devices. The browsers to test will come from the four major browser vendors which were involved in WebAssembly development. Those are Firefox from Mozilla, Chrome from Google, Edge from Microsoft and Safari from Apple. For either of the two data sets a fixed range of tolerances is set to maintain consistency across the diagrams. The other parameter, "high quality", can either be switched on or off. The three chart types are explained in chapter \ref{ch:benchmark-cases}.
All benchmark results shown here can be interactively explored at the web page provided together with this thesis. The static files lie in the \path{build} folder. The results can be found when following the "show prepared results"-link on the home page.

Each section in this chapter describes a set of benchmarks run on the same system. A table in the beginning will indicate the problem dimensions chosen to inspect. After a description of the system and a short summary of the case, the results will be presented in the form of graphs. Those are the graphs produced by the application described in chapter \ref{ch:benchmark-app}. Here the results will only be briefly characterized. A further analysis will follow in the next chapter.

\subsection{Case 1 - WebAssembly vs JavaScript in different browsers}
\label{ch:case1}

\begin{table}[htb]
@ -19,7 +19,7 @@ Each section in this chapter describes a set of benchmarks run on the same syste

At first it will be observed how the algorithms perform under different browsers. The chart to use for this is the "Simplify.js vs Simplify.wasm" chart. For that a Windows system was chosen as it allows running benchmarks under three of the four browsers in question. The dataset is the Simplify.js example which will be simplified with and without the high quality mode.
The device is a \textsf{HP Pavilion x360 - 14-ba101ng}\footnote{\url{https://support.hp.com/us-en/product/hp-pavilion-14-ba100-x360-convertible-pc/16851098/model/18280360/document/c05691748}} convertible. It contains an Intel® Core™ i5-8250U Processor with 4 cores and 6MB cache. The operating system is Windows 10 and the browsers are on their newest versions with Chrome 75, Firefox 68 and Edge 44.18362.1.0.

Table \ref{tbl:dimensions-1} summarizes the setting. For each problem dimension the chosen characteristics are highlighted in green color. The number of benchmark diagrams in a chapter is determined by the combinations of the selected characteristics. In this case three browsers are tested, each with two quality options, resulting in six diagrams.
@ -29,16 +29,16 @@ Table \ref{tbl:dimensions-1} summarizes the setting. For each problem dimension
The first two graphs (figure \ref{fig:win_ffox_simplify_vs_false} and \ref{fig:win_ffox_simplify_vs_true}) show the results for the Firefox browser. Here and in all subsequent charts of this chapter the red line indicates the performance of Simplify.wasm, the blue line represents Simplify.js and the green line its alternative that operates on coordinates as nested arrays. The gray line represents the number of positions that remain in the simplified polyline.

Simplify.js runs without the high quality mode per default. Here, at the smallest tolerance chosen, the WebAssembly solution is the fastest method. It is immediately overtaken by the original JavaScript implementation, which from there on continues to be the fastest of the three methods. The alternative is slowest in every case.

With the high quality mode enabled, however, the original and the WebAssembly solution switch places. Here Simplify.wasm is always faster. The Simplify.js alternative clearly separates itself by being much slower than the other two. It does however have a steeper curve, whereas the original and the WebAssembly solution show fairly consistent performance through the whole tolerance range.

\input{./results-benchmark/win_chro_simplify_vs_false.tex}
\input{./results-benchmark/win_chro_simplify_vs_true.tex}

Figures \ref{fig:win_chro_simplify_vs_false} and \ref{fig:win_chro_simplify_vs_true} show the results under Chrome for the same setting. Here the performance seems to be switched around, with the original being the slowest method in both cases. This version however shows very inconsistent results. There is no clear curvature, which indicates some outside influence on the results. Either there is a flaw in the implementation or a special case of engine optimization was hit.

Without high quality mode Simplify.wasm gets overtaken by the Simplify.js alternative at a tolerance of 0.4. From there on the WebAssembly solution stagnates while the JavaScript one continues to get faster. With high quality enabled the performance gain of WebAssembly is clearer than in Firefox. Here the Simplify.js alternative is the second fastest, followed by the original.

\input{./results-benchmark/win_edge_simplify_vs_false.tex}
\input{./results-benchmark/win_edge_simplify_vs_true.tex}
@ -46,7 +46,7 @@ Without high quality mode the Simplify.wasm gets overtaken by the Simplify.js al

Interestingly, in the Edge browser the two JavaScript algorithms perform more alike when high quality is disabled. As can be seen in figure \ref{fig:win_edge_simplify_vs_false}, the turning point where WebAssembly is no longer the fastest lies at around 0.45 to 0.6. When turning high quality on, the graph in figure \ref{fig:win_edge_simplify_vs_true} resembles the chart from Chrome, only with more consistent results for the original implementation.

\FloatBarrier
\subsection{Case 2 - Simplify.wasm runtime analysis}
\label{ch:case2}

\begin{table}[!htb]
@ -69,7 +69,7 @@ Inspecting figures \ref{fig:win_edge_simplify_stack_false} and \ref{fig:win_edge

In the case of high quality disabled the results show a very steep curve for the execution time. Quickly the time span for preparing the memory dominates the process. In the second graph it can be seen that this fraction is significantly lower due to the execution time being consistently higher.

\FloatBarrier
\subsection{Case 3 - Benchmarking Safari on MacOS}
\label{ch:case3}

\begin{table}[!htb]
@ -94,7 +94,7 @@ The results of the Safari browser with high quality disabled (figure \ref{fig:ma
When turning on high quality mode the JavaScript implementations still perform alike. However Simplify.wasm is clearly faster as seen in figure \ref{fig:mac_safa_bavaria_vs_true}. Simplify.wasm performs here about twice as fast as the algorithms implemented in JavaScript. Those however have a steeper decrease as the tolerance numbers go up.

\FloatBarrier
\subsection{Case 4 - Measuring the Turf.js method}
\label{ch:case4}

\begin{table}[!htb]
@ -119,7 +119,7 @@ Figure \ref{fig:ubu_ffox_bavaria_vs_true} shows how the JavaScript versions perf

The next two figures show the case when high quality is disabled. In figure \ref{fig:ubu_ffox_bavaria_vs_false} the two algorithms seem to converge. And when looking at figure \ref{fig:ubu_ffox_bavaria_jsstack_false} one can see that the data preparation gets more costly as the tolerance rises. From a tolerance of 0.0014 on, the alternative Simplify.js implementation is faster than the Turf.js method.

\FloatBarrier
\subsection{Case 5 - Mobile benchmarking}
\label{ch:case5}

\begin{table}[!htb]
Binary image file not shown (before: 269 KiB, after: 211 KiB).
BIN thesis/main.pdf: Binary file not shown.
@ -11,6 +11,13 @@

marginparwidth=2cm
}

% for bibliography
\usepackage[style=authoryear]{biblatex}
\addbibresource{bibliography/bibliography.bib}
\addbibresource{bibliography/algorithms.bib}
\addbibresource{bibliography/specifications.bib}
\addbibresource{bibliography/wasm.bib}

\usepackage{graphicx} % for figures
\usepackage{todonotes} % for todo notes
\usepackage{url} % for filepaths and urls
@ -67,6 +74,8 @@ Abstract goes here
\newpage
\lstlistoflistings
\newpage
\printbibliography
\newpage

% \input{gliederung.tex}