string_to_double.cpp 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223
  // A core part of the xlsx parsing routine is taking strings from the XML parser and parsing them to a double.
  // This has a few requirements:
  // - expect strings in the form 1234.56 (i.e. no thousands separator, '.' used as the decimal separator)
  // - handle at least 15 significant figures (Excel only serialises numbers up to 15 s.f.)
  #include <benchmark/benchmark.h>

  #include <algorithm>
  #include <cassert>
  #include <clocale>
  #include <cstddef>
  #include <cstdio>
  #include <cstdlib>
  #include <locale>
  #include <random>
  #include <sstream>
  #include <string>
  #include <vector>
  9. namespace {
  10. // setup a large quantity of random doubles as strings
  11. template <bool Decimal_Locale = true>
  12. class RandomFloatStrs : public benchmark::Fixture
  13. {
  14. static constexpr size_t Number_of_Elements = 1 << 20;
  15. static_assert(Number_of_Elements > 1'000'000, "ensure a decent set of random values is generated");
  16. std::vector<std::string> inputs;
  17. size_t index = 0;
  18. const char *locale_str = nullptr;
  19. public:
  20. void SetUp(const ::benchmark::State &state)
  21. {
  22. if (Decimal_Locale)
  23. {
  24. locale_str = setlocale(LC_ALL, "C");
  25. }
  26. else
  27. {
  28. locale_str = setlocale(LC_ALL, "de-DE");
  29. }
  30. std::random_device rd; // obtain a seed for the random number engine
  31. std::mt19937 gen(rd());
  32. // doing full range is stupid (<double>::min/max()...), it just ends up generating very large numbers
  33. // uniform is probably not the best distribution to use here, but it will do for now
  34. std::uniform_real_distribution<double> dis(-1'000, 1'000);
  35. // generate a large quantity of doubles to deserialise
  36. inputs.reserve(Number_of_Elements);
  37. for (int i = 0; i < Number_of_Elements; ++i)
  38. {
  39. double d = dis(gen);
  40. char buf[16];
  41. snprintf(buf, 16, "%.15f", d);
  42. inputs.push_back(std::string(buf));
  43. }
  44. }
  45. void TearDown(const ::benchmark::State &state)
  46. {
  47. // restore locale
  48. setlocale(LC_ALL, locale_str);
  49. // gbench is keeping the fixtures alive somewhere, need to clear the data after use
  50. inputs = std::vector<std::string>{};
  51. }
  52. std::string &get_rand()
  53. {
  54. return inputs[++index & (Number_of_Elements - 1)];
  55. }
  56. };
  // Approach used by xlsx_consumer.cpp as of commit ba01de47a7d430764c20ec9ac9600eec0eb38bcf:
  // a reusable std::istringstream imbued with the "C" locale.
  struct number_converter
  {
      std::istringstream stream;
      double result;

      number_converter()
      {
          stream.imbue(std::locale("C"));
      }

      // Loads `s` into the stream, extracts a double from it and returns that value.
      double stold(const std::string &s)
      {
          // reset any error/eof flags left over from the previous extraction
          stream.clear();
          stream.str(s);
          stream >> result;
          return result;
      }
  };
  // strtod parses using the decimal point of the current C locale, whereas the input always uses '.'.
  // To resolve this, a little preprocessing of the input is required: when the C locale expects ','
  // the first '.' in the input is replaced by ',' before calling strtod.
  //
  // The locale is sampled once, in the constructor, so construct the converter after the locale
  // that strtod will run under has been set.
  struct number_converter_mk2
  {
      explicit number_converter_mk2()
          : should_convert_to_comma(c_locale_uses_comma())
      {
      }

      // In-place overload: may replace the first '.' in `s` with ',' (the caller's string is modified),
      // then parses `s` with strtod. `s` must not be empty.
      double stold(std::string &s) const noexcept
      {
          assert(!s.empty());
          if (should_convert_to_comma)
          {
              const auto decimal_pt = std::find(s.begin(), s.end(), '.');
              if (decimal_pt != s.end())
              {
                  *decimal_pt = ',';
              }
          }
          return std::strtod(s.c_str(), nullptr);
      }

      // Non-modifying overload: parses `s` directly when no conversion is needed, otherwise works on
      // a copy with the first '.' replaced by ','. `s` must not be empty.
      double stold(const std::string &s) const
      {
          assert(!s.empty());
          if (!should_convert_to_comma)
          {
              return std::strtod(s.c_str(), nullptr);
          }
          std::string copy(s);
          const auto decimal_pt = std::find(copy.begin(), copy.end(), '.');
          if (decimal_pt != copy.end())
          {
              *decimal_pt = ',';
          }
          return std::strtod(copy.c_str(), nullptr);
      }

  private:
      // True when the current C locale (the one strtod consults) uses a single ',' as its decimal point.
      // This must query the C locale via localeconv: the global C++ locale (std::locale{}) is not
      // changed by setlocale, so it cannot be used to predict what strtod expects.
      static bool c_locale_uses_comma()
      {
          const std::lconv *conv = std::localeconv();
          return conv != nullptr && conv->decimal_point != nullptr && conv->decimal_point[0] == ',' &&
                 conv->decimal_point[1] == '\0';
      }

      bool should_convert_to_comma = false;
  };
  113. using RandFloatStrs = RandomFloatStrs<true>;
  114. // german locale uses ',' as the seperator
  115. using RandFloatCommaStrs = RandomFloatStrs<false>;
  116. } // namespace
  117. BENCHMARK_F(RandFloatStrs, double_from_string_sstream)
  118. (benchmark::State &state)
  119. {
  120. number_converter converter;
  121. while (state.KeepRunning())
  122. {
  123. benchmark::DoNotOptimize(
  124. converter.stold(get_rand()));
  125. }
  126. }
  // using strtod
  // https://en.cppreference.com/w/cpp/string/byte/strtof
  // this naive usage is broken in the face of locales (fails requirement 1)
  130. #include <cstdlib>
  131. BENCHMARK_F(RandFloatStrs, double_from_string_strtod)
  132. (benchmark::State &state)
  133. {
  134. while (state.KeepRunning())
  135. {
  136. benchmark::DoNotOptimize(
  137. strtod(get_rand().c_str(), nullptr));
  138. }
  139. }
  140. BENCHMARK_F(RandFloatStrs, double_from_string_strtod_fixed)
  141. (benchmark::State &state)
  142. {
  143. number_converter_mk2 converter;
  144. while (state.KeepRunning())
  145. {
  146. benchmark::DoNotOptimize(
  147. converter.stold(get_rand()));
  148. }
  149. }
  150. BENCHMARK_F(RandFloatStrs, double_from_string_strtod_fixed_const_ref)
  151. (benchmark::State &state)
  152. {
  153. number_converter_mk2 converter;
  154. while (state.KeepRunning())
  155. {
  156. const std::string &inp = get_rand();
  157. benchmark::DoNotOptimize(
  158. converter.stold(inp));
  159. }
  160. }
  161. // locale names are different between OS's, and std::from_chars is only complete in MSVC
  162. #ifdef _MSC_VER
  163. #include <charconv>
  164. BENCHMARK_F(RandFloatStrs, double_from_string_std_from_chars)
  165. (benchmark::State &state)
  166. {
  167. while (state.KeepRunning())
  168. {
  169. const std::string &input = get_rand();
  170. double output;
  171. benchmark::DoNotOptimize(
  172. std::from_chars(input.data(), input.data() + input.size(), output));
  173. }
  174. }
  175. // not using the standard "C" locale with '.' seperator
  176. BENCHMARK_F(RandFloatCommaStrs, double_from_string_strtod_fixed_comma_ref)
  177. (benchmark::State &state)
  178. {
  179. number_converter_mk2 converter;
  180. while (state.KeepRunning())
  181. {
  182. benchmark::DoNotOptimize(
  183. converter.stold(get_rand()));
  184. }
  185. }
  186. BENCHMARK_F(RandFloatCommaStrs, double_from_string_strtod_fixed_comma_const_ref)
  187. (benchmark::State &state)
  188. {
  189. number_converter_mk2 converter;
  190. while (state.KeepRunning())
  191. {
  192. const std::string &inp = get_rand();
  193. benchmark::DoNotOptimize(
  194. converter.stold(inp));
  195. }
  196. }
  197. #endif