io_win32.cc 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414
  1. // Protocol Buffers - Google's data interchange format
  2. // Copyright 2008 Google Inc. All rights reserved.
  3. // https://developers.google.com/protocol-buffers/
  4. //
  5. // Redistribution and use in source and binary forms, with or without
  6. // modification, are permitted provided that the following conditions are
  7. // met:
  8. //
  9. // * Redistributions of source code must retain the above copyright
  10. // notice, this list of conditions and the following disclaimer.
  11. // * Redistributions in binary form must reproduce the above
  12. // copyright notice, this list of conditions and the following disclaimer
  13. // in the documentation and/or other materials provided with the
  14. // distribution.
  15. // * Neither the name of Google Inc. nor the names of its
  16. // contributors may be used to endorse or promote products derived from
  17. // this software without specific prior written permission.
  18. //
  19. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  20. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  21. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  22. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  23. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  25. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  26. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  27. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  28. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  29. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30. // Author: laszlocsomor@google.com (Laszlo Csomor)
  31. //
  32. // Implementation for long-path-aware open/mkdir/access/etc. on Windows, as well
  33. // as for the supporting utility functions.
  34. //
  35. // These functions convert the input path to an absolute Windows path
  36. // with "\\?\" prefix, then pass that to _wopen/_wmkdir/_waccess/etc.
  37. // (declared in <io.h>) respectively. This allows working with files/directories
  38. // whose paths are longer than MAX_PATH (260 chars).
  39. //
  40. // This file is only used on Windows, it's empty on other platforms.
  41. #if defined(_WIN32)
  42. // Comment this out to fall back to using the ANSI versions (open, mkdir, ...)
  43. // instead of the Unicode ones (_wopen, _wmkdir, ...). Doing so can be useful to
  44. // debug failing tests if that's caused by the long path support.
  45. #define SUPPORT_LONGPATHS
  46. #include <ctype.h>
  47. #include <direct.h>
  48. #include <errno.h>
  49. #include <fcntl.h>
  50. #include <io.h>
  51. #include <memory>
  52. #include <sys/stat.h>
  53. #include <sys/types.h>
  54. #include <wctype.h>
  55. #include <windows.h>
  56. #include <google/protobuf/stubs/io_win32.h>
  57. #include <memory>
  58. #include <sstream>
  59. #include <string>
  60. #include <vector>
  61. namespace google {
  62. namespace protobuf {
  63. namespace internal {
  64. namespace win32 {
  65. namespace {
  66. using std::string;
  67. using std::wstring;
  // Character classification helpers, specialized per character type so the
  // path predicates below can be written once as templates.
  //
  // The primary template is only declared; the `char` and `wchar_t`
  // specializations provide the implementations.
  template <typename char_type>
  struct CharTraits {
    static bool is_alpha(char_type ch);
  };

  template <>
  struct CharTraits<char> {
    // Returns true if `ch` is alphabetic according to isalpha().
    // The cast to unsigned char is required: passing a negative `char` value
    // (e.g. a byte of a multi-byte UTF-8 sequence) to isalpha() is undefined.
    static bool is_alpha(char ch) {
      return isalpha(static_cast<unsigned char>(ch)) != 0;
    }
  };

  template <>
  struct CharTraits<wchar_t> {
    // Returns true if `ch` is alphabetic according to iswalpha().
    static bool is_alpha(wchar_t ch) { return iswalpha(ch) != 0; }
  };
  // Returns true if `s` is a null pointer or points at an empty
  // (immediately NUL-terminated) string.
  template <typename char_type>
  bool null_or_empty(const char_type* s) {
    if (s == nullptr) {
      return true;
    }
    return s[0] == 0;
  }
  84. // Returns true if the path starts with a drive letter, e.g. "c:".
  85. // Note that this won't check for the "\" after the drive letter, so this also
  86. // returns true for "c:foo" (which is "c:\${PWD}\foo").
  87. // This check requires that a path not have a longpath prefix ("\\?\").
  88. template <typename char_type>
  89. bool has_drive_letter(const char_type* ch) {
  90. return CharTraits<char_type>::is_alpha(ch[0]) && ch[1] == ':';
  91. }
  // Tells whether the path begins with the longpath prefix ("\\?\").
  template <typename char_type>
  bool has_longpath_prefix(const char_type* path) {
    const char kPrefix[4] = {'\\', '\\', '?', '\\'};
    // Stop at the first mismatch so a short string is never read past its
    // terminating NUL.
    for (int i = 0; i < 4; ++i) {
      if (path[i] != kPrefix[i]) {
        return false;
      }
    }
    return true;
  }
  // Tells whether `c` is a path separator: forward slash or backslash.
  template <typename char_type>
  bool is_separator(char_type c) {
    switch (c) {
      case '/':
      case '\\':
        return true;
      default:
        return false;
    }
  }
  // Tells whether the path begins with a full drive specifier, i.e. a drive
  // letter, a colon and a separator (e.g. "c:\" or "c:/").
  template <typename char_type>
  bool is_path_absolute(const char_type* path) {
    if (!has_drive_letter(path)) {
      return false;
    }
    return is_separator(path[2]);
  }
  // Tells whether the path has a drive letter that is NOT followed by a
  // separator, e.g. "c:" or "c:foo".
  template <typename char_type>
  bool is_drive_relative(const char_type* path) {
    if (!has_drive_letter(path)) {
      return false;
    }
    const char_type after_colon = path[2];
    if (after_colon == 0) {
      return true;
    }
    return !is_separator(after_colon);
  }
  111. wstring join_paths(const wstring& path1, const wstring& path2) {
  112. if (path1.empty() || is_path_absolute(path2.c_str()) ||
  113. has_longpath_prefix(path2.c_str())) {
  114. return path2;
  115. }
  116. if (path2.empty()) {
  117. return path1;
  118. }
  119. if (is_separator(path1[path1.size() - 1])) {
  120. return is_separator(path2[0]) ? (path1 + path2.substr(1))
  121. : (path1 + path2);
  122. } else {
  123. return is_separator(path2[0]) ? (path1 + path2)
  124. : (path1 + L'\\' + path2);
  125. }
  126. }
  127. wstring normalize(wstring path) {
  128. if (has_longpath_prefix(path.c_str())) {
  129. path = path.substr(4);
  130. }
  131. static const wstring dot(L".");
  132. static const wstring dotdot(L"..");
  133. const WCHAR* p = path.c_str();
  134. std::vector<wstring> segments;
  135. int segment_start = -1;
  136. // Find the path segments in `path` (separated by "/").
  137. for (int i = 0;; ++i) {
  138. if (!is_separator(p[i]) && p[i] != L'\0') {
  139. // The current character does not end a segment, so start one unless it's
  140. // already started.
  141. if (segment_start < 0) {
  142. segment_start = i;
  143. }
  144. } else if (segment_start >= 0 && i > segment_start) {
  145. // The current character is "/" or "\0", so this ends a segment.
  146. // Add that to `segments` if there's anything to add; handle "." and "..".
  147. wstring segment(p, segment_start, i - segment_start);
  148. segment_start = -1;
  149. if (segment == dotdot) {
  150. if (!segments.empty() &&
  151. (!has_drive_letter(segments[0].c_str()) || segments.size() > 1)) {
  152. segments.pop_back();
  153. }
  154. } else if (segment != dot && !segment.empty()) {
  155. segments.push_back(segment);
  156. }
  157. }
  158. if (p[i] == L'\0') {
  159. break;
  160. }
  161. }
  162. // Handle the case when `path` is just a drive specifier (or some degenerate
  163. // form of it, e.g. "c:\..").
  164. if (segments.size() == 1 && segments[0].size() == 2 &&
  165. has_drive_letter(segments[0].c_str())) {
  166. return segments[0] + L'\\';
  167. }
  168. // Join all segments.
  169. bool first = true;
  170. std::wstringstream result;
  171. for (int i = 0; i < segments.size(); ++i) {
  172. if (!first) {
  173. result << L'\\';
  174. }
  175. first = false;
  176. result << segments[i];
  177. }
  178. // Preserve trailing separator if the input contained it.
  179. if (!path.empty() && is_separator(p[path.size() - 1])) {
  180. result << L'\\';
  181. }
  182. return result.str();
  183. }
  184. bool as_windows_path(const char* path, wstring* result) {
  185. if (null_or_empty(path)) {
  186. result->clear();
  187. return true;
  188. }
  189. wstring wpath;
  190. if (!strings::utf8_to_wcs(path, &wpath)) {
  191. return false;
  192. }
  193. if (has_longpath_prefix(wpath.c_str())) {
  194. *result = wpath;
  195. return true;
  196. }
  197. if (is_separator(path[0]) || is_drive_relative(path)) {
  198. return false;
  199. }
  200. if (!is_path_absolute(wpath.c_str())) {
  201. int size = ::GetCurrentDirectoryW(0, NULL);
  202. if (size == 0 && GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
  203. return false;
  204. }
  205. std::unique_ptr<WCHAR[]> wcwd(new WCHAR[size]);
  206. ::GetCurrentDirectoryW(size, wcwd.get());
  207. wpath = join_paths(wcwd.get(), wpath);
  208. }
  209. wpath = normalize(wpath);
  210. if (!has_longpath_prefix(wpath.c_str())) {
  211. // Add the "\\?\" prefix unconditionally. This way we prevent the Win32 API
  212. // from processing the path and "helpfully" removing trailing dots from the
  213. // path, for example.
  214. // See https://github.com/bazelbuild/bazel/issues/2935
  215. wpath = wstring(L"\\\\?\\") + wpath;
  216. }
  217. *result = wpath;
  218. return true;
  219. }
  220. } // namespace
  221. int open(const char* path, int flags, int mode) {
  222. #ifdef SUPPORT_LONGPATHS
  223. wstring wpath;
  224. if (!as_windows_path(path, &wpath)) {
  225. errno = ENOENT;
  226. return -1;
  227. }
  228. return ::_wopen(wpath.c_str(), flags, mode);
  229. #else
  230. return ::_open(path, flags, mode);
  231. #endif
  232. }
  233. int mkdir(const char* path, int _mode) {
  234. #ifdef SUPPORT_LONGPATHS
  235. wstring wpath;
  236. if (!as_windows_path(path, &wpath)) {
  237. errno = ENOENT;
  238. return -1;
  239. }
  240. return ::_wmkdir(wpath.c_str());
  241. #else // not SUPPORT_LONGPATHS
  242. return ::_mkdir(path);
  243. #endif // not SUPPORT_LONGPATHS
  244. }
  245. int access(const char* path, int mode) {
  246. #ifdef SUPPORT_LONGPATHS
  247. wstring wpath;
  248. if (!as_windows_path(path, &wpath)) {
  249. errno = ENOENT;
  250. return -1;
  251. }
  252. return ::_waccess(wpath.c_str(), mode);
  253. #else
  254. return ::_access(path, mode);
  255. #endif
  256. }
  257. int chdir(const char* path) {
  258. #ifdef SUPPORT_LONGPATHS
  259. wstring wpath;
  260. if (!as_windows_path(path, &wpath)) {
  261. errno = ENOENT;
  262. return -1;
  263. }
  264. return ::_wchdir(wpath.c_str());
  265. #else
  266. return ::_chdir(path);
  267. #endif
  268. }
  269. int stat(const char* path, struct _stat* buffer) {
  270. #ifdef SUPPORT_LONGPATHS
  271. wstring wpath;
  272. if (!as_windows_path(path, &wpath)) {
  273. errno = ENOENT;
  274. return -1;
  275. }
  276. return ::_wstat(wpath.c_str(), buffer);
  277. #else // not SUPPORT_LONGPATHS
  278. return ::_stat(path, buffer);
  279. #endif // not SUPPORT_LONGPATHS
  280. }
  // Long-path-aware replacement for fopen().
  //
  // With SUPPORT_LONGPATHS:
  //   - a null or empty `path` or `mode` fails with errno = EINVAL;
  //   - a path that as_windows_path() cannot convert fails with errno = ENOENT;
  //   - a `mode` that cannot be converted to wide characters fails with
  //     errno = EINVAL;
  //   - otherwise the converted path and mode are handed to _wfopen().
  // Without SUPPORT_LONGPATHS the call is forwarded to ::fopen().
  // Returns NULL on failure.
  FILE* fopen(const char* path, const char* mode) {
  #ifdef SUPPORT_LONGPATHS
    // Reject missing arguments up front. An absent mode would otherwise be
    // converted to an empty wide string and passed on to _wfopen().
    if (null_or_empty(path) || null_or_empty(mode)) {
      errno = EINVAL;
      return NULL;
    }
    wstring wpath;
    if (!as_windows_path(path, &wpath)) {
      errno = ENOENT;
      return NULL;
    }
    wstring wmode;
    if (!strings::utf8_to_wcs(mode, &wmode)) {
      errno = EINVAL;
      return NULL;
    }
    return ::_wfopen(wpath.c_str(), wmode.c_str());
  #else  // not SUPPORT_LONGPATHS
    return ::fopen(path, mode);
  #endif  // not SUPPORT_LONGPATHS
  }
  302. int close(int fd) { return ::close(fd); }
  303. int dup(int fd) { return ::_dup(fd); }
  304. int dup2(int fd1, int fd2) { return ::_dup2(fd1, fd2); }
  305. int read(int fd, void* buffer, size_t size) {
  306. return ::_read(fd, buffer, size);
  307. }
  308. int setmode(int fd, int mode) { return ::_setmode(fd, mode); }
  309. int write(int fd, const void* buffer, size_t size) {
  310. return ::_write(fd, buffer, size);
  311. }
  312. wstring testonly_utf8_to_winpath(const char* path) {
  313. wstring wpath;
  314. return as_windows_path(path, &wpath) ? wpath : wstring();
  315. }
  316. namespace strings {
  317. bool wcs_to_mbs(const WCHAR* s, string* out, bool outUtf8) {
  318. if (null_or_empty(s)) {
  319. out->clear();
  320. return true;
  321. }
  322. BOOL usedDefaultChar = FALSE;
  323. SetLastError(0);
  324. int size = WideCharToMultiByte(
  325. outUtf8 ? CP_UTF8 : CP_ACP, 0, s, -1, NULL, 0, NULL,
  326. outUtf8 ? NULL : &usedDefaultChar);
  327. if ((size == 0 && GetLastError() != ERROR_INSUFFICIENT_BUFFER)
  328. || usedDefaultChar) {
  329. return false;
  330. }
  331. std::unique_ptr<CHAR[]> astr(new CHAR[size]);
  332. WideCharToMultiByte(
  333. outUtf8 ? CP_UTF8 : CP_ACP, 0, s, -1, astr.get(), size, NULL, NULL);
  334. out->assign(astr.get());
  335. return true;
  336. }
  337. bool mbs_to_wcs(const char* s, wstring* out, bool inUtf8) {
  338. if (null_or_empty(s)) {
  339. out->clear();
  340. return true;
  341. }
  342. SetLastError(0);
  343. int size =
  344. MultiByteToWideChar(inUtf8 ? CP_UTF8 : CP_ACP, 0, s, -1, NULL, 0);
  345. if (size == 0 && GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
  346. return false;
  347. }
  348. std::unique_ptr<WCHAR[]> wstr(new WCHAR[size]);
  349. MultiByteToWideChar(
  350. inUtf8 ? CP_UTF8 : CP_ACP, 0, s, -1, wstr.get(), size + 1);
  351. out->assign(wstr.get());
  352. return true;
  353. }
  354. bool utf8_to_wcs(const char* input, wstring* out) {
  355. return mbs_to_wcs(input, out, true);
  356. }
  357. bool wcs_to_utf8(const wchar_t* input, string* out) {
  358. return wcs_to_mbs(input, out, true);
  359. }
  360. } // namespace strings
  361. } // namespace win32
  362. } // namespace internal
  363. } // namespace protobuf
  364. } // namespace google
  365. #endif // defined(_WIN32)