csharp_helpers.cc 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507
  1. // Protocol Buffers - Google's data interchange format
  2. // Copyright 2008 Google Inc. All rights reserved.
  3. // https://developers.google.com/protocol-buffers/
  4. //
  5. // Redistribution and use in source and binary forms, with or without
  6. // modification, are permitted provided that the following conditions are
  7. // met:
  8. //
  9. // * Redistributions of source code must retain the above copyright
  10. // notice, this list of conditions and the following disclaimer.
  11. // * Redistributions in binary form must reproduce the above
  12. // copyright notice, this list of conditions and the following disclaimer
  13. // in the documentation and/or other materials provided with the
  14. // distribution.
  15. // * Neither the name of Google Inc. nor the names of its
  16. // contributors may be used to endorse or promote products derived from
  17. // this software without specific prior written permission.
  18. //
  19. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  20. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  21. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  22. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  23. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  25. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  26. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  27. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  28. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  29. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30. // Author: kenton@google.com (Kenton Varda)
  31. // Based on original Protocol Buffers design by
  32. // Sanjay Ghemawat, Jeff Dean, and others.
  33. #include <algorithm>
  34. #include <google/protobuf/stubs/hash.h>
  35. #include <limits>
  36. #include <vector>
  37. #include <google/protobuf/compiler/csharp/csharp_helpers.h>
  38. #include <google/protobuf/compiler/csharp/csharp_names.h>
  39. #include <google/protobuf/descriptor.pb.h>
  40. #include <google/protobuf/io/printer.h>
  41. #include <google/protobuf/wire_format.h>
  42. #include <google/protobuf/stubs/strutil.h>
  43. #include <google/protobuf/stubs/substitute.h>
  44. #include <google/protobuf/compiler/csharp/csharp_field_base.h>
  45. #include <google/protobuf/compiler/csharp/csharp_enum_field.h>
  46. #include <google/protobuf/compiler/csharp/csharp_map_field.h>
  47. #include <google/protobuf/compiler/csharp/csharp_message_field.h>
  48. #include <google/protobuf/compiler/csharp/csharp_options.h>
  49. #include <google/protobuf/compiler/csharp/csharp_primitive_field.h>
  50. #include <google/protobuf/compiler/csharp/csharp_repeated_enum_field.h>
  51. #include <google/protobuf/compiler/csharp/csharp_repeated_message_field.h>
  52. #include <google/protobuf/compiler/csharp/csharp_repeated_primitive_field.h>
  53. #include <google/protobuf/compiler/csharp/csharp_wrapper_field.h>
  54. namespace google {
  55. namespace protobuf {
  56. namespace compiler {
  57. namespace csharp {
  58. CSharpType GetCSharpType(FieldDescriptor::Type type) {
  59. switch (type) {
  60. case FieldDescriptor::TYPE_INT32:
  61. return CSHARPTYPE_INT32;
  62. case FieldDescriptor::TYPE_INT64:
  63. return CSHARPTYPE_INT64;
  64. case FieldDescriptor::TYPE_UINT32:
  65. return CSHARPTYPE_UINT32;
  66. case FieldDescriptor::TYPE_UINT64:
  67. return CSHARPTYPE_UINT32;
  68. case FieldDescriptor::TYPE_SINT32:
  69. return CSHARPTYPE_INT32;
  70. case FieldDescriptor::TYPE_SINT64:
  71. return CSHARPTYPE_INT64;
  72. case FieldDescriptor::TYPE_FIXED32:
  73. return CSHARPTYPE_UINT32;
  74. case FieldDescriptor::TYPE_FIXED64:
  75. return CSHARPTYPE_UINT64;
  76. case FieldDescriptor::TYPE_SFIXED32:
  77. return CSHARPTYPE_INT32;
  78. case FieldDescriptor::TYPE_SFIXED64:
  79. return CSHARPTYPE_INT64;
  80. case FieldDescriptor::TYPE_FLOAT:
  81. return CSHARPTYPE_FLOAT;
  82. case FieldDescriptor::TYPE_DOUBLE:
  83. return CSHARPTYPE_DOUBLE;
  84. case FieldDescriptor::TYPE_BOOL:
  85. return CSHARPTYPE_BOOL;
  86. case FieldDescriptor::TYPE_ENUM:
  87. return CSHARPTYPE_ENUM;
  88. case FieldDescriptor::TYPE_STRING:
  89. return CSHARPTYPE_STRING;
  90. case FieldDescriptor::TYPE_BYTES:
  91. return CSHARPTYPE_BYTESTRING;
  92. case FieldDescriptor::TYPE_GROUP:
  93. return CSHARPTYPE_MESSAGE;
  94. case FieldDescriptor::TYPE_MESSAGE:
  95. return CSHARPTYPE_MESSAGE;
  96. // No default because we want the compiler to complain if any new
  97. // types are added.
  98. }
  99. GOOGLE_LOG(FATAL)<< "Can't get here.";
  100. return (CSharpType) -1;
  101. }
  // Returns `proto_file` with everything from its last '.' onwards removed,
  // e.g. "foo_bar.proto" -> "foo_bar". A name containing no '.' is returned
  // unchanged.
  std::string StripDotProto(const std::string& proto_file) {
    // Keep the position in the string's own size type: squeezing npos into an
    // int only worked by relying on the conversion back to size_t in substr().
    const std::string::size_type last_dot = proto_file.rfind('.');
    if (last_dot == std::string::npos) {
      return proto_file;
    }
    return proto_file.substr(0, last_dot);
  }
  106. std::string GetFileNamespace(const FileDescriptor* descriptor) {
  107. if (descriptor->options().has_csharp_namespace()) {
  108. return descriptor->options().csharp_namespace();
  109. }
  110. return UnderscoresToCamelCase(descriptor->package(), true, true);
  111. }
  112. // Returns the Pascal-cased last part of the proto file. For example,
  113. // input of "google/protobuf/foo_bar.proto" would result in "FooBar".
  114. std::string GetFileNameBase(const FileDescriptor* descriptor) {
  115. std::string proto_file = descriptor->name();
  116. int lastslash = proto_file.find_last_of("/");
  117. std::string base = proto_file.substr(lastslash + 1);
  118. return UnderscoresToPascalCase(StripDotProto(base));
  119. }
  120. std::string GetReflectionClassUnqualifiedName(const FileDescriptor* descriptor) {
  121. // TODO: Detect collisions with existing messages,
  122. // and append an underscore if necessary.
  123. return GetFileNameBase(descriptor) + "Reflection";
  124. }
  // TODO(jtattermusch): can we reuse a utility function?
  //
  // Converts an underscore-separated identifier to camel case.
  //
  //   input            - the identifier to convert; may be empty.
  //   cap_next_letter  - whether the first letter is upper-cased. When false, a
  //                      leading upper-case letter is forced to lower case.
  //   preserve_period  - when true, '.' characters are copied to the output;
  //                      otherwise they are dropped like any other separator.
  //
  // Every character that is not an ASCII letter or digit is removed and causes
  // the next letter to be capitalised; a digit is kept and also capitalises the
  // letter that follows it. If the input ends in '#', a trailing '_' is
  // appended to the result.
  std::string UnderscoresToCamelCase(const std::string& input,
                                     bool cap_next_letter,
                                     bool preserve_period) {
    std::string result;
    result.reserve(input.size() + 1);
    // Note: I distrust ctype.h due to locales.
    for (std::string::size_type i = 0; i < input.size(); ++i) {
      const char c = input[i];
      if ('a' <= c && c <= 'z') {
        result += cap_next_letter ? static_cast<char>(c + ('A' - 'a')) : c;
        cap_next_letter = false;
      } else if ('A' <= c && c <= 'Z') {
        if (i == 0 && !cap_next_letter) {
          // Force first letter to lower-case unless explicitly told to
          // capitalize it.
          result += static_cast<char>(c + ('a' - 'A'));
        } else {
          // Capital letters after the first are left as-is.
          result += c;
        }
        cap_next_letter = false;
      } else if ('0' <= c && c <= '9') {
        result += c;
        cap_next_letter = true;
      } else {
        cap_next_letter = true;
        if (c == '.' && preserve_period) {
          result += '.';
        }
      }
    }
    // Add a trailing "_" if the name should be altered. The emptiness check
    // matters: indexing size() - 1 on an empty string reads out of bounds.
    if (!input.empty() && input[input.size() - 1] == '#') {
      result += '_';
    }
    return result;
  }
  165. std::string UnderscoresToPascalCase(const std::string& input) {
  166. return UnderscoresToCamelCase(input, true);
  167. }
  168. // Convert a string which is expected to be SHOUTY_CASE (but may not be *precisely* shouty)
  169. // into a PascalCase string. Precise rules implemented:
  170. // Previous input character Current character Case
  171. // Any Non-alphanumeric Skipped
  172. // None - first char of input Alphanumeric Upper
  173. // Non-letter (e.g. _ or 1) Alphanumeric Upper
  174. // Numeric Alphanumeric Upper
  175. // Lower letter Alphanumeric Same as current
  176. // Upper letter Alphanumeric Lower
  177. std::string ShoutyToPascalCase(const std::string& input) {
  178. string result;
  179. // Simple way of implementing "always start with upper"
  180. char previous = '_';
  181. for (int i = 0; i < input.size(); i++) {
  182. char current = input[i];
  183. if (!ascii_isalnum(current)) {
  184. previous = current;
  185. continue;
  186. }
  187. if (!ascii_isalnum(previous)) {
  188. result += ascii_toupper(current);
  189. } else if (ascii_isdigit(previous)) {
  190. result += ascii_toupper(current);
  191. } else if (ascii_islower(previous)) {
  192. result += current;
  193. } else {
  194. result += ascii_tolower(current);
  195. }
  196. previous = current;
  197. }
  198. return result;
  199. }
  200. // Attempt to remove a prefix from a value, ignoring casing and skipping underscores.
  201. // (foo, foo_bar) => bar - underscore after prefix is skipped
  202. // (FOO, foo_bar) => bar - casing is ignored
  203. // (foo_bar, foobarbaz) => baz - underscore in prefix is ignored
  204. // (foobar, foo_barbaz) => baz - underscore in value is ignored
  205. // (foo, bar) => bar - prefix isn't matched; return original value
  206. std::string TryRemovePrefix(const std::string& prefix, const std::string& value) {
  207. // First normalize to a lower-case no-underscores prefix to match against
  208. std::string prefix_to_match = "";
  209. for (size_t i = 0; i < prefix.size(); i++) {
  210. if (prefix[i] != '_') {
  211. prefix_to_match += ascii_tolower(prefix[i]);
  212. }
  213. }
  214. // This keeps track of how much of value we've consumed
  215. size_t prefix_index, value_index;
  216. for (prefix_index = 0, value_index = 0;
  217. prefix_index < prefix_to_match.size() && value_index < value.size();
  218. value_index++) {
  219. // Skip over underscores in the value
  220. if (value[value_index] == '_') {
  221. continue;
  222. }
  223. if (ascii_tolower(value[value_index]) != prefix_to_match[prefix_index++]) {
  224. // Failed to match the prefix - bail out early.
  225. return value;
  226. }
  227. }
  228. // If we didn't finish looking through the prefix, we can't strip it.
  229. if (prefix_index < prefix_to_match.size()) {
  230. return value;
  231. }
  232. // Step over any underscores after the prefix
  233. while (value_index < value.size() && value[value_index] == '_') {
  234. value_index++;
  235. }
  236. // If there's nothing left (e.g. it was a prefix with only underscores afterwards), don't strip.
  237. if (value_index == value.size()) {
  238. return value;
  239. }
  240. return value.substr(value_index);
  241. }
  242. // Format the enum value name in a pleasant way for C#:
  243. // - Strip the enum name as a prefix if possible
  244. // - Convert to PascalCase.
  245. // For example, an enum called Color with a value of COLOR_BLUE should
  246. // result in an enum value in C# called just Blue
  247. std::string GetEnumValueName(const std::string& enum_name, const std::string& enum_value_name) {
  248. std::string stripped = TryRemovePrefix(enum_name, enum_value_name);
  249. std::string result = ShoutyToPascalCase(stripped);
  250. // Just in case we have an enum name of FOO and a value of FOO_2... make sure the returned
  251. // string is a valid identifier.
  252. if (ascii_isdigit(result[0])) {
  253. result = "_" + result;
  254. }
  255. return result;
  256. }
  257. std::string ToCSharpName(const std::string& name, const FileDescriptor* file) {
  258. std::string result = GetFileNamespace(file);
  259. if (result != "") {
  260. result += '.';
  261. }
  262. string classname;
  263. if (file->package().empty()) {
  264. classname = name;
  265. } else {
  266. // Strip the proto package from full_name since we've replaced it with
  267. // the C# namespace.
  268. classname = name.substr(file->package().size() + 1);
  269. }
  270. result += StringReplace(classname, ".", ".Types.", true);
  271. return "global::" + result;
  272. }
  273. std::string GetReflectionClassName(const FileDescriptor* descriptor) {
  274. std::string result = GetFileNamespace(descriptor);
  275. if (!result.empty()) {
  276. result += '.';
  277. }
  278. result += GetReflectionClassUnqualifiedName(descriptor);
  279. return "global::" + result;
  280. }
  281. std::string GetClassName(const Descriptor* descriptor) {
  282. return ToCSharpName(descriptor->full_name(), descriptor->file());
  283. }
  284. std::string GetClassName(const EnumDescriptor* descriptor) {
  285. return ToCSharpName(descriptor->full_name(), descriptor->file());
  286. }
  287. // Groups are hacky: The name of the field is just the lower-cased name
  288. // of the group type. In C#, though, we would like to retain the original
  289. // capitalization of the type name.
  290. std::string GetFieldName(const FieldDescriptor* descriptor) {
  291. if (descriptor->type() == FieldDescriptor::TYPE_GROUP) {
  292. return descriptor->message_type()->name();
  293. } else {
  294. return descriptor->name();
  295. }
  296. }
  297. std::string GetFieldConstantName(const FieldDescriptor* field) {
  298. return GetPropertyName(field) + "FieldNumber";
  299. }
  300. std::string GetPropertyName(const FieldDescriptor* descriptor) {
  301. // TODO(jtattermusch): consider introducing csharp_property_name field option
  302. std::string property_name = UnderscoresToPascalCase(GetFieldName(descriptor));
  303. // Avoid either our own type name or reserved names. Note that not all names
  304. // are reserved - a field called to_string, write_to etc would still cause a problem.
  305. // There are various ways of ending up with naming collisions, but we try to avoid obvious
  306. // ones.
  307. if (property_name == descriptor->containing_type()->name()
  308. || property_name == "Types"
  309. || property_name == "Descriptor") {
  310. property_name += "_";
  311. }
  312. return property_name;
  313. }
  314. std::string GetOutputFile(
  315. const google::protobuf::FileDescriptor* descriptor,
  316. const std::string file_extension,
  317. const bool generate_directories,
  318. const std::string base_namespace,
  319. string* error) {
  320. string relative_filename = GetFileNameBase(descriptor) + file_extension;
  321. if (!generate_directories) {
  322. return relative_filename;
  323. }
  324. string ns = GetFileNamespace(descriptor);
  325. string namespace_suffix = ns;
  326. if (!base_namespace.empty()) {
  327. // Check that the base_namespace is either equal to or a leading part of
  328. // the file namespace. This isn't just a simple prefix; "Foo.B" shouldn't
  329. // be regarded as a prefix of "Foo.Bar". The simplest option is to add "."
  330. // to both.
  331. string extended_ns = ns + ".";
  332. if (extended_ns.find(base_namespace + ".") != 0) {
  333. *error = "Namespace " + ns + " is not a prefix namespace of base namespace " + base_namespace;
  334. return ""; // This will be ignored, because we've set an error.
  335. }
  336. namespace_suffix = ns.substr(base_namespace.length());
  337. if (namespace_suffix.find(".") == 0) {
  338. namespace_suffix = namespace_suffix.substr(1);
  339. }
  340. }
  341. string namespace_dir = StringReplace(namespace_suffix, ".", "/", true);
  342. if (!namespace_dir.empty()) {
  343. namespace_dir += "/";
  344. }
  345. return namespace_dir + relative_filename;
  346. }
  347. // TODO: c&p from Java protoc plugin
  348. // For encodings with fixed sizes, returns that size in bytes. Otherwise
  349. // returns -1.
  350. int GetFixedSize(FieldDescriptor::Type type) {
  351. switch (type) {
  352. case FieldDescriptor::TYPE_INT32 : return -1;
  353. case FieldDescriptor::TYPE_INT64 : return -1;
  354. case FieldDescriptor::TYPE_UINT32 : return -1;
  355. case FieldDescriptor::TYPE_UINT64 : return -1;
  356. case FieldDescriptor::TYPE_SINT32 : return -1;
  357. case FieldDescriptor::TYPE_SINT64 : return -1;
  358. case FieldDescriptor::TYPE_FIXED32 : return internal::WireFormatLite::kFixed32Size;
  359. case FieldDescriptor::TYPE_FIXED64 : return internal::WireFormatLite::kFixed64Size;
  360. case FieldDescriptor::TYPE_SFIXED32: return internal::WireFormatLite::kSFixed32Size;
  361. case FieldDescriptor::TYPE_SFIXED64: return internal::WireFormatLite::kSFixed64Size;
  362. case FieldDescriptor::TYPE_FLOAT : return internal::WireFormatLite::kFloatSize;
  363. case FieldDescriptor::TYPE_DOUBLE : return internal::WireFormatLite::kDoubleSize;
  364. case FieldDescriptor::TYPE_BOOL : return internal::WireFormatLite::kBoolSize;
  365. case FieldDescriptor::TYPE_ENUM : return -1;
  366. case FieldDescriptor::TYPE_STRING : return -1;
  367. case FieldDescriptor::TYPE_BYTES : return -1;
  368. case FieldDescriptor::TYPE_GROUP : return -1;
  369. case FieldDescriptor::TYPE_MESSAGE : return -1;
  370. // No default because we want the compiler to complain if any new
  371. // types are added.
  372. }
  373. GOOGLE_LOG(FATAL) << "Can't get here.";
  374. return -1;
  375. }
  // The 64-character Base64 alphabet, indexed by 6-bit value.
  static const char base64_chars[] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

  // Encodes the bytes of `input` as Base64 using the alphabet above, padding
  // the output with '=' so its length is a multiple of four. No line breaks
  // are inserted.
  std::string StringToBase64(const std::string& input) {
    std::string encoded;
    const unsigned char* bytes = (const unsigned char*) input.c_str();
    const size_t length = input.size();

    // Each complete group of three bytes becomes four output characters.
    size_t pos = 0;
    for (; pos + 2 < length; pos += 3) {
      const unsigned char b0 = bytes[pos];
      const unsigned char b1 = bytes[pos + 1];
      const unsigned char b2 = bytes[pos + 2];
      encoded += base64_chars[b0 >> 2];
      encoded += base64_chars[((b0 & 0x3) << 4) | (b1 >> 4)];
      encoded += base64_chars[((b1 & 0xf) << 2) | (b2 >> 6)];
      encoded += base64_chars[b2 & 0x3f];
    }

    // One or two leftover bytes are encoded with '=' padding.
    const size_t tail = length - pos;
    if (tail == 2) {
      const unsigned char b0 = bytes[pos];
      const unsigned char b1 = bytes[pos + 1];
      encoded += base64_chars[b0 >> 2];
      encoded += base64_chars[((b0 & 0x3) << 4) | (b1 >> 4)];
      encoded += base64_chars[(b1 & 0xf) << 2];
      encoded += '=';
    } else if (tail == 1) {
      const unsigned char b0 = bytes[pos];
      encoded += base64_chars[b0 >> 2];
      encoded += base64_chars[(b0 & 0x3) << 4];
      encoded += '=';
      encoded += '=';
    }
    return encoded;
  }
  408. std::string FileDescriptorToBase64(const FileDescriptor* descriptor) {
  409. std::string fdp_bytes;
  410. FileDescriptorProto fdp;
  411. descriptor->CopyTo(&fdp);
  412. fdp.SerializeToString(&fdp_bytes);
  413. return StringToBase64(fdp_bytes);
  414. }
  415. FieldGeneratorBase* CreateFieldGenerator(const FieldDescriptor* descriptor,
  416. int fieldOrdinal,
  417. const Options* options) {
  418. switch (descriptor->type()) {
  419. case FieldDescriptor::TYPE_GROUP:
  420. case FieldDescriptor::TYPE_MESSAGE:
  421. if (descriptor->is_repeated()) {
  422. if (descriptor->is_map()) {
  423. return new MapFieldGenerator(descriptor, fieldOrdinal, options);
  424. } else {
  425. return new RepeatedMessageFieldGenerator(descriptor, fieldOrdinal, options);
  426. }
  427. } else {
  428. if (IsWrapperType(descriptor)) {
  429. if (descriptor->containing_oneof()) {
  430. return new WrapperOneofFieldGenerator(descriptor, fieldOrdinal, options);
  431. } else {
  432. return new WrapperFieldGenerator(descriptor, fieldOrdinal, options);
  433. }
  434. } else {
  435. if (descriptor->containing_oneof()) {
  436. return new MessageOneofFieldGenerator(descriptor, fieldOrdinal, options);
  437. } else {
  438. return new MessageFieldGenerator(descriptor, fieldOrdinal, options);
  439. }
  440. }
  441. }
  442. case FieldDescriptor::TYPE_ENUM:
  443. if (descriptor->is_repeated()) {
  444. return new RepeatedEnumFieldGenerator(descriptor, fieldOrdinal, options);
  445. } else {
  446. if (descriptor->containing_oneof()) {
  447. return new EnumOneofFieldGenerator(descriptor, fieldOrdinal, options);
  448. } else {
  449. return new EnumFieldGenerator(descriptor, fieldOrdinal, options);
  450. }
  451. }
  452. default:
  453. if (descriptor->is_repeated()) {
  454. return new RepeatedPrimitiveFieldGenerator(descriptor, fieldOrdinal, options);
  455. } else {
  456. if (descriptor->containing_oneof()) {
  457. return new PrimitiveOneofFieldGenerator(descriptor, fieldOrdinal, options);
  458. } else {
  459. return new PrimitiveFieldGenerator(descriptor, fieldOrdinal, options);
  460. }
  461. }
  462. }
  463. }
  464. } // namespace csharp
  465. } // namespace compiler
  466. } // namespace protobuf
  467. } // namespace google