cpp_helpers.cc 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867
  1. // Protocol Buffers - Google's data interchange format
  2. // Copyright 2008 Google Inc. All rights reserved.
  3. // https://developers.google.com/protocol-buffers/
  4. //
  5. // Redistribution and use in source and binary forms, with or without
  6. // modification, are permitted provided that the following conditions are
  7. // met:
  8. //
  9. // * Redistributions of source code must retain the above copyright
  10. // notice, this list of conditions and the following disclaimer.
  11. // * Redistributions in binary form must reproduce the above
  12. // copyright notice, this list of conditions and the following disclaimer
  13. // in the documentation and/or other materials provided with the
  14. // distribution.
  15. // * Neither the name of Google Inc. nor the names of its
  16. // contributors may be used to endorse or promote products derived from
  17. // this software without specific prior written permission.
  18. //
  19. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  20. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  21. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  22. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  23. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  25. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  26. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  27. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  28. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  29. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30. // Author: kenton@google.com (Kenton Varda)
  31. // Based on original Protocol Buffers design by
  32. // Sanjay Ghemawat, Jeff Dean, and others.
  33. #include <google/protobuf/stubs/hash.h>
  34. #include <limits>
  35. #include <map>
  36. #include <queue>
  37. #include <vector>
  38. #include <google/protobuf/stubs/logging.h>
  39. #include <google/protobuf/stubs/common.h>
  40. #include <google/protobuf/compiler/cpp/cpp_helpers.h>
  41. #include <google/protobuf/io/printer.h>
  42. #include <google/protobuf/io/zero_copy_stream.h>
  43. #include <google/protobuf/stubs/strutil.h>
  44. #include <google/protobuf/stubs/substitute.h>
  45. namespace google {
  46. namespace protobuf {
  47. namespace compiler {
  48. namespace cpp {
  49. namespace {
  // Message name and .proto path that IsAnyMessage() compares against, plus
  // the path prefix that IsWellKnownMessage() looks for in a file name.
  const char kAnyMessageName[] = "Any";
  const char kAnyProtoFile[] = "google/protobuf/any.proto";
  const char kGoogleProtobufPrefix[] = "google/protobuf/";
  53. string DotsToUnderscores(const string& name) {
  54. return StringReplace(name, ".", "_", true);
  55. }
  56. string DotsToColons(const string& name) {
  57. return StringReplace(name, ".", "::", true);
  58. }
  // C++ keywords (including alternative operator spellings). Names that match
  // an entry get a trailing underscore appended by the naming helpers in this
  // file so that the emitted identifier is not a reserved word.
  //
  // The list already covers C++11 keywords such as alignas/constexpr/nullptr;
  // char16_t and char32_t belong to the same set and are included as well.
  const char* const kKeywordList[] = {
    "alignas", "alignof", "and", "and_eq", "asm", "auto", "bitand", "bitor",
    "bool", "break", "case", "catch", "char", "char16_t", "char32_t", "class",
    "compl", "const", "constexpr", "const_cast", "continue", "decltype",
    "default", "delete", "do", "double", "dynamic_cast", "else", "enum",
    "explicit", "export", "extern", "false", "float", "for", "friend", "goto",
    "if", "inline", "int", "long", "mutable", "namespace", "new", "noexcept",
    "not", "not_eq", "nullptr", "operator", "or", "or_eq", "private",
    "protected", "public", "register", "reinterpret_cast", "return", "short",
    "signed", "sizeof", "static", "static_assert", "static_cast", "struct",
    "switch", "template", "this", "thread_local", "throw", "true", "try",
    "typedef", "typeid", "typename", "union", "unsigned", "using", "virtual",
    "void", "volatile", "wchar_t", "while", "xor", "xor_eq"
  };
  73. hash_set<string> MakeKeywordsMap() {
  74. hash_set<string> result;
  75. for (int i = 0; i < GOOGLE_ARRAYSIZE(kKeywordList); i++) {
  76. result.insert(kKeywordList[i]);
  77. }
  78. return result;
  79. }
  80. hash_set<string> kKeywords = MakeKeywordsMap();
  81. // Returns whether the provided descriptor has an extension. This includes its
  82. // nested types.
  83. bool HasExtension(const Descriptor* descriptor) {
  84. if (descriptor->extension_count() > 0) {
  85. return true;
  86. }
  87. for (int i = 0; i < descriptor->nested_type_count(); ++i) {
  88. if (HasExtension(descriptor->nested_type(i))) {
  89. return true;
  90. }
  91. }
  92. return false;
  93. }
  94. // Encode [0..63] as 'A'-'Z', 'a'-'z', '0'-'9', '_'
  95. char Base63Char(int value) {
  96. GOOGLE_CHECK_GE(value, 0);
  97. if (value < 26) return 'A' + value;
  98. value -= 26;
  99. if (value < 26) return 'a' + value;
  100. value -= 26;
  101. if (value < 10) return '0' + value;
  102. GOOGLE_CHECK_EQ(value, 10);
  103. return '_';
  104. }
  105. // Given a c identifier has 63 legal characters we can't implement base64
  106. // encoding. So we return the k least significant "digits" in base 63.
  107. template <typename I>
  108. string Base63(I n, int k) {
  109. string res;
  110. while (k-- > 0) {
  111. res += Base63Char(static_cast<int>(n % 63));
  112. n /= 63;
  113. }
  114. return res;
  115. }
  116. } // namespace
  // Converts `input` to camel case. Characters other than ASCII letters and
  // digits are dropped and force the next lowercase letter to be capitalized;
  // digits are kept and do the same. Uppercase letters are copied unchanged.
  // `cap_next_letter` says whether the first lowercase letter encountered
  // should be capitalized.
  string UnderscoresToCamelCase(const string& input, bool cap_next_letter) {
    string camel;
    // Character classes are tested by hand: ctype.h is locale dependent.
    for (string::const_iterator it = input.begin(); it != input.end(); ++it) {
      const char c = *it;
      const bool is_lower = 'a' <= c && c <= 'z';
      const bool is_upper = 'A' <= c && c <= 'Z';
      const bool is_digit = '0' <= c && c <= '9';

      if (is_lower) {
        camel += cap_next_letter ? static_cast<char>(c + ('A' - 'a')) : c;
      } else if (is_upper || is_digit) {
        camel += c;
      }
      // Only a letter clears the flag; digits and separators set it.
      cap_next_letter = !(is_lower || is_upper);
    }
    return camel;
  }
  // Comment line made of '=' characters, terminated by a newline.
  const char kThickSeparator[] =
      "// ===================================================================\n";

  // Comment line made of '-' characters, terminated by a newline.
  const char kThinSeparator[] =
      "// -------------------------------------------------------------------\n";
  145. bool CanInitializeByZeroing(const FieldDescriptor* field) {
  146. if (field->is_repeated() || field->is_extension()) return false;
  147. switch (field->cpp_type()) {
  148. case FieldDescriptor::CPPTYPE_ENUM:
  149. return field->default_value_enum()->number() == 0;
  150. case FieldDescriptor::CPPTYPE_INT32:
  151. return field->default_value_int32() == 0;
  152. case FieldDescriptor::CPPTYPE_INT64:
  153. return field->default_value_int64() == 0;
  154. case FieldDescriptor::CPPTYPE_UINT32:
  155. return field->default_value_uint32() == 0;
  156. case FieldDescriptor::CPPTYPE_UINT64:
  157. return field->default_value_uint64() == 0;
  158. case FieldDescriptor::CPPTYPE_FLOAT:
  159. return field->default_value_float() == 0;
  160. case FieldDescriptor::CPPTYPE_DOUBLE:
  161. return field->default_value_double() == 0;
  162. case FieldDescriptor::CPPTYPE_BOOL:
  163. return field->default_value_bool() == false;
  164. default:
  165. return false;
  166. }
  167. }
  168. string ClassName(const Descriptor* descriptor) {
  169. const Descriptor* parent = descriptor->containing_type();
  170. string res;
  171. if (parent) res += ClassName(parent) + "_";
  172. res += descriptor->name();
  173. if (IsMapEntryMessage(descriptor)) res += "_DoNotUse";
  174. return res;
  175. }
  176. string ClassName(const EnumDescriptor* enum_descriptor) {
  177. if (enum_descriptor->containing_type() == NULL) {
  178. return enum_descriptor->name();
  179. } else {
  180. return ClassName(enum_descriptor->containing_type()) + "_" +
  181. enum_descriptor->name();
  182. }
  183. }
  184. string Namespace(const string& package) {
  185. if (package.empty()) return "";
  186. return "::" + DotsToColons(package);
  187. }
  188. string DefaultInstanceName(const Descriptor* descriptor) {
  189. string prefix = descriptor->file()->package().empty() ? "" : "::";
  190. return prefix + DotsToColons(descriptor->file()->package()) + "::_" +
  191. ClassName(descriptor, false) + "_default_instance_";
  192. }
  193. string ReferenceFunctionName(const Descriptor* descriptor) {
  194. return QualifiedClassName(descriptor) + "_ReferenceStrong";
  195. }
  196. string SuperClassName(const Descriptor* descriptor, const Options& options) {
  197. return HasDescriptorMethods(descriptor->file(), options)
  198. ? "::google::protobuf::Message"
  199. : "::google::protobuf::MessageLite";
  200. }
  201. string FieldName(const FieldDescriptor* field) {
  202. string result = field->name();
  203. LowerString(&result);
  204. if (kKeywords.count(result) > 0) {
  205. result.append("_");
  206. }
  207. return result;
  208. }
  209. string EnumValueName(const EnumValueDescriptor* enum_value) {
  210. string result = enum_value->name();
  211. if (kKeywords.count(result) > 0) {
  212. result.append("_");
  213. }
  214. return result;
  215. }
  216. int EstimateAlignmentSize(const FieldDescriptor* field) {
  217. if (field == NULL) return 0;
  218. if (field->is_repeated()) return 8;
  219. switch (field->cpp_type()) {
  220. case FieldDescriptor::CPPTYPE_BOOL:
  221. return 1;
  222. case FieldDescriptor::CPPTYPE_INT32:
  223. case FieldDescriptor::CPPTYPE_UINT32:
  224. case FieldDescriptor::CPPTYPE_ENUM:
  225. case FieldDescriptor::CPPTYPE_FLOAT:
  226. return 4;
  227. case FieldDescriptor::CPPTYPE_INT64:
  228. case FieldDescriptor::CPPTYPE_UINT64:
  229. case FieldDescriptor::CPPTYPE_DOUBLE:
  230. case FieldDescriptor::CPPTYPE_STRING:
  231. case FieldDescriptor::CPPTYPE_MESSAGE:
  232. return 8;
  233. }
  234. GOOGLE_LOG(FATAL) << "Can't get here.";
  235. return -1; // Make compiler happy.
  236. }
  237. string FieldConstantName(const FieldDescriptor *field) {
  238. string field_name = UnderscoresToCamelCase(field->name(), true);
  239. string result = "k" + field_name + "FieldNumber";
  240. if (!field->is_extension() &&
  241. field->containing_type()->FindFieldByCamelcaseName(
  242. field->camelcase_name()) != field) {
  243. // This field's camelcase name is not unique. As a hack, add the field
  244. // number to the constant name. This makes the constant rather useless,
  245. // but what can we do?
  246. result += "_" + SimpleItoa(field->number());
  247. }
  248. return result;
  249. }
  250. string FieldMessageTypeName(const FieldDescriptor* field) {
  251. // Note: The Google-internal version of Protocol Buffers uses this function
  252. // as a hook point for hacks to support legacy code.
  253. return ClassName(field->message_type(), true);
  254. }
  255. string StripProto(const string& filename) {
  256. if (HasSuffixString(filename, ".protodevel")) {
  257. return StripSuffixString(filename, ".protodevel");
  258. } else {
  259. return StripSuffixString(filename, ".proto");
  260. }
  261. }
  262. const char* PrimitiveTypeName(FieldDescriptor::CppType type) {
  263. switch (type) {
  264. case FieldDescriptor::CPPTYPE_INT32 : return "::google::protobuf::int32";
  265. case FieldDescriptor::CPPTYPE_INT64 : return "::google::protobuf::int64";
  266. case FieldDescriptor::CPPTYPE_UINT32 : return "::google::protobuf::uint32";
  267. case FieldDescriptor::CPPTYPE_UINT64 : return "::google::protobuf::uint64";
  268. case FieldDescriptor::CPPTYPE_DOUBLE : return "double";
  269. case FieldDescriptor::CPPTYPE_FLOAT : return "float";
  270. case FieldDescriptor::CPPTYPE_BOOL : return "bool";
  271. case FieldDescriptor::CPPTYPE_ENUM : return "int";
  272. case FieldDescriptor::CPPTYPE_STRING : return "::std::string";
  273. case FieldDescriptor::CPPTYPE_MESSAGE: return NULL;
  274. // No default because we want the compiler to complain if any new
  275. // CppTypes are added.
  276. }
  277. GOOGLE_LOG(FATAL) << "Can't get here.";
  278. return NULL;
  279. }
  280. const char* DeclaredTypeMethodName(FieldDescriptor::Type type) {
  281. switch (type) {
  282. case FieldDescriptor::TYPE_INT32 : return "Int32";
  283. case FieldDescriptor::TYPE_INT64 : return "Int64";
  284. case FieldDescriptor::TYPE_UINT32 : return "UInt32";
  285. case FieldDescriptor::TYPE_UINT64 : return "UInt64";
  286. case FieldDescriptor::TYPE_SINT32 : return "SInt32";
  287. case FieldDescriptor::TYPE_SINT64 : return "SInt64";
  288. case FieldDescriptor::TYPE_FIXED32 : return "Fixed32";
  289. case FieldDescriptor::TYPE_FIXED64 : return "Fixed64";
  290. case FieldDescriptor::TYPE_SFIXED32: return "SFixed32";
  291. case FieldDescriptor::TYPE_SFIXED64: return "SFixed64";
  292. case FieldDescriptor::TYPE_FLOAT : return "Float";
  293. case FieldDescriptor::TYPE_DOUBLE : return "Double";
  294. case FieldDescriptor::TYPE_BOOL : return "Bool";
  295. case FieldDescriptor::TYPE_ENUM : return "Enum";
  296. case FieldDescriptor::TYPE_STRING : return "String";
  297. case FieldDescriptor::TYPE_BYTES : return "Bytes";
  298. case FieldDescriptor::TYPE_GROUP : return "Group";
  299. case FieldDescriptor::TYPE_MESSAGE : return "Message";
  300. // No default because we want the compiler to complain if any new
  301. // types are added.
  302. }
  303. GOOGLE_LOG(FATAL) << "Can't get here.";
  304. return "";
  305. }
  306. string Int32ToString(int number) {
  307. // gcc rejects the decimal form of kint32min.
  308. if (number == kint32min) {
  309. GOOGLE_COMPILE_ASSERT(kint32min == (~0x7fffffff), kint32min_value_error);
  310. return "(~0x7fffffff)";
  311. } else {
  312. return SimpleItoa(number);
  313. }
  314. }
  315. string Int64ToString(int64 number) {
  316. // gcc rejects the decimal form of kint64min
  317. if (number == kint64min) {
  318. // Make sure we are in a 2's complement system.
  319. GOOGLE_COMPILE_ASSERT(kint64min == GOOGLE_LONGLONG(~0x7fffffffffffffff),
  320. kint64min_value_error);
  321. return "GOOGLE_LONGLONG(~0x7fffffffffffffff)";
  322. }
  323. return "GOOGLE_LONGLONG(" + SimpleItoa(number) + ")";
  324. }
  325. string DefaultValue(const FieldDescriptor* field) {
  326. switch (field->cpp_type()) {
  327. case FieldDescriptor::CPPTYPE_INT32:
  328. return Int32ToString(field->default_value_int32());
  329. case FieldDescriptor::CPPTYPE_UINT32:
  330. return SimpleItoa(field->default_value_uint32()) + "u";
  331. case FieldDescriptor::CPPTYPE_INT64:
  332. return Int64ToString(field->default_value_int64());
  333. case FieldDescriptor::CPPTYPE_UINT64:
  334. return "GOOGLE_ULONGLONG(" + SimpleItoa(field->default_value_uint64())+ ")";
  335. case FieldDescriptor::CPPTYPE_DOUBLE: {
  336. double value = field->default_value_double();
  337. if (value == std::numeric_limits<double>::infinity()) {
  338. return "::google::protobuf::internal::Infinity()";
  339. } else if (value == -std::numeric_limits<double>::infinity()) {
  340. return "-::google::protobuf::internal::Infinity()";
  341. } else if (value != value) {
  342. return "::google::protobuf::internal::NaN()";
  343. } else {
  344. return SimpleDtoa(value);
  345. }
  346. }
  347. case FieldDescriptor::CPPTYPE_FLOAT:
  348. {
  349. float value = field->default_value_float();
  350. if (value == std::numeric_limits<float>::infinity()) {
  351. return "static_cast<float>(::google::protobuf::internal::Infinity())";
  352. } else if (value == -std::numeric_limits<float>::infinity()) {
  353. return "static_cast<float>(-::google::protobuf::internal::Infinity())";
  354. } else if (value != value) {
  355. return "static_cast<float>(::google::protobuf::internal::NaN())";
  356. } else {
  357. string float_value = SimpleFtoa(value);
  358. // If floating point value contains a period (.) or an exponent
  359. // (either E or e), then append suffix 'f' to make it a float
  360. // literal.
  361. if (float_value.find_first_of(".eE") != string::npos) {
  362. float_value.push_back('f');
  363. }
  364. return float_value;
  365. }
  366. }
  367. case FieldDescriptor::CPPTYPE_BOOL:
  368. return field->default_value_bool() ? "true" : "false";
  369. case FieldDescriptor::CPPTYPE_ENUM:
  370. // Lazy: Generate a static_cast because we don't have a helper function
  371. // that constructs the full name of an enum value.
  372. return strings::Substitute(
  373. "static_cast< $0 >($1)",
  374. ClassName(field->enum_type(), true),
  375. Int32ToString(field->default_value_enum()->number()));
  376. case FieldDescriptor::CPPTYPE_STRING:
  377. return "\"" + EscapeTrigraphs(
  378. CEscape(field->default_value_string())) +
  379. "\"";
  380. case FieldDescriptor::CPPTYPE_MESSAGE:
  381. return "*" + FieldMessageTypeName(field) +
  382. "::internal_default_instance()";
  383. }
  384. // Can't actually get here; make compiler happy. (We could add a default
  385. // case above but then we wouldn't get the nice compiler warning when a
  386. // new type is added.)
  387. GOOGLE_LOG(FATAL) << "Can't get here.";
  388. return "";
  389. }
  390. // Convert a file name into a valid identifier.
  391. string FilenameIdentifier(const string& filename) {
  392. string result;
  393. for (int i = 0; i < filename.size(); i++) {
  394. if (ascii_isalnum(filename[i])) {
  395. result.push_back(filename[i]);
  396. } else {
  397. // Not alphanumeric. To avoid any possibility of name conflicts we
  398. // use the hex code for the character.
  399. StrAppend(&result, "_", strings::Hex(static_cast<uint8>(filename[i])));
  400. }
  401. }
  402. return result;
  403. }
  404. string FileLevelNamespace(const string& filename) {
  405. return "protobuf_" + FilenameIdentifier(filename);
  406. }
  407. // Return the qualified C++ name for a file level symbol.
  408. string QualifiedFileLevelSymbol(const string& package, const string& name) {
  409. if (package.empty()) {
  410. return StrCat("::", name);
  411. }
  412. return StrCat("::", DotsToColons(package), "::", name);
  413. }
  414. // Escape C++ trigraphs by escaping question marks to \?
  415. string EscapeTrigraphs(const string& to_escape) {
  416. return StringReplace(to_escape, "?", "\\?", true);
  417. }
  418. // Escaped function name to eliminate naming conflict.
  419. string SafeFunctionName(const Descriptor* descriptor,
  420. const FieldDescriptor* field,
  421. const string& prefix) {
  422. // Do not use FieldName() since it will escape keywords.
  423. string name = field->name();
  424. LowerString(&name);
  425. string function_name = prefix + name;
  426. if (descriptor->FindFieldByName(function_name)) {
  427. // Single underscore will also make it conflicting with the private data
  428. // member. We use double underscore to escape function names.
  429. function_name.append("__");
  430. } else if (kKeywords.count(name) > 0) {
  431. // If the field name is a keyword, we append the underscore back to keep it
  432. // consistent with other function names.
  433. function_name.append("_");
  434. }
  435. return function_name;
  436. }
  437. static bool HasMapFields(const Descriptor* descriptor) {
  438. for (int i = 0; i < descriptor->field_count(); ++i) {
  439. if (descriptor->field(i)->is_map()) {
  440. return true;
  441. }
  442. }
  443. for (int i = 0; i < descriptor->nested_type_count(); ++i) {
  444. if (HasMapFields(descriptor->nested_type(i))) return true;
  445. }
  446. return false;
  447. }
  448. bool HasMapFields(const FileDescriptor* file) {
  449. for (int i = 0; i < file->message_type_count(); ++i) {
  450. if (HasMapFields(file->message_type(i))) return true;
  451. }
  452. return false;
  453. }
  454. static bool HasEnumDefinitions(const Descriptor* message_type) {
  455. if (message_type->enum_type_count() > 0) return true;
  456. for (int i = 0; i < message_type->nested_type_count(); ++i) {
  457. if (HasEnumDefinitions(message_type->nested_type(i))) return true;
  458. }
  459. return false;
  460. }
  461. bool HasEnumDefinitions(const FileDescriptor* file) {
  462. if (file->enum_type_count() > 0) return true;
  463. for (int i = 0; i < file->message_type_count(); ++i) {
  464. if (HasEnumDefinitions(file->message_type(i))) return true;
  465. }
  466. return false;
  467. }
  468. bool IsStringOrMessage(const FieldDescriptor* field) {
  469. switch (field->cpp_type()) {
  470. case FieldDescriptor::CPPTYPE_INT32:
  471. case FieldDescriptor::CPPTYPE_INT64:
  472. case FieldDescriptor::CPPTYPE_UINT32:
  473. case FieldDescriptor::CPPTYPE_UINT64:
  474. case FieldDescriptor::CPPTYPE_DOUBLE:
  475. case FieldDescriptor::CPPTYPE_FLOAT:
  476. case FieldDescriptor::CPPTYPE_BOOL:
  477. case FieldDescriptor::CPPTYPE_ENUM:
  478. return false;
  479. case FieldDescriptor::CPPTYPE_STRING:
  480. case FieldDescriptor::CPPTYPE_MESSAGE:
  481. return true;
  482. }
  483. GOOGLE_LOG(FATAL) << "Can't get here.";
  484. return false;
  485. }
  486. FieldOptions::CType EffectiveStringCType(const FieldDescriptor* field) {
  487. GOOGLE_DCHECK(field->cpp_type() == FieldDescriptor::CPPTYPE_STRING);
  488. // Open-source protobuf release only supports STRING ctype.
  489. return FieldOptions::STRING;
  490. }
  491. bool IsAnyMessage(const FileDescriptor* descriptor) {
  492. return descriptor->name() == kAnyProtoFile;
  493. }
  494. bool IsAnyMessage(const Descriptor* descriptor) {
  495. return descriptor->name() == kAnyMessageName &&
  496. descriptor->file()->name() == kAnyProtoFile;
  497. }
  498. bool IsWellKnownMessage(const FileDescriptor* descriptor) {
  499. return !descriptor->name().compare(0, 16, kGoogleProtobufPrefix);
  500. }
  // Levels of UTF-8 checking for string fields.
  enum Utf8CheckMode {
    // Parsing will fail if non UTF-8 data is in string fields.
    STRICT = 0,
    // Only log an error but parsing will succeed.
    VERIFY = 1,
    // No UTF-8 check.
    NONE = 2,
  };
  506. // Which level of UTF-8 enforcemant is placed on this file.
  507. static Utf8CheckMode GetUtf8CheckMode(const FieldDescriptor* field,
  508. const Options& options) {
  509. if (field->file()->syntax() == FileDescriptor::SYNTAX_PROTO3) {
  510. return STRICT;
  511. } else if (GetOptimizeFor(field->file(), options) !=
  512. FileOptions::LITE_RUNTIME) {
  513. return VERIFY;
  514. } else {
  515. return NONE;
  516. }
  517. }
  518. static void GenerateUtf8CheckCode(const FieldDescriptor* field,
  519. const Options& options, bool for_parse,
  520. const std::map<string, string>& variables,
  521. const char* parameters,
  522. const char* strict_function,
  523. const char* verify_function,
  524. io::Printer* printer) {
  525. switch (GetUtf8CheckMode(field, options)) {
  526. case STRICT: {
  527. if (for_parse) {
  528. printer->Print("DO_(");
  529. }
  530. printer->Print(
  531. "::google::protobuf::internal::WireFormatLite::$function$(\n",
  532. "function", strict_function);
  533. printer->Indent();
  534. printer->Print(variables, parameters);
  535. if (for_parse) {
  536. printer->Print("::google::protobuf::internal::WireFormatLite::PARSE,\n");
  537. } else {
  538. printer->Print("::google::protobuf::internal::WireFormatLite::SERIALIZE,\n");
  539. }
  540. printer->Print("\"$full_name$\")", "full_name", field->full_name());
  541. if (for_parse) {
  542. printer->Print(")");
  543. }
  544. printer->Print(";\n");
  545. printer->Outdent();
  546. break;
  547. }
  548. case VERIFY: {
  549. printer->Print(
  550. "::google::protobuf::internal::WireFormat::$function$(\n",
  551. "function", verify_function);
  552. printer->Indent();
  553. printer->Print(variables, parameters);
  554. if (for_parse) {
  555. printer->Print("::google::protobuf::internal::WireFormat::PARSE,\n");
  556. } else {
  557. printer->Print("::google::protobuf::internal::WireFormat::SERIALIZE,\n");
  558. }
  559. printer->Print("\"$full_name$\");\n", "full_name", field->full_name());
  560. printer->Outdent();
  561. break;
  562. }
  563. case NONE:
  564. break;
  565. }
  566. }
  567. void GenerateUtf8CheckCodeForString(const FieldDescriptor* field,
  568. const Options& options, bool for_parse,
  569. const std::map<string, string>& variables,
  570. const char* parameters,
  571. io::Printer* printer) {
  572. GenerateUtf8CheckCode(field, options, for_parse, variables, parameters,
  573. "VerifyUtf8String", "VerifyUTF8StringNamedField",
  574. printer);
  575. }
  576. void GenerateUtf8CheckCodeForCord(const FieldDescriptor* field,
  577. const Options& options, bool for_parse,
  578. const std::map<string, string>& variables,
  579. const char* parameters,
  580. io::Printer* printer) {
  581. GenerateUtf8CheckCode(field, options, for_parse, variables, parameters,
  582. "VerifyUtf8Cord", "VerifyUTF8CordNamedField", printer);
  583. }
  584. namespace {
  585. void Flatten(const Descriptor* descriptor,
  586. std::vector<const Descriptor*>* flatten) {
  587. for (int i = 0; i < descriptor->nested_type_count(); i++)
  588. Flatten(descriptor->nested_type(i), flatten);
  589. flatten->push_back(descriptor);
  590. }
  591. } // namespace
  592. void FlattenMessagesInFile(const FileDescriptor* file,
  593. std::vector<const Descriptor*>* result) {
  594. for (int i = 0; i < file->message_type_count(); i++) {
  595. Flatten(file->message_type(i), result);
  596. }
  597. }
  598. bool HasWeakFields(const Descriptor* descriptor) {
  599. return false;
  600. }
  601. bool HasWeakFields(const FileDescriptor* file) {
  602. return false;
  603. }
  604. bool UsingImplicitWeakFields(const FileDescriptor* file,
  605. const Options& options) {
  606. return options.lite_implicit_weak_fields &&
  607. GetOptimizeFor(file, options) == FileOptions::LITE_RUNTIME;
  608. }
  609. bool IsImplicitWeakField(const FieldDescriptor* field, const Options& options,
  610. SCCAnalyzer* scc_analyzer) {
  611. return UsingImplicitWeakFields(field->file(), options) &&
  612. field->type() == FieldDescriptor::TYPE_MESSAGE &&
  613. !field->is_required() && !field->is_map() &&
  614. field->containing_oneof() == NULL &&
  615. !IsWellKnownMessage(field->message_type()->file()) &&
  616. // We do not support implicit weak fields between messages in the same
  617. // strongly-connected component.
  618. scc_analyzer->GetSCC(field->containing_type()) !=
  619. scc_analyzer->GetSCC(field->message_type());
  620. }
  621. struct CompareDescriptors {
  622. bool operator()(const Descriptor* a, const Descriptor* b) {
  623. return a->full_name() < b->full_name();
  624. }
  625. };
  626. SCCAnalyzer::NodeData SCCAnalyzer::DFS(const Descriptor* descriptor) {
  627. // Must not have visited already.
  628. GOOGLE_DCHECK_EQ(cache_.count(descriptor), 0);
  629. // Mark visited by inserting in map.
  630. NodeData& result = cache_[descriptor];
  631. // Initialize data structures.
  632. result.index = result.lowlink = index_++;
  633. stack_.push_back(descriptor);
  634. // Recurse the fields / nodes in graph
  635. for (int i = 0; i < descriptor->field_count(); i++) {
  636. const Descriptor* child = descriptor->field(i)->message_type();
  637. if (child) {
  638. if (cache_.count(child) == 0) {
  639. // unexplored node
  640. NodeData child_data = DFS(child);
  641. result.lowlink = std::min(result.lowlink, child_data.lowlink);
  642. } else {
  643. NodeData child_data = cache_[child];
  644. if (child_data.scc == NULL) {
  645. // Still in the stack_ so we found a back edge
  646. result.lowlink = std::min(result.lowlink, child_data.index);
  647. }
  648. }
  649. }
  650. }
  651. if (result.index == result.lowlink) {
  652. // This is the root of a strongly connected component
  653. SCC* scc = CreateSCC();
  654. while (true) {
  655. const Descriptor* scc_desc = stack_.back();
  656. scc->descriptors.push_back(scc_desc);
  657. // Remove from stack
  658. stack_.pop_back();
  659. cache_[scc_desc].scc = scc;
  660. if (scc_desc == descriptor) break;
  661. }
  662. // The order of descriptors is random and depends how this SCC was
  663. // discovered. In-order to ensure maximum stability we sort it by name.
  664. std::sort(scc->descriptors.begin(), scc->descriptors.end(),
  665. CompareDescriptors());
  666. AddChildren(scc);
  667. }
  668. return result;
  669. }
  670. void SCCAnalyzer::AddChildren(SCC* scc) {
  671. std::set<const SCC*> seen;
  672. for (int i = 0; i < scc->descriptors.size(); i++) {
  673. const Descriptor* descriptor = scc->descriptors[i];
  674. for (int j = 0; j < descriptor->field_count(); j++) {
  675. const Descriptor* child_msg = descriptor->field(j)->message_type();
  676. if (child_msg) {
  677. const SCC* child = GetSCC(child_msg);
  678. if (child == scc) continue;
  679. if (seen.insert(child).second) {
  680. scc->children.push_back(child);
  681. }
  682. }
  683. }
  684. }
  685. }
  686. MessageAnalysis SCCAnalyzer::GetSCCAnalysis(const SCC* scc) {
  687. if (analysis_cache_.count(scc)) return analysis_cache_[scc];
  688. MessageAnalysis result = MessageAnalysis();
  689. for (int i = 0; i < scc->descriptors.size(); i++) {
  690. const Descriptor* descriptor = scc->descriptors[i];
  691. if (descriptor->extension_range_count() > 0) {
  692. result.contains_extension = true;
  693. }
  694. for (int i = 0; i < descriptor->field_count(); i++) {
  695. const FieldDescriptor* field = descriptor->field(i);
  696. if (field->is_required()) {
  697. result.contains_required = true;
  698. }
  699. switch (field->type()) {
  700. case FieldDescriptor::TYPE_STRING:
  701. case FieldDescriptor::TYPE_BYTES: {
  702. if (field->options().ctype() == FieldOptions::CORD) {
  703. result.contains_cord = true;
  704. }
  705. break;
  706. }
  707. case FieldDescriptor::TYPE_GROUP:
  708. case FieldDescriptor::TYPE_MESSAGE: {
  709. const SCC* child = GetSCC(field->message_type());
  710. if (child != scc) {
  711. MessageAnalysis analysis = GetSCCAnalysis(child);
  712. result.contains_cord |= analysis.contains_cord;
  713. result.contains_extension |= analysis.contains_extension;
  714. if (!ShouldIgnoreRequiredFieldCheck(field, options_)) {
  715. result.contains_required |= analysis.contains_required;
  716. }
  717. } else {
  718. // This field points back into the same SCC hence the messages
  719. // in the SCC are recursive. Note if SCC contains more than two
  720. // nodes it has to be recursive, however this test also works for
  721. // a single node that is recursive.
  722. result.is_recursive = true;
  723. }
  724. break;
  725. }
  726. default:
  727. break;
  728. }
  729. }
  730. }
  731. // We deliberately only insert the result here. After we contracted the SCC
  732. // in the graph, the graph should be a DAG. Hence we shouldn't need to mark
  733. // nodes visited as we can never return to them. By inserting them here
  734. // we will go in an infinite loop if the SCC is not correct.
  735. return analysis_cache_[scc] = result;
  736. }
  737. void ListAllFields(const Descriptor* d,
  738. std::vector<const FieldDescriptor*>* fields) {
  739. // Collect sub messages
  740. for (int i = 0; i < d->nested_type_count(); i++) {
  741. ListAllFields(d->nested_type(i), fields);
  742. }
  743. // Collect message level extensions.
  744. for (int i = 0; i < d->extension_count(); i++) {
  745. fields->push_back(d->extension(i));
  746. }
  747. // Add types of fields necessary
  748. for (int i = 0; i < d->field_count(); i++) {
  749. fields->push_back(d->field(i));
  750. }
  751. }
  752. void ListAllFields(const FileDescriptor* d,
  753. std::vector<const FieldDescriptor*>* fields) {
  754. // Collect file level message.
  755. for (int i = 0; i < d->message_type_count(); i++) {
  756. ListAllFields(d->message_type(i), fields);
  757. }
  758. // Collect message level extensions.
  759. for (int i = 0; i < d->extension_count(); i++) {
  760. fields->push_back(d->extension(i));
  761. }
  762. }
  763. void ListAllTypesForServices(const FileDescriptor* fd,
  764. std::vector<const Descriptor*>* types) {
  765. for (int i = 0; i < fd->service_count(); i++) {
  766. const ServiceDescriptor* sd = fd->service(i);
  767. for (int j = 0; j < sd->method_count(); j++) {
  768. const MethodDescriptor* method = sd->method(j);
  769. types->push_back(method->input_type());
  770. types->push_back(method->output_type());
  771. }
  772. }
  773. }
  774. } // namespace cpp
  775. } // namespace compiler
  776. } // namespace protobuf
  777. } // namespace google