registry_search.c 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324
  1. /*
  2. * Copyright 2011 Google Inc.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  #include "../domain_registry.h"
  #include <stdlib.h>
  #include <string.h>
  #include "tsk_assert.h"
  #include "string_util.h"
  #include "trie_search.h"
  /*
   * Upper bound, in bytes, on the length of a hostname accepted by this
   * file. RFC 1035 and RFC 1123 both specify 255 as the maximum.
   */
  static const size_t kMaxHostnameLen = 255u;
  /*
   * Returns a newly malloc()ed, NUL-terminated copy of s, or NULL when the
   * allocation fails. The caller owns the result and must free() it.
   * Defined here because strdup() is not part of ANSI C89.
   */
  static char* StrDup(const char* s) {
    const size_t size_with_nul = strlen(s) + 1;
    char* copy = malloc(size_with_nul);
    if (copy != NULL) {
      /* Copying size_with_nul bytes brings the terminator along. */
      memcpy(copy, s, size_with_nul);
    }
    return copy;
  }
  /*
   * Bounded string length: returns the number of bytes before the first
   * NUL in s, examining at most max bytes. If no NUL is found within the
   * first max bytes, returns max. Defined here because strnlen() is not
   * part of ANSI C89.
   */
  static size_t StrnLen(const char* s, size_t max) {
    size_t len = 0;
    /*
     * Count with an index rather than precomputing "s + max": when the
     * string is shorter than max, that pointer would lie beyond the end
     * of the object s points into, and merely forming it is undefined
     * behavior. The bound is tested before each read, so no byte at or
     * past index max is ever touched.
     */
    while (len < max && s[len] != 0) {
      ++len;
    }
    return len;
  }
  /*
   * Returns 1 when every byte of the NUL-terminated string s is in the
   * 7-bit ASCII range (0x00-0x7f), and 0 as soon as a byte with the high
   * bit set is encountered. An empty string yields 1.
   */
  static int IsStringASCII(const char* s) {
    while (*s != 0) {
      /* Compare as unsigned so the result does not depend on whether
       * plain char is signed on this platform. */
      const unsigned char byte = (unsigned char) *s;
      if (byte >= 0x80) {
        return 0;
      }
      ++s;
    }
    return 1;
  }
  53. static int IsValidHostname(const char* hostname) {
  54. /*
  55. * http://www.ietf.org/rfc/rfc1035.txt (DNS) and
  56. * http://tools.ietf.org/html/rfc1123 (Internet host requirements)
  57. * specify a maximum hostname length of 255 characters. To make sure
  58. * string comparisons, etc are bounded elsewhere in the codebase, we
  59. * enforce the 255 character limit here. There are various other
  60. * hostname constraints specified in the RFCs (63 bytes per
  61. * hostname-part, etc) but we do not enforce those here since doing
  62. * so would not change correctness of the overall implementation,
  63. * and it's possible that hostnames used in other contexts
  64. * (e.g. outside of DNS) would not be subject to the 63-byte
  65. * hostname-part limit. So we let the DNS layer enforce its policy,
  66. * and enforce only the maximum hostname length here.
  67. */
  68. if (StrnLen(hostname, kMaxHostnameLen + 1) > kMaxHostnameLen) {
  69. return 0;
  70. }
  71. /*
  72. * All hostnames must contain only ASCII characters. If a hostname
  73. * is passed in that contains non-ASCII (e.g. an IDN that hasn't been
  74. * converted to ASCII via punycode) we want to reject it outright.
  75. */
  76. if (IsStringASCII(hostname) == 0) {
  77. return 0;
  78. }
  79. return 1;
  80. }
  /*
   * Picks the address at which the registry string starts for a matched
   * hostname-part.
   *
   * When IsExceptionComponent(rule_part) is false, component itself is
   * returned. When it is true, the result is the address just past
   * component's terminating NUL (component + strlen(component) + 1);
   * presumably that is the next hostname-part in the NUL-separated buffer
   * built by the callers in this file.
   */
  static const char* GetDomainRegistryStr(const char* rule_part,
                                          const char* component) {
    if (!IsExceptionComponent(rule_part)) {
      return component;
    }
    return component + strlen(component) + 1;
  }
  /*
   * Iterates the hostname-parts in [start, end) in reverse order, where
   * parts are separated by the character sep. For instance, if the range
   * holds "foo\0bar\0com" and sep is the null character, successive calls
   * return a pointer to "com", then "bar", then "foo", then NULL.
   *
   * start, end: bounds of the range; end is one past the last byte.
   * sep:        separator character between hostname-parts.
   * ctx:        iteration state owned by the caller. *ctx must be NULL
   *             before the first call and must be passed back unchanged
   *             on each later call; it records the position of the
   *             separator (or start) preceding the part last returned.
   *
   * Returns a pointer into [start, end) at the first byte of the next
   * part, or NULL when there is nothing left to visit.
   */
  static const char* GetNextHostnamePartImpl(const char* start,
                                             const char* end,
                                             char sep,
                                             void** ctx) {
    const char* last;
    const char* i;
    if (*ctx == NULL) {
      /* First call: begin scanning from the end of the range. */
      *ctx = (void*) end;
      /*
       * Special case: a single trailing separator indicates a
       * fully-qualified domain name. Skip over it.
       */
      if (end > start && *(end - 1) == sep) {
        *ctx = (void*) (end - 1);
      }
    }
    last = *ctx;
    if (start > last) return NULL;
    /*
     * Scan backwards for the separator that precedes the next part. The
     * cursor i always stays within [start, last] and the byte examined
     * is i[-1]; this avoids ever forming the pointer "start - 1", which
     * would be undefined behavior even if it were never dereferenced.
     */
    for (i = last; i > start; --i) {
      if (*(i - 1) == sep) {
        *ctx = (void*) (i - 1);
        return i;
      }
    }
    if (last != start && *start != 0) {
      /*
       * Special case: no separator was found, but the context shows the
       * first component has not been visited yet and it is non-empty,
       * so visit it now.
       */
      *ctx = (void*) start;
      return start;
    }
    return NULL;
  }
  /*
   * Filtering wrapper around GetNextHostnamePartImpl(): fetches the next
   * hostname-part in reverse order and returns it, unless
   * IsInvalidComponent() flags it, in which case NULL is returned. The
   * arguments are forwarded unchanged, and the Impl's result is handed to
   * IsInvalidComponent() as-is (including a NULL result).
   */
  static const char* GetNextHostnamePart(const char* start,
                                         const char* end,
                                         char sep,
                                         void** ctx) {
    const char* candidate = GetNextHostnamePartImpl(start, end, sep, ctx);
    return IsInvalidComponent(candidate) ? NULL : candidate;
  }
  147. /*
  148. * Iterate over all hostname-parts between value and value_end, where
  149. * the hostname-parts are separated by character sep.
  150. */
  151. static const char* GetRegistryForHostname(const char* value,
  152. const char* value_end,
  153. const char sep) {
  154. void *ctx = NULL;
  155. const struct TrieNode* current = NULL;
  156. const char* component = NULL;
  157. const char* last_valid = NULL;
  158. /*
  159. * Iterate over the hostname components one at a time, e.g. if value
  160. * is foo.com, we will first visit component com, then component foo.
  161. */
  162. while ((component =
  163. GetNextHostnamePart(value, value_end, sep, &ctx)) != NULL) {
  164. const char* leaf_node;
  165. current = FindRegistryNode(component, current);
  166. if (current == NULL) {
  167. break;
  168. }
  169. if (current->is_terminal == 1) {
  170. last_valid = GetDomainRegistryStr(
  171. GetHostnamePart(current->string_table_offset), component);
  172. } else {
  173. last_valid = NULL;
  174. }
  175. if (HasLeafChildren(current)) {
  176. /*
  177. * The child nodes are in the leaf node table, so perform a
  178. * search in that table.
  179. */
  180. component = GetNextHostnamePart(value, value_end, sep, &ctx);
  181. if (component == NULL) {
  182. break;
  183. }
  184. leaf_node = FindRegistryLeafNode(component, current);
  185. if (leaf_node == NULL) {
  186. break;
  187. }
  188. return GetDomainRegistryStr(leaf_node, component);
  189. }
  190. }
  191. return last_valid;
  192. }
  /*
   * Computes the length of the registry portion of the hostname stored in
   * [value, value_end), whose hostname-parts are separated by sep.
   *
   * value, value_end:         bounds of the hostname buffer.
   * sep:                      separator between hostname-parts.
   * allow_unknown_registries: when non-zero and no registry is found, the
   *                           root (last) hostname-part is accepted as
   *                           the registry, provided FindRegistryNode()
   *                           does not find it in the table.
   *
   * Returns the number of bytes from the start of the registry to
   * value_end, or 0 when no registry is found or the computed registry
   * pointer falls outside [value, value_end).
   */
  static size_t GetRegistryLengthImpl(
      const char* value,
      const char* value_end,
      const char sep,
      int allow_unknown_registries) {
    const char* registry;
    size_t match_len;
    /*
     * Skip over leading separators. The bounds test comes first so that
     * *value is only read while value is still inside the range.
     */
    while (value < value_end && *value == sep) {
      ++value;
    }
    registry = GetRegistryForHostname(value, value_end, sep);
    if (registry == NULL) {
      /*
       * Didn't find a match. If unknown registries are allowed, see if
       * the root hostname part is not in the table. If so, consider it to
       * be a valid registry, and return its length.
       */
      if (allow_unknown_registries != 0) {
        void* ctx = NULL;
        const char* root_hostname_part =
            GetNextHostnamePart(value, value_end, sep, &ctx);
        /*
         * See if the root hostname-part is in the table. If it's not in
         * the table, then consider the unknown registry to be a valid
         * registry.
         */
        if (root_hostname_part != NULL &&
            FindRegistryNode(root_hostname_part, NULL) == NULL) {
          registry = root_hostname_part;
        }
      }
      if (registry == NULL) {
        return 0;
      }
    }
    if (registry < value || registry >= value_end) {
      /* Error cases: flag them via DCHECK, then report "no registry". */
      DCHECK(registry >= value);
      DCHECK(registry < value_end);
      return 0;
    }
    match_len = (size_t) (value_end - registry);
    return match_len;
  }
  /*
   * Shared implementation behind GetRegistryLength() and
   * GetRegistryLengthAllowUnknownRegistries(); the two differ only in the
   * allow_unknown_registries flag handed to GetRegistryLengthImpl().
   *
   * Returns 0 when hostname is NULL, fails IsValidHostname(), or cannot
   * be copied because the allocation fails; otherwise returns whatever
   * GetRegistryLengthImpl() computes for the normalized copy.
   */
  static size_t ComputeRegistryLength(const char* hostname,
                                      int allow_unknown_registries) {
    const char* buf_end;
    char* buf;
    size_t registry_length;
    if (hostname == NULL) {
      return 0;
    }
    if (IsValidHostname(hostname) == 0) {
      return 0;
    }
    /*
     * Work on a private copy so the caller's string is never modified.
     * Replace dots between hostname parts with the null byte. This allows
     * us to index directly into the string and refer to each
     * hostname-part as if it were its own null-terminated string.
     */
    buf = StrDup(hostname);
    if (buf == NULL) {
      return 0;
    }
    ReplaceChar(buf, '.', '\0');
    /*
     * The length must come from the original string: after the
     * replacement above, strlen(buf) would stop at the first former dot.
     */
    buf_end = buf + strlen(hostname);
    DCHECK(*buf_end == 0);
    /* Normalize the input by converting all characters to lowercase. */
    ToLowerASCII(buf, buf_end);
    registry_length =
        GetRegistryLengthImpl(buf, buf_end, '\0', allow_unknown_registries);
    free(buf);
    return registry_length;
  }

  /*
   * Returns the length, in bytes, of the registry portion of hostname,
   * or 0 if hostname is NULL, is not a valid hostname, or has no registry
   * that the lookup recognizes. Unknown registries are not accepted.
   */
  size_t GetRegistryLength(const char* hostname) {
    return ComputeRegistryLength(hostname, 0);
  }

  /*
   * Same as GetRegistryLength(), except that when no registry is found
   * and the root (last) hostname-part is not in the registry table, that
   * part is itself treated as the registry.
   */
  size_t GetRegistryLengthAllowUnknownRegistries(const char* hostname) {
    return ComputeRegistryLength(hostname, 1);
  }
  293. }