Adaptive Framework  0.9.0
All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Modules Pages
afw_uri.c
Go to the documentation of this file.
1 // See the 'COPYING' file in the project root for licensing information.
2 /*
3  * Adaptive Framework Uniform Resource Identifier (URI) helpers
4  *
5  * Copyright (c) 2010-2023 Clemson University
6  *
7  */
8 
14 #include "afw_internal.h"
15 
16 
18 afw_uri_octet_type[256] = {
19 
20  /* 00 */
21  AFW_URI_OCTET_INVALID,
22 
23  /* 01 */
24  AFW_URI_OCTET_INVALID,
25 
26  /* 02 */
27  AFW_URI_OCTET_INVALID,
28 
29  /* 03 */
30  AFW_URI_OCTET_INVALID,
31 
32  /* 04 */
33  AFW_URI_OCTET_INVALID,
34 
35  /* 05 */
36  AFW_URI_OCTET_INVALID,
37 
38  /* 06 */
39  AFW_URI_OCTET_INVALID,
40 
41  /* 07 */
42  AFW_URI_OCTET_INVALID,
43 
44  /* 08 */
45  AFW_URI_OCTET_INVALID,
46 
47  /* 09 */
48  AFW_URI_OCTET_INVALID,
49 
50  /* 0A */
51  AFW_URI_OCTET_INVALID,
52 
53  /* 0B */
54  AFW_URI_OCTET_INVALID,
55 
56  /* 0C */
57  AFW_URI_OCTET_INVALID,
58 
59  /* 0D */
60  AFW_URI_OCTET_INVALID,
61 
62  /* 0E */
63  AFW_URI_OCTET_INVALID,
64 
65  /* 0F */
66  AFW_URI_OCTET_INVALID,
67 
68 
69  /* 10 */
70  AFW_URI_OCTET_INVALID,
71 
72  /* 11 */
73  AFW_URI_OCTET_INVALID,
74 
75  /* 12 */
76  AFW_URI_OCTET_INVALID,
77 
78  /* 13 */
79  AFW_URI_OCTET_INVALID,
80 
81  /* 14 */
82  AFW_URI_OCTET_INVALID,
83 
84  /* 15 */
85  AFW_URI_OCTET_INVALID,
86 
87  /* 16 */
88  AFW_URI_OCTET_INVALID,
89 
90  /* 17 */
91  AFW_URI_OCTET_INVALID,
92 
93  /* 18 */
94  AFW_URI_OCTET_INVALID,
95 
96  /* 19 */
97  AFW_URI_OCTET_INVALID,
98 
99  /* 1A */
100  AFW_URI_OCTET_INVALID,
101 
102  /* 1B */
103  AFW_URI_OCTET_INVALID,
104 
105  /* 1C */
106  AFW_URI_OCTET_INVALID,
107 
108  /* 1D */
109  AFW_URI_OCTET_INVALID,
110 
111  /* 1E */
112  AFW_URI_OCTET_INVALID,
113 
114  /* 1F */
115  AFW_URI_OCTET_INVALID,
116 
117  /* 20 SP */
118  AFW_URI_OCTET_INVALID,
119 
120  /* 21 ! */
121  AFW_URI_OCTET_SUB_DELIM |
125 
126  /* 22 " */
127  AFW_URI_OCTET_INVALID,
128 
129  /* 23 # */
130  AFW_URI_OCTET_GEN_DELIM |
132 
133  /* 24 $ */
134  AFW_URI_OCTET_SUB_DELIM |
136 
137  /* 25 % */
138  AFW_URI_OCTET_PERCENT,
139 
140  /* 26 & */
141  AFW_URI_OCTET_SUB_DELIM |
143 
144  /* 27 ' */
145  AFW_URI_OCTET_SUB_DELIM |
149 
150  /* 28 ( */
151  AFW_URI_OCTET_SUB_DELIM |
154 
155  /* 29 ) */
156  AFW_URI_OCTET_SUB_DELIM |
159 
160  /* 2A * */
161  AFW_URI_OCTET_SUB_DELIM |
165 
166  /* 2B + */
167  AFW_URI_OCTET_SUB_DELIM |
168  AFW_URI_OCTET_PLUS |
170 
171  /* 2C , */
172  AFW_URI_OCTET_SUB_DELIM |
174 
175  /* 2D - */
176  AFW_URI_OCTET_UNRESERVED |
177  AFW_URI_OCTET_MINUS |
181 
182  /* 2E . */
183  AFW_URI_OCTET_UNRESERVED |
184  AFW_URI_OCTET_PERIOD |
188 
189  /* 2F / */
190  AFW_URI_OCTET_GEN_DELIM |
191  AFW_URI_OCTET_SLASH |
193 
194  /* 30 0 */
195  AFW_URI_OCTET_UNRESERVED |
196  AFW_URI_OCTET_DIGIT |
197  AFW_URI_OCTET_HEXDIGIT |
201 
202  /* 31 1 */
203  AFW_URI_OCTET_UNRESERVED |
204  AFW_URI_OCTET_DIGIT |
205  AFW_URI_OCTET_HEXDIGIT |
209 
210  /* 32 2 */
211  AFW_URI_OCTET_UNRESERVED |
212  AFW_URI_OCTET_DIGIT |
213  AFW_URI_OCTET_HEXDIGIT |
217 
218  /* 33 3 */
219  AFW_URI_OCTET_UNRESERVED |
220  AFW_URI_OCTET_DIGIT |
221  AFW_URI_OCTET_HEXDIGIT |
225 
226  /* 34 4 */
227  AFW_URI_OCTET_UNRESERVED |
228  AFW_URI_OCTET_DIGIT |
229  AFW_URI_OCTET_HEXDIGIT |
233 
234  /* 35 5 */
235  AFW_URI_OCTET_UNRESERVED |
236  AFW_URI_OCTET_DIGIT |
237  AFW_URI_OCTET_HEXDIGIT |
241 
242  /* 36 6 */
243  AFW_URI_OCTET_UNRESERVED |
244  AFW_URI_OCTET_DIGIT |
245  AFW_URI_OCTET_HEXDIGIT |
249 
250  /* 37 7 */
251  AFW_URI_OCTET_UNRESERVED |
252  AFW_URI_OCTET_DIGIT |
253  AFW_URI_OCTET_HEXDIGIT |
257 
258  /* 38 8 */
259  AFW_URI_OCTET_UNRESERVED |
260  AFW_URI_OCTET_DIGIT |
261  AFW_URI_OCTET_HEXDIGIT |
265 
266  /* 39 9 */
267  AFW_URI_OCTET_UNRESERVED |
268  AFW_URI_OCTET_DIGIT |
269  AFW_URI_OCTET_HEXDIGIT |
273 
274  /* 3A : */
275  AFW_URI_OCTET_GEN_DELIM |
276  AFW_URI_OCTET_COLON |
278 
279  /* 3B ; */
280  AFW_URI_OCTET_SUB_DELIM |
282 
283  /* 3C < */
284  AFW_URI_OCTET_INVALID,
285 
286  /* 3D = */
287  AFW_URI_OCTET_SUB_DELIM,
288 
289  /* 3E > */
290  AFW_URI_OCTET_INVALID,
291 
292  /* 3F ? */
293  AFW_URI_OCTET_GEN_DELIM |
294  AFW_URI_OCTET_QUESTION |
296 
297 
298  /* 40 @ */
299  AFW_URI_OCTET_GEN_DELIM |
300  AFW_URI_OCTET_AT |
302 
303  /* 41 A */
304  AFW_URI_OCTET_UNRESERVED |
305  AFW_URI_OCTET_ALPHA_UPPER |
306  AFW_URI_OCTET_HEXDIGIT |
310 
311  /* 42 B */
312  AFW_URI_OCTET_UNRESERVED |
313  AFW_URI_OCTET_ALPHA_UPPER |
314  AFW_URI_OCTET_HEXDIGIT |
318 
319  /* 43 C */
320  AFW_URI_OCTET_UNRESERVED |
321  AFW_URI_OCTET_ALPHA_UPPER |
322  AFW_URI_OCTET_HEXDIGIT |
326 
327  /* 44 D */
328  AFW_URI_OCTET_UNRESERVED |
329  AFW_URI_OCTET_ALPHA_UPPER |
330  AFW_URI_OCTET_HEXDIGIT |
334 
335  /* 45 E */
336  AFW_URI_OCTET_UNRESERVED |
337  AFW_URI_OCTET_ALPHA_UPPER |
338  AFW_URI_OCTET_HEXDIGIT |
342 
343  /* 46 F */
344  AFW_URI_OCTET_UNRESERVED |
345  AFW_URI_OCTET_ALPHA_UPPER |
346  AFW_URI_OCTET_HEXDIGIT |
350 
351  /* 47 G */
352  AFW_URI_OCTET_UNRESERVED |
353  AFW_URI_OCTET_ALPHA_UPPER |
357 
358  /* 48 H */
359  AFW_URI_OCTET_UNRESERVED |
360  AFW_URI_OCTET_ALPHA_UPPER |
364 
365  /* 49 I */
366  AFW_URI_OCTET_UNRESERVED |
367  AFW_URI_OCTET_ALPHA_UPPER |
371 
372  /* 4A J */
373  AFW_URI_OCTET_UNRESERVED |
374  AFW_URI_OCTET_ALPHA_UPPER |
378 
379  /* 4B K */
380  AFW_URI_OCTET_UNRESERVED |
381  AFW_URI_OCTET_ALPHA_UPPER |
385 
386  /* 4C L */
387  AFW_URI_OCTET_UNRESERVED |
388  AFW_URI_OCTET_ALPHA_UPPER |
392 
393  /* 4D M */
394  AFW_URI_OCTET_UNRESERVED |
395  AFW_URI_OCTET_ALPHA_UPPER |
399 
400  /* 4E N */
401  AFW_URI_OCTET_UNRESERVED |
402  AFW_URI_OCTET_ALPHA_UPPER |
406 
407  /* 4F O */
408  AFW_URI_OCTET_UNRESERVED |
409  AFW_URI_OCTET_ALPHA_UPPER |
413 
414 
415  /* 50 P */
416  AFW_URI_OCTET_UNRESERVED |
417  AFW_URI_OCTET_ALPHA_UPPER |
421 
422  /* 51 Q */
423  AFW_URI_OCTET_UNRESERVED |
424  AFW_URI_OCTET_ALPHA_UPPER |
428 
429  /* 52 R */
430  AFW_URI_OCTET_UNRESERVED |
431  AFW_URI_OCTET_ALPHA_UPPER |
435 
436  /* 53 S */
437  AFW_URI_OCTET_UNRESERVED |
438  AFW_URI_OCTET_ALPHA_UPPER |
442 
443  /* 54 T */
444  AFW_URI_OCTET_UNRESERVED |
445  AFW_URI_OCTET_ALPHA_UPPER |
449 
450  /* 55 U */
451  AFW_URI_OCTET_UNRESERVED |
452  AFW_URI_OCTET_ALPHA_UPPER |
456 
457  /* 56 V */
458  AFW_URI_OCTET_UNRESERVED |
459  AFW_URI_OCTET_ALPHA_UPPER |
463 
464  /* 57 W */
465  AFW_URI_OCTET_UNRESERVED |
466  AFW_URI_OCTET_ALPHA_UPPER |
470 
471  /* 58 X */
472  AFW_URI_OCTET_UNRESERVED |
473  AFW_URI_OCTET_ALPHA_UPPER |
477 
478  /* 59 Y */
479  AFW_URI_OCTET_UNRESERVED |
480  AFW_URI_OCTET_ALPHA_UPPER |
484 
485  /* 5A Z */
486  AFW_URI_OCTET_UNRESERVED |
487  AFW_URI_OCTET_ALPHA_UPPER |
491 
492  /* 5B [ */
493  AFW_URI_OCTET_GEN_DELIM,
494 
495  /* 5C \ */
496  AFW_URI_OCTET_INVALID,
497 
498  /* 5D ] */
499  AFW_URI_OCTET_GEN_DELIM,
500 
501  /* 5E ^ */
502  AFW_URI_OCTET_INVALID,
503 
504  /* 5F _ */
505  AFW_URI_OCTET_UNRESERVED |
509 
510 
511  /* 60 ` */
512  AFW_URI_OCTET_INVALID,
513 
514  /* 61 a */
515  AFW_URI_OCTET_UNRESERVED |
516  AFW_URI_OCTET_ALPHA_LOWER |
517  AFW_URI_OCTET_HEXDIGIT |
521 
522  /* 62 b */
523  AFW_URI_OCTET_UNRESERVED |
524  AFW_URI_OCTET_ALPHA_LOWER |
525  AFW_URI_OCTET_HEXDIGIT |
529 
530  /* 63 c */
531  AFW_URI_OCTET_UNRESERVED |
532  AFW_URI_OCTET_ALPHA_LOWER |
533  AFW_URI_OCTET_HEXDIGIT |
537 
538  /* 64 d */
539  AFW_URI_OCTET_UNRESERVED |
540  AFW_URI_OCTET_ALPHA_LOWER |
541  AFW_URI_OCTET_HEXDIGIT |
545 
546  /* 65 e */
547  AFW_URI_OCTET_UNRESERVED |
548  AFW_URI_OCTET_ALPHA_LOWER |
549  AFW_URI_OCTET_HEXDIGIT |
553 
554  /* 66 f */
555  AFW_URI_OCTET_UNRESERVED |
556  AFW_URI_OCTET_ALPHA_LOWER |
557  AFW_URI_OCTET_HEXDIGIT |
561 
562  /* 67 g */
563  AFW_URI_OCTET_UNRESERVED |
564  AFW_URI_OCTET_ALPHA_LOWER |
568 
569  /* 68 h */
570  AFW_URI_OCTET_UNRESERVED |
571  AFW_URI_OCTET_ALPHA_LOWER |
575 
576  /* 69 i */
577  AFW_URI_OCTET_UNRESERVED |
578  AFW_URI_OCTET_ALPHA_LOWER |
582 
583  /* 6A j */
584  AFW_URI_OCTET_UNRESERVED |
585  AFW_URI_OCTET_ALPHA_LOWER |
589 
590  /* 6B k */
591  AFW_URI_OCTET_UNRESERVED |
592  AFW_URI_OCTET_ALPHA_LOWER |
596 
597  /* 6C l */
598  AFW_URI_OCTET_UNRESERVED |
599  AFW_URI_OCTET_ALPHA_LOWER |
603 
604  /* 6D m */
605  AFW_URI_OCTET_UNRESERVED |
606  AFW_URI_OCTET_ALPHA_LOWER |
610 
611  /* 6E n */
612  AFW_URI_OCTET_UNRESERVED |
613  AFW_URI_OCTET_ALPHA_LOWER |
617 
618  /* 6F o */
619  AFW_URI_OCTET_UNRESERVED |
620  AFW_URI_OCTET_ALPHA_LOWER |
624 
625 
626  /* 70 p */
627  AFW_URI_OCTET_UNRESERVED |
628  AFW_URI_OCTET_ALPHA_LOWER |
632 
633  /* 71 q */
634  AFW_URI_OCTET_UNRESERVED |
635  AFW_URI_OCTET_ALPHA_LOWER |
639 
640  /* 72 r */
641  AFW_URI_OCTET_UNRESERVED |
642  AFW_URI_OCTET_ALPHA_LOWER |
646 
647  /* 73 s */
648  AFW_URI_OCTET_UNRESERVED |
649  AFW_URI_OCTET_ALPHA_LOWER |
653 
654  /* 74 t */
655  AFW_URI_OCTET_UNRESERVED |
656  AFW_URI_OCTET_ALPHA_LOWER |
660 
661  /* 75 u */
662  AFW_URI_OCTET_UNRESERVED |
663  AFW_URI_OCTET_ALPHA_LOWER |
667 
668  /* 76 v */
669  AFW_URI_OCTET_UNRESERVED |
670  AFW_URI_OCTET_ALPHA_LOWER |
674 
675  /* 77 w */
676  AFW_URI_OCTET_UNRESERVED |
677  AFW_URI_OCTET_ALPHA_LOWER |
681 
682  /* 78 x */
683  AFW_URI_OCTET_UNRESERVED |
684  AFW_URI_OCTET_ALPHA_LOWER |
688 
689  /* 79 y */
690  AFW_URI_OCTET_UNRESERVED |
691  AFW_URI_OCTET_ALPHA_LOWER |
695 
696  /* 7A z */
697  AFW_URI_OCTET_UNRESERVED |
698  AFW_URI_OCTET_ALPHA_LOWER |
702 
703  /* 7B { */
704  AFW_URI_OCTET_INVALID,
705 
706  /* 7C | */
707  AFW_URI_OCTET_INVALID,
708 
709  /* 7D } */
710  AFW_URI_OCTET_INVALID,
711 
712  /* 7E ~ */
713  AFW_URI_OCTET_UNRESERVED |
717 
718  /* 7F DEL */
719  AFW_URI_OCTET_INVALID
720 
721  /* 80 - FF will be 0 which is AFW_URI_OCTET_INVALID */
722 
723 };
724 
725 
726 
727 
/*
 * States of the state machine used by the URI parse loop later in this file.
 * That loop keeps a local of this type, starts it at impl_state_initial,
 * switches on it once per iteration, and stops when it becomes
 * impl_state_end.
 */
728 typedef enum {
729  impl_state_error,
730  impl_state_end,                       /* Terminates the parse loop. */
731  impl_state_initial,                   /* Starting state of the parse loop. */
732  impl_state_hier_part_begin,
733  impl_state_hier_part_slash1,
734  impl_state_hier_part_slash2,
735  impl_state_scheme_begin,
736  impl_state_scheme,
737  impl_state_authority_begin,
738  impl_state_authority_determine,
739  impl_state_authority_userinfo_begin,
740  impl_state_authority_userinfo,
741  impl_state_authority_host_begin,
742  impl_state_authority_reg_name,
743  impl_state_authority_host_v4,
744  impl_state_authority_host_v6,
745  impl_state_authority_optional_port,
746  impl_state_authority_port,
747  impl_state_path_begin,
748  impl_state_path,
749  impl_state_query_begin,
750  impl_state_query,
751  impl_segment_begin,
752  impl_segment,
753  impl_segment_parameter_begin,
754  impl_segment_parameter_value_begin,
755  impl_state_fragment_begin,
756  impl_state_fragment
757 } impl_state;
758 
759 
760 /*
 * The length needed to URI encode a string.
 *
 * Walks string->len octets starting at string->s. Every octet adds 1 to the
 * result; an octet for which afw_uri_octet_test(octet, mask) is false adds 2
 * more (room for '%' plus two hex digits). Nothing is allocated or modified,
 * and xctx is not used by the visible body.
 *
 * Returns the encoded length in octets; this equals string->len when no
 * octet needs encoding.
 */
763  const afw_utf8_t *string,
765  afw_xctx_t *xctx)
766 {
767  afw_size_t len;
768  const afw_octet_t *c, *end;
769 
770  for (len = 0,
771  c = (const afw_octet_t *)string->s,
772  end = c + string->len;
773  c < end;
774  c++, len++)
775  {
 /* Octet fails the mask test: it will be written as a 3 octet %XX. */
776  if (!afw_uri_octet_test(*c, mask))
777  {
778  len += 2;
779  }
780  }
781 
782  return len;
783 }
784 
785 
786 /*
 * The length needed to URI encode a raw.
 *
 * Same counting rule as the string variant, applied to raw->size octets
 * starting at raw->ptr: 1 per octet, plus 2 more for each octet for which
 * afw_uri_octet_test(octet, mask) is false. xctx is not used by the visible
 * body.
 */
789  const afw_memory_t *raw,
791  afw_xctx_t *xctx)
792 {
793  afw_size_t len;
794  const afw_octet_t *c, *end;
795 
796  for (len = 0,
797  c = raw->ptr,
798  end = c + raw->size;
799  c < end;
800  c++, len++)
801  {
 /* Octet fails the mask test: it will be written as a 3 octet %XX. */
802  if (!afw_uri_octet_test(*c, mask))
803  {
804  len += 2;
805  }
806  }
807 
808  return len;
809 }
810 
811 
812 /*
 * URI encode a string.
 *
 * Octets for which afw_uri_octet_test(octet, mask) is true are copied as is;
 * every other octet is written as '%' followed by two hex digits produced by
 * afw_ascii_encode_hex_digit() for the high and low nibble.
 *
 * Returns:
 *  - &afw_s_a_empty_string when string is NULL or has length 0,
 *  - the string argument itself (no copy) when nothing needs encoding,
 *  - otherwise a new afw_utf8_t whose struct and buffer are both allocated
 *    from pool p. No terminating NUL is written to the buffer.
 */
813 AFW_DEFINE(const afw_utf8_t *)
815  const afw_utf8_t *string,
817  const afw_pool_t *p,
818  afw_xctx_t *xctx)
819 {
820  afw_size_t len;
821  afw_octet_t *s;
822  const afw_octet_t *c, *end;
823  afw_utf8_t *result;
824 
825 
826  /* If string is NULL or 0 length, just return empty string. */
827  if (!string || string->len == 0) {
828  return &afw_s_a_empty_string;
829  }
830 
831  /* Get encoded length. */
832  len = afw_uri_encode_len(string, mask, xctx);
833 
834  /* If len is the same as string, no encoding is required. */
835  if (len == string->len) {
836  return string;
837  }
838 
839  /* Encode string and return it. */
840  result = afw_pool_calloc_type(p, afw_utf8_t, xctx);
841  s = afw_pool_malloc(p, len, xctx);
842  result->s = (const afw_utf8_octet_t *)s;
843  result->len = len;
844  for (c = (const afw_octet_t *)string->s,
845  end = c + string->len;
846  c < end;
847  c++)
848  {
849  if (afw_uri_octet_test(*c, mask))
850  {
851  *s++ = *c;
852  } else {
 /* High nibble first, then low nibble. */
853  *s++ = '%';
854  *s++ = afw_ascii_encode_hex_digit(*c / 16);
855  *s++ = afw_ascii_encode_hex_digit(*c % 16);
856  }
857  }
858  return result;
859 
860 }
861 
862 
863 
864 /*
 * Create a URI encoded string from a pointer and length.
 *
 * Wraps s/len with afw_utf8_create() in pool p and passes the result to
 * afw_uri_encode() with the same mask, pool and xctx; the return value is
 * whatever afw_uri_encode() returns.
 */
865 AFW_DEFINE(const afw_utf8_t *)
867  const afw_utf8_octet_t *s,
868  afw_size_t len,
870  const afw_pool_t *p,
871  afw_xctx_t *xctx)
872 {
873  const afw_utf8_t *result;
874 
875  result = afw_utf8_create(s, len, p, xctx);
876  return afw_uri_encode(result, mask, p, xctx);
877 }
878 
879 
880 
881 
882 /*
 * URI encode a raw.
 *
 * Always allocates a zeroed afw_utf8_t from pool p. When the encoded length
 * is greater than 0, a buffer of that length is allocated from p and filled:
 * octets passing afw_uri_octet_test(octet, mask) are copied, all others are
 * written as '%' plus two hex digits. When the encoded length is 0 the
 * returned struct keeps its zeroed s and len.
 */
883 AFW_DEFINE(const afw_utf8_t *)
885  const afw_memory_t *raw,
887  const afw_pool_t *p,
888  afw_xctx_t *xctx)
889 {
890  afw_size_t len;
891  afw_octet_t *s;
892  const afw_octet_t *c, *end;
893  afw_utf8_t *result;
894 
895  result = afw_pool_calloc_type(p, afw_utf8_t, xctx);
896 
897  len = afw_uri_encode_raw_len(raw, mask, xctx);
898  if (len > 0) {
899  s = afw_pool_malloc(p, len, xctx);
900  result->s = (const afw_utf8_octet_t *)s;
901  result->len = len;
902  for (c = raw->ptr,
903  end = c + raw->size;
904  c < end;
905  c++)
906  {
907  if (afw_uri_octet_test(*c, mask))
908  {
909  *s++ = *c;
910  }
911  else {
 /* High nibble first, then low nibble. */
912  *s++ = '%';
913  *s++ = afw_ascii_encode_hex_digit(*c / 16);
914  *s++ = afw_ascii_encode_hex_digit(*c % 16);
915  }
916  }
917  }
918 
919  return result;
920 }
921 
922 
923 /*
 * URI encode a string into a caller supplied buffer.
 *
 * s/s_len describe the destination buffer. The encoded length is computed
 * first; if it exceeds s_len nothing is written and -1 is returned.
 * Otherwise exactly that many octets are written to s (no terminating NUL)
 * and the encoded length is returned. Pool p is not used by the visible body.
 *
 * NOTE(review): the return type is not visible here; if it is unsigned the
 * -1 will reach callers as the maximum value of that type -- confirm against
 * the header.
 */
926  afw_utf8_octet_t *s,
927  afw_size_t s_len,
928  const afw_utf8_t *string,
930  const afw_pool_t *p,
931  afw_xctx_t *xctx)
932 {
933  afw_size_t len;
934  const afw_octet_t *c, *end;
935 
936  len = afw_uri_encode_len(string, mask, xctx);
 /* Destination too small: fail before writing anything. */
937  if (len > s_len) return -1;
938  if (len > 0) {
939  for (c = (const afw_octet_t *)string->s,
940  end = c + string->len;
941  c < end;
942  c++)
943  {
944  if (afw_uri_octet_test(*c, mask))
945  {
946  *s++ = *c;
947  }
948  else {
949  *s++ = '%';
950  *s++ = afw_ascii_encode_hex_digit(*c / 16);
951  *s++ = afw_ascii_encode_hex_digit(*c % 16);
952  }
953  }
954  }
955 
956  return len;
957 }
958 
959 
960 
961 /*
 * URI encode a raw (afw_memory_t) into a caller supplied buffer.
 *
 * s/s_len describe the destination buffer. The encoded length is computed
 * first; if it exceeds s_len nothing is written and -1 is returned.
 * Otherwise exactly that many octets are written to s (no terminating NUL)
 * and the encoded length is returned. Pool p is not used by the visible body.
 *
 * NOTE(review): the input is walked through afw_utf8_octet_t pointers, unlike
 * the other encoders in this file which use afw_octet_t. If afw_utf8_octet_t
 * is a signed type, *c / 16 and *c % 16 can be negative for octets >= 0x80 --
 * confirm the typedef and what afw_ascii_encode_hex_digit() accepts.
 */
964  afw_utf8_octet_t *s,
965  afw_size_t s_len,
966  const afw_memory_t *raw,
968  const afw_pool_t *p,
969  afw_xctx_t *xctx)
970 {
971  afw_size_t len;
972  const afw_utf8_octet_t *c, *end;
973 
974  len = afw_uri_encode_raw_len(raw, mask, xctx);
 /* Destination too small: fail before writing anything. */
975  if (len > s_len) return -1;
976  if (len > 0) {
977  for (c = (const afw_utf8_octet_t *)raw->ptr, end = c + raw->size;
978  c < end;
979  c++)
980  {
981  if (afw_uri_octet_test(*c, mask))
982  {
983  *s++ = *c;
984  }
985  else {
986  *s++ = '%';
987  *s++ = afw_ascii_encode_hex_digit(*c / 16);
988  *s++ = afw_ascii_encode_hex_digit(*c % 16);
989  }
990  }
991  }
992 
993  return len;
994 }
995 
996 
997 
998 /*
 * URI decode a string. The % encoding should already be validated.
 *
 * Decodes with afw_uri_decode_to_raw(), then requires the decoded octets to
 * pass afw_utf8_is_nfc(); if they do not, a general error is thrown.
 *
 * The afw_memory_t result is returned through a cast to afw_utf8_t.
 * NOTE(review): this relies on the two structs sharing a compatible
 * pointer/length layout -- confirm against their declarations.
 */
999 AFW_DEFINE(const afw_utf8_t *)
1001  const afw_utf8_t *encoded,
1002  const afw_pool_t *p,
1003  afw_xctx_t *xctx)
1004 {
1005  const afw_memory_t *decoded;
1006 
1007  decoded = afw_uri_decode_to_raw(encoded, p, xctx);
1008 
1009  if (!afw_utf8_is_nfc((const afw_utf8_octet_t *)
1010  decoded->ptr, decoded->size, p, xctx))
1011  {
1012  AFW_THROW_ERROR_Z(general,
1013  "Only URIs that can be decoded to NFC Unicode are supported",
1014  xctx);
1015  }
1016 
1017  return (const afw_utf8_t *)decoded;
1018 }
1019 
1020 
1021 /*
 * Create a URI decoded string from a pointer and length.
 *
 * Decodes s/len with afw_uri_decode_to_raw_create(), then requires the
 * decoded octets to pass afw_utf8_is_nfc(); if they do not, a general error
 * is thrown.
 *
 * The afw_memory_t result is returned through a cast to afw_utf8_t.
 * NOTE(review): this relies on the two structs sharing a compatible
 * pointer/length layout -- confirm against their declarations.
 */
1022 AFW_DEFINE(const afw_utf8_t *)
1024  const afw_utf8_octet_t *s,
1025  afw_size_t len,
1026  const afw_pool_t *p,
1027  afw_xctx_t *xctx)
1028 {
1029  const afw_memory_t *decoded;
1030 
1031  decoded = afw_uri_decode_to_raw_create(s, len, p, xctx);
1032 
1033  if (!afw_utf8_is_nfc((const afw_utf8_octet_t *)
1034  decoded->ptr, decoded->size, p, xctx))
1035  {
1036  AFW_THROW_ERROR_Z(general,
1037  "Only URIs that can be decoded to NFC Unicode are supported",
1038  xctx);
1039  }
1040 
1041  return (const afw_utf8_t *)decoded;
1042 }
1043 
1044 
1045 /*
 * Create a URI decoded raw (afw_memory_t) from a pointer and length.
 *
 * A zeroed afw_memory_t is always allocated from pool p. If the input is
 * empty or contains no '%', the result simply points at s (no copy is made).
 * Otherwise a buffer of len - 2 * (number of '%') octets is allocated from p
 * and filled, turning each %XX triplet into one octet.
 *
 * This function does not validate the input: every '%' is assumed to be
 * followed by two hex digits inside the buffer. A truncated triplet would
 * make the size computation and the reads below go wrong, so callers must
 * validate first.
 */
1046 AFW_DEFINE(const afw_memory_t *)
1048  const afw_utf8_octet_t *s,
1049  afw_size_t len,
1050  const afw_pool_t *p,
1051  afw_xctx_t *xctx)
1052 {
1053  afw_memory_t *result;
1054  afw_utf8_octet_t *o;
1055  const afw_utf8_octet_t *c;
1056  afw_utf8_octet_t c1, c2;
1057  const afw_utf8_octet_t *end;
1058  afw_size_t percent_count;
1059  afw_size_t decoded_len;
1060 
1061  result = afw_pool_calloc_type(p, afw_memory_t, xctx);
1062 
 /* Count the '%' octets; each one stands for a 3 octet triplet. */
1063  for (percent_count = 0, c = s, end = c + len;
1064  c < end; c++)
1065  {
1066  if (*c == '%') percent_count++;
1067  }
1068 
 /* Nothing to decode: alias the caller's buffer. */
1069  if (percent_count == 0 || len == 0) {
1070  result->ptr = (const afw_octet_t *)s;
1071  result->size = len;
1072  return result;
1073  }
1074 
1075  result->size = decoded_len = len - 2 * percent_count;
1076  o = afw_pool_malloc(p, result->size, xctx);
1077  result->ptr = (const afw_octet_t *)o;
1078 
 /* Loop is driven by the number of output octets still to produce. */
1079  for (c = s; decoded_len > 0; decoded_len--) {
1080  if (*c == '%') {
1081  c++;
1082  c1 = *c++;
1083  c2 = *c++;
 /*
  * NOTE(review): the right-hand operand of this sum is not present in this
  * listing; presumably it is the decoded value of c2 -- confirm.
  */
1084  *o++ = afw_ascii_decode_hex_digit(c1) * 16 +
1086  }
1087  else {
1088  *o++ = *c++;
1089  }
1090  }
1091 
1092  return result;
1093 }
1094 
1095 
1096 
1097 /*
 * URI decode a string to a raw (afw_memory_t). The % encoding should already
 * be validated.
 *
 * If the input is empty or contains no '%', the encoded argument itself is
 * returned through a cast to afw_memory_t (no allocation, no copy).
 * Otherwise an afw_memory_t and a buffer of
 * encoded->len - 2 * (number of '%') octets are allocated from pool p and
 * filled, turning each %XX triplet into one octet.
 *
 * This function does not validate the input: every '%' is assumed to be
 * followed by two hex digits inside the buffer.
 *
 * NOTE(review): the cast on the no-'%' path relies on afw_utf8_t and
 * afw_memory_t sharing a compatible pointer/length layout -- confirm.
 */
1098 AFW_DEFINE(const afw_memory_t *)
1100  const afw_utf8_t *encoded,
1101  const afw_pool_t *p,
1102  afw_xctx_t *xctx)
1103 {
1104  afw_memory_t *result;
1105  afw_utf8_octet_t *o;
1106  const afw_utf8_octet_t *c;
1107  const afw_utf8_octet_t *end;
1108  afw_utf8_octet_t c1, c2;
1109  afw_size_t len;
1110  afw_size_t percent_count;
1111 
 /* Count the '%' octets; each one stands for a 3 octet triplet. */
1112  for (percent_count = 0, c = encoded->s, end = c + encoded->len;
1113  c < end; c++)
1114  {
1115  if (*c == '%') percent_count++;
1116  }
1117 
 /* Nothing to decode: hand back the input unchanged. */
1118  if (percent_count == 0 || encoded->len == 0) {
1119  return (const afw_memory_t *)encoded;
1120  }
1121 
 /* Not zeroed; both members are assigned immediately below. */
1122  result = afw_pool_malloc_type(p, afw_memory_t, xctx);
1123  result->size = len = encoded->len - 2 * percent_count;
1124  o = afw_pool_malloc(p, result->size, xctx);
1125  result->ptr = (const afw_octet_t *)o;
1126 
 /* Loop is driven by the number of output octets still to produce. */
1127  for (c = encoded->s; len > 0; len--) {
1128  if (*c == '%') {
1129  c++;
1130  c1 = *c++;
1131  c2 = *c++;
 /*
  * NOTE(review): the right-hand operand of this sum is not present in this
  * listing; presumably it is the decoded value of c2 -- confirm.
  */
1132  *o++ = afw_ascii_decode_hex_digit(c1) * 16 +
1134  }
1135  else {
1136  *o++ = *c++;
1137  }
1138  }
1139 
1140  return result;
1141 }
1142 
1143 
1144 
/*
 * Initialize a caller supplied afw_uri_parser_t for tokenizing uri.
 *
 * The whole struct is zeroed first, then the uri and pool are recorded and
 * the cursor (c) and end pointer are set to the first octet and one past the
 * last octet of uri->s. The uri is referenced, not copied. xctx is not used
 * by the visible body.
 */
1145 AFW_DEFINE(void)
1147  const afw_utf8_t *uri,
1148  const afw_pool_t *p, afw_xctx_t *xctx)
1149 {
1150  memset(parser, 0, sizeof(afw_uri_parser_t));
1151  parser->uri = uri;
1152  parser->p = p;
1153  parser->c = (const afw_octet_t *)uri->s;
1154  parser->end = (const afw_octet_t *)uri->s + uri->len;
1155 }
1156 
1157 
1158 
1161  const afw_pool_t *p, afw_xctx_t *xctx)
1162 {
 /*
  * Allocate a parser from pool p and initialize it for uri with
  * afw_uri_parser_initialize(), which zeroes the struct, so the
  * non-zeroing malloc here is sufficient. Returns the new parser.
  */
1163  afw_uri_parser_t *parser;
1164 
1165  parser = afw_pool_malloc_type(p, afw_uri_parser_t, xctx);
1166 
1167  afw_uri_parser_initialize(parser, uri, p, xctx);
1168 
1169  return parser;
1170 }
1171 
1172 
1173 
/*
 * Advance the parser to the next token of its URI.
 *
 * Results are left in the parser (the const on the parameter is cast away):
 *  - token        the token text; s is NULL and len is 0 at end of input,
 *  - is_reserved  true for a single reserved octet and at end of input,
 *  - type         octet type of the first octet of the token.
 *
 * Token rules, in order:
 *  1. If the reuse flag is set, clear it and return with the previous token
 *     left in place.
 *  2. At end of input, return the empty token described above.
 *  3. An octet whose table type is 0 is an error.
 *  4. A reserved octet is a 1 octet token.
 *  5. '.' is a 1 octet token when consider_period_a_token is set.
 *  6. Otherwise the token runs up to (not including) the next reserved octet,
 *     the next '.' when consider_period_a_token is set, or end of input.
 *     If the run contained any %XX triplet, the token is replaced by the
 *     result of afw_uri_decode(), so it no longer points into the URI.
 *
 * Errors are thrown as a general error that reports the URI and the offset
 * of the cursor.
 */
1174 AFW_DEFINE(void)
1176  const afw_uri_parser_t *parser,
1177  afw_xctx_t *xctx)
1178 {
1179  afw_uri_parser_t *self = (afw_uri_parser_t *)parser;
1180  afw_boolean_t is_encoded;
1181  const afw_utf8_t *decoded;
1182  afw_utf8_octet_t x1, x2;
1183  afw_uri_octet_type_t type;
1184 
 /* A token was pushed back: hand the same one out again. */
1185  if (self->reuse) {
1186  self->reuse = false;
1187  return;
1188  }
1189 
 /* End of input: empty token, flagged as reserved. */
1190  if (self->c == self->end) {
1191  memset(&self->token, 0, sizeof(afw_utf8_t));
1192  self->token.s = NULL;
1193  self->token.len = 0;
1194  self->is_reserved = true;
1195  return;
1196  }
1197 
 /* A type of 0 marks an octet that is not allowed in a URI. */
1198  self->type = afw_uri_octet_type[*self->c];
1199  if (!self->type) {
1200  goto error;
1201  }
1202 
1203  self->token.s = (const afw_utf8_octet_t *)self->c;
1204 
1205  if (AFW_URI_OCTET_IS(self->type, RESERVED)) {
1206  self->token.len = 1;
1207  self->is_reserved = true;
1208  (self->c)++;
1209  return;
1210  }
1211 
1212  self->is_reserved = false;
1213 
1214  if (*self->c == '.' && self->consider_period_a_token) {
1215  self->token.len = 1;
1216  (self->c)++;
1217  return;
1218  }
1219 
1220  for (is_encoded = false; self->c != self->end; (self->c)++) {
1221  if (*self->c == '%') {
 /*
  * NOTE(review): this check lets self->c + 2 == self->end through, in which
  * case *(self->c + 2) below reads the octet at end. The parse loop later in
  * this file uses c + 3 > end for the same purpose.
  */
1222  if (self->c + 2 > self->end) {
1223  goto error;
1224  }
1225  is_encoded = true;
 /*
  * NOTE(review): the < 0 tests only work if afw_utf8_octet_t is signed and
  * afw_ascii_decode_hex_digit() reports failure as a negative value --
  * confirm (impl_component_len() in this file tests for 255 instead).
  */
1226  x1 = afw_ascii_decode_hex_digit(*(self->c + 1));
1227  if (x1 < 0) {
1228  goto error;
1229  }
1230  x2 = afw_ascii_decode_hex_digit(*(self->c + 2));
1231  if (x2 < 0) {
1232  goto error;
1233  }
 /* Skip the two hex digits; the loop increment skips the '%'. */
1234  self->c += 2;
1235  } else {
1236  if (*self->c == '.' && self->consider_period_a_token)
1237  {
1238  break;
1239  }
1240  type = afw_uri_octet_type[*self->c];
1241  if (AFW_URI_OCTET_IS(type, RESERVED)) {
1242  break;
1243  }
1244  }
1245  }
1246  self->token.len = self->c - (const afw_octet_t *)self->token.s;
 /* Token contained %XX: replace it with its decoded form. */
1247  if (is_encoded) {
1248  decoded = afw_uri_decode(&self->token, self->p, xctx);
1249  self->token.s = decoded->s;
1250  self->token.len = decoded->len;
1251  }
1252 
1253  return;
1254 
1255 error:
1256  AFW_THROW_ERROR_FZ(general, xctx,
1257  "Error parsing URI %" AFW_UTF8_FMT " at offset %d",
1258  AFW_UTF8_FMT_ARG(self->uri),
1259  (int)(self->c - (const afw_octet_t *)self->uri->s));
1260 }
1261 
1262 
1263 
/*
 * Push the current token back so that it is returned again.
 *
 * Sets the parser's reuse flag (the const on the parameter is cast away).
 * The token getter in this file clears the flag and returns without
 * advancing when it finds it set. Only one token can be pushed back: a
 * general error is thrown if the flag is already set.
 */
1264 AFW_DEFINE(void)
1266  const afw_uri_parser_t *parser,
1267  afw_xctx_t *xctx)
1268 {
1269  afw_uri_parser_t *self = (afw_uri_parser_t *)parser;
1270 
1271  if (self->reuse) {
1272  AFW_THROW_ERROR_Z(general, "Token already being reused", xctx);
1273  }
1274 
1275  self->reuse = true;
1276 }
1277 
1278 
1279 
1280 static afw_size_t
1281 impl_component_len(const afw_utf8_t *component)
1282 {
1283  afw_size_t len;
1284  const afw_octet_t *s, *end;
1285  afw_octet_t o, x1, x2;
1286  afw_uri_octet_type_t type;
1287 
1288  for (len = 0, s = (const afw_octet_t *)component->s,
1289  end = s + component->len;
1290  s < end; s++)
1291  {
1292  if (*s == '%') {
1293  if (s + 2 > end) goto error;
1294  x1 = afw_ascii_decode_hex_digit(*(s + 1));
1295  if (x1 == 255) goto error;
1296  x2 = afw_ascii_decode_hex_digit(*(s + 2));
1297  if (x2 == 255) goto error;
1298  s += 2;
1299  o = x1 * 16 + x2;
1300  type = afw_uri_octet_type[o];
1301  if (AFW_URI_OCTET_IS(type, ENCODE_URI)) {
1302  len += 1;
1303  }
1304  else {
1305  len += 3;
1306  }
1307  }
1308  else {
1309  type = afw_uri_octet_type[*s];
1310  if (AFW_URI_OCTET_IS(type, INVALID)) {
1311  len += 3;
1312  }
1313  else {
1314  len += 1;
1315  }
1316  }
1317  }
1318 
1319  return len;
1320 
1321 error:
1322  return -1;
1323 }
1324 
1325 
1326 /* Assumes impl_component_len() was called to validate. */
1327 static afw_size_t
1328 impl_component_encode(afw_utf8_octet_t *s, const afw_utf8_t *component)
1329 {
1330  afw_size_t len;
1331  const afw_octet_t *c, *end;
1332  afw_octet_t o, x1, x2;
1333  afw_uri_octet_type_t type;
1334 
1335  for (len = 0,
1336  c = (const afw_octet_t *)component->s,
1337  end = c + component->len;
1338  c < end;
1339  c++)
1340  {
1341  if (*c == '%') {
1342  x1 = afw_ascii_decode_hex_digit(*(c + 1));
1343  x2 = afw_ascii_decode_hex_digit(*(c + 2));
1344  c += 2;
1345  o = x1 * 16 + x2;
1346  type = afw_uri_octet_type[o];
1347  if (AFW_URI_OCTET_IS(type, ENCODE_URI)) {
1348  *s++ = o;
1349  len++;
1350  }
1351  else {
1352  *s++ = '%';
1353  *s++ = afw_ascii_encode_hex_digit(o / 16);
1354  *s++ = afw_ascii_encode_hex_digit(o % 16);
1355  len += 3;
1356  }
1357  }
1358  else {
1359  type = afw_uri_octet_type[*c];
1360  if (AFW_URI_OCTET_IS(type, INVALID)) {
1361  *s++ = '%';
1362  *s++ = afw_ascii_encode_hex_digit(*c / 16);
1363  *s++ = afw_ascii_encode_hex_digit(*c % 16);
1364  len += 3;
1365  }
1366  else {
1367  *s++ = *c;
1368  len++;
1369  }
1370  }
1371  }
1372 
1373  return len;
1374 }
1375 
1376 
1377 
1378 static void
1379 impl_set_normalized_uri(
1380  afw_uri_parsed_t *parsed,
1381  const afw_pool_t *p,
1382  afw_xctx_t *xctx)
1383 {
1384  afw_size_t len;
1385  afw_utf8_octet_t *s;
1386 
1387  if (parsed->path_parsed) {
1388  parsed->normalized_uri.s = parsed->path_parsed->normalized_path.s;
1389  parsed->normalized_uri.len = parsed->path_parsed->normalized_path.len;
1390  return;
1391  }
1392 
1393  /* Calculate length. */
1394  len = 0;
1395  if (parsed->scheme.len != 0) {
1396  len += parsed->scheme.len + 1 /* : */;
1397  }
1398 
1399  if (parsed->userinfo || parsed->host || parsed->port.len != 0) {
1400  len += 2; /* // */
1401  if (parsed->userinfo) {
1402  len += parsed->userinfo->len + 1 /* @ */;
1403  }
1404  if (parsed->host) {
1405  len += parsed->host->len;
1406  }
1407  if (parsed->port.len != 0) {
1408  len += 1 /* : */
1409  + parsed->port.len;
1410  }
1411  }
1412 
1413  if (parsed->original_path.len != 0) {
1414  len += impl_component_len(&parsed->original_path);
1415  }
1416 
1417  if (parsed->original_query.len != 0) {
1418  len += impl_component_len(&parsed->original_query) + 1 /* ? */;
1419  }
1420 
1421  if (parsed->original_fragment.len != 0) {
1422  len += impl_component_len(&parsed->original_fragment) + 1 /* ? */;
1423  }
1424 
1425  /* Produce normalized URI. */
1426  parsed->normalized_uri.s = s = afw_pool_malloc(p, len, xctx);
1427  parsed->normalized_uri.len = len;
1428 
1429  if (parsed->scheme.len != 0) {
1430  memcpy(s, parsed->scheme.s, parsed->scheme.len);
1431  s += parsed->scheme.len;
1432  *s++ = ':';
1433  }
1434 
1435  if (parsed->userinfo || parsed->host || parsed->port.len != 0) {
1436  *s++ = '/';
1437  *s++ = '/';
1438  if (parsed->userinfo) {
1439  memcpy(s, parsed->userinfo->s, parsed->userinfo->len);
1440  s += parsed->userinfo->len;
1441  *s++ = '@';
1442  }
1443  if (parsed->host) {
1444  memcpy(s, parsed->host->s, parsed->host->len);
1445  s += parsed->host->len;
1446  }
1447  if (parsed->port.len != 0) {
1448  *s++ = ':';
1449  memcpy(s, parsed->port.s, parsed->port.len);
1450  s += parsed->port.len;
1451  }
1452  }
1453 
1454  if (parsed->original_path.len != 0) {
1455  s += impl_component_encode(s, &parsed->original_path);
1456  }
1457  if (parsed->original_query.len != 0) {
1458  *s++ = '?';
1459  s += impl_component_encode(s, &parsed->original_query);
1460  }
1461  if (parsed->original_fragment.len != 0) {
1462  *s++ = '#';
1463  s += impl_component_encode(s, &parsed->original_fragment);
1464  }
1465 }
1466 
1467 
1468 /* Parse an URI in specific pool. */
1471  const afw_utf8_t *uri,
1472  afw_boolean_t is_value_path,
1473  const afw_utf8_t *current_path,
1474  const afw_pool_t *p,
1475  afw_xctx_t *xctx)
1476 {
1477  afw_uri_octet_type_t type;
1478  impl_state state;
1479  const afw_octet_t *c;
1480  const afw_octet_t *end;
1481  afw_uri_parsed_t *parsed;
1482  afw_utf8_octet_t x1, x2;
1483  int increment;
1484 
1485  if (!uri || uri->len == 0) {
1486  AFW_THROW_ERROR_Z(general,
1487  "afw_uri_parse() requires a uri.",
1488  xctx);
1489  }
1490 
1491  parsed = afw_pool_calloc_type(p, afw_uri_parsed_t, xctx);
1492  parsed->original_uri = afw_utf8_clone(uri, p, xctx);
1493  if (current_path) {
1494  parsed->current_path = afw_utf8_clone(current_path, p, xctx);
1495  }
1496 
1497  for (
1498  state = impl_state_initial,
1499  end = (const afw_octet_t *)uri->s + uri->len,
1500  c = (const afw_octet_t *)uri->s,
1501  increment = 0;
1502 
1503  state != impl_state_end;
1504 
1505  )
1506  {
1507 
1508  /* If at end, type is -1. Code should check for end before checking type. */
1509  if (c == end) {
1510  type = -1;
1511  /* increment remains the same. */
1512  }
1513 
1514  /*
1515  * If char is '%', percent decode and advance cursor to last octet.
1516  * For example, %ab, *c will be 'b'. The code below can still use *c
1517  * to check for special characters as long as it is not HEXDIGIT. The
1518  * type is set to AFW_URI_OCTET_PCT_ENCODED.
1519  */
1520  else if (*c == '%') {
1521  if (c + 3 > end) goto error;
1522  x1 = afw_ascii_decode_hex_digit(*(c + 1));
1523  if (x1 < 0) goto error;
1524  x2 = afw_ascii_decode_hex_digit(*(c + 2));
1525  if (x2 < 0) goto error;
1526  type = AFW_URI_OCTET_PCT_ENCODED;
1527  increment = 3;
1528  }
1529 
1530  /* If not "%', lookup type. */
1531  else {
1532  type = afw_uri_octet_type[*c];
1533  if (!type) goto error;
1534  increment = 1;
1535  }
1536 
1537  /* Process based on state. */
1538  switch (state) {
1539 
1540  /* Initial state. */
1541  case impl_state_initial:
1542 
1543  /* If at end, reset c to start of uri and parse hier_part. */
1544  if (c == end) {
1545  c = (const afw_octet_t *)uri->s;
1546  state = impl_state_hier_part_begin;
1547  break;
1548  }
1549 
1550  /*
1551  * If ':', everything so far is scheme, so reset c to start of uri
1552  * and parse scheme.
1553  */
1554  if (*c == ':') {
1555  c = (const afw_octet_t *)uri->s;
1556  state = impl_state_scheme_begin;
1557  break;
1558  }
1559 
1560  /* If not a scheme continue character, reset c and parse hier part. */
1561  if (!AFW_URI_OCTET_IS(type, SCHEME_CONTINUE)) {
1562  c = (const afw_octet_t *)uri->s;
1563  state = impl_state_hier_part_begin;
1564  break;
1565  }
1566 
1567  c += increment;
1568  break;
1569 
1570 
1571  case impl_state_scheme_begin:
1572 
1573  if (!AFW_URI_OCTET_IS(type, SCHEME_START)) goto error;
1574 
1575  parsed->scheme.s = (const afw_utf8_octet_t *)c;
1576  c += increment;
1577  state = impl_state_scheme;
1578  break;
1579 
1580 
1581  case impl_state_scheme:
1582 
1583  if (*c == ':') {
1584  parsed->scheme.len = c - (const afw_octet_t *)parsed->scheme.s;
1585  c++;
1586  parsed->original_hier_part.s = (const afw_utf8_octet_t *)c;
1587  state = impl_state_hier_part_begin;
1588  break;
1589  }
1590 
1591  if (!AFW_URI_OCTET_IS(type, SCHEME_CONTINUE)) goto error;
1592  c += increment;
1593  break;
1594 
1595 
1596  case impl_state_hier_part_begin:
1597 
1598  if (c == end) {
1599  state = impl_state_end;
1600  break;
1601  }
1602 
1603  parsed->original_hier_part.s = (const afw_utf8_octet_t *)c;
1604 
1605  if (*c == '/') {
1606  c++;
1607  state = impl_state_hier_part_slash1;
1608  break;
1609  }
1610 
1611  state = impl_state_path_begin;
1612  break;
1613 
1614 
1615  case impl_state_hier_part_slash1:
1616 
1617  if (c == end) {
1618  c--;
1619  state = impl_state_path_begin;
1620  break;
1621  }
1622 
1623  if (*c == '/') {
1624  c++;
1625  state = impl_state_authority_begin;
1626  break;
1627  }
1628 
1629  c -= increment;
1630  state = impl_state_path_begin;
1631  break;
1632 
1633 
1634  case impl_state_authority_begin:
1635 
1636  if (c == end) goto error;
1637 
1638  parsed->original_authority.s = (const afw_utf8_octet_t *)c;
1639  state = impl_state_authority_determine;
1640  /* Fall thought to impl_state_authority_determine. */
1641 
1642  case impl_state_authority_determine:
1643 
1644  if (c == end || AFW_URI_OCTET_IS(type, GEN_DELIM)) {
1645  state = (c != end && *c == '@')
1646  ? impl_state_authority_userinfo_begin
1647  : impl_state_authority_host_begin;
1648  c = (const afw_octet_t *)parsed->original_authority.s;
1649  break;
1650  };
1651 
1652  c += increment;
1653  break;
1654 
1655 
1656  case impl_state_authority_userinfo_begin:
1657 
1658  parsed->original_userinfo.s = (const afw_utf8_octet_t *)c;
1659  state = impl_state_authority_userinfo;
1660  /* Fall though to impl_state_authority_userinfo. */
1661 
1662  case impl_state_authority_userinfo:
1663 
1664  if (*c == '@') {
1665  parsed->original_userinfo.len =
1666  c - (const afw_octet_t *)parsed->original_userinfo.s;
1667  parsed->userinfo =
1668  afw_uri_decode(&parsed->original_userinfo, p, xctx);
1669  c++;
1670  state = impl_state_authority_host_begin;
1671  break;
1672  }
1673 
1674  if (!AFW_URI_OCTET_IS(type, USERINFO)) goto error;
1675  c += increment;
1676  break;
1677 
1678 
1679  case impl_state_authority_host_begin:
1680 
1681  if (c == end) {
1682  state = impl_state_end;
1683  break;
1684  }
1685 
1686  parsed->original_host.s = (const afw_utf8_octet_t *)c;
1687 
1688  if (*c == '[') {
1689  c += increment;
1690  state = impl_state_authority_host_v6;
1691  break;
1692  }
1693 
1694  if (AFW_URI_OCTET_IS(type, DIGIT)) {
1695  c++;
1696  state = impl_state_authority_host_v4;
1697  break;
1698  }
1699 
1700  state = impl_state_authority_reg_name;
1701  /* Fall though to impl_state_authority_reg_name */
1702 
1703  case impl_state_authority_reg_name:
1704 
1705  if (c == end || *c == '/' || *c == ':') {
1706  parsed->original_host.len =
1707  c - (const afw_octet_t *)parsed->original_host.s;
1708  parsed->host =
1709  afw_uri_decode(&parsed->original_host, p, xctx);
1710  state = (c == end)
1711  ? impl_state_end
1712  : impl_state_authority_optional_port;
1713  break;
1714  }
1715 
1716  c += increment;
1717  break;
1718 
1719 
1720  case impl_state_authority_host_v4:
1721 
1722  if (c == end || *c == '/' || *c == ':') {
1723  parsed->original_host.len =
1724  c - (const afw_octet_t *)parsed->original_host.s;
1725  parsed->host = &parsed->original_host; /* v4 not encoded. */
1726  state = impl_state_authority_optional_port;
1727  break;
1728  }
1729 
1731  c += increment;
1732  break;
1733 
1734 
1735  case impl_state_authority_host_v6:
1736 
1737  if (c == end) goto error;
1738 
1739  if (*c == ']') {
1740  parsed->original_host.len =
1741  c - (const afw_octet_t *)parsed->original_host.s + 1;
1742  parsed->host = &parsed->original_host; /* v6 not encoded. */
1743  c += increment;
1744  state = impl_state_authority_optional_port;
1745  break;
1746  }
1747 
1749  c += increment;
1750  break;
1751 
1752 
1753  case impl_state_authority_optional_port:
1754 
1755  if (c == end) {
1756  state = impl_state_end;
1757  break;
1758  }
1759 
1760  if (*c == ':') {
1761  c += increment;
1762  parsed->port.s = (const afw_utf8_octet_t *)c;
1763  state = impl_state_authority_port;
1764  break;
1765  }
1766 
1767  state = impl_state_path_begin;
1768  break;
1769 
1770 
1771  case impl_state_authority_port:
1772 
1773  if (c == end || !AFW_URI_OCTET_IS(type, DIGIT)) {
1774  parsed->port.len =
1775  c - (const afw_octet_t *)parsed->port.s;
1776  if (c != end && *c != '/') goto error;
1777  state = impl_state_path_begin;
1778  break;
1779  }
1780 
1781  c++;
1782  break;
1783 
1784 
1785  case impl_state_path_begin:
1786 
1787  if (c == end) {
1788  state = impl_state_end;
1789  break;
1790  }
1791 
1792  parsed->original_path.s = (const afw_utf8_octet_t *)c;
1793  state = impl_state_path;
1794  /* Fall though to impl_state_path. */
1795 
1796  case impl_state_path:
1797 
1798  if (c == end || *c == '?' || *c == '#') {
1799  parsed->original_path.len =
1800  c - (const afw_octet_t *)parsed->original_path.s;
1801  if (c == end) {
1802  state = impl_state_end;
1803  break;
1804  }
1805  state = (*c == '?')
1806  ? impl_state_query_begin
1807  : impl_state_fragment_begin;
1808  c += increment;
1809  break;
1810  }
1811 
1812  c += increment;
1813  break;
1814 
1815 
1816  case impl_state_query_begin:
1817 
1818  parsed->original_query.s = (const afw_utf8_octet_t *)c;
1819  state = impl_state_query;
1820  /* Fall though to impl_state_query. */
1821 
1822  case impl_state_query:
1824  if (c == end || *c == '#') {
1825  parsed->original_query.len =
1826  c - (const afw_octet_t *)parsed->original_query.s;
1827  if (c == end) {
1828  state = impl_state_end;
1829  break;
1830  }
1831  state = impl_state_fragment_begin;
1832  c += increment;
1833  break;
1834  }
1835 
1836  if (!AFW_URI_OCTET_IS(type, QUERY)) {
1837  goto error;
1838  }
1839  c += increment;
1840  break;
1841 
1842 
1843  case impl_state_fragment_begin:
1844 
1845  parsed->original_fragment.s = (const afw_utf8_octet_t *)c;
1846  state = impl_state_fragment;
1847  /* Fall though to impl_state_fragment. */
1848 
1849  case impl_state_fragment:
1851  if (c == end) {
1852  parsed->original_fragment.len =
1853  c - (const afw_octet_t *)parsed->original_fragment.s;
1854  state = impl_state_end;
1855  break;
1856  }
1857 
1858  if (!AFW_URI_OCTET_IS(type, FRAGMENT)) goto error;
1859  c += increment;
1860  break;
1861 
1862 
1863  default:
1864  AFW_THROW_ERROR_Z(general, "Internal error", xctx);
1865  }
1866 
1867  }
1868 
1869  if (is_value_path &&
1870  parsed->scheme.len == 0 &&
1871  parsed->original_authority.len == 0 &&
1872  parsed->original_query.len == 0 &&
1873  parsed->original_fragment.len == 0 &&
1874  parsed->original_path.len != 0)
1875  {
1876  parsed->path_parsed = afw_object_path_parse(
1877  &parsed->original_path, parsed->current_path, NULL,
1878  p, xctx);
1879  }
1880 
1881  impl_set_normalized_uri(parsed, p, xctx);
1882 
1883  return parsed;
1884 
1885 error:
1886  AFW_THROW_ERROR_FZ(general, xctx,
1887  "Error parsing URI %" AFW_UTF8_FMT " at offset %d",
1888  AFW_UTF8_FMT_ARG(uri), (int)(c - (const afw_octet_t *)uri->s));
1889 }
1890 
1891 
1892 /* Turn a parsed URI into an object representation. */
1893 AFW_DEFINE(const afw_object_t *)
1895  const afw_uri_parsed_t * parsed,
1896  const afw_pool_t *p,
1897  afw_xctx_t *xctx)
1898 {
1899  const afw_object_t *result;
1900  afw_uri_parser_t parser;
1901  const afw_list_t *list;
1902  const afw_value_t *value;
1903  const afw_object_t *object;
1904 
1905  result = afw_object_create_managed(p, xctx);
1906 
1907  if (parsed->original_uri) {
1909  &afw_s_originalURI, parsed->original_uri, xctx);
1910  }
1911 
1912  if (parsed->normalized_uri.len > 0) {
1914  &afw_s_normalizedURI, &parsed->normalized_uri, xctx);
1915  }
1916 
1917  if (parsed->scheme.len > 0) {
1919  &afw_s_scheme, &parsed->scheme, xctx);
1920  }
1921 
1922  if (parsed->original_hier_part.len > 0) {
1924  &afw_s_originalHierPart, &parsed->original_hier_part, xctx);
1925  }
1926 
1927  if (parsed->original_authority.len > 0) {
1929  &afw_s_originalAuthority, &parsed->original_authority, xctx);
1930  }
1931 
1932  if (parsed->authority) {
1934  &afw_s_authority, parsed->authority, xctx);
1935  }
1936 
1937  if (parsed->original_userinfo.len > 0) {
1939  &afw_s_originalUserinfo, &parsed->original_userinfo, xctx);
1940  }
1941 
1942  if (parsed->userinfo) {
1944  &afw_s_userinfo, parsed->userinfo, xctx);
1945  }
1946 
1947  if (parsed->original_host.len > 0) {
1949  &afw_s_originalHost, &parsed->original_host, xctx);
1950  }
1951 
1952  if (parsed->host) {
1954  &afw_s_host, parsed->host, xctx);
1955  }
1956 
1957  if (parsed->port.len > 0) {
1959  &afw_s_port, &parsed->port, xctx);
1960  }
1961 
1962  if (parsed->original_path.len > 0) {
1964  &afw_s_originalPath, &parsed->original_path, xctx);
1965  }
1966 
1967  if (parsed->original_path.len > 0) {
1968  afw_object_set_property_as_string(result, &afw_s_path,
1969  afw_uri_decode(&parsed->original_path, p, xctx), xctx);
1970  afw_uri_parser_initialize(&parser, &parsed->original_path, p, xctx);
1971  list = afw_list_of_create(
1972  afw_data_type_string, p, xctx);
1973  for (;;) {
1974  afw_uri_parse_next_token(&parser, xctx);
1975  if (!parser.token.s) break;
1976  value = afw_value_create_string(&parser.token, p, xctx);
1977  afw_list_add_value(list, value, xctx);
1978  }
1980  &afw_s_pathTokens, list, xctx);
1981  }
1982 
1983  if (parsed->original_query.len > 0) {
1985  &afw_s_originalQuery, &parsed->original_query, xctx);
1986  }
1987 
1988  if (parsed->original_query.len > 0) {
1989  afw_object_set_property_as_string(result, &afw_s_query,
1990  afw_uri_decode(&parsed->original_query, p, xctx), xctx);
1991  afw_uri_parser_initialize(&parser, &parsed->original_query, p, xctx);
1992  list = afw_list_of_create(
1993  afw_data_type_string, p, xctx);
1994  for (;;) {
1995  afw_uri_parse_next_token(&parser, xctx);
1996  if (!parser.token.s) break;
1997  value = afw_value_create_string(&parser.token, p, xctx);
1998  afw_list_add_value(list, value, xctx);
1999  }
2001  &afw_s_queryTokens, list, xctx);
2002  }
2003 
2004  if (parsed->original_fragment.len > 0) {
2006  &afw_s_originalFragment, &parsed->original_fragment, xctx);
2007  }
2008 
2009  if (parsed->original_fragment.len > 0) {
2010  afw_object_set_property_as_string(result, &afw_s_fragment,
2011  afw_uri_decode(&parsed->original_fragment, p, xctx), xctx);
2012  afw_uri_parser_initialize(&parser, &parsed->original_fragment, p, xctx);
2013  list = afw_list_of_create(
2014  afw_data_type_string, p, xctx);
2015  for (;;) {
2016  afw_uri_parse_next_token(&parser, xctx);
2017  if (!parser.token.s) break;
2018  value = afw_value_create_string(&parser.token, p, xctx);
2019  afw_list_add_value(list, value, xctx);
2020  }
2022  &afw_s_fragmentTokens, list, xctx);
2023  }
2024 
2025  if (parsed->path_parsed) {
2027  parsed->path_parsed, p, xctx);
2029  &afw_s_valuePath, object, xctx);
2030  }
2031 
2032  return result;
2033 }
2034 
2035 
2036 
2037 /* Turn a URI into an object representation. */
2038 AFW_DEFINE(const afw_object_t *)
2040  const afw_utf8_t *uri,
2041  afw_boolean_t is_value_path,
2042  const afw_utf8_t *current_path,
2043  const afw_pool_t *p,
2044  afw_xctx_t *xctx)
2045 {
2046  const afw_uri_parsed_t *parsed;
2047 
2048  parsed = afw_uri_parse(uri, is_value_path, current_path, p, xctx);
2049 
2050  return afw_uri_parsed_to_object(parsed, p, xctx);
2051 }
2052 
2053 
2054 
2055 /* Set consider_period_a_token flag. */
2056 AFW_DEFINE(void)
2058  const afw_uri_parser_t *parser,
2059  afw_boolean_t consider_period_a_token,
2060  afw_xctx_t *xctx)
2061 {
2062  afw_uri_parser_t *self = (afw_uri_parser_t *)parser;
2063 
2064  self->consider_period_a_token = consider_period_a_token;
2065 }
2066 
2067 
2068 /* Determine if two parsed URIs are equivalent. */
2071  const afw_uri_parsed_t *parsed1,
2072  const afw_uri_parsed_t *parsed2,
2073  afw_xctx_t *xctx)
2074 {
2075 
2076  if (parsed1->scheme.len != parsed2->scheme.len ||
2077  (parsed1->scheme.len != 0 &&
2078  !afw_utf8_equal(&parsed1->scheme, &parsed2->scheme)))
2079  {
2080  goto not_equal;
2081  }
2082 
2083  if (parsed1->userinfo) {
2084  if (!parsed2->userinfo ||
2085  !afw_utf8_equal(parsed1->userinfo, parsed2->userinfo))
2086  {
2087  goto not_equal;
2088  }
2089  }
2090  else {
2091  if (parsed2->userinfo) goto not_equal;
2092  }
2093 
2094  if (parsed1->host) {
2095  if (!parsed2->host ||
2096  !afw_utf8_equal(parsed1->host, parsed2->host))
2097  {
2098  goto not_equal;
2099  }
2100  }
2101  else {
2102  if (parsed2->host) goto not_equal;
2103  }
2104 
2105  if (parsed1->port.len != parsed2->port.len ||
2106  (parsed1->port.len != 0 &&
2107  !afw_utf8_equal(&parsed1->port, &parsed2->port)))
2108  {
2109  goto not_equal;
2110  }
2111 
2112 
2113  if (parsed1->path_parsed) {
2114  if (!parsed2->path_parsed ||
2116  parsed1->path_parsed, parsed2->path_parsed, xctx))
2117  {
2118  goto not_equal;
2119  }
2120  }
2121  else {
2122  if (parsed2->path_parsed) goto not_equal;
2124  if (parsed1->original_path.len != parsed2->original_path.len ||
2125  (parsed1->original_path.len != 0 &&
2126  !afw_utf8_equal(&parsed1->original_path, &parsed2->original_path)))
2127  {
2128  goto not_equal;
2129  }
2130  }
2131 
2134  return true;
2135 
2136 not_equal:
2137  return false;
2138 }
2139 
2140 
2141 /* Determine if two URIs are equivalent. */
2144  const afw_utf8_t *uri1,
2145  const afw_utf8_t *uri2,
2146  afw_boolean_t is_value_path,
2147  const afw_utf8_t *current_path2,
2148  const afw_pool_t *p,
2149  afw_xctx_t *xctx)
2150 {
2151  const afw_uri_parsed_t *parsed1;
2152  const afw_uri_parsed_t *parsed2;
2153 
2154  parsed1 = afw_uri_parse(uri1, is_value_path, NULL, p, xctx);
2155  parsed2 = afw_uri_parse(uri2, is_value_path, current_path2, p, xctx);
2156  return afw_uri_parsed_are_equivalent(parsed1, parsed2, xctx);
2157 }
2158 
2159 
2162  afw_octet_t octet,
2163  afw_uri_octet_type_t mask)
2164 {
2165  afw_uri_octet_type_t type;
2166  afw_boolean_t result;
2167 
2168  type = afw_uri_octet_type[octet];
2169 
2170  if (AFW_URI_OCTET_MASK_IS_BITWISE_NOT & mask) {
2171  result = (type & ~mask) == 0;
2172  }
2173 
2174  else {
2175  result = (type & mask) != 0;
2176  }
2177 
2178  return result;
2179 }
AFW_DEFINE(const afw_object_t *)
#define AFW_DECLARE(type)
Declare a public afw function.
#define AFW_DEFINE_CONST_DATA(type)
Define a public afw variable.
Adaptive Framework Core Internal.
afw_ascii_encode_hex_digit(afw_octet_t octet)
Encode ascii hex digit.
Definition: afw_ascii.c:39
afw_ascii_decode_hex_digit(afw_octet_t octet)
Decode ascii hex digit.
Definition: afw_ascii.c:31
afw_object_set_property_as_list(const afw_object_t *object, const afw_utf8_t *property_name, const afw_list_t *internal, afw_xctx_t *xctx)
Set property function for data type list values.
afw_object_set_property_as_object(const afw_object_t *object, const afw_utf8_t *property_name, const afw_object_t *internal, afw_xctx_t *xctx)
Set property function for data type object values.
afw_value_create_string(const afw_utf8_t *internal, const afw_pool_t *p, afw_xctx_t *xctx)
Create function for unmanaged data type string value.
afw_data_type_string
Data type struct for string.
afw_object_set_property_as_string(const afw_object_t *object, const afw_utf8_t *property_name, const afw_utf8_t *internal, afw_xctx_t *xctx)
Set property function for data type string values.
#define AFW_UTF8_FMT_ARG(A_STRING)
Convenience Macro for use with AFW_UTF8_FMT to specify arg.
Definition: afw_common.h:605
_Bool afw_boolean_t
Definition: afw_common.h:373
#define AFW_UTF8_FMT
Format string specifier used for afw_utf8_t.
Definition: afw_common.h:588
char afw_utf8_octet_t
8 bits of utf-8 codepoint.
Definition: afw_common.h:236
apr_size_t afw_size_t
size_t.
Definition: afw_common.h:151
unsigned char afw_octet_t
8 bits (unsigned).
Definition: afw_common.h:211
#define AFW_THROW_ERROR_FZ(code, xctx, format_z,...)
Macro used to set error and 0 rv in xctx and throw it.
Definition: afw_error.h:319
#define AFW_THROW_ERROR_Z(code, message_z, xctx)
Macro used to set error and 0 rv in xctx and throw it.
Definition: afw_error.h:283
afw_list_add_value(const afw_list_t *instance, const afw_value_t *value, afw_xctx_t *xctx)
Call method add_value of interface afw_list_setter.
Definition: afw_list.c:104
#define afw_list_of_create(data_type, p, xctx)
Create an list of a specific data type in memory.
Definition: afw_list.h:64
afw_object_path_parse(const afw_utf8_t *path, const afw_utf8_t *current_path, const afw_object_options_t *default_options, const afw_pool_t *p, afw_xctx_t *xctx)
Parse an object value path in specific pool.
afw_object_path_parsed_to_object(const afw_object_path_parsed_t *parsed, const afw_pool_t *p, afw_xctx_t *xctx)
Turn a parsed path into an object representation.
afw_object_path_parsed_are_equivalent(const afw_object_path_parsed_t *parsed1, const afw_object_path_parsed_t *parsed2, afw_xctx_t *xctx)
Determine if two parsed paths are equivalent.
#define afw_object_create_managed(p, xctx)
Create an empty entity object in its own pool.
Definition: afw_object.h:913
#define afw_pool_malloc(instance, size, xctx)
Call method malloc of interface afw_pool.
#define afw_pool_calloc_type(instance, type, xctx)
Macro to allocate cleared memory to hold type in pool.
Definition: afw_pool.h:167
#define afw_pool_malloc_type(instance, type, xctx)
Macro to allocate uncleared memory to hold type in pool.
Definition: afw_pool.h:182
afw_uri_to_object(const afw_utf8_t *uri, afw_boolean_t is_value_path, const afw_utf8_t *current_path, const afw_pool_t *p, afw_xctx_t *xctx)
Turn a URI into an object representation.
Definition: afw_uri.c:2039
#define AFW_URI_OCTET_ENCODE_COMPONENT_VALUE
encode except A-Z a-z 0-9 - _ . ! ~ * '
Definition: afw_uri.h:54
afw_uri_are_equivalent(const afw_utf8_t *uri1, const afw_utf8_t *uri2, afw_boolean_t is_value_path, const afw_utf8_t *current_path2, const afw_pool_t *p, afw_xctx_t *xctx)
Determine if two URIs are equivalent.
Definition: afw_uri.c:2143
#define AFW_URI_OCTET_ENCODE_URI
encode except A-Z a-z 0-9 ; , / ? : @ & = + $ - _ . ! ~ * ' ( ) #
Definition: afw_uri.h:48
afw_uri_parse_next_token(const afw_uri_parser_t *parser, afw_xctx_t *xctx)
Parse next token.
Definition: afw_uri.c:1175
afw_uri_decode_create(const afw_utf8_octet_t *s, afw_size_t len, const afw_pool_t *p, afw_xctx_t *xctx)
Create a URI decoded string.
Definition: afw_uri.c:1023
#define AFW_URI_OCTET_ENCODE_COMPONENT
encode except A-Z a-z 0-9 - _ . ! ~ * ' ( )
Definition: afw_uri.h:51
afw_uri_parser_initialize(afw_uri_parser_t *parser, const afw_utf8_t *uri, const afw_pool_t *p, afw_xctx_t *xctx)
Initialize a parser in specific pool.
Definition: afw_uri.c:1146
afw_uri_parse_reuse_token(const afw_uri_parser_t *parser, afw_xctx_t *xctx)
Cause next call afw_uri_parse_get_next_token() to return current token.
Definition: afw_uri.c:1265
afw_uri_decode_to_raw_create(const afw_utf8_octet_t *s, afw_size_t len, const afw_pool_t *p, afw_xctx_t *xctx)
Decode URI encoded string to raw.
Definition: afw_uri.c:1047
afw_uri_parse(const afw_utf8_t *uri, afw_boolean_t is_value_path, const afw_utf8_t *current_path, const afw_pool_t *p, afw_xctx_t *xctx)
Parse a URI in specific pool.
Definition: afw_uri.c:1470
afw_size_t afw_uri_encode_len(const afw_utf8_t *string, afw_uri_octet_type_t mask, afw_xctx_t *xctx)
The length needed to URI encode a string.
Definition: afw_uri.c:762
afw_uri_encode_create(const afw_utf8_octet_t *s, afw_size_t len, afw_uri_octet_type_t mask, const afw_pool_t *p, afw_xctx_t *xctx)
Create a URI encode a string.
Definition: afw_uri.c:866
afw_uri_encode_raw(const afw_memory_t *raw, afw_uri_octet_type_t mask, const afw_pool_t *p, afw_xctx_t *xctx)
URI encode raw.
Definition: afw_uri.c:884
afw_uri_parsed_to_object(const afw_uri_parsed_t *parsed, const afw_pool_t *p, afw_xctx_t *xctx)
Turn a parsed URI into an object representation.
Definition: afw_uri.c:1894
afw_uri_decode_to_raw(const afw_utf8_t *encoded, const afw_pool_t *p, afw_xctx_t *xctx)
Decode a URI encoded string to raw.
Definition: afw_uri.c:1099
afw_uri_encode(const afw_utf8_t *string, afw_uri_octet_type_t mask, const afw_pool_t *p, afw_xctx_t *xctx)
URI encode a string.
Definition: afw_uri.c:814
afw_uri_parser_create(const afw_utf8_t *uri, const afw_pool_t *p, afw_xctx_t *xctx)
Create and initialize a parser in specific pool.
Definition: afw_uri.c:1160
afw_uri_encode_raw_to_preallocated(afw_utf8_octet_t *s, afw_size_t s_len, const afw_memory_t *raw, afw_uri_octet_type_t mask, const afw_pool_t *p, afw_xctx_t *xctx)
URI encode from raw using supplied afw_utf8_t.
Definition: afw_uri.c:963
afw_uri_octet_test(afw_octet_t octet, afw_uri_octet_type_t mask)
Test uri octet type mask.
Definition: afw_uri.c:2161
afw_uri_decode(const afw_utf8_t *encoded, const afw_pool_t *p, afw_xctx_t *xctx)
Decode a URI encoded string.
Definition: afw_uri.c:1000
afw_uint32_t afw_uri_octet_type_t
Token type typedef.
Definition: afw_uri.h:31
afw_uri_parse_set_consider_period_a_token(const afw_uri_parser_t *parser, afw_boolean_t consider_period_a_token, afw_xctx_t *xctx)
Set consider_period_a_token flag.
Definition: afw_uri.c:2057
afw_uri_encode_raw_len(const afw_memory_t *raw, afw_uri_octet_type_t mask, afw_xctx_t *xctx)
The length needed to URI encode raw.
Definition: afw_uri.c:788
#define AFW_URI_OCTET_IS(type, mask)
Test uri octet type using mask.
Definition: afw_uri.h:123
afw_uri_parsed_are_equivalent(const afw_uri_parsed_t *parsed1, const afw_uri_parsed_t *parsed2, afw_xctx_t *xctx)
Determine if two parsed URIs are equivalent.
Definition: afw_uri.c:2070
afw_uri_encode_to_preallocated(afw_utf8_octet_t *s, afw_size_t s_len, const afw_utf8_t *string, afw_uri_octet_type_t mask, const afw_pool_t *p, afw_xctx_t *xctx)
URI encode a string using supplied afw_utf8_t.
Definition: afw_uri.c:925
afw_uri_octet_type[256]
Token type table indexed by afw_octet_t ( 0 - 255 ).
Definition: afw_uri.h:129
#define afw_utf8_is_nfc(s, len, p, xctx)
Determine if series of bytes is NFC normalized utf-8.
Definition: afw_utf8.h:168
afw_boolean_t afw_utf8_equal(const afw_utf8_t *s1, const afw_utf8_t *s2)
Check to see if a string equals another string.
const afw_utf8_t * afw_utf8_clone(const afw_utf8_t *string, const afw_pool_t *p, afw_xctx_t *xctx)
Clone a utf-8 string into a specific pool.
Definition: afw_utf8.h:347
#define afw_utf8_create(s, len, p, xctx)
Create utf-8 string without copy unless necessary in pool specified.
Definition: afw_utf8.h:239
Interface afw_list public struct.
Struct for memory pointer and size.
Definition: afw_common.h:505
Interface afw_object public struct.
Interface afw_pool public struct.
Typedef for parsed URI returned from afw_uri_parse().
Definition: afw_uri.h:133
URI parser typedef.
Definition: afw_uri.h:197
NFC normalized UTF-8 string.
Definition: afw_common.h:545
Interface afw_value public struct.
Interface afw_xctx public struct.