Adaptive Framework  0.9.0
All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Modules Pages
afw_utf8.h
Go to the documentation of this file.
1 // See the 'COPYING' file in the project root for licensing information.
2 /*
3  * AFW - String Functions
4  *
5  * Copyright (c) 2010-2023 Clemson University
6  *
7  */
8 
9 #ifndef __AFW_UTF8_H__
10 #define __AFW_UTF8_H__
11 
12 #include "afw_minimal.h"
13 
31 
39 typedef const afw_utf8_t *
40 (AFW_CALLBACK *afw_utf8_from_value_t) (
41  const afw_value_t *value,
42  afw_boolean_t own,
43  afw_xctx_t *xctx);
44 
45 
62 typedef const afw_value_t *
63 (AFW_CALLBACK *afw_utf8_to_value_t) (
64  const afw_utf8_t *from_utf8,
65  const afw_utf8_t *path,
66  afw_xctx_t *xctx);
67 
68 
69 /* @brief Options for function afw_utf8_nfc(). */
70 typedef enum afw_utf8_nfc_option_e {
71 
79 
87 
90 
93 
94 } afw_utf8_nfc_option_t;
95 
96 
111 AFW_DECLARE(const afw_utf8_t *)
113  const afw_utf8_octet_t *s, afw_size_t len,
114  afw_utf8_nfc_option_t option,
115  const afw_pool_t *p, afw_xctx_t *xctx);
116 
117 
132  afw_size_t len, afw_xctx_t *xctx);
133 
134 
144  afw_xctx_t *xctx);
145 
146 
154 #define afw_utf8_is_valid(s, len, xctx) \
155  (afw_utf8_nfc(s, len, afw_utf8_nfc_option_is_valid, \
156  (xctx)->p, xctx) == NULL)
157 
158 
159 
168 #define afw_utf8_is_nfc(s, len, p, xctx) \
169  (afw_utf8_nfc(s, len, afw_utf8_nfc_option_is_nfc, \
170  p, xctx) == NULL)
171 
172 
180 AFW_DEFINE_STATIC_INLINE(const afw_memory_t *)
182  const afw_utf8_t *string, const afw_pool_t *p, afw_xctx_t *xctx)
183 {
184  return (const afw_memory_t *)string;
185 }
186 
187 
198 AFW_DEFINE_STATIC_INLINE(const afw_utf8_t *)
200  const afw_memory_t *raw, const afw_pool_t *p, afw_xctx_t *xctx)
201 {
202  return afw_utf8_nfc((const afw_utf8_octet_t *)raw->ptr, raw->size,
203  afw_utf8_nfc_option_create, p, xctx);
204 }
205 
206 
207 
219 AFW_DECLARE(const afw_utf8_t *)
221  const afw_utf8_t * from_encoding,
222  const char* * from, afw_size_t * from_size,
223  const afw_pool_t *p, afw_xctx_t *xctx);
224 
225 
226 
239 #define afw_utf8_create(s,len, p, xctx) \
240  afw_utf8_nfc(s, len, afw_utf8_nfc_option_create, p, xctx)
241 
242 
243 
254 #define afw_utf8_from_utf8_z(s_z, p, xctx) \
255  afw_utf8_nfc(s_z, AFW_UTF8_Z_LEN, afw_utf8_nfc_option_create, \
256  p, xctx)
257 
258 
271 AFW_DECLARE(const afw_utf8_t * const *)
273  afw_size_t count,
274  const afw_utf8_t * const * pointers,
275  afw_boolean_t NULL_terminate,
276  const afw_pool_t *p, afw_xctx_t *xctx);
277 
278 
290 AFW_DECLARE(const afw_utf8_t *)
292  const afw_utf8_t * const * strings,
293  const afw_utf8_t * separator,
294  const afw_pool_t *p, afw_xctx_t *xctx);
295 
296 
297 
309 AFW_DECLARE(const afw_utf8_z_t *)
311  const afw_utf8_t * const * strings,
312  const afw_utf8_t * separator,
313  const afw_pool_t *p, afw_xctx_t *xctx);
314 
315 
316 
328 AFW_DECLARE(const afw_utf8_z_t *)
330  const afw_utf8_z_t * const * strings_z,
331  const afw_utf8_t * separator,
332  const afw_pool_t *p, afw_xctx_t *xctx);
333 
334 
335 
346 AFW_DEFINE_STATIC_INLINE(const afw_utf8_t *)
348  const afw_utf8_t *string, const afw_pool_t *p, afw_xctx_t *xctx)
349 {
350  return (string)
351  ? afw_utf8_nfc(string->s, string->len,
353  : NULL;
354 }
355 
356 
357 
369 #define afw_utf8_create_copy(s, len, p, xctx) \
370  afw_utf8_nfc(s, len, afw_utf8_nfc_option_create_copy, p, xctx)
371 
372 
373 
385 AFW_DECLARE(const afw_utf8_t *)
387  const afw_utf8_t *s, const afw_pool_t *p, afw_xctx_t *xctx);
388 
389 
390 
406 AFW_DECLARE(const afw_utf8_t *)
408  const afw_utf8_t *s, const afw_pool_t *p, afw_xctx_t *xctx);
409 
410 
411 
421  const afw_pool_t *p, afw_xctx_t *xctx, ...);
422 
423 
431 AFW_DECLARE(const afw_utf8_t *)
433  afw_xctx_t *xctx, va_list strings);
434 
435 
436 
455  afw_size_t *line_number,
456  afw_size_t *column_number,
457  const afw_utf8_t *s,
458  afw_size_t offset,
459  int tab_size,
460  afw_xctx_t *xctx);
461 
462 
463 
475 AFW_DECLARE(void)
477  afw_size_t *number_of_lines,
478  afw_size_t *max_column_number,
479  const afw_utf8_t *s,
480  int tab_size,
481  afw_xctx_t *xctx);
482 
483 
484 
498  const afw_pool_t *p, afw_xctx_t *xctx, const afw_utf8_z_t *format_z,
499  ...);
500 
501 
513 AFW_DECLARE(const afw_utf8_t *)
515  const afw_utf8_z_t *format, va_list arg,
516  const afw_pool_t *p, afw_xctx_t *xctx);
517 
518 
528 AFW_DEFINE_STATIC_INLINE(const afw_utf8_z_t *)
530  const afw_utf8_t *string, const afw_pool_t *p, afw_xctx_t *xctx)
531 {
532  afw_utf8_z_t * result;
533 
534  result = afw_pool_malloc(p, string->len + 1, xctx);
535  memcpy(result, string->s, string->len);
536  result[string->len] = 0;
537  return result;
538 }
539 
540 
552  const afw_utf8_t *string, const afw_utf8_t *starts_with);
553 
554 
566  const afw_utf8_t *string, const afw_utf8_z_t *starts_with_z);
567 
568 
580  const afw_utf8_t *string, const afw_utf8_t *ends_with);
581 
582 
594  const afw_utf8_t *string, const afw_utf8_z_t *ends_with_z);
595 
596 
608  const afw_utf8_t *s1, const afw_utf8_t *s2);
609 
610 
622  const afw_utf8_t *s1, const afw_utf8_t *s2);
623 
624 
633 AFW_DECLARE(int)
635  const afw_utf8_t *s1, const afw_utf8_t *s2);
636 
637 
646 AFW_DECLARE(int)
648  const afw_utf8_t *s1, const afw_utf8_t *s2, afw_xctx_t *xctx);
649 
650 
662  const afw_utf8_t *s1, const afw_utf8_z_t *s2_z);
663 
664 
674 AFW_DECLARE(const afw_utf8_t * const *)
676  const afw_utf8_t *s,
677  const afw_pool_t *p,
678  afw_xctx_t *xctx);
679 
680 
693 AFW_DEFINE_STATIC_INLINE(void)
695  afw_utf8_t *result, const afw_utf8_t *string, afw_size_t start,
696  afw_size_t end)
697 {
698  if (end > string->len) end = string->len;
699  result->len = (end > start) ? end - start : 0;
700  result->s = (result->len > 0) ? string->s + start : NULL;
701 }
702 
703 
704 /* -- The following are primarily zero-terminated utf-8 string functions. -- */
705 
718 AFW_DECLARE(const afw_utf8_z_t *)
720  const afw_utf8_octet_t *s, afw_size_t len, const afw_pool_t *p, afw_xctx_t *xctx);
721 
722 
731 AFW_DEFINE_STATIC_INLINE(afw_boolean_t)
733  const afw_utf8_octet_t *s1, afw_size_t len1, const afw_utf8_z_t *s2_z)
734 {
735  while (*s2_z) {
736  if (len1-- <= 0 || *s1 != *s2_z) {
737  return false;
738  }
739  s1++;
740  s2_z++;
741  }
742 
743  return true;
744 }
745 
746 
755 AFW_DEFINE_STATIC_INLINE(afw_boolean_t)
757  const afw_utf8_z_t *s1_z, const afw_utf8_z_t *s2_z)
758 {
759  while (*s2_z) {
760  if (!*s1_z || *s1_z != *s2_z) {
761  return false;
762  }
763  s1_z++;
764  s2_z++;
765  }
766 
767  return true;
768 }
769 
770 
776 AFW_DEFINE_STATIC_INLINE(int)
778  const afw_utf8_z_t *s1, const afw_utf8_z_t *s2, afw_xctx_t *xctx)
779 {
780  afw_utf8_t a1, a2;
781 
782  a1.s = s1;
783  a1.len = s1 ? strlen(s1): 0;
784 
785  a2.s = s2;
786  a2.len = s2 ? strlen(s2): 0;
787 
788  return afw_utf8_compare_ignore_case(&a1, &a2, xctx);
789 }
790 
791 
798 AFW_DEFINE_STATIC_INLINE(afw_boolean_t)
800  const afw_utf8_z_t *s1, const afw_utf8_z_t *s2)
801 {
802  return (s1 && s2 && (strcmp((const char *)s1, (const char *)s2) == 0
803  || s1 == s2)) ? TRUE : FALSE;
804 }
805 
806 
816 AFW_DEFINE_STATIC_INLINE(afw_boolean_t)
818  const afw_utf8_z_t *s1, const afw_utf8_z_t *s2)
819 {
820  return (strcasecmp((const char *)s1, (const char *)s2) == 0) ? true : false;
821 }
822 
823 
824 
830  const afw_pool_t *p, afw_xctx_t *xctx, ...);
831 
832 
836 AFW_DECLARE(const afw_object_t *)
838  const afw_utf8_z_t *s, afw_xctx_t *xctx);
839 
840 
844 AFW_DECLARE(const afw_utf8_z_t *)
846  const afw_utf8_z_t *format_z, va_list ap,
847  const afw_pool_t *p, afw_xctx_t *xctx);
848 
849 
853 AFW_DEFINE_STATIC_INLINE(const afw_utf8_z_t *)
855  const afw_pool_t *p, afw_xctx_t *xctx, const afw_utf8_z_t *format_z, ...)
856 {
857  va_list ap;
858  const afw_utf8_z_t *result;
859 
860  va_start(ap, format_z);
861  result = afw_utf8_z_printf_v(format_z, ap, p, xctx);
862  va_end(ap);
863 
864  return result;
865 };
866 
867 
868 
872 AFW_DEFINE_STATIC_INLINE(const afw_utf8_z_t *)
874  const afw_utf8_z_t *path_z)
875 {
876  const afw_utf8_z_t *file_name;
877  const afw_utf8_z_t *c;
878 
879  if (!path_z) return "";
880 
881  for (c = file_name = path_z; *c; c++) {
882  if ((*c == '/') || (*c == '\\')) {
883  file_name = c + 1;
884  }
885  }
886 
887  return file_name;
888 }
889 
890 
899 AFW_DECLARE(const afw_utf8_z_t *)
900 afw_utf8_z_source_file(const afw_utf8_z_t *source_z);
901 
902 
903 
911 AFW_DECLARE(const afw_stream_t *)
913  const afw_utf8_t *streamId,
914  const afw_pool_t *p,
915  afw_xctx_t *xctx);
916 
917 
918 
926 AFW_DECLARE(const afw_writer_t *)
928  const afw_utf8_t *tab,
929  const afw_pool_t *p,
930  afw_xctx_t *xctx);
931 
932 
933 
940 AFW_DECLARE(void)
942  const afw_stream_t *stream,
943  afw_utf8_t *current_cached_string,
944  afw_xctx_t *xctx);
945 
946 
947 
954 AFW_DECLARE(void)
956  const afw_writer_t *writer,
957  afw_utf8_t *current_string,
958  afw_xctx_t *xctx);
959 
960 
961 
962 AFW_END_DECLARES
963 
966 #endif /* __AFW_UTF8_H__ */
967 
#define AFW_DECLARE_ELLIPSIS(type)
Declare a public afw function with variable arguments.
#define AFW_BEGIN_DECLARES
#define AFW_DECLARE(type)
Declare a public afw function.
Adaptive Framework Minimal Header.
_Bool afw_boolean_t
Definition: afw_common.h:373
afw_int32_t afw_code_point_t
Unicode code point.
Definition: afw_common.h:205
afw_utf8_octet_t afw_utf8_z_t
NFC normalized UTF-8 null terminated string.
Definition: afw_common.h:523
char afw_utf8_octet_t
8 bits of utf-8 codepoint.
Definition: afw_common.h:236
apr_size_t afw_size_t
size_t.
Definition: afw_common.h:151
#define afw_pool_malloc(instance, size, xctx)
Call method malloc of interface afw_pool.
const afw_utf8_t * afw_utf8_concat(const afw_pool_t *p, afw_xctx_t *xctx,...)
Concatenate strings with result in specified pool.
const afw_memory_t * afw_utf8_as_raw(const afw_utf8_t *string, const afw_pool_t *p, afw_xctx_t *xctx)
Convert utf-8 string to raw in specified pool.
Definition: afw_utf8.h:181
afw_boolean_t afw_utf8_equal_utf8_z(const afw_utf8_t *s1, const afw_utf8_z_t *s2_z)
Check to see if a string equals a utf8_z string.
void afw_utf8_stream_get_current_cached_string(const afw_stream_t *stream, afw_utf8_t *current_cached_string, afw_xctx_t *xctx)
Get the current string in a UTF-8 writer.
int afw_utf8_compare(const afw_utf8_t *s1, const afw_utf8_t *s2)
Compare two strings.
const afw_object_t * afw_utf8_z_query_string_to_object(const afw_utf8_z_t *s, afw_xctx_t *xctx)
void afw_utf8_writer_current_string(const afw_writer_t *writer, afw_utf8_t *current_string, afw_xctx_t *xctx)
Get the current string in a UTF-8 writer.
const afw_utf8_t * afw_utf8_nfc(const afw_utf8_octet_t *s, afw_size_t len, afw_utf8_nfc_option_t option, const afw_pool_t *p, afw_xctx_t *xctx)
UTF-8 NFC support function.
Definition: afw_utf8.c:86
afw_boolean_t afw_utf8_ends_with(const afw_utf8_t *string, const afw_utf8_t *ends_with)
Check to see if a string ends with another string.
Definition: afw_utf8.c:513
afw_boolean_t afw_utf8_starts_with_z(const afw_utf8_t *string, const afw_utf8_z_t *starts_with_z)
Check to see if a string starts with a utf8_z string.
const afw_utf8_t * afw_utf8_from_encoding(const afw_utf8_t *from_encoding, const char **from, afw_size_t *from_size, const afw_pool_t *p, afw_xctx_t *xctx)
Convert character encoding to a utf-8 in specified pool.
Definition: afw_utf8.c:395
const afw_utf8_t * afw_utf8_from_raw(const afw_memory_t *raw, const afw_pool_t *p, afw_xctx_t *xctx)
Convert raw to a utf-8 NFC normalizing if necessary in specified pool.
Definition: afw_utf8.h:199
afw_boolean_t afw_utf8_ends_with_z(const afw_utf8_t *string, const afw_utf8_z_t *ends_with_z)
Check to see if a string ends with a utf8_z string.
Definition: afw_utf8.c:524
const afw_utf8_z_t * afw_utf8_z_printf_v(const afw_utf8_z_t *format_z, va_list ap, const afw_pool_t *p, afw_xctx_t *xctx)
Definition: afw_utf8.c:844
const afw_utf8_t * afw_utf8_printf_v(const afw_utf8_z_t *format, va_list arg, const afw_pool_t *p, afw_xctx_t *xctx)
Create a utf-8 string using a c format string in specified pool.
Definition: afw_utf8.c:477
afw_boolean_t afw_utf8_z_equal_ignore_case(const afw_utf8_z_t *s1, const afw_utf8_z_t *s2)
Definition: afw_utf8.h:817
afw_boolean_t afw_utf8_equal(const afw_utf8_t *s1, const afw_utf8_t *s2)
Check to see if a string equals another string.
afw_boolean_t afw_utf8_from_code_point(afw_utf8_octet_t utf8_z[5], afw_code_point_t cp, afw_xctx_t *xctx)
Convert a code point to utf8.
Definition: afw_utf8.c:64
const afw_utf8_t *const * afw_utf8_parse_csv(const afw_utf8_t *s, const afw_pool_t *p, afw_xctx_t *xctx)
Parse a comma-separated (CSV) utf-8 string into an array of utf-8 strings in specified pool.
Definition: afw_utf8.c:1149
const afw_utf8_t *const * afw_utf8_clone_pointer_array(afw_size_t count, const afw_utf8_t *const *pointers, afw_boolean_t NULL_terminate, const afw_pool_t *p, afw_xctx_t *xctx)
Clone a pointer array of utf-8 to specified pool.
Definition: afw_utf8.c:862
afw_boolean_t afw_utf8_z_starts_with_z(const afw_utf8_z_t *s1_z, const afw_utf8_z_t *s2_z)
Returns true if zero terminated s1 starts with zero terminated string s2.
Definition: afw_utf8.h:756
afw_boolean_t afw_utf8_line_column_of_offset(afw_size_t *line_number, afw_size_t *column_number, const afw_utf8_t *s, afw_size_t offset, int tab_size, afw_xctx_t *xctx)
Determine the line and column of an offset in a string.
Definition: afw_utf8.c:1058
const afw_utf8_z_t * afw_utf8_z_printf(const afw_pool_t *p, afw_xctx_t *xctx, const afw_utf8_z_t *format_z,...)
Definition: afw_utf8.h:854
int afw_utf8_compare_ignore_case(const afw_utf8_t *s1, const afw_utf8_t *s2, afw_xctx_t *xctx)
Compare two strings ignoring case.
const afw_utf8_z_t * afw_utf8_array_to_utf8_z_with_separator(const afw_utf8_t *const *strings, const afw_utf8_t *separator, const afw_pool_t *p, afw_xctx_t *xctx)
Concat array of utf-8 with optional separator to specified pool.
Definition: afw_utf8.c:951
const afw_utf8_z_t * afw_utf8_z_create(const afw_utf8_octet_t *s, afw_size_t len, const afw_pool_t *p, afw_xctx_t *xctx)
Create a NFC Normalized zero terminated UTF-8 string in specified pool.
Definition: afw_utf8.c:366
void afw_utf8_substring_byte(afw_utf8_t *result, const afw_utf8_t *string, afw_size_t start, afw_size_t end)
Set result to a substring of string using byte indexes.
Definition: afw_utf8.h:694
void afw_utf8_line_count_and_max_column(afw_size_t *number_of_lines, afw_size_t *max_column_number, const afw_utf8_t *s, int tab_size, afw_xctx_t *xctx)
Determine the line count and maximum column in a string.
Definition: afw_utf8.c:1111
const afw_writer_t * afw_utf8_writer_create(const afw_utf8_t *tab, const afw_pool_t *p, afw_xctx_t *xctx)
Create UTF-8 writer.
const afw_utf8_t * afw_utf8_clone(const afw_utf8_t *string, const afw_pool_t *p, afw_xctx_t *xctx)
Clone a utf-8 string into a specific pool.
Definition: afw_utf8.h:347
afw_utf8_nfc_option_e
Definition: afw_utf8.h:70
afw_boolean_t afw_utf8_len_starts_with_z(const afw_utf8_octet_t *s1, afw_size_t len1, const afw_utf8_z_t *s2_z)
Returns true if the first len1 bytes at s1 start with the zero terminated string s2_z.
Definition: afw_utf8.h:732
const afw_value_t *(AFW_CALLBACK * afw_utf8_to_value_t)(const afw_utf8_t *from_utf8, const afw_utf8_t *path, afw_xctx_t *xctx)
Callback function for converting a string to an adaptive value.
Definition: afw_utf8.h:63
const afw_utf8_z_t * afw_utf8_z_file_name_from_path(const afw_utf8_z_t *path_z)
Definition: afw_utf8.h:873
const afw_utf8_z_t * afw_utf8_to_utf8_z(const afw_utf8_t *string, const afw_pool_t *p, afw_xctx_t *xctx)
Convert utf8 to utf8_z in specified pool.
Definition: afw_utf8.h:529
int afw_utf8_z_compare_ignore_case(const afw_utf8_z_t *s1, const afw_utf8_z_t *s2, afw_xctx_t *xctx)
Compare two zero terminated utf-8 strings ignoring case.
Definition: afw_utf8.h:777
const afw_stream_t * afw_utf8_stream_create(const afw_utf8_t *streamId, const afw_pool_t *p, afw_xctx_t *xctx)
Create UTF-8 stream.
const afw_utf8_z_t * afw_utf8_z_concat(const afw_pool_t *p, afw_xctx_t *xctx,...)
const afw_utf8_t * afw_utf8_printf(const afw_pool_t *p, afw_xctx_t *xctx, const afw_utf8_z_t *format_z,...)
Create a utf-8 string using a c format string in specified pool.
Definition: afw_utf8.c:459
afw_boolean_t afw_utf8_contains(const afw_utf8_t *s1, const afw_utf8_t *s2)
Check to see if a string contains another string.
Definition: afw_utf8.c:543
const afw_utf8_t * afw_utf8_normalize_space(const afw_utf8_t *s, const afw_pool_t *p, afw_xctx_t *xctx)
Create a utf-8 string with spaces normalized in specified pool.
afw_boolean_t afw_utf8_z_equal(const afw_utf8_z_t *s1, const afw_utf8_z_t *s2)
Definition: afw_utf8.h:799
const afw_utf8_z_t * afw_utf8_z_array_to_utf8_z_with_separator(const afw_utf8_z_t *const *strings_z, const afw_utf8_t *separator, const afw_pool_t *p, afw_xctx_t *xctx)
Concat array of utf-8 with optional separator to specified pool.
Definition: afw_utf8.c:994
const afw_utf8_t * afw_utf8_to_lower(const afw_utf8_t *s, const afw_pool_t *p, afw_xctx_t *xctx)
Convert utf-8 string to lower case in specified pool.
afw_boolean_t afw_utf8_starts_with(const afw_utf8_t *string, const afw_utf8_t *starts_with)
Check to see if a string starts with another string.
afw_code_point_t afw_utf8_next_code_point(const afw_utf8_octet_t *s, afw_size_t *offset, afw_size_t len, afw_xctx_t *xctx)
Get next codepoint in utf-8.
Definition: afw_utf8.c:31
const afw_utf8_t * afw_utf8_concat_v(const afw_pool_t *p, afw_xctx_t *xctx, va_list strings)
Concatenate strings with result in specified pool.
const afw_utf8_t *(AFW_CALLBACK * afw_utf8_from_value_t)(const afw_value_t *value, afw_boolean_t own, afw_xctx_t *xctx)
Callback function for creating a string from an adaptive value.
Definition: afw_utf8.h:40
const afw_utf8_t * afw_utf8_array_to_utf8_with_separator(const afw_utf8_t *const *strings, const afw_utf8_t *separator, const afw_pool_t *p, afw_xctx_t *xctx)
Concat array of utf-8 with optional separator to specified pool.
Definition: afw_utf8.c:906
const afw_utf8_z_t * afw_utf8_z_source_file(const afw_utf8_z_t *source_z)
Returns the value of source_z after the last '/' or '\'.
Definition: afw_utf8.c:1037
@ afw_utf8_nfc_option_is_valid
Only check that input is valid UTF-8.
Definition: afw_utf8.h:78
@ afw_utf8_nfc_option_create
If s is already normalized, use it directly for result->s.
Definition: afw_utf8.h:89
@ afw_utf8_nfc_option_create_copy
If s is already normalized, make copy for result->s.
Definition: afw_utf8.h:92
@ afw_utf8_nfc_option_is_nfc
Only check that input is UTF-8 NFC normalized.
Definition: afw_utf8.h:86
Struct for memory pointer and size.
Definition: afw_common.h:505
Interface afw_object public struct.
Interface afw_pool public struct.
Interface afw_stream public struct.
NFC normalized UTF-8 string.
Definition: afw_common.h:545
Interface afw_value public struct.
Interface afw_writer public struct.
Interface afw_xctx public struct.