lib/charset: utf8_get() should return error

author Heinrich Schuchardt <xypron.glpk@gmx.de>

Sat, 27 Feb 2021 13:08:37 +0000 (14:08 +0100)

committer Heinrich Schuchardt <xypron.glpk@gmx.de>

Sun, 7 Mar 2021 16:37:13 +0000 (17:37 +0100)
author Heinrich Schuchardt <xypron.glpk@gmx.de>
Sat, 27 Feb 2021 13:08:37 +0000 (14:08 +0100)
committer Heinrich Schuchardt <xypron.glpk@gmx.de>
Sun, 7 Mar 2021 16:37:13 +0000 (17:37 +0100)
diff --git a/lib/charset.c b/lib/charset.c

index 1345c8f9f05c72cf206006f47a28f30103d703fd..946d5ee23eb7e86034a1448b3003b1edd122f800 100644 (file)
--- a/lib/charset.c
+++ b/lib/charset.c
@@ -32,7 +32,7 @@ static struct capitalization_table capitalization_table[] =
   *
   * @read_u8:   - stream reader
   * @src:       - string buffer passed to stream reader, optional
- * Return:     - Unicode code point
+ * Return:     - Unicode code point, or -1
   */
  static int get_code(u8 (*read_u8)(void *data), void *data)
  {
@@ -78,7 +78,7 @@ static int get_code(u8 (*read_u8)(void *data), void *data)
         }
         return ch;
  error:
-       return '?';
+       return -1;
  }
  
  /**
@@ -120,14 +120,21 @@ static u8 read_console(void *data)
  
  int console_read_unicode(s32 *code)
  {
-       if (!tstc()) {
-               /* No input available */
-               return 1;
-       }
+       for (;;) {
+               s32 c;
  
-       /* Read Unicode code */
-       *code = get_code(read_console, NULL);
-       return 0;
+               if (!tstc()) {
+                       /* No input available */
+                       return 1;
+               }
+
+               /* Read Unicode code */
+               c = get_code(read_console, NULL);
+               if (c > 0) {
+                       *code = c;
+                       return 0;
+               }
+       }
  }
  
  s32 utf8_get(const char **src)
diff --git a/test/unicode_ut.c b/test/unicode_ut.c

index 2cc6b5fefffe338295e5dd602f61c1db184e5d82..154361aea7da5c64d2526866d20d4da7386e24c6 100644 (file)
--- a/test/unicode_ut.c
+++ b/test/unicode_ut.c
@@ -52,6 +52,7 @@ static const char d4[] = {0xf0, 0x90, 0x92, 0x8d, 0xf0, 0x90, 0x92, 0x96,
  static const char j1[] = {0x6a, 0x31, 0xa1, 0x6c, 0x00};
  static const char j2[] = {0x6a, 0x32, 0xc3, 0xc3, 0x6c, 0x00};
  static const char j3[] = {0x6a, 0x33, 0xf0, 0x90, 0xf0, 0x00};
+static const char j4[] = {0xa1, 0x00};
  
  static int unicode_test_u16_strlen(struct unit_test_state *uts)
  {
@@ -165,6 +166,12 @@ static int unicode_test_utf8_get(struct unit_test_state *uts)
         ut_asserteq(0x0001048d, code);
         ut_asserteq_ptr(s, d4 + 4);
  
+       /* Check illegal character */
+       s = j4;
+       code = utf8_get((const char **)&s);
+       ut_asserteq(-1, code);
+       ut_asserteq_ptr(j4 + 1, s);
+
         return 0;
  }
  UNICODE_TEST(unicode_test_utf8_get);
author	Heinrich Schuchardt <xypron.glpk@gmx.de>
	Sat, 27 Feb 2021 13:08:37 +0000 (14:08 +0100)
committer	Heinrich Schuchardt <xypron.glpk@gmx.de>
	Sun, 7 Mar 2021 16:37:13 +0000 (17:37 +0100)
lib/charset.c		patch | blob | history
test/unicode_ut.c		patch | blob | history