[Gpa-commits] r775 - trunk/src
scm-commit@wald.intevation.org
scm-commit at wald.intevation.org
Wed May 2 21:05:02 CEST 2007
Author: werner
Date: 2007-05-02 21:05:02 +0200 (Wed, 02 May 2007)
New Revision: 775
Modified:
trunk/src/ChangeLog
trunk/src/gpgmetools.c
Log:
Fixed a UTF-8 issue.
Modified: trunk/src/ChangeLog
===================================================================
--- trunk/src/ChangeLog 2007-04-25 13:13:49 UTC (rev 774)
+++ trunk/src/ChangeLog 2007-05-02 19:05:02 UTC (rev 775)
@@ -1,3 +1,7 @@
+2007-05-02 Werner Koch <wk at g10code.com>
+
+ * gpgmetools.c (string_to_utf8): Rewritten.
+
2007-04-25 Werner Koch <wk at g10code.com>
* keyring.c (keyring_details_page_fill_num_keys): Use ngettext.
Modified: trunk/src/gpgmetools.c
===================================================================
--- trunk/src/gpgmetools.c 2007-04-25 13:13:49 UTC (rev 774)
+++ trunk/src/gpgmetools.c 2007-05-02 19:05:02 UTC (rev 775)
@@ -729,24 +729,41 @@
static gchar *
string_to_utf8 (const gchar *string)
{
- const gchar *s;
-
+ const char *s;
+
if (!string)
- {
- return NULL;
- }
- /* Make sure the encoding is UTF-8. Test structure suggested by
- Werner Koch. */
+ return NULL;
+
+ /* Due to a bug in old and not so old PGP versions user IDs have
+ been copied verbatim into the key. Thus many users with Umlauts
+ et al. in their name will see their names garbled. Although this
+ is not an issue for me (;-)), I have a couple of friends with
+ Umlauts in their name, so let's try to make their life easier by
+ detecting invalid encodings and converting them from Latin-1 to UTF-8. */
for (s = string; *s && !(*s & 0x80); s++)
;
- if (*s && !strchr (string, 0xc3))
+ if (*s && ((s[1] & 0xc0) == 0x80) && ( ((*s & 0xe0) == 0xc0)
+ || ((*s & 0xf0) == 0xe0)
+ || ((*s & 0xf8) == 0xf0)
+ || ((*s & 0xfc) == 0xf8)
+ || ((*s & 0xfe) == 0xfc)) )
+ {
+ /* Possible utf-8 character followed by continuation byte.
+ Although this might still be Latin-1 we better assume that it
+ is valid utf-8. */
+ return g_strdup (string);
+ }
+ else if (*s && !strchr (string, 0xc3))
{
- /* The string is Latin-1. */
- return g_convert (string, -1, "UTF-8", "ISO-8859-1", NULL, NULL, NULL);
+ /* No 0xC3 character in the string; assume that it is Latin-1. */
+ return g_convert (string, -1, "UTF-8", "ISO-8859-1", NULL, NULL, NULL);
}
else
{
- /* The string is already in UTF-8. */
+ /* Everything else is assumed to be UTF-8. We do this even though
+ we know the encoding is not valid. However as we only test
+ the first non-ascii character, valid encodings might
+ follow. */
return g_strdup (string);
}
}
More information about the Gpa-commits
mailing list