[Thuban-commits] r2797 - in branches/WIP-pyshapelib-Unicode/thuban: . Thuban/Model libraries/pyshapelib libraries/shapelib

scm-commit@wald.intevation.org scm-commit at wald.intevation.org
Sat Dec 15 21:40:24 CET 2007


Author: bramz
Date: 2007-12-15 21:40:22 +0100 (Sat, 15 Dec 2007)
New Revision: 2797

Modified:
   branches/WIP-pyshapelib-Unicode/thuban/ChangeLog
   branches/WIP-pyshapelib-Unicode/thuban/Thuban/Model/table.py
   branches/WIP-pyshapelib-Unicode/thuban/libraries/pyshapelib/ChangeLog
   branches/WIP-pyshapelib-Unicode/thuban/libraries/pyshapelib/dbflibmodule.c
   branches/WIP-pyshapelib-Unicode/thuban/libraries/pyshapelib/setup.py
   branches/WIP-pyshapelib-Unicode/thuban/libraries/shapelib/dbfopen.c
   branches/WIP-pyshapelib-Unicode/thuban/libraries/shapelib/safileio.c
   branches/WIP-pyshapelib-Unicode/thuban/libraries/shapelib/shapefil.h
   branches/WIP-pyshapelib-Unicode/thuban/setup.py
Log:
Start of pyshapelib Unicode support mark II.  It now also reads the .CPG file so that UTF-8 encodings can be used.  Thuban will now create UTF-8 DBF files by default.  See the ChangeLogs.

Modified: branches/WIP-pyshapelib-Unicode/thuban/ChangeLog
===================================================================
--- branches/WIP-pyshapelib-Unicode/thuban/ChangeLog	2007-12-12 22:32:34 UTC (rev 2796)
+++ branches/WIP-pyshapelib-Unicode/thuban/ChangeLog	2007-12-15 20:40:22 UTC (rev 2797)
@@ -1,5 +1,15 @@
-2007-12-12   Bram de Greve <bram.degreve at gmail.com>
+2007-12-15   Bram de Greve <bram.degreve at bramz.net>
 
+	* shapelib and pyshapelib Unicode support now reads the .CPG
+	file so that we can finally use UTF-8 content.  See the ChangeLog
+	in pyshapelib.
+	
+	* Thuban/Model/table.py: when opening DBF files, ask to return
+	Unicode strings, and use the UTF-8 encoding when creating new
+	shapefiles.
+
+2007-12-12   Bram de Greve <bram.degreve at bramz.net>
+
 	Porting shapelib from maptools source cvs.maptools.org.
 	Currently, this will have no support for code pages and wide 
 	character filenames (Win32), but we'll get that back in later, 

Modified: branches/WIP-pyshapelib-Unicode/thuban/Thuban/Model/table.py
===================================================================
--- branches/WIP-pyshapelib-Unicode/thuban/Thuban/Model/table.py	2007-12-12 22:32:34 UTC (rev 2796)
+++ branches/WIP-pyshapelib-Unicode/thuban/Thuban/Model/table.py	2007-12-15 20:40:22 UTC (rev 2797)
@@ -86,7 +86,7 @@
         title = os.path.splitext(os.path.basename(self.filename))[0]
         TitledObject.__init__(self, title)
 
-        self.dbf = dbflib.DBFFile(filename)
+        self.dbf = dbflib.open(filename, return_unicode = True)
 
         # If true, self.dbf is open for writing.
         self._writable = 0
@@ -226,7 +226,7 @@
         order.
         """
         if not self._writable:
-            new_dbf = dbflib.DBFFile(self.filename, "r+b")
+            new_dbf = dbflib.open(self.filename, "r+b", return_unicode = True)
             self.dbf.close()
             self.dbf = new_dbf
             self._writable = 1
@@ -462,7 +462,7 @@
     indices to be saved to the file, otherwise all rows are saved.
     """
 
-    dbf = dbflib.create(filename)
+    dbf = dbflib.create(filename, code_page = dbflib.CPG_UTF_8, return_unicode = True)
 
     dbflib_fieldtypes = {FIELDTYPE_STRING: dbflib.FTString,
                          FIELDTYPE_INT: dbflib.FTInteger,

Modified: branches/WIP-pyshapelib-Unicode/thuban/libraries/pyshapelib/ChangeLog
===================================================================
--- branches/WIP-pyshapelib-Unicode/thuban/libraries/pyshapelib/ChangeLog	2007-12-12 22:32:34 UTC (rev 2796)
+++ branches/WIP-pyshapelib-Unicode/thuban/libraries/pyshapelib/ChangeLog	2007-12-15 20:40:22 UTC (rev 2797)
@@ -1,3 +1,29 @@
+2007-12-15  Bram de Greve <bram.degreve at bramz.net>
+
+	* dbflibmodule.c: Unicode support mark II.  The language_driver
+	members and functions have been dropped, as they are not sufficient
+	to indicate code pages specified by .CPG files.
+	- code_page: DBFFile now has member code_page that returns the DBF code page 
+	as a string.  This is either the content of the .CPG file, or a string of 
+	the form "LDID/42" if there's no .CPG file and the LDID number of the .DBF 
+	file is used to indicate the code page instead.
+	- DBFFile also sports a member codec, which is the name of the Python codec used
+	for the code page.	
+	- code_page: a new optional argument on create() to specify the DBF file's
+	code page on creation.  This is _not_ a Python codec name, but one of the constants
+	dbflib.LDID_* and dbflib.CPG_*.
+	- return_unicode: a new optional argument on DBFFile, open() and create().
+	It tells the DBFFile to decode the textual content using its codec and 
+	return it as Unicode.  It is False by default, which means you get the raw 
+	encoded string instead.
+	- codecs_map: a new optional argument on DBFFile, open() and create().
+	It allows you to provide your own dictionary that links DBF code pages to
+	Python codecs in case the default builtin one is not correct for your 
+	application.
+	- HAVE_LANGUAGE_DRIVER is gone, and HAVE_CODE_PAGE is here instead.	
+	- corresponding revisions of shapelib on cvs.maptools.org: shapefil.h v1.41,
+	dbfopen.c v1.77, safileio.c v1.2
+
 2007-04-25  Bernhard Herzog  <bh at intevation.de>
 
 	* shptreemodule.c: Fix copyright notice.  It should have been

Modified: branches/WIP-pyshapelib-Unicode/thuban/libraries/pyshapelib/dbflibmodule.c
===================================================================
--- branches/WIP-pyshapelib-Unicode/thuban/libraries/pyshapelib/dbflibmodule.c	2007-12-12 22:32:34 UTC (rev 2796)
+++ branches/WIP-pyshapelib-Unicode/thuban/libraries/pyshapelib/dbflibmodule.c	2007-12-15 20:40:22 UTC (rev 2797)
@@ -9,90 +9,16 @@
 
 #include "pyshapelib_common.h"
 
-/* UNICODE & LANGUAGE DRIVER SUPPORT FOR DBFLIB
- *
- * When writing Unicode objects to a dbflib database, the unicode has to be
- * encoded in 8-bit characters using a code page.  This code page is indentified
- * by the Language Driver ID (LDID) field in the database header.
- *
- * At this moment, this need unofficial modifications of the maptools shapelib
- * library because they do not read the LDID.  No patch has been submitted yet,
- * but the version contained in the Thuban source tree incorporates the required
- * modifications.
- *
- * pyshapelib is designed to compile with either the patched or unpatched shapelib
- * by defining HAVE_LANGUAGE_DRIVER as true or false respectively.  In the latter 
- * case, a Windows ANSI code page (cp1252) is assumed
- */
- #if HAVE_LANGUAGE_DRIVER
 
-#define PYSHAPELIB_NUM_LANGUAGE_DRIVERS 256
+static PyObject* default_codecs_map = NULL;
 
-#define PYSHAPELIB_ADD_LANGUAGE_DRIVER(ldid, codec, name)\
-	codecs[ldid] = codec;\
-	drivers[ldid] = "LDID_" name;\
-	PyModule_AddIntConstant(module, "LDID_" name, ldid)
-
-static char* codecs[PYSHAPELIB_NUM_LANGUAGE_DRIVERS];
-static char* drivers[PYSHAPELIB_NUM_LANGUAGE_DRIVERS];
-
-#endif
-
-
-
-/** Determine name of Python's built-in codec
- */
-static char* get_codec(DBFHandle handle)
-{
-#if HAVE_LANGUAGE_DRIVER
-	if (!codecs[handle->nLanguageDriver])
-	{
-		PyErr_Format(PyExc_ValueError, "Language Driver ID %d not recognized", handle->nLanguageDriver);
-	}
-	return codecs[handle->nLanguageDriver];
-#else
-	return "cp1252";
-#endif
-}
-
-
-
-/** decode to unicode object 
- */
-static PyObject* decode_string(DBFHandle handle, const char* string)
-{
-	char* codec = get_codec(handle);
-	if (!codec) return NULL;
-	return PyUnicode_Decode(string, strlen(string), codec, NULL);
-}
-
-/** encode unicode object to normal Python string object 
- */
-static PyObject* encode_string(DBFHandle handle, PyObject* string)
-{
-	char* codec = get_codec(handle);
-	if (!codec) return NULL;
-
-	if (PyString_Check(string))
-	{
-		return PyString_AsEncodedObject(string, codec, NULL);
-	}
-	if (PyUnicode_Check(string))
-	{
-		return PyUnicode_AsEncodedString(string, codec, NULL);
-	}
-
-	PyErr_SetString(PyExc_TypeError, "value is neither a string or unicode object");
-	return NULL;
-}
-
-
-
 /* --- DBFFile ------------------------------------------------------------------------------------------------------- */
 
 typedef struct {
 	PyObject_HEAD
 	DBFHandle handle;
+	char* codec;
+	int return_unicode;
 } DBFFileObject;
 
 
@@ -104,6 +30,8 @@
 	DBFFileObject* self;	
 	self = (DBFFileObject*) type->tp_alloc(type, 0);
 	self->handle = NULL;
+	self->codec = NULL;
+	self->return_unicode = 0;
 	return (PyObject*) self;
 }
 
@@ -115,18 +43,79 @@
 {
 	DBFClose(self->handle);
 	self->handle = NULL;
+	PyMem_Free(self->codec);
+	self->codec = NULL;
 	self->ob_type->tp_free((PyObject*)self);
 }
 
 
+static int dbffile_init_codec(DBFFileObject* self, PyObject* codecs_map)
+{
+#if HAVE_CODE_PAGE
+	size_t n = 0;
+	PyObject* ocodec = NULL;
+	char* codec = NULL;
+	char* code_page = (char*) DBFGetCodePage(self->handle);
 
+	PyMem_Free(self->codec);
+	self->codec = NULL;
+	if (codecs_map && codecs_map != Py_None)
+	{
+		if (!PyMapping_Check(codecs_map))
+		{
+			PyErr_SetString(PyExc_TypeError, "codecs_map is not mapable");
+			return -1;
+		}
+	}
+	else
+	{
+		codecs_map = default_codecs_map;
+	}	
+
+	if (code_page)
+	{
+		ocodec = PyMapping_GetItemString(codecs_map, code_page);
+		if (!ocodec)
+		{
+			PyErr_Format(PyExc_KeyError, "code_page '%s' not found in codecs_map", code_page);
+			return -1;
+		}
+		codec = PyString_AsString(ocodec);
+		if (!codec)
+		{
+			return -1;
+		}		
+		n = strlen(codec);
+		self->codec = PyMem_Malloc(n + 1);
+		if (!self->codec)
+		{
+			PyErr_NoMemory();
+			return -1;
+		}
+		memcpy(self->codec, codec, n + 1);
+	}
+#else
+	PyMem_Free(self->codec);
+	self->codec = NULL;
+#endif
+
+	return 0;
+}
+
+
 /* constructor
 */
 static int dbffile_init(DBFFileObject* self, PyObject* args, PyObject* kwds)
 {
 	char* file = NULL;
 	char* mode = "rb";
-	static char *kwlist[] = {"name", "mode", NULL};
+	PyObject* return_unicode = 0;
+	PyObject* codecs_map = NULL;
+#if HAVE_CODE_PAGE
+	static char *kwlist[] = {"name", "mode", "return_unicode", "codecs_map", NULL};
+#else
+	static char *kwlist[] = {"name", "mode", "return_unicode", NULL};
+#endif
 
 	DBFClose(self->handle);
 	self->handle = NULL;
@@ -134,7 +123,13 @@
 #if defined(SHPAPI_HAS_WIDE) && defined(Py_WIN_WIDE_FILENAMES)
 	if (GetVersion() < 0x80000000) {    /* On NT, so wide API available */
 		PyObject *wfile;
-		if (PyArg_ParseTupleAndKeywords(args, kwds, "U|s:DBFFile", kwlist, &wfile, &mode)) 
+#if HAVE_CODE_PAGE
+		if (PyArg_ParseTupleAndKeywords(args, kwds, "U|sOO:DBFFile", kwlist, 
+				&wfile, &mode, &return_unicode, &codecs_map)) 
+#else
+		if (PyArg_ParseTupleAndKeywords(args, kwds, "U|sO:DBFFile", kwlist, 
+				&wfile, &mode, &return_unicode)) 
+#endif
 		{
 			PyObject *wmode = PyUnicode_DecodeASCII(mode, strlen(mode), NULL);
 			if (!wmode) return -1;
@@ -157,8 +152,13 @@
 
 	if (!self->handle)
 	{
-		if (!PyArg_ParseTupleAndKeywords(args, kwds, "et|s:DBFFile", kwlist, 
-			Py_FileSystemDefaultEncoding, &file, &mode)) return -1;	
+#if HAVE_CODE_PAGE
+		if (!PyArg_ParseTupleAndKeywords(args, kwds, "et|sOO:DBFFile", kwlist, 
+			Py_FileSystemDefaultEncoding, &file, &mode, &return_unicode, &codecs_map)) return -1;	
+#else
+		if (!PyArg_ParseTupleAndKeywords(args, kwds, "et|sO:DBFFile", kwlist, 
+			Py_FileSystemDefaultEncoding, &file, &mode, &return_unicode)) return -1;	
+#endif
 		self->handle = DBFOpen(file, mode);
 
 		if (!self->handle)
@@ -171,6 +171,15 @@
 		PyMem_Free(file);
 	}
 
+	self->return_unicode = return_unicode && PyObject_IsTrue(return_unicode);
+
+	if (dbffile_init_codec(self, codecs_map) != 0)
+	{
+		DBFClose(self->handle);
+		self->handle = NULL;
+		return -1;
+	}
+
 	return 0;
 }
 
@@ -180,11 +189,44 @@
 {
 	DBFClose(self->handle);
 	self->handle = NULL;
+	PyMem_Free(self->codec);
+	self->codec = NULL;
 	Py_RETURN_NONE;
 }
 
 
 
+/** decode to unicode object 
+ */
+static PyObject* dbffile_decode_string(DBFFileObject* self, const char* string)
+{
+	if (self->return_unicode)
+	{
+		return PyUnicode_Decode(string, strlen(string), self->codec, NULL);
+	}
+	return PyString_FromString(string);
+}
+
+/** encode unicode object to normal Python string object 
+ */
+static PyObject* dbffile_encode_string(DBFFileObject* self, PyObject* string)
+{
+	if (PyString_Check(string))
+	{
+		Py_INCREF(string);
+		return string;
+	}
+	if (PyUnicode_Check(string))
+	{
+		return PyUnicode_AsEncodedString(string, self->codec, NULL);
+	}
+
+	PyErr_SetString(PyExc_TypeError, "value is neither a string or unicode object");
+	return NULL;
+}
+
+
+
 static PyObject* dbffile_field_count(DBFFileObject* self)
 {
 	return PyInt_FromLong((long)DBFGetFieldCount(self->handle));
@@ -209,7 +251,7 @@
 	
 	field_name[0] = '\0';
 	field_type = DBFGetFieldInfo(self->handle, field, field_name, &width, &decimals);
-	name_object = decode_string(self->handle, field_name);
+	name_object = dbffile_decode_string(self, field_name);
 	
 	return Py_BuildValue("iOii", field_type, name_object, width, decimals);
 }
@@ -228,7 +270,7 @@
 		if (!PyArg_ParseTuple(args, "Siii:add_field", &oname, &type, &width, &decimals)) return NULL;
 	}
 	
-	name = encode_string(self->handle, oname);
+	name = dbffile_encode_string(self, oname);
 	if (!name) return NULL;
 
 	field = DBFAddField(self->handle, PyString_AsString(name), (DBFFieldType)type, width, decimals);
@@ -253,18 +295,18 @@
 * The name argument will be passed to DBFGetFieldInfo as is and should
 * thus be either NULL or a pointer to an array of at least 12 chars
 */
-static PyObject* do_read_attribute(DBFHandle handle, int record, int field, char * name)
+static PyObject* do_read_attribute(DBFFileObject* self, int record, int field, char * name)
 {
 	int type, width;
 	const char* string;
-	type = DBFGetFieldInfo(handle, field, name, &width, NULL);
+	type = DBFGetFieldInfo(self->handle, field, name, &width, NULL);
 	
 	/* For strings NULL and the empty string are indistinguishable
 	* in DBF files. We prefer empty strings instead for backwards
 	* compatibility reasons because older wrapper versions returned
 	* emtpy strings as empty strings.
 	*/
-	if (type != FTString && DBFIsAttributeNULL(handle, record, field))
+	if (type != FTString && DBFIsAttributeNULL(self->handle, record, field))
 	{
 		Py_RETURN_NONE;
 	}
@@ -273,17 +315,17 @@
 		switch (type)
 		{
 		case FTString:
-			string = DBFReadStringAttribute(handle, record, field);
-			if (string) return decode_string(handle, string);
+			string = DBFReadStringAttribute(self->handle, record, field);
+			if (string) return dbffile_decode_string(self, string);
 
 		case FTInteger:
-			return PyInt_FromLong((long)DBFReadIntegerAttribute(handle, record, field));
+			return PyInt_FromLong((long)DBFReadIntegerAttribute(self->handle, record, field));
 
 		case FTDouble:
-			return PyFloat_FromDouble(DBFReadDoubleAttribute(handle, record, field));
+			return PyFloat_FromDouble(DBFReadDoubleAttribute(self->handle, record, field));
 			
 		case FTLogical:
-			string = DBFReadLogicalAttribute(handle, record, field);
+			string = DBFReadLogicalAttribute(self->handle, record, field);
 			if (string)
 			{
 				switch (string[0])
@@ -306,7 +348,7 @@
 	
 	PyErr_Format(PyExc_IOError,	"Can't read value for row %d column %d", record, field);
 	return NULL;
-}    
+}
 
 
 
@@ -335,7 +377,7 @@
 		return NULL;
 	}
 
-	return do_read_attribute(self->handle, record, field, NULL);
+	return do_read_attribute(self, record, field, NULL);
 }
 
 
@@ -369,7 +411,7 @@
 	num_fields = DBFGetFieldCount(self->handle);
 	for (i = 0; i < num_fields; i++)
 	{
-		value = do_read_attribute(self->handle, record, i, name);
+		value = do_read_attribute(self, record, i, name);
 		if (!value || PyDict_SetItemString(dict, name, value) < 0) goto fail;
 		Py_DECREF(value);
 		value = NULL;
@@ -386,7 +428,7 @@
 
 
 /* write a single field of a record. */
-static int do_write_attribute(DBFHandle handle, int record, int field, int type, PyObject* value)
+static int do_write_attribute(DBFFileObject* self, int record, int field, int type, PyObject* value)
 {
 	PyObject* string_value = NULL;
 	int int_value;
@@ -395,16 +437,16 @@
 
 	if (value == Py_None)
 	{
-		if (DBFWriteNULLAttribute(handle, record, field)) return 1;
+		if (DBFWriteNULLAttribute(self->handle, record, field)) return 1;
 	}
 	else
 	{
 		switch (type)
 		{
 		case FTString:
-			string_value = encode_string(handle, value);
+			string_value = dbffile_encode_string(self, value);
 			if (!string_value) return 0;
-			if (DBFWriteStringAttribute(handle, record, field, PyString_AsString(string_value)))
+			if (DBFWriteStringAttribute(self->handle, record, field, PyString_AsString(string_value)))
 			{
 				Py_DECREF(string_value);
 				return 1;
@@ -415,19 +457,19 @@
 		case FTInteger:
 			int_value = PyInt_AsLong(value);
 			if (int_value == -1 && PyErr_Occurred()) return 0;
-			if (DBFWriteIntegerAttribute(handle, record, field, int_value)) return 1;
+			if (DBFWriteIntegerAttribute(self->handle, record, field, int_value)) return 1;
 			break;
 
 		case FTDouble:
 			double_value = PyFloat_AsDouble(value);
 			if (double_value == -1 && PyErr_Occurred()) return 0;
-			if (DBFWriteDoubleAttribute(handle, record, field, double_value)) return 1;
+			if (DBFWriteDoubleAttribute(self->handle, record, field, double_value)) return 1;
 			break;
 			
 		case FTLogical:
 			logical_value = PyObject_IsTrue(value);
 			if (logical_value == -1) return 0;
-			if (DBFWriteLogicalAttribute(handle, record, field, logical_value ? 'T' : 'F')) return 1;
+			if (DBFWriteLogicalAttribute(self->handle, record, field, logical_value ? 'T' : 'F')) return 1;
 			break;
 
 		default:
@@ -459,7 +501,7 @@
 	}
 
 	type = DBFGetFieldInfo(self->handle, field, NULL, NULL, NULL);
-	if (!do_write_attribute(self->handle, record, field, type, value)) return NULL;
+	if (!do_write_attribute(self, record, field, type, value)) return NULL;
 	Py_RETURN_NONE;
 }
 
@@ -500,7 +542,7 @@
 			type = DBFGetFieldInfo(self->handle, i, NULL, NULL, NULL); 
 			value = PySequence_GetItem(record_object, i);
 			if (!value) return NULL;
-			if (!do_write_attribute(self->handle, record, i, type, value)) 
+			if (!do_write_attribute(self, record, i, type, value)) 
 			{
 				Py_DECREF(value);
 				return NULL;
@@ -518,7 +560,7 @@
 			name[0] = '\0';
 			type = DBFGetFieldInfo(self->handle, i, name, NULL, NULL);
 			value = PyDict_GetItemString(record_object, name);
-			if (value && !do_write_attribute(self->handle, record, i, type, value)) return NULL;
+			if (value && !do_write_attribute(self, record, i, type, value)) return NULL;
 		}
 	}
 	
@@ -551,16 +593,30 @@
 #endif
 
 
-#if HAVE_LANGUAGE_DRIVER
+#if HAVE_CODE_PAGE
 
-static PyObject* dbffile_language_driver(DBFFileObject* self, void* closure)
+static PyObject* dbffile_code_page(DBFFileObject* self, void* closure)
 {
-	return PyInt_FromLong((long)self->handle->nLanguageDriver);
+	const char* code_page = DBFGetCodePage(self->handle);
+	if (!code_page)
+	{
+		Py_RETURN_NONE;
+	}
+	return PyString_FromString(code_page);
 }
 
 #endif
 
+static PyObject* dbffile_codec(DBFFileObject* self, void* closure)
+{
+	if (!self->codec)
+	{
+		Py_RETURN_NONE;
+	}
+	return PyString_FromString(self->codec);
+}
 
+
 static struct PyMethodDef dbffile_methods[] = 
 {
 	{"close", (PyCFunction)dbffile_close, METH_NOARGS, 
@@ -613,8 +669,9 @@
 
 static struct PyGetSetDef dbffile_getsetters[] = 
 {
-#if HAVE_LANGUAGE_DRIVER
-	{"language_driver", (getter)dbffile_language_driver, NULL, "Language Driver ID (read-only)" },
+	{"codec", (getter)dbffile_codec, NULL, "Python codec name used to encode or decode Unicode strings (read-only)" },
+#if HAVE_CODE_PAGE
+	{"code_page", (getter)dbffile_code_page, NULL, "DBF Code Page from LDID or .CPG file (read-only)" },
 #endif
 	{NULL}
 };
@@ -627,24 +684,39 @@
 
 /* --- dbflib -------------------------------------------------------------------------------------------------------- */
 
-static PyObject* dbflib_open(PyObject* module, PyObject* args)
+static PyObject* dbflib_open(PyObject* module, PyObject* args, PyObject* kwds)
 {
-	return PyObject_CallObject((PyObject*)&DBFFileType, args);
+	return PyObject_Call((PyObject*)&DBFFileType, args, kwds);
 }
 
 
 
-static PyObject* dbflib_create(PyObject* module, PyObject* args)
+static PyObject* dbflib_create(PyObject* module, PyObject* args, PyObject* kwds)
 {
 	char* file;
 	DBFFileObject* result;
 	DBFHandle handle = NULL;
 	int wideargument = 0;
+	PyObject* return_unicode = NULL;
+	PyObject* codecs_map = NULL;
+	char* code_page = NULL;
 
+#if HAVE_CODE_PAGE
+	static char *kwlist[] = {"name", "code_page", "return_unicode", "codecs_map", NULL};
+#else
+	static char *kwlist[] = {"name", "return_unicode", NULL};
+#endif
+
 #if defined(SHPAPI_HAS_WIDE) && defined(Py_WIN_WIDE_FILENAMES)
 	if (GetVersion() < 0x80000000) {    /* On NT, so wide API available */
 		PyObject *wfile;
-		if (PyArg_ParseTuple(args, "U:create", &wfile)) 
+#if HAVE_CODE_PAGE
+		if (PyArg_ParseTupleAndKeywords(args, kwds, "U|sOO:create", kwlist, 
+				&wfile, &code_page, &return_unicode, &codecs_map))
+#else
+		if (PyArg_ParseTupleAndKeywords(args, kwds, "U|O:create", kwlist, 
+				&wfile, &return_unicode))
+#endif
 		{
 			wideargument = 1;
 			handle = DBFCreateW(PyUnicode_AS_UNICODE(wfile));
@@ -665,8 +737,15 @@
 	
 	if (!handle)
 	{
-		if (!PyArg_ParseTuple(args, "et:create", Py_FileSystemDefaultEncoding, &file)) return NULL;
+#if HAVE_CODE_PAGE
+		if (!PyArg_ParseTupleAndKeywords(args, kwds, "et|sOO:create", kwlist, Py_FileSystemDefaultEncoding, 
+				&file, &code_page, &return_unicode, &codecs_map)) return NULL;
+		handle = DBFCreateEx(file, code_page);
+#else
+		if (!PyArg_ParseTupleAndKeywords(args, kwds, "et|O:create", kwlist, Py_FileSystemDefaultEncoding, 
+				&file, &return_unicode)) return NULL;
 		handle = DBFCreate(file);
+#endif
 		if (!handle)
 		{
 				PyErr_SetFromErrnoWithFilename(PyExc_IOError, file);
@@ -684,69 +763,68 @@
 	}
 	
 	result->handle = handle;
-	return (PyObject*) result;
-}
+	result->return_unicode = return_unicode && PyObject_IsTrue(return_unicode);
+	result->codec = NULL;
 
-
-
-#if HAVE_LANGUAGE_DRIVER
-
-/** translate a numeric Language Driver ID to the name of Python's codec.
- */
-static PyObject* dbflib_language_driver_codec(PyObject* module, PyObject* args)
-{
-	int ldid;
-	if (!PyArg_ParseTuple(args, "i:language_driver_name", &ldid)) return NULL;
-	if (ldid < 0 || ldid >= PYSHAPELIB_NUM_LANGUAGE_DRIVERS || !codecs[ldid])
+	if (dbffile_init_codec(result, codecs_map) != 0)
 	{
-		PyErr_SetString(PyExc_ValueError, "invalid driver id");
+		dbffile_dealloc(result);
 		return NULL;
 	}
-	return PyString_FromString(codecs[ldid]);
-}
 
-/** translate a numeric Language Driver ID to a string represting its constant.
- */
-static PyObject* dbflib_language_driver_name(PyObject* module, PyObject* args)
-{
-	int ldid;
-	if (!PyArg_ParseTuple(args, "i:language_driver_name", &ldid)) return NULL;
-	if (ldid < 0 || ldid >= PYSHAPELIB_NUM_LANGUAGE_DRIVERS || !drivers[ldid])
-	{
-		PyErr_SetString(PyExc_ValueError, "invalid driver id");
-		return NULL;
-	}
-	return PyString_FromString(drivers[ldid]);
+	return (PyObject*) result;
 }
 
-#endif
 
 
-
 static struct PyMethodDef dbflib_methods[] = 
 {
-	{"open", (PyCFunction)dbflib_open, METH_VARARGS, 
-		"open(name [, mode]) -> DBFFile\n\n"
+	{"open", (PyCFunction)dbflib_open, METH_VARARGS | METH_KEYWORDS,
+#if HAVE_CODE_PAGE
+		"open(name [, mode [, return_unicode [, codecs_map]]]) -> DBFFile\n\n"
+#else
+		"open(name [, mode [, return_unicode]]) -> DBFFile\n\n"
+#endif
 		"opens a DBFFile" },
-	{"create", (PyCFunction)dbflib_create, METH_VARARGS, 
-		"create(name [, language_driver]) -> DBFFile\n\n"
+	{"create", (PyCFunction)dbflib_create, METH_VARARGS | METH_KEYWORDS, 
+#if HAVE_CODE_PAGE
+		"create(name [, code_page [, return_unicode [, codecs_map]]]) -> DBFFile\n\n"
+#else
+		"create(name [, return_unicode]) -> DBFFile\n\n"
+#endif
 		"create a DBFFile " },
-#if HAVE_LANGUAGE_DRIVER
-	{"language_driver_codec", (PyCFunction)dbflib_language_driver_codec, METH_VARARGS, 
-		"language_driver_codec(driver_id) -> string\n\n"
-		"translate language driver id into the name of the Python's codec used as code page." },
-	{"language_driver_name", (PyCFunction)dbflib_language_driver_name, METH_VARARGS, 
-		"language_driver_name(driver_id) -> string\n\n"
-		"translate language driver id into a string." },
-#endif
 	{NULL}
 };
 
 
+#if HAVE_CODE_PAGE
+
+void add_ldid(PyObject* module, int ldid, const char* codec, const char* name)
+{
+	char code_page[64];
+	char constant[64];
+	PyObject* ocodec = PyString_FromString(codec);
+	sprintf(code_page, "LDID/%i", ldid);
+	PyDict_SetItemString(default_codecs_map, code_page, ocodec);
+	Py_XDECREF(ocodec);
+	sprintf(constant, "LDID_%s", name);
+	PyModule_AddStringConstant(module, constant, code_page);
+}
+
+void add_cpg(PyObject* module, char* code_page, const char* codec, const char* name)
+{
+	char constant[64];
+	PyObject* ocodec = PyString_FromString(codec);
+	PyDict_SetItemString(default_codecs_map, code_page, PyString_FromString(codec));
+	Py_XDECREF(ocodec);
+	sprintf(constant, "CPG_%s", name);
+	PyModule_AddStringConstant(module, constant, code_page);
+}
+
+#endif
+
 PyMODINIT_FUNC initdbflib(void)
 {
-	int i;
-
 	PyObject* module = Py_InitModule("dbflib", dbflib_methods);
 	if (!module) return;
 	
@@ -758,86 +836,114 @@
 	PYSHAPELIB_ADD_CONSTANT(FTLogical);
 	PYSHAPELIB_ADD_CONSTANT(FTInvalid);
 	PyModule_AddIntConstant(module, "_have_commit", HAVE_UPDATE_HEADER);
+	PyModule_AddIntConstant(module, "_have_code_page", HAVE_CODE_PAGE);
 
-#if HAVE_LANGUAGE_DRIVER
+#if HAVE_CODE_PAGE
+	default_codecs_map = PyDict_New();
+
 	/* table compiled from these resources:
 	 * http://www.clicketyclick.dk/databases/xbase/format/dbf.html
 	 * http://www.esrinl.com/content/file.asp?id=307
 	 * http://msdn2.microsoft.com/en-us/library/aa975345(VS.71).aspx
 	 */
-	for (i = 0; i < PYSHAPELIB_NUM_LANGUAGE_DRIVERS; ++i)
-	{
-		codecs[i] = NULL;
-		drivers[i] = NULL;
-	}
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x00, "cp1252", "NOT_SET");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x01, "cp437", "DOS_USA");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x02, "cp850", "DOS_INTERNATIONAL");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x03, "cp1252", "WINDOWS_ANSI");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x04, "mac_roman", "STANDARD_MACINTOSH");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x08, "cp865", "DANISH_OEM");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x09, "cp437", "DUTCH_OEM");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x0a, "cp850", "DUTCH_OEM_2");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x0b, "cp437", "FINNISH_OEM");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x0d, "cp437", "FRENCH_OEM");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x0e, "cp850", "FRENCH_OEM_2");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x0f, "cp437", "GERMAN_OEM");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x10, "cp850", "GERMAN_OEM_2");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x11, "cp437", "ITALIAN_OEM");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x12, "cp850", "ITALIAN_OEM_2");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x13, "cp932", "JAPANESE_SHIFT_JIS");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x14, "cp850", "SPANISH_OEM_2");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x15, "cp437", "SWEDISH_OEM");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x16, "cp850", "SWEDISH_OEM_2");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x17, "cp865", "NORWEGIAN_OEM");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x18, "cp437", "SPANISH_OEM");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x19, "cp437", "ENGLISH_BRITAIN_OEM");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x1a, "cp850", "ENGLISH_BRITAIN_OEM_2");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x0b, "cp437", "ENGLISH_US_OEM");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x1c, "cp863", "FRENCH_CANADA_OEM");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x1d, "cp850", "FRENCH_OEM_2");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x1f, "cp852", "CZECH_OEM");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x22, "cp852", "HUNGARIAN_OEM");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x23, "cp852", "POLISH_OEM");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x24, "cp860", "PORTUGUESE_OEM");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x25, "cp850", "PORTUGUESE_OEM_2");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x26, "cp866", "RUSSIAN_OEM");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x37, "cp850", "ENGLISH_US_OEM_2");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x40, "cp852", "ROMANIAN_OEM");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x4d, "cp936", "CHINESE_GBK_PRC");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x4e, "cp949", "KOREAN_ANSI_OEM);");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x4f, "cp950", "CHINESE_BIG5_TAIWAN");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x50, "cp874", "THAI_ANSI_OEM");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x57, "cp1252", "ESRI_ANSI");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x58, "cp1252", "WESTERN_EUROPEAN_ANSI");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x59, "cp1252", "SPANISH_ANSI");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x64, "cp852", "EASTERN_EUROPEAN_MSDOS");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x65, "cp866", "RUSSIAN_MSDOS");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x66, "cp865", "NORDIC_MSDOS");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x67, "cp861", "ICELANDIC_MSDOS");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x68, "cp895", "CZECH_MSDOS");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x69, "cp620", "POLISH_MSDOS");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x6a, "cp737", "GREEK_MSDOS");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x6b, "cp857", "TURKISH_MSDOS");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x6c, "cp863", "FRENCH_CANADA_MSDOS");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x78, "cp950", "TAIWAN_BIG5");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x79, "cp949", "HANGUL_WANSUG");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x7a, "cp936", "PRC_GBK");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x7b, "cp932", "JAPANESE_SHIFT_JIS");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x7c, "cp874", "THAI_WINDOWS_MSDOS");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x7d, "cp1255", "HEBREW_WINDOWS");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x7e, "cp1256", "ARABIC_WINDOWS");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x86, "cp737", "GREEK_OEM");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x87, "cp852", "SLOVENIAN_OEM");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x88, "cp857", "TURKISH_OEM");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x96, "mac_cyrillic", "RUSSIAN_MACINTOSH");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x97, "mac_latin2", "EASTERN_EUROPEAN_MACINTOSH");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x98, "mac_greek", "GREEK_MACINTOSH");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0xc8, "cp1250", "EASTERN_EUROPEAN_WINDOWS");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0xc9, "cp1251", "RUSSIAN_WINDOWS");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0xca, "cp1254", "TURKISH_WINDOWS");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0xcb, "cp1253", "GREEK_WINDOWS");
-	PYSHAPELIB_ADD_LANGUAGE_DRIVER(0xcc, "cp1257", "BALTIC_WINDOWS");
+	add_ldid(module, 0x00, "cp1252", "NOT_SET");
+	add_ldid(module, 0x01, "cp437", "DOS_USA");
+	add_ldid(module, 0x02, "cp850", "DOS_INTERNATIONAL");
+	add_ldid(module, 0x03, "cp1252", "WINDOWS_ANSI");
+	add_ldid(module, 0x04, "mac_roman", "STANDARD_MACINTOSH");
+	add_ldid(module, 0x08, "cp865", "DANISH_OEM");
+	add_ldid(module, 0x09, "cp437", "DUTCH_OEM");
+	add_ldid(module, 0x0a, "cp850", "DUTCH_OEM_2");
+	add_ldid(module, 0x0b, "cp437", "FINNISH_OEM");
+	add_ldid(module, 0x0d, "cp437", "FRENCH_OEM");
+	add_ldid(module, 0x0e, "cp850", "FRENCH_OEM_2");
+	add_ldid(module, 0x0f, "cp437", "GERMAN_OEM");
+	add_ldid(module, 0x10, "cp850", "GERMAN_OEM_2");
+	add_ldid(module, 0x11, "cp437", "ITALIAN_OEM");
+	add_ldid(module, 0x12, "cp850", "ITALIAN_OEM_2");
+	add_ldid(module, 0x13, "cp932", "JAPANESE_SHIFT_JIS");
+	add_ldid(module, 0x14, "cp850", "SPANISH_OEM_2");
+	add_ldid(module, 0x15, "cp437", "SWEDISH_OEM");
+	add_ldid(module, 0x16, "cp850", "SWEDISH_OEM_2");
+	add_ldid(module, 0x17, "cp865", "NORWEGIAN_OEM");
+	add_ldid(module, 0x18, "cp437", "SPANISH_OEM");
+	add_ldid(module, 0x19, "cp437", "ENGLISH_BRITAIN_OEM");
+	add_ldid(module, 0x1a, "cp850", "ENGLISH_BRITAIN_OEM_2");
+	add_ldid(module, 0x1b, "cp437", "ENGLISH_US_OEM");
+	add_ldid(module, 0x1c, "cp863", "FRENCH_CANADA_OEM");
+	add_ldid(module, 0x1d, "cp850", "FRENCH_OEM_2");
+	add_ldid(module, 0x1f, "cp852", "CZECH_OEM");
+	add_ldid(module, 0x22, "cp852", "HUNGARIAN_OEM");
+	add_ldid(module, 0x23, "cp852", "POLISH_OEM");
+	add_ldid(module, 0x24, "cp860", "PORTUGUESE_OEM");
+	add_ldid(module, 0x25, "cp850", "PORTUGUESE_OEM_2");
+	add_ldid(module, 0x26, "cp866", "RUSSIAN_OEM");
+	add_ldid(module, 0x37, "cp850", "ENGLISH_US_OEM_2");
+	add_ldid(module, 0x40, "cp852", "ROMANIAN_OEM");
+	add_ldid(module, 0x4d, "cp936", "CHINESE_GBK_PRC");
+	add_ldid(module, 0x4e, "cp949", "KOREAN_ANSI_OEM");
+	add_ldid(module, 0x4f, "cp950", "CHINESE_BIG5_TAIWAN");
+	add_ldid(module, 0x50, "cp874", "THAI_ANSI_OEM");
+	add_ldid(module, 0x57, "cp1252", "ESRI_ANSI");
+	add_ldid(module, 0x58, "cp1252", "WESTERN_EUROPEAN_ANSI");
+	add_ldid(module, 0x59, "cp1252", "SPANISH_ANSI");
+	add_ldid(module, 0x64, "cp852", "EASTERN_EUROPEAN_MSDOS");
+	add_ldid(module, 0x65, "cp866", "RUSSIAN_MSDOS");
+	add_ldid(module, 0x66, "cp865", "NORDIC_MSDOS");
+	add_ldid(module, 0x67, "cp861", "ICELANDIC_MSDOS");
+	add_ldid(module, 0x68, "cp895", "CZECH_MSDOS");
+	add_ldid(module, 0x69, "cp620", "POLISH_MSDOS");
+	add_ldid(module, 0x6a, "cp737", "GREEK_MSDOS");
+	add_ldid(module, 0x6b, "cp857", "TURKISH_MSDOS");
+	add_ldid(module, 0x6c, "cp863", "FRENCH_CANADA_MSDOS");
+	add_ldid(module, 0x78, "cp950", "TAIWAN_BIG5");
+	add_ldid(module, 0x79, "cp949", "HANGUL_WANSUG");
+	add_ldid(module, 0x7a, "cp936", "PRC_GBK");
+	add_ldid(module, 0x7b, "cp932", "JAPANESE_SHIFT_JIS");
+	add_ldid(module, 0x7c, "cp874", "THAI_WINDOWS_MSDOS");
+	add_ldid(module, 0x7d, "cp1255", "HEBREW_WINDOWS");
+	add_ldid(module, 0x7e, "cp1256", "ARABIC_WINDOWS");
+	add_ldid(module, 0x86, "cp737", "GREEK_OEM");
+	add_ldid(module, 0x87, "cp852", "SLOVENIAN_OEM");
+	add_ldid(module, 0x88, "cp857", "TURKISH_OEM");
+	add_ldid(module, 0x96, "mac_cyrillic", "RUSSIAN_MACINTOSH");
+	add_ldid(module, 0x97, "mac_latin2", "EASTERN_EUROPEAN_MACINTOSH");
+	add_ldid(module, 0x98, "mac_greek", "GREEK_MACINTOSH");
+	add_ldid(module, 0xc8, "cp1250", "EASTERN_EUROPEAN_WINDOWS");
+	add_ldid(module, 0xc9, "cp1251", "RUSSIAN_WINDOWS");
+	add_ldid(module, 0xca, "cp1254", "TURKISH_WINDOWS");
+	add_ldid(module, 0xcb, "cp1253", "GREEK_WINDOWS");
+	add_ldid(module, 0xcc, "cp1257", "BALTIC_WINDOWS");
+	add_cpg(module, "UTF-8", "utf_8", "UTF_8");
+	add_cpg(module, "OEM 737", "cp737", "OEM_737");
+	add_cpg(module, "OEM 775", "cp775", "OEM_775");
+	add_cpg(module, "OEM 852", "cp852", "OEM_852");
+	add_cpg(module, "OEM 855", "cp855", "OEM_855");
+	add_cpg(module, "OEM 857", "cp857", "OEM_857");
+	add_cpg(module, "OEM 860", "cp860", "OEM_860");
+	add_cpg(module, "OEM 861", "cp861", "OEM_861");
+	add_cpg(module, "OEM 862", "cp862", "OEM_862");
+	add_cpg(module, "OEM 863", "cp863", "OEM_863");
+	add_cpg(module, "OEM 864", "cp864", "OEM_864");
+	add_cpg(module, "OEM 865", "cp865", "OEM_865");
+	add_cpg(module, "OEM 866", "cp866", "OEM_866");
+	add_cpg(module, "OEM 869", "cp869", "OEM_869");
+	add_cpg(module, "OEM 932", "cp932", "OEM_932");
+	add_cpg(module, "OEM 950", "cp950", "OEM_950");
+	add_cpg(module, "ISO 88591", "iso-8859-1", "ISO_8859_1");
+	add_cpg(module, "ISO 88592", "iso-8859-2", "ISO_8859_2");
+	add_cpg(module, "ISO 88593", "iso-8859-3", "ISO_8859_3");
+	add_cpg(module, "ISO 88594", "iso-8859-4", "ISO_8859_4");
+	add_cpg(module, "ISO 88595", "iso-8859-5", "ISO_8859_5");
+	add_cpg(module, "ISO 88596", "iso-8859-6", "ISO_8859_6");
+	add_cpg(module, "ISO 88597", "iso-8859-7", "ISO_8859_7");
+	add_cpg(module, "ISO 88598", "iso-8859-8", "ISO_8859_8");
+	add_cpg(module, "ISO 88599", "iso-8859-9", "ISO_8859_9");
+	add_cpg(module, "ISO 885910", "iso-8859-10", "ISO_8859_10");
+	add_cpg(module, "ISO 885913", "iso-8859-13", "ISO_8859_13");
+	add_cpg(module, "ISO 885915", "iso-8859-15", "ISO_8859_15");
+
+
 #endif
 
 }

Modified: branches/WIP-pyshapelib-Unicode/thuban/libraries/pyshapelib/setup.py
===================================================================
--- branches/WIP-pyshapelib-Unicode/thuban/libraries/pyshapelib/setup.py	2007-12-12 22:32:34 UTC (rev 2796)
+++ branches/WIP-pyshapelib-Unicode/thuban/libraries/pyshapelib/setup.py	2007-12-15 20:40:22 UTC (rev 2797)
@@ -39,8 +39,8 @@
 	DBFUpdateHeader function and '0' otherwise.  To check whether
 	DBFUpdateHeader is available, we scan shapefil.h for the string
 	'DBFUpdateHeader'.
-	- HAVE_LANGUAGE_DRIVER, which is '1' if the dbflib version we will 
-	compiling with has the nLanguageDriver field in DBFInfo and '0' otherwise.
+	- HAVE_CODE_PAGE, which is '1' if the dbflib version we will be
+	compiling with has the DBFGetCodePage function and '0' otherwise.
 	Again, shapefil.h is scanned to check this.
 	"""
 	f = open(convert_path(shp_dir + "/shapefil.h"))
@@ -54,7 +54,7 @@
 	
 	return [
 		("HAVE_UPDATE_HEADER", have("DBFUpdateHeader")),
-		("HAVE_LANGUAGE_DRIVER", have("nLanguageDriver"))]
+		("HAVE_CODE_PAGE", have("DBFGetCodePage"))]
 
 extensions = [Extension("shapelib",
 						["shapelibmodule.c",

Modified: branches/WIP-pyshapelib-Unicode/thuban/libraries/shapelib/dbfopen.c
===================================================================
--- branches/WIP-pyshapelib-Unicode/thuban/libraries/shapelib/dbfopen.c	2007-12-12 22:32:34 UTC (rev 2796)
+++ branches/WIP-pyshapelib-Unicode/thuban/libraries/shapelib/dbfopen.c	2007-12-15 20:40:22 UTC (rev 2797)
@@ -34,6 +34,9 @@
  ******************************************************************************
  *
  * $Log: dbfopen.c,v $
+ * Revision 1.77  2007/12/15 20:25:21  bram
+ * dbfopen.c now reads the Code Page information from the DBF file, and exports this information as a string through the DBFGetCodePage function.  This is either the number from the LDID header field ("LDID/<number>") or as the content of an accompanying .CPG file.  When creating a DBF file, the code can be set using DBFCreateEx.
+ *
  * Revision 1.76  2007/12/12 22:21:32  bram
  * DBFClose: check for NULL psDBF handle before trying to close it.
  *
@@ -181,6 +184,8 @@
     abyHeader[10] = (unsigned char) (psDBF->nRecordLength % 256);
     abyHeader[11] = (unsigned char) (psDBF->nRecordLength / 256);
 
+    abyHeader[29] = (unsigned char) (psDBF->iLanguageDriver);
+
 /* -------------------------------------------------------------------- */
 /*      Write the initial 32 byte file header, and all the field        */
 /*      descriptions.                                     		*/
@@ -348,9 +353,11 @@
 
 {
     DBFHandle		psDBF;
+    SAFile		pfCPG;
     unsigned char	*pabyBuf;
     int			nFields, nHeadLen, iField, i;
     char		*pszBasename, *pszFullname;
+    int                 nBufSize = 500;
 
 /* -------------------------------------------------------------------- */
 /*      We only allow the access strings "rb" and "r+".                  */
@@ -392,13 +399,22 @@
         sprintf( pszFullname, "%s.DBF", pszBasename );
         psDBF->fp = psDBF->sHooks.FOpen(pszFullname, pszAccess );
     }
-    
+
+    sprintf( pszFullname, "%s.cpg", pszBasename );
+    pfCPG = psHooks->FOpen( pszFullname, "r" );
+    if( pfCPG == NULL )
+    {
+        sprintf( pszFullname, "%s.CPG", pszBasename );
+        pfCPG = psHooks->FOpen( pszFullname, "r" );
+    }
+
     free( pszBasename );
     free( pszFullname );
     
     if( psDBF->fp == NULL )
     {
         free( psDBF );
+        psHooks->FClose( pfCPG );
         return( NULL );
     }
 
@@ -409,10 +425,11 @@
 /* -------------------------------------------------------------------- */
 /*  Read Table Header info                                              */
 /* -------------------------------------------------------------------- */
-    pabyBuf = (unsigned char *) malloc(500);
+    pabyBuf = (unsigned char *) malloc(nBufSize);
     if( psDBF->sHooks.FRead( pabyBuf, 32, 1, psDBF->fp ) != 1 )
     {
         psDBF->sHooks.FClose( psDBF->fp );
+        psDBF->sHooks.FClose( pfCPG );
         free( pabyBuf );
         free( psDBF );
         return NULL;
@@ -423,12 +440,39 @@
 
     psDBF->nHeaderLength = nHeadLen = pabyBuf[8] + pabyBuf[9]*256;
     psDBF->nRecordLength = pabyBuf[10] + pabyBuf[11]*256;
-    
+    psDBF->iLanguageDriver = pabyBuf[29];
+
     psDBF->nFields = nFields = (nHeadLen - 32) / 32;
 
     psDBF->pszCurrentRecord = (char *) malloc(psDBF->nRecordLength);
 
 /* -------------------------------------------------------------------- */
+/*  Figure out the code page from the LDID and CPG                      */
+/* -------------------------------------------------------------------- */
+
+    psDBF->pszCodePage = NULL;
+    if( pfCPG )
+    {
+        size_t n;
+        char *buffer = (char *) pabyBuf;
+        buffer[0] = '\0';
+        psDBF->sHooks.FRead( pabyBuf, nBufSize - 1, 1, pfCPG );
+        n = strcspn( pabyBuf, "\n\r" );
+        if( n > 0 )
+        {
+            pabyBuf[n] = '\0';
+            psDBF->pszCodePage = (char *) malloc(n + 1);
+            memcpy( psDBF->pszCodePage, pabyBuf, n + 1 );
+        }
+    }
+    if( psDBF->pszCodePage == NULL && pabyBuf[29] != 0 )
+    {
+        sprintf( pabyBuf, "LDID/%i", psDBF->iLanguageDriver );
+        psDBF->pszCodePage = (char *) malloc(strlen(pabyBuf) + 1);
+        strcpy( psDBF->pszCodePage, pabyBuf );
+    }
+
+/* -------------------------------------------------------------------- */
 /*  Read in Field Definitions                                           */
 /* -------------------------------------------------------------------- */
     
@@ -530,6 +574,7 @@
 
     free( psDBF->pszHeader );
     free( psDBF->pszCurrentRecord );
+    free( psDBF->pszCodePage );
 
     free( psDBF );
 }
@@ -537,18 +582,31 @@
 /************************************************************************/
 /*                             DBFCreate()                              */
 /*                                                                      */
-/*      Create a new .dbf file.                                         */
+/*      Create a new .dbf file with default code page LDID/3            */
 /************************************************************************/
 
 DBFHandle SHPAPI_CALL
 DBFCreate( const char * pszFilename )
 
 {
+    return DBFCreateEx( pszFilename, "LDID/3" ); /* delegate to DBFCreateEx with the default code page "LDID/3" */
+}
+
+/************************************************************************/
+/*                            DBFCreateEx()                             */
+/*                                                                      */
+/*      Create a new .dbf file.                                         */
+/************************************************************************/
+
+DBFHandle SHPAPI_CALL
+DBFCreateEx( const char * pszFilename, const char* pszCodePage )
+
+{
     SAHooks sHooks;
 
     SASetupDefaultHooks( &sHooks );
 
-    return DBFCreateLL( pszFilename, &sHooks );
+    return DBFCreateLL( pszFilename, pszCodePage , &sHooks ); /* forward the caller's code page along with the default hooks set up above */
 }
 
 /************************************************************************/
@@ -558,13 +616,13 @@
 /************************************************************************/
 
 DBFHandle SHPAPI_CALL
-DBFCreateLL( const char * pszFilename, SAHooks *psHooks )
+DBFCreateLL( const char * pszFilename, const char * pszCodePage, SAHooks *psHooks )
 
 {
     DBFHandle	psDBF;
     SAFile	fp;
     char	*pszFullname, *pszBasename;
-    int		i;
+    int		i, ldid = -1;
     char chZero = '\0';
 
 /* -------------------------------------------------------------------- */
@@ -583,7 +641,6 @@
 
     pszFullname = (char *) malloc(strlen(pszBasename) + 5);
     sprintf( pszFullname, "%s.dbf", pszBasename );
-    free( pszBasename );
 
 /* -------------------------------------------------------------------- */
 /*      Create the file.                                                */
@@ -599,6 +656,29 @@
     if( fp == NULL )
         return( NULL );
 
+
+    sprintf( pszFullname, "%s.cpg", pszBasename );
+    if( pszCodePage != NULL )
+    {
+        if( strncmp( pszCodePage, "LDID/", 5 ) == 0 )
+        {
+            ldid = atoi( pszCodePage + 5 );
+            if( ldid > 255 )
+                ldid = -1; // don't use 0 to indicate out of range as LDID/0 is a valid one
+        }
+        if( ldid < 0 )
+        {
+            SAFile fpCPG = psHooks->FOpen( pszFullname, "w" );
+            psHooks->FWrite( (char*) pszCodePage, strlen(pszCodePage), 1, fpCPG );
+            psHooks->FClose( fpCPG );
+        }
+    }
+    if( pszCodePage == NULL || ldid >= 0 )
+    {
+        psHooks->Remove( pszFullname );
+    }
+
+    free( pszBasename );
     free( pszFullname );
 
 /* -------------------------------------------------------------------- */
@@ -625,6 +705,14 @@
 
     psDBF->bNoHeader = TRUE;
 
+    psDBF->iLanguageDriver = ldid > 0 ? ldid : 0;
+    psDBF->pszCodePage = NULL;
+    if( pszCodePage )
+    {
+        psDBF->pszCodePage = (char * ) malloc( strlen(pszCodePage) + 1 );
+        strcpy( psDBF->pszCodePage, pszCodePage );
+    }
+
     return( psDBF );
 }
 
@@ -1416,7 +1504,7 @@
 {
     DBFHandle	newDBF;
 
-   newDBF = DBFCreate ( pszFilename );
+   newDBF = DBFCreateEx ( pszFilename, psDBF->pszCodePage );
    if ( newDBF == NULL ) return ( NULL ); 
    
    newDBF->nFields = psDBF->nFields;
@@ -1582,3 +1670,15 @@
 
     return TRUE;
 }
+
+/************************************************************************/
+/*                            DBFGetCodePage                            */
+/************************************************************************/
+
+const char SHPAPI_CALL1(*)
+DBFGetCodePage(DBFHandle psDBF )
+{
+    if( psDBF == NULL ) /* tolerate a missing handle */
+        return NULL;
+    return psDBF->pszCodePage; /* the handle's own string (not a copy); NULL when no code page was recorded */
+}
\ No newline at end of file

Modified: branches/WIP-pyshapelib-Unicode/thuban/libraries/shapelib/safileio.c
===================================================================
--- branches/WIP-pyshapelib-Unicode/thuban/libraries/shapelib/safileio.c	2007-12-12 22:32:34 UTC (rev 2796)
+++ branches/WIP-pyshapelib-Unicode/thuban/libraries/shapelib/safileio.c	2007-12-15 20:40:22 UTC (rev 2797)
@@ -1,5 +1,5 @@
 /******************************************************************************
- * $Id: safileio.c,v 1.1 2007/12/06 06:56:41 fwarmerdam Exp $
+ * $Id: safileio.c,v 1.2 2007/12/15 20:25:30 bram Exp $
  *
  * Project:  Shapelib
  * Purpose:  Default implementation of file io based on stdio.
@@ -34,6 +34,9 @@
  ******************************************************************************
  *
  * $Log: safileio.c,v $
+ * Revision 1.2  2007/12/15 20:25:30  bram
+ * dbfopen.c now reads the Code Page information from the DBF file, and exports this information as a string through the DBFGetCodePage function.  This is either the number from the LDID header field ("LDID/<number>") or as the content of an accompanying .CPG file.  When creating a DBF file, the code can be set using DBFCreateEx.
+ *
  * Revision 1.1  2007/12/06 06:56:41  fwarmerdam
  * new
  *
@@ -48,7 +51,7 @@
 #include <string.h>
 #include <stdio.h>
 
-SHP_CVSID("$Id: safileio.c,v 1.1 2007/12/06 06:56:41 fwarmerdam Exp $");
+SHP_CVSID("$Id: safileio.c,v 1.2 2007/12/15 20:25:30 bram Exp $");
 
 /************************************************************************/
 /*                              SADFOpen()                              */
@@ -119,10 +122,22 @@
 int SADFClose( SAFile file )
 
 {
+    if( file == NULL ) /* callers pass optional handles (e.g. an absent .cpg file) without checking */
+        return 0; /* nothing to close: report success; a bare "return;" is invalid in a function returning int */
     return fclose( (FILE *) file );
 }
 
 /************************************************************************/
+/*                             SADRemove()                              */
+/************************************************************************/
+
+int SADRemove( const char *filename )
+
+{
+    return remove( filename ); /* thin wrapper: hands back remove()'s result unchanged */
+}
+
+/************************************************************************/
 /*                              SADError()                              */
 /************************************************************************/
 
@@ -146,6 +161,7 @@
     psHooks->FTell   = SADFTell;
     psHooks->FFlush  = SADFFlush;
     psHooks->FClose  = SADFClose;
+    psHooks->Remove  = SADRemove;
 
     psHooks->Error   = SADError;
 }

Modified: branches/WIP-pyshapelib-Unicode/thuban/libraries/shapelib/shapefil.h
===================================================================
--- branches/WIP-pyshapelib-Unicode/thuban/libraries/shapelib/shapefil.h	2007-12-12 22:32:34 UTC (rev 2796)
+++ branches/WIP-pyshapelib-Unicode/thuban/libraries/shapelib/shapefil.h	2007-12-15 20:40:22 UTC (rev 2797)
@@ -37,6 +37,9 @@
  ******************************************************************************
  *
  * $Log: shapefil.h,v $
+ * Revision 1.41  2007/12/15 20:25:32  bram
+ * dbfopen.c now reads the Code Page information from the DBF file, and exports this information as a string through the DBFGetCodePage function.  This is either the number from the LDID header field ("LDID/<number>") or as the content of an accompanying .CPG file.  When creating a DBF file, the code can be set using DBFCreateEx.
+ *
  * Revision 1.40  2007/12/06 07:00:25  fwarmerdam
  * dbfopen now using SAHooks for fileio
  *
@@ -195,13 +198,14 @@
 #endif
 
 typedef struct {
-    SAFile     (*FOpen) ( const char *filename, const char *path);
+    SAFile     (*FOpen) ( const char *filename, const char *access); /* access is the open-mode string, e.g. "r" or "w" */
     SAOffset   (*FRead) ( void *p, SAOffset size, SAOffset nmemb, SAFile file);
     SAOffset   (*FWrite)( void *p, SAOffset size, SAOffset nmemb, SAFile file);
     SAOffset   (*FSeek) ( SAFile file, SAOffset offset, int whence );
     SAOffset   (*FTell) ( SAFile file );
     int        (*FFlush)( SAFile file );
     int        (*FClose)( SAFile file );
+    int        (*Remove) ( const char *filename ); /* deletes the named file; safileio.c assigns SADRemove to this slot */
 
     void       (*Error) ( const char *message );
 } SAHooks;
@@ -458,6 +462,9 @@
     int		bUpdated;
 
     double      dfDoubleField;
+
+    int         iLanguageDriver;
+    char        *pszCodePage;
 } DBFInfo;
 
 typedef DBFInfo * DBFHandle;
@@ -481,7 +488,9 @@
 DBFHandle SHPAPI_CALL
       DBFCreate( const char * pszDBFFile );
 DBFHandle SHPAPI_CALL
-      DBFCreateLL( const char * pszDBFFile, SAHooks *psHooks );
+      DBFCreateEx( const char * pszDBFFile, const char * pszCodePage );
+DBFHandle SHPAPI_CALL
+      DBFCreateLL( const char * pszDBFFile, const char * pszCodePage, SAHooks *psHooks );
 
 int	SHPAPI_CALL
       DBFGetFieldCount( DBFHandle psDBF );
@@ -550,6 +559,9 @@
 char    SHPAPI_CALL
       DBFGetNativeFieldType( DBFHandle hDBF, int iField );
 
+const char SHPAPI_CALL1(*)
+      DBFGetCodePage(DBFHandle psDBF );
+
 #ifdef __cplusplus
 }
 #endif

Modified: branches/WIP-pyshapelib-Unicode/thuban/setup.py
===================================================================
--- branches/WIP-pyshapelib-Unicode/thuban/setup.py	2007-12-12 22:32:34 UTC (rev 2796)
+++ branches/WIP-pyshapelib-Unicode/thuban/setup.py	2007-12-15 20:40:22 UTC (rev 2797)
@@ -283,7 +283,7 @@
 	
 	return [
 		("HAVE_UPDATE_HEADER", have("DBFUpdateHeader")),
-		("HAVE_LANGUAGE_DRIVER", have("nLanguageDriver"))]
+		("HAVE_CODE_PAGE", have("DBFGetCodePage"))]
 
 
 extensions.append(Extension("Lib.shapelib",



More information about the Thuban-commits mailing list