[Thuban-commits] r2797 - in branches/WIP-pyshapelib-Unicode/thuban: . Thuban/Model libraries/pyshapelib libraries/shapelib
scm-commit@wald.intevation.org
scm-commit at wald.intevation.org
Sat Dec 15 21:40:24 CET 2007
Author: bramz
Date: 2007-12-15 21:40:22 +0100 (Sat, 15 Dec 2007)
New Revision: 2797
Modified:
branches/WIP-pyshapelib-Unicode/thuban/ChangeLog
branches/WIP-pyshapelib-Unicode/thuban/Thuban/Model/table.py
branches/WIP-pyshapelib-Unicode/thuban/libraries/pyshapelib/ChangeLog
branches/WIP-pyshapelib-Unicode/thuban/libraries/pyshapelib/dbflibmodule.c
branches/WIP-pyshapelib-Unicode/thuban/libraries/pyshapelib/setup.py
branches/WIP-pyshapelib-Unicode/thuban/libraries/shapelib/dbfopen.c
branches/WIP-pyshapelib-Unicode/thuban/libraries/shapelib/safileio.c
branches/WIP-pyshapelib-Unicode/thuban/libraries/shapelib/shapefil.h
branches/WIP-pyshapelib-Unicode/thuban/setup.py
Log:
start of pyshapelib Unicode support mark II. It now also reads the .CPG file so that UTF-8 encodings can be used. Thuban will now create UTF-8 DBF files by default. See ChangeLogs
Modified: branches/WIP-pyshapelib-Unicode/thuban/ChangeLog
===================================================================
--- branches/WIP-pyshapelib-Unicode/thuban/ChangeLog 2007-12-12 22:32:34 UTC (rev 2796)
+++ branches/WIP-pyshapelib-Unicode/thuban/ChangeLog 2007-12-15 20:40:22 UTC (rev 2797)
@@ -1,5 +1,15 @@
-2007-12-12 Bram de Greve <bram.degreve at gmail.com>
+2007-12-15 Bram de Greve <bram.degreve at bramz.net>
+ * shapelib and pyshapelib: Unicode support now reads the .CPG
+ file so that we can finally use UTF-8 content. See the ChangeLog
+ in pyshapelib.
+
+ * Thuban/Model/table.py: when opening DBF files, ask to return
+ Unicode strings, and use the UTF-8 encoding when creating new
+ shapefiles.
+
+2007-12-12 Bram de Greve <bram.degreve at bramz.net>
+
Porting shapelib from maptools source cvs.maptools.org.
Currently, this will have no support for code pages and wide
character filenames (Win32), but we'll get that back in later,
Modified: branches/WIP-pyshapelib-Unicode/thuban/Thuban/Model/table.py
===================================================================
--- branches/WIP-pyshapelib-Unicode/thuban/Thuban/Model/table.py 2007-12-12 22:32:34 UTC (rev 2796)
+++ branches/WIP-pyshapelib-Unicode/thuban/Thuban/Model/table.py 2007-12-15 20:40:22 UTC (rev 2797)
@@ -86,7 +86,7 @@
title = os.path.splitext(os.path.basename(self.filename))[0]
TitledObject.__init__(self, title)
- self.dbf = dbflib.DBFFile(filename)
+ self.dbf = dbflib.open(filename, return_unicode = True)
# If true, self.dbf is open for writing.
self._writable = 0
@@ -226,7 +226,7 @@
order.
"""
if not self._writable:
- new_dbf = dbflib.DBFFile(self.filename, "r+b")
+ new_dbf = dbflib.open(self.filename, "r+b", return_unicode = True)
self.dbf.close()
self.dbf = new_dbf
self._writable = 1
@@ -462,7 +462,7 @@
indices to be saved to the file, otherwise all rows are saved.
"""
- dbf = dbflib.create(filename)
+ dbf = dbflib.create(filename, code_page = dbflib.CPG_UTF_8, return_unicode = True)
dbflib_fieldtypes = {FIELDTYPE_STRING: dbflib.FTString,
FIELDTYPE_INT: dbflib.FTInteger,
Modified: branches/WIP-pyshapelib-Unicode/thuban/libraries/pyshapelib/ChangeLog
===================================================================
--- branches/WIP-pyshapelib-Unicode/thuban/libraries/pyshapelib/ChangeLog 2007-12-12 22:32:34 UTC (rev 2796)
+++ branches/WIP-pyshapelib-Unicode/thuban/libraries/pyshapelib/ChangeLog 2007-12-15 20:40:22 UTC (rev 2797)
@@ -1,3 +1,29 @@
+2007-12-15 Bram de Greve <bram.degreve at bramz.net>
+
+ * dbflibmodule.c: Unicode support mark II. The language_driver members
+ and functions are gone, as a language driver ID is not sufficient to
+ indicate code pages specified by .CPG files.
+ - code_page: DBFFile now has member code_page that returns the DBF code page
+ as a string. This is either the content of the .CPG file, or a string of
+ the form "LDID/42" if there's no .CPG file and the LDID number of the .DBF
+ file is used to indicate the code page instead.
+ - DBFFile also sports a member codec, which is the name of the Python codec used
+ for the code page.
+ - code_page: a new optional argument on create() to specify the DBF file's
+ code page on creation. This is _not_ a Python codec name, but one of the constants
+ dbflib.LDID_* or dbflib.CPG_*.
+ - return_unicode: a new optional argument on DBFFile, open() and create().
+ It tells the DBFFile to decode the textual content using its codec and
+ return it as Unicode. It is False by default, which means you get the raw
+ encoded string instead.
+ - codecs_map: a new optional argument on DBFFile, open() and create().
+ It allows you to provide your own dictionary that links DBF code pages to
+ Python codecs in case the default builtin one is not correct for your
+ application.
+ - HAVE_LANGUAGE_DRIVER is gone, and HAVE_CODE_PAGE is here instead.
+ - corresponding revisions of shapelib on cvs.maptools.org: shapefil.h v1.41,
+ dbfopen.c v1.77, safileio.c v1.2
+
2007-04-25 Bernhard Herzog <bh at intevation.de>
* shptreemodule.c: Fix copyright notice. It should have been
Modified: branches/WIP-pyshapelib-Unicode/thuban/libraries/pyshapelib/dbflibmodule.c
===================================================================
--- branches/WIP-pyshapelib-Unicode/thuban/libraries/pyshapelib/dbflibmodule.c 2007-12-12 22:32:34 UTC (rev 2796)
+++ branches/WIP-pyshapelib-Unicode/thuban/libraries/pyshapelib/dbflibmodule.c 2007-12-15 20:40:22 UTC (rev 2797)
@@ -9,90 +9,16 @@
#include "pyshapelib_common.h"
-/* UNICODE & LANGUAGE DRIVER SUPPORT FOR DBFLIB
- *
- * When writing Unicode objects to a dbflib database, the unicode has to be
- * encoded in 8-bit characters using a code page. This code page is indentified
- * by the Language Driver ID (LDID) field in the database header.
- *
- * At this moment, this need unofficial modifications of the maptools shapelib
- * library because they do not read the LDID. No patch has been submitted yet,
- * but the version contained in the Thuban source tree incorporates the required
- * modifications.
- *
- * pyshapelib is designed to compile with either the patched or unpatched shapelib
- * by defining HAVE_LANGUAGE_DRIVER as true or false respectively. In the latter
- * case, a Windows ANSI code page (cp1252) is assumed
- */
- #if HAVE_LANGUAGE_DRIVER
-#define PYSHAPELIB_NUM_LANGUAGE_DRIVERS 256
+static PyObject* default_codecs_map = NULL;
-#define PYSHAPELIB_ADD_LANGUAGE_DRIVER(ldid, codec, name)\
- codecs[ldid] = codec;\
- drivers[ldid] = "LDID_" name;\
- PyModule_AddIntConstant(module, "LDID_" name, ldid)
-
-static char* codecs[PYSHAPELIB_NUM_LANGUAGE_DRIVERS];
-static char* drivers[PYSHAPELIB_NUM_LANGUAGE_DRIVERS];
-
-#endif
-
-
-
-/** Determine name of Python's built-in codec
- */
-static char* get_codec(DBFHandle handle)
-{
-#if HAVE_LANGUAGE_DRIVER
- if (!codecs[handle->nLanguageDriver])
- {
- PyErr_Format(PyExc_ValueError, "Language Driver ID %d not recognized", handle->nLanguageDriver);
- }
- return codecs[handle->nLanguageDriver];
-#else
- return "cp1252";
-#endif
-}
-
-
-
-/** decode to unicode object
- */
-static PyObject* decode_string(DBFHandle handle, const char* string)
-{
- char* codec = get_codec(handle);
- if (!codec) return NULL;
- return PyUnicode_Decode(string, strlen(string), codec, NULL);
-}
-
-/** encode unicode object to normal Python string object
- */
-static PyObject* encode_string(DBFHandle handle, PyObject* string)
-{
- char* codec = get_codec(handle);
- if (!codec) return NULL;
-
- if (PyString_Check(string))
- {
- return PyString_AsEncodedObject(string, codec, NULL);
- }
- if (PyUnicode_Check(string))
- {
- return PyUnicode_AsEncodedString(string, codec, NULL);
- }
-
- PyErr_SetString(PyExc_TypeError, "value is neither a string or unicode object");
- return NULL;
-}
-
-
-
/* --- DBFFile ------------------------------------------------------------------------------------------------------- */
typedef struct {
PyObject_HEAD
DBFHandle handle;
+ char* codec;
+ int return_unicode;
} DBFFileObject;
@@ -104,6 +30,8 @@
DBFFileObject* self;
self = (DBFFileObject*) type->tp_alloc(type, 0);
self->handle = NULL;
+ self->codec = NULL;
+ self->return_unicode = 0;
return (PyObject*) self;
}
@@ -115,18 +43,79 @@
{
DBFClose(self->handle);
self->handle = NULL;
+ PyMem_Free(self->codec);
+ self->codec = NULL;
self->ob_type->tp_free((PyObject*)self);
}
+static int dbffile_init_codec(DBFFileObject* self, PyObject* codecs_map)
+{
+#if HAVE_CODE_PAGE
+ size_t n = 0;
+ PyObject* ocodec = NULL;
+ char* codec = NULL;
+ char* code_page = (char*) DBFGetCodePage(self->handle);
+ PyMem_Free(self->codec);
+ self->codec = NULL;
+ if (codecs_map && codecs_map != Py_None)
+ {
+ if (!PyMapping_Check(codecs_map))
+ {
+ PyErr_SetString(PyExc_TypeError, "codecs_map is not mapable");
+ return -1;
+ }
+ }
+ else
+ {
+ codecs_map = default_codecs_map;
+ }
+
+ if (code_page)
+ {
+ ocodec = PyMapping_GetItemString(codecs_map, code_page);
+ if (!ocodec)
+ {
+ PyErr_Format(PyExc_KeyError, "code_page '%s' not found in codecs_map", code_page);
+ return -1;
+ }
+ codec = PyString_AsString(ocodec);
+ if (!codec)
+ {
+ return -1;
+ }
+ n = strlen(codec);
+ self->codec = PyMem_Malloc(n + 1);
+ if (!self->codec)
+ {
+ PyErr_NoMemory();
+ return -1;
+ }
+ memcpy(self->codec, codec, n + 1);
+ }
+#else
+ PyMem_Free(self->codec);
+ self->codec = NULL;
+#endif
+
+ return 0;
+}
+
+
/* constructor
*/
static int dbffile_init(DBFFileObject* self, PyObject* args, PyObject* kwds)
{
char* file = NULL;
char* mode = "rb";
- static char *kwlist[] = {"name", "mode", NULL};
+ PyObject* return_unicode = 0;
+ PyObject* codecs_map = NULL;
+#if HAVE_CODE_PAGE
+ static char *kwlist[] = {"name", "mode", "return_unicode", "codecs_map", NULL};
+#else
+ static char *kwlist[] = {"name", "mode", "return_unicode", NULL};
+#endif
DBFClose(self->handle);
self->handle = NULL;
@@ -134,7 +123,13 @@
#if defined(SHPAPI_HAS_WIDE) && defined(Py_WIN_WIDE_FILENAMES)
if (GetVersion() < 0x80000000) { /* On NT, so wide API available */
PyObject *wfile;
- if (PyArg_ParseTupleAndKeywords(args, kwds, "U|s:DBFFile", kwlist, &wfile, &mode))
+#if HAVE_CODE_PAGE
+ if (PyArg_ParseTupleAndKeywords(args, kwds, "U|sOO:DBFFile", kwlist,
+ &wfile, &mode, &return_unicode, &codecs_map))
+#else
+ if (PyArg_ParseTupleAndKeywords(args, kwds, "U|sO:DBFFile", kwlist,
+ &wfile, &mode, &return_unicode))
+#endif
{
PyObject *wmode = PyUnicode_DecodeASCII(mode, strlen(mode), NULL);
if (!wmode) return -1;
@@ -157,8 +152,13 @@
if (!self->handle)
{
- if (!PyArg_ParseTupleAndKeywords(args, kwds, "et|s:DBFFile", kwlist,
- Py_FileSystemDefaultEncoding, &file, &mode)) return -1;
+#if HAVE_CODE_PAGE
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "et|sOO:DBFFile", kwlist,
+ Py_FileSystemDefaultEncoding, &file, &mode, &return_unicode, &codecs_map)) return -1;
+#else
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "et|sO:DBFFile", kwlist,
+ Py_FileSystemDefaultEncoding, &file, &mode, &return_unicode)) return -1;
+#endif
self->handle = DBFOpen(file, mode);
if (!self->handle)
@@ -171,6 +171,15 @@
PyMem_Free(file);
}
+ self->return_unicode = return_unicode && PyObject_IsTrue(return_unicode);
+
+ if (dbffile_init_codec(self, codecs_map) != 0)
+ {
+ DBFClose(self->handle);
+ self->handle = NULL;
+ return -1;
+ }
+
return 0;
}
@@ -180,11 +189,44 @@
{
DBFClose(self->handle);
self->handle = NULL;
+ PyMem_Free(self->codec);
+ self->codec = NULL;
Py_RETURN_NONE;
}
+/** decode to unicode object
+ */
+static PyObject* dbffile_decode_string(DBFFileObject* self, const char* string)
+{
+ if (self->return_unicode)
+ {
+ return PyUnicode_Decode(string, strlen(string), self->codec, NULL);
+ }
+ return PyString_FromString(string);
+}
+
+/** encode unicode object to normal Python string object
+ */
+static PyObject* dbffile_encode_string(DBFFileObject* self, PyObject* string)
+{
+ if (PyString_Check(string))
+ {
+ Py_INCREF(string);
+ return string;
+ }
+ if (PyUnicode_Check(string))
+ {
+ return PyUnicode_AsEncodedString(string, self->codec, NULL);
+ }
+
+ PyErr_SetString(PyExc_TypeError, "value is neither a string or unicode object");
+ return NULL;
+}
+
+
+
static PyObject* dbffile_field_count(DBFFileObject* self)
{
return PyInt_FromLong((long)DBFGetFieldCount(self->handle));
@@ -209,7 +251,7 @@
field_name[0] = '\0';
field_type = DBFGetFieldInfo(self->handle, field, field_name, &width, &decimals);
- name_object = decode_string(self->handle, field_name);
+ name_object = dbffile_decode_string(self, field_name);
return Py_BuildValue("iOii", field_type, name_object, width, decimals);
}
@@ -228,7 +270,7 @@
if (!PyArg_ParseTuple(args, "Siii:add_field", &oname, &type, &width, &decimals)) return NULL;
}
- name = encode_string(self->handle, oname);
+ name = dbffile_encode_string(self, oname);
if (!name) return NULL;
field = DBFAddField(self->handle, PyString_AsString(name), (DBFFieldType)type, width, decimals);
@@ -253,18 +295,18 @@
* The name argument will be passed to DBFGetFieldInfo as is and should
* thus be either NULL or a pointer to an array of at least 12 chars
*/
-static PyObject* do_read_attribute(DBFHandle handle, int record, int field, char * name)
+static PyObject* do_read_attribute(DBFFileObject* self, int record, int field, char * name)
{
int type, width;
const char* string;
- type = DBFGetFieldInfo(handle, field, name, &width, NULL);
+ type = DBFGetFieldInfo(self->handle, field, name, &width, NULL);
/* For strings NULL and the empty string are indistinguishable
* in DBF files. We prefer empty strings instead for backwards
* compatibility reasons because older wrapper versions returned
* emtpy strings as empty strings.
*/
- if (type != FTString && DBFIsAttributeNULL(handle, record, field))
+ if (type != FTString && DBFIsAttributeNULL(self->handle, record, field))
{
Py_RETURN_NONE;
}
@@ -273,17 +315,17 @@
switch (type)
{
case FTString:
- string = DBFReadStringAttribute(handle, record, field);
- if (string) return decode_string(handle, string);
+ string = DBFReadStringAttribute(self->handle, record, field);
+ if (string) return dbffile_decode_string(self, string);
case FTInteger:
- return PyInt_FromLong((long)DBFReadIntegerAttribute(handle, record, field));
+ return PyInt_FromLong((long)DBFReadIntegerAttribute(self->handle, record, field));
case FTDouble:
- return PyFloat_FromDouble(DBFReadDoubleAttribute(handle, record, field));
+ return PyFloat_FromDouble(DBFReadDoubleAttribute(self->handle, record, field));
case FTLogical:
- string = DBFReadLogicalAttribute(handle, record, field);
+ string = DBFReadLogicalAttribute(self->handle, record, field);
if (string)
{
switch (string[0])
@@ -306,7 +348,7 @@
PyErr_Format(PyExc_IOError, "Can't read value for row %d column %d", record, field);
return NULL;
-}
+}
@@ -335,7 +377,7 @@
return NULL;
}
- return do_read_attribute(self->handle, record, field, NULL);
+ return do_read_attribute(self, record, field, NULL);
}
@@ -369,7 +411,7 @@
num_fields = DBFGetFieldCount(self->handle);
for (i = 0; i < num_fields; i++)
{
- value = do_read_attribute(self->handle, record, i, name);
+ value = do_read_attribute(self, record, i, name);
if (!value || PyDict_SetItemString(dict, name, value) < 0) goto fail;
Py_DECREF(value);
value = NULL;
@@ -386,7 +428,7 @@
/* write a single field of a record. */
-static int do_write_attribute(DBFHandle handle, int record, int field, int type, PyObject* value)
+static int do_write_attribute(DBFFileObject* self, int record, int field, int type, PyObject* value)
{
PyObject* string_value = NULL;
int int_value;
@@ -395,16 +437,16 @@
if (value == Py_None)
{
- if (DBFWriteNULLAttribute(handle, record, field)) return 1;
+ if (DBFWriteNULLAttribute(self->handle, record, field)) return 1;
}
else
{
switch (type)
{
case FTString:
- string_value = encode_string(handle, value);
+ string_value = dbffile_encode_string(self, value);
if (!string_value) return 0;
- if (DBFWriteStringAttribute(handle, record, field, PyString_AsString(string_value)))
+ if (DBFWriteStringAttribute(self->handle, record, field, PyString_AsString(string_value)))
{
Py_DECREF(string_value);
return 1;
@@ -415,19 +457,19 @@
case FTInteger:
int_value = PyInt_AsLong(value);
if (int_value == -1 && PyErr_Occurred()) return 0;
- if (DBFWriteIntegerAttribute(handle, record, field, int_value)) return 1;
+ if (DBFWriteIntegerAttribute(self->handle, record, field, int_value)) return 1;
break;
case FTDouble:
double_value = PyFloat_AsDouble(value);
if (double_value == -1 && PyErr_Occurred()) return 0;
- if (DBFWriteDoubleAttribute(handle, record, field, double_value)) return 1;
+ if (DBFWriteDoubleAttribute(self->handle, record, field, double_value)) return 1;
break;
case FTLogical:
logical_value = PyObject_IsTrue(value);
if (logical_value == -1) return 0;
- if (DBFWriteLogicalAttribute(handle, record, field, logical_value ? 'T' : 'F')) return 1;
+ if (DBFWriteLogicalAttribute(self->handle, record, field, logical_value ? 'T' : 'F')) return 1;
break;
default:
@@ -459,7 +501,7 @@
}
type = DBFGetFieldInfo(self->handle, field, NULL, NULL, NULL);
- if (!do_write_attribute(self->handle, record, field, type, value)) return NULL;
+ if (!do_write_attribute(self, record, field, type, value)) return NULL;
Py_RETURN_NONE;
}
@@ -500,7 +542,7 @@
type = DBFGetFieldInfo(self->handle, i, NULL, NULL, NULL);
value = PySequence_GetItem(record_object, i);
if (!value) return NULL;
- if (!do_write_attribute(self->handle, record, i, type, value))
+ if (!do_write_attribute(self, record, i, type, value))
{
Py_DECREF(value);
return NULL;
@@ -518,7 +560,7 @@
name[0] = '\0';
type = DBFGetFieldInfo(self->handle, i, name, NULL, NULL);
value = PyDict_GetItemString(record_object, name);
- if (value && !do_write_attribute(self->handle, record, i, type, value)) return NULL;
+ if (value && !do_write_attribute(self, record, i, type, value)) return NULL;
}
}
@@ -551,16 +593,30 @@
#endif
-#if HAVE_LANGUAGE_DRIVER
+#if HAVE_CODE_PAGE
-static PyObject* dbffile_language_driver(DBFFileObject* self, void* closure)
+static PyObject* dbffile_code_page(DBFFileObject* self, void* closure)
{
- return PyInt_FromLong((long)self->handle->nLanguageDriver);
+ const char* code_page = DBFGetCodePage(self->handle);
+ if (!code_page)
+ {
+ Py_RETURN_NONE;
+ }
+ return PyString_FromString(code_page);
}
#endif
+static PyObject* dbffile_codec(DBFFileObject* self, void* closure)
+{
+ if (!self->codec)
+ {
+ Py_RETURN_NONE;
+ }
+ return PyString_FromString(self->codec);
+}
+
static struct PyMethodDef dbffile_methods[] =
{
{"close", (PyCFunction)dbffile_close, METH_NOARGS,
@@ -613,8 +669,9 @@
static struct PyGetSetDef dbffile_getsetters[] =
{
-#if HAVE_LANGUAGE_DRIVER
- {"language_driver", (getter)dbffile_language_driver, NULL, "Language Driver ID (read-only)" },
+ {"codec", (getter)dbffile_codec, NULL, "Python codec name used to encode or decode Unicode strings (read-only)" },
+#if HAVE_CODE_PAGE
+ {"code_page", (getter)dbffile_code_page, NULL, "DBF Code Page from LDID or .CPG file (read-only)" },
#endif
{NULL}
};
@@ -627,24 +684,39 @@
/* --- dbflib -------------------------------------------------------------------------------------------------------- */
-static PyObject* dbflib_open(PyObject* module, PyObject* args)
+static PyObject* dbflib_open(PyObject* module, PyObject* args, PyObject* kwds)
{
- return PyObject_CallObject((PyObject*)&DBFFileType, args);
+ return PyObject_Call((PyObject*)&DBFFileType, args, kwds);
}
-static PyObject* dbflib_create(PyObject* module, PyObject* args)
+static PyObject* dbflib_create(PyObject* module, PyObject* args, PyObject* kwds)
{
char* file;
DBFFileObject* result;
DBFHandle handle = NULL;
int wideargument = 0;
+ PyObject* return_unicode = NULL;
+ PyObject* codecs_map = NULL;
+ char* code_page = NULL;
+#if HAVE_CODE_PAGE
+ static char *kwlist[] = {"name", "code_page", "return_unicode", "codecs_map", NULL};
+#else
+ static char *kwlist[] = {"name", "return_unicode", NULL};
+#endif
+
#if defined(SHPAPI_HAS_WIDE) && defined(Py_WIN_WIDE_FILENAMES)
if (GetVersion() < 0x80000000) { /* On NT, so wide API available */
PyObject *wfile;
- if (PyArg_ParseTuple(args, "U:create", &wfile))
+#if HAVE_CODE_PAGE
+ if (PyArg_ParseTupleAndKeywords(args, kwds, "U|sOO:create", kwlist,
+ &wfile, &code_page, &return_unicode, &codecs_map))
+#else
+ if (PyArg_ParseTupleAndKeywords(args, kwds, "U|O:create", kwlist,
+ &wfile, &return_unicode))
+#endif
{
wideargument = 1;
handle = DBFCreateW(PyUnicode_AS_UNICODE(wfile));
@@ -665,8 +737,15 @@
if (!handle)
{
- if (!PyArg_ParseTuple(args, "et:create", Py_FileSystemDefaultEncoding, &file)) return NULL;
+#if HAVE_CODE_PAGE
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "et|sOO:create", kwlist, Py_FileSystemDefaultEncoding,
+ &file, &code_page, &return_unicode, &codecs_map)) return NULL;
+ handle = DBFCreateEx(file, code_page);
+#else
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "et|O:create", kwlist, Py_FileSystemDefaultEncoding,
+ &file, &return_unicode)) return NULL;
handle = DBFCreate(file);
+#endif
if (!handle)
{
PyErr_SetFromErrnoWithFilename(PyExc_IOError, file);
@@ -684,69 +763,68 @@
}
result->handle = handle;
- return (PyObject*) result;
-}
+ result->return_unicode = return_unicode && PyObject_IsTrue(return_unicode);
+ result->codec = NULL;
-
-
-#if HAVE_LANGUAGE_DRIVER
-
-/** translate a numeric Language Driver ID to the name of Python's codec.
- */
-static PyObject* dbflib_language_driver_codec(PyObject* module, PyObject* args)
-{
- int ldid;
- if (!PyArg_ParseTuple(args, "i:language_driver_name", &ldid)) return NULL;
- if (ldid < 0 || ldid >= PYSHAPELIB_NUM_LANGUAGE_DRIVERS || !codecs[ldid])
+ if (dbffile_init_codec(result, codecs_map) != 0)
{
- PyErr_SetString(PyExc_ValueError, "invalid driver id");
+ dbffile_dealloc(result);
return NULL;
}
- return PyString_FromString(codecs[ldid]);
-}
-/** translate a numeric Language Driver ID to a string represting its constant.
- */
-static PyObject* dbflib_language_driver_name(PyObject* module, PyObject* args)
-{
- int ldid;
- if (!PyArg_ParseTuple(args, "i:language_driver_name", &ldid)) return NULL;
- if (ldid < 0 || ldid >= PYSHAPELIB_NUM_LANGUAGE_DRIVERS || !drivers[ldid])
- {
- PyErr_SetString(PyExc_ValueError, "invalid driver id");
- return NULL;
- }
- return PyString_FromString(drivers[ldid]);
+ return (PyObject*) result;
}
-#endif
-
static struct PyMethodDef dbflib_methods[] =
{
- {"open", (PyCFunction)dbflib_open, METH_VARARGS,
- "open(name [, mode]) -> DBFFile\n\n"
+ {"open", (PyCFunction)dbflib_open, METH_VARARGS | METH_KEYWORDS,
+#if HAVE_CODE_PAGE
+ "open(name [, mode [, return_unicode [, codecs_map]]]) -> DBFFile\n\n"
+#else
+ "open(name [, mode [, return_unicode]]) -> DBFFile\n\n"
+#endif
"opens a DBFFile" },
- {"create", (PyCFunction)dbflib_create, METH_VARARGS,
- "create(name [, language_driver]) -> DBFFile\n\n"
+ {"create", (PyCFunction)dbflib_create, METH_VARARGS | METH_KEYWORDS,
+#if HAVE_CODE_PAGE
+ "create(name [, code_page [, return_unicode [, codecs_map]]]) -> DBFFile\n\n"
+#else
+ "create(name [, return_unicode]) -> DBFFile\n\n"
+#endif
"create a DBFFile " },
-#if HAVE_LANGUAGE_DRIVER
- {"language_driver_codec", (PyCFunction)dbflib_language_driver_codec, METH_VARARGS,
- "language_driver_codec(driver_id) -> string\n\n"
- "translate language driver id into the name of the Python's codec used as code page." },
- {"language_driver_name", (PyCFunction)dbflib_language_driver_name, METH_VARARGS,
- "language_driver_name(driver_id) -> string\n\n"
- "translate language driver id into a string." },
-#endif
{NULL}
};
+#if HAVE_CODE_PAGE
+
+void add_ldid(PyObject* module, int ldid, const char* codec, const char* name)
+{
+ char code_page[64];
+ char constant[64];
+ PyObject* ocodec = PyString_FromString(codec);
+ sprintf(code_page, "LDID/%i", ldid);
+ PyDict_SetItemString(default_codecs_map, code_page, ocodec);
+ Py_XDECREF(ocodec);
+ sprintf(constant, "LDID_%s", name);
+ PyModule_AddStringConstant(module, constant, code_page);
+}
+
+void add_cpg(PyObject* module, char* code_page, const char* codec, const char* name)
+{
+ char constant[64];
+ PyObject* ocodec = PyString_FromString(codec);
+ PyDict_SetItemString(default_codecs_map, code_page, PyString_FromString(codec));
+ Py_XDECREF(ocodec);
+ sprintf(constant, "CPG_%s", name);
+ PyModule_AddStringConstant(module, constant, code_page);
+}
+
+#endif
+
PyMODINIT_FUNC initdbflib(void)
{
- int i;
-
PyObject* module = Py_InitModule("dbflib", dbflib_methods);
if (!module) return;
@@ -758,86 +836,114 @@
PYSHAPELIB_ADD_CONSTANT(FTLogical);
PYSHAPELIB_ADD_CONSTANT(FTInvalid);
PyModule_AddIntConstant(module, "_have_commit", HAVE_UPDATE_HEADER);
+ PyModule_AddIntConstant(module, "_have_code_page", HAVE_CODE_PAGE);
-#if HAVE_LANGUAGE_DRIVER
+#if HAVE_CODE_PAGE
+ default_codecs_map = PyDict_New();
+
/* table compiled from these resources:
* http://www.clicketyclick.dk/databases/xbase/format/dbf.html
* http://www.esrinl.com/content/file.asp?id=307
* http://msdn2.microsoft.com/en-us/library/aa975345(VS.71).aspx
*/
- for (i = 0; i < PYSHAPELIB_NUM_LANGUAGE_DRIVERS; ++i)
- {
- codecs[i] = NULL;
- drivers[i] = NULL;
- }
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x00, "cp1252", "NOT_SET");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x01, "cp437", "DOS_USA");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x02, "cp850", "DOS_INTERNATIONAL");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x03, "cp1252", "WINDOWS_ANSI");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x04, "mac_roman", "STANDARD_MACINTOSH");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x08, "cp865", "DANISH_OEM");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x09, "cp437", "DUTCH_OEM");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x0a, "cp850", "DUTCH_OEM_2");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x0b, "cp437", "FINNISH_OEM");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x0d, "cp437", "FRENCH_OEM");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x0e, "cp850", "FRENCH_OEM_2");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x0f, "cp437", "GERMAN_OEM");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x10, "cp850", "GERMAN_OEM_2");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x11, "cp437", "ITALIAN_OEM");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x12, "cp850", "ITALIAN_OEM_2");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x13, "cp932", "JAPANESE_SHIFT_JIS");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x14, "cp850", "SPANISH_OEM_2");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x15, "cp437", "SWEDISH_OEM");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x16, "cp850", "SWEDISH_OEM_2");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x17, "cp865", "NORWEGIAN_OEM");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x18, "cp437", "SPANISH_OEM");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x19, "cp437", "ENGLISH_BRITAIN_OEM");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x1a, "cp850", "ENGLISH_BRITAIN_OEM_2");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x0b, "cp437", "ENGLISH_US_OEM");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x1c, "cp863", "FRENCH_CANADA_OEM");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x1d, "cp850", "FRENCH_OEM_2");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x1f, "cp852", "CZECH_OEM");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x22, "cp852", "HUNGARIAN_OEM");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x23, "cp852", "POLISH_OEM");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x24, "cp860", "PORTUGUESE_OEM");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x25, "cp850", "PORTUGUESE_OEM_2");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x26, "cp866", "RUSSIAN_OEM");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x37, "cp850", "ENGLISH_US_OEM_2");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x40, "cp852", "ROMANIAN_OEM");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x4d, "cp936", "CHINESE_GBK_PRC");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x4e, "cp949", "KOREAN_ANSI_OEM);");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x4f, "cp950", "CHINESE_BIG5_TAIWAN");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x50, "cp874", "THAI_ANSI_OEM");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x57, "cp1252", "ESRI_ANSI");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x58, "cp1252", "WESTERN_EUROPEAN_ANSI");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x59, "cp1252", "SPANISH_ANSI");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x64, "cp852", "EASTERN_EUROPEAN_MSDOS");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x65, "cp866", "RUSSIAN_MSDOS");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x66, "cp865", "NORDIC_MSDOS");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x67, "cp861", "ICELANDIC_MSDOS");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x68, "cp895", "CZECH_MSDOS");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x69, "cp620", "POLISH_MSDOS");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x6a, "cp737", "GREEK_MSDOS");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x6b, "cp857", "TURKISH_MSDOS");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x6c, "cp863", "FRENCH_CANADA_MSDOS");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x78, "cp950", "TAIWAN_BIG5");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x79, "cp949", "HANGUL_WANSUG");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x7a, "cp936", "PRC_GBK");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x7b, "cp932", "JAPANESE_SHIFT_JIS");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x7c, "cp874", "THAI_WINDOWS_MSDOS");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x7d, "cp1255", "HEBREW_WINDOWS");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x7e, "cp1256", "ARABIC_WINDOWS");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x86, "cp737", "GREEK_OEM");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x87, "cp852", "SLOVENIAN_OEM");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x88, "cp857", "TURKISH_OEM");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x96, "mac_cyrillic", "RUSSIAN_MACINTOSH");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x97, "mac_latin2", "EASTERN_EUROPEAN_MACINTOSH");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0x98, "mac_greek", "GREEK_MACINTOSH");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0xc8, "cp1250", "EASTERN_EUROPEAN_WINDOWS");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0xc9, "cp1251", "RUSSIAN_WINDOWS");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0xca, "cp1254", "TURKISH_WINDOWS");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0xcb, "cp1253", "GREEK_WINDOWS");
- PYSHAPELIB_ADD_LANGUAGE_DRIVER(0xcc, "cp1257", "BALTIC_WINDOWS");
+ add_ldid(module, 0x00, "cp1252", "NOT_SET");
+ add_ldid(module, 0x01, "cp437", "DOS_USA");
+ add_ldid(module, 0x02, "cp850", "DOS_INTERNATIONAL");
+ add_ldid(module, 0x03, "cp1252", "WINDOWS_ANSI");
+ add_ldid(module, 0x04, "mac_roman", "STANDARD_MACINTOSH");
+ add_ldid(module, 0x08, "cp865", "DANISH_OEM");
+ add_ldid(module, 0x09, "cp437", "DUTCH_OEM");
+ add_ldid(module, 0x0a, "cp850", "DUTCH_OEM_2");
+ add_ldid(module, 0x0b, "cp437", "FINNISH_OEM");
+ add_ldid(module, 0x0d, "cp437", "FRENCH_OEM");
+ add_ldid(module, 0x0e, "cp850", "FRENCH_OEM_2");
+ add_ldid(module, 0x0f, "cp437", "GERMAN_OEM");
+ add_ldid(module, 0x10, "cp850", "GERMAN_OEM_2");
+ add_ldid(module, 0x11, "cp437", "ITALIAN_OEM");
+ add_ldid(module, 0x12, "cp850", "ITALIAN_OEM_2");
+ add_ldid(module, 0x13, "cp932", "JAPANESE_SHIFT_JIS");
+ add_ldid(module, 0x14, "cp850", "SPANISH_OEM_2");
+ add_ldid(module, 0x15, "cp437", "SWEDISH_OEM");
+ add_ldid(module, 0x16, "cp850", "SWEDISH_OEM_2");
+ add_ldid(module, 0x17, "cp865", "NORWEGIAN_OEM");
+ add_ldid(module, 0x18, "cp437", "SPANISH_OEM");
+ add_ldid(module, 0x19, "cp437", "ENGLISH_BRITAIN_OEM");
+ add_ldid(module, 0x1a, "cp850", "ENGLISH_BRITAIN_OEM_2");
+ add_ldid(module, 0x1b, "cp437", "ENGLISH_US_OEM");
+ add_ldid(module, 0x1c, "cp863", "FRENCH_CANADA_OEM");
+ add_ldid(module, 0x1d, "cp850", "FRENCH_OEM_2");
+ add_ldid(module, 0x1f, "cp852", "CZECH_OEM");
+ add_ldid(module, 0x22, "cp852", "HUNGARIAN_OEM");
+ add_ldid(module, 0x23, "cp852", "POLISH_OEM");
+ add_ldid(module, 0x24, "cp860", "PORTUGUESE_OEM");
+ add_ldid(module, 0x25, "cp850", "PORTUGUESE_OEM_2");
+ add_ldid(module, 0x26, "cp866", "RUSSIAN_OEM");
+ add_ldid(module, 0x37, "cp850", "ENGLISH_US_OEM_2");
+ add_ldid(module, 0x40, "cp852", "ROMANIAN_OEM");
+ add_ldid(module, 0x4d, "cp936", "CHINESE_GBK_PRC");
+ add_ldid(module, 0x4e, "cp949", "KOREAN_ANSI_OEM);");
+ add_ldid(module, 0x4f, "cp950", "CHINESE_BIG5_TAIWAN");
+ add_ldid(module, 0x50, "cp874", "THAI_ANSI_OEM");
+ add_ldid(module, 0x57, "cp1252", "ESRI_ANSI");
+ add_ldid(module, 0x58, "cp1252", "WESTERN_EUROPEAN_ANSI");
+ add_ldid(module, 0x59, "cp1252", "SPANISH_ANSI");
+ add_ldid(module, 0x64, "cp852", "EASTERN_EUROPEAN_MSDOS");
+ add_ldid(module, 0x65, "cp866", "RUSSIAN_MSDOS");
+ add_ldid(module, 0x66, "cp865", "NORDIC_MSDOS");
+ add_ldid(module, 0x67, "cp861", "ICELANDIC_MSDOS");
+ add_ldid(module, 0x68, "cp895", "CZECH_MSDOS");
+ add_ldid(module, 0x69, "cp620", "POLISH_MSDOS");
+ add_ldid(module, 0x6a, "cp737", "GREEK_MSDOS");
+ add_ldid(module, 0x6b, "cp857", "TURKISH_MSDOS");
+ add_ldid(module, 0x6c, "cp863", "FRENCH_CANADA_MSDOS");
+ add_ldid(module, 0x78, "cp950", "TAIWAN_BIG5");
+ add_ldid(module, 0x79, "cp949", "HANGUL_WANSUG");
+ add_ldid(module, 0x7a, "cp936", "PRC_GBK");
+ add_ldid(module, 0x7b, "cp932", "JAPANESE_SHIFT_JIS");
+ add_ldid(module, 0x7c, "cp874", "THAI_WINDOWS_MSDOS");
+ add_ldid(module, 0x7d, "cp1255", "HEBREW_WINDOWS");
+ add_ldid(module, 0x7e, "cp1256", "ARABIC_WINDOWS");
+ add_ldid(module, 0x86, "cp737", "GREEK_OEM");
+ add_ldid(module, 0x87, "cp852", "SLOVENIAN_OEM");
+ add_ldid(module, 0x88, "cp857", "TURKISH_OEM");
+ add_ldid(module, 0x96, "mac_cyrillic", "RUSSIAN_MACINTOSH");
+ add_ldid(module, 0x97, "mac_latin2", "EASTERN_EUROPEAN_MACINTOSH");
+ add_ldid(module, 0x98, "mac_greek", "GREEK_MACINTOSH");
+ add_ldid(module, 0xc8, "cp1250", "EASTERN_EUROPEAN_WINDOWS");
+ add_ldid(module, 0xc9, "cp1251", "RUSSIAN_WINDOWS");
+ add_ldid(module, 0xca, "cp1254", "TURKISH_WINDOWS");
+ add_ldid(module, 0xcb, "cp1253", "GREEK_WINDOWS");
+ add_ldid(module, 0xcc, "cp1257", "BALTIC_WINDOWS");
+ add_cpg(module, "UTF-8", "utf_8", "UTF_8");
+ add_cpg(module, "OEM 737", "cp737", "OEM_737");
+ add_cpg(module, "OEM 775", "cp755", "OEM_775");
+ add_cpg(module, "OEM 852", "cp852", "OEM_852");
+ add_cpg(module, "OEM 855", "cp855", "OEM_855");
+ add_cpg(module, "OEM 857", "cp857", "OEM_857");
+ add_cpg(module, "OEM 860", "cp860", "OEM_860");
+ add_cpg(module, "OEM 861", "cp861", "OEM_861");
+ add_cpg(module, "OEM 862", "cp862", "OEM_862");
+ add_cpg(module, "OEM 863", "cp863", "OEM_863");
+ add_cpg(module, "OEM 864", "cp864", "OEM_864");
+ add_cpg(module, "OEM 865", "cp865", "OEM_865");
+ add_cpg(module, "OEM 866", "cp866", "OEM_866");
+ add_cpg(module, "OEM 869", "cp869", "OEM_869");
+ add_cpg(module, "OEM 932", "cp932", "OEM_932");
+ add_cpg(module, "OEM 950", "cp950", "OEM_950");
+ add_cpg(module, "ISO 88591", "iso-8859-1", "ISO_8859_1");
+ add_cpg(module, "ISO 88592", "iso-8859-2", "ISO_8859_2");
+ add_cpg(module, "ISO 88593", "iso-8859-3", "ISO_8859_3");
+ add_cpg(module, "ISO 88594", "iso-8859-4", "ISO_8859_4");
+ add_cpg(module, "ISO 88595", "iso-8859-5", "ISO_8859_5");
+ add_cpg(module, "ISO 88596", "iso-8859-6", "ISO_8859_6");
+ add_cpg(module, "ISO 88597", "iso-8859-7", "ISO_8859_7");
+ add_cpg(module, "ISO 88598", "iso-8859-8", "ISO_8859_8");
+ add_cpg(module, "ISO 88599", "iso-8859-9", "ISO_8859_9");
+ add_cpg(module, "ISO 885910", "iso-8859-10", "ISO_8859_10");
+ add_cpg(module, "ISO 885913", "iso-8859-13", "ISO_8859_13");
+ add_cpg(module, "ISO 885915", "iso-8859-15", "ISO_8859_15");
+
+
#endif
}
Modified: branches/WIP-pyshapelib-Unicode/thuban/libraries/pyshapelib/setup.py
===================================================================
--- branches/WIP-pyshapelib-Unicode/thuban/libraries/pyshapelib/setup.py 2007-12-12 22:32:34 UTC (rev 2796)
+++ branches/WIP-pyshapelib-Unicode/thuban/libraries/pyshapelib/setup.py 2007-12-15 20:40:22 UTC (rev 2797)
@@ -39,8 +39,8 @@
DBFUpdateHeader function and '0' otherwise. To check whether
DBFUpdateHeader is available, we scan shapefil.h for the string
'DBFUpdateHeader'.
- - HAVE_LANGUAGE_DRIVER, which is '1' if the dbflib version we will
- compiling with has the nLanguageDriver field in DBFInfo and '0' otherwise.
+ - HAVE_CODE_PAGE, which is '1' if the dbflib version we will be
+ compiling with has the DBFGetCodePage function and '0' otherwise.
Again, shapefil.h is scanned to check this.
"""
f = open(convert_path(shp_dir + "/shapefil.h"))
@@ -54,7 +54,7 @@
return [
("HAVE_UPDATE_HEADER", have("DBFUpdateHeader")),
- ("HAVE_LANGUAGE_DRIVER", have("nLanguageDriver"))]
+ ("HAVE_CODE_PAGE", have("DBFGetCodePage"))]
extensions = [Extension("shapelib",
["shapelibmodule.c",
Modified: branches/WIP-pyshapelib-Unicode/thuban/libraries/shapelib/dbfopen.c
===================================================================
--- branches/WIP-pyshapelib-Unicode/thuban/libraries/shapelib/dbfopen.c 2007-12-12 22:32:34 UTC (rev 2796)
+++ branches/WIP-pyshapelib-Unicode/thuban/libraries/shapelib/dbfopen.c 2007-12-15 20:40:22 UTC (rev 2797)
@@ -34,6 +34,9 @@
******************************************************************************
*
* $Log: dbfopen.c,v $
+ * Revision 1.77 2007/12/15 20:25:21 bram
+ * dbfopen.c now reads the Code Page information from the DBF file, and exports this information as a string through the DBFGetCodePage function. This is either the number from the LDID header field ("LDID/<number>") or as the content of an accompanying .CPG file. When creating a DBF file, the code can be set using DBFCreateEx.
+ *
* Revision 1.76 2007/12/12 22:21:32 bram
* DBFClose: check for NULL psDBF handle before trying to close it.
*
@@ -181,6 +184,8 @@
abyHeader[10] = (unsigned char) (psDBF->nRecordLength % 256);
abyHeader[11] = (unsigned char) (psDBF->nRecordLength / 256);
+ abyHeader[29] = (unsigned char) (psDBF->iLanguageDriver);
+
/* -------------------------------------------------------------------- */
/* Write the initial 32 byte file header, and all the field */
/* descriptions. */
@@ -348,9 +353,11 @@
{
DBFHandle psDBF;
+ SAFile pfCPG;
unsigned char *pabyBuf;
int nFields, nHeadLen, iField, i;
char *pszBasename, *pszFullname;
+ int nBufSize = 500;
/* -------------------------------------------------------------------- */
/* We only allow the access strings "rb" and "r+". */
@@ -392,13 +399,22 @@
sprintf( pszFullname, "%s.DBF", pszBasename );
psDBF->fp = psDBF->sHooks.FOpen(pszFullname, pszAccess );
}
-
+
+ sprintf( pszFullname, "%s.cpg", pszBasename );
+ pfCPG = psHooks->FOpen( pszFullname, "r" );
+ if( pfCPG == NULL )
+ {
+ sprintf( pszFullname, "%s.CPG", pszBasename );
+ pfCPG = psHooks->FOpen( pszFullname, "r" );
+ }
+
free( pszBasename );
free( pszFullname );
if( psDBF->fp == NULL )
{
free( psDBF );
+ if( pfCPG != NULL ) psHooks->FClose( pfCPG ); /* .cpg is optional; hooks need not accept NULL */
return( NULL );
}
@@ -409,10 +425,11 @@
/* -------------------------------------------------------------------- */
/* Read Table Header info */
/* -------------------------------------------------------------------- */
- pabyBuf = (unsigned char *) malloc(500);
+ pabyBuf = (unsigned char *) malloc(nBufSize);
if( psDBF->sHooks.FRead( pabyBuf, 32, 1, psDBF->fp ) != 1 )
{
psDBF->sHooks.FClose( psDBF->fp );
+ if( pfCPG != NULL ) psDBF->sHooks.FClose( pfCPG ); /* .cpg is optional; hooks need not accept NULL */
free( pabyBuf );
free( psDBF );
return NULL;
@@ -423,12 +440,39 @@
psDBF->nHeaderLength = nHeadLen = pabyBuf[8] + pabyBuf[9]*256;
psDBF->nRecordLength = pabyBuf[10] + pabyBuf[11]*256;
-
+ psDBF->iLanguageDriver = pabyBuf[29];
+
psDBF->nFields = nFields = (nHeadLen - 32) / 32;
psDBF->pszCurrentRecord = (char *) malloc(psDBF->nRecordLength);
/* -------------------------------------------------------------------- */
+/* Figure out the code page from the LDID and CPG */
+/* -------------------------------------------------------------------- */
+
+ /* Prefer the first line of the .cpg sidecar; otherwise fall back to */
+ /* "LDID/<n>" built from the language driver byte of the DBF header. */
+ psDBF->pszCodePage = NULL;
+ if( pfCPG )
+ {
+ size_t n;
+ char *buffer = (char *) pabyBuf;
+ /* Zero the whole buffer so a short read is always NUL terminated and */
+ /* strcspn() cannot run into stale header bytes. */
+ memset( buffer, 0, nBufSize );
+ /* Read byte-wise so a file shorter than the buffer is not a partial element. */
+ psDBF->sHooks.FRead( buffer, 1, nBufSize - 1, pfCPG );
+ n = strcspn( buffer, "\n\r" );
+ if( n > 0 )
+ {
+ buffer[n] = '\0';
+ psDBF->pszCodePage = (char *) malloc(n + 1);
+ memcpy( psDBF->pszCodePage, buffer, n + 1 );
+ }
+ /* The sidecar is no longer needed; it was previously never closed. */
+ psDBF->sHooks.FClose( pfCPG );
+ }
+ /* Test the saved header value: pabyBuf may now hold .cpg content. */
+ if( psDBF->pszCodePage == NULL && psDBF->iLanguageDriver != 0 )
+ {
+ sprintf( (char *) pabyBuf, "LDID/%i", psDBF->iLanguageDriver );
+ psDBF->pszCodePage = (char *) malloc(strlen((char *) pabyBuf) + 1);
+ strcpy( psDBF->pszCodePage, (char *) pabyBuf );
+ }
+
+/* -------------------------------------------------------------------- */
/* Read in Field Definitions */
/* -------------------------------------------------------------------- */
@@ -530,6 +574,7 @@
free( psDBF->pszHeader );
free( psDBF->pszCurrentRecord );
+ free( psDBF->pszCodePage );
free( psDBF );
}
@@ -537,18 +582,31 @@
/************************************************************************/
/* DBFCreate() */
/* */
-/* Create a new .dbf file. */
+/* Create a new .dbf file with default code page LDID/3 */
/************************************************************************/
DBFHandle SHPAPI_CALL
DBFCreate( const char * pszFilename )
{
+ return DBFCreateEx( pszFilename, "LDID/3" );
+}
+
+/************************************************************************/
+/* DBFCreateEx() */
+/* */
+/* Create a new .dbf file. */
+/************************************************************************/
+
+DBFHandle SHPAPI_CALL
+DBFCreateEx( const char * pszFilename, const char* pszCodePage )
+
+{
SAHooks sHooks;
SASetupDefaultHooks( &sHooks );
- return DBFCreateLL( pszFilename, &sHooks );
+ return DBFCreateLL( pszFilename, pszCodePage , &sHooks );
}
/************************************************************************/
@@ -558,13 +616,13 @@
/************************************************************************/
DBFHandle SHPAPI_CALL
-DBFCreateLL( const char * pszFilename, SAHooks *psHooks )
+DBFCreateLL( const char * pszFilename, const char * pszCodePage, SAHooks *psHooks )
{
DBFHandle psDBF;
SAFile fp;
char *pszFullname, *pszBasename;
- int i;
+ int i, ldid = -1;
char chZero = '\0';
/* -------------------------------------------------------------------- */
@@ -583,7 +641,6 @@
pszFullname = (char *) malloc(strlen(pszBasename) + 5);
sprintf( pszFullname, "%s.dbf", pszBasename );
- free( pszBasename );
/* -------------------------------------------------------------------- */
/* Create the file. */
@@ -599,6 +656,29 @@
if( fp == NULL )
return( NULL );
+
+ /* Decide where the code page is stored: an in-range "LDID/<n>" goes into */
+ /* the DBF header, anything else is written verbatim to a .cpg sidecar. */
+ sprintf( pszFullname, "%s.cpg", pszBasename );
+ if( pszCodePage != NULL )
+ {
+ if( strncmp( pszCodePage, "LDID/", 5 ) == 0 )
+ {
+ ldid = atoi( pszCodePage + 5 );
+ if( ldid > 255 )
+ ldid = -1; /* don't use 0 to indicate out of range as LDID/0 is a valid one */
+ }
+ if( ldid < 0 )
+ {
+ SAFile fpCPG = psHooks->FOpen( pszFullname, "w" );
+ /* The sidecar is best effort: never hand a NULL file to the hooks. */
+ if( fpCPG != NULL )
+ {
+ psHooks->FWrite( (char*) pszCodePage, strlen(pszCodePage), 1, fpCPG );
+ psHooks->FClose( fpCPG );
+ }
+ }
+ }
+ if( pszCodePage == NULL || ldid >= 0 )
+ {
+ /* No sidecar wanted: remove a stale one left by an earlier file. */
+ psHooks->Remove( pszFullname );
+ }
+
+ free( pszBasename );
free( pszFullname );
/* -------------------------------------------------------------------- */
@@ -625,6 +705,14 @@
psDBF->bNoHeader = TRUE;
+ psDBF->iLanguageDriver = ldid > 0 ? ldid : 0;
+ psDBF->pszCodePage = NULL;
+ if( pszCodePage )
+ {
+ psDBF->pszCodePage = (char * ) malloc( strlen(pszCodePage) + 1 );
+ strcpy( psDBF->pszCodePage, pszCodePage );
+ }
+
return( psDBF );
}
@@ -1416,7 +1504,7 @@
{
DBFHandle newDBF;
- newDBF = DBFCreate ( pszFilename );
+ newDBF = DBFCreateEx ( pszFilename, psDBF->pszCodePage );
if ( newDBF == NULL ) return ( NULL );
newDBF->nFields = psDBF->nFields;
@@ -1582,3 +1670,15 @@
return TRUE;
}
+
+/************************************************************************/
+/* DBFGetCodePage */
+/************************************************************************/
+
+const char SHPAPI_CALL1(*)
+DBFGetCodePage(DBFHandle psDBF )
+{
+ /* Returns the handle's stored code page string (may be NULL), */
+ /* or NULL when no handle is given. */
+ return ( psDBF != NULL ) ? psDBF->pszCodePage : NULL;
+}
\ No newline at end of file
Modified: branches/WIP-pyshapelib-Unicode/thuban/libraries/shapelib/safileio.c
===================================================================
--- branches/WIP-pyshapelib-Unicode/thuban/libraries/shapelib/safileio.c 2007-12-12 22:32:34 UTC (rev 2796)
+++ branches/WIP-pyshapelib-Unicode/thuban/libraries/shapelib/safileio.c 2007-12-15 20:40:22 UTC (rev 2797)
@@ -1,5 +1,5 @@
/******************************************************************************
- * $Id: safileio.c,v 1.1 2007/12/06 06:56:41 fwarmerdam Exp $
+ * $Id: safileio.c,v 1.2 2007/12/15 20:25:30 bram Exp $
*
* Project: Shapelib
* Purpose: Default implementation of file io based on stdio.
@@ -34,6 +34,9 @@
******************************************************************************
*
* $Log: safileio.c,v $
+ * Revision 1.2 2007/12/15 20:25:30 bram
+ * dbfopen.c now reads the Code Page information from the DBF file, and exports this information as a string through the DBFGetCodePage function. This is either the number from the LDID header field ("LDID/<number>") or as the content of an accompanying .CPG file. When creating a DBF file, the code can be set using DBFCreateEx.
+ *
* Revision 1.1 2007/12/06 06:56:41 fwarmerdam
* new
*
@@ -48,7 +51,7 @@
#include <string.h>
#include <stdio.h>
-SHP_CVSID("$Id: safileio.c,v 1.1 2007/12/06 06:56:41 fwarmerdam Exp $");
+SHP_CVSID("$Id: safileio.c,v 1.2 2007/12/15 20:25:30 bram Exp $");
/************************************************************************/
/* SADFOpen() */
@@ -119,10 +122,22 @@
int SADFClose( SAFile file )
{
+ if( file == NULL )
+ return 0; /* nothing to close: report success (an int function must return a value) */
return fclose( (FILE *) file );
}
/************************************************************************/
+/* SADRemove() */
+/************************************************************************/
+
+int SADRemove( const char *filename )
+
+{
+ /* Delegate to the C library; its status code is passed through unchanged. */
+ const int nStatus = remove( filename );
+ return nStatus;
+}
+
+/************************************************************************/
/* SADError() */
/************************************************************************/
@@ -146,6 +161,7 @@
psHooks->FTell = SADFTell;
psHooks->FFlush = SADFFlush;
psHooks->FClose = SADFClose;
+ psHooks->Remove = SADRemove;
psHooks->Error = SADError;
}
Modified: branches/WIP-pyshapelib-Unicode/thuban/libraries/shapelib/shapefil.h
===================================================================
--- branches/WIP-pyshapelib-Unicode/thuban/libraries/shapelib/shapefil.h 2007-12-12 22:32:34 UTC (rev 2796)
+++ branches/WIP-pyshapelib-Unicode/thuban/libraries/shapelib/shapefil.h 2007-12-15 20:40:22 UTC (rev 2797)
@@ -37,6 +37,9 @@
******************************************************************************
*
* $Log: shapefil.h,v $
+ * Revision 1.41 2007/12/15 20:25:32 bram
+ * dbfopen.c now reads the Code Page information from the DBF file, and exports this information as a string through the DBFGetCodePage function. This is either the number from the LDID header field ("LDID/<number>") or as the content of an accompanying .CPG file. When creating a DBF file, the code can be set using DBFCreateEx.
+ *
* Revision 1.40 2007/12/06 07:00:25 fwarmerdam
* dbfopen now using SAHooks for fileio
*
@@ -195,13 +198,14 @@
#endif
typedef struct {
- SAFile (*FOpen) ( const char *filename, const char *path);
+ SAFile (*FOpen) ( const char *filename, const char *access); /* access: mode string such as "rb", "r+" or "w" */
SAOffset (*FRead) ( void *p, SAOffset size, SAOffset nmemb, SAFile file);
SAOffset (*FWrite)( void *p, SAOffset size, SAOffset nmemb, SAFile file);
SAOffset (*FSeek) ( SAFile file, SAOffset offset, int whence );
SAOffset (*FTell) ( SAFile file );
int (*FFlush)( SAFile file );
int (*FClose)( SAFile file );
+ int (*Remove) ( const char *filename ); /* delete the named file; the default hook SADRemove wraps remove() */
void (*Error) ( const char *message );
} SAHooks;
@@ -458,6 +462,9 @@
int bUpdated;
double dfDoubleField;
+
+ int iLanguageDriver;
+ char *pszCodePage;
} DBFInfo;
typedef DBFInfo * DBFHandle;
@@ -481,7 +488,9 @@
DBFHandle SHPAPI_CALL
DBFCreate( const char * pszDBFFile );
DBFHandle SHPAPI_CALL
- DBFCreateLL( const char * pszDBFFile, SAHooks *psHooks );
+ DBFCreateEx( const char * pszDBFFile, const char * pszCodePage );
+DBFHandle SHPAPI_CALL
+ DBFCreateLL( const char * pszDBFFile, const char * pszCodePage, SAHooks *psHooks );
int SHPAPI_CALL
DBFGetFieldCount( DBFHandle psDBF );
@@ -550,6 +559,9 @@
char SHPAPI_CALL
DBFGetNativeFieldType( DBFHandle hDBF, int iField );
+const char SHPAPI_CALL1(*)
+ DBFGetCodePage(DBFHandle psDBF );
+
#ifdef __cplusplus
}
#endif
Modified: branches/WIP-pyshapelib-Unicode/thuban/setup.py
===================================================================
--- branches/WIP-pyshapelib-Unicode/thuban/setup.py 2007-12-12 22:32:34 UTC (rev 2796)
+++ branches/WIP-pyshapelib-Unicode/thuban/setup.py 2007-12-15 20:40:22 UTC (rev 2797)
@@ -283,7 +283,7 @@
return [
("HAVE_UPDATE_HEADER", have("DBFUpdateHeader")),
- ("HAVE_LANGUAGE_DRIVER", have("nLanguageDriver"))]
+ ("HAVE_CODE_PAGE", have("DBFGetCodePage"))]
extensions.append(Extension("Lib.shapelib",
More information about the Thuban-commits
mailing list