### Re: How to deal with several types in a class?

Nikolaj van Omme <nikolaj.van.omme <at> gmail.com>

2015-05-13 14:06:01 GMT

On 15-05-13 12:45 AM, Robert Bradshaw wrote:
> On Tue, May 12, 2015 at 11:37 AM, Nikolaj van Omme
> <nikolaj.van.omme <at> gmail.com> wrote:
>> Hello,
>>
>> I'm writing a library that implements its own sparse matrices. We
>> decided to implement everything in Cython.
>>
>> We have several extension classes for different sparse matrix types. For
>> instance:
>>
>> cdef class LLSparseMatrix(SparseMatrix):
>> cdef:
>> FLOAT64_t * val
>> INT32_t * col
>>
>> def ____cinit____(self, **kwargs):
>> val = <FLOAT64_t *> PyMem_Malloc(self.size_hint *
>> sizeof(FLOAT64_t))
>> if not val:
>> raise MemoryError()
>> self.val = val
>>
>> col = <INT32_t *> PyMem_Malloc(self.size_hint * sizeof(INT32_t))
>> if not col:
>> raise MemoryError()
>> self.col = col
>> ...
>>
>> where FLOAT64_t and INT32_t are ctypedef types.
>>
>> This LLSparseMatrix has INT32_t for index type and FLOAT64_t type for
>> its element but we would like to offer the possibility to use other
>> types at run time.For instance, INT64_t for indices and UINT64_t for the
>> element types. The user could give the index type "itype" and element
>> type "dtype" as arguments to the constructor:
>>
>> my_matrix = LLSparseMatrix(itype=INT32_t, dtype=FLOAT64_t, ...)
>>
>> Access to the elements of the internal "val" and "col" arrays in Cython
>> should be efficient.
>>
>> Here is my question: what is the best (most efficient (wrt time) and at
>> the same time not too difficult to maintain) solution if I want to use
>> different types at run time?
>>
>> We can use a template language (like Jinja2) to generate Cython code, we
>> can use numpy arrays for "val" and "col" and use numpy types, we
>> **don't****** want to implement the library in C++ with templates.
>>
>> Any help must appreciated. Thanks in advance.
> Personally, I would go with NumPy.
>
Thanks for your reply.
How would you use the NumPy arrays?
Something along these lines?
==================
cimport numpy as cnp
import numpy as np
cnp.import_array()
cdef Py_ssize_t SIZE_HINT = 100
cdef class LLSparseMatrix:
cdef:
cnp.ndarray val
cnp.ndarray col
def ____cinit____(self, **kwargs):
size_hint = kwargs.get('size_hint', SIZE_HINT)
dtype = kwargs.get('dtype', np.double)
itype = kwargs.get('itype', np.int)
self.val = np.empty(dtype=dtype, shape=(size_hint,))
self.col = np.empty(dtype=itype, shape=(size_hint,))
==================
and the user would be able to create a matrix like this:
==================
matrix = LLSparseMatrix(dtype=np.float, size_hint=10000)
==================
and fill the matrix later on. Attached, you'll find a minimal example.
And here is my test.py script:
==================
import base_case as bc
import numpy as np
DTYPE = np.int
size = 60
B = bc.LLSparseMatrix(nrow=size, ncol=size, dtype=np.double, itype=np.int)
for i in xrange(size):
for j in xrange(size):
B[i,j] = 9.99
==================
I've tried exactly this approach and it's about 300 times slower than
with C arrays for the basic operations we need. Maybe, I'm not using the
NumPy arrays correctly/efficiently?
Thank you in advance,
Nikolaj
--
---
You received this message because you are subscribed to the Google Groups "cython-users" group.
To unsubscribe from this group and stop receiving emails from it, send an email to cython-users+unsubscribe <at> googlegroups.com.
For more options, visit https://groups.google.com/d/optout.

cimport numpy as cnp
import numpy as np
cnp.import_array()
LL_MAT_INCREASE_FACTOR = 1.5
cdef class LLSparseMatrix:
cdef:
free # index to first element in free chain
cnp.ndarray val
double *ival # pointer to array of imaginary values NOT USED here
cnp.ndarray col
cnp.ndarray link
cnp.ndarray root
public size_hint
public nalloc
public nnz
public bint is_symmetric
public bint is_complex
public bint store_zeros
public nrow
public ncol
def ____cinit____(self, **kwargs):
self.nrow = kwargs['nrow']
self.ncol = kwargs['ncol']
size_hint = kwargs.get('size_hint', 510)
self.size_hint = size_hint
self.nalloc = size_hint
self.free = -1
self.nnz = 0
self.val = np.empty(dtype=np.double, shape=(size_hint,))
self.col = np.empty(dtype=np.int, shape=(size_hint,))
self.link = np.empty(dtype=np.int, shape=(size_hint,))
self.root = np.empty(dtype=np.int, shape=(self.nrow, ))
self.root.fill(-1)
self.is_symmetric = False
self.is_complex = False
self.store_zeros = False
cdef _realloc(self, nalloc_new):
self.val.resize(nalloc_new)
self.col.resize(nalloc_new)
self.link.resize(nalloc_new)
self.nalloc = nalloc_new
cdef _realloc_expand(self):
assert LL_MAT_INCREASE_FACTOR > 1.0
real_new_alloc = int(LL_MAT_INCREASE_FACTOR * self.nalloc) + 1
return self._realloc(real_new_alloc)
cdef put(self, i, j, value, imaginary = 0.0):
if self.is_symmetric and i < j:
raise IndexError('Write operation to upper triangle of symmetric matrix not allowed')
# Find element to be set (or removed)
col = last = -1
k = self.root[i]
while k != -1:
col = self.col[k]
# TODO: check this
if col >= j:
break
last = k
k = self.link[k]
# Store value
if self.store_zeros or (value != 0.0 or imaginary != 0.0):
if col == j:
# element already exist
self.val[k] = value
if self.is_complex:
self.ival[k] = imaginary
else:
# new element
# find location for new element
if self.free != -1:
# use element from the free chain
new_elem = self.free
self.free = self.link[new_elem]
else:
# append new element to the end
new_elem = self.nnz
# test if there is space for a new element
if self.nnz == self.nalloc:
# we have to reallocate some space
self._realloc_expand()
self.val[new_elem] = value
if self.is_complex:
self.ival[new_elem] = imaginary
self.col[new_elem] = j
self.link[new_elem] = k
if last == -1:
self.root[i] = new_elem
else:
self.link[last] = new_elem
self.nnz += 1
else:
# value == 0.0 and imaginary == 0.0:
# if element exists but we don't store zero elements
# we need to "zeroify" this element
if col == j:
# relink row i
if last == -1:
self.root[i] = self.link[k]
else:
self.link[last] = self.link[k]
# add element to free list
self.link[k] = self.free
self.free = k
self.nnz -= 1
cdef safe_put(self, i, j, value, imaginary = 0.0):
if i < 0 or i >= self.nrow or j < 0 or j >= self.ncol:
raise IndexError('Indices out of range')
self.put(i, j, value, imaginary)
def ____setitem____(self, tuple key, value):
i = key[0]
j = key[1]
if self.is_complex:
pass
else:
self.safe_put(i, j, value)