473,396 Members | 1,970 Online
Bytes | Software Development & Data Engineering Community
Post Job

Home Posts Topics Members FAQ

Join Bytes to post your question to a community of 473,396 software developers and data experts.

Python C module questions

Hi,

I'm rather new to Python, and I've just written my first Python C
module. I was wondering if some more experience Pythonista would look
over what I've written and given me some pointers (or find some bugs).
I had a few questions while writing this as well.

Also, I know that there are much better ways to compute nearest
neighbors than the brute force n^2 method, but this is plenty fast for
this application (at least the C version is).

1. How do I add docstrings to my module?
2. The Python reference counting and memory managment seemes awfully
repetetive and error prone. Is there a nicer way of doing this? I
know there are some wrappers out there such as Pyrex and SWIG that
might prove useful.
3. This module consisted of turning 4 Python sequences into C double
arrays, performing some calculations, and then turning a C int array
into a Python tuple for return. And it was a lot of work. Are there
any nice wrapper functions out there for turning Python sequences into
C arrays and vice versa?

Thanks,
Jeremy Brewer

The code is below:

#include <Python.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <float.h>

/* xmatch: Takes 2 sets of positions (ra1, dec1 of size len1 and ra2,
dec2 of
of size len2) and computes the nearest neighbors. An array
of
integers for the nearest neighbors of ra1, dec1 that
correspond to
the positions in ra2, dec2 is returned, where points further
away
than mindist (in degrees) on the sky are marked as
-(closest index + 1). */

static int *xmatch(double *ra1, double *dec1, int len1, double *ra2,
double *dec2, int len2, double mindist)
{
int i, j, closest, *neighbors;
double mindist2, dist, old_dist;

/* allocate memory for the neighbors */
neighbors = (int *) malloc(len1 * sizeof(int));
if (neighbors == NULL) {
return NULL;
}

/* compare to mindist^2 to save computational time (saves many
calls to
sqrt in inner loop) */
mindist2 = mindist * mindist;

for (i = 0; i < len1; i++) {
/* intial closest match is as far off as possible */
closest = 0;
old_dist = DBL_MAX;

for (j = 0; j < len2; j++) {
/* angular distance on the sky squared */
dist = pow((cos(dec1[i]) * (ra1[i] - ra2[j])), 2) +
pow((dec1[i] - dec2[j]), 2);

/* keep track of only the closest point */
if (dist < old_dist) {
closest = j;
old_dist = dist;
}
}

/* mark points that lie outside the specified distance */
if (old_dist > mindist2) {
neighbors[i] = -(closest + 1);
}
else {
neighbors[i] = closest;
}
}

return neighbors;
}

/* wrap_xmatch: Python wrapper function for xmatch above */
static PyObject *wrap_xmatch(PyObject *self, PyObject *args)
{
PyObject *ra1, *dec1, *ra2, *dec2, *ntup;
int i, len1, len2, len_chk1, len_chk2;
int *neighbors;
double mindist;
double *r1, *d1, *r2, *d2;

/* read 4 sequences (ra1, dec1, ra2, dec2) and a double (mindist)
*/
if (!PyArg_ParseTuple(args, "OOOOd", &ra1, &dec1, &ra2, &dec2,
&mindist)) {
return NULL;
}

/* check that mindist is > 0 */
if (mindist <= 0.0) {
PyErr_SetString(PyExc_ValueError, "mindist must be > 0");
return NULL;
}

/* convert sequences to tuples if necessary */
ra1 = PySequence_Fast(ra1, "ra1 must be iterable");
if (ra1 == NULL) {
return NULL;
}

dec1 = PySequence_Fast(dec1, "dec1 must be iterable");
if (dec1 == NULL) {
return NULL;
}

ra2 = PySequence_Fast(ra2, "ra2 must be iterable");
if (ra2 == NULL) {
return NULL;
}

dec2 = PySequence_Fast(dec2, "dec2 must be iterable");
if (dec2 == NULL) {
return NULL;
}

/* length of input sequences */
len1 = PySequence_Fast_GET_SIZE(ra1);
len_chk1 = PySequence_Fast_GET_SIZE(dec1);

if (len1 != len_chk1) {
PyErr_SetString(PyExc_ValueError, "ra1 and dec1 must be the
same size");
return NULL;
}

len2 = PySequence_Fast_GET_SIZE(ra2);
len_chk2 = PySequence_Fast_GET_SIZE(dec2);

if (len2 != len_chk2) {
PyErr_SetString(PyExc_ValueError, "ra2 and dec2 must be the
same size");
return NULL;
}

/* allocate memory for C arrays */
r1 = (double *) malloc(len1 * sizeof(double));
if (r1 == NULL) {
Py_DECREF(ra1);
Py_DECREF(dec1);
Py_DECREF(ra2);
Py_DECREF(dec2);
return PyErr_NoMemory();
}

d1 = (double *) malloc(len1 * sizeof(double));
if (d1 == NULL) {
Py_DECREF(ra1);
Py_DECREF(dec1);
Py_DECREF(ra2);
Py_DECREF(dec2);
return PyErr_NoMemory();
}

r2 = (double *) malloc(len2 * sizeof(double));
if (r2 == NULL) {
Py_DECREF(ra1);
Py_DECREF(dec1);
Py_DECREF(ra2);
Py_DECREF(dec2);
return PyErr_NoMemory();
}

d2 = (double *) malloc(len2 * sizeof(double));
if (d2 == NULL) {
Py_DECREF(ra1);
Py_DECREF(dec1);
Py_DECREF(ra2);
Py_DECREF(dec2);
return PyErr_NoMemory();
}

/* copy ra1 and dec1 */
for (i = 0; i < len1; i++) {
PyObject *fitem, *item;

/* copy ra1 */
item = PySequence_Fast_GET_ITEM(ra1, i);
if (item == NULL) {
Py_DECREF(ra1);
Py_DECREF(dec1);
Py_DECREF(ra2);
Py_DECREF(dec2);
free(r1);
free(d1);
free(r2);
free(d2);
return NULL;
}

fitem = PyNumber_Float(item);
if (fitem == NULL) {
Py_DECREF(ra1);
Py_DECREF(dec1);
Py_DECREF(ra2);
Py_DECREF(dec2);
free(r1);
free(d1);
free(r2);
free(d2);
PyErr_SetString(PyExc_TypeError, "items in ra1 must be
numbers");
return NULL;
}

r1[i] = PyFloat_AS_DOUBLE(fitem);
Py_DECREF(fitem);

/* copy dec1 */
item = PySequence_Fast_GET_ITEM(dec1, i);
if (item == NULL) {
Py_DECREF(ra1);
Py_DECREF(dec1);
Py_DECREF(ra2);
Py_DECREF(dec2);
free(r1);
free(d1);
free(r2);
free(d2);
return NULL;
}

fitem = PyNumber_Float(item);
if (fitem == NULL) {
Py_DECREF(ra1);
Py_DECREF(dec1);
Py_DECREF(ra2);
Py_DECREF(dec2);
free(r1);
free(d1);
free(r2);
free(d2);
PyErr_SetString(PyExc_TypeError, "items in dec1 must be
numbers");
return NULL;
}

d1[i] = PyFloat_AS_DOUBLE(fitem);
Py_DECREF(fitem);
}

/* copy ra2 and dec2 */
for (i = 0; i < len2; i++) {
PyObject *fitem, *item;

/* copy ra2 */
item = PySequence_Fast_GET_ITEM(ra2, i);
if (item == NULL) {
Py_DECREF(ra1);
Py_DECREF(dec1);
Py_DECREF(ra2);
Py_DECREF(dec2);
free(r1);
free(d1);
free(r2);
free(d2);
return NULL;
}

fitem = PyNumber_Float(item);
if (fitem == NULL) {
Py_DECREF(ra1);
Py_DECREF(dec1);
Py_DECREF(ra2);
Py_DECREF(dec2);
free(r1);
free(d1);
free(r2);
free(d2);
PyErr_SetString(PyExc_TypeError, "items in ra2 must be
numbers");
return NULL;
}

r2[i] = PyFloat_AS_DOUBLE(fitem);
Py_DECREF(fitem);

/* copy dec1 */
item = PySequence_Fast_GET_ITEM(dec2, i);
if (item == NULL) {
Py_DECREF(ra1);
Py_DECREF(dec1);
Py_DECREF(ra2);
Py_DECREF(dec2);
free(r1);
free(d1);
free(r2);
free(d2);
return NULL;
}

fitem = PyNumber_Float(item);
if (fitem == NULL) {
Py_DECREF(ra1);
Py_DECREF(dec1);
Py_DECREF(ra2);
Py_DECREF(dec2);
free(r1);
free(d1);
free(r2);
free(d2);
PyErr_SetString(PyExc_TypeError, "items in dec2 must be
numbers");
return NULL;
}

d2[i] = PyFloat_AS_DOUBLE(fitem);
Py_DECREF(fitem);
}

/* clean up Python objects */
Py_DECREF(ra1);
Py_DECREF(dec1);
Py_DECREF(ra2);
Py_DECREF(dec2);

/* now compute the cross match */
neighbors = xmatch(r1, d1, len1, r2, d2, len2, mindist);

/* clean up C arrays */
free(r1);
free(d1);
free(r2);
free(d2);

/* build a Python tuple to hold the returned neighbors */
ntup = PyTuple_New(len1);
if (ntup == NULL) {
return PyErr_NoMemory();
}

for (i = 0; i < len1; i++) {
PyObject *integer = Py_BuildValue("i", neighbors[i]);
if (integer == NULL) {
Py_DECREF(ntup);
free(neighbors);
return PyErr_NoMemory();
}

PyTuple_SET_ITEM(ntup, i, integer);
}

/* free last C array */
free(neighbors);

return ntup;
}

/* functions defined in the module */
static PyMethodDef astro_methods[] = {
{"xmatch", wrap_xmatch, METH_VARARGS, "Cross match 2 sets of
positions."},
{0} /* sentinel */
};

/* initializes the module */
void initastro(void)
{
(void) Py_InitModule("astro", astro_methods);
}

Sep 1 '05 #1
5 1618
je*************@gmail.com wrote:
Hi,

I'm rather new to Python, and I've just written my first Python C
module. I was wondering if some more experience Pythonista would look
over what I've written and given me some pointers (or find some bugs).
I had a few questions while writing this as well.

Also, I know that there are much better ways to compute nearest
neighbors than the brute force n^2 method, but this is plenty fast for
this application (at least the C version is).

1. How do I add docstrings to my module?
You mean a docstring on the module object itself? In initmodule() make a
string object with the contents of the docstring and assign it to
module.__doc__ . There might be a more idiomatic way, but I can't think
of it without trawling through the docs.
2. The Python reference counting and memory managment seemes awfully
repetetive and error prone. Is there a nicer way of doing this? I
know there are some wrappers out there such as Pyrex and SWIG that
might prove useful.
I'm quite a fan of Pyrex. You may also want to give ctypes a shot. SWIG
doesn't help as much with the reference counting
3. This module consisted of turning 4 Python sequences into C double
arrays, performing some calculations, and then turning a C int array
into a Python tuple for return. And it was a lot of work. Are there
any nice wrapper functions out there for turning Python sequences into
C arrays and vice versa?
http://numeric.scipy.org
Thanks,
Jeremy Brewer

The code is below:


You probably shouldn't post such large pieces of code to the list.

--
Robert Kern
rk***@ucsd.edu

"In the fields of hell where the grass grows high
Are the graves of dreams allowed to die."
-- Richard Harter

Sep 1 '05 #2
Robert Kern <rk***@ucsd.edu> writes:
je*************@gmail.com wrote:
Hi,

I'm rather new to Python, and I've just written my first Python C
module. I was wondering if some more experience Pythonista would look
over what I've written and given me some pointers (or find some bugs).
I had a few questions while writing this as well.

Also, I know that there are much better ways to compute nearest
neighbors than the brute force n^2 method, but this is plenty fast for
this application (at least the C version is).
You should give up C with a dumb algorithm running at fast speed.
Implement a better algorithm in Python, maybe you can even outperform
the dumb code.
1. How do I add docstrings to my module?


You mean a docstring on the module object itself? In initmodule() make a
string object with the contents of the docstring and assign it to
module.__doc__ . There might be a more idiomatic way, but I can't think
of it without trawling through the docs.


Use Py_InitModule3() instead.

Thomas
Sep 1 '05 #3
>You probably shouldn't post such large pieces of code to the list.

OK.
You mean a docstring on the module object itself?
Actually, I meant docstrings to the module and the functions, objects,
methods, whatever else in the module. My code was derived from the
Python Cookbook, which left that part out (rather important for
building real C modules).
You should give up C with a dumb algorithm running at fast speed.
Implement a better algorithm in Python, maybe you can even outperform
the dumb code.


That would be the next step if I needed even more speed, but the better
algorithm here would be to use KDTrees, which would be overkill (and
would require lots of development time). The C brute force
implementation runs plenty fast for me. It took only took a few hours
to implement and yielded a factor of 7 performance increase. I was
mostly interested in feedback on whether I had done things in a
properly efficient way (there are so many Python list / tuple /
sequence functions). I also find it hard to believe that there's no
standard Python function for converting sequences of one object to
arrays in C (a friend mentioned that Ruby's C API has this).

Another question: how does the distutils package handle version
upgrades? Say for example I find some bugs in my C code and need to
recompile it, will it just overwrite what's present in the
site-packages directory?

Jeremy

Sep 2 '05 #4
jbrewer wrote:

[I wrote:]
You probably shouldn't post such large pieces of code to the list.
OK.


BTW, please attribute your quotes.

[Still me:]
You mean a docstring on the module object itself?


Actually, I meant docstrings to the module and the functions, objects,
methods, whatever else in the module. My code was derived from the
Python Cookbook, which left that part out (rather important for
building real C modules).


In the method definition tables for top-level functions and type methods
there is a place for the docstrings. You had them in your code.

Please read the documentation on writing extensions.

http://docs.python.org/ext/ext.html

Specifically,

http://docs.python.org/ext/methodTable.html
http://docs.python.org/ext/node22.html

[Thomas Heller wrote:]
You should give up C with a dumb algorithm running at fast speed.
Implement a better algorithm in Python, maybe you can even outperform
the dumb code.


That would be the next step if I needed even more speed, but the better
algorithm here would be to use KDTrees, which would be overkill (and
would require lots of development time).


Not for you, it won't.

google('kdtree python')
The C brute force
implementation runs plenty fast for me. It took only took a few hours
to implement and yielded a factor of 7 performance increase. I was
mostly interested in feedback on whether I had done things in a
properly efficient way (there are so many Python list / tuple /
sequence functions). I also find it hard to believe that there's no
standard Python function for converting sequences of one object to
arrays in C (a friend mentioned that Ruby's C API has this).

Another question: how does the distutils package handle version
upgrades? Say for example I find some bugs in my C code and need to
recompile it, will it just overwrite what's present in the
site-packages directory?


It will overwrite the files that have been changed.

--
Robert Kern
rk***@ucsd.edu

"In the fields of hell where the grass grows high
Are the graves of dreams allowed to die."
-- Richard Harter

Sep 2 '05 #5
je*************@gmail.com writes:

[...]
/* convert sequences to tuples if necessary */
ra1 = PySequence_Fast(ra1, "ra1 must be iterable");
if (ra1 == NULL) {
return NULL;
}

dec1 = PySequence_Fast(dec1, "dec1 must be iterable");
if (dec1 == NULL) {
return NULL;
}
You leak a refcount to ra1 here in the case the the second
PySequence_Fast fails.

[...] /* allocate memory for C arrays */
r1 = (double *) malloc(len1 * sizeof(double));
if (r1 == NULL) {
Py_DECREF(ra1);
Py_DECREF(dec1);
Py_DECREF(ra2);
Py_DECREF(dec2);
return PyErr_NoMemory();
}

d1 = (double *) malloc(len1 * sizeof(double));
if (d1 == NULL) {
Py_DECREF(ra1);
Py_DECREF(dec1);
Py_DECREF(ra2);
Py_DECREF(dec2);
return PyErr_NoMemory();
} and so on, and so on.

You should probably better start your code initializing all local vars
to NULL, like this:

PyObject *ra1 = NULL, *dec1 = NULL, ...
char *r1 = NULL, char *d1 = NULL, ...;

Then in the body of the function

d1 = (double *) malloc(len1 * sizeof(double));
if (d1 == NULL) {
PyErr_NoMemory();
goto error;
}

and have a cleanup section at the end:

error:
if (d1) free(d1);
....
Py_XDECREF(ra1);
Py_XDECREF(dec1);
return NULL;
}

Reading the sources for a few builtin modules of the Python sources
itself should give you also an idea.
I also find it hard to believe that there's no
standard Python function for converting sequences of one object to
arrays in C (a friend mentioned that Ruby's C API has this).


The builtin array module does help with this, you can build the
C-compatible arrays in Python, pass them to your extension, and access
them using the buffer api.

Thomas
Sep 2 '05 #6

This thread has been closed and replies have been disabled. Please start a new discussion.

Similar topics

3
by: Redefined Horizons | last post by:
I'm trying to understand the argument flags that are used in the method table of an extension module written in C. First let me ask this question about the method table. Is it an C array named...
9
by: jezonthenet | last post by:
I started using Python a couple of days ago - here are a few questions: * Doesn't the __main__() method automatically execute when I run my python program? * Only when I do an import of my...
8
by: Derek Martin | last post by:
I'd like to know if it's possible to code something in Python which would be equivalent to the following C: ---- debug.c ---- #include <stdio.h> bool DEBUG;
0
by: emmanuelkatto | last post by:
Hi All, I am Emmanuel katto from Uganda. I want to ask what challenges you've faced while migrating a website to cloud. Please let me know. Thanks! Emmanuel
0
BarryA
by: BarryA | last post by:
What are the essential steps and strategies outlined in the Data Structures and Algorithms (DSA) roadmap for aspiring data scientists? How can individuals effectively utilize this roadmap to progress...
1
by: Sonnysonu | last post by:
This is the data of csv file 1 2 3 1 2 3 1 2 3 1 2 3 2 3 2 3 3 the lengths should be different i have to store the data by column-wise with in the specific length. suppose the i have to...
0
marktang
by: marktang | last post by:
ONU (Optical Network Unit) is one of the key components for providing high-speed Internet services. Its primary function is to act as an endpoint device located at the user's premises. However,...
0
by: Hystou | last post by:
Most computers default to English, but sometimes we require a different language, especially when relocating. Forgot to request a specific language before your computer shipped? No problem! You can...
0
Oralloy
by: Oralloy | last post by:
Hello folks, I am unable to find appropriate documentation on the type promotion of bit-fields when using the generalised comparison operator "<=>". The problem is that using the GNU compilers,...
0
jinu1996
by: jinu1996 | last post by:
In today's digital age, having a compelling online presence is paramount for businesses aiming to thrive in a competitive landscape. At the heart of this digital strategy lies an intricately woven...
0
tracyyun
by: tracyyun | last post by:
Dear forum friends, With the development of smart home technology, a variety of wireless communication protocols have appeared on the market, such as Zigbee, Z-Wave, Wi-Fi, Bluetooth, etc. Each...
0
agi2029
by: agi2029 | last post by:
Let's talk about the concept of autonomous AI software engineers and no-code agents. These AIs are designed to manage the entire lifecycle of a software development project—planning, coding, testing,...

By using Bytes.com and it's services, you agree to our Privacy Policy and Terms of Use.

To disable or enable advertisements and analytics tracking please visit the manage ads & tracking page.