#ifndef _PY_GRAPH_MOD_H #define _PY_GRAPH_MOD_H #define PY_SSIZE_T_CLEAN #include #include #include "graph.h" #include "codec/codec_nt.h" #include "codec/codec_ttl.h" #include "py_triple.h" /* * Iterator helpers. */ /* * String iterator for encoder output. * * Yields one string (one or more lines) at a time. */ typedef struct { PyObject_HEAD void *it; const LSUP_Codec *codec; char *line; } StringIteratorObject; static void StringIterator_dealloc (StringIteratorObject *it_obj) { it_obj->codec->encode_graph_done (it_obj->it); } static PyObject * StringIterator_next (StringIteratorObject *it_obj) { LSUP_rc rc = it_obj->codec->encode_graph_iter ( it_obj->it, &it_obj->line); if (rc != LSUP_OK) { if (rc != LSUP_END) PyErr_SetString (PyExc_ValueError, "Error encoding graph."); // If not an error, this raises StopIteration. return NULL; } return PyUnicode_FromString ((char *) it_obj->line); } /* * String iterator type. * * Objects of this type are never generated from Python code, rather from * Graph_encode, hence the type has no special new or init function. */ PyTypeObject StringIteratorType = { PyVarObject_HEAD_INIT(&PyType_Type, 0) .tp_name = "graph.StringIterator", .tp_basicsize = sizeof (StringIteratorObject), .tp_itemsize = 0, .tp_flags = Py_TPFLAGS_DEFAULT, .tp_dealloc = (destructor) StringIterator_dealloc, .tp_iter = PyObject_SelfIter, .tp_iternext = (iternextfunc)StringIterator_next, }; /* * Graph iterator. * * Yields one triple at a time. */ typedef struct { PyObject_HEAD LSUP_GraphIterator *it; LSUP_Triple *spo; } GraphIteratorObject; static void GraphIterator_dealloc (GraphIteratorObject *it_obj) { LSUP_graph_iter_free (it_obj->it); free (it_obj->spo); } static PyObject * GraphIterator_next (GraphIteratorObject *it_obj) { LSUP_rc rc = LSUP_graph_iter_next (it_obj->it, &it_obj->spo); if (rc != LSUP_OK) { if (rc != LSUP_END) PyErr_SetString (PyExc_ValueError, "Error encoding graph."); // If not an error, this raises StopIteration. return NULL; } return build_triple (it_obj->spo); } /* * Graph iterator type. */ PyTypeObject GraphIteratorType = { PyVarObject_HEAD_INIT(&PyType_Type, 0) .tp_name = "graph.GraphIterator", .tp_basicsize = sizeof (GraphIteratorObject), .tp_itemsize = 0, .tp_flags = Py_TPFLAGS_DEFAULT, .tp_dealloc = (destructor) GraphIterator_dealloc, .tp_iter = PyObject_SelfIter, .tp_iternext = (iternextfunc) GraphIterator_next, }; /* * Graph stuff. */ typedef struct { PyObject_HEAD LSUP_Graph *ob_struct; } GraphObject; static int Graph_init (GraphObject *self, PyObject *args, PyObject *kwargs) { unsigned char store_type; PyObject *uri_obj = NULL; LSUP_Term *uri = NULL, *src_uri = NULL; static char *kwlist[] = {"", "uri_obj", NULL}; if (!PyArg_ParseTupleAndKeywords ( args, kwargs, "b|O", kwlist, &store_type, &uri_obj)) return -1; if (uri_obj) { if (!PyObject_TypeCheck (uri_obj, &TermType)) { PyErr_SetString (PyExc_TypeError, "uri is not a Term type."); return -1; } src_uri = ((TermObject *) uri_obj)->ob_struct; uri = LSUP_iriref_new (src_uri->data, LSUP_iriref_nsm (src_uri)); if (! LSUP_IS_IRI (uri)) { PyErr_SetString (PyExc_TypeError, "uri is not a IRIREF type."); return -1; } } else uri = LSUP_iriref_new (NULL, NULL); // Set up the store if a function for that is defined. const LSUP_StoreInt *sif = LSUP_store_int (store_type); if (UNLIKELY (!sif)) { PyErr_SetString ( PyExc_TypeError, "No interface defined for given store type."); return -1; } if (sif->setup_fn) { if (sif->setup_fn(NULL, false) < LSUP_OK) { PyErr_SetString ( PyExc_IOError, "Error initializing back end store."); return -1; } } // TODO Make store ID, nsm and initial size accessible. self->ob_struct = LSUP_graph_new ( uri, (LSUP_StoreType) store_type, NULL, NULL, 0); if (!self->ob_struct) { PyErr_SetString (PyExc_ValueError, "Could not create graph."); return -1; } LSUP_Term *uri2 = LSUP_graph_uri (self->ob_struct); log_debug("Graph URI (%p): %s", uri2, uri2->data); return 0; } static void Graph_dealloc (GraphObject *self) { LSUP_graph_free (self->ob_struct); Py_TYPE (self)->tp_free ((PyObject *) self); } static PyObject * Graph_get_uri (GraphObject *self, void *closure) { LSUP_Term *uri = LSUP_graph_uri (self->ob_struct); log_debug("Graph URI address: %p", uri); log_debug("Graph URI: %s", uri->data); return PyUnicode_FromString (uri->data); } static int Graph_set_uri (GraphObject *self, PyObject *value, void *closure) { if (!PyObject_TypeCheck (value, &TermType)) { PyErr_SetString (PyExc_TypeError, "URI is not a Term type."); return -1; } LSUP_Term *gr_uri = ((TermObject*)value)->ob_struct; log_debug ("New graph URI: %s", (gr_uri->data)); LSUP_rc rc = LSUP_graph_set_uri (self->ob_struct, LSUP_term_copy (gr_uri)); return rc == LSUP_OK ? 0 : -1; } static PyGetSetDef Graph_getsetters[] = { { "uri", (getter) Graph_get_uri, (setter) Graph_set_uri, "Graph URI.", NULL }, {NULL} }; static int Graph_copy_contents (GraphObject *self, GraphObject *dest) { if (LSUP_graph_copy_contents (self->ob_struct, dest->ob_struct) < LSUP_OK) { PyErr_SetString (PyExc_ValueError, "Error copying graph contents."); return -1; } return 0; }; static PyObject * Graph_new_from_rdf (PyTypeObject *cls, PyObject *args) { PyObject *buf, *fileno_fn, *fileno_obj; const char *type; if (! PyArg_ParseTuple (args, "Os", &buf, &type)) return NULL; // Get the file descriptor from the Python BufferedIO object. // FIXME This is not sure to be reliable. See // https://docs.python.org/3/library/io.html?highlight=io%20bufferedreader#io.IOBase.fileno if (! (fileno_fn = PyObject_GetAttrString (buf, "fileno"))) { PyErr_SetString (PyExc_TypeError, "Object has no fileno function."); return NULL; } PyObject* fileno_args = PyTuple_New(0); if (! (fileno_obj = PyObject_CallObject (fileno_fn, fileno_args))) { PyErr_SetString (PyExc_SystemError, "Error calling fileno function."); return NULL; } int fd = PyLong_AsSize_t (fileno_obj); /* * From the Linux man page: * * > The file descriptor is not dup'ed, and will be closed when the stream * > created by fdopen() is closed. The result of applying fdopen() to a * > shared memory object is undefined. * * Hence the `dup()`. */ fd = dup (fd); FILE *fh = fdopen (fd, "r"); GraphObject *res = (GraphObject *) cls->tp_alloc(cls, 0); if (!res) return PyErr_NoMemory(); const LSUP_Codec *codec; if (strcmp(type, "nt") == 0) codec = &nt_codec; else if (strcmp (type, "ttl") == 0) codec = &ttl_codec; // TODO other codecs here. else { PyErr_SetString (PyExc_ValueError, "Unsupported codec."); return NULL; } size_t ct; char *err; codec->decode_graph (fh, &res->ob_struct, &ct, &err); fclose (fh); log_debug ("Decoded %lu triples.", ct); if (UNLIKELY (err)) { PyErr_SetString (PyExc_IOError, err); return NULL; } Py_INCREF (res); return (PyObject *) res; } /** @brief Build a triple pattern for lookup purposes. */ inline static int build_trp_pattern (PyObject *args, LSUP_Term *spo[]) { PyObject *s_obj, *p_obj, *o_obj; if (! (PyArg_ParseTuple (args, "OOO", &s_obj, &p_obj, &o_obj))) return -1; if (s_obj != Py_None && !PyObject_TypeCheck (s_obj, &TermType)) { PyErr_SetString (PyExc_TypeError, "Subject must be a term or None."); return -1; } if (p_obj != Py_None && !PyObject_TypeCheck (p_obj, &TermType)) { PyErr_SetString (PyExc_TypeError, "Predicate must be a term or None."); return -1; } if (o_obj != Py_None && !PyObject_TypeCheck (o_obj, &TermType)) { PyErr_SetString (PyExc_TypeError, "Object must be a term or None."); return -1; } spo[0] = s_obj != Py_None ? ((TermObject *)s_obj)->ob_struct : NULL; spo[1] = p_obj != Py_None ? ((TermObject *)p_obj)->ob_struct : NULL; spo[2] = o_obj != Py_None ? ((TermObject *)o_obj)->ob_struct : NULL; return 0; } static PyObject * Graph_richcmp (PyObject *self, PyObject *other, int op) { // Only equality and non-equality are supported. if (op != Py_EQ && op != Py_NE) Py_RETURN_NOTIMPLEMENTED; LSUP_Graph *t1 = ((GraphObject *) self)->ob_struct; LSUP_Graph *t2 = ((GraphObject *) other)->ob_struct; if (LSUP_graph_equals (t1, t2) ^ (op == Py_NE)) Py_RETURN_TRUE; Py_RETURN_FALSE; } static inline PyObject * Graph_bool_op ( PyTypeObject *cls, LSUP_bool_op op, PyObject *gr1, PyObject *gr2) { if (! PyObject_TypeCheck (gr1, cls) || ! PyObject_TypeCheck (gr2, cls)) return NULL; GraphObject *res = (GraphObject *) cls->tp_alloc (cls, 0); if (!res) return NULL; LSUP_Graph *dest = LSUP_graph_new ( NULL, LSUP_STORE_HTABLE, NULL, NULL, 0); if (!dest) { PyErr_SetString (PyExc_Exception, "Could not create destination graph."); return NULL; } LSUP_rc rc = LSUP_graph_bool_op ( op, ((GraphObject *) gr1)->ob_struct, ((GraphObject *) gr2)->ob_struct, res->ob_struct); if (rc < LSUP_OK) { PyErr_SetString (PyExc_Exception, "Error performing boolean operation."); return NULL; } Py_INCREF(res); return (PyObject *) res; } static PyObject * Graph_add (PyObject *self, PyObject *triples) { // Triple may be any iterable. PyObject *iter = PyObject_GetIter (triples); if (! iter) { PyErr_SetString ( PyExc_ValueError, "Triples object cannot be iterated."); return NULL; } PyObject *trp_obj; int rc = 0; size_t ct = 0; LSUP_GraphIterator *it = LSUP_graph_add_init ( ((GraphObject *)self)->ob_struct); while ((trp_obj = PyIter_Next (iter))) { if (!PyObject_TypeCheck (trp_obj, &TripleType)) { PyErr_SetString ( PyExc_ValueError, "Object is not a triple."); rc = -1; goto finally; } log_trace ("Inserting triple #%lu", ct); LSUP_rc db_rc = LSUP_graph_add_iter ( it, ((TripleObject *) trp_obj)->ob_struct); if (db_rc == LSUP_OK) { rc = LSUP_OK; ct++; } else if (UNLIKELY (db_rc < 0)) { PyErr_SetString (PyExc_ValueError, "Error while adding triples."); rc = -1; goto finally; } // If db_rc > 0, it's a no-op and the counter is not increased. } finally: LSUP_graph_add_done (it); if (rc == LSUP_OK) return PyLong_FromSize_t (ct); return NULL; } static PyObject *Graph_remove (PyObject *self, PyObject *args) { LSUP_rc rc; LSUP_Term *spo[3]; rc = build_trp_pattern (args, spo); if (rc < 0) goto finally; size_t ct; rc = LSUP_graph_remove ( ((GraphObject *)self)->ob_struct, spo[0], spo[1], spo[2], &ct); if (rc < 0) { // TODO implement strerror for more details. PyErr_SetString (PyExc_SystemError, "Error removing triples."); goto finally; } log_debug ("Removed %lu triples.", ct); finally: if (rc < 0) return NULL; Py_RETURN_NONE; } static PyObject *Graph_lookup (PyObject *self, PyObject *args) { LSUP_rc rc; GraphIteratorObject *it_obj = NULL; LSUP_Term *spo[3]; rc = build_trp_pattern (args, spo); if (UNLIKELY (rc < 0)) goto finally; size_t ct; LSUP_GraphIterator *it = LSUP_graph_lookup ( ((GraphObject *)self)->ob_struct, spo[0], spo[1], spo[2], &ct); if (UNLIKELY (!it)) { // TODO implement LSUP_strerror for more details. PyErr_SetString (PyExc_SystemError, "Error looking up triples."); rc = -1; goto finally; } log_debug ("Found %lu triples.", ct); // Initialize the generator object. it_obj = PyObject_New ( GraphIteratorObject, &GraphIteratorType); if (UNLIKELY (!it_obj)) return PyErr_NoMemory(); it_obj->it = it; it_obj->spo = TRP_DUMMY; Py_INCREF (it_obj); finally: return (PyObject *)it_obj; } static PyObject * Graph_encode (PyObject *self, PyObject *args) { const char *type; if (! PyArg_ParseTuple (args, "s", &type)) return NULL; // Initialize the generator object. StringIteratorObject *it_obj = PyObject_New ( StringIteratorObject, &StringIteratorType); if (!it_obj) return NULL; if (strcmp (type, "nt") == 0) it_obj->codec = &nt_codec; else if (strcmp (type, "ttl") == 0) it_obj->codec = &ttl_codec; // TODO other codecs here. else { PyErr_SetString (PyExc_ValueError, "Unsupported codec."); return NULL; } it_obj->it = it_obj->codec->encode_graph_init ( ((GraphObject *)self)->ob_struct); it_obj->line = NULL; Py_INCREF (it_obj); return (PyObject *)it_obj; } static PyMethodDef Graph_methods[] = { { "copy", (PyCFunction) Graph_copy_contents, METH_CLASS | METH_VARARGS, "Copy the contents of a graph into another." }, { "from_rdf", (PyCFunction) Graph_new_from_rdf, METH_CLASS | METH_VARARGS, "Create a graph from a RDF file." }, {"add", (PyCFunction) Graph_add, METH_O, "Add triples to a graph."}, { "remove", (PyCFunction) Graph_remove, METH_VARARGS, "Remove triples from a graph by matching a pattern." }, { "lookup", (PyCFunction) Graph_lookup, METH_VARARGS, "Look triples in a graph by matching a pattern." }, { "to_rdf", (PyCFunction) Graph_encode, METH_VARARGS, "Encode a graph into a RDF byte buffer." }, {NULL}, }; static inline PyObject *Graph_bool_and ( PyTypeObject *cls, PyObject *gr1, PyObject *gr2) { return Graph_bool_op (cls, LSUP_BOOL_INTERSECTION, gr1, gr2); } static inline PyObject *Graph_bool_or ( PyTypeObject *cls, PyObject *gr1, PyObject *gr2) { return Graph_bool_op (cls, LSUP_BOOL_UNION, gr1, gr2); } static inline PyObject *Graph_bool_subtract ( PyTypeObject *cls, PyObject *gr1, PyObject *gr2) { return Graph_bool_op (cls, LSUP_BOOL_SUBTRACTION, gr1, gr2); } static inline PyObject *Graph_bool_xor ( PyTypeObject *cls, PyObject *gr1, PyObject *gr2) { return Graph_bool_op (cls, LSUP_BOOL_XOR, gr1, gr2); } static PyNumberMethods Graph_number_methods = { .nb_and = (binaryfunc) Graph_bool_and, .nb_or = (binaryfunc) Graph_bool_or, .nb_subtract = (binaryfunc) Graph_bool_subtract, .nb_xor = (binaryfunc) Graph_bool_xor, }; static int Graph_contains (PyObject *self, PyObject *value) { if (!PyObject_TypeCheck (value, &TripleType)) { PyErr_SetString (PyExc_ValueError, "Error parsing input value."); return -1; } int rc = LSUP_graph_contains ( ((GraphObject *) self)->ob_struct, ((TripleObject *) value)->ob_struct); return rc; } static Py_ssize_t Graph_get_size (PyObject *self) { return LSUP_graph_size (((GraphObject *) self)->ob_struct); } static PySequenceMethods Graph_seq_methods = { .sq_length = (lenfunc) Graph_get_size, .sq_contains = (objobjproc) Graph_contains, }; PyTypeObject GraphType = { PyVarObject_HEAD_INIT(NULL, 0) .tp_name = "graph.Graph", .tp_doc = "RDF graph", .tp_basicsize = sizeof (GraphObject), .tp_itemsize = 0, .tp_flags = Py_TPFLAGS_DEFAULT, .tp_new = PyType_GenericNew, .tp_init = (initproc) Graph_init, .tp_dealloc = (destructor) Graph_dealloc, .tp_getset = Graph_getsetters, .tp_methods = Graph_methods, .tp_richcompare = (richcmpfunc) Graph_richcmp, .tp_as_number = &Graph_number_methods, .tp_as_sequence = &Graph_seq_methods, }; #endif