term.pyx 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117
from rdflib import URIRef, BNode, Literal

#from cpython.mem cimport PyMem_Malloc, PyMem_Free
from libc.stdint cimport uint64_t
from libc.stdlib cimport free, malloc
from libc.string cimport memcpy

from lakesuperior.cy_include cimport cytpl as tpl
  6. DEF LSUP_TERM_TYPE_URIREF = 1
  7. DEF LSUP_TERM_TYPE_BNODE = 2
  8. DEF LSUP_TERM_TYPE_LITERAL = 3
  9. DEF LSUP_TERM_PK_FMT = b'csss' # Reflects the Term structure
  10. DEF LSUP_TERM_STRUCT_PK_FMT = b'S(' + LSUP_TERM_PK_FMT + b')'
  11. cdef int serialize(const Term *term, tpl.tpl_bin *sterm) except -1:
  12. """
  13. Serialize a Term into a binary buffer.
  14. The returned result is dynamically allocated and must be manually freed.
  15. """
  16. tpl.tpl_jot(
  17. tpl.TPL_MEM, &(sterm.addr), &(sterm.sz),
  18. LSUP_TERM_STRUCT_PK_FMT, term)
  19. cdef int deserialize(const Buffer *data, Term *term) except -1:
  20. """
  21. Return a term from serialized binary data.
  22. """
  23. _pk = tpl.tpl_peek(
  24. tpl.TPL_MEM | tpl.TPL_DATAPEEK, data[0].addr, data[0].sz,
  25. LSUP_TERM_PK_FMT, &(term[0].type), &(term[0].data),
  26. &(term[0].datatype), &(term[0].lang))
  27. if _pk is NULL:
  28. raise MemoryError('Error deserializing term.')
  29. else:
  30. free(_pk)
  31. cdef int from_rdflib(term_obj, Term *term) except -1:
  32. """
  33. Return a Term struct obtained from a Python/RDFLiib term.
  34. """
  35. _data = str(term_obj).encode()
  36. term[0].data = _data
  37. if isinstance(term_obj, Literal):
  38. _datatype = (getattr(term_obj, 'datatype') or '').encode()
  39. _lang = (getattr(term_obj, 'language') or '').encode()
  40. term[0].type = LSUP_TERM_TYPE_LITERAL
  41. term[0].datatype = _datatype
  42. term[0].lang = _lang
  43. else:
  44. if isinstance(term_obj, URIRef):
  45. term[0].type = LSUP_TERM_TYPE_URIREF
  46. elif isinstance(term_obj, BNode):
  47. term[0].type = LSUP_TERM_TYPE_BNODE
  48. else:
  49. raise ValueError(f'Unsupported term type: {type(term_obj)}')
  50. cdef Buffer *serialize_from_rdflib(term_obj):
  51. """
  52. Return a Buffer struct from a Python/RDFLib term.
  53. """
  54. cdef:
  55. Term term
  56. Buffer data
  57. from_rdflib(term_obj, &term)
  58. serialize(&term, &data)
  59. return &data
  60. cdef object to_rdflib(const Term *term):
  61. """
  62. Return an RDFLib term.
  63. """
  64. data = (<bytes>term[0].data).decode()
  65. if term[0].type == LSUP_TERM_TYPE_LITERAL:
  66. return Literal(data, datatype=term[0].datatype, lang=term[0].lang)
  67. else:
  68. if term[0].type == LSUP_TERM_TYPE_URIREF:
  69. return URIRef(data)
  70. elif term[0].type == LSUP_TERM_TYPE_BNODE:
  71. return BNode(data)
  72. else:
  73. raise IOError(f'Unknown term type code: {term[0].type}')
  74. cdef object deserialize_to_rdflib(const Buffer *data):
  75. """
  76. Return a Python/RDFLib term from a serialized Cython term.
  77. """
  78. cdef Term term
  79. deserialize(data, &term)
  80. return to_rdflib(&term)
  81. cdef object to_bytes(const Term *term):
  82. """
  83. Return a Python bytes object of the serialized term.
  84. """
  85. cdef:
  86. Buffer pk_t
  87. unsigned char *bytestream
  88. serialize(term, &pk_t)
  89. bytestream = <unsigned char *>pk_t.addr
  90. return <bytes>(bytestream)[:pk_t.sz]