term.pyx 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
  1. from rdflib import URIRef, BNode, Literal
  2. #from cpython.mem cimport PyMem_Malloc, PyMem_Free
  3. from libc.stdint cimport uint64_t
  4. from libc.stdlib cimport free
  5. from libc.string cimport memcpy
  6. from cymem.cymem cimport Pool
  7. from lakesuperior.cy_include cimport cytpl as tpl
  8. from lakesuperior.model.base cimport Buffer
  9. DEF LSUP_TERM_TYPE_URIREF = 1  # Term.type code assigned to rdflib URIRef terms
  10. DEF LSUP_TERM_TYPE_BNODE = 2  # Term.type code assigned to rdflib BNode terms
  11. DEF LSUP_TERM_TYPE_LITERAL = 3  # Term.type code assigned to rdflib Literal terms
  12. DEF LSUP_TERM_PK_FMT = b'csss' # tpl format of the Term members, in order: type, data, datatype, lang
  13. DEF LSUP_TERM_STRUCT_PK_FMT = b'S(' + LSUP_TERM_PK_FMT + b')'  # Same members wrapped as a tpl structure; used when serializing a Term pointer
  cdef int serialize(const Term *term, Buffer *sterm, Pool pool=None) except -1:
      """
      Serialize a Term into a binary buffer.

      The binary image is produced by ``tpl_jot`` with the
      ``LSUP_TERM_STRUCT_PK_FMT`` format.

      :param term: Pointer to the term structure to serialize.
      :param sterm: Output buffer. Its ``addr`` and ``sz`` members are set
          to the location and size of the serialized image.
      :param pool: Optional cymem memory pool. If given, the image is copied
          into memory allocated from the pool and the intermediate buffer
          created by tpl is released. If ``None``, ``sterm.addr`` is the
          buffer allocated by tpl itself and ownership passes to the caller.

      :raise MemoryError: If tpl produced no output buffer or the pool
          allocation failed.
      """
      cdef:
          unsigned char *addr = NULL
          size_t sz = 0

      tpl.tpl_jot(tpl.TPL_MEM, &addr, &sz, LSUP_TERM_STRUCT_PK_FMT, term)
      # tpl leaves the output pointer untouched when it fails.
      if addr is NULL:
          raise MemoryError('Error serializing term.')

      if pool is None:
          sterm.addr = addr
      else:
          # The pool owns its own copy, so the tpl buffer must be released
          # here whether or not the copy succeeds.
          try:
              sterm.addr = pool.alloc(sz, 1)
              if not sterm.addr:
                  raise MemoryError()
              memcpy(sterm.addr, addr, sz)
          finally:
              free(addr)

      sterm.sz = sz
      return 0
  cdef int deserialize(const Buffer *data, Term *term) except -1:
      """
      Populate a Term structure from serialized binary data.

      The ``type``, ``data``, ``datatype`` and ``lang`` members of ``term``
      are filled in by ``tpl_peek`` according to ``LSUP_TERM_PK_FMT``.

      :param data: Buffer holding the serialized term.
      :param term: Term structure to be filled.

      :raise MemoryError: If ``tpl_peek`` returns ``NULL``.
      """
      peeked_fmt = tpl.tpl_peek(
          tpl.TPL_MEM | tpl.TPL_DATAPEEK, data.addr, data.sz,
          LSUP_TERM_PK_FMT, &term.type, &term.data,
          &term.datatype, &term.lang)

      if peeked_fmt is NULL:
          raise MemoryError('Error deserializing term.')

      # The returned pointer is only used as a success indicator.
      free(peeked_fmt)
      return 0
  cdef int from_rdflib(term_obj, Term *term) except -1:
      """
      Fill a Term struct from a Python/RDFLib term.

      ``term.data`` receives the encoded string form of ``term_obj``. For a
      Literal, ``datatype`` and ``lang`` receive the encoded datatype and
      language (empty when absent); for a URIRef or BNode they are ``NULL``.

      NOTE(review): the string members are assigned from byte strings that
      are local to this function -- confirm that they remain valid for as
      long as the caller uses ``term``.

      :param term_obj: RDFLib URIRef, BNode or Literal.
      :param term: Term structure to be filled.

      :raise ValueError: If ``term_obj`` is none of the supported types.
      """
      value_bytes = str(term_obj).encode()
      term.data = value_bytes

      if isinstance(term_obj, Literal):
          datatype_bytes = (term_obj.datatype or '').encode()
          lang_bytes = (term_obj.language or '').encode()
          term.type = LSUP_TERM_TYPE_LITERAL
          term.datatype = datatype_bytes
          term.lang = lang_bytes
          return 0

      # Only literals carry a datatype or a language tag.
      term.datatype = NULL
      term.lang = NULL
      if isinstance(term_obj, URIRef):
          term.type = LSUP_TERM_TYPE_URIREF
      elif isinstance(term_obj, BNode):
          term.type = LSUP_TERM_TYPE_BNODE
      else:
          raise ValueError(f'Unsupported term type: {type(term_obj)}')
      return 0
  cdef int serialize_from_rdflib(
      term_obj, Buffer *data, Pool pool=None
  ) except -1:
      """
      Serialize a Python/RDFLib term into a Buffer struct.

      The term is first converted to a temporary Term structure, which is
      then passed to ``serialize``.

      :param term_obj: RDFLib URIRef, BNode or Literal.
      :param data: Output buffer, filled by ``serialize``.
      :param pool: Optional cymem memory pool, forwarded to ``serialize``.

      :raise ValueError: If ``term_obj`` is none of the supported types.
      """
      cdef Term _term

      # The byte strings below back the pointers stored in _term. Keeping
      # them as locals of this function means they are still referenced
      # when serialize() reads them.
      _data = str(term_obj).encode()
      _term.data = _data

      if isinstance(term_obj, Literal):
          _datatype = (getattr(term_obj, 'datatype') or '').encode()
          _lang = (getattr(term_obj, 'language') or '').encode()
          _term.type = LSUP_TERM_TYPE_LITERAL
          _term.datatype = _datatype
          _term.lang = _lang
      else:
          # Only literals carry a datatype or a language tag.
          _term.datatype = NULL
          _term.lang = NULL
          if isinstance(term_obj, URIRef):
              _term.type = LSUP_TERM_TYPE_URIREF
          elif isinstance(term_obj, BNode):
              _term.type = LSUP_TERM_TYPE_BNODE
          else:
              raise ValueError(f'Unsupported term type: {type(term_obj)}')

      serialize(&_term, data, pool)
      return 0
  cdef object to_rdflib(const Term *term):
      """
      Return an RDFLib term built from a Term structure.

      For literals, the ``datatype`` and ``lang`` members are decoded to
      strings; a ``NULL`` or empty member is treated as absent. If a language
      tag is present it takes precedence and no datatype is passed, since
      RDFLib does not accept both at once.

      :param term: Pointer to the term structure to convert.

      :return: A URIRef, BNode or Literal, depending on ``term.type``.

      :raise IOError: If ``term.type`` is not a known term type code.
      """
      cdef str data = (<bytes>term[0].data).decode()

      if term[0].type == LSUP_TERM_TYPE_LITERAL:
          datatype = None
          lang = None
          if term[0].lang is not NULL:
              lang = (<bytes>term[0].lang).decode() or None
          if lang is None and term[0].datatype is not NULL:
              datatype_str = (<bytes>term[0].datatype).decode()
              if datatype_str:
                  datatype = URIRef(datatype_str)
          return Literal(data, datatype=datatype, lang=lang)

      if term[0].type == LSUP_TERM_TYPE_URIREF:
          return URIRef(data)
      if term[0].type == LSUP_TERM_TYPE_BNODE:
          return BNode(data)

      raise IOError(f'Unknown term type code: {term[0].type}')
  cdef object deserialize_to_rdflib(const Buffer *data):
      """
      Return a Python/RDFLib term from a serialized Cython term.

      :param data: Buffer holding the serialized term.

      :return: The RDFLib term produced by ``to_rdflib``.
      """
      cdef Term t

      deserialize(data, &t)
      try:
          return to_rdflib(&t)
      finally:
          # The strings filled in during deserialization are heap-allocated
          # by tpl. to_rdflib() copies them into Python objects, so they can
          # be released here; free(NULL) is a no-op for absent members.
          free(t.data)
          free(t.datatype)
          free(t.lang)
  cdef object to_bytes(const Term *term):
      """
      Return a Python bytes object of the serialized term.

      :param term: Pointer to the term structure to serialize.

      :return: A bytes object containing a copy of the serialized image.
      """
      cdef:
          Buffer pk_t
          unsigned char *bytestream

      # No pool is passed, so pk_t.addr is the buffer allocated by tpl.
      serialize(term, &pk_t)
      try:
          bytestream = <unsigned char *>pk_t.addr
          # Slicing by size copies the whole image, including any NUL bytes.
          return <bytes>(bytestream)[:pk_t.sz]
      finally:
          # The bytes object holds its own copy; release the tpl buffer.
          free(pk_t.addr)