term.pyx 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160
  1. from rdflib import URIRef, BNode, Literal
  2. #from cpython.mem cimport PyMem_Malloc, PyMem_Free
  3. from libc.stdint cimport uint64_t
  4. from libc.stdlib cimport free
  5. from libc.string cimport memcpy
  6. from cymem.cymem cimport Pool
  7. from lakesuperior.cy_include cimport cytpl as tpl
  8. from lakesuperior.model.base cimport Buffer, buffer_dump
  9. DEF LSUP_TERM_TYPE_URIREF = 1
  10. DEF LSUP_TERM_TYPE_BNODE = 2
  11. DEF LSUP_TERM_TYPE_LITERAL = 3
  12. DEF LSUP_TERM_PK_FMT = b'csss' # Reflects the Term structure
  13. DEF LSUP_TERM_STRUCT_PK_FMT = b'S(' + LSUP_TERM_PK_FMT + b')'
  14. cdef int serialize(const Term *term, Buffer *sterm, Pool pool=None) except -1:
  15. """
  16. Serialize a Term into a binary buffer.
  17. The returned result is dynamically allocated in the provided memory pool.
  18. """
  19. cdef:
  20. unsigned char *addr
  21. size_t sz
  22. tpl.tpl_jot(tpl.TPL_MEM, &addr, &sz, LSUP_TERM_STRUCT_PK_FMT, term)
  23. if pool is None:
  24. sterm.addr = addr
  25. else:
  26. # addr is within this function scope. Must be copied to the cymem pool.
  27. sterm.addr = pool.alloc(sz, 1)
  28. if not sterm.addr:
  29. raise MemoryError()
  30. memcpy(sterm.addr, addr, sz)
  31. sterm.sz = sz
  32. cdef int deserialize(const Buffer *data, Term *term) except -1:
  33. """
  34. Return a term from serialized binary data.
  35. """
  36. #print(f'Deserializing: {buffer_dump(data)}')
  37. _pk = tpl.tpl_peek(
  38. tpl.TPL_MEM | tpl.TPL_DATAPEEK, data[0].addr, data[0].sz,
  39. LSUP_TERM_PK_FMT, &(term[0].type), &(term[0].data),
  40. &(term[0].datatype), &(term[0].lang))
  41. if _pk is NULL:
  42. raise MemoryError('Error deserializing term.')
  43. else:
  44. free(_pk)
  45. cdef int from_rdflib(term_obj, Term *term) except -1:
  46. """
  47. Return a Term struct obtained from a Python/RDFLib term.
  48. """
  49. _data = str(term_obj).encode()
  50. term[0].data = _data
  51. if isinstance(term_obj, Literal):
  52. _datatype = (getattr(term_obj, 'datatype') or '').encode()
  53. _lang = (getattr(term_obj, 'language') or '').encode()
  54. term[0].type = LSUP_TERM_TYPE_LITERAL
  55. term[0].datatype = _datatype
  56. term[0].lang = _lang
  57. else:
  58. term[0].datatype = NULL
  59. term[0].lang = NULL
  60. if isinstance(term_obj, URIRef):
  61. term[0].type = LSUP_TERM_TYPE_URIREF
  62. elif isinstance(term_obj, BNode):
  63. term[0].type = LSUP_TERM_TYPE_BNODE
  64. else:
  65. raise ValueError(f'Unsupported term type: {type(term_obj)}')
  66. cdef int serialize_from_rdflib(
  67. term_obj, Buffer *data, Pool pool=None
  68. ) except -1:
  69. """
  70. Return a Buffer struct from a Python/RDFLib term.
  71. """
  72. cdef:
  73. Term _term
  74. void *addr
  75. size_t sz
  76. # From RDFlib
  77. _data = str(term_obj).encode()
  78. _term.data = _data
  79. if isinstance(term_obj, Literal):
  80. _datatype = (getattr(term_obj, 'datatype') or '').encode()
  81. _lang = (getattr(term_obj, 'language') or '').encode()
  82. _term.type = LSUP_TERM_TYPE_LITERAL
  83. _term.datatype = _datatype
  84. _term.lang = _lang
  85. else:
  86. _term.datatype = NULL
  87. _term.lang = NULL
  88. if isinstance(term_obj, URIRef):
  89. _term.type = LSUP_TERM_TYPE_URIREF
  90. elif isinstance(term_obj, BNode):
  91. _term.type = LSUP_TERM_TYPE_BNODE
  92. else:
  93. raise ValueError(f'Unsupported term type: {type(term_obj)}')
  94. serialize(&_term, data, pool)
  95. cdef object to_rdflib(const Term *term):
  96. """
  97. Return an RDFLib term.
  98. """
  99. cdef str data = (<bytes>term[0].data).decode()
  100. if term[0].type == LSUP_TERM_TYPE_LITERAL:
  101. return Literal(
  102. data,
  103. datatype=term[0].datatype if not term[0].lang else None,
  104. lang=term[0].lang or None
  105. )
  106. else:
  107. if term[0].type == LSUP_TERM_TYPE_URIREF:
  108. return URIRef(data)
  109. elif term[0].type == LSUP_TERM_TYPE_BNODE:
  110. return BNode(data)
  111. else:
  112. raise IOError(f'Unknown term type code: {term[0].type}')
  113. cdef object deserialize_to_rdflib(const Buffer *data):
  114. """
  115. Return a Python/RDFLib term from a serialized Cython term.
  116. """
  117. cdef Term t
  118. deserialize(data, &t)
  119. return to_rdflib(&t)
  120. cdef object to_bytes(const Term *term):
  121. """
  122. Return a Python bytes object of the serialized term.
  123. """
  124. cdef:
  125. Buffer pk_t
  126. unsigned char *bytestream
  127. serialize(term, &pk_t)
  128. bytestream = <unsigned char *>pk_t.addr
  129. return <bytes>(bytestream)[:pk_t.sz]