term.pyx 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180
  from rdflib import URIRef, BNode, Literal

  #from cpython.mem cimport PyMem_Malloc, PyMem_Free
  from libc.stdint cimport uint64_t
  from libc.stdlib cimport free, malloc
  from libc.string cimport memcpy

  from lakesuperior.cy_include cimport cytpl as tpl
  # Codes stored in the `type` member of a Term to tell the term kinds apart.
  DEF LSUP_TERM_TYPE_URIREF = 1
  DEF LSUP_TERM_TYPE_BNODE = 2
  DEF LSUP_TERM_TYPE_LITERAL = 3
  # tpl format of the Term members, in the order: type, data, datatype, lang.
  DEF LSUP_TERM_PK_FMT = b'csss' # Reflects the Term structure
  # Same members wrapped in a tpl structure. This is the format handed to
  # tpl_jot() when a Term is serialized.
  DEF LSUP_TERM_STRUCT_PK_FMT = b'S(' + LSUP_TERM_PK_FMT + b')'
  cdef int serialize(const Term *term, Buffer *sterm) except -1:
      """
      Serialize a Term into a binary buffer.

      The resulting buffer is dynamically allocated and must be manually
      freed by the caller (``free(sterm.addr)``).

      :param term: Pointer to the Term to serialize.
      :param sterm: Buffer that receives the address and size of the
          serialized data. It is only written to on success.

      :raise MemoryError: If tpl fails to serialize the term.
      """
      cdef:
          unsigned char *addr = NULL
          size_t sz = 0

      # tpl_jot() reports failure with a non-zero return value; in that case
      # addr and sz are not meaningful and must not reach the caller.
      if tpl.tpl_jot(
              tpl.TPL_MEM, &addr, &sz, LSUP_TERM_STRUCT_PK_FMT, term) != 0:
          raise MemoryError('Error serializing term.')

      sterm[0].addr = addr
      sterm[0].sz = sz

      return 0
  cdef int deserialize(const Buffer *data, Term *term) except -1:
      """
      Fill a Term struct with the members read from serialized binary data.

      :param data: Buffer holding the serialized term.
      :param term: Term whose ``type``, ``data``, ``datatype`` and ``lang``
          members are written by ``tpl_peek``.

      :raise MemoryError: If ``tpl_peek`` returns NULL.
      """
      # NOTE(review): the peek uses LSUP_TERM_PK_FMT while serialize() writes
      # with LSUP_TERM_STRUCT_PK_FMT -- confirm that TPL_DATAPEEK accepts the
      # unwrapped format.
      # NOTE(review): the strings written into `term` are presumably allocated
      # by tpl and owned by the caller -- confirm against the tpl docs.
      peek_result = tpl.tpl_peek(
          tpl.TPL_MEM | tpl.TPL_DATAPEEK,
          data.addr, data.sz,
          LSUP_TERM_PK_FMT,
          &(term.type), &(term.data), &(term.datatype), &(term.lang))

      if peek_result is NULL:
          raise MemoryError('Error deserializing term.')

      # The value returned by tpl_peek is only used as a success indicator;
      # release it right away.
      free(peek_result)
  cdef char *_dup_bytes(bytes src) except NULL:
      """
      Return a NUL-terminated, heap-allocated copy of a Python bytes object.

      The caller owns the returned memory and must release it with ``free()``.

      :raise MemoryError: If the allocation fails.
      """
      cdef:
          size_t sz = len(src) + 1 # Make room for the trailing NUL.
          char *dest = <char *>malloc(sz)

      if dest is NULL:
          raise MemoryError('Could not allocate memory for term string.')
      # The internal buffer of a bytes object is always NUL-terminated, so
      # copying len + 1 bytes carries the terminator along.
      memcpy(dest, <char *>src, sz)

      return dest


  cdef int from_rdflib(term_obj, Term *term) except -1:
      """
      Fill a Term struct from a Python/RDFLib term.

      The string members of the struct are independent heap copies: they
      stay valid after this function returns, and the caller must release
      each non-NULL one with ``free()``. Pointing them at the temporary
      Python bytes objects instead would leave them dangling as soon as
      those objects are garbage-collected on return.

      For URIRef and BNode terms, ``datatype`` and ``lang`` are set to NULL.
      For Literal terms, a missing datatype or language is stored as an
      empty string.

      :param term_obj: A URIRef, BNode or Literal instance.
      :param term: Pointer to the Term to populate. It is left untouched if
          the term type is not supported.

      :raise ValueError: If ``term_obj`` is not a supported term type.
      :raise MemoryError: If a string copy cannot be allocated.
      """
      is_literal = isinstance(term_obj, Literal)
      # Reject unsupported input before allocating or writing anything.
      if not is_literal and not isinstance(term_obj, (URIRef, BNode)):
          raise ValueError(f'Unsupported term type: {type(term_obj)}')

      term[0].data = NULL
      term[0].datatype = NULL
      term[0].lang = NULL

      try:
          term[0].data = _dup_bytes(str(term_obj).encode())
          if is_literal:
              term[0].type = LSUP_TERM_TYPE_LITERAL
              term[0].datatype = _dup_bytes(
                      (term_obj.datatype or '').encode())
              term[0].lang = _dup_bytes((term_obj.language or '').encode())
          elif isinstance(term_obj, URIRef):
              term[0].type = LSUP_TERM_TYPE_URIREF
          else:
              term[0].type = LSUP_TERM_TYPE_BNODE
      except Exception:
          # Do not hand a half-built struct with live allocations back to
          # the caller.
          free(term[0].data)
          free(term[0].datatype)
          free(term[0].lang)
          term[0].data = NULL
          term[0].datatype = NULL
          term[0].lang = NULL
          raise

      return 0
  cdef int serialize_from_rdflib(term_obj, Buffer *data) except -1:
      """
      Serialize a Python/RDFLib term into a binary buffer.

      The resulting buffer is dynamically allocated and must be manually
      freed by the caller (``free(data.addr)``).

      :param term_obj: A URIRef, BNode or Literal instance.
      :param data: Buffer that receives the address and size of the
          serialized data. It is only written to on success.

      :raise ValueError: If ``term_obj`` is not a supported term type.
      :raise MemoryError: If tpl fails to serialize the term.
      """
      cdef:
          Term _term
          void *addr = NULL
          size_t sz = 0
          bytes _data, _datatype, _lang

      # The conversion is done inline rather than through from_rdflib() on
      # purpose: the members of _term point into the bytes objects below,
      # which must stay alive as locals until tpl_jot() has copied them.
      _data = str(term_obj).encode()
      _term.data = _data

      if isinstance(term_obj, Literal):
          _datatype = (term_obj.datatype or '').encode()
          _lang = (term_obj.language or '').encode()
          _term.type = LSUP_TERM_TYPE_LITERAL
          _term.datatype = _datatype
          _term.lang = _lang
      else:
          _term.datatype = NULL
          _term.lang = NULL
          if isinstance(term_obj, URIRef):
              _term.type = LSUP_TERM_TYPE_URIREF
          elif isinstance(term_obj, BNode):
              _term.type = LSUP_TERM_TYPE_BNODE
          else:
              raise ValueError(f'Unsupported term type: {type(term_obj)}')

      # tpl_jot() reports failure with a non-zero return value; in that case
      # addr and sz are not meaningful and must not reach the caller.
      if tpl.tpl_jot(
              tpl.TPL_MEM, &addr, &sz, LSUP_TERM_STRUCT_PK_FMT, &_term) != 0:
          raise MemoryError('Error serializing term.')

      data[0].addr = addr
      data[0].sz = sz

      return 0
  cdef object to_rdflib(const Term *term):
      """
      Return an RDFLib term built from a Term struct.

      For literals, a NULL or empty ``datatype`` / ``lang`` member is passed
      to RDFLib as ``None``; non-empty values are decoded to ``str`` first.
      Handing the raw C strings over would give RDFLib bytes objects, and an
      empty bytes string is not treated as "missing", so RDFLib would see
      both a datatype and a language and reject the literal.

      :param term: Pointer to the source Term. Its ``data`` member must be a
          valid C string.

      :rtype: rdflib.URIRef or rdflib.BNode or rdflib.Literal

      :raise IOError: If the term type code is unknown.
      """
      cdef str data = (<bytes>term[0].data).decode()

      if term[0].type == LSUP_TERM_TYPE_LITERAL:
          datatype = None
          lang = None
          if term[0].datatype is not NULL:
              datatype = (<bytes>term[0].datatype).decode() or None
          if term[0].lang is not NULL:
              lang = (<bytes>term[0].lang).decode() or None

          return Literal(data, datatype=datatype, lang=lang)

      elif term[0].type == LSUP_TERM_TYPE_URIREF:
          return URIRef(data)

      elif term[0].type == LSUP_TERM_TYPE_BNODE:
          return BNode(data)

      raise IOError(f'Unknown term type code: {term[0].type}')
  cdef object deserialize_to_rdflib(const Buffer *data):
      """
      Return a Python/RDFLib term from a serialized Cython term.

      :param data: Buffer holding the serialized term.

      :rtype: rdflib.URIRef or rdflib.BNode or rdflib.Literal

      :raise MemoryError: If the data cannot be deserialized.
      :raise IOError: If the deserialized term type code is unknown.
      """
      cdef Term t

      # Start from NULL so that the cleanup below is safe even if
      # deserialize() fails before every member has been filled.
      t.data = NULL
      t.datatype = NULL
      t.lang = NULL

      try:
          deserialize(data, &t)
          return to_rdflib(&t)
      finally:
          # The strings extracted by tpl are heap copies that nothing else
          # references once the RDFLib term has been built.
          free(t.data)
          free(t.datatype)
          free(t.lang)
  cdef object to_bytes(const Term *term):
      """
      Return a Python bytes object of the serialized term.

      :param term: Pointer to the Term to serialize.

      :rtype: bytes
      """
      cdef:
          Buffer pk_t
          unsigned char *bytestream

      # Keep the cleanup below well-defined even if serialization leaves the
      # buffer untouched.
      pk_t.addr = NULL
      pk_t.sz = 0

      serialize(term, &pk_t)
      bytestream = <unsigned char *>pk_t.addr
      try:
          # Slicing copies the data into a new bytes object.
          return <bytes>(bytestream)[:pk_t.sz]
      finally:
          # The serialized buffer is heap-allocated and no longer needed once
          # its contents have been copied.
          free(bytestream)