term.pyx 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181
  1. from rdflib import URIRef, BNode, Literal
  2. #from cpython.mem cimport PyMem_Malloc, PyMem_Free
  3. from libc.stdint cimport uint64_t
  4. from libc.stdlib cimport free
  5. from lakesuperior.cy_include cimport cytpl as tpl
  6. from lakesuperior.model.base cimport Buffer
  # Numeric codes stored in the ``type`` member of a Term struct to tell
  # apart the RDF term kinds handled by this module.
  DEF LSUP_TERM_TYPE_URIREF = 1
  DEF LSUP_TERM_TYPE_BNODE = 2
  DEF LSUP_TERM_TYPE_LITERAL = 3
  # tpl format string for the members of a term: one char followed by three
  # strings. It is the format passed to ``tpl_peek`` when reading back the
  # ``type``, ``data``, ``datatype`` and ``lang`` members, in that order.
  DEF LSUP_TERM_PK_FMT = b'csss' # Reflects the Term structure
  # The same members wrapped in ``S(...)``; this is the format passed to
  # ``tpl_jot`` when a whole Term struct is packed.
  DEF LSUP_TERM_STRUCT_PK_FMT = b'S(' + LSUP_TERM_PK_FMT + b')'
  cdef int serialize(const Term *term, Buffer *sterm) except -1:
      """
      Serialize a Term into a binary buffer.

      The term is packed in memory with ``tpl_jot`` using the
      ``LSUP_TERM_STRUCT_PK_FMT`` format. The address and size of the packed
      data are stored in ``sterm``.

      The returned result is dynamically allocated and must be manually freed.

      :param term: Pointer to the term to serialize.
      :param sterm: Pointer to the buffer that receives the address and size
          of the serialized data. It is only written to on success.

      :return: 0 on success.
      :raise MemoryError: If ``tpl_jot`` reports a failure.
      """
      cdef:
          unsigned char *addr = NULL
          size_t sz = 0

      # tpl_jot signals failure with a non-zero return code. In that case the
      # output address and size are not reliable and must not be handed back
      # to the caller.
      if tpl.tpl_jot(
              tpl.TPL_MEM, &addr, &sz, LSUP_TERM_STRUCT_PK_FMT, term) != 0:
          raise MemoryError('Error serializing term.')

      sterm[0].addr = addr
      sterm[0].sz = sz

      return 0
  cdef int deserialize(const Buffer *data, Term *term) except -1:
      """
      Populate a term from serialized binary data.

      The members of ``term`` (``type``, ``data``, ``datatype``, ``lang``)
      are read straight out of the serialized image with ``tpl_peek`` in
      data-peek mode, using the ``LSUP_TERM_PK_FMT`` format.

      :param data: Pointer to the buffer holding the serialized term.
      :param term: Pointer to the term struct to fill in.

      :return: 0 on success.
      :raise MemoryError: If ``tpl_peek`` returns NULL.
      """
      peeked = tpl.tpl_peek(
          tpl.TPL_MEM | tpl.TPL_DATAPEEK,
          data.addr, data.sz,
          LSUP_TERM_PK_FMT,
          &term.type, &term.data, &term.datatype, &term.lang)

      if peeked is NULL:
          raise MemoryError('Error deserializing term.')

      # The pointer returned by tpl_peek is not needed beyond the NULL check.
      free(peeked)
  cdef int from_rdflib(term_obj, Term *term) except -1:
      """
      Fill a Term struct from a Python/RDFLib term.

      ``data`` is set to the UTF-8 encoded string value of the term. For a
      literal, ``datatype`` and ``lang`` are set to the encoded data type and
      language tag, or to an empty string where the literal has none. For a
      URI reference or a blank node they are set to NULL.

      .. warning::
          The pointers stored in ``term`` reference the internal buffers of
          temporary ``bytes`` objects created in this function. Those objects
          are released when the function returns, so the pointers must not be
          relied upon afterwards. NOTE(review): giving the struct its own
          copy of the strings requires an ownership contract with the callers
          and is left as a separate change.

      :param term_obj: RDFLib ``URIRef``, ``BNode`` or ``Literal``.
      :param term: Pointer to the term struct to fill in.

      :return: 0 on success.
      :raise ValueError: If ``term_obj`` is none of the supported types.
      """
      _data = str(term_obj).encode()
      term[0].data = _data

      if isinstance(term_obj, Literal):
          _datatype = (term_obj.datatype or '').encode()
          _lang = (term_obj.language or '').encode()

          term[0].type = LSUP_TERM_TYPE_LITERAL
          term[0].datatype = _datatype
          term[0].lang = _lang
      else:
          term[0].datatype = NULL
          term[0].lang = NULL

          if isinstance(term_obj, URIRef):
              term[0].type = LSUP_TERM_TYPE_URIREF
          elif isinstance(term_obj, BNode):
              term[0].type = LSUP_TERM_TYPE_BNODE
          else:
              raise ValueError(f'Unsupported term type: {type(term_obj)}')

      return 0
  cdef int serialize_from_rdflib(term_obj, Buffer *data) except -1:
      """
      Serialize a Python/RDFLib term into a Buffer struct.

      The term is first converted to a temporary Term struct and then packed
      in memory with ``tpl_jot`` using the ``LSUP_TERM_STRUCT_PK_FMT``
      format. The address and size of the packed data are stored in ``data``.

      The packed data are dynamically allocated and must be freed by the
      caller.

      :param term_obj: RDFLib ``URIRef``, ``BNode`` or ``Literal``.
      :param data: Pointer to the buffer that receives the address and size
          of the serialized data. It is only written to on success.

      :return: 0 on success.
      :raise ValueError: If ``term_obj`` is none of the supported types.
      :raise MemoryError: If ``tpl_jot`` reports a failure.
      """
      cdef:
          Term _term
          void *addr = NULL
          size_t sz = 0

      # The conversion from the RDFLib term is done inline rather than by
      # calling from_rdflib(): the struct members point into the encoded
      # bytes objects below, which must stay referenced until the struct has
      # been packed.
      _data = str(term_obj).encode()
      _term.data = _data

      if isinstance(term_obj, Literal):
          _datatype = (term_obj.datatype or '').encode()
          _lang = (term_obj.language or '').encode()

          _term.type = LSUP_TERM_TYPE_LITERAL
          _term.datatype = _datatype
          _term.lang = _lang
      else:
          _term.datatype = NULL
          _term.lang = NULL

          if isinstance(term_obj, URIRef):
              _term.type = LSUP_TERM_TYPE_URIREF
          elif isinstance(term_obj, BNode):
              _term.type = LSUP_TERM_TYPE_BNODE
          else:
              raise ValueError(f'Unsupported term type: {type(term_obj)}')

      # tpl_jot signals failure with a non-zero return code. In that case the
      # output address and size are not reliable and must not be handed back
      # to the caller.
      if tpl.tpl_jot(
              tpl.TPL_MEM, &addr, &sz, LSUP_TERM_STRUCT_PK_FMT, &_term) != 0:
          raise MemoryError('Error serializing term.')

      data[0].addr = addr
      data[0].sz = sz

      return 0
  cdef object to_rdflib(const Term *term):
      """
      Return an RDFLib term built from a Term struct.

      For a literal, the language tag and the data type are decoded to
      strings, and a NULL or empty value is treated as "not set". RDFLib
      does not accept a literal with both a language tag and a data type, so
      the language tag takes precedence if both are set.

      :param term: Pointer to the term to convert.

      :return: An RDFLib ``Literal``, ``URIRef`` or ``BNode``.
      :raise IOError: If the type code of the term is not a known one.
      """
      cdef str data = (<bytes>term[0].data).decode()

      if term[0].type == LSUP_TERM_TYPE_LITERAL:
          lang = ''
          datatype = ''
          # Only coerce the C strings to Python objects if they are there.
          if term[0].lang is not NULL:
              lang = (<bytes>term[0].lang).decode()
          if term[0].datatype is not NULL:
              datatype = (<bytes>term[0].datatype).decode()

          if lang:
              return Literal(data, lang=lang)
          if datatype:
              return Literal(data, datatype=URIRef(datatype))
          return Literal(data)

      elif term[0].type == LSUP_TERM_TYPE_URIREF:
          return URIRef(data)

      elif term[0].type == LSUP_TERM_TYPE_BNODE:
          return BNode(data)

      else:
          raise IOError(f'Unknown term type code: {term[0].type}')
  cdef object deserialize_to_rdflib(const Buffer *data):
      """
      Return a Python/RDFLib term from a serialized Cython term.

      The data are unpacked into a temporary Term struct, which is converted
      to an RDFLib term and then released.

      :param data: Pointer to the buffer holding the serialized term.

      :return: An RDFLib ``Literal``, ``URIRef`` or ``BNode``.
      :raise MemoryError: If the data cannot be deserialized.
      :raise IOError: If the type code of the term is not a known one.
      """
      cdef Term t

      deserialize(data, &t)

      try:
          return to_rdflib(&t)
      finally:
          # The strings extracted by tpl_peek in data-peek mode are allocated
          # for the caller, who is responsible for freeing them. The RDFLib
          # term holds its own copies at this point. free() accepts NULL.
          free(t.data)
          free(t.datatype)
          free(t.lang)
  cdef object to_bytes(const Term *term):
      """
      Return a Python bytes object of the serialized term.

      :param term: Pointer to the term to serialize.

      :return: The serialized term, copied into a ``bytes`` object.
      :raise MemoryError: If the term cannot be serialized.
      """
      cdef:
          Buffer pk_t
          unsigned char *bytestream

      serialize(term, &pk_t)
      bytestream = <unsigned char *>pk_t.addr

      try:
          # Slicing the pointer copies the data into a new bytes object.
          return <bytes>bytestream[:pk_t.sz]
      finally:
          # The buffer filled by serialize() is dynamically allocated and is
          # not needed once its contents have been copied.
          free(pk_t.addr)