codec_base.h 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193
  1. #ifndef _LSUP_CODEC_BASE_H
  2. #define _LSUP_CODEC_BASE_H
  3. #include "graph.h"
  4. typedef struct codec_t LSUP_Codec;
  5. /** @brief Codec iterator type.
  6. *
  7. * This structure holds state data including input and output for encoding a
  8. * graph into RDF. Normally it should not be inspected or manipulated directly,
  9. * but rather passed to codec iteration functions for processing RDF.
  10. *
  11. * NOTE: This should be used as an opaque handle, however it is exposed here
  12. * for easier inclusion into each codec.
  13. */
  14. typedef struct codec_iter_t {
  15. const LSUP_Codec * codec; // Codec that generated this iterator.
  16. LSUP_Triple * trp; // RDF fragment being encoded.
  17. LSUP_GraphIterator *gr_it; // Graph iterator.
  18. const LSUP_NSMap * nsm; // Namespace map.
  19. size_t cur; // Internal cursor.
  20. LSUP_rc rc; // Internal return code.
  21. char * rep, // String representation of a RDF fragment.
  22. * str_s, // Temporary string.
  23. * str_p, // Temporary string.
  24. * str_o; // Temporary string.
  25. } LSUP_CodecIterator;
  /* TODO Parse error information. A plain string will suffice for now.
  typedef struct parse_error_t {
  unsigned int line; // Line number where the error occurred.
  unsigned int linec; // Position in line of the offending token.
  char * token; // String representation of the token.
  } LSUP_ParseError;
  */
  36. /** @brief Term encoder callback type.
  37. *
  38. * @param[in] term Single term handle.
  39. *
  40. * @param[in] nsm Namespace map. May be NULL for no prefix shortening.
  41. *
  42. * @param[out] rep Pointer to a string to be filled with the encoded term. The
  43. * caller is in charge of freeing the string after use. Returns undefined on
  44. * error.
  45. *
  46. * @return LSUP_OK on successful encoding; <0 for other errors.
  47. */
  48. typedef LSUP_rc (*term_enc_fn_t)(
  49. const LSUP_Term *term, const LSUP_NSMap *nsm, char **rep);
  50. /** @brief Initialize a graph encoding loop.
  51. *
  52. * This prototype is to be implemented by graph encoding loops. It should
  53. * create an iterator and perform all initial setup for finding triples.
  54. *
  55. * Implementations MUST set the "codec" member of the iterator to the address
  56. * of the codec that generated it.
  57. *
  58. * @param[in] gr The graph to be encoded. The graph's namespace map is used by
  59. * the codec for namespace prefixing. The graph may only be freed after the
  60. * loop is finalized.
  61. *
  62. * @return A codec iterator handle to be passed to a #gr_codec_iter_fn_t
  63. * function and, eventually, to a #gr_codec_done_fn_t function.
  64. */
  65. typedef LSUP_CodecIterator * (*gr_encode_init_fn_t)(const LSUP_Graph *gr);
  66. /** @brief Perform one encoding iteration.
  67. *
  68. * Implementations of this prototype MUST perform all the steps to encode one
  69. * or more complete triples into an RDF fragment representing those triples.
  70. * The input and output units are up to the implementation and a caller SHOULD
  71. * assume that multiple lines may be yielded at each iteration.
  72. *
  73. * @param[in] it Iterator handle.
  74. *
  75. * @param[out] res Handle to be populated with a string obtained from encoding.
  76. * The output data should be UTF-8 [TODO or UTF-16] encoded. This pointer
  77. * must be initialized (even to NULL) and should be eventually freed manually
  78. * at the end of the loop. It is reallocated at each iteration, so memory from
  79. * a previous iteration may be overwritten with new data.
  80. *
  81. * @return LSUP_OK if a new token was processed; LSUP_END if the end of the
  82. * loop was reached.
  83. */
  84. typedef LSUP_rc (*gr_encode_iter_fn_t)(
  85. LSUP_CodecIterator *it, unsigned char **res);
  86. /** @brief Finalize an encoding operation.
  87. *
  88. * Implementations SHOULD use this function to perform all necessary steps to
  89. * clean up memory and free the iterator handle after a graph has been
  90. * completely encoded.
  91. *
  92. * @param[in] it Iterator handle.
  93. */
  94. typedef void (*gr_encode_done_fn_t)(LSUP_CodecIterator *it);
  95. /** @brief Prototype for decoding a string into a LSUP_Term.
  96. *
  97. * Implementations MAY ignore any other tokens after finding the first one.
  98. *
  99. * @param[in] rep NT representation of the term.
  100. *
  101. * @param[in] nsm Namespace map handle.
  102. *
  103. * @param[out] Pointer to the term handle to be created. Implementaions SHOULD
  104. * return NULL on a parse error.
  105. *
  106. * @return Implementations MUST return LSUP_OK on success and a negative value
  107. * on parsing error.
  108. */
  109. typedef LSUP_rc (*term_decode_fn_t)(
  110. const char *rep, const LSUP_NSMap *nsm, LSUP_Term **term);
  111. /** @brief Prototype for decoding a complete RDF document into a graph.
  112. *
  113. * Implementations SHOULD consume data from the file handle in chunks.
  114. *
  115. * @param[in] rep Open file handle pointing to the RDF data. Implementations
  116. * MUST NOT close the file handle.
  117. *
  118. * @param[out] gr Pointer to a graph handle to be generated from decoding.
  119. *
  120. * @param[out] ct If not NULL, it may be populated with the number of triples
  121. * parsed (which may be different from the resulting graph size).
  122. * Implementations may choose not not use this, and they must account for the
  123. * value to be NULL.
  124. *
  125. * @param[out] err Pointer to error info string. If no error occurs, it yields
  126. * NULL.
  127. *
  128. * @return Implementations MUST return LSUP_OK on success and a negative value
  129. * on parsing error.
  130. */
  131. typedef LSUP_rc (*gr_decode_fn_t)(
  132. FILE *rep, LSUP_Graph **gr, size_t *ct, char **err);
  133. /** @brief Codec structure.
  134. *
  135. * An instance of this structure is usually defined at compile time (see
  136. * examples in "include/codec_*.h" and "src/codec_*.c") and should have the
  137. * following defined:
  138. *
  139. * - name: A brief (16-char max), human-readable to identify the codec.
  140. * - mimetype: MIME type (32-char max) associated with the codec.
  141. * - extension: File extension associated with the serialized file.
  142. *
  143. * - encode_term: Encode a single term.
  144. *
  145. * - encode_graph_init: Initialize a graph decoding loop.
  146. * - encode_graph_iter: Run one iteration of encoding on one or more triples.
  147. * - encode_graph_done: Finalize the encoding loop and free the support data.
  148. *
  149. * - decode_term: Decode a single term.
  150. * - decode_graph: Decode a RDF document into a graph.
  151. *
  152. * For documentation on the individual encoding and decoding callbacks, see the
  153. * related function prototypes.
  154. */
  155. struct codec_t {
  156. char name[16]; // Name of the codec.
  157. char mimetype[32]; // MIME type associated with the codec.
  158. char extension[8]; // Serialized file extension.
  159. // Encoding.
  160. term_enc_fn_t encode_term; // Term encoder function.
  161. gr_encode_init_fn_t encode_graph_init; // Graph encoder initialization.
  162. gr_encode_iter_fn_t encode_graph_iter; // Graph encoder iteration.
  163. gr_encode_done_fn_t encode_graph_done; // Graph encoder finalization.
  164. // Decoding.
  165. term_decode_fn_t decode_term; // Term decoder function.
  166. gr_decode_fn_t decode_graph; // Graph decoder function.
  167. };
  168. #endif