benchmark.py 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238
  1. #!/usr/bin/env python3
  2. import logging
  3. import sys
  4. from os import path
  5. from uuid import uuid4
  6. import arrow
  7. import click
  8. import rdflib
  9. import requests
  10. from matplotlib import pyplot as plt
  11. from lakesuperior.util.generators import (
  12. random_image, random_graph, random_utf8_string)
  13. from lakesuperior.exceptions import ResourceNotExistsError
  14. __doc__ = '''
  15. Benchmark script to measure write performance.
  16. '''
  17. def_mode = 'ldp'
  18. def_endpoint = 'http://localhost:8000/ldp'
  19. def_ct = 10000
  20. def_parent = '/pomegranate'
  21. def_gr_size = 200
  22. logging.disable(logging.WARN)
  23. @click.command()
  24. @click.option(
  25. '--mode', '-m', default=def_mode,
  26. help=(
  27. 'Mode of ingestion. One of `ldp`, `python`. With the former, the '
  28. 'HTTP/LDP web server is used. With the latter, the Python API is '
  29. 'used, in which case the server need not be running. '
  30. f'Default: {def_endpoint}'
  31. )
  32. )
  33. @click.option(
  34. '--endpoint', '-e', default=def_endpoint,
  35. help=(
  36. 'LDP endpoint. Only meaningful with `ldp` mode. '
  37. f'Default: {def_endpoint}'
  38. )
  39. )
  40. @click.option(
  41. '--count', '-c', default=def_ct,
  42. help='Number of resources to ingest. Default: {def_ct}')
  43. @click.option(
  44. '--parent', '-p', default=def_parent,
  45. help='Path to the container resource under which the new resources will be '
  46. 'created. It must begin with a slash (`/`) character. '
  47. f'Default: {def_parent}')
  48. @click.option(
  49. '--delete-container', '-d', is_flag=True,
  50. help='Delete container resource and its children if already existing. By '
  51. 'default, the container is not deleted and new resources are added to it.')
  52. @click.option(
  53. '--method', '-X', default='put',
  54. help=(
  55. 'HTTP method to use. Case insensitive. Either PUT or POST. '
  56. 'Default: PUT'
  57. )
  58. )
  59. @click.option(
  60. '--graph-size', '-s', default=def_gr_size,
  61. help=f'Number of triples in each graph. Default: {def_gr_size}')
  62. @click.option(
  63. '--resource-type', '-t', default='r',
  64. help='Type of resources to ingest. One of `r` (only LDP-RS, i.e. RDF), '
  65. '`n` (only LDP-NR, i.e. binaries), or `b` (50/50% of both). '
  66. 'Default: r')
  67. @click.option(
  68. '--plot', '-P', is_flag=True, help='Plot a graph of ingest timings. '
  69. 'The graph figure is displayed on screen with basic manipulation and save '
  70. 'options.')
  71. def run(
  72. mode, endpoint, count, parent, method, delete_container,
  73. graph_size, resource_type, plot
  74. ):
  75. """
  76. Run the benchmark.
  77. """
  78. method = method.lower()
  79. if method not in ('post', 'put'):
  80. raise ValueError(f'Insertion method not supported: {method}')
  81. mode = mode.lower()
  82. if mode == 'ldp':
  83. parent = '{}/{}'.format(endpoint.strip('/'), parent.strip('/'))
  84. if delete_container:
  85. print('Removing previously existing container.')
  86. requests.delete(parent, headers={'prefer': 'no-tombstone'})
  87. requests.put(parent)
  88. elif mode == 'python':
  89. from lakesuperior import env_setup
  90. from lakesuperior.api import resource as rsrc_api
  91. if delete_container:
  92. try:
  93. print('Removing previously existing container.')
  94. rsrc_api.delete(parent, soft=False)
  95. except ResourceNotExistsError:
  96. pass
  97. rsrc_api.create_or_replace(parent)
  98. else:
  99. raise ValueError(f'Mode not supported: {mode}')
  100. # URI used to establish an in-repo relationship. This is set to
  101. # the most recently created resource in each loop.
  102. ref = parent
  103. print(f'Inserting {count} children under {parent}.')
  104. wclock_start = arrow.utcnow()
  105. if plot:
  106. print('Results will be plotted.')
  107. # Plot coordinates: X is request count, Y is request timing.
  108. px = []
  109. py = []
  110. plt.xlabel('Requests')
  111. plt.ylabel('ms per request')
  112. plt.title('Lakesuperior / FCREPO Benchmark')
  113. try:
  114. for i in range(1, count + 1):
  115. #import pdb; pdb.set_trace()
  116. if mode == 'ldp':
  117. dest = (
  118. f'{parent}/{uuid4()}' if method == 'put'
  119. else parent
  120. )
  121. else:
  122. dest = (
  123. path.join(parent, str(uuid4()))
  124. if method == 'put' else parent
  125. )
  126. if resource_type == 'r' or (resource_type == 'b' and i % 2 == 0):
  127. data = random_graph(graph_size, ref)
  128. headers = {'content-type': 'text/turtle'}
  129. else:
  130. img = random_image(name=uuid4(), ts=16, ims=512)
  131. data = img['content']
  132. data.seek(0)
  133. headers = {
  134. 'content-type': 'image/png',
  135. 'content-disposition': 'attachment; filename="{}"'
  136. .format(uuid4())}
  137. # Start timing after generating the data.
  138. ckpt = arrow.utcnow()
  139. if i == 1:
  140. tcounter = ckpt - ckpt
  141. prev_tcounter = tcounter
  142. ref = (
  143. _ingest_graph_ldp(
  144. method, dest, data.serialize(format='ttl'), headers, ref
  145. )
  146. if mode == 'ldp'
  147. else _ingest_graph_py(method, dest, data, ref)
  148. )
  149. tcounter += (arrow.utcnow() - ckpt)
  150. if i % 10 == 0:
  151. avg10 = (tcounter - prev_tcounter) / 10
  152. print(
  153. f'Record: {i}\tTime elapsed: {tcounter}\t'
  154. f'Per resource: {avg10}')
  155. prev_tcounter = tcounter
  156. if plot:
  157. px.append(i)
  158. # Divide by 1000 for µs → ms
  159. py.append(avg10.microseconds // 1000)
  160. except KeyboardInterrupt:
  161. print('Interrupted after {} iterations.'.format(i))
  162. wclock = arrow.utcnow() - wclock_start
  163. print(f'Total elapsed time: {wclock}')
  164. print(f'Total time spent ingesting resources: {tcounter}')
  165. print(f'Average time per resource: {tcounter.total_seconds()/i}')
  166. if plot:
  167. if resource_type == 'r':
  168. type_label = 'LDP-RS'
  169. elif resource_type == 'n':
  170. type_label = 'LDP-NR'
  171. else:
  172. type_label = 'LDP-RS + LDP-NR'
  173. label = (
  174. f'{parent}; {method.upper()}; {graph_size} trp/graph; '
  175. f'{type_label}')
  176. plt.plot(px, py, label=label)
  177. plt.legend()
  178. plt.show()
  179. def _ingest_graph_ldp(method, uri, data, headers, ref):
  180. """
  181. Ingest the graph via HTTP/LDP.
  182. """
  183. rsp = requests.request(method, uri, data=data, headers=headers)
  184. rsp.raise_for_status()
  185. return rsp.headers['location']
  186. def _ingest_graph_py(method, dest, data, ref):
  187. from lakesuperior.api import resource as rsrc_api
  188. kwargs = {}
  189. if isinstance(data, rdflib.Graph):
  190. kwargs['graph'] = data
  191. else:
  192. kwargs['stream'] = data
  193. kwargs['mimetype'] = 'image/png'
  194. if method == 'put':
  195. _, rsrc = rsrc_api.create_or_replace(dest, **kwargs)
  196. else:
  197. _, rsrc = rsrc_api.create(dest, **kwargs)
  198. return rsrc.uid
  199. if __name__ == '__main__':
  200. run()