benchmark.py 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232
  1. #!/usr/bin/env python3
  2. import logging
  3. import sys
  4. from os import path
  5. from uuid import uuid4
  6. import arrow
  7. import click
  8. import rdflib
  9. import requests
  10. from matplotlib import pyplot as plt
  11. from lakesuperior.util.generators import (
  12. random_image, random_graph, random_utf8_string)
  13. __doc__ = '''
  14. Benchmark script to measure write performance.
  15. '''
  16. def_mode = 'ldp'
  17. def_endpoint = 'http://localhost:8000/ldp'
  18. def_ct = 10000
  19. def_parent = '/pomegranate'
  20. def_gr_size = 200
  21. logging.disable(logging.WARN)
  22. @click.command()
  23. @click.option(
  24. '--mode', '-m', default=def_mode,
  25. help=(
  26. 'Mode of ingestion. One of `ldp`, `python`. With the former, the '
  27. 'HTTP/LDP web server is used. With the latter, the Python API is '
  28. 'used, in which case the server need not be running. '
  29. f'Default: {def_endpoint}'
  30. )
  31. )
  32. @click.option(
  33. '--endpoint', '-e', default=def_endpoint,
  34. help=(
  35. 'LDP endpoint. Only meaningful with `ldp` mode. '
  36. f'Default: {def_endpoint}'
  37. )
  38. )
  39. @click.option(
  40. '--count', '-c', default=def_ct,
  41. help='Number of resources to ingest. Default: {def_ct}')
  42. @click.option(
  43. '--parent', '-p', default=def_parent,
  44. help='Path to the container resource under which the new resources will be '
  45. 'created. It must begin with a slash (`/`) character. '
  46. f'Default: {def_parent}')
  47. @click.option(
  48. '--delete-container', '-d', is_flag=True,
  49. help='Delete container resource and its children if already existing. By '
  50. 'default, the container is not deleted and new resources are added to it.')
  51. @click.option(
  52. '--method', '-X', default='put',
  53. help=(
  54. 'HTTP method to use. Case insensitive. Either PUT or POST. '
  55. 'Default: PUT'
  56. )
  57. )
  58. @click.option(
  59. '--graph-size', '-s', default=def_gr_size,
  60. help=f'Number of triples in each graph. Default: {def_gr_size}')
  61. @click.option(
  62. '--resource-type', '-t', default='r',
  63. help='Type of resources to ingest. One of `r` (only LDP-RS, i.e. RDF), '
  64. '`n` (only LDP-NR, i.e. binaries), or `b` (50/50% of both). '
  65. 'Default: r')
  66. @click.option(
  67. '--plot', '-P', is_flag=True, help='Plot a graph of ingest timings. '
  68. 'The graph figure is displayed on screen with basic manipulation and save '
  69. 'options.')
  70. def run(
  71. mode, endpoint, count, parent, method, delete_container,
  72. graph_size, resource_type, plot
  73. ):
  74. """
  75. Run the benchmark.
  76. """
  77. method = method.lower()
  78. if method not in ('post', 'put'):
  79. raise ValueError(f'Insertion method not supported: {method}')
  80. mode = mode.lower()
  81. if mode == 'ldp':
  82. parent = '{}/{}'.format(endpoint.strip('/'), parent.strip('/'))
  83. if delete_container:
  84. requests.delete(parent, headers={'prefer': 'no-tombstone'})
  85. requests.put(parent)
  86. elif mode == 'python':
  87. from lakesuperior import env_setup
  88. from lakesuperior.api import resource as rsrc_api
  89. if delete_container:
  90. rsrc_api.delete(parent, soft=False)
  91. rsrc_api.create_or_replace(parent)
  92. else:
  93. raise ValueError(f'Mode not supported: {mode}')
  94. # URI used to establish an in-repo relationship. This is set to
  95. # the most recently created resource in each loop.
  96. ref = parent
  97. print(f'Inserting {count} children under {parent}.')
  98. wclock_start = arrow.utcnow()
  99. if plot:
  100. print('Results will be plotted.')
  101. # Plot coordinates: X is request count, Y is request timing.
  102. px = []
  103. py = []
  104. plt.xlabel('Requests')
  105. plt.ylabel('ms per request')
  106. plt.title('Lakesuperior / FCREPO Benchmark')
  107. try:
  108. for i in range(1, count + 1):
  109. #import pdb; pdb.set_trace()
  110. if mode == 'ldp':
  111. dest = (
  112. f'{parent}/{uuid4()}' if method == 'put'
  113. else parent
  114. )
  115. else:
  116. dest = (
  117. path.join(parent, str(uuid4()))
  118. if method == 'put' else parent
  119. )
  120. if resource_type == 'r' or (resource_type == 'b' and i % 2 == 0):
  121. data = random_graph(graph_size, ref)
  122. headers = {'content-type': 'text/turtle'}
  123. else:
  124. img = random_image(name=uuid4(), ts=16, ims=512)
  125. data = img['content']
  126. data.seek(0)
  127. headers = {
  128. 'content-type': 'image/png',
  129. 'content-disposition': 'attachment; filename="{}"'
  130. .format(uuid4())}
  131. # Start timing after generating the data.
  132. ckpt = arrow.utcnow()
  133. if i == 1:
  134. tcounter = ckpt - ckpt
  135. prev_tcounter = tcounter
  136. ref = (
  137. _ingest_graph_ldp(
  138. method, dest, data.serialize(format='ttl'), headers, ref
  139. )
  140. if mode == 'ldp'
  141. else _ingest_graph_py(method, dest, data, ref)
  142. )
  143. tcounter += (arrow.utcnow() - ckpt)
  144. if i % 10 == 0:
  145. avg10 = (tcounter - prev_tcounter) / 10
  146. print(
  147. f'Record: {i}\tTime elapsed: {tcounter}\t'
  148. f'Per resource: {avg10}')
  149. prev_tcounter = tcounter
  150. if plot:
  151. px.append(i)
  152. # Divide by 1000 for µs → ms
  153. py.append(avg10.microseconds // 1000)
  154. except KeyboardInterrupt:
  155. print('Interrupted after {} iterations.'.format(i))
  156. wclock = arrow.utcnow() - wclock_start
  157. print(f'Total elapsed time: {wclock}')
  158. print(f'Total time spent ingesting resources: {tcounter}')
  159. print(f'Average time per resource: {tcounter.total_seconds()/i}')
  160. if plot:
  161. if resource_type == 'r':
  162. type_label = 'LDP-RS'
  163. elif resource_type == 'n':
  164. type_label = 'LDP-NR'
  165. else:
  166. type_label = 'LDP-RS + LDP-NR'
  167. label = (
  168. f'{parent}; {method.upper()}; {graph_size} trp/graph; '
  169. f'{type_label}')
  170. plt.plot(px, py, label=label)
  171. plt.legend()
  172. plt.show()
  173. def _ingest_graph_ldp(method, uri, data, headers, ref):
  174. """
  175. Ingest the graph via HTTP/LDP.
  176. """
  177. rsp = requests.request(method, uri, data=data, headers=headers)
  178. rsp.raise_for_status()
  179. return rsp.headers['location']
  180. def _ingest_graph_py(method, dest, data, ref):
  181. from lakesuperior.api import resource as rsrc_api
  182. kwargs = {}
  183. if isinstance(data, rdflib.Graph):
  184. kwargs['graph'] = data
  185. else:
  186. kwargs['stream'] = data
  187. kwargs['mimetype'] = 'image/png'
  188. if method == 'put':
  189. _, rsrc = rsrc_api.create_or_replace(dest, **kwargs)
  190. else:
  191. _, rsrc = rsrc_api.create(dest, **kwargs)
  192. return rsrc.uid
# Invoke the click command only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    run()