benchmark.py 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268
  1. #!/usr/bin/env python3
  2. import logging
  3. import sys
  4. from os import path
  5. from uuid import uuid4
  6. import arrow
  7. import click
  8. import rdflib
  9. import requests
  10. from matplotlib import pyplot as plt
  11. from lakesuperior.util.generators import (
  12. random_image, random_graph, random_utf8_string)
  13. from lakesuperior.exceptions import ResourceNotExistsError
__doc__ = '''
Benchmark script to measure write performance.
'''

# Defaults for the command-line options declared on `run` below.
# Ingestion mode (`--mode`): `ldp` (HTTP) or `python` (direct API).
def_mode = 'ldp'
# Base URL of the LDP endpoint (`--endpoint`); only used in `ldp` mode.
def_endpoint = 'http://localhost:8000/ldp'
# Number of resources to ingest (`--count`).
def_ct = 10000
# Container under which the new resources are created (`--parent`).
def_parent = '/pomegranate'
# Number of triples in each random graph (`--graph-size`).
def_gr_size = 200
# Side length, in pixels, of each random square image (`--image-size`).
def_img_size = 1024

# Silence every logging call of severity WARN and below for the whole
# process, so that the benchmark output only contains its own prints.
logging.disable(logging.WARN)
  24. @click.command()
  25. @click.option(
  26. '--mode', '-m', default=def_mode,
  27. help=(
  28. 'Mode of ingestion. One of `ldp`, `python`. With the former, the '
  29. 'HTTP/LDP web server is used. With the latter, the Python API is '
  30. 'used, in which case the server need not be running. '
  31. f'Default: {def_endpoint}'
  32. )
  33. )
  34. @click.option(
  35. '--endpoint', '-e', default=def_endpoint,
  36. help=(
  37. 'LDP endpoint. Only meaningful with `ldp` mode. '
  38. f'Default: {def_endpoint}'
  39. )
  40. )
  41. @click.option(
  42. '--count', '-c', default=def_ct,
  43. help='Number of resources to ingest. Default: {def_ct}')
  44. @click.option(
  45. '--parent', '-p', default=def_parent,
  46. help='Path to the container resource under which the new resources will be '
  47. 'created. It must begin with a slash (`/`) character. '
  48. f'Default: {def_parent}')
  49. @click.option(
  50. '--delete-container', '-d', is_flag=True,
  51. help='Delete container resource and its children if already existing. By '
  52. 'default, the container is not deleted and new resources are added to it.')
  53. @click.option(
  54. '--method', '-X', default='put',
  55. help=(
  56. 'HTTP method to use. Case insensitive. Either PUT or POST. '
  57. 'Default: PUT'
  58. )
  59. )
  60. @click.option(
  61. '--graph-size', '-s', default=def_gr_size,
  62. help=(
  63. 'Number of triples in each random graph, rounded down to a multiple '
  64. f'of 8. Default: {def_gr_size}'
  65. )
  66. )
  67. @click.option(
  68. '--image-size', '-S', default=def_img_size,
  69. help=(
  70. 'Size of random square image, in pixels for each dimension, rounded '
  71. f'down to a multiple of 8. Default: {def_img_size}'
  72. )
  73. )
  74. @click.option(
  75. '--resource-type', '-t', default='r',
  76. help='Type of resources to ingest. One of `r` (only LDP-RS, i.e. RDF), '
  77. '`n` (only LDP-NR, i.e. binaries), or `b` (50/50% of both). '
  78. 'Default: r')
  79. @click.option(
  80. '--plot', '-P', is_flag=True, help='Plot a graph of ingest timings. '
  81. 'The graph figure is displayed on screen with basic manipulation and save '
  82. 'options.')
  83. def run(
  84. mode, endpoint, count, parent, method, delete_container,
  85. graph_size, image_size, resource_type, plot
  86. ):
  87. """
  88. Run the benchmark.
  89. """
  90. method = method.lower()
  91. if method not in ('post', 'put'):
  92. raise ValueError(f'Insertion method not supported: {method}')
  93. mode = mode.lower()
  94. if mode == 'ldp':
  95. parent = '{}/{}'.format(endpoint.strip('/'), parent.strip('/'))
  96. if delete_container:
  97. print('Removing previously existing container.')
  98. requests.delete(parent)
  99. requests.delete(f'{parent}/fcr:tombstone')
  100. requests.put(parent)
  101. elif mode == 'python':
  102. from lakesuperior import env
  103. env.setup()
  104. from lakesuperior.api import resource as rsrc_api
  105. if delete_container:
  106. try:
  107. print('Removing previously existing container.')
  108. rsrc_api.delete(parent, soft=False)
  109. except ResourceNotExistsError:
  110. pass
  111. rsrc_api.create_or_replace(parent)
  112. else:
  113. raise ValueError(f'Mode not supported: {mode}')
  114. if resource_type != 'r':
  115. # Set image parameters.
  116. ims = max(image_size - image_size % 8, 128)
  117. tn = ims // 32
  118. # URI used to establish an in-repo relationship. This is set to
  119. # the most recently created resource in each loop.
  120. ref = parent
  121. print(f'Inserting {count} children under {parent}.')
  122. wclock_start = arrow.utcnow()
  123. if plot:
  124. print('Results will be plotted.')
  125. # Plot coordinates: X is request count, Y is request timing.
  126. px = []
  127. py = []
  128. plt.xlabel('Requests')
  129. plt.ylabel('ms per request')
  130. plt.title('Lakesuperior / FCREPO Benchmark')
  131. try:
  132. for i in range(1, count + 1):
  133. if mode == 'ldp':
  134. dest = (
  135. f'{parent}/{uuid4()}' if method == 'put'
  136. else parent
  137. )
  138. else:
  139. dest = (
  140. path.join(parent, str(uuid4()))
  141. if method == 'put' else parent
  142. )
  143. if resource_type == 'r' or (resource_type == 'b' and i % 2 == 0):
  144. data = random_graph(graph_size, ref)
  145. headers = {'content-type': 'text/turtle'}
  146. else:
  147. img = random_image(tn=tn, ims=ims)
  148. data = img['content']
  149. data.seek(0)
  150. headers = {
  151. 'content-type': 'image/png',
  152. 'content-disposition': 'attachment; filename="{}"'
  153. .format(uuid4())}
  154. # Start timing after generating the data.
  155. ckpt = arrow.utcnow()
  156. if i == 1:
  157. tcounter = ckpt - ckpt
  158. prev_tcounter = tcounter
  159. #import pdb; pdb.set_trace()
  160. ref = (
  161. _ingest_ldp(
  162. method, dest, data, headers, ref
  163. )
  164. if mode == 'ldp'
  165. else _ingest_py(method, dest, data, ref)
  166. )
  167. tcounter += (arrow.utcnow() - ckpt)
  168. if i % 10 == 0:
  169. avg10 = (tcounter - prev_tcounter) / 10
  170. print(
  171. f'Record: {i}\tTime elapsed: {tcounter}\t'
  172. f'Per resource: {avg10}')
  173. prev_tcounter = tcounter
  174. if plot:
  175. px.append(i)
  176. # Divide by 1000 for µs → ms
  177. py.append(avg10.microseconds // 1000)
  178. except KeyboardInterrupt:
  179. print('Interrupted after {} iterations.'.format(i))
  180. wclock = arrow.utcnow() - wclock_start
  181. print(f'Total elapsed time: {wclock}')
  182. print(f'Total time spent ingesting resources: {tcounter}')
  183. print(f'Average time per resource: {tcounter.total_seconds()/i}')
  184. if plot:
  185. if resource_type == 'r':
  186. type_label = 'LDP-RS'
  187. elif resource_type == 'n':
  188. type_label = 'LDP-NR'
  189. else:
  190. type_label = 'LDP-RS + LDP-NR'
  191. label = (
  192. f'{parent}; {method.upper()}; {graph_size} trp/graph; '
  193. f'{type_label}')
  194. plt.plot(px, py, label=label)
  195. plt.legend()
  196. plt.show()
  197. def _ingest_ldp(method, uri, data, headers, ref):
  198. """
  199. Ingest the graph via HTTP/LDP.
  200. """
  201. if isinstance(data, rdflib.Graph):
  202. data = data.serialize(format='ttl')
  203. rsp = requests.request(method, uri, data=data, headers=headers)
  204. rsp.raise_for_status()
  205. return rsp.headers['location']
  206. def _ingest_py(method, dest, data, ref):
  207. from lakesuperior.api import resource as rsrc_api
  208. kwargs = {}
  209. if isinstance(data, rdflib.Graph):
  210. kwargs['graph'] = data
  211. else:
  212. kwargs['stream'] = data
  213. kwargs['mimetype'] = 'image/png'
  214. if method == 'put':
  215. _, rsrc = rsrc_api.create_or_replace(dest, **kwargs)
  216. else:
  217. rsrc = rsrc_api.create(dest, **kwargs)
  218. return rsrc.uid
# Script entry point. `run` is a click command, so calling it without
# arguments lets click take the options from the command line.
if __name__ == '__main__':
    run()