benchmark.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
  1. #!/usr/bin/env python3
  2. import sys
  3. from uuid import uuid4
  4. import arrow
  5. import click
  6. import requests
  7. from matplotlib import pyplot as plt
  8. from lakesuperior.util.generators import (
  9. random_image, random_graph, random_utf8_string)
  10. __doc__ = '''
  11. Benchmark script to measure write performance.
  12. '''
  13. def_endpoint = 'http://localhost:8000/ldp'
  14. def_ct = 10000
  15. def_parent = '/pomegranate'
  16. def_gr_size = 200
  17. @click.command()
  18. @click.option(
  19. '--endpoint', '-e', default=def_endpoint,
  20. help=f'LDP endpoint. Default: {def_endpoint}')
  21. @click.option(
  22. '--count', '-c', default=def_ct,
  23. help='Number of resources to ingest. Default: {def_ct}')
  24. @click.option(
  25. '--parent', '-p', default=def_parent,
  26. help='Path to the container resource under which the new resources will be '
  27. 'created. It must begin with a slash (`/`) character. '
  28. f'Default: {def_parent}')
  29. @click.option(
  30. '--delete-container', '-d', is_flag=True,
  31. help='Delete container resource and its children if already existing. By '
  32. 'default, the container is not deleted and new resources are added to it.')
  33. @click.option(
  34. '--method', '-m', default='put',
  35. help='HTTP method to use. Case insensitive. Either PUT '
  36. f'or POST. Default: PUT')
  37. @click.option(
  38. '--graph-size', '-s', default=def_gr_size,
  39. help=f'Number of triples in each graph. Default: {def_gr_size}')
  40. @click.option(
  41. '--resource-type', '-t', default='r',
  42. help='Type of resources to ingest. One of `r` (only LDP-RS, i.e. RDF), '
  43. '`n` (only LDP-NR, i.e. binaries), or `b` (50/50% of both). '
  44. 'Default: r')
  45. @click.option(
  46. '--graph', '-g', is_flag=True, help='Plot a graph of ingest timings. '
  47. 'The graph figure is displayed on screen with basic manipulation and save '
  48. 'options.')
  49. def run(
  50. endpoint, count, parent, method, delete_container,
  51. graph_size, resource_type, graph):
  52. container_uri = endpoint + parent
  53. method = method.lower()
  54. if method not in ('post', 'put'):
  55. raise ValueError(f'HTTP method not supported: {method}')
  56. if delete_container:
  57. requests.delete(container_uri, headers={'prefer': 'no-tombstone'})
  58. requests.put(container_uri)
  59. print(f'Inserting {count} children under {container_uri}.')
  60. # URI used to establish an in-repo relationship. This is set to
  61. # the most recently created resource in each loop.
  62. ref = container_uri
  63. wclock_start = arrow.utcnow()
  64. if graph:
  65. print('Results will be plotted.')
  66. # Plot coordinates: X is request count, Y is request timing.
  67. px = []
  68. py = []
  69. plt.xlabel('Requests')
  70. plt.ylabel('ms per request')
  71. plt.title('FCREPO Benchmark')
  72. try:
  73. for i in range(1, count + 1):
  74. url = '{}/{}'.format(container_uri, uuid4()) if method == 'put' \
  75. else container_uri
  76. if resource_type == 'r' or (resource_type == 'b' and i % 2 == 0):
  77. data = random_graph(graph_size, ref).serialize(format='ttl')
  78. headers = {'content-type': 'text/turtle'}
  79. else:
  80. img = random_image(name=uuid4(), ts=16, ims=512)
  81. data = img['content']
  82. data.seek(0)
  83. headers = {
  84. 'content-type': 'image/png',
  85. 'content-disposition': 'attachment; filename="{}"'
  86. .format(uuid4())}
  87. #import pdb; pdb.set_trace()
  88. # Start timing after generating the data.
  89. ckpt = arrow.utcnow()
  90. if i == 1:
  91. tcounter = ckpt - ckpt
  92. prev_tcounter = tcounter
  93. rsp = requests.request(method, url, data=data, headers=headers)
  94. tdelta = arrow.utcnow() - ckpt
  95. tcounter += tdelta
  96. rsp.raise_for_status()
  97. ref = rsp.headers['location']
  98. if i % 10 == 0:
  99. avg10 = (tcounter - prev_tcounter) / 10
  100. print(
  101. f'Record: {i}\tTime elapsed: {tcounter}\t'
  102. f'Per resource: {avg10}')
  103. prev_tcounter = tcounter
  104. if graph:
  105. px.append(i)
  106. # Divide by 1000 for µs → ms
  107. py.append(avg10.microseconds // 1000)
  108. except KeyboardInterrupt:
  109. print('Interrupted after {} iterations.'.format(i))
  110. wclock = arrow.utcnow() - wclock_start
  111. print(f'Total elapsed time: {wclock}')
  112. print(f'Total time spent ingesting resources: {tcounter}')
  113. print(f'Average time per resource: {tcounter.total_seconds()/i}')
  114. if graph:
  115. if resource_type == 'r':
  116. type_label = 'LDP-RS'
  117. elif resource_type == 'n':
  118. type_label = 'LDP-NR'
  119. else:
  120. type_label = 'LDP-RS + LDP-NR'
  121. label = (
  122. f'{container_uri}; {method.upper()}; {graph_size} trp/graph; '
  123. f'{type_label}')
  124. plt.plot(px, py, label=label)
  125. plt.legend()
  126. plt.show()
  127. if __name__ == '__main__':
  128. run()