1 ##########################################################################
2 # Copyright (c) 2009, ETH Zurich.
5 # This file is distributed under the terms in the attached LICENSE file.
6 # If you do not find this file, copies can be found by writing to:
7 # ETH Zurich D-INFK, Haldeneggsteig 4, CH-8092 Zurich. Attn: Systems Group.
8 ##########################################################################
10 import re, socket, httplib, traceback, os, subprocess, datetime, glob, time
11 import tests, debug, siteconfig
12 from common import TestCommon, TimeoutError, select_timeout
13 from results import ResultsBase, PassFailResult, RowResults
# Files fetched from the webserver under test; they must exist on the NFS
# share the webserver exports (see WEBSERVER_NFS_* site configuration).
WEBSERVER_TEST_FILES=['index.html', 'barrelfish.gif', 'barrelfish_sosp09.pdf', 'nevill-master-capabilities.pdf', 'razavi-master-performanceisolation.pdf']

# per-request HTTP timeout (seconds)
WEBSERVER_TIMEOUT=5 # seconds

# name of the per-testdir log file written by WebserverTest.runtests
TEST_LOG_NAME = 'testlog.txt'

# httperf arguments common to every run; note trailing space so that
# per-run arguments can be appended directly
HTTPERF_BASE_ARGS='--hog --close-with-reset --timeout 2 '
HTTPERF_URI = '/index.html'

# Webserver stress test: the index page is downloaded repeatedly,
# this many times
WEBSERVER_STRESS_COUNTER = 3000

# desired duration of an httperf test run (seconds)
# NOTE(review): the definition of HTTPERF_DURATION falls in lines elided
# from this listing; it is referenced below in HTTPERF_TIMEOUT.

# sleep time between runs (seconds)
HTTPERF_SLEEPTIME = 20

# timeout for a complete run, including setup etc.
HTTPERF_TIMEOUT = datetime.timedelta(seconds=(HTTPERF_DURATION + 30))

# connection rates across all client machines
HTTPERF_STARTRATE = 1000  # initial rate
HTTPERF_RATEINCREMENT = 1000  # amount to increment by for each new run
class WebCommon(TestCommon):
    """Shared behaviour for webserver tests: boots a Barrelfish image with
    the net_sockets_server and webserver modules, watches the console
    output for the server's IP address / startup message, and records
    severe server-side failures (panics, assertion failures, aborts).

    Subclasses implement runtests() to drive load against the server.
    """

    def __init__(self, options):
        super(WebCommon, self).__init__(options)
        # generous overall timeout: network setup + NFS mount can be slow
        self.test_timeout_delta = datetime.timedelta(seconds=600)
        self.read_after_finished = True
        # console lines that indicate a severe webserver failure; non-empty
        # list makes passed() (below) return False
        self.server_failures = []

    def setup(self, build, machine, testdir):
        super(WebCommon, self).setup(build, machine, testdir)
        self.testdir = testdir
        # NOTE(review): lines elided in this listing — presumably further
        # state initialisation (e.g. self.finished / self.ip); confirm
        # against the full source.

    def get_modules(self, build, machine):
        # Boot image: base modules + networking stack + webserver serving
        # files from the site-configured NFS export.
        modules = super(WebCommon, self).get_modules(build, machine)
        modules.add_module("net_sockets_server", ["auto"])
        nfsip = socket.gethostbyname(siteconfig.get('WEBSERVER_NFS_HOST'))
        modules.add_module("webserver", ["core=%d" % machine.get_coreids()[0], #2
                           nfsip, siteconfig.get('WEBSERVER_NFS_PATH')])
        # siteconfig.get('WEBSERVER_NFS_TEST_PATH')])
        # NOTE(review): the `return modules` expected here falls in lines
        # elided from this listing.

    def process_line(self, line):
        # Watch console output for the server's IP address, its startup
        # message (which triggers the client-side tests), and fatal errors.
        m = re.match(r'# IP Addr (\d+\.\d+\.\d+\.\d+)', line)
        # NOTE(review): the `if m:` branch (presumably storing m.group(1)
        # into self.ip) falls in lines elided from this listing.
        elif self.ip and 'Starting webserver' in line:
            debug.verbose("Running the tests")
            self.runtests(self.ip)
        elif line.startswith("kernel PANIC!") or \
             line.startswith("Assertion failed on core") or \
             re.match("Assertion .* failed at line", line) or \
             line.startswith("Aborted"):
            # Severe error in webserver, failing test
            # "Aborted" alone is uninformative, so record the line printed
            # just before it instead (unless already recorded).
            if line.startswith("Aborted") and \
               self.previous_line not in self.server_failures:
                line = self.previous_line
            self.server_failures.append(line.strip())
        # remember the last line so an "Aborted" can be attributed above
        self.previous_line = line.strip()

        # NOTE(review): a `def passed(self):` header is elided from this
        # listing; the following return is its body — the test passes iff
        # no severe server failure was observed.
        return len(self.server_failures) == 0

    def is_finished(self, line):
        # finished once runtests() has completed, or on the usual
        # framework termination conditions
        return self.finished or super(WebCommon, self).is_finished(line)
class WebserverTest(WebCommon):
    '''tests webserver functionality'''

    def setup(self, *args):
        super(WebserverTest, self).setup(*args)
        # NOTE(review): trailing lines of this method are elided from this
        # listing.

    def getpage_stress(self, server, page, count):
        """Fetch /page from server `count` times, tolerating up to 3
        consecutive failures before giving up.  Used as a stress test.
        """
        debug.verbose('requesting http://%s/%s' % (server, page))
        #c = httplib.HTTPConnection(server, timeout=WEBSERVER_TIMEOUT)
        # NOTE(review): the initialisation of failure_count, the try:
        # header and the getresponse() call are elided from this listing.
        for i in range(count):
            # fresh connection per request (server closes with reset)
            c = httplib.HTTPConnection(server, timeout=WEBSERVER_TIMEOUT)
            c.request('GET', '/' + page)
            if (r.status / 100) != 2 :
                print "HTTP request failed for %d" % (i)
            assert((r.status / 100) == 2) # check for success response
            # Reset failure count after successful retrieval
        except Exception as e:
            # NOTE(review): the continuation of the next print's argument
            # tuple is elided from this listing.
            print "HTTP request failed for %d, (failure count %d)" % (i,
            print "Exception: ", e
            failure_count = failure_count + 1
            if failure_count >= 3:
                # three consecutive failures: abort the stress run
                print "HTTP request failed for 3 successive times."
                print "Giving up for %d, (failure count %d)" % (i,
        debug.verbose('server replied %s %s for %d times' % (r.status, r.reason, count))

    def getpage(self, server, page):
        """Fetch /page from server once, assert a 2xx response, and — if a
        local copy is available via WEBSERVER_LOCAL_PATH — compare the
        returned content chunk-by-chunk against it.
        """
        debug.verbose('requesting http://%s/%s' % (server, page))
        c = httplib.HTTPConnection(server, timeout=WEBSERVER_TIMEOUT)
        c.request('GET', '/' + page)
        # NOTE(review): the getresponse() call binding `r` is elided from
        # this listing.
        debug.verbose('server replied %s %s' % (r.status, r.reason))
        assert((r.status / 100) == 2) # check for success response
        # local reference copy is optional: missing site config means we
        # only check the status code
            local_path = siteconfig.get('WEBSERVER_LOCAL_PATH')
        except AttributeError:
        local = os.path.join(local_path, page) if local_path else None
        if local and os.path.isfile(local) and os.access(local, os.R_OK):
            debug.verbose('comparing content to %s' % local)
            # read from both files and compare
            # NOTE(review): CHUNKSIZE definition, the open() of the local
            # file as `l`, and the loop header are elided from this listing.
            remote_data = r.read(CHUNKSIZE)
            local_data = l.read(CHUNKSIZE)
            if remote_data != local_data:
                print "Remote and local data did not match:"
                print "Remote data\n"
            assert(remote_data == local_data)
            # a short read marks end of file: comparison complete
            if len(local_data) < CHUNKSIZE:
            debug.verbose('contents matched for %s' % local)

    def dotest(self, func, args):
        """Run one test callable, catching any exception, and append a
        'Test: ... PASS/FAIL' line (plus traceback on failure) to the
        test log.
        """
        # NOTE(review): the try: header, func(*args) call, and the
        # exception flag initialisation are elided from this listing.
        except Exception as e:
        s = 'Test: %s%s\t%s\n' % (func.__name__, str(args),
                                  'FAIL' if exception else 'PASS')
        debug.verbose('Exception while running test: %s\n'
                      % traceback.format_exc())
        s += 'Error was: %s\n' % traceback.format_exc()
        self.testlog.write(s)

    def runtests(self, server):
        # For each test file: a single fetch-and-compare, then a stress
        # run of repeated fetches. Results go to the test log, which
        # process_data() later parses.
        stress_counter = WEBSERVER_STRESS_COUNTER
        self.testlog = open(os.path.join(self.testdir, TEST_LOG_NAME), 'w')
        for f in WEBSERVER_TEST_FILES:
            self.dotest(self.getpage, (server, f))
            debug.verbose("Running stresstest: (%d GET %s)" %
                          (stress_counter, str(f)))
            self.dotest(self.getpage_stress, (server, f, stress_counter))
        # NOTE(review): trailing lines (presumably closing the log and
        # setting the finished flag) are elided from this listing.

    def process_data(self, testdir, rawiter):
        # the test passed iff we see at least one PASS and no FAILs in the log
        # NOTE(review): the try: header around the open, the except
        # handler header, the loop over log lines, and the passed-flag
        # updates are elided from this listing.
            testlog = open(os.path.join(testdir, TEST_LOG_NAME), 'r')
            debug.verbose("Cannot find test log, failing test")
            return PassFailResult(False, reason="Cannot find test log")
            if re.match('Test:.*FAIL$', line):
            elif passed != False and re.match('Test:.*PASS$', line):
        # also require that no severe server-side failure was seen
        server_ok = super(WebserverTest, self).passed()
        return PassFailResult(passed and server_ok)
class HTTPerfTest(WebCommon):
    '''httperf webserver performance benchmark'''

    def setup(self, *args):
        super(HTTPerfTest, self).setup(*args)
        # NOTE(review): trailing lines (presumably initialising
        # self.nruns) are elided from this listing.

    def _runtest(self, target, nclients, nconns, rate):
        """Spawn one httperf instance per client machine (over SSH) and
        collect their output until all have finished or HTTPERF_TIMEOUT
        expires.
        """
        # NOTE(review): leading lines (presumably incrementing self.nruns,
        # binding nrun, and initialising the httperfs list) are elided
        # from this listing.
        for nclient in range(nclients):
            user, host = siteconfig.site.get_load_generator()
            # the %02d log-file naming below only supports two digits
            assert(nrun < 100 and nclient < 100)
            filename = 'httperf_run%02d_%02d.txt' % (nrun, nclient)
            logfile = open(os.path.join(self.testdir, filename), 'w')
            debug.verbose('spawning httperf on %s' % host)
            hp = HTTPerfClient(logfile, user, host, target, nconns, rate)
        # loop collecting output from all of them
        busy_httperfs = list(httperfs) # copy list
        timeout = datetime.datetime.now() + HTTPERF_TIMEOUT
        # NOTE(review): the surrounding while-loop, the timeout check,
        # the per-client read calls, and the finally/cleanup block are
        # elided from this listing.
        (ready, _, _) = select_timeout(timeout, busy_httperfs)
        raise TimeoutError('waiting for httperfs')
        busy_httperfs.remove(hp)
        debug.log('cleaning up httperf test...')

    def runtests(self, target):
        """Drive httperf runs at increasing total connection rates until a
        run fails or the server can no longer keep up (request rate or
        reply rate drops below 90% of the preceding stage).
        """
        nclients = siteconfig.get('HTTPERF_MAXCLIENTS')
        totalrate = HTTPERF_STARTRATE
        # NOTE(review): the enclosing loop header and first-iteration
        # guard are elided from this listing.
        # sleep a moment to let things settle down between runs
        debug.verbose('sleeping between httperf runs')
        time.sleep(HTTPERF_SLEEPTIME)

        # compute rate and total number of connections for each client
        rate = totalrate / nclients
        nconns = HTTPERF_DURATION * rate

        debug.log('starting httperf: %d clients, %d conns, rate %d (%d per client)' %
                  (nclients, nconns, totalrate, rate))
        self._runtest(target, nclients, nconns, rate)

        # decide whether to keep going...
        results = self._process_run(self.nruns)
        if not results.passed():
            debug.log('previous test failed, stopping')
        elif results.request_rate < (0.9 * results.connect_rate):
            debug.log('request rate below 90% of connect rate, stopping')
        elif results.reply_rate < (0.9 * results.request_rate):
            debug.log('reply rate below 90% of request rate, stopping')
        # ramp up the offered load for the next stage
        totalrate += HTTPERF_RATEINCREMENT

    def _process_one(self, logfile):
        """Parse one httperf output file into an HTTPerfResults object.

        Six specific summary lines must be matched (connection rate,
        request rate, reply rate, net I/O, and the two error lines);
        fewer matches indicates an invalid/truncated log.
        """
        ret = HTTPerfResults()
        # NOTE(review): the line-iteration loop header, the per-pattern
        # `if m:` guards, and the matches-counter updates are elided from
        # this listing.
        m = re.match('Connection rate: (\d+\.\d+) conn/s', line)
        ret.connect_rate = float(m.group(1))

        m = re.match('Request rate: (\d+\.\d+) req/s', line)
        ret.request_rate = float(m.group(1))

        m = re.search('Reply rate \[replies/s\]: min .* avg (\d+\.\d+)'
                      ' max .* stddev .*', line)
        ret.reply_rate = float(m.group(1))

        m = re.match('Net I/O: .* KB/s \((\d+\.\d+)\*10\^6 bps\)', line)
        ret.bandwidth = float(m.group(1))

        m = re.match('Errors: fd-unavail (\d+) addrunavail (\d+)'
                     ' ftab-full (\d+) other (\d+)', line)
        ret.fd_unavail = int(m.group(1))
        ret.addrunavail = int(m.group(2))
        ret.ftab_full = int(m.group(3))
        ret.other_err = int(m.group(4))

        m = re.match('Errors: total \d+ client-timo (\d+) socket-timo (\d+)'
                     ' connrefused (\d+) connreset (\d+)', line)
        ret.client_timo = int(m.group(1))
        ret.socket_timo = int(m.group(2))
        ret.connrefused = int(m.group(3))
        ret.connreset = int(m.group(4))

        if matches != 6 : # otherwise we have an invalid log
            print "Instead of 6, only %d matches found\n" % (matches)

    def _process_run(self, nrun):
        # Aggregate all per-client logs for run `nrun` into one result
        # via HTTPerfResults.__add__.
        nameglob = 'httperf_run%02d_*.txt' % nrun
        # NOTE(review): the `results = []` initialisation is elided from
        # this listing.
        for filename in glob.iglob(os.path.join(self.testdir, nameglob)):
            with open(filename, 'r') as logfile:
                results.append(self._process_one(logfile))
        return sum(results, HTTPerfResults())

    def process_data(self, testdir, raw_iter):
        """Build the final RowResults table: one row per run with rates,
        bandwidth, and the summed error count; mark the whole benchmark
        failed if the webserver itself crashed.
        """
        self.testdir = testdir
        # NOTE(review): the `totals = {}` initialisation is elided from
        # this listing.
        for filename in glob.iglob(os.path.join(testdir, 'httperf_run*.txt')):
            nrun = int(re.match('.*/httperf_run(\d+)_', filename).group(1))
            result = self._process_run(nrun)
            totals[nrun] = result

        fields = 'run connect_rate request_rate reply_rate bandwidth errors'.split()
        final = RowResults(fields)

        for run in sorted(totals.keys()):
            # NOTE(review): the `total = totals[run]` binding is elided
            # from this listing.
            errsum = sum([getattr(total, f) for f in total._err_fields])
            final.add_row([run, total.connect_rate, total.request_rate,
                           total.reply_rate, total.bandwidth, errsum])
        # XXX: often the last run will have errors in it, due to the control algorithm
        # final.mark_failed()

        # If we saw a severe failure (assertion failure, kernel panic, or user
        # level panic) in the webserver, fail the test
        if not super(HTTPerfTest, self).passed():
            final.mark_failed('\n'.join(self.server_failures))
        # NOTE(review): the return of `final` falls in lines elided from
        # this listing.
class HTTPerfResults(ResultsBase):
    """Aggregated httperf metrics for one run.

    Instances are additive (__add__), so per-client results can be
    combined with sum(); a run passes iff every error counter is zero.
    """
    # counters whose non-zero value means the run failed
    _err_fields = 'fd_unavail addrunavail ftab_full other_err'.split()
    # informational counters and rates
    _result_fields = ('client_timo socket_timo connrefused connreset'
                      ' connect_rate request_rate bandwidth reply_rate').split()
    _fields = _err_fields + _result_fields

        # NOTE(review): the `def __init__(self):` header is elided from
        # this listing; all fields are presumably zero-initialised below.
        super(HTTPerfResults, self).__init__()
        for f in self._fields:

    def __add__(self, other):
        # field-wise sum; enables sum(results, HTTPerfResults())
        ret = HTTPerfResults()
        for f in self._fields:
            setattr(ret, f, getattr(self, f) + getattr(other, f))
        # NOTE(review): `return ret` and the `def passed(self):` header
        # are elided from this listing; the following return is the body
        # of passed().
        return all([getattr(self, field) == 0 for field in self._err_fields])

    def to_file(self, fh):
        # human-readable summary; error counters only reported when non-zero
        errs = [(f, getattr(self,f)) for f in self._err_fields if getattr(self,f)]
        fh.write('Failed run: ' + ' '.join(['%s %d' % e for e in errs]))
        fh.write('Request rate:\t%f\n' % self.request_rate)
        fh.write('Bandwidth:\t%f\n' % self.bandwidth)
        fh.write('Reply rate:\t%f\n' % self.reply_rate)
class HTTPerfClient(object):
    """One remote httperf load generator, driven over SSH.

    Exposes fileno()/read-style methods so instances can be multiplexed
    with select(); output is streamed into the supplied logfile.
    """

    def __init__(self, logfile, user, host, target, nconns, rate):
        # NOTE(review): lines elided in this listing — presumably
        # `self.user = user` and `self.host = host` (both are read by
        # _launchssh and cleanup below).
        self.httperf_path = siteconfig.get('HTTPERF_PATH')
        cmd = '%s %s' % (self.httperf_path, HTTPERF_BASE_ARGS)
        cmd += ' --num-conns %d --rate %d --server %s --uri %s' % (
            nconns, rate, target, HTTPERF_URI)
        # unbuffered pipe so select()-driven single-char reads see output
        # as soon as httperf produces it
        self.proc = self._launchssh(cmd, stdout=subprocess.PIPE, bufsize=0)
        self.logfile = logfile

    def _launchssh(self, remotecmd, **kwargs):
        # run remotecmd on self.host as self.user; kwargs pass through to
        # subprocess.Popen (e.g. stdout=PIPE)
        ssh_dest = '%s@%s' % (self.user, self.host)
        cmd = ['ssh'] + siteconfig.get('SSH_ARGS').split() + [ssh_dest, remotecmd]
        return subprocess.Popen(cmd, **kwargs)

    # mirror builtin file method so that we can pass this to select()
    # NOTE(review): the `def fileno(self):` header is elided from this
    # listing.
        return self.proc.stdout.fileno()

    # NOTE(review): the read-method header is elided from this listing.
        # read only a single character to avoid blocking!
        s = self.proc.stdout.read(1)
        self.logfile.write(s)

    # NOTE(review): the `def cleanup(self):` header is elided from this
    # listing.
        """perform cleanup if necessary"""
        if self.proc is None or self.proc.poll() == 0:
        if self.proc.returncode:
            debug.warning('httperf: SSH to %s exited with error %d'
                          % (self.host, self.proc.returncode))
        else: # kill SSH if still up
            debug.warning('httperf: killing SSH child for %s' % self.host)
            self.proc.terminate()
        # run a remote killall to get rid of any errant httperfs
        debug.verbose('killing any errant httperfs on %s' % self.host)
        p = self._launchssh('killall -q %s' % self.httperf_path)
        # NOTE(review): the wait/returncode check guarding this warning
        # is elided from this listing.
        debug.warning('failed to killall httperf on %s!' % self.host)