1. Writing the exporter
import prometheus_client
from prometheus_client import Gauge, start_http_server, Counter
import pycurl
import time
import threading
from io import BytesIO

# Create the Counter metrics provided by client_python
url_http_code = Counter("url_http_code", "request http_code of the host", ['code', 'url'])
url_http_request_time = Counter("url_http_request_time", "request http_request_time of the host", ['le', 'url'])
http_request_total = Counter("http_request_total", "request total of the host", ['url'])

# curl the URL and return the HTTP status code and the total time taken
def test_website(url):
    buffer_curl = BytesIO()
    c = pycurl.Curl()
    c.setopt(pycurl.URL, url)
    c.setopt(pycurl.WRITEDATA, buffer_curl)  # capture the body so it is not dumped to stdout
    c.setopt(pycurl.CONNECTTIMEOUT, 3)
    c.setopt(pycurl.TIMEOUT, 3)
    try:
        c.perform()
    except pycurl.error:
        http_code = 500
        http_total_time = 999
    else:
        http_code = c.getinfo(pycurl.HTTP_CODE)
        http_total_time = c.getinfo(pycurl.TOTAL_TIME)
    finally:
        c.close()
    return http_code, http_total_time

# Based on the curl result, update the counters the exporter exposes
# (one count per status-code class and per response-time bucket)
def count_metric(url):
    http_code, http_total_time = test_website(url)
    if 100 <= http_code < 200:
        url_http_code.labels('1xx', url).inc()
    elif 200 <= http_code < 300:
        url_http_code.labels('2xx', url).inc()
    elif 300 <= http_code < 400:
        url_http_code.labels('3xx', url).inc()
    elif 400 <= http_code < 500:
        url_http_code.labels('4xx', url).inc()
    else:
        url_http_code.labels('5xx', url).inc()

    if http_total_time < 1:
        url_http_request_time.labels('1', url).inc()
    elif http_total_time < 2:
        url_http_request_time.labels('2', url).inc()
    elif http_total_time < 3:
        url_http_request_time.labels('3', url).inc()
    else:
        url_http_request_time.labels('+Inf', url).inc()

    http_request_total.labels(url).inc()

# Thread control: curl the URL every 5 seconds
def count_threads(url):
    while True:
        t = threading.Thread(target=count_metric, args=(url,))
        t.daemon = True
        t.start()
        time.sleep(5)

# Start one probing thread for each domain to be monitored
if __name__ == '__main__':
    start_http_server(9091)
    server_list = [
        'www.baidu.com',
        'www.qq.com',
        'blog.csdn.net',
        'github.com',
        'google.com'
    ]
    threads = []
    for url in server_list:
        t = threading.Thread(target=count_threads, args=(url,))
        t.daemon = True
        threads.append(t)
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()
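Side note: the hand-maintained le label on url_http_request_time mimics what client_python's built-in Histogram type does natively. A minimal sketch of that alternative is below; the metric name url_http_request_seconds and the observe_request helper are hypothetical, not part of the original exporter.

from prometheus_client import Histogram

# Hypothetical metric; the buckets mirror the 1s / 2s / 3s / +Inf thresholds used above.
url_http_request_seconds = Histogram(
    "url_http_request_seconds",
    "response time of the probed URL in seconds",
    ['url'],
    buckets=(1, 2, 3, float('inf')),
)

def observe_request(url, http_total_time):
    # A Histogram maintains the _bucket{le=...}, _sum and _count series on its own,
    # so histogram_quantile() works on the Prometheus side without custom bucketing.
    url_http_request_seconds.labels(url).observe(http_total_time)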
Gotcha: Prometheus will not tell you the final metric names (client_python appends _total to Counter names), so copy them from the exporter yourself: curl http://10.0.0.111:19091/metrics
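For reference, the relevant lines of that scrape output look roughly like this; the sample values are purely illustrative, only the metric names (with the appended _total suffix) matter here:

url_http_code_total{code="2xx",url="www.baidu.com"} 12.0
url_http_request_time_total{le="1",url="www.baidu.com"} 11.0
http_request_total{url="www.baidu.com"} 12.0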
--------------------------------------------------------------------------------------
Calculating ratios from the exporter metrics
# Custom exporter counters
## Number of requests with a 5xx status code
url_http_code_total{code="5xx",url="10.0.0.111:55555/a.txt"}
## Total number of requests
http_request_total{url="10.0.0.111:55555/a.txt"}

# Error rate: the 1-minute increase of 5xx responses divided by the 1-minute increase of total requests
delta(url_http_code_total{code="5xx",url="10.0.0.111:55555/a.txt"}[1m]) / on(url) group_left delta(http_request_total{url="10.0.0.111:55555/a.txt"}[1m])
# If the per-minute growth of 5xx codes shows up, something is wrong; compare against irate(http_request_total[1m]).

# Share of requests within the expected time, e.g. only those faster than 1 second, as a fraction of the total
delta(url_http_request_time_total{le="1"}[1m]) / on(url) group_left delta(http_request_total[1m])
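To consume these ratios outside of the Prometheus UI or Grafana, the same expressions can be run through Prometheus' HTTP query API. A minimal sketch follows; the Prometheus address http://10.0.0.111:9090 and the use of the requests library are assumptions, not part of the original post.

import requests

# Assumption: Prometheus itself is reachable at this address; adjust to your environment.
PROMETHEUS = "http://10.0.0.111:9090"

# Error rate of the probed URL over the last minute, same expression as above.
query = (
    'delta(url_http_code_total{code="5xx",url="10.0.0.111:55555/a.txt"}[1m]) '
    '/ on(url) group_left '
    'delta(http_request_total{url="10.0.0.111:55555/a.txt"}[1m])'
)

resp = requests.get(f"{PROMETHEUS}/api/v1/query", params={"query": query})
resp.raise_for_status()

# The query API returns {"status": "success", "data": {"result": [...]}};
# each result entry carries the label set and a [timestamp, value] pair.
for series in resp.json()["data"]["result"]:
    labels, (ts, value) = series["metric"], series["value"]
    print(labels.get("url"), value)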
A worked example will make this clearer.