博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
使用prometheus_client监控程序
阅读量:5930 次
发布时间:2019-06-19

本文共 5789 字,大约阅读时间需要 19 分钟。

  hot3.png

Prometheus 提供了一些常用的 exporter,但是在进程监控上,我们需要一些更加详细的监控项,例如程序的存活状态、内存、CPU、句柄数、线程数等等。

下面是一个示例,演示如何在机器上监控指定进程,并通过 HTTP 端口暴露指标,供 Prometheus 抓取。

 

#!/usr/bin/python
# -*- coding:utf-8 -*-
"""Prometheus exporter for per-process metrics of game servers.

Every refresh cycle the script walks the channel directories under /data/,
reads each game server's pid file, samples the process with psutil and
publishes the values as Prometheus gauges over HTTP.
"""
import glob
import logging
import os
import psutil
import requests
import socket
import struct
import time
import fcntl

from prometheus_client import Gauge, start_http_server

# NOTE(review): a plaintext credential is committed here, and only "project"
# is read by the code in this script. Consider loading the secret from the
# environment or a protected config file -- confirm nothing else imports it.
DataBase = {
    "user": "prometheus",
    "password": "AgTKZu4RkOZvqKZA",
    "project": "ht",
    "host": "127.0.0.1"
}

# Metric suffixes. Each one becomes a gauge named "<role><suffix>"; with the
# leading underscore removed it is also the key used in the sample dicts.
monitor = ['_cpu', '_memory', '_threads', '_fs', '_status', '_ctime']

# Label names attached to every gauge.
SERIES_LABELS = ['role', 'serverid', 'local_ip', 'wan_ip']

EXPORTER_PORT = 8111    # port served by start_http_server
REFRESH_SECONDS = 60    # pause between two sampling rounds

logger = logging.getLogger("LOG FORMAT")
logger.setLevel(logging.DEBUG)
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
ch.setFormatter(formatter)
logger.addHandler(ch)


def _zero_metrics():
    """Return a sample dict with every monitored metric set to 0."""
    return dict((suffix.strip("_"), 0) for suffix in monitor)


class GameServer(object):
    """Host-level facts: channel directories, MySQL memory and IP addresses."""

    def __init__(self):
        pass

    @property
    def channel(self):
        """Return the channel directories matching /data/<project>_*.

        Versions are separated by channel; each channel directory holds its
        own login servers and game servers.
        """
        return glob.glob("/data/%s_*" % DataBase["project"])

    @property
    def mysql(self):
        """Return the resident memory of the MySQL process in MiB.

        Returns 0 when no pid file exists, the pid file is unreadable, or
        the process is not running.
        """
        pid_files = glob.glob("/data/mysql_data/data/*.pid")
        if not pid_files:
            return 0
        try:
            with open(pid_files[0]) as f:
                pid = int(f.read().strip())
            p = psutil.Process(pid)
            if p.is_running():
                return p.memory_info().rss / 1024 / 1024
        except (IOError, OSError, ValueError, psutil.Error) as e:
            logger.exception(e)
        return 0

    @property
    def local_ip(self):
        """Return the IPv4 address bound to eth0 (the host's intranet IP)."""
        s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        try:
            # A bytes literal keeps struct.pack working on Python 2 and 3.
            packed = fcntl.ioctl(
                s.fileno(),
                0x8915,  # SIOCGIFADDR
                struct.pack('256s', b'eth0'[:15])
            )
        finally:
            # The socket only exists to provide a descriptor for the ioctl.
            s.close()
        return socket.inet_ntoa(packed[20:24])

    @property
    def wan_ip(self):
        """Return the public IP from the Tencent Cloud metadata service.

        Falls back to '0.0.0.0' when the request fails or returns an HTTP
        error status.
        """
        try:
            r = requests.get('http://metadata.tencentyun.com/meta-data/public-ipv4', timeout=5)
            # Without this an error page body would be published as the label.
            r.raise_for_status()
            data = r.text.strip()
        except Exception as e:
            logger.exception(e)
            data = '0.0.0.0'
        return data


class Action:
    """Collection steps: find pids, sample processes, build gauges."""

    @staticmethod
    def step_1(channels):
        """Map every game server directory to the pid found in its pid file.

        :param channels: iterable of channel directory paths
        :return: dict of server directory name -> pid string, or 0 when the
                 pid file is missing, empty or unreadable
        """
        temp_dict = dict()
        for channel in channels:
            # Build full paths instead of os.chdir() so the process working
            # directory is left untouched.
            for server_path in glob.glob(os.path.join(channel, "*_*")):
                if not os.path.isdir(server_path):
                    continue
                server = os.path.basename(server_path)
                # Pid file is named after the directory with "_" removed.
                pid_file = os.path.join(
                    server_path, "run", "%s.pid" % "".join(server.split("_")))
                if not os.path.exists(pid_file):
                    temp_dict[server] = 0
                    continue
                try:
                    with open(pid_file, 'r') as f:
                        temp_dict[server] = f.readline().strip() or 0
                except (IOError, OSError, ValueError) as e:
                    logger.exception(e)
                    temp_dict[server] = 0
        return temp_dict

    @staticmethod
    def step_2(m_data=None):
        """Sample the key metrics of each game server process.

        :param m_data: dict of server name -> pid, as returned by step_1
        :return: dict of server name -> dict with keys cpu, memory, threads,
                 fs, status and ctime; every value is 0 when the pid is
                 missing or invalid, or the process cannot be inspected
        """
        monitor_data = dict()
        for server, pid in (m_data or {}).items():
            sample = _zero_metrics()
            try:
                pid = int(pid)
            except (TypeError, ValueError):
                logger.error("invalid pid %r for server %s", pid, server)
                pid = 0
            if pid > 0:
                try:
                    p = psutil.Process(pid)
                    # Build the sample completely before publishing it so a
                    # failure half-way still reports all zeros.
                    sample = {
                        'status': 1 if p.is_running() else 0,
                        'cpu': p.cpu_percent(interval=0.1),
                        'memory': p.memory_info().rss / 1024 / 1024,
                        'threads': p.num_threads(),
                        'fs': p.num_fds(),
                        'ctime': p.create_time(),
                    }
                except Exception as e:
                    # Best effort: a vanished or inaccessible process is
                    # reported as all zeros rather than stopping the exporter.
                    logger.exception(e)
            monitor_data[server] = sample
        return monitor_data

    @staticmethod
    def create_series(role=None):
        """Create one Prometheus gauge per entry of ``monitor`` for a role.

        :param role: role name (the part of a server name before "_")
        :return: dict of "<role><suffix>" -> Gauge
        """
        prefix = role.split("_")[0]
        return dict(
            (role + item, Gauge(prefix + item, role + item, SERIES_LABELS))
            for item in monitor
        )


def main():
    """Start the HTTP endpoint and refresh all gauges forever."""
    gs = GameServer()
    wan_ip = gs.wan_ip
    local_ip = gs.local_ip
    channels = gs.channel
    start_http_server(EXPORTER_PORT)

    series = dict()
    known_servers = Action.step_1(channels=channels)
    for role in set(server.split("_")[0] for server in known_servers):
        series.update(Action.create_series(role))

    while True:
        samples = Action.step_2(m_data=Action.step_1(channels=channels))
        for server, data in samples.items():
            # Guard each server separately so one failure does not skip the
            # remaining servers of this round.
            try:
                parts = server.split("_")
                key_role, key_id = parts[0], parts[1]
                # Servers of a role first seen after start-up need gauges too.
                if key_role + monitor[0] not in series:
                    series.update(Action.create_series(key_role))
                for m in monitor:
                    series[key_role + m].labels(
                        role=key_role, serverid=key_id,
                        local_ip=local_ip, wan_ip=wan_ip
                    ).set(data.get(m.strip("_")))
            except Exception as e:
                logger.exception(e)
        time.sleep(REFRESH_SECONDS)  # refresh the data every 60 seconds


if __name__ == '__main__':
    main()

启动程序后,通过 ip+端口(示例中为 8111)访问 web 页面,就可以得到你想要的数据了。

在 Prometheus 页面上,我们用采集到的数据检查一下。

可以看到,我们自己的exporter也采集到了数据

这样我们就能在 Grafana 里进行展现了。

好了,自定义的 exporter 就完成了。下次我们说说 Prometheus 如何和 Consul 搭配来完成自动发现。

转载于:https://my.oschina.net/jastme/blog/1548721

你可能感兴趣的文章
让git for windows记住密码
查看>>
Asp.Net时间戳与时间互转
查看>>
如何终止java线程
查看>>
从tcp原理角度理解Broken pipe和Connection reset by peer的区别
查看>>
sloth——算法工程师标注数据的福音
查看>>
恢复计算机崩溃数据的五款最佳Linux发行版
查看>>
【MySQL】MySQL快速插入大量数据
查看>>
weblogic重置用户名密码。
查看>>
C语言扩展Python模块
查看>>
父类不能转换成子类
查看>>
李洪强iOS开发之带placeHolder的Textview
查看>>
编写高质量代码:改善Java程序的151个建议(第7章:泛型和反射___建议93~97)
查看>>
Android 高仿微信表情输入与键盘输入详解
查看>>
【faster-rcnn】训练自己的数据——修改图片格式、类别
查看>>
C#:额外知识点
查看>>
防止表单重复提交
查看>>
【iCore3应用开发平台】发布 iCore3 应用开发平台出厂代码rev0.0.6
查看>>
leetcode - First Missing Positive
查看>>
CentOS 7.0系统安装配置步骤详解
查看>>
深入学习semaphore
查看>>