|
🦄 个人主页——🎐开着拖拉机回家_Linux,大数据运维-CSDN博客 🎐✨🍁

🪁🍁🪁🍁🪁🍁🪁🍁🪁🍁🪁🍁🪁🍁🪁🪁🍁🪁🍁🪁🍁🪁🍁🪁🍁🪁🍁

感谢点赞和关注,每天进步一点点!加油!

目录

- 一、概述
- 二、集群版本信息
- 三、组件状态信息获取
- 四、告警实现

## 一、概述

Ambari借鉴了很多成熟分布式软件的API设计。Rest API就是一个很好地体现。通过Ambari的Rest API,可以在脚本中通过curl维护整个集群。并且,我们可以用Rest API实现一些无法在Ambari GUI上面做的操作。

## 二、集群版本信息

## 三、组件状态信息获取

参考:【Ambari】Python调用Rest API获取集群状态信息并发送钉钉告警-CSDN博客

RESOURCEMANAGER 停止

```bash
curl -u admin:admin -i -H 'X-Requested-By: ambari' -X PUT -d '{"RequestInfo":{"context":"Restart RESOURCEMANAGER via REST"},"Body":{"HostRoles":{"state":"INSTALLED"}}}' http://192.168.2.153:8080/api/v1/clusters/winner/hosts/hdp105/host_components/RESOURCEMANAGER
```

- `-u` Ambari登录用户:密码
- `-i -H` 获取http请求的完整头部信息,包括请求方法、请求地址、请求头信息等
- `-X` 同时想发HEAD、GET或POST请求,需在-X中声明要使用的请求方式

ambari页面显示停止成功

调用ambari页面RM服务停止

RESOURCEMANAGER 启动

```bash
curl -u admin:admin -i -H 'X-Requested-By: ambari' -X PUT -d '{"RequestInfo":{"context":"Restart RESOURCEMANAGER via REST"},"Body":{"HostRoles":{"state":"STARTED"}}}' http://192.168.2.153:8080/api/v1/clusters/winner/hosts/hdp105/host_components/RESOURCEMANAGER
```

直接使用 RESTART

```bash
curl -u admin:admin -H 'X-Requested-By: ambari' -X POST -d '{"RequestInfo":{"command":"RESTART","context":"Restart RESOURCEMANAGER via REST","operation_level":{"level":"HOST","cluster_name":"winner"}},"Requests/resource_filters":[{"service_name":"YARN","component_name":"RESOURCEMANAGER","hosts":"hdp103"}]}' http://192.168.2.153:8080/api/v1/clusters/winner/requests
```

请求响应接受

## 四、告警实现

python脚本实现RM HA的监控告警

```python
# -*- coding: utf-8 -*-
import logging
import time
from imp import reload
import requests
import json
import sys

"""
~~~~~~~~~~~~
author: kangll
date: 2023/12/6 13:29
desc:

-- curl 请求,如下为测试链接
curl -u admin:admin -i -H X-Requested-By:ambari -X GET http://192.168.2.153:8080/api/v1/clusters/winner/hosts/winner
http://192.168.2.153:8080/api/v1/clusters/winner/hosts/hdp105/host_components/RESOURCEMANAGER

-- datanode 启动
curl -u admin:admin -i -H 'X-Requested-By: ambari' -X PUT -d '{"RequestInfo":{"context":"Start RESOURCEMANAGER via REST"},"Body":{"ServiceInfo":{"state":"STARTED"}}}' http://192.168.2.153:8080/api/v1/clusters/winner/services/HDFS
"""
reload(sys)
sys.setdefaultencoding('utf8')
__author__ = 'kanglilong'

logger = logging.getLogger('mylogger')
logger.setLevel(level=logging.INFO)

# Ambari rest api 访问地址
control_url = "http://192.168.2.153:8080/api/v1/clusters/winner/hosts"
# ambari web 登录账号
AUTH = ("admin", "admin")
headers = {'Content-Type': 'application/json;charset=utf-8'}
# 钉钉 URL
api_url = "https://oapi.dingtalk.com/robot/send?access_token=f4e0f344306ce9b6eec60bec95d5aa7c57f4264a791458dc09121dd7e948ac64"
# RM hostname
rm_hostname_01 = "hdp103"
rm_hostname_02 = "hdp105"
requests_rm_url = "http://192.168.2.153:8080/api/v1/clusters/winner/requests"


def getHostComponentsStatus():
    """
    获取某个服务器上某个组件的状态信息
    :param host:
    :return: component_dict 组件与其状态 status 当前节点状态是否符合期望,getStatus 是否获取到了状态
    """
    now_time = time.localtime(time.time())
    formatted_time = time.strftime('%Y-%m-%d %H:%M:%S', now_time)
    json_text = {
        "RequestInfo": {
            "command": "RESTART",
            "context": "Restart RESOURCEMANAGER via REST",
            "operation_level": {"level": "HOST", "cluster_name": "winner"}
        },
        "Requests/resource_filters": [
            {
                "service_name": "YARN",
                "component_name": "RESOURCEMANAGER",
                "hosts": "{}".format(rm_hostname_01)
            }
        ]
    }
    get_rm_status_url_01 = control_url + "/{}/host_components/RESOURCEMANAGER".format(rm_hostname_01)
    get_rm_status_url_02 = control_url + "/{}/host_components/RESOURCEMANAGER".format(rm_hostname_02)
    print("-----------------")
    rep01 = requests.get(get_rm_status_url_01, auth=AUTH)
    # 如果状态码是 20x 则获取成功
    print(rep01.status_code)
    if str(rep01.status_code).startswith("200"):
        jsonRep01 = json.loads(rep01.text)
        component_name_01 = jsonRep01['HostRoles']['component_name']
        status_01 = jsonRep01['HostRoles']['state']
        ha_state_01 = jsonRep01['HostRoles']['ha_state']
        host_name_01 = jsonRep01['HostRoles']['host_name']
        # STARTED
    else:
        # 没有正常获取到状态
        print("没有正常获取到状态")
    rep02 = requests.get(get_rm_status_url_02, auth=AUTH)
    # 如果状态码是 20x 则获取成功
    print(rep02.status_code)
    if str(rep02.status_code).startswith("200"):
        jsonRep02 = json.loads(rep02.text)
        component_name_02 = jsonRep02['HostRoles']['component_name']
        status_02 = jsonRep02['HostRoles']['state']
        ha_state_02 = jsonRep02['HostRoles']['ha_state']
        host_name_02 = jsonRep02['HostRoles']['host_name']
        # STARTED
    else:
        # 没有正常获取到状态
        print("没有正常获取到状态")
    # RM 为 STARTED status
    if component_name_01 == component_name_02 and status_01 == "STARTED" and status_02 == "STARTED":
        # 两个 RM 都为 STANDBY 则重启其中一个
        if ha_state_01 == ha_state_02 and ha_state_01 == "STANDBY" and ha_state_02 == "STANDBY":
            text = "告警对象:主机名:" + host_name_01 + ',' + host_name_02 + '\n组件名称:' + component_name_01 + "\n告警内容:HDP集群服务" + component_name_01 + "高可用状态异常" + ha_state_01 + ",触发重启" + "\n告警时间:" + formatted_time
            send_msg(text)
            print("RESOURCEMANAGER state abnormal.")
            restart_res = requests.post(requests_rm_url, data=json.dumps(json_text), auth=AUTH, headers=headers)
            print(restart_res.text)
            if str(restart_res.status_code).startswith("202"):
                # 没有正常获取到状态
                print("重启请求发送成功!")
            else:
                # 没有正常获取到状态
                print("重启请求发送成功!")
        else:
            #
            print("RESOURCEMANAGER state normal.")


def send_msg(text):
    """
    :param text: 告警文本
    :return:
    """
    json_text = {
        "msgtype": "text",
        "text": {"content": text},
        "at": {"atMobiles": ["1786881xxxx"]}
    }
    requests.post(api_url, json.dumps(json_text), headers=headers).content


getHostComponentsStatus()
```

钉钉告警发送成功

RM 重启

一台RM操作完成,触发HA的强制切换

需要添加配置 ambari.properties

```bash
vi /etc/ambari-server/conf/ambari.properties
# 添加如下配置
api.csrfPrevention.enabled=false
# 重启
ambari-server restart
```

request 请求返回的状态码
|
|