Author: Liu Enhao, JD Technology

I. Background

In our self-hosted private delivery solution based on a K8s cluster, log collection uses the ilogtail + logstash + kafka + es stack: ilogtail collects the logs, logstash transforms the data, kafka smooths out traffic peaks in the log pipeline to reduce write pressure on es, and es stores the log data. In a private delivery, the self-hosted middleware involved in this stack normally has to be deployed separately; for a deployment inside the JD intranet, however, given the high-availability requirements on kafka and es, a separately deployed logging stack is not recommended.
II. The New Collection Solution in Practice

1. Overview of the new solution

For deploying K8S and collecting logs inside the JD intranet, yhQ + JES is considered as the replacement for kafka + es. Since yhQ is built on Kafka underneath and JES is built on ES, the replacement is theoretically feasible.

2. Overall architecture

The data flow is roughly:

Application logs -> ilogtail -> yhQ -> logstash -> JES

3. How to use it

Summary of the core changes:

ilogtail nameservers configuration: add a nameserver that can resolve the yhQ domain (JD Cloud hosts cannot resolve .local domains directly); a quick way to verify the resolution is sketched at the end of this summary.

```yaml
spec:
  template:
    spec:
      dnsPolicy: "None"
      dnsConfig:
        nameservers:
          - x.x.x.x # a nameserver that can resolve the yhq domain
```

ilogtail flushers configuration: point the flusher at yhQ.

```yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: ilogtail-user-cm
  namespace: elastic-system
data:
  app_stdout.yaml: |
    flushers:
      - Type: flusher_stdout
        OnlyStdout: true
      - Type: flusher_kafka_v2
        Brokers:
          - nameserver.yhq.jd.local:80 # yhq metadata address
        Topic: ai-middle-k8s-log-prod  # yhq topic
        ClientID: ai4middle4log # Kafka client ID (identifies the client and keeps it unique); corresponds to the yhq Group name -- important! (https://ilogtail.gitbook.io/ilogtail-docs/plugins/input/service-kafka#cai-ji-pei-zhi-v2)
```

logstash kafka & es configuration:

```yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: logstash-config
  namespace: elastic-system
  labels:
    elastic-app: logstash
data:
  logstash.conf: |-
    input {
        kafka {
                bootstrap_servers => ["nameserver.yhq.jd.local:80"] # yhq metadata address
                group_id => "ai4middle4log"  # yhq Group name
                client_id => "ai4middle4log" # also the yhq Group name: yhq drops Kafka's client_id concept and uses the Group name instead
                consumer_threads => 2
                decorate_events => true
                topics => ["ai-middle-k8s-log-prod"] # yhq topic
                auto_offset_reset => "latest"
                codec => json { charset => "UTF-8" }
        }
    }
    output {
        elasticsearch {
                hosts => ["http://x.x.x.x:40000","http://x.x.x.x:40000","http://x.x.x.x:40000"] # es addresses
                index => "%{[@metadata][kafka][topic]}-%{+YYYY-MM-dd}" # index naming rule
                user => "XXXXXX"    # JES username
                password => "xxxxx" # JES password
                ssl => "false"
                ssl_certificate_verification => "false"
        }
    }
```
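Before rolling the DaemonSet change out, the DNS override can be sanity-checked with a throwaway pod that uses the same dnsConfig. This is a minimal sketch: the pod name and the busybox image are illustrative, and x.x.x.x stands for the actual nameserver address.

```yaml
apiVersion: v1
kind: Pod
metadata:
  name: dns-check            # illustrative name
  namespace: elastic-system
spec:
  restartPolicy: Never
  dnsPolicy: "None"
  dnsConfig:
    nameservers:
      - x.x.x.x              # the same nameserver planned for the ilogtail DaemonSet
  containers:
    - name: dns-check
      image: busybox:1.36    # any image that ships nslookup will do
      command: ["nslookup", "nameserver.yhq.jd.local"]
```

If `kubectl -n elastic-system logs dns-check` prints the resolved address of nameserver.yhq.jd.local, the nameserver works and the same dnsConfig can be carried over to the ilogtail DaemonSet.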
The full ilogtail configuration is as follows:

```yaml
# ilogtail-daemonset.yaml
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: ilogtail-ds
  namespace: elastic-system
  labels:
    k8s-app: logtail-ds
spec:
  selector:
    matchLabels:
      k8s-app: logtail-ds
  template:
    metadata:
      labels:
        k8s-app: logtail-ds
    spec:
      dnsPolicy: "None"
      dnsConfig:
        nameservers:
          - x.x.x.x # a nameserver (reachable from JD Cloud hosts) that can resolve the yhq domain
      tolerations:
        - operator: Exists                    # deploy on all nodes
      containers:
        - name: logtail
          env:
            - name: ALIYUN_LOG_ENV_TAGS       # add log tags from env
              value: _node_name_|_node_ip_
            - name: _node_name_
              valueFrom:
                fieldRef:
                  apiVersion: v1
                  fieldPath: spec.nodeName
            - name: _node_ip_
              valueFrom:
                fieldRef:
                  apiVersion: v1
                  fieldPath: status.hostIP
            - name: cpu_usage_limit           # iLogtail self monitor cpu limit
              value: "1"
            - name: mem_usage_limit           # iLogtail self monitor mem limit
              value: "512"
          image: dockerhub.ai.jd.local/ai-middleware/ilogtail-community-edition/ilogtail:1.3.1
          imagePullPolicy: IfNotPresent
          resources:
            limits:
              cpu: 1000m
              memory: 1Gi
            requests:
              cpu: 400m
              memory: 384Mi
          volumeMounts:
            - mountPath: /var/run                       # for container runtime socket
              name: run
            - mountPath: /logtail_host                  # for log access on the node
              mountPropagation: HostToContainer
              name: root
              readOnly: true
            - mountPath: /usr/local/ilogtail/checkpoint # for checkpoint between container restarts
              name: checkpoint
            - mountPath: /usr/local/ilogtail/user_yaml_config.d # mount config dir
              name: user-config
              readOnly: true
            - mountPath: /usr/local/ilogtail/apsara_log_conf.json
              name: apsara-log-config
              readOnly: true
              subPath: apsara_log_conf.json
      hostNetwork: true
      volumes:
        - hostPath:
            path: /var/run
            type: Directory
          name: run
        - hostPath:
            path: /
            type: Directory
          name: root
        - hostPath:
            path: /etc/ilogtail-ilogtail-ds/checkpoint
            type: DirectoryOrCreate
          name: checkpoint
        - configMap:
            defaultMode: 420
            name: ilogtail-user-cm
          name: user-config
        - configMap:
            defaultMode: 420
            name: ilogtail-apsara-log-config-cm
          name: apsara-log-config
```
```yaml
# ilogtail-user-configmap.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: ilogtail-user-cm
  namespace: elastic-system
data:
  app_stdout.yaml: |
    enable: true
    inputs:
      - Type: service_docker_stdout
        Stderr: true
        Stdout: true
        K8sNamespaceRegex: ai-train
        ExternalK8sLabelTag: # map pod labels to log tags (an example workload pod carrying these labels is sketched after this ConfigMap)
          platform/resource-name: k8s_label_resource-name
          platform/task-identify: k8s_label_task-identify
          task-id: k8s_label_task-id
          run-id: k8s_label_run-id
          request-id: k8s_label_request-id
    processors:
      - Type: processor_rename
        SourceKeys:
          - k8s_label_resource-name
          - k8s_label_task-identify
          - k8s_label_task-id
          - k8s_label_run-id
          - k8s_label_request-id
          - _namespace_
          - _image_name_
          - _pod_uid_
          - _pod_name_
          - _container_name_
          - _container_ip_
          - __path__
          - _source_
        DestKeys:
          - resource_name
          - task_identify
          - task_id
          - run_id
          - request_id
          - namespace
          - image_name
          - pod_uid
          - pod_name
          - container_name
          - container_ip
          - path
          - source
    flushers:
      - Type: flusher_stdout
        OnlyStdout: true
      - Type: flusher_kafka_v2
        Brokers:
          - nameserver.yhq.jd.local:80 # yhq metadata address
        Topic: ai-middle-k8s-log-prod  # yhq topic
        ClientID: ai4middle4log # Kafka client ID; corresponds to the yhq Group name -- important! (https://ilogtail.gitbook.io/ilogtail-docs/plugins/input/service-kafka#cai-ji-pei-zhi-v2)
  app_file_log.yaml: |
    enable: true
    inputs:
      - Type: file_log
        LogPath: /export/Logs/ai-dt-algorithm-tools
        FilePattern: "*.log"
        ContainerInfo:
          K8sNamespaceRegex: ai-train
          ExternalK8sLabelTag:
            platform/resource-name: k8s_label_resource-name
            platform/task-identify: k8s_label_task-identify
            task-id: k8s_label_task-id
            run-id: k8s_label_run-id
            request-id: k8s_label_request-id
    processors:
      - Type: processor_add_fields
        Fields:
          source: file
      - Type: processor_rename
        SourceKeys:
          - __tag__:k8s_label_resource-name
          - __tag__:k8s_label_task-identify
          - __tag__:k8s_label_task-id
          - __tag__:k8s_label_run-id
          - __tag__:k8s_label_request-id
          - __tag__:_namespace_
          - __tag__:_image_name_
          - __tag__:_pod_uid_
          - __tag__:_pod_name_
          - __tag__:_container_name_
          - __tag__:_container_ip_
          - __tag__:__path__
        DestKeys:
          - resource_name
          - task_identify
          - task_id
          - run_id
          - request_id
          - namespace
          - image_name
          - pod_uid
          - pod_name
          - container_name
          - container_ip
          - path
    flushers:
      - Type: flusher_stdout
        OnlyStdout: true
      - Type: flusher_kafka_v2
        Brokers:
          - nameserver.yhq.jd.local:80
        Topic: ai-middle-k8s-log-prod
        ClientID: ai4middle4log
```
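The ExternalK8sLabelTag entries above only produce non-empty resource_name / task_identify / task_id / run_id / request_id fields when the workload pods in the ai-train namespace actually carry the corresponding labels. The sketch below is illustrative only; the pod name, image and label values are placeholders.

```yaml
apiVersion: v1
kind: Pod
metadata:
  name: demo-train-task                      # placeholder workload pod
  namespace: ai-train                        # matches K8sNamespaceRegex above
  labels:
    platform/resource-name: demo-resource    # collected as resource_name
    platform/task-identify: demo-task        # collected as task_identify
    task-id: "10001"                         # collected as task_id
    run-id: "1"                              # collected as run_id
    request-id: req-20240101-0001            # collected as request_id
spec:
  containers:
    - name: main
      image: busybox:1.36
      command: ["sh", "-c", "while true; do echo training step done; sleep 5; done"]
```

With such labels in place, ilogtail attaches the k8s_label_* tags to each log entry, and processor_rename flattens them into the field names that the logstash pipeline below forwards to JES.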
["index_date", "T.*Z",""]                                  #gsub => ["@timestamp", "T.*Z",""]                              }                            }                            output {                                elasticsearch {                                        #hosts => ["https://ai-middle-cluster-es-http:9200"]                                        hosts => ["http://x.x.x.x:40000","http://x.x.x.x:40000","http://x.x.x.x:40000"] # es地址                                        index =>  "%{[@metadata][kafka][topic]}-%{+YYYY-MM-dd}" # 索引规则                                        user => "XXXXXX" #jes的用户名                                        password => "xxxxx" #jes的密码                                        ssl => "false"                                        ssl_certificate_verification => "false"                #cacert => "/usr/share/logstash/cert/ca_logstash.cer"                                }                                stdout {                                    codec => rubydebug        }                            }                        4.核心价值在私有化部署的基础上通过简单改造实现了与京东内部中间件的完美融合,使得系统在高可用性上适应性更强、可用范围更广。  |