SkyWalking部署及Agent组件接入

SkyWalking部署及Spring组件接入

系统版本为AlmaLinux 9.0
JDK:https://builds.openlogic.com/downloadJDK/openlogic-openjdk/11.0.16+8/openlogic-openjdk-11.0.16+8-linux-x64.tar.gz
SkyWalking:https://www.apache.org/dyn/closer.cgi/skywalking/9.2.0/apache-skywalking-apm-9.2.0.tar.gz
JAVA Agent:https://www.apache.org/dyn/closer.cgi/skywalking/java-agent/8.12.0/apache-skywalking-java-agent-8.12.0.tgz

部署文档:https://skywalking.apache.org/docs/main/v9.2.0/readme/

OAP+webapp

wget https://mirrors.tuna.tsinghua.edu.cn/apache/skywalking/9.2.0/apache-skywalking-apm-9.2.0.tar.gz
tar -zxvf soft/apache-skywalking-apm-9.2.0.tar.gz -C running/
ln -s apache-skywalking-apm-bin skywalking-apm
# 由于部署使用的zookeepr 3.4.14,需将oap-libs下的zookeeper-3.5.7.jar替换为https://repo1.maven.org/maven2/org/apache/zookeeper/zookeeper/3.4.14/zookeeper-3.4.14.jar
vim /home/kami/running/skywalking-apm/config/application.yml
# 这里使用zookeeper集群部署，支持单机standalone部署，集群部署支持zookeeper/kubernetes/consul/consul/etcd/nacos
cluster:
  selector: ${SW_CLUSTER:zookeeper}
  # Please check your ZooKeeper is 3.5+, However, it is also compatible with ZooKeeper 3.4.x. Replace the ZooKeeper 3.5+  # library the oap-libs folder with your ZooKeeper 3.4.x library.
  zookeeper:
    namespace: ${SW_NAMESPACE:"skywalking"}
    hostPort: ${SW_CLUSTER_ZK_HOST_PORT:10.0.10.101:2181,10.0.10.102:2181,10.0.10.103:2181}
    # Retry Policy
    baseSleepTimeMs: ${SW_CLUSTER_ZK_SLEEP_TIME:1000} # initial amount of time to wait between retries
    maxRetries: ${SW_CLUSTER_ZK_MAX_RETRIES:3} # max number of times to retry
    # Enable ACL
    enableACL: ${SW_ZK_ENABLE_ACL:false} # disable ACL in default
    schema: ${SW_ZK_SCHEMA:digest} # only support digest schema
    expression: ${SW_ZK_EXPRESSION:skywalking:skywalking}
    internalComHost: ${SW_CLUSTER_INTERNAL_COM_HOST:""}
    internalComPort: ${SW_CLUSTER_INTERNAL_COM_PORT:-1}
# 数据收集/分析等核心配置，没有特殊需求建议按默认配置
core:
  selector: ${SW_CORE:default}
  default:
    # Mixed: Receive agent data, Level 1 aggregate, Level 2 aggregate
    # Receiver: Receive agent data, Level 1 aggregate
    # Aggregator: Level 2 aggregate
    role: ${SW_CORE_ROLE:Mixed} # Mixed/Receiver/Aggregator
    restHost: ${SW_CORE_REST_HOST:0.0.0.0}
    restPort: ${SW_CORE_REST_PORT:12800}
    restContextPath: ${SW_CORE_REST_CONTEXT_PATH:/}
    restMaxThreads: ${SW_CORE_REST_MAX_THREADS:200}
    restIdleTimeOut: ${SW_CORE_REST_IDLE_TIMEOUT:30000}
    restAcceptQueueSize: ${SW_CORE_REST_QUEUE_SIZE:0}
    httpMaxRequestHeaderSize: ${SW_CORE_HTTP_MAX_REQUEST_HEADER_SIZE:8192}
    gRPCHost: ${SW_CORE_GRPC_HOST:0.0.0.0}
    gRPCPort: ${SW_CORE_GRPC_PORT:11800}
    maxConcurrentCallsPerConnection: ${SW_CORE_GRPC_MAX_CONCURRENT_CALL:0}
    maxMessageSize: ${SW_CORE_GRPC_MAX_MESSAGE_SIZE:0}
    gRPCThreadPoolQueueSize: ${SW_CORE_GRPC_POOL_QUEUE_SIZE:-1}
    gRPCThreadPoolSize: ${SW_CORE_GRPC_THREAD_POOL_SIZE:-1}
    gRPCSslEnabled: ${SW_CORE_GRPC_SSL_ENABLED:false}
    gRPCSslKeyPath: ${SW_CORE_GRPC_SSL_KEY_PATH:""}
    gRPCSslCertChainPath: ${SW_CORE_GRPC_SSL_CERT_CHAIN_PATH:""}
    gRPCSslTrustedCAPath: ${SW_CORE_GRPC_SSL_TRUSTED_CA_PATH:""}
    downsampling:
      - Hour
      - Day
    # Set a timeout on metrics data. After the timeout has expired, the metrics data will automatically be deleted.
    enableDataKeeperExecutor: ${SW_CORE_ENABLE_DATA_KEEPER_EXECUTOR:true} # Turn it off then automatically metrics data delete will be close.
    dataKeeperExecutePeriod: ${SW_CORE_DATA_KEEPER_EXECUTE_PERIOD:5} # How often the data keeper executor runs periodically, unit is minute
    recordDataTTL: ${SW_CORE_RECORD_DATA_TTL:3} # Unit is day
    metricsDataTTL: ${SW_CORE_METRICS_DATA_TTL:7} # Unit is day
    # The period of L1 aggregation flush to L2 aggregation. Unit is ms.
    l1FlushPeriod: ${SW_CORE_L1_AGGREGATION_FLUSH_PERIOD:500}
    # The threshold of session time. Unit is ms. Default value is 70s.
    storageSessionTimeout: ${SW_CORE_STORAGE_SESSION_TIMEOUT:70000}
    # The period of doing data persistence. Unit is second.Default value is 25s
    persistentPeriod: ${SW_CORE_PERSISTENT_PERIOD:25}
    # Cache metrics data for 1 minute to reduce database queries, and if the OAP cluster changes within that minute,
    # the metrics may not be accurate within that minute.
    enableDatabaseSession: ${SW_CORE_ENABLE_DATABASE_SESSION:true}
    topNReportPeriod: ${SW_CORE_TOPN_REPORT_PERIOD:10} # top_n record worker report cycle, unit is minute
    # Extra model column are the column defined by in the codes, These columns of model are not required logically in aggregation or further query,
    # and it will cause more load for memory, network of OAP and storage.
    # But, being activated, user could see the name in the storage entities, which make users easier to use 3rd party tool, such as Kibana->ES, to query the data by themselves.
    activeExtraModelColumns: ${SW_CORE_ACTIVE_EXTRA_MODEL_COLUMNS:false}
    # The max length of service + instance names should be less than 200
    serviceNameMaxLength: ${SW_SERVICE_NAME_MAX_LENGTH:70}
    instanceNameMaxLength: ${SW_INSTANCE_NAME_MAX_LENGTH:70}
    # The max length of service + endpoint names should be less than 240
    endpointNameMaxLength: ${SW_ENDPOINT_NAME_MAX_LENGTH:150}
    # Define the set of span tag keys, which should be searchable through the GraphQL.
    searchableTracesTags: ${SW_SEARCHABLE_TAG_KEYS:http.method,http.status_code,rpc.status_code,db.type,db.instance,mq.queue,mq.topic,mq.broker}
    # Define the set of log tag keys, which should be searchable through the GraphQL.
    searchableLogsTags: ${SW_SEARCHABLE_LOGS_TAG_KEYS:level}
    # Define the set of alarm tag keys, which should be searchable through the GraphQL.
    searchableAlarmTags: ${SW_SEARCHABLE_ALARM_TAG_KEYS:level}
    # The max size of tags keys for autocomplete select.
    autocompleteTagKeysQueryMaxSize: ${SW_AUTOCOMPLETE_TAG_KEYS_QUERY_MAX_SIZE:100}
    # The max size of tags values for autocomplete select.
    autocompleteTagValuesQueryMaxSize: ${SW_AUTOCOMPLETE_TAG_VALUES_QUERY_MAX_SIZE:100}
    # The number of threads used to prepare metrics data to the storage.
    prepareThreads: ${SW_CORE_PREPARE_THREADS:2}
    # Turn it on then automatically grouping endpoint by the given OpenAPI definitions.
    enableEndpointNameGroupingByOpenapi: ${SW_CORE_ENABLE_ENDPOINT_NAME_GROUPING_BY_OPAENAPI:true}
# 持久化配置，这里使用elasticsearch,支持h2/elasticsearch/mysql/h2/tidb/postgresql/banyandb,部署的ES版本为6.8.21
storage:
  selector: ${SW_STORAGE:elasticsearch}
  elasticsearch:
    namespace: ${SW_NAMESPACE:"skyworking"}
    clusterNodes: ${SW_STORAGE_ES_CLUSTER_NODES:10.0.10.102:9200}
    protocol: ${SW_STORAGE_ES_HTTP_PROTOCOL:"http"}
    connectTimeout: ${SW_STORAGE_ES_CONNECT_TIMEOUT:3000}
    socketTimeout: ${SW_STORAGE_ES_SOCKET_TIMEOUT:30000}
    responseTimeout: ${SW_STORAGE_ES_RESPONSE_TIMEOUT:15000}
    numHttpClientThread: ${SW_STORAGE_ES_NUM_HTTP_CLIENT_THREAD:0}
    user: ${SW_ES_USER:""}
    password: ${SW_ES_PASSWORD:""}
    trustStorePath: ${SW_STORAGE_ES_SSL_JKS_PATH:""}
    trustStorePass: ${SW_STORAGE_ES_SSL_JKS_PASS:""}
    secretsManagementFile: ${SW_ES_SECRETS_MANAGEMENT_FILE:""} # Secrets management file in the properties format includes the username, password, which are managed by 3rd party tool.
    dayStep: ${SW_STORAGE_DAY_STEP:1} # Represent the number of days in the one minute/hour/day index.
    indexShardsNumber: ${SW_STORAGE_ES_INDEX_SHARDS_NUMBER:1} # Shard number of new indexes
    indexReplicasNumber: ${SW_STORAGE_ES_INDEX_REPLICAS_NUMBER:1} # Replicas number of new indexes
    # Super data set has been defined in the codes, such as trace segments.The following 3 config would be improve es performance when storage super size data in es.
    superDatasetDayStep: ${SW_SUPERDATASET_STORAGE_DAY_STEP:-1} # Represent the number of days in the super size dataset record index, the default value is the same as dayStep when the value is less than 0
    superDatasetIndexShardsFactor: ${SW_STORAGE_ES_SUPER_DATASET_INDEX_SHARDS_FACTOR:5} #  This factor provides more shards for the super data set, shards number = indexShardsNumber * superDatasetIndexShardsFactor. Also, this factor effects Zipkin and Jaeger traces.
    superDatasetIndexReplicasNumber: ${SW_STORAGE_ES_SUPER_DATASET_INDEX_REPLICAS_NUMBER:0} # Represent the replicas number in the super size dataset record index, the default value is 0.
    indexTemplateOrder: ${SW_STORAGE_ES_INDEX_TEMPLATE_ORDER:0} # the order of index template
    bulkActions: ${SW_STORAGE_ES_BULK_ACTIONS:5000} # Execute the async bulk record data every ${SW_STORAGE_ES_BULK_ACTIONS} requests
    # flush the bulk every 10 seconds whatever the number of requests
    # INT(flushInterval * 2/3) would be used for index refresh period.
    flushInterval: ${SW_STORAGE_ES_FLUSH_INTERVAL:15}
    concurrentRequests: ${SW_STORAGE_ES_CONCURRENT_REQUESTS:2} # the number of concurrent requests
    resultWindowMaxSize: ${SW_STORAGE_ES_QUERY_MAX_WINDOW_SIZE:10000}
    metadataQueryMaxSize: ${SW_STORAGE_ES_QUERY_MAX_SIZE:10000}
    scrollingBatchSize: ${SW_STORAGE_ES_SCROLLING_BATCH_SIZE:5000}
    segmentQueryMaxSize: ${SW_STORAGE_ES_QUERY_SEGMENT_SIZE:200}
    profileTaskQueryMaxSize: ${SW_STORAGE_ES_QUERY_PROFILE_TASK_SIZE:200}
    profileDataQueryBatchSize: ${SW_STORAGE_ES_QUERY_PROFILE_DATA_BATCH_SIZE:100}
    oapAnalyzer: ${SW_STORAGE_ES_OAP_ANALYZER:"{\"analyzer\":{\"oap_analyzer\":{\"type\":\"stop\"}}}"} # the oap analyzer.
    oapLogAnalyzer: ${SW_STORAGE_ES_OAP_LOG_ANALYZER:"{\"analyzer\":{\"oap_log_analyzer\":{\"type\":\"standard\"}}}"} # the oap log analyzer. It could be customized by the ES analyzer configuration to support more language log formats, such as Chinese log, Japanese log and etc.
    advanced: ${SW_STORAGE_ES_ADVANCED:""}
    # Enable shard metrics and records indices into multi-physical indices, one index template per metric/meter aggregation function or record.
    logicSharding: ${SW_STORAGE_ES_LOGIC_SHARDING:false}
agent-analyzer:
  selector: ${SW_AGENT_ANALYZER:default}
  default:
    # The default sampling rate and the default trace latency time configured by the 'traceSamplingPolicySettingsFile' file.
    traceSamplingPolicySettingsFile: ${SW_TRACE_SAMPLING_POLICY_SETTINGS_FILE:trace-sampling-policy-settings.yml}
    slowDBAccessThreshold: ${SW_SLOW_DB_THRESHOLD:default:200,mongodb:100} # The slow database access thresholds. Unit ms.
    forceSampleErrorSegment: ${SW_FORCE_SAMPLE_ERROR_SEGMENT:true} # When sampling mechanism active, this config can open(true) force save some error segment. true is default.
    segmentStatusAnalysisStrategy: ${SW_SEGMENT_STATUS_ANALYSIS_STRATEGY:FROM_SPAN_STATUS} # Determine the final segment status from the status of spans. Available values are FROM_SPAN_STATUS , FROM_ENTRY_SPAN and FROM_FIRST_SPAN. FROM_SPAN_STATUS represents the segment status would be error if any span is in error status. FROM_ENTRY_SPAN means the segment status would be determined by the status of entry spans only. FROM_FIRST_SPAN means the segment status would be determined by the status of the first span only.
    # Nginx and Envoy agents can't get the real remote address.
    # Exit spans with the component in the list would not generate the client-side instance relation metrics.
    noUpstreamRealAddressAgents: ${SW_NO_UPSTREAM_REAL_ADDRESS:6000,9000}
    meterAnalyzerActiveFiles: ${SW_METER_ANALYZER_ACTIVE_FILES:datasource,threadpool,satellite} # Which files could be meter analyzed, files split by ","

log-analyzer:
  selector: ${SW_LOG_ANALYZER:default}
  default:
    lalFiles: ${SW_LOG_LAL_FILES:default}
    malFiles: ${SW_LOG_MAL_FILES:""}

event-analyzer:
  selector: ${SW_EVENT_ANALYZER:default}
  default:

receiver-sharing-server:
  selector: ${SW_RECEIVER_SHARING_SERVER:default}
  default:
    # For HTTP server
    restHost: ${SW_RECEIVER_SHARING_REST_HOST:0.0.0.0}
    restPort: ${SW_RECEIVER_SHARING_REST_PORT:0}
    restContextPath: ${SW_RECEIVER_SHARING_REST_CONTEXT_PATH:/}
    restMaxThreads: ${SW_RECEIVER_SHARING_REST_MAX_THREADS:200}
    restIdleTimeOut: ${SW_RECEIVER_SHARING_REST_IDLE_TIMEOUT:30000}
    restAcceptQueueSize: ${SW_RECEIVER_SHARING_REST_QUEUE_SIZE:0}
    httpMaxRequestHeaderSize: ${SW_RECEIVER_SHARING_HTTP_MAX_REQUEST_HEADER_SIZE:8192}
    # For gRPC server
    gRPCHost: ${SW_RECEIVER_GRPC_HOST:0.0.0.0}
    gRPCPort: ${SW_RECEIVER_GRPC_PORT:0}
    maxConcurrentCallsPerConnection: ${SW_RECEIVER_GRPC_MAX_CONCURRENT_CALL:0}
    maxMessageSize: ${SW_RECEIVER_GRPC_MAX_MESSAGE_SIZE:0}
    gRPCThreadPoolQueueSize: ${SW_RECEIVER_GRPC_POOL_QUEUE_SIZE:0}
    gRPCThreadPoolSize: ${SW_RECEIVER_GRPC_THREAD_POOL_SIZE:0}
    gRPCSslEnabled: ${SW_RECEIVER_GRPC_SSL_ENABLED:false}
    gRPCSslKeyPath: ${SW_RECEIVER_GRPC_SSL_KEY_PATH:""}
    gRPCSslCertChainPath: ${SW_RECEIVER_GRPC_SSL_CERT_CHAIN_PATH:""}
    gRPCSslTrustedCAsPath: ${SW_RECEIVER_GRPC_SSL_TRUSTED_CAS_PATH:""}
    authentication: ${SW_AUTHENTICATION:""}
receiver-register:
  selector: ${SW_RECEIVER_REGISTER:default}
  default:

receiver-trace:
  selector: ${SW_RECEIVER_TRACE:default}
  default:

receiver-jvm:
  selector: ${SW_RECEIVER_JVM:default}
  default:

receiver-clr:
  selector: ${SW_RECEIVER_CLR:default}
  default:

receiver-profile:
  selector: ${SW_RECEIVER_PROFILE:default}
  default:

receiver-zabbix:
  selector: ${SW_RECEIVER_ZABBIX:-}
  default:
    port: ${SW_RECEIVER_ZABBIX_PORT:10051}
    host: ${SW_RECEIVER_ZABBIX_HOST:0.0.0.0}
    activeFiles: ${SW_RECEIVER_ZABBIX_ACTIVE_FILES:agent}

service-mesh:
  selector: ${SW_SERVICE_MESH:default}
  default:

envoy-metric:
  selector: ${SW_ENVOY_METRIC:default}
  default:
    acceptMetricsService: ${SW_ENVOY_METRIC_SERVICE:true}
    alsHTTPAnalysis: ${SW_ENVOY_METRIC_ALS_HTTP_ANALYSIS:""}
    alsTCPAnalysis: ${SW_ENVOY_METRIC_ALS_TCP_ANALYSIS:""}
    # k8sServiceNameRule allows you to customize the service name in ALS via Kubernetes metadata,
    # the available variables are pod, service, f.e., you can use ${service.metadata.name}-${pod.metadata.labels.version}
    # to append the version number to the service name.
    # Be careful, when using environment variables to pass this configuration, use single quotes('') to avoid it being evaluated by the shell.
    k8sServiceNameRule: ${K8S_SERVICE_NAME_RULE:"${pod.metadata.labels.(service.istio.io/canonical-name)}"}

prometheus-fetcher:
  selector: ${SW_PROMETHEUS_FETCHER:-}
  default:
    enabledRules: ${SW_PROMETHEUS_FETCHER_ENABLED_RULES:"self"}
    maxConvertWorker: ${SW_PROMETHEUS_FETCHER_NUM_CONVERT_WORKER:-1}

kafka-fetcher:
  selector: ${SW_KAFKA_FETCHER:-}
  default:
    bootstrapServers: ${SW_KAFKA_FETCHER_SERVERS:localhost:9092}
    namespace: ${SW_NAMESPACE:""}
    partitions: ${SW_KAFKA_FETCHER_PARTITIONS:3}
    replicationFactor: ${SW_KAFKA_FETCHER_PARTITIONS_FACTOR:2}
    enableNativeProtoLog: ${SW_KAFKA_FETCHER_ENABLE_NATIVE_PROTO_LOG:true}
    enableNativeJsonLog: ${SW_KAFKA_FETCHER_ENABLE_NATIVE_JSON_LOG:true}
    consumers: ${SW_KAFKA_FETCHER_CONSUMERS:1}
    kafkaHandlerThreadPoolSize: ${SW_KAFKA_HANDLER_THREAD_POOL_SIZE:-1}
    kafkaHandlerThreadPoolQueueSize: ${SW_KAFKA_HANDLER_THREAD_POOL_QUEUE_SIZE:-1}

receiver-meter:
  selector: ${SW_RECEIVER_METER:default}
  default:

receiver-otel:
  selector: ${SW_OTEL_RECEIVER:default}
  default:
    enabledHandlers: ${SW_OTEL_RECEIVER_ENABLED_HANDLERS:"oc,otlp"}
    enabledOtelRules: ${SW_OTEL_RECEIVER_ENABLED_OTEL_RULES:"istio-controlplane,k8s-node,oap,vm,mysql,postgresql"}

receiver-zipkin:
  selector: ${SW_RECEIVER_ZIPKIN:-}
  default:
    searchableTracesTags: ${SW_ZIPKIN_SEARCHABLE_TAG_KEYS:http.method}
    # The sample rate precision is 1/10000, should be between 0 and 10000
    sampleRate: ${SW_ZIPKIN_SAMPLE_RATE:10000}
    ## The below configs are for OAP collect zipkin trace from HTTP
    enableHttpCollector: ${SW_ZIPKIN_HTTP_COLLECTOR_ENABLED:true}
    restHost: ${SW_RECEIVER_ZIPKIN_REST_HOST:0.0.0.0}
    restPort: ${SW_RECEIVER_ZIPKIN_REST_PORT:9411}
    restContextPath: ${SW_RECEIVER_ZIPKIN_REST_CONTEXT_PATH:/}
    restMaxThreads: ${SW_RECEIVER_ZIPKIN_REST_MAX_THREADS:200}
    restIdleTimeOut: ${SW_RECEIVER_ZIPKIN_REST_IDLE_TIMEOUT:30000}
    restAcceptQueueSize: ${SW_RECEIVER_ZIPKIN_REST_QUEUE_SIZE:0}
    ## The below configs are for OAP collect zipkin trace from kafka
    enableKafkaCollector: ${SW_ZIPKIN_KAFKA_COLLECTOR_ENABLED:false}
    kafkaBootstrapServers: ${SW_ZIPKIN_KAFKA_SERVERS:localhost:9092}
    kafkaGroupId: ${SW_ZIPKIN_KAFKA_GROUP_ID:zipkin}
    kafkaTopic: ${SW_ZIPKIN_KAFKA_TOPIC:zipkin}
    # Kafka consumer config, JSON format as Properties. If it contains the same key with above, would override.
    kafkaConsumerConfig: ${SW_ZIPKIN_KAFKA_CONSUMER_CONFIG:"{\"auto.offset.reset\":\"earliest\",\"enable.auto.commit\":true}"}
    # The Count of the topic consumers
    kafkaConsumers: ${SW_ZIPKIN_KAFKA_CONSUMERS:1}
    kafkaHandlerThreadPoolSize: ${SW_ZIPKIN_KAFKA_HANDLER_THREAD_POOL_SIZE:-1}
    kafkaHandlerThreadPoolQueueSize: ${SW_ZIPKIN_KAFKA_HANDLER_THREAD_POOL_QUEUE_SIZE:-1}

receiver-browser:
  selector: ${SW_RECEIVER_BROWSER:default}
  default:
    # The sample rate precision is 1/10000. 10000 means 100% sample in default.
    sampleRate: ${SW_RECEIVER_BROWSER_SAMPLE_RATE:10000}

receiver-log:
  selector: ${SW_RECEIVER_LOG:default}
  default:

query:
  selector: ${SW_QUERY:graphql}
  graphql:
    # Enable the log testing API to test the LAL.
    # NOTE: This API evaluates untrusted code on the OAP server.
    # A malicious script can do significant damage (steal keys and secrets, remove files and directories, install malware, etc).
    # As such, please enable this API only when you completely trust your users.
    enableLogTestTool: ${SW_QUERY_GRAPHQL_ENABLE_LOG_TEST_TOOL:false}
    # Maximum complexity allowed for the GraphQL query that can be used to
    # abort a query if the total number of data fields queried exceeds the defined threshold.
    maxQueryComplexity: ${SW_QUERY_MAX_QUERY_COMPLEXITY:1000}
    # Allow user add, disable and update UI template
    enableUpdateUITemplate: ${SW_ENABLE_UPDATE_UI_TEMPLATE:false}
    # "On demand log" allows users to fetch Pod containers' log in real time,
    # because this might expose secrets in the logs (if any), users need
    # to enable this manually, and add permissions to OAP cluster role.
    enableOnDemandPodLog: ${SW_ENABLE_ON_DEMAND_POD_LOG:false}

# This module is for Zipkin query API and support zipkin-lens UI
query-zipkin:
  selector: ${SW_QUERY_ZIPKIN:-}
  default:
    # For HTTP server
    restHost: ${SW_QUERY_ZIPKIN_REST_HOST:0.0.0.0}
    restPort: ${SW_QUERY_ZIPKIN_REST_PORT:9412}
    restContextPath: ${SW_QUERY_ZIPKIN_REST_CONTEXT_PATH:/zipkin}
    restMaxThreads: ${SW_QUERY_ZIPKIN_REST_MAX_THREADS:200}
    restIdleTimeOut: ${SW_QUERY_ZIPKIN_REST_IDLE_TIMEOUT:30000}
    restAcceptQueueSize: ${SW_QUERY_ZIPKIN_REST_QUEUE_SIZE:0}
    # Default look back for traces and autocompleteTags, 1 day in millis
    lookback: ${SW_QUERY_ZIPKIN_LOOKBACK:86400000}
    # The Cache-Control max-age (seconds) for serviceNames, remoteServiceNames and spanNames
    namesMaxAge: ${SW_QUERY_ZIPKIN_NAMES_MAX_AGE:300}
    ## The below config are OAP support for zipkin-lens UI
    # Default traces query max size
    uiQueryLimit: ${SW_QUERY_ZIPKIN_UI_QUERY_LIMIT:10}
    # Default look back on the UI for search traces, 15 minutes in millis
    uiDefaultLookback: ${SW_QUERY_ZIPKIN_UI_DEFAULT_LOOKBACK:900000}

alarm:
  selector: ${SW_ALARM:default}
  default:

telemetry:
  selector: ${SW_TELEMETRY:none}
  none:
  prometheus:
    host: ${SW_TELEMETRY_PROMETHEUS_HOST:0.0.0.0}
    port: ${SW_TELEMETRY_PROMETHEUS_PORT:1234}
    sslEnabled: ${SW_TELEMETRY_PROMETHEUS_SSL_ENABLED:false}
    sslKeyPath: ${SW_TELEMETRY_PROMETHEUS_SSL_KEY_PATH:""}
    sslCertChainPath: ${SW_TELEMETRY_PROMETHEUS_SSL_CERT_CHAIN_PATH:""}

configuration:
  selector: ${SW_CONFIGURATION:none}
  none:
  grpc:
    host: ${SW_DCS_SERVER_HOST:""}
    port: ${SW_DCS_SERVER_PORT:80}
    clusterName: ${SW_DCS_CLUSTER_NAME:SkyWalking}
    period: ${SW_DCS_PERIOD:20}
  apollo:
    apolloMeta: ${SW_CONFIG_APOLLO:http://localhost:8080}
    apolloCluster: ${SW_CONFIG_APOLLO_CLUSTER:default}
    apolloEnv: ${SW_CONFIG_APOLLO_ENV:""}
    appId: ${SW_CONFIG_APOLLO_APP_ID:skywalking}
    period: ${SW_CONFIG_APOLLO_PERIOD:60}
  zookeeper:
    period: ${SW_CONFIG_ZK_PERIOD:60} # Unit seconds, sync period. Default fetch every 60 seconds.
    namespace: ${SW_CONFIG_ZK_NAMESPACE:/default}
    hostPort: ${SW_CONFIG_ZK_HOST_PORT:localhost:2181}
    # Retry Policy
    baseSleepTimeMs: ${SW_CONFIG_ZK_BASE_SLEEP_TIME_MS:1000} # initial amount of time to wait between retries
    maxRetries: ${SW_CONFIG_ZK_MAX_RETRIES:3} # max number of times to retry
  etcd:
    period: ${SW_CONFIG_ETCD_PERIOD:60} # Unit seconds, sync period. Default fetch every 60 seconds.
    endpoints: ${SW_CONFIG_ETCD_ENDPOINTS:http://localhost:2379}
    namespace: ${SW_CONFIG_ETCD_NAMESPACE:/skywalking}
    authentication: ${SW_CONFIG_ETCD_AUTHENTICATION:false}
    user: ${SW_CONFIG_ETCD_USER:}
    password: ${SW_CONFIG_ETCD_password:}
  consul:
    # Consul host and ports, separated by comma, e.g. 1.2.3.4:8500,2.3.4.5:8500
    hostAndPorts: ${SW_CONFIG_CONSUL_HOST_AND_PORTS:1.2.3.4:8500}
    # Sync period in seconds. Defaults to 60 seconds.
    period: ${SW_CONFIG_CONSUL_PERIOD:60}
    # Consul aclToken
    aclToken: ${SW_CONFIG_CONSUL_ACL_TOKEN:""}
  k8s-configmap:
    period: ${SW_CONFIG_CONFIGMAP_PERIOD:60}
    namespace: ${SW_CLUSTER_K8S_NAMESPACE:default}
    labelSelector: ${SW_CLUSTER_K8S_LABEL:app=collector,release=skywalking}
  nacos:
    # Nacos Server Host
    serverAddr: ${SW_CONFIG_NACOS_SERVER_ADDR:127.0.0.1}
    # Nacos Server Port
    port: ${SW_CONFIG_NACOS_SERVER_PORT:8848}
    # Nacos Configuration Group
    group: ${SW_CONFIG_NACOS_SERVER_GROUP:skywalking}
    # Nacos Configuration namespace
    namespace: ${SW_CONFIG_NACOS_SERVER_NAMESPACE:}
    # Unit seconds, sync period. Default fetch every 60 seconds.
    period: ${SW_CONFIG_NACOS_PERIOD:60}
    # Nacos auth username
    username: ${SW_CONFIG_NACOS_USERNAME:""}
    password: ${SW_CONFIG_NACOS_PASSWORD:""}
    # Nacos auth accessKey
    accessKey: ${SW_CONFIG_NACOS_ACCESSKEY:""}
    secretKey: ${SW_CONFIG_NACOS_SECRETKEY:""}

exporter:
  selector: ${SW_EXPORTER:-}
  grpc:
    targetHost: ${SW_EXPORTER_GRPC_HOST:127.0.0.1}
    targetPort: ${SW_EXPORTER_GRPC_PORT:9870}

health-checker:
  selector: ${SW_HEALTH_CHECKER:-}
  default:
    checkIntervalSeconds: ${SW_HEALTH_CHECKER_INTERVAL_SECONDS:5}

configuration-discovery:
  selector: ${SW_CONFIGURATION_DISCOVERY:default}
  default:
    disableMessageDigest: ${SW_DISABLE_MESSAGE_DIGEST:false}

receiver-event:
  selector: ${SW_RECEIVER_EVENT:default}
  default:

receiver-ebpf:
  selector: ${SW_RECEIVER_EBPF:default}
  default:

=========================================================================================================
#q启动脚本配置
vim /home/kami/running/skywalking-apm/bin/oapService_supervisor.sh
PRG="$0"
PRGDIR=$(dirname "$PRG")
[ -z "$OAP_HOME" ] && OAP_HOME=$(cd "$PRGDIR/.." > /dev/null || exit 1; pwd)

OAP_LOG_DIR="/data/var/opt/skywalking-oap/logs/"
JAVA_OPTS="${JAVA_OPTS:-  -Xms256M -Xmx512M}"

if [ ! -d "${OAP_LOG_DIR}" ]; then
    mkdir -p "${OAP_LOG_DIR}"
fi

_RUNJAVA=${JAVA_HOME}/bin/java
[ -z "$JAVA_HOME" ] && _RUNJAVA=java

CLASSPATH="$OAP_HOME/config:$CLASSPATH"
for i in "$OAP_HOME"/oap-libs/*.jar
do
    CLASSPATH="$i:$CLASSPATH"
done

OAP_OPTIONS=" -Doap.logDir=${OAP_LOG_DIR}"

eval exec "\"$_RUNJAVA\" ${JAVA_OPTS} ${OAP_OPTIONS} -classpath $CLASSPATH org.apache.skywalking.oap.server.starter.OAPServerStartUp 2>${OAP_LOG_DIR}/oap.log 1> /dev/null"
----------------------------------------------------------------------------------------------
vim /home/kami/running/skywalking-apm/bin/webappService_supervisor.sh
PRG="$0"
PRGDIR=$(dirname "$PRG")
[ -z "$WEBAPP_HOME" ] && WEBAPP_HOME=$(cd "$PRGDIR/.." > /dev/null || exit 1; pwd)

WEBAPP_LOG_DIR="/data/var/opt/skywalking-webappService/logs"
JAVA_OPTS="${JAVA_OPTS:-  -Xms256M -Xmx512M}"
JAR_PATH="${WEBAPP_HOME}/webapp"

if [ ! -d "${WEBAPP_LOG_DIR}" ]; then
    mkdir -p "${WEBAPP_LOG_DIR}"
fi

LOG_FILE_LOCATION=${WEBAPP_LOG_DIR}/webapp.log

_RUNJAVA=${JAVA_HOME}/bin/java
[ -z "$JAVA_HOME" ] && _RUNJAVA=java

eval exec "\"$_RUNJAVA\" ${JAVA_OPTS} -jar ${JAR_PATH}/skywalking-webapp.jar \
         --spring.config.location=${JAR_PATH}/webapp.yml \
         --logging.file=${LOG_FILE_LOCATION} \
        2>${WEBAPP_LOG_DIR}/webapp-console.log 1> /dev/null"
----------------------------------------------------------------------------------------------
cat <<EOF>> /data/opt/supervisor/conf.d/skywalking-oap.conf
; skywalking-oap config file
[program:skywalking-oap]
environment=JAVA_HOME=/data/opt/jdk
command=sh /home/kami/running/skywalking-apm/bin/oapService_supervisor.sh
directory=/home/kami/running/skywalking-apm
user=kami
numprocs=1
stdout_logfile=/data/var/opt/skywalking-oap/logs/stdout.log
stdout_logfile_maxbytes=64MB
stdout_logfile_backups=1
redirect_stderr=true
autostart=true
autorestart=true
startsecs=6
stopwaitsecs=1
killasgroup=true
priority=2
EOF

cat <<EOF>> /data/opt/supervisor/conf.d/skywalking-webappService.conf
; skywalking-webappService config file
[program:skywalking-webappService]
environment=JAVA_HOME=/data/opt/jdk
command=sh /home/kami/running/skywalking-apm/bin/webappService_supervisor.sh
directory=/home/kami/running/skywalking-apm
user=kami
numprocs=1
stdout_logfile=/data/var/opt/skywalking-webappService/logs/stdout.log
stdout_logfile_maxbytes=64MB
stdout_logfile_backups=1
redirect_stderr=true
autostart=true
autorestart=true
startsecs=6
stopwaitsecs=1
killasgroup=true
priority=2
EOF
mkdir -p /data/var/opt/{skywalking-oap,skywalking-webappService}/logs/

# 若需通过kafka接入Agent数据，配置config/application.yml中kafka-fetcher，Agent客户端配置加入-Dskywalking.plugin.kafka.bootstrap_servers=IP:PORT即可

agent接入

Java接入

wget https://mirrors.tuna.tsinghua.edu.cn/apache/skywalking/java-agent/8.12.0/apache-skywalking-java-agent-8.12.0.tgz 
tar -zxvf apache-skywalking-java-agent-8.12.0.tgz -C /data/opt/
#这里使用的为SuperVisor管理程序，修改环境变量及启动命令即可
vim /data/opt/supervisor/conf.d/anomaly_restapi.conf
environment=JAVA_HOME=/data/opt/jdk,SW_AGENT_NAME=xxxxx,SW_AGENT_COLLECTOR_BACKEND_SERVICES=10.0.10.101:11800
command=/data/opt/jdk/bin/java -javaagent:/data/opt/skywalking-agent/skywalking-agent.jar -jar xxxxxxx

自监控配置

#开启SkyWorking遥测数据
vim /home/kami/running/skywalking-apm/config/application.yml
prometheus-fetcher:
  selector: ${SW_PROMETHEUS_FETCHER:default}
  default:
    enabledRules: ${SW_PROMETHEUS_FETCHER_ENABLED_RULES:"self"}
    maxConvertWorker: ${SW_PROMETHEUS_FETCHER_NUM_CONVERT_WORKER:-1}
    active: ${SW_PROMETHEUS_FETCHER_ACTIVE:true}
telemetry:
  selector: ${SW_TELEMETRY:prometheus}
  none:
  prometheus:
    host: ${SW_TELEMETRY_PROMETHEUS_HOST:10.0.10.101}
    port: ${SW_TELEMETRY_PROMETHEUS_PORT:1234}
    sslEnabled: ${SW_TELEMETRY_PROMETHEUS_SSL_ENABLED:false}
    sslKeyPath: ${SW_TELEMETRY_PROMETHEUS_SSL_KEY_PATH:""}
    sslCertChainPath: ${SW_TELEMETRY_PROMETHEUS_SSL_CERT_CHAIN_PATH:""}

vim /home/kami/running/skywalking-apm/config/fetcher-prom-rules/self.yaml
staticConfig:
  targets:
    - url: http://10.0.10.101:1234

Linux主机监控

#部署 Node Exporter
#https://prometheus.io/download/#node_exporter
wget https://github.com/prometheus/node_exporter/releases/download/v1.4.0/node_exporter-1.4.0.linux-amd64.tar.gz
tar -zxvf node_exporter-1.4.0.linux-amd64.tar.gz
ln -s node_exporter-1.4.0.linux-amd64 node_exporter
#curl http://localhost:9100/metrics
cat <<EOF>> /data/opt/supervisor/conf.d/node_exporter.conf
; node_exporter config file
[program:node_exporter]
environment=JAVA_HOME=/data/opt/jdk
command=/data/opt/node_exporter/node_exporter
directory=/data/opt/node_exporter
user=kami
numprocs=1
stdout_logfile=/data/var/opt/node_exporter/logs/stdout.log
stdout_logfile_maxbytes=64MB
stdout_logfile_backups=1
redirect_stderr=true
autostart=true
autorestart=true
startsecs=6
stopwaitsecs=1
killasgroup=true
priority=2
EOF
mkdir -p /data/var/opt/node_exporter/logs/

#OpenTelemetery 收集器配置
wget https://github.com/open-telemetry/opentelemetry-collector-releases/releases/download/v0.60.0/otelcol_0.60.0_linux_amd64.rpm
sudo rpm -ivh otelcol_0.60.0_linux_amd64.rpm
cat <<EOF>>  /etc/otelcol/config.yaml
receivers:
  prometheus:
    config:
      scrape_configs:
        - job_name: "vm-monitoring" # make sure to use this in the vm.yaml to filter only VM metrics
          scrape_interval: 10s
          static_configs:
            - targets: ["10.0.10.101:9100"]

processors:
  batch:

exporters:
  otlp:
    endpoint: "10.0.10.101:11800" # The OAP Server address
    tls:
      insecure: false
      #insecure: true
  # Exports data to the console
  logging:
    logLevel: debug

service:
  pipelines:
    metrics:
      receivers: [prometheus]
      processors: [batch]
      exporters: [otlp, logging]
EOF
sudo systemctl restart otelcol
#或者使用docker运行OpenTelemetery 收集器
#sudo docker run -d --restart=unless-stopped --name  opentelemetry -v /export/servers/opentelemetry-collector/config.yaml:/etc/otelcol/config.yaml -v /etc/localtime:/etc/localtime  otel/opentelemetry-collector:0.60.0
#检查SkyWorking/config/application.yaml文件内receiver-otel/default/enabledHandlers是否存在otlp

MySQL

#https://prometheus.io/download/#mysqld_exporter
wget https://github.com/prometheus/mysqld_exporter/releases/download/v0.14.0/mysqld_exporter-0.14.0.linux-amd64.tar.gz
tar -zxvf mysqld_exporter-0.14.0.linux-amd64.tar.gz
ln -s mysqld_exporter-0.14.0.linux-amd64 mysqld_exporter
cat <<EOF>> /data/opt/mysqld_exporter/my.cnf
[client]
port=3306
user=kami
password=123456
EOF
cat <<EOF>> /data/opt/supervisor/conf.d/mysqld_exporter.conf
; mysqld_exporter config file
[program:mysqld_exporter]
environment=JAVA_HOME=/data/opt/jdk
command=/data/opt/mysqld_exporter/mysqld_exporter --config.my-cnf=/data/opt/mysqld_exporter/my.cnf
directory=/data/opt/mysqld_exporter
user=kami
numprocs=1
stdout_logfile=/data/var/opt/mysqld_exporter/logs/stdout.log
stdout_logfile_maxbytes=64MB
stdout_logfile_backups=1
redirect_stderr=true
autostart=true
autorestart=true
startsecs=6
stopwaitsecs=1
killasgroup=true
priority=2
EOF
mkdir -p /data/var/opt/mysqld_exporter/logs/

#OpenTelemetery 收集器配置
wget https://github.com/open-telemetry/opentelemetry-collector-releases/releases/download/v0.60.0/otelcol_0.60.0_linux_amd64.rpm
sudo rpm -ivh otelcol_0.60.0_linux_amd64.rpm
cat <<EOF>>  /etc/otelcol/config.yaml
receivers:
  prometheus:
    config:
     scrape_configs:
       - job_name: 'mysql-monitoring'
         scrape_interval: 5s
         static_configs:
           - targets: ['10.0.10.101:9104']
             labels:
               host_name: 10.0.10.102
processors:
  batch:

exporters:
  otlp:
    endpoint: 10.0.10.101:11800
    tls:
      insecure: true
service:
  pipelines:
    metrics:
      receivers:
      - prometheus
      processors:
      - batch
      exporters:
      - otlp
EOF
sudo systemctl restart otelcol