SkyWalking部署及Spring组件接入
系统版本为AlmaLinux 9.0
JDK:https://builds.openlogic.com/downloadJDK/openlogic-openjdk/11.0.16+8/openlogic-openjdk-11.0.16+8-linux-x64.tar.gz
SkyWalking:https://www.apache.org/dyn/closer.cgi/skywalking/9.2.0/apache-skywalking-apm-9.2.0.tar.gz
JAVA Agent:https://www.apache.org/dyn/closer.cgi/skywalking/java-agent/8.12.0/apache-skywalking-java-agent-8.12.0.tgz
部署文档:https://skywalking.apache.org/docs/main/v9.2.0/readme/
OAP+webapp
wget https://mirrors.tuna.tsinghua.edu.cn/apache/skywalking/9.2.0/apache-skywalking-apm-9.2.0.tar.gz
tar -zxvf soft/apache-skywalking-apm-9.2.0.tar.gz -C running/
ln -s apache-skywalking-apm-bin skywalking-apm
# 由于部署使用的是ZooKeeper 3.4.14,需将oap-libs下的zookeeper-3.5.7.jar替换为https://repo1.maven.org/maven2/org/apache/zookeeper/zookeeper/3.4.14/zookeeper-3.4.14.jar
vim /home/kami/running/skywalking-apm/config/application.yml
# 这里使用zookeeper集群部署,支持单机standalone部署,集群部署支持zookeeper/kubernetes/consul/etcd/nacos
cluster:
selector: ${SW_CLUSTER:zookeeper}
# Please check your ZooKeeper is 3.5+, However, it is also compatible with ZooKeeper 3.4.x. Replace the ZooKeeper 3.5+ # library the oap-libs folder with your ZooKeeper 3.4.x library.
zookeeper:
namespace: ${SW_NAMESPACE:"skywalking"}
hostPort: ${SW_CLUSTER_ZK_HOST_PORT:10.0.10.101:2181,10.0.10.102:2181,10.0.10.103:2181}
# Retry Policy
baseSleepTimeMs: ${SW_CLUSTER_ZK_SLEEP_TIME:1000} # initial amount of time to wait between retries
maxRetries: ${SW_CLUSTER_ZK_MAX_RETRIES:3} # max number of times to retry
# Enable ACL
enableACL: ${SW_ZK_ENABLE_ACL:false} # disable ACL in default
schema: ${SW_ZK_SCHEMA:digest} # only support digest schema
expression: ${SW_ZK_EXPRESSION:skywalking:skywalking}
internalComHost: ${SW_CLUSTER_INTERNAL_COM_HOST:""}
internalComPort: ${SW_CLUSTER_INTERNAL_COM_PORT:-1}
# 数据收集/分析等核心配置,没有特殊需求建议按默认配置
core:
selector: ${SW_CORE:default}
default:
# Mixed: Receive agent data, Level 1 aggregate, Level 2 aggregate
# Receiver: Receive agent data, Level 1 aggregate
# Aggregator: Level 2 aggregate
role: ${SW_CORE_ROLE:Mixed} # Mixed/Receiver/Aggregator
restHost: ${SW_CORE_REST_HOST:0.0.0.0}
restPort: ${SW_CORE_REST_PORT:12800}
restContextPath: ${SW_CORE_REST_CONTEXT_PATH:/}
restMaxThreads: ${SW_CORE_REST_MAX_THREADS:200}
restIdleTimeOut: ${SW_CORE_REST_IDLE_TIMEOUT:30000}
restAcceptQueueSize: ${SW_CORE_REST_QUEUE_SIZE:0}
httpMaxRequestHeaderSize: ${SW_CORE_HTTP_MAX_REQUEST_HEADER_SIZE:8192}
gRPCHost: ${SW_CORE_GRPC_HOST:0.0.0.0}
gRPCPort: ${SW_CORE_GRPC_PORT:11800}
maxConcurrentCallsPerConnection: ${SW_CORE_GRPC_MAX_CONCURRENT_CALL:0}
maxMessageSize: ${SW_CORE_GRPC_MAX_MESSAGE_SIZE:0}
gRPCThreadPoolQueueSize: ${SW_CORE_GRPC_POOL_QUEUE_SIZE:-1}
gRPCThreadPoolSize: ${SW_CORE_GRPC_THREAD_POOL_SIZE:-1}
gRPCSslEnabled: ${SW_CORE_GRPC_SSL_ENABLED:false}
gRPCSslKeyPath: ${SW_CORE_GRPC_SSL_KEY_PATH:""}
gRPCSslCertChainPath: ${SW_CORE_GRPC_SSL_CERT_CHAIN_PATH:""}
gRPCSslTrustedCAPath: ${SW_CORE_GRPC_SSL_TRUSTED_CA_PATH:""}
downsampling:
- Hour
- Day
# Set a timeout on metrics data. After the timeout has expired, the metrics data will automatically be deleted.
enableDataKeeperExecutor: ${SW_CORE_ENABLE_DATA_KEEPER_EXECUTOR:true} # Turn it off then automatically metrics data delete will be close.
dataKeeperExecutePeriod: ${SW_CORE_DATA_KEEPER_EXECUTE_PERIOD:5} # How often the data keeper executor runs periodically, unit is minute
recordDataTTL: ${SW_CORE_RECORD_DATA_TTL:3} # Unit is day
metricsDataTTL: ${SW_CORE_METRICS_DATA_TTL:7} # Unit is day
# The period of L1 aggregation flush to L2 aggregation. Unit is ms.
l1FlushPeriod: ${SW_CORE_L1_AGGREGATION_FLUSH_PERIOD:500}
# The threshold of session time. Unit is ms. Default value is 70s.
storageSessionTimeout: ${SW_CORE_STORAGE_SESSION_TIMEOUT:70000}
# The period of doing data persistence. Unit is second.Default value is 25s
persistentPeriod: ${SW_CORE_PERSISTENT_PERIOD:25}
# Cache metrics data for 1 minute to reduce database queries, and if the OAP cluster changes within that minute,
# the metrics may not be accurate within that minute.
enableDatabaseSession: ${SW_CORE_ENABLE_DATABASE_SESSION:true}
topNReportPeriod: ${SW_CORE_TOPN_REPORT_PERIOD:10} # top_n record worker report cycle, unit is minute
# Extra model column are the column defined by in the codes, These columns of model are not required logically in aggregation or further query,
# and it will cause more load for memory, network of OAP and storage.
# But, being activated, user could see the name in the storage entities, which make users easier to use 3rd party tool, such as Kibana->ES, to query the data by themselves.
activeExtraModelColumns: ${SW_CORE_ACTIVE_EXTRA_MODEL_COLUMNS:false}
# The max length of service + instance names should be less than 200
serviceNameMaxLength: ${SW_SERVICE_NAME_MAX_LENGTH:70}
instanceNameMaxLength: ${SW_INSTANCE_NAME_MAX_LENGTH:70}
# The max length of service + endpoint names should be less than 240
endpointNameMaxLength: ${SW_ENDPOINT_NAME_MAX_LENGTH:150}
# Define the set of span tag keys, which should be searchable through the GraphQL.
searchableTracesTags: ${SW_SEARCHABLE_TAG_KEYS:http.method,http.status_code,rpc.status_code,db.type,db.instance,mq.queue,mq.topic,mq.broker}
# Define the set of log tag keys, which should be searchable through the GraphQL.
searchableLogsTags: ${SW_SEARCHABLE_LOGS_TAG_KEYS:level}
# Define the set of alarm tag keys, which should be searchable through the GraphQL.
searchableAlarmTags: ${SW_SEARCHABLE_ALARM_TAG_KEYS:level}
# The max size of tags keys for autocomplete select.
autocompleteTagKeysQueryMaxSize: ${SW_AUTOCOMPLETE_TAG_KEYS_QUERY_MAX_SIZE:100}
# The max size of tags values for autocomplete select.
autocompleteTagValuesQueryMaxSize: ${SW_AUTOCOMPLETE_TAG_VALUES_QUERY_MAX_SIZE:100}
# The number of threads used to prepare metrics data to the storage.
prepareThreads: ${SW_CORE_PREPARE_THREADS:2}
# Turn it on then automatically grouping endpoint by the given OpenAPI definitions.
enableEndpointNameGroupingByOpenapi: ${SW_CORE_ENABLE_ENDPOINT_NAME_GROUPING_BY_OPAENAPI:true}
# 持久化配置,这里使用elasticsearch,支持h2/elasticsearch/mysql/tidb/postgresql/banyandb,部署的ES版本为6.8.21
storage:
selector: ${SW_STORAGE:elasticsearch}
elasticsearch:
namespace: ${SW_NAMESPACE:"skyworking"}
clusterNodes: ${SW_STORAGE_ES_CLUSTER_NODES:10.0.10.102:9200}
protocol: ${SW_STORAGE_ES_HTTP_PROTOCOL:"http"}
connectTimeout: ${SW_STORAGE_ES_CONNECT_TIMEOUT:3000}
socketTimeout: ${SW_STORAGE_ES_SOCKET_TIMEOUT:30000}
responseTimeout: ${SW_STORAGE_ES_RESPONSE_TIMEOUT:15000}
numHttpClientThread: ${SW_STORAGE_ES_NUM_HTTP_CLIENT_THREAD:0}
user: ${SW_ES_USER:""}
password: ${SW_ES_PASSWORD:""}
trustStorePath: ${SW_STORAGE_ES_SSL_JKS_PATH:""}
trustStorePass: ${SW_STORAGE_ES_SSL_JKS_PASS:""}
secretsManagementFile: ${SW_ES_SECRETS_MANAGEMENT_FILE:""} # Secrets management file in the properties format includes the username, password, which are managed by 3rd party tool.
dayStep: ${SW_STORAGE_DAY_STEP:1} # Represent the number of days in the one minute/hour/day index.
indexShardsNumber: ${SW_STORAGE_ES_INDEX_SHARDS_NUMBER:1} # Shard number of new indexes
indexReplicasNumber: ${SW_STORAGE_ES_INDEX_REPLICAS_NUMBER:1} # Replicas number of new indexes
# Super data set has been defined in the codes, such as trace segments.The following 3 config would be improve es performance when storage super size data in es.
superDatasetDayStep: ${SW_SUPERDATASET_STORAGE_DAY_STEP:-1} # Represent the number of days in the super size dataset record index, the default value is the same as dayStep when the value is less than 0
superDatasetIndexShardsFactor: ${SW_STORAGE_ES_SUPER_DATASET_INDEX_SHARDS_FACTOR:5} # This factor provides more shards for the super data set, shards number = indexShardsNumber * superDatasetIndexShardsFactor. Also, this factor effects Zipkin and Jaeger traces.
superDatasetIndexReplicasNumber: ${SW_STORAGE_ES_SUPER_DATASET_INDEX_REPLICAS_NUMBER:0} # Represent the replicas number in the super size dataset record index, the default value is 0.
indexTemplateOrder: ${SW_STORAGE_ES_INDEX_TEMPLATE_ORDER:0} # the order of index template
bulkActions: ${SW_STORAGE_ES_BULK_ACTIONS:5000} # Execute the async bulk record data every ${SW_STORAGE_ES_BULK_ACTIONS} requests
# flush the bulk every 10 seconds whatever the number of requests
# INT(flushInterval * 2/3) would be used for index refresh period.
flushInterval: ${SW_STORAGE_ES_FLUSH_INTERVAL:15}
concurrentRequests: ${SW_STORAGE_ES_CONCURRENT_REQUESTS:2} # the number of concurrent requests
resultWindowMaxSize: ${SW_STORAGE_ES_QUERY_MAX_WINDOW_SIZE:10000}
metadataQueryMaxSize: ${SW_STORAGE_ES_QUERY_MAX_SIZE:10000}
scrollingBatchSize: ${SW_STORAGE_ES_SCROLLING_BATCH_SIZE:5000}
segmentQueryMaxSize: ${SW_STORAGE_ES_QUERY_SEGMENT_SIZE:200}
profileTaskQueryMaxSize: ${SW_STORAGE_ES_QUERY_PROFILE_TASK_SIZE:200}
profileDataQueryBatchSize: ${SW_STORAGE_ES_QUERY_PROFILE_DATA_BATCH_SIZE:100}
oapAnalyzer: ${SW_STORAGE_ES_OAP_ANALYZER:"{\"analyzer\":{\"oap_analyzer\":{\"type\":\"stop\"}}}"} # the oap analyzer.
oapLogAnalyzer: ${SW_STORAGE_ES_OAP_LOG_ANALYZER:"{\"analyzer\":{\"oap_log_analyzer\":{\"type\":\"standard\"}}}"} # the oap log analyzer. It could be customized by the ES analyzer configuration to support more language log formats, such as Chinese log, Japanese log and etc.
advanced: ${SW_STORAGE_ES_ADVANCED:""}
# Enable shard metrics and records indices into multi-physical indices, one index template per metric/meter aggregation function or record.
logicSharding: ${SW_STORAGE_ES_LOGIC_SHARDING:false}
agent-analyzer:
selector: ${SW_AGENT_ANALYZER:default}
default:
# The default sampling rate and the default trace latency time configured by the 'traceSamplingPolicySettingsFile' file.
traceSamplingPolicySettingsFile: ${SW_TRACE_SAMPLING_POLICY_SETTINGS_FILE:trace-sampling-policy-settings.yml}
slowDBAccessThreshold: ${SW_SLOW_DB_THRESHOLD:default:200,mongodb:100} # The slow database access thresholds. Unit ms.
forceSampleErrorSegment: ${SW_FORCE_SAMPLE_ERROR_SEGMENT:true} # When sampling mechanism active, this config can open(true) force save some error segment. true is default.
segmentStatusAnalysisStrategy: ${SW_SEGMENT_STATUS_ANALYSIS_STRATEGY:FROM_SPAN_STATUS} # Determine the final segment status from the status of spans. Available values are `FROM_SPAN_STATUS`, `FROM_ENTRY_SPAN` and `FROM_FIRST_SPAN`. `FROM_SPAN_STATUS` represents the segment status would be error if any span is in error status. `FROM_ENTRY_SPAN` means the segment status would be determined by the status of entry spans only. `FROM_FIRST_SPAN` means the segment status would be determined by the status of the first span only.
# Nginx and Envoy agents can't get the real remote address.
# Exit spans with the component in the list would not generate the client-side instance relation metrics.
noUpstreamRealAddressAgents: ${SW_NO_UPSTREAM_REAL_ADDRESS:6000,9000}
meterAnalyzerActiveFiles: ${SW_METER_ANALYZER_ACTIVE_FILES:datasource,threadpool,satellite} # Which files could be meter analyzed, files split by ","
log-analyzer:
selector: ${SW_LOG_ANALYZER:default}
default:
lalFiles: ${SW_LOG_LAL_FILES:default}
malFiles: ${SW_LOG_MAL_FILES:""}
event-analyzer:
selector: ${SW_EVENT_ANALYZER:default}
default:
receiver-sharing-server:
selector: ${SW_RECEIVER_SHARING_SERVER:default}
default:
# For HTTP server
restHost: ${SW_RECEIVER_SHARING_REST_HOST:0.0.0.0}
restPort: ${SW_RECEIVER_SHARING_REST_PORT:0}
restContextPath: ${SW_RECEIVER_SHARING_REST_CONTEXT_PATH:/}
restMaxThreads: ${SW_RECEIVER_SHARING_REST_MAX_THREADS:200}
restIdleTimeOut: ${SW_RECEIVER_SHARING_REST_IDLE_TIMEOUT:30000}
restAcceptQueueSize: ${SW_RECEIVER_SHARING_REST_QUEUE_SIZE:0}
httpMaxRequestHeaderSize: ${SW_RECEIVER_SHARING_HTTP_MAX_REQUEST_HEADER_SIZE:8192}
# For gRPC server
gRPCHost: ${SW_RECEIVER_GRPC_HOST:0.0.0.0}
gRPCPort: ${SW_RECEIVER_GRPC_PORT:0}
maxConcurrentCallsPerConnection: ${SW_RECEIVER_GRPC_MAX_CONCURRENT_CALL:0}
maxMessageSize: ${SW_RECEIVER_GRPC_MAX_MESSAGE_SIZE:0}
gRPCThreadPoolQueueSize: ${SW_RECEIVER_GRPC_POOL_QUEUE_SIZE:0}
gRPCThreadPoolSize: ${SW_RECEIVER_GRPC_THREAD_POOL_SIZE:0}
gRPCSslEnabled: ${SW_RECEIVER_GRPC_SSL_ENABLED:false}
gRPCSslKeyPath: ${SW_RECEIVER_GRPC_SSL_KEY_PATH:""}
gRPCSslCertChainPath: ${SW_RECEIVER_GRPC_SSL_CERT_CHAIN_PATH:""}
gRPCSslTrustedCAsPath: ${SW_RECEIVER_GRPC_SSL_TRUSTED_CAS_PATH:""}
authentication: ${SW_AUTHENTICATION:""}
receiver-register:
selector: ${SW_RECEIVER_REGISTER:default}
default:
receiver-trace:
selector: ${SW_RECEIVER_TRACE:default}
default:
receiver-jvm:
selector: ${SW_RECEIVER_JVM:default}
default:
receiver-clr:
selector: ${SW_RECEIVER_CLR:default}
default:
receiver-profile:
selector: ${SW_RECEIVER_PROFILE:default}
default:
receiver-zabbix:
selector: ${SW_RECEIVER_ZABBIX:-}
default:
port: ${SW_RECEIVER_ZABBIX_PORT:10051}
host: ${SW_RECEIVER_ZABBIX_HOST:0.0.0.0}
activeFiles: ${SW_RECEIVER_ZABBIX_ACTIVE_FILES:agent}
service-mesh:
selector: ${SW_SERVICE_MESH:default}
default:
envoy-metric:
selector: ${SW_ENVOY_METRIC:default}
default:
acceptMetricsService: ${SW_ENVOY_METRIC_SERVICE:true}
alsHTTPAnalysis: ${SW_ENVOY_METRIC_ALS_HTTP_ANALYSIS:""}
alsTCPAnalysis: ${SW_ENVOY_METRIC_ALS_TCP_ANALYSIS:""}
# `k8sServiceNameRule` allows you to customize the service name in ALS via Kubernetes metadata,
# the available variables are `pod`, `service`, f.e., you can use `${service.metadata.name}-${pod.metadata.labels.version}`
# to append the version number to the service name.
# Be careful, when using environment variables to pass this configuration, use single quotes(`''`) to avoid it being evaluated by the shell.
k8sServiceNameRule: ${K8S_SERVICE_NAME_RULE:"${pod.metadata.labels.(service.istio.io/canonical-name)}"}
prometheus-fetcher:
selector: ${SW_PROMETHEUS_FETCHER:-}
default:
enabledRules: ${SW_PROMETHEUS_FETCHER_ENABLED_RULES:"self"}
maxConvertWorker: ${SW_PROMETHEUS_FETCHER_NUM_CONVERT_WORKER:-1}
kafka-fetcher:
selector: ${SW_KAFKA_FETCHER:-}
default:
bootstrapServers: ${SW_KAFKA_FETCHER_SERVERS:localhost:9092}
namespace: ${SW_NAMESPACE:""}
partitions: ${SW_KAFKA_FETCHER_PARTITIONS:3}
replicationFactor: ${SW_KAFKA_FETCHER_PARTITIONS_FACTOR:2}
enableNativeProtoLog: ${SW_KAFKA_FETCHER_ENABLE_NATIVE_PROTO_LOG:true}
enableNativeJsonLog: ${SW_KAFKA_FETCHER_ENABLE_NATIVE_JSON_LOG:true}
consumers: ${SW_KAFKA_FETCHER_CONSUMERS:1}
kafkaHandlerThreadPoolSize: ${SW_KAFKA_HANDLER_THREAD_POOL_SIZE:-1}
kafkaHandlerThreadPoolQueueSize: ${SW_KAFKA_HANDLER_THREAD_POOL_QUEUE_SIZE:-1}
receiver-meter:
selector: ${SW_RECEIVER_METER:default}
default:
receiver-otel:
selector: ${SW_OTEL_RECEIVER:default}
default:
enabledHandlers: ${SW_OTEL_RECEIVER_ENABLED_HANDLERS:"oc,otlp"}
enabledOtelRules: ${SW_OTEL_RECEIVER_ENABLED_OTEL_RULES:"istio-controlplane,k8s-node,oap,vm,mysql,postgresql"}
receiver-zipkin:
selector: ${SW_RECEIVER_ZIPKIN:-}
default:
searchableTracesTags: ${SW_ZIPKIN_SEARCHABLE_TAG_KEYS:http.method}
# The sample rate precision is 1/10000, should be between 0 and 10000
sampleRate: ${SW_ZIPKIN_SAMPLE_RATE:10000}
## The below configs are for OAP collect zipkin trace from HTTP
enableHttpCollector: ${SW_ZIPKIN_HTTP_COLLECTOR_ENABLED:true}
restHost: ${SW_RECEIVER_ZIPKIN_REST_HOST:0.0.0.0}
restPort: ${SW_RECEIVER_ZIPKIN_REST_PORT:9411}
restContextPath: ${SW_RECEIVER_ZIPKIN_REST_CONTEXT_PATH:/}
restMaxThreads: ${SW_RECEIVER_ZIPKIN_REST_MAX_THREADS:200}
restIdleTimeOut: ${SW_RECEIVER_ZIPKIN_REST_IDLE_TIMEOUT:30000}
restAcceptQueueSize: ${SW_RECEIVER_ZIPKIN_REST_QUEUE_SIZE:0}
## The below configs are for OAP collect zipkin trace from kafka
enableKafkaCollector: ${SW_ZIPKIN_KAFKA_COLLECTOR_ENABLED:false}
kafkaBootstrapServers: ${SW_ZIPKIN_KAFKA_SERVERS:localhost:9092}
kafkaGroupId: ${SW_ZIPKIN_KAFKA_GROUP_ID:zipkin}
kafkaTopic: ${SW_ZIPKIN_KAFKA_TOPIC:zipkin}
# Kafka consumer config, JSON format as Properties. If it contains the same key with above, would override.
kafkaConsumerConfig: ${SW_ZIPKIN_KAFKA_CONSUMER_CONFIG:"{\"auto.offset.reset\":\"earliest\",\"enable.auto.commit\":true}"}
# The Count of the topic consumers
kafkaConsumers: ${SW_ZIPKIN_KAFKA_CONSUMERS:1}
kafkaHandlerThreadPoolSize: ${SW_ZIPKIN_KAFKA_HANDLER_THREAD_POOL_SIZE:-1}
kafkaHandlerThreadPoolQueueSize: ${SW_ZIPKIN_KAFKA_HANDLER_THREAD_POOL_QUEUE_SIZE:-1}
receiver-browser:
selector: ${SW_RECEIVER_BROWSER:default}
default:
# The sample rate precision is 1/10000. 10000 means 100% sample in default.
sampleRate: ${SW_RECEIVER_BROWSER_SAMPLE_RATE:10000}
receiver-log:
selector: ${SW_RECEIVER_LOG:default}
default:
query:
selector: ${SW_QUERY:graphql}
graphql:
# Enable the log testing API to test the LAL.
# NOTE: This API evaluates untrusted code on the OAP server.
# A malicious script can do significant damage (steal keys and secrets, remove files and directories, install malware, etc).
# As such, please enable this API only when you completely trust your users.
enableLogTestTool: ${SW_QUERY_GRAPHQL_ENABLE_LOG_TEST_TOOL:false}
# Maximum complexity allowed for the GraphQL query that can be used to
# abort a query if the total number of data fields queried exceeds the defined threshold.
maxQueryComplexity: ${SW_QUERY_MAX_QUERY_COMPLEXITY:1000}
# Allow user add, disable and update UI template
enableUpdateUITemplate: ${SW_ENABLE_UPDATE_UI_TEMPLATE:false}
# "On demand log" allows users to fetch Pod containers' log in real time,
# because this might expose secrets in the logs (if any), users need
# to enable this manually, and add permissions to OAP cluster role.
enableOnDemandPodLog: ${SW_ENABLE_ON_DEMAND_POD_LOG:false}
# This module is for Zipkin query API and support zipkin-lens UI
query-zipkin:
selector: ${SW_QUERY_ZIPKIN:-}
default:
# For HTTP server
restHost: ${SW_QUERY_ZIPKIN_REST_HOST:0.0.0.0}
restPort: ${SW_QUERY_ZIPKIN_REST_PORT:9412}
restContextPath: ${SW_QUERY_ZIPKIN_REST_CONTEXT_PATH:/zipkin}
restMaxThreads: ${SW_QUERY_ZIPKIN_REST_MAX_THREADS:200}
restIdleTimeOut: ${SW_QUERY_ZIPKIN_REST_IDLE_TIMEOUT:30000}
restAcceptQueueSize: ${SW_QUERY_ZIPKIN_REST_QUEUE_SIZE:0}
# Default look back for traces and autocompleteTags, 1 day in millis
lookback: ${SW_QUERY_ZIPKIN_LOOKBACK:86400000}
# The Cache-Control max-age (seconds) for serviceNames, remoteServiceNames and spanNames
namesMaxAge: ${SW_QUERY_ZIPKIN_NAMES_MAX_AGE:300}
## The below config are OAP support for zipkin-lens UI
# Default traces query max size
uiQueryLimit: ${SW_QUERY_ZIPKIN_UI_QUERY_LIMIT:10}
# Default look back on the UI for search traces, 15 minutes in millis
uiDefaultLookback: ${SW_QUERY_ZIPKIN_UI_DEFAULT_LOOKBACK:900000}
alarm:
selector: ${SW_ALARM:default}
default:
telemetry:
selector: ${SW_TELEMETRY:none}
none:
prometheus:
host: ${SW_TELEMETRY_PROMETHEUS_HOST:0.0.0.0}
port: ${SW_TELEMETRY_PROMETHEUS_PORT:1234}
sslEnabled: ${SW_TELEMETRY_PROMETHEUS_SSL_ENABLED:false}
sslKeyPath: ${SW_TELEMETRY_PROMETHEUS_SSL_KEY_PATH:""}
sslCertChainPath: ${SW_TELEMETRY_PROMETHEUS_SSL_CERT_CHAIN_PATH:""}
configuration:
selector: ${SW_CONFIGURATION:none}
none:
grpc:
host: ${SW_DCS_SERVER_HOST:""}
port: ${SW_DCS_SERVER_PORT:80}
clusterName: ${SW_DCS_CLUSTER_NAME:SkyWalking}
period: ${SW_DCS_PERIOD:20}
apollo:
apolloMeta: ${SW_CONFIG_APOLLO:http://localhost:8080}
apolloCluster: ${SW_CONFIG_APOLLO_CLUSTER:default}
apolloEnv: ${SW_CONFIG_APOLLO_ENV:""}
appId: ${SW_CONFIG_APOLLO_APP_ID:skywalking}
period: ${SW_CONFIG_APOLLO_PERIOD:60}
zookeeper:
period: ${SW_CONFIG_ZK_PERIOD:60} # Unit seconds, sync period. Default fetch every 60 seconds.
namespace: ${SW_CONFIG_ZK_NAMESPACE:/default}
hostPort: ${SW_CONFIG_ZK_HOST_PORT:localhost:2181}
# Retry Policy
baseSleepTimeMs: ${SW_CONFIG_ZK_BASE_SLEEP_TIME_MS:1000} # initial amount of time to wait between retries
maxRetries: ${SW_CONFIG_ZK_MAX_RETRIES:3} # max number of times to retry
etcd:
period: ${SW_CONFIG_ETCD_PERIOD:60} # Unit seconds, sync period. Default fetch every 60 seconds.
endpoints: ${SW_CONFIG_ETCD_ENDPOINTS:http://localhost:2379}
namespace: ${SW_CONFIG_ETCD_NAMESPACE:/skywalking}
authentication: ${SW_CONFIG_ETCD_AUTHENTICATION:false}
user: ${SW_CONFIG_ETCD_USER:}
password: ${SW_CONFIG_ETCD_password:}
consul:
# Consul host and ports, separated by comma, e.g. 1.2.3.4:8500,2.3.4.5:8500
hostAndPorts: ${SW_CONFIG_CONSUL_HOST_AND_PORTS:1.2.3.4:8500}
# Sync period in seconds. Defaults to 60 seconds.
period: ${SW_CONFIG_CONSUL_PERIOD:60}
# Consul aclToken
aclToken: ${SW_CONFIG_CONSUL_ACL_TOKEN:""}
k8s-configmap:
period: ${SW_CONFIG_CONFIGMAP_PERIOD:60}
namespace: ${SW_CLUSTER_K8S_NAMESPACE:default}
labelSelector: ${SW_CLUSTER_K8S_LABEL:app=collector,release=skywalking}
nacos:
# Nacos Server Host
serverAddr: ${SW_CONFIG_NACOS_SERVER_ADDR:127.0.0.1}
# Nacos Server Port
port: ${SW_CONFIG_NACOS_SERVER_PORT:8848}
# Nacos Configuration Group
group: ${SW_CONFIG_NACOS_SERVER_GROUP:skywalking}
# Nacos Configuration namespace
namespace: ${SW_CONFIG_NACOS_SERVER_NAMESPACE:}
# Unit seconds, sync period. Default fetch every 60 seconds.
period: ${SW_CONFIG_NACOS_PERIOD:60}
# Nacos auth username
username: ${SW_CONFIG_NACOS_USERNAME:""}
password: ${SW_CONFIG_NACOS_PASSWORD:""}
# Nacos auth accessKey
accessKey: ${SW_CONFIG_NACOS_ACCESSKEY:""}
secretKey: ${SW_CONFIG_NACOS_SECRETKEY:""}
exporter:
selector: ${SW_EXPORTER:-}
grpc:
targetHost: ${SW_EXPORTER_GRPC_HOST:127.0.0.1}
targetPort: ${SW_EXPORTER_GRPC_PORT:9870}
health-checker:
selector: ${SW_HEALTH_CHECKER:-}
default:
checkIntervalSeconds: ${SW_HEALTH_CHECKER_INTERVAL_SECONDS:5}
configuration-discovery:
selector: ${SW_CONFIGURATION_DISCOVERY:default}
default:
disableMessageDigest: ${SW_DISABLE_MESSAGE_DIGEST:false}
receiver-event:
selector: ${SW_RECEIVER_EVENT:default}
default:
receiver-ebpf:
selector: ${SW_RECEIVER_EBPF:default}
default:
=========================================================================================================
# 启动脚本配置
vim /home/kami/running/skywalking-apm/bin/oapService_supervisor.sh
# Foreground launcher for the SkyWalking OAP server. The JVM replaces this
# shell via exec (no nohup, no backgrounding), so a process manager that
# started the script ends up tracking the Java process itself.

# Locate the installation root relative to this script, unless the caller
# already provided OAP_HOME.
PRG="$0"
PRGDIR=$(dirname "$PRG")
if [ -z "$OAP_HOME" ]; then
    OAP_HOME=$(cd "$PRGDIR/.." > /dev/null || exit 1; pwd)
fi

OAP_LOG_DIR="/data/var/opt/skywalking-oap/logs/"

# JAVA_OPTS from the environment wins; otherwise use a small default heap.
JAVA_OPTS="${JAVA_OPTS:- -Xms256M -Xmx512M}"

# Make sure the log directory exists before the JVM is started.
[ -d "${OAP_LOG_DIR}" ] || mkdir -p "${OAP_LOG_DIR}"

# Use the JDK under JAVA_HOME when it is set, else the `java` found on PATH.
if [ -n "$JAVA_HOME" ]; then
    _RUNJAVA=${JAVA_HOME}/bin/java
else
    _RUNJAVA=java
fi

# Build the classpath: start with the config directory, then prepend every
# jar found under oap-libs.
CLASSPATH="$OAP_HOME/config:$CLASSPATH"
for jar in "$OAP_HOME"/oap-libs/*.jar; do
    CLASSPATH="$jar:$CLASSPATH"
done

OAP_OPTIONS=" -Doap.logDir=${OAP_LOG_DIR}"

# stderr is written to oap.log in the log directory; stdout is discarded.
eval exec "\"$_RUNJAVA\" ${JAVA_OPTS} ${OAP_OPTIONS} -classpath $CLASSPATH org.apache.skywalking.oap.server.starter.OAPServerStartUp 2>${OAP_LOG_DIR}/oap.log 1> /dev/null"
----------------------------------------------------------------------------------------------
vim /home/kami/running/skywalking-apm/bin/webappService_supervisor.sh
# Foreground launcher for the SkyWalking web UI. The JVM replaces this shell
# via exec (no nohup, no backgrounding), so a process manager that started
# the script ends up tracking the Java process itself.

# Locate the installation root relative to this script, unless the caller
# already provided WEBAPP_HOME.
PRG="$0"
PRGDIR=$(dirname "$PRG")
if [ -z "$WEBAPP_HOME" ]; then
    WEBAPP_HOME=$(cd "$PRGDIR/.." > /dev/null || exit 1; pwd)
fi

WEBAPP_LOG_DIR="/data/var/opt/skywalking-webappService/logs"
LOG_FILE_LOCATION=${WEBAPP_LOG_DIR}/webapp.log

# JAVA_OPTS from the environment wins; otherwise use a small default heap.
JAVA_OPTS="${JAVA_OPTS:- -Xms256M -Xmx512M}"

# Directory that holds skywalking-webapp.jar and webapp.yml.
JAR_PATH="${WEBAPP_HOME}/webapp"

# Make sure the log directory exists before the JVM is started.
[ -d "${WEBAPP_LOG_DIR}" ] || mkdir -p "${WEBAPP_LOG_DIR}"

# Use the JDK under JAVA_HOME when it is set, else the `java` found on PATH.
if [ -n "$JAVA_HOME" ]; then
    _RUNJAVA=${JAVA_HOME}/bin/java
else
    _RUNJAVA=java
fi

# stderr is written to webapp-console.log; stdout is discarded. The
# continuation lines stay unindented so the evaluated string is unchanged.
eval exec "\"$_RUNJAVA\" ${JAVA_OPTS} -jar ${JAR_PATH}/skywalking-webapp.jar \
--spring.config.location=${JAR_PATH}/webapp.yml \
--logging.file=${LOG_FILE_LOCATION} \
2>${WEBAPP_LOG_DIR}/webapp-console.log 1> /dev/null"
----------------------------------------------------------------------------------------------
# Write the supervisor program definition for the OAP server.
# The file is overwritten ('>') rather than appended to ('>>'), so running
# this step again does not add a second [program:skywalking-oap] section.
# The quoted 'EOF' delimiter keeps the shell from expanding anything inside.
cat > /data/opt/supervisor/conf.d/skywalking-oap.conf <<'EOF'
; skywalking-oap config file
[program:skywalking-oap]
environment=JAVA_HOME=/data/opt/jdk
command=sh /home/kami/running/skywalking-apm/bin/oapService_supervisor.sh
directory=/home/kami/running/skywalking-apm
user=kami
numprocs=1
stdout_logfile=/data/var/opt/skywalking-oap/logs/stdout.log
stdout_logfile_maxbytes=64MB
stdout_logfile_backups=1
redirect_stderr=true
autostart=true
autorestart=true
startsecs=6
stopwaitsecs=1
killasgroup=true
priority=2
EOF
# Write the supervisor program definition for the SkyWalking web UI.
# The file is overwritten ('>') rather than appended to ('>>'), so running
# this step again does not add a second [program:skywalking-webappService]
# section. The quoted 'EOF' delimiter disables shell expansion in the body.
cat > /data/opt/supervisor/conf.d/skywalking-webappService.conf <<'EOF'
; skywalking-webappService config file
[program:skywalking-webappService]
environment=JAVA_HOME=/data/opt/jdk
command=sh /home/kami/running/skywalking-apm/bin/webappService_supervisor.sh
directory=/home/kami/running/skywalking-apm
user=kami
numprocs=1
stdout_logfile=/data/var/opt/skywalking-webappService/logs/stdout.log
stdout_logfile_maxbytes=64MB
stdout_logfile_backups=1
redirect_stderr=true
autostart=true
autorestart=true
startsecs=6
stopwaitsecs=1
killasgroup=true
priority=2
EOF
mkdir -p /data/var/opt/{skywalking-oap,skywalking-webappService}/logs/
# 若需通过kafka接入Agent数据,配置config/application.yml中kafka-fetcher,Agent客户端配置加入-Dskywalking.plugin.kafka.bootstrap_servers=IP:PORT即可
agent接入
Java接入
wget https://mirrors.tuna.tsinghua.edu.cn/apache/skywalking/java-agent/8.12.0/apache-skywalking-java-agent-8.12.0.tgz
tar -zxvf apache-skywalking-java-agent-8.12.0.tgz -C /data/opt/
#这里使用的为SuperVisor管理程序,修改环境变量及启动命令即可
vim /data/opt/supervisor/conf.d/anomaly_restapi.conf
environment=JAVA_HOME=/data/opt/jdk,SW_AGENT_NAME=xxxxx,SW_AGENT_COLLECTOR_BACKEND_SERVICES=10.0.10.101:11800
command=/data/opt/jdk/bin/java -javaagent:/data/opt/skywalking-agent/skywalking-agent.jar -jar xxxxxxx
自监控配置
# 开启SkyWalking遥测数据
vim /home/kami/running/skywalking-apm/config/application.yml
prometheus-fetcher:
selector: ${SW_PROMETHEUS_FETCHER:default}
default:
enabledRules: ${SW_PROMETHEUS_FETCHER_ENABLED_RULES:"self"}
maxConvertWorker: ${SW_PROMETHEUS_FETCHER_NUM_CONVERT_WORKER:-1}
active: ${SW_PROMETHEUS_FETCHER_ACTIVE:true}
telemetry:
selector: ${SW_TELEMETRY:prometheus}
none:
prometheus:
host: ${SW_TELEMETRY_PROMETHEUS_HOST:10.0.10.101}
port: ${SW_TELEMETRY_PROMETHEUS_PORT:1234}
sslEnabled: ${SW_TELEMETRY_PROMETHEUS_SSL_ENABLED:false}
sslKeyPath: ${SW_TELEMETRY_PROMETHEUS_SSL_KEY_PATH:""}
sslCertChainPath: ${SW_TELEMETRY_PROMETHEUS_SSL_CERT_CHAIN_PATH:""}
vim /home/kami/running/skywalking-apm/config/fetcher-prom-rules/self.yaml
staticConfig:
targets:
- url: http://10.0.10.101:1234
Linux主机监控
#部署 Node Exporter
#https://prometheus.io/download/#node_exporter
wget https://github.com/prometheus/node_exporter/releases/download/v1.4.0/node_exporter-1.4.0.linux-amd64.tar.gz
tar -zxvf node_exporter-1.4.0.linux-amd64.tar.gz
ln -s node_exporter-1.4.0.linux-amd64 node_exporter
#curl http://localhost:9100/metrics
# Write the supervisor program definition for node_exporter.
# The file is overwritten ('>') rather than appended to ('>>'), so running
# this step again does not add a second [program:node_exporter] section.
# node_exporter is a native binary, so the JAVA_HOME environment entry copied
# from the Java services has been dropped.
cat > /data/opt/supervisor/conf.d/node_exporter.conf <<'EOF'
; node_exporter config file
[program:node_exporter]
command=/data/opt/node_exporter/node_exporter
directory=/data/opt/node_exporter
user=kami
numprocs=1
stdout_logfile=/data/var/opt/node_exporter/logs/stdout.log
stdout_logfile_maxbytes=64MB
stdout_logfile_backups=1
redirect_stderr=true
autostart=true
autorestart=true
startsecs=6
stopwaitsecs=1
killasgroup=true
priority=2
EOF
mkdir -p /data/var/opt/node_exporter/logs/
#OpenTelemetry 收集器配置
wget https://github.com/open-telemetry/opentelemetry-collector-releases/releases/download/v0.60.0/otelcol_0.60.0_linux_amd64.rpm
sudo rpm -ivh otelcol_0.60.0_linux_amd64.rpm
# Write the OpenTelemetry Collector configuration for Linux host monitoring.
# - The file is replaced, not appended to: appending a second document body to
#   an existing config.yaml (the otelcol package is expected to install a
#   default one) would yield duplicate top-level keys.
# - /etc/otelcol is a system directory, so the write goes through sudo tee,
#   matching the sudo used for the rpm install and the service restart.
# - The quoted 'EOF' delimiter disables shell expansion in the body.
sudo tee /etc/otelcol/config.yaml > /dev/null <<'EOF'
receivers:
  prometheus:
    config:
      scrape_configs:
        - job_name: "vm-monitoring" # make sure to use this in the vm.yaml to filter only VM metrics
          scrape_interval: 10s
          static_configs:
            - targets: ["10.0.10.101:9100"]
processors:
  batch:
exporters:
  otlp:
    endpoint: "10.0.10.101:11800" # The OAP Server address
    tls:
      # The OAP gRPC port is plaintext (gRPCSslEnabled defaults to false in
      # application.yml), so the exporter must not attempt a TLS handshake.
      insecure: true
  # Exports data to the console
  logging:
    logLevel: debug
service:
  pipelines:
    metrics:
      receivers: [prometheus]
      processors: [batch]
      exporters: [otlp, logging]
EOF
sudo systemctl restart otelcol
#或者使用docker运行OpenTelemetry 收集器
#sudo docker run -d --restart=unless-stopped --name opentelemetry -v /export/servers/opentelemetry-collector/config.yaml:/etc/otelcol/config.yaml -v /etc/localtime:/etc/localtime otel/opentelemetry-collector:0.60.0
#检查SkyWalking的config/application.yml文件内receiver-otel/default/enabledHandlers是否存在otlp
MySQL
#https://prometheus.io/download/#mysqld_exporter
wget https://github.com/prometheus/mysqld_exporter/releases/download/v0.14.0/mysqld_exporter-0.14.0.linux-amd64.tar.gz
tar -zxvf mysqld_exporter-0.14.0.linux-amd64.tar.gz
ln -s mysqld_exporter-0.14.0.linux-amd64 mysqld_exporter
# Write the MySQL client credentials file read by mysqld_exporter
# (passed to it via --config.my-cnf).
# The file is overwritten ('>') rather than appended to ('>>'), so running
# this step again does not duplicate the [client] section. The quoted 'EOF'
# delimiter keeps characters such as '$' in a real password literal.
# NOTE: the password below is a placeholder; replace it with the real one.
cat > /data/opt/mysqld_exporter/my.cnf <<'EOF'
[client]
port=3306
user=kami
password=123456
EOF
# Write the supervisor program definition for mysqld_exporter.
# The file is overwritten ('>') rather than appended to ('>>'), so running
# this step again does not add a second [program:mysqld_exporter] section.
# mysqld_exporter is a native binary, so the JAVA_HOME environment entry
# copied from the Java services has been dropped.
cat > /data/opt/supervisor/conf.d/mysqld_exporter.conf <<'EOF'
; mysqld_exporter config file
[program:mysqld_exporter]
command=/data/opt/mysqld_exporter/mysqld_exporter --config.my-cnf=/data/opt/mysqld_exporter/my.cnf
directory=/data/opt/mysqld_exporter
user=kami
numprocs=1
stdout_logfile=/data/var/opt/mysqld_exporter/logs/stdout.log
stdout_logfile_maxbytes=64MB
stdout_logfile_backups=1
redirect_stderr=true
autostart=true
autorestart=true
startsecs=6
stopwaitsecs=1
killasgroup=true
priority=2
EOF
mkdir -p /data/var/opt/mysqld_exporter/logs/
#OpenTelemetry 收集器配置
wget https://github.com/open-telemetry/opentelemetry-collector-releases/releases/download/v0.60.0/otelcol_0.60.0_linux_amd64.rpm
sudo rpm -ivh otelcol_0.60.0_linux_amd64.rpm
# Write the OpenTelemetry Collector configuration for MySQL monitoring.
# - The file is replaced, not appended to: appending another full config body
#   to an existing config.yaml would yield duplicate top-level keys.
# - /etc/otelcol is a system directory, so the write goes through sudo tee,
#   matching the sudo used for the rpm install and the service restart.
# - The quoted 'EOF' delimiter disables shell expansion in the body.
sudo tee /etc/otelcol/config.yaml > /dev/null <<'EOF'
# This file replaces any previous collector configuration. If the same
# collector must also scrape node_exporter, add that job as a further entry
# under scrape_configs instead of writing a second file body.
receivers:
  prometheus:
    config:
      scrape_configs:
        - job_name: 'mysql-monitoring'
          scrape_interval: 5s
          static_configs:
            - targets: ['10.0.10.101:9104']
              labels:
                host_name: 10.0.10.102
processors:
  batch:
exporters:
  otlp:
    endpoint: 10.0.10.101:11800
    tls:
      insecure: true
service:
  pipelines:
    metrics:
      receivers:
        - prometheus
      processors:
        - batch
      exporters:
        - otlp
EOF
sudo systemctl restart otelcol
文章评论
Thanks.