Kafka duplicates some messages hundreds of times - Java
I'm running a cluster of 3 Kafka brokers on Kafka 2.3.0. I also have a streaming application that consumes data from another Kafka cluster, transforms it, and pushes the result to the aforementioned 3-broker cluster.
The streaming application has a producer written in Java using Spring Cloud Stream Greenwich.SR1. The producer pushes messages with the following code:
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.cloud.stream.annotation.EnableBinding;
import org.springframework.integration.support.MessageBuilder;
import org.springframework.kafka.support.KafkaHeaders;
import org.springframework.messaging.Message;
import org.springframework.stereotype.Component;

@Slf4j
@Component
@EnableBinding(SensorDataBinding.class)
public class SensorDataProducer {

    private final SensorDataBinding sensorDataOut;
    private final long sendTimeoutInMilliseconds;

    public SensorDataProducer(SensorDataBinding binding,
                              @Value("${sendTimeoutInMilliseconds}") long sendTimeoutInMilliseconds) {
        this.sensorDataOut = binding;
        this.sendTimeoutInMilliseconds = sendTimeoutInMilliseconds;
    }

    public void produce(SensorData sensorMeasurement) {
        send(sensorMeasurement);
    }

    private void send(SensorData sensorMeasurement) {
        log.trace("sending message with contents: {}", sensorMeasurement.toString());
        Message<SensorData> message = MessageBuilder
                .withPayload(sensorMeasurement)
                .setHeader(KafkaHeaders.MESSAGE_KEY, getMessageKey(sensorMeasurement))
                .build();
        failSafeMessageSend(message);
    }

    private void failSafeMessageSend(Message<SensorData> message) {
        boolean sendSucceeded = false;
        do {
            try {
                this.sensorDataOut.sensorDataOut().send(message, this.sendTimeoutInMilliseconds);
                sendSucceeded = true;
            }
            catch (Exception ex) {
                log.error("Exception when sending message: {}", ex.getMessage());
            }
        }
        while (!sendSucceeded);
    }

    private byte[] getMessageKey(SensorData measurement) {
        return (measurement.getMessageKey()).getBytes();
    }
}
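Because failSafeMessageSend retries on any exception, a send that times out after the record has already reached the producer's buffer can be re-sent and become a duplicate on its own. As a minimal sketch (the attempt counter and extra log lines are additions of mine, not part of the original class), the loop can be instrumented to correlate duplicates seen on the topic with client-side re-sends:

private void failSafeMessageSend(Message<SensorData> message) {
    int attempts = 0;
    boolean sendSucceeded = false;
    do {
        attempts++;
        try {
            this.sensorDataOut.sensorDataOut().send(message, this.sendTimeoutInMilliseconds);
            sendSucceeded = true;
        }
        catch (Exception ex) {
            // A timeout does not prove the record was NOT delivered; re-sending here can
            // itself put a duplicate on the topic.
            log.error("Send attempt {} failed for key {}: {}", attempts,
                    message.getPayload().getMessageKey(), ex.getMessage());
        }
    }
    while (!sendSucceeded);
    if (attempts > 1) {
        log.warn("Message with key {} needed {} send attempts",
                message.getPayload().getMessageKey(), attempts);
    }
}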
Producer config:
ProducerConfig values:
acks = 1
batch.size = 16384
bootstrap.servers = [localhost:9095]
buffer.memory = 33554432
client.id =
compression.type = none
connections.max.idle.ms = 540000
enable.idempotence = false
interceptor.classes = []
key.serializer = class org.apache.kafka.common.serialization.ByteArraySerializer
linger.ms = 0
max.block.ms = 60000
max.in.flight.requests.per.connection = 5
max.request.size = 1048576
metadata.max.age.ms = 300000
metric.reporters = []
metrics.num.samples = 2
metrics.recording.level = INFO
metrics.sample.window.ms = 30000
partitioner.class = class org.apache.kafka.clients.producer.internals.DefaultPartitioner
receive.buffer.bytes = 32768
reconnect.backoff.max.ms = 1000
reconnect.backoff.ms = 50
request.timeout.ms = 30000
retries = 0
retry.backoff.ms = 100
sasl.client.callback.handler.class = null
sasl.jaas.config = null
sasl.kerberos.kinit.cmd = /usr/bin/kinit
sasl.kerberos.min.time.before.relogin = 60000
sasl.kerberos.service.name = null
sasl.kerberos.ticket.renew.jitter = 0.05
sasl.kerberos.ticket.renew.window.factor = 0.8
sasl.login.callback.handler.class = null
sasl.login.class = null
sasl.login.refresh.buffer.seconds = 300
sasl.login.refresh.min.period.seconds = 60
sasl.login.refresh.window.factor = 0.8
sasl.login.refresh.window.jitter = 0.05
sasl.mechanism = GSSAPI
security.protocol = PLAINTEXT
send.buffer.bytes = 131072
ssl.cipher.suites = null
ssl.enabled.protocols = [TLSv1.2, TLSv1.1, TLSv1]
ssl.endpoint.identification.algorithm = https
ssl.key.password = null
ssl.keymanager.algorithm = SunX509
ssl.keystore.location = null
ssl.keystore.password = null
ssl.keystore.type = JKS
ssl.protocol = TLS
ssl.provider = null
ssl.secure.random.implementation = null
ssl.trustmanager.algorithm = PKIX
ssl.truststore.location = null
ssl.truststore.password = null
ssl.truststore.type = JKS
transaction.timeout.ms = 60000
transactional.id = null
value.serializer = class org.apache.kafka.common.serialization.ByteArraySerializer
ProducerConfig values:
acks = 1
batch.size = 16384
bootstrap.servers = [https://....:...]
buffer.memory = 33554432
client.id = client-1ae836b8-9a13-4903-aad6-09ce11a4be08-StreamThread-1-producer
compression.type = none
connections.max.idle.ms = 540000
enable.idempotence = false
interceptor.classes = []
key.serializer = class org.apache.kafka.common.serialization.ByteArraySerializer
linger.ms = 100
max.block.ms = 60000
max.in.flight.requests.per.connection = 5
max.request.size = 1048576
metadata.max.age.ms = 300000
metric.reporters = []
metrics.num.samples = 2
metrics.recording.level = INFO
metrics.sample.window.ms = 30000
partitioner.class = class org.apache.kafka.clients.producer.internals.DefaultPartitioner
receive.buffer.bytes = 32768
reconnect.backoff.max.ms = 1000
reconnect.backoff.ms = 50
request.timeout.ms = 30000
retries = 10
retry.backoff.ms = 100
sasl.client.callback.handler.class = null
sasl.jaas.config = null
sasl.kerberos.kinit.cmd = /usr/bin/kinit
sasl.kerberos.min.time.before.relogin = 60000
sasl.kerberos.service.name = null
sasl.kerberos.ticket.renew.jitter = 0.05
sasl.kerberos.ticket.renew.window.factor = 0.8
sasl.login.callback.handler.class = null
sasl.login.class = null
sasl.login.refresh.buffer.seconds = 300
sasl.login.refresh.min.period.seconds = 60
sasl.login.refresh.window.factor = 0.8
sasl.login.refresh.window.jitter = 0.05
sasl.mechanism = GSSAPI
security.protocol = SSL
send.buffer.bytes = 131072
ssl.cipher.suites = null
ssl.enabled.protocols = [TLSv1.2, TLSv1.1, TLSv1]
ssl.endpoint.identification.algorithm = https
ssl.key.password = null
ssl.keymanager.algorithm = SunX509
ssl.keystore.location = keystore/prod/client.keystore.p12
ssl.keystore.password = [hidden]
ssl.keystore.type = PKCS12
ssl.protocol = TLS
ssl.provider = null
ssl.secure.random.implementation = null
ssl.trustmanager.algorithm = PKIX
ssl.truststore.location = keystore/prod/client.truststore.jks
ssl.truststore.password = [hidden]
ssl.truststore.type = JKS
transaction.timeout.ms = 60000
transactional.id = null
value.serializer = class org.apache.kafka.common.serialization.ByteArraySerializer
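Both producers run with enable.idempotence = false and acks = 1 (and the second one with retries = 10), so broker-level retries can also write the same batch more than once. For contrast, a minimal sketch of plain-Java producer properties with the idempotent producer enabled (broker addresses are placeholders; with Spring Cloud Stream the same settings would go into the binder's producer configuration):

import java.util.Properties;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.common.serialization.ByteArraySerializer;

public class IdempotentProducerExample {
    public static KafkaProducer<byte[], byte[]> create() {
        Properties props = new Properties();
        props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "broker1:9093,broker2:9093,broker3:9093"); // placeholder
        props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName());
        props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName());
        props.put(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG, "true");          // implies acks=all
        props.put(ProducerConfig.ACKS_CONFIG, "all");
        props.put(ProducerConfig.RETRIES_CONFIG, Integer.toString(Integer.MAX_VALUE));
        props.put(ProducerConfig.MAX_IN_FLIGHT_REQUESTS_PER_CONNECTION, "5"); // must be <= 5 with idempotence
        // With idempotence enabled, the producer's internal retries cannot write the same
        // record twice; any remaining duplicates would have to come from application-level
        // re-sends (e.g. the loop above) or from re-processing of input records.
        return new KafkaProducer<>(props);
    }
}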
On our environment we run 8 instances of this application as part of a consumer group, consuming from a topic with 60 partitions on the external cluster. As mentioned, this data is transformed and pushed to our own 3-broker Kafka cluster. The data goes to a sensor-data topic with 30 partitions, a retention time of 7 days and a delete cleanup policy.
I'm fully aware that at-least-once semantics can produce duplicate messages, but I'm seeing some messages duplicated over 300 times, which makes the required disk size grow tremendously, while other messages are duplicated at most 3 or 4 times.
The following is an example from our metrics showing a message that was duplicated 233 times; the timestamps array lists the timestamp of each duplicate and the offsets array lists the offset of each message seen as a duplicate:
Key: b'100083952:300793850'|-|1591011300000. Value: {'count': 233, 'partition': 3, 'offset': 26637463, 'timestamps': [1594133472060, 1594133472062, 1594133472064, 1594133472066, 1594133472068, 1594133472071, 1594133472072, 1594133472074, 1594133472076, 1594133472081, 1594133472084, 1594133472085, 1594133472087, 1594133472090, 1594133472092, 1594133472095, 1594133472097, 1594133472099, 1594133472102, 1594133472103, 1594133472105, 1594133472107, 1594133472111, 1594133472113, 1594133472115, 1594133472117, 1594133472119, 1594133472121, 1594133472139, 1594133472141, 1594133472155, 1594133472157, 1594133472160, 1594133472163, 1594133472166, 1594133472169, 1594133472171, 1594133472174, 1594133472179, 1594133472181, 1594133472188, 1594133472190, 1594133472193, 1594133472195, 1594133472197, 1594133472199, 1594133472201, 1594133472204, 1594133472207, 1594133472209, 1594133472212, 1594133472215, 1594133472217, 1594133472219, 1594133472221, 1594133472222, 1594133472224, 1594133472227, 1594133472229, 1594133472231, 1594133472234, 1594133472235, 1594133472237, 1594133472240, 1594133472242, 1594133472243, 1594133472247, 1594133472249, 1594133472252, 1594133472255, 1594133472257, 1594133472262, 1594133472267, 1594133472270, 1594133472272, 1594133472275, 1594133472277, 1594133472279, 1594133472282, 1594133472289, 1594133472293, 1594133472295, 1594133472297, 1594133472299, 1594133472300, 1594133472303, 1594133472305, 1594133472307, 1594133472309, 1594133472311, 1594133472312, 1594133472316, 1594133472319, 1594133472321, 1594133472323, 1594133472325, 1594133472327, 1594133472333, 1594133472335, 1594133472337, 1594133472338, 1594133472341, 1594133472343, 1594133472346, 1594133472351, 1594133472356, 1594133472359, 1594133472361, 1594133472363, 1594133472364, 1594133472366, 1594133472371, 1594133472374, 1594133472377, 1594133472379, 1594133472381, 1594133472385, 1594133472393, 1594133472399, 1594133472401, 1594133472402, 1594133472404, 1594133472406, 1594133472409, 1594133472412, 1594133472413, 1594133472415, 1594133472417, 1594133472419, 1594133472421, 1594133472423, 1594133472425, 1594133472427, 1594133472429, 1594133472432, 1594133472434, 1594133472436, 1594133472439, 1594133472442, 1594133472469, 1594133472479, 1594133472483, 1594133472485, 1594133472488, 1594133472491, 1594133472494, 1594133472496, 1594133472498, 1594133472500, 1594133472503, 1594133472506, 1594133472508, 1594133472510, 1594133472512, 1594133472515, 1594133472520, 1594133472522, 1594133472524, 1594133472526, 1594133472528, 1594133472530, 1594133472532, 1594133472534, 1594133472535, 1594133472537, 1594133472539, 1594133472541, 1594133472543, 1594133472545, 1594133472547, 1594133472549, 1594133472551, 1594133472552, 1594133472554, 1594133472556, 1594133472558, 1594133472560, 1594133472562, 1594133472564, 1594133472566, 1594133472568, 1594133472570, 1594133472572, 1594133472573, 1594133472575, 1594133472577, 1594133472579, 1594133472581, 1594133472583, 1594133472587, 1594133472589, 1594133472593, 1594133472595, 1594133472596, 1594133472598, 1594133472599, 1594133472601, 1594133472603, 1594133472605, 1594133472606, 1594133472609, 1594133472611, 1594133472613, 1594133472615, 1594133472619, 1594133472622, 1594133472624, 1594133472626, 1594133472631, 1594133472633, 1594133472635, 1594133472637, 1594133472639, 1594133472641, 1594133472643, 1594133472644, 1594133472646, 1594133472649, 1594133472651, 1594133472653, 1594133472654, 1594133472657, 1594133472659, 1594133472660, 1594133472662, 1594133472664, 1594133472666, 1594133472667, 1594133472669, 
1594133472671, 1594133472673, 1594133472675, 1594133472676],
'offsets':
[26637463, 26637464, 26637465, 26637466, 26637467, 26637468, 26637469, 26637470, 26637471, 26637472, 26637473, 26637474, 26637475, 26637476, 26637477, 26637478, 26637479, 26637480, 26637481, 26637482, 26637483, 26637484, 26637485, 26637486, 26637487, 26637488, 26637489, 26637490, 26637491, 26637492, 26637493, 26637494, 26637495, 26637496, 26637497, 26637498, 26637499, 26637500, 26637501, 26637502, 26637503, 26637504, 26637505, 26637506, 26637507, 26637508, 26637509, 26637510, 26637511, 26637512, 26637513, 26637514, 26637515, 26637516, 26637517, 26637518, 26637519, 26637520, 26637521, 26637522, 26637523, 26637524, 26637525, 26637526, 26637527, 26637528, 26637529, 26637530, 26637531, 26637532, 26637533, 26637534, 26637535, 26637536, 26637537, 26637538, 26637539, 26637540, 26637541, 26637542, 26637543, 26637544, 26637545, 26637546, 26637547, 26637548, 26637549, 26637550, 26637551, 26637552, 26637553, 26637554, 26637555, 26637556, 26637557, 26637558, 26637559, 26637560, 26637561, 26637562, 26637563, 26637564, 26637565, 26637566, 26637567, 26637568, 26637569, 26637570, 26637571, 26637572, 26637573, 26637574, 26637575, 26637576, 26637577, 26637578, 26637579, 26637580, 26637581, 26637582, 26637583, 26637584, 26637585, 26637586, 26637587, 26637588, 26637589, 26637590, 26637591, 26637592, 26637593, 26637594, 26637595, 26637596, 26637597, 26637598, 26637599, 26637600, 26637601, 26637602, 26637603, 26637604, 26637605, 26637606, 26637607, 26637608, 26637609, 26637610, 26637611, 26637612, 26637613, 26637614, 26637615, 26637616, 26637617, 26637618, 26637619, 26637620, 26637621, 26637622, 26637623, 26637624, 26637625, 26637626, 26637627, 26637628, 26637629, 26637630, 26637631, 26637632, 26637633, 26637634, 26637635, 26637636, 26637637, 26637638, 26637639, 26637640, 26637641, 26637642, 26637643, 26637644, 26637645, 26637646, 26637647, 26637648, 26637649, 26637650, 26637651, 26637652, 26637653, 26637654, 26637655, 26637656, 26637657, 26637658, 26637659, 26637660, 26637661, 26637662, 26637663, 26637664, 26637665, 26637666, 26637667, 26637668, 26637669, 26637670, 26637671, 26637672, 26637673, 26637674, 26637675, 26637676, 26637677, 26637678, 26637679, 26637680, 26637681, 26637682, 26637683, 26637684, 26637685, 26637686, 26637687, 26637688, 26637689, 26637690, 26637691, 26637692, 26637693, 26637694, 26637695],
Date: 2020-06-01 13:35:00
You can see the offsets increasing monotonically.
I'm wondering what causes this difference between messages that are duplicated only a few times and messages that are duplicated several hundred times. I expected the retries property to play a part in limiting the number of duplicates, but that doesn't really show.
Related
Topic in Confluent Cloud is always empty
I'm new to both Confluent and Kafka, and am trying to run the sample producer and consumer code in Java. No exceptions are being thrown, but the web console shows no messages. When this happened for Kafka connections for other people, I've seen it be due to a Docker networking setup. Interestingly, if I go to the Confluent web page and produce a test message, it never shows up on the consumer. I likely missed a key bit of information, please let me know what I should be looking at. Thanks, Woodsman Consumer Properties: [2022-12-02 23:28:21,243] INFO ConsumerConfig values: allow.auto.create.topics = true auto.commit.interval.ms = 5000 auto.offset.reset = earliest bootstrap.servers = [some-machine.us-east-1.aws.confluent.cloud:9092] check.crcs = true client.dns.lookup = use_all_dns_ips client.id = consumer-demo-consumer-1-1 client.rack = connections.max.idle.ms = 540000 default.api.timeout.ms = 60000 enable.auto.commit = true exclude.internal.topics = true fetch.max.bytes = 52428800 fetch.max.wait.ms = 500 fetch.min.bytes = 1 group.id = demo-consumer-1 group.instance.id = null heartbeat.interval.ms = 3000 interceptor.classes = [] internal.leave.group.on.close = true internal.throw.on.fetch.stable.offset.unsupported = false isolation.level = read_uncommitted key.deserializer = class org.apache.kafka.common.serialization.StringDeserializer max.partition.fetch.bytes = 1048576 max.poll.interval.ms = 300000 max.poll.records = 500 metadata.max.age.ms = 300000 metric.reporters = [] metrics.num.samples = 2 metrics.recording.level = INFO metrics.sample.window.ms = 30000 partition.assignment.strategy = [class org.apache.kafka.clients.consumer.RangeAssignor, class org.apache.kafka.clients.consumer.CooperativeStickyAssignor] receive.buffer.bytes = 65536 reconnect.backoff.max.ms = 1000 reconnect.backoff.ms = 50 request.timeout.ms = 30000 retry.backoff.ms = 100 sasl.client.callback.handler.class = null sasl.jaas.config = [hidden] sasl.kerberos.kinit.cmd = /usr/bin/kinit sasl.kerberos.min.time.before.relogin = 60000 sasl.kerberos.service.name = null sasl.kerberos.ticket.renew.jitter = 0.05 sasl.kerberos.ticket.renew.window.factor = 0.8 sasl.login.callback.handler.class = null sasl.login.class = null sasl.login.connect.timeout.ms = null sasl.login.read.timeout.ms = null sasl.login.refresh.buffer.seconds = 300 sasl.login.refresh.min.period.seconds = 60 sasl.login.refresh.window.factor = 0.8 sasl.login.refresh.window.jitter = 0.05 sasl.login.retry.backoff.max.ms = 10000 sasl.login.retry.backoff.ms = 100 sasl.mechanism = PLAIN sasl.oauthbearer.clock.skew.seconds = 30 sasl.oauthbearer.expected.audience = null sasl.oauthbearer.expected.issuer = null sasl.oauthbearer.jwks.endpoint.refresh.ms = 3600000 sasl.oauthbearer.jwks.endpoint.retry.backoff.max.ms = 10000 sasl.oauthbearer.jwks.endpoint.retry.backoff.ms = 100 sasl.oauthbearer.jwks.endpoint.url = null sasl.oauthbearer.scope.claim.name = scope sasl.oauthbearer.sub.claim.name = sub sasl.oauthbearer.token.endpoint.url = null security.protocol = SASL_SSL security.providers = null send.buffer.bytes = 131072 session.timeout.ms = 45000 socket.connection.setup.timeout.max.ms = 30000 socket.connection.setup.timeout.ms = 10000 ssl.cipher.suites = null ssl.enabled.protocols = [TLSv1.2, TLSv1.3] ssl.endpoint.identification.algorithm = https ssl.engine.factory.class = null ssl.key.password = null ssl.keymanager.algorithm = SunX509 ssl.keystore.certificate.chain = null ssl.keystore.key = null ssl.keystore.location = null ssl.keystore.password = null ssl.keystore.type = JKS 
ssl.protocol = TLSv1.3 ssl.provider = null ssl.secure.random.implementation = null ssl.trustmanager.algorithm = PKIX ssl.truststore.certificates = null ssl.truststore.location = null ssl.truststore.password = null ssl.truststore.type = JKS value.deserializer = class io.confluent.kafka.serializers.KafkaJsonDeserializer Producer Properties: bootstrap.servers = [some-machine.us-east-1.aws.confluent.cloud:9092] client.dns.lookup = use_all_dns_ips client.id = connections.max.idle.ms = 300000 default.api.timeout.ms = 60000 metadata.max.age.ms = 300000 metric.reporters = [] metrics.num.samples = 2 metrics.recording.level = INFO metrics.sample.window.ms = 30000 receive.buffer.bytes = 65536 reconnect.backoff.max.ms = 1000 reconnect.backoff.ms = 50 request.timeout.ms = 30000 retries = 2147483647 retry.backoff.ms = 100 sasl.client.callback.handler.class = null sasl.jaas.config = [hidden] sasl.kerberos.kinit.cmd = /usr/bin/kinit sasl.kerberos.min.time.before.relogin = 60000 sasl.kerberos.service.name = null sasl.kerberos.ticket.renew.jitter = 0.05 sasl.kerberos.ticket.renew.window.factor = 0.8 sasl.login.callback.handler.class = null sasl.login.class = null sasl.login.connect.timeout.ms = null sasl.login.read.timeout.ms = null sasl.login.refresh.buffer.seconds = 300 sasl.login.refresh.min.period.seconds = 60 sasl.login.refresh.window.factor = 0.8 sasl.login.refresh.window.jitter = 0.05 sasl.login.retry.backoff.max.ms = 10000 sasl.login.retry.backoff.ms = 100 sasl.mechanism = PLAIN sasl.oauthbearer.clock.skew.seconds = 30 sasl.oauthbearer.expected.audience = null sasl.oauthbearer.expected.issuer = null sasl.oauthbearer.jwks.endpoint.refresh.ms = 3600000 sasl.oauthbearer.jwks.endpoint.retry.backoff.max.ms = 10000 sasl.oauthbearer.jwks.endpoint.retry.backoff.ms = 100 sasl.oauthbearer.jwks.endpoint.url = null sasl.oauthbearer.scope.claim.name = scope sasl.oauthbearer.sub.claim.name = sub sasl.oauthbearer.token.endpoint.url = null security.protocol = SASL_SSL security.providers = null send.buffer.bytes = 131072 socket.connection.setup.timeout.max.ms = 30000 socket.connection.setup.timeout.ms = 10000 ssl.cipher.suites = null ssl.enabled.protocols = [TLSv1.2, TLSv1.3] ssl.endpoint.identification.algorithm = https ssl.engine.factory.class = null ssl.key.password = null ssl.keymanager.algorithm = SunX509 ssl.keystore.certificate.chain = null ssl.keystore.key = null ssl.keystore.location = null ssl.keystore.password = null ssl.keystore.type = JKS ssl.protocol = TLSv1.3 ssl.provider = null ssl.secure.random.implementation = null ssl.trustmanager.algorithm = PKIX ssl.truststore.certificates = null ssl.truststore.location = null ssl.truststore.password = null ssl.truststore.type = JKS I'm running the code at https://docs.confluent.io/platform/current/tutorials/examples/clients/docs/java.html#basic-producer-and-consumer-and-kafka-streams unchanged. Updated approach: I was asked to install a local Kafka cluster. I created a cluster by running docker-compose using the docker-compose file at https://developer.confluent.io/tutorials/creating-first-apache-kafka-producer-application/kafka.html#initialize-the-project. I used the exact docker-compose file as Confluent specified. I also changed the configuration to the one they specified on that same link (though I changed the topic names), but ran the same producer and consumer code as previously mentioned. It is bootstrap.servers=localhost:29092 acks=all All messages were sent and received successfully to my local cluster. 
Again, the only differences are the authentication method and the location itself; there was no change in the producer or consumer Java code. This is for a client who insists on a cluster managed by Confluent. What should I look at, or where should I start? At this point I believe it's some Confluent configuration that I'm not aware of.
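One way to narrow this down (not from the original post) is a bare-bones producer that talks to the Confluent Cloud endpoint directly and prints the result of each send; if the callback reports an offset, the records are reaching the cluster and the problem is on the consuming or topic side. Endpoint, topic name and credentials below are placeholders:

import java.util.Properties;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;

public class CloudSmokeTest {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "some-machine.us-east-1.aws.confluent.cloud:9092");
        props.put("security.protocol", "SASL_SSL");
        props.put("sasl.mechanism", "PLAIN");
        props.put("sasl.jaas.config",
                "org.apache.kafka.common.security.plain.PlainLoginModule required username=\"<API_KEY>\" password=\"<API_SECRET>\";");
        props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        try (KafkaProducer<String, String> producer = new KafkaProducer<>(props)) {
            producer.send(new ProducerRecord<>("test-topic", "key", "value"), (metadata, exception) -> {
                if (exception != null) {
                    exception.printStackTrace();               // delivery failed
                } else {
                    System.out.printf("written to %s-%d@%d%n", // delivery confirmed
                            metadata.topic(), metadata.partition(), metadata.offset());
                }
            });
            producer.flush();
        }
    }
}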
Logback messages not sent to Kafka topic after batch job execution is finished
I am doing a POC where I have to send batch job's execution summary to Kafka via Logback. During the processing of the batch job, I create a summary object and when the processing is done for all the records, I am sending the summary to Kafka. But the logs are not sent to the Topic. With the same logback configurations, if I send the logs during the processing of the batch job, the logs are sent successfully to the Topic. There is no error or timeout in the logs, so the issue is not clear. Batch duration : 5-7 min Configuration logs: SLF4J: A number (135) of logging calls during the initialization phase have been intercepted and are SLF4J: now being replayed. These are subject to the filtering rules of the underlying logging system. SLF4J: See also http://www.slf4j.org/codes.html#replay 2022-09-02 14:53:33,403 INFO [main] o.a.k.c.p.ProducerConfig [NativeMethodAccessorImpl.java:-2] ProducerConfig values: acks = -1 batch.size = 16384 bootstrap.servers = [xxx] buffer.memory = 33554432 client.dns.lookup = use_all_dns_ips client.id = IndividualDataProducer compression.type = none connections.max.idle.ms = 540000 delivery.timeout.ms = 120000 enable.idempotence = true interceptor.classes = [] key.serializer = class org.apache.kafka.common.serialization.StringSerializer linger.ms = 0 max.block.ms = 60000 max.in.flight.requests.per.connection = 5 max.request.size = 1048576 metadata.max.age.ms = 300000 metadata.max.idle.ms = 300000 metric.reporters = [] metrics.num.samples = 2 metrics.recording.level = INFO metrics.sample.window.ms = 30000 partitioner.class = class org.apache.kafka.clients.producer.internals.DefaultPartitioner receive.buffer.bytes = 32768 reconnect.backoff.max.ms = 1000 reconnect.backoff.ms = 50 request.timeout.ms = 7206000 retries = 2147483647 retry.backoff.ms = 100 sasl.client.callback.handler.class = null sasl.jaas.config = null sasl.kerberos.kinit.cmd = /usr/bin/kinit sasl.kerberos.min.time.before.relogin = 60000 sasl.kerberos.service.name = kafka sasl.kerberos.ticket.renew.jitter = 0.05 sasl.kerberos.ticket.renew.window.factor = 0.8 sasl.login.callback.handler.class = null sasl.login.class = null sasl.login.connect.timeout.ms = null sasl.login.read.timeout.ms = null sasl.login.refresh.buffer.seconds = 300 sasl.login.refresh.min.period.seconds = 60 sasl.login.refresh.window.factor = 0.8 sasl.login.refresh.window.jitter = 0.05 sasl.login.retry.backoff.max.ms = 10000 sasl.login.retry.backoff.ms = 100 sasl.mechanism = GSSAPI sasl.oauthbearer.clock.skew.seconds = 30 sasl.oauthbearer.expected.audience = null sasl.oauthbearer.expected.issuer = null sasl.oauthbearer.jwks.endpoint.refresh.ms = 3600000 sasl.oauthbearer.jwks.endpoint.retry.backoff.max.ms = 10000 sasl.oauthbearer.jwks.endpoint.retry.backoff.ms = 100 sasl.oauthbearer.jwks.endpoint.url = null sasl.oauthbearer.scope.claim.name = scope sasl.oauthbearer.sub.claim.name = sub sasl.oauthbearer.token.endpoint.url = null security.protocol = SASL_PLAINTEXT security.providers = null send.buffer.bytes = 131072 socket.connection.setup.timeout.max.ms = 7206000 socket.connection.setup.timeout.ms = 720600 ssl.cipher.suites = null ssl.enabled.protocols = [TLSv1.2] ssl.endpoint.identification.algorithm = https ssl.engine.factory.class = null ssl.key.password = null ssl.keymanager.algorithm = SunX509 ssl.keystore.certificate.chain = null ssl.keystore.key = null ssl.keystore.location = null ssl.keystore.password = null ssl.keystore.type = JKS ssl.protocol = TLSv1.2 ssl.provider = null ssl.secure.random.implementation = null 
ssl.trustmanager.algorithm = PKIX ssl.truststore.certificates = null ssl.truststore.location = null ssl.truststore.password = null ssl.truststore.type = JKS transaction.timeout.ms = 60000 transactional.id = null value.serializer = class org.apache.kafka.common.serialization.StringSerializer
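The post doesn't show how the application exits after logging the summary. If the Kafka appender delivers asynchronously and the JVM stops right after the last log call, the final record may never be flushed. A minimal sketch (not from the original post) of stopping the Logback context explicitly at the end of the batch job, which flushes and closes every appender before shutdown:

import ch.qos.logback.classic.LoggerContext;
import org.slf4j.LoggerFactory;

public final class LoggingShutdown {
    private LoggingShutdown() {
    }

    // Call this as the very last step of the batch job (or register it as a JVM
    // shutdown hook) so the Kafka appender gets a chance to deliver buffered events.
    public static void flushAndStop() {
        LoggerContext context = (LoggerContext) LoggerFactory.getILoggerFactory();
        context.stop();
    }
}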
Kafka producer does not throw exception when broker is down
Created a cluster with two brokers using same zookeeper and trying to produce message to a topic whose details are as below. When the producer sets acks="all" or -1,min.insync.replicas="2", it is supposed to receive acknowledgement from the brokers(leaders and replicas) but when one broker is shut manually while it is producing, it is making no difference to the kafka producer even when acks="all" can someone explain the reason for this weird behavior? brokers are on 9091,9092. acks = -1 batch.size = 16384 bootstrap.servers = [localhost:9092] buffer.memory = 33554432 client.dns.lookup = use_all_dns_ips client.id = producer-1 compression.type = none connections.max.idle.ms = 540000 delivery.timeout.ms = 120000 enable.idempotence = false interceptor.classes = [] internal.auto.downgrade.txn.commit = false key.serializer = class org.apache.kafka.common.serialization.StringSerializer linger.ms = 0 max.block.ms = 60000 max.in.flight.requests.per.connection = 5 max.request.size = 1048576 metadata.max.age.ms = 300000 metadata.max.idle.ms = 300000 metric.reporters = [] metrics.num.samples = 2 metrics.recording.level = INFO metrics.sample.window.ms = 30000 partitioner.class = class org.apache.kafka.clients.producer.internals.DefaultPartitioner receive.buffer.bytes = 32768 reconnect.backoff.max.ms = 1000 reconnect.backoff.ms = 50 request.timeout.ms = 30000 retries = 2147483647 retry.backoff.ms = 100 sasl.client.callback.handler.class = null sasl.jaas.config = null sasl.kerberos.kinit.cmd = /usr/bin/kinit sasl.kerberos.min.time.before.relogin = 60000 sasl.kerberos.service.name = null sasl.kerberos.ticket.renew.jitter = 0.05 sasl.kerberos.ticket.renew.window.factor = 0.8 sasl.login.callback.handler.class = null sasl.login.class = null sasl.login.refresh.buffer.seconds = 300 sasl.login.refresh.min.period.seconds = 60 sasl.login.refresh.window.factor = 0.8 sasl.login.refresh.window.jitter = 0.05 sasl.mechanism = GSSAPI security.protocol = PLAINTEXT security.providers = null send.buffer.bytes = 131072 ssl.cipher.suites = null ssl.enabled.protocols = [TLSv1.2] ssl.endpoint.identification.algorithm = https ssl.engine.factory.class = null ssl.key.password = null ssl.keymanager.algorithm = SunX509 ssl.keystore.location = null ssl.keystore.password = null ssl.keystore.type = JKS ssl.protocol = TLSv1.2 ssl.provider = null ssl.secure.random.implementation = null ssl.trustmanager.algorithm = PKIX ssl.truststore.location = null ssl.truststore.password = null ssl.truststore.type = JKS transaction.timeout.ms = 60000 transactional.id = null value.serializer = class org.apache.kafka.common.serialization.StringSerializer Below is the source code for the kafka producer public static void main(String k[]) { Properties prop=new Properties(); prop.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG,"localhost:9092"); prop.setProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName()); prop.setProperty(ProducerConfig.ACKS_CONFIG,"all"); prop.setProperty("min.insync.replicas", "2"); prop.setProperty(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName()); KafkaProducer<String,String> producer=new KafkaProducer<>(prop); ProducerRecord<String,String> rec=new ProducerRecord<String,String>("clust_topic","123"); while(true) { producer.send(rec, new Callback() { #Override public void onCompletion(RecordMetadata rm, Exception arg1) { System.out.println(arg1); if(arg1!=null) System.out.println(arg1); else System.out.println(rm.topic()+" "+rm.partition()+" "+rm.offset()+" "); } }); } 
}
acks=all means the producer requires an acknowledgement from all in-sync replicas, not from all replicas (refer to the documentation).
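A related detail in the question's code: min.insync.replicas is a topic/broker configuration, so putting it into the producer Properties has no effect. A sketch (topic name taken from the question, broker address assumed) of setting it on the topic with the AdminClient; with that in place and acks=all, sends should fail with NotEnoughReplicasException once only one of the two brokers is alive:

import java.util.Collection;
import java.util.Collections;
import java.util.Map;
import java.util.Properties;
import org.apache.kafka.clients.admin.AdminClient;
import org.apache.kafka.clients.admin.AlterConfigOp;
import org.apache.kafka.clients.admin.ConfigEntry;
import org.apache.kafka.common.config.ConfigResource;

public class SetMinInsyncReplicas {
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost:9092");
        try (AdminClient admin = AdminClient.create(props)) {
            ConfigResource topic = new ConfigResource(ConfigResource.Type.TOPIC, "clust_topic");
            AlterConfigOp op = new AlterConfigOp(
                    new ConfigEntry("min.insync.replicas", "2"), AlterConfigOp.OpType.SET);
            Map<ConfigResource, Collection<AlterConfigOp>> update =
                    Collections.singletonMap(topic, Collections.singletonList(op));
            admin.incrementalAlterConfigs(update).all().get(); // applies the topic-level override
        }
    }
}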
Kafka Consumer reading messages only when two messages stack
We have a kafka producer that produces some messages once in a while. I wrote a Consumer to consume these messages. Problem is, the messages are consumed only when 2 of them stack. For example if a message is produced at 13:00 the consumer doesn't do anything. If another message is produced at 13:01, the consumer consumes both messages. In kafkaTool, at consumer properties it's present a column called LAG that when the message is not consumed is 1. Is there any config for this thing that I'm missing? The Consumer Config: 16:43:04,472 INFO [org.apache.kafka.clients.consumer.ConsumerConfig] (http--0.0.0.0-8180-1) ConsumerConfig values: request.timeout.ms = 180001 check.crcs = true retry.backoff.ms = 100 ssl.truststore.password = null ssl.keymanager.algorithm = SunX509 receive.buffer.bytes = 32768 ssl.cipher.suites = null ssl.key.password = null sasl.kerberos.ticket.renew.jitter = 0.05 ssl.provider = null sasl.kerberos.service.name = null session.timeout.ms = 180000 sasl.kerberos.ticket.renew.window.factor = 0.8 bootstrap.servers = [mtxbuctra22.prod.orange.intra:9092] client.id = fetch.max.wait.ms = 180000 fetch.min.bytes = 1024 key.deserializer = class io.confluent.kafka.serializers.KafkaAvroDeserializer sasl.kerberos.kinit.cmd = /usr/bin/kinit auto.offset.reset = earliest value.deserializer = class io.confluent.kafka.serializers.KafkaAvroDeserializer ssl.enabled.protocols = [TLSv1.2, TLSv1.1, TLSv1] partition.assignment.strategy = [org.apache.kafka.clients.consumer.RangeAssignor] ssl.endpoint.identification.algorithm = null max.partition.fetch.bytes = 1048576 ssl.keystore.location = null ssl.truststore.location = null ssl.keystore.password = null metrics.sample.window.ms = 30000 metadata.max.age.ms = 300000 security.protocol = PLAINTEXT auto.commit.interval.ms = 1000 ssl.protocol = TLS sasl.kerberos.min.time.before.relogin = 60000 connections.max.idle.ms = 540000 ssl.trustmanager.algorithm = PKIX group.id = ifd_006 enable.auto.commit = true metric.reporters = [] ssl.truststore.type = JKS send.buffer.bytes = 131072 reconnect.backoff.ms = 50 metrics.num.samples = 2 ssl.keystore.type = JKS heartbeat.interval.ms = 3000 16:43:04,493 INFO [io.confluent.kafka.serializers.KafkaAvroDeserializerConfig] (http--0.0.0.0-8180-1) KafkaAvroDeserializerConfig values: max.schemas.per.subject = 1000 specific.avro.reader = true schema.registry.url = [http://mtxbuctra22.prod.orange.intra:8081] 16:43:04,498 INFO [io.confluent.kafka.serializers.KafkaAvroDeserializerConfig] (http--0.0.0.0-8180-1) KafkaAvroDeserializerConfig values: max.schemas.per.subject = 1000 specific.avro.reader = true schema.registry.url = [http://mtxbuctra22.prod.orange.intra:8081] Kafka tool:
Figured it out. The documentation for Kafka 0.9.0.1 states that fetch.min.bytes defaults to 1, but I'm on Kafka 0.9.0.0, where the default is 1024. So the threshold was only crossed after 2 messages. I changed fetch.min.bytes to 1 and now it works fine.
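For reference, the two settings involved look like this on the consumer (a minimal sketch; string deserializers and the topic name are placeholders, the original uses Avro): fetch.min.bytes = 1 makes the broker answer a fetch as soon as any data is available, and fetch.max.wait.ms bounds how long it may hold the fetch open otherwise.

import java.util.Collections;
import java.util.Properties;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;

public class LowLatencyConsumerExample {
    public static KafkaConsumer<String, String> create() {
        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "mtxbuctra22.prod.orange.intra:9092");
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "ifd_006");
        props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        props.put(ConsumerConfig.FETCH_MIN_BYTES_CONFIG, "1");     // respond as soon as any data exists
        props.put(ConsumerConfig.FETCH_MAX_WAIT_MS_CONFIG, "500"); // upper bound on broker-side waiting
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
        consumer.subscribe(Collections.singletonList("some-topic")); // placeholder topic
        return consumer;
    }
}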
Kafka producer giving TimeoutException
I have Kafka running in a remote server and I am using spring framework (java) to produce and consume messages. For testing on my local machine, I am just producing 1 event. Here is a simplified code of how I produce messages: import org.springframework.kafka.core.KafkaTemplate; ... #Autowired KafkaTemplate<String, String> kafkaTemplate; ... kafkaTemplate.send("sampletopic", "1234").get(); ... Here payload is just a user-id string. When I execute the send function, I get the following error: kafka.. error:java.util.concurrent.ExecutionException: org.springframework.kafka.core.KafkaProducerException: Failed to send; nested exception is org.apache.kafka.common.errors.TimeoutException: Expiring 1 record(s) for sampletopic-0: 30028 ms has passed since batch creation plus linger time Here are the relevant logs I get before getting the error: [http-nio-8080-exec-3] INFO org.apache.kafka.clients.producer.ProducerConfig - ProducerConfig values: acks = 1 batch.size = 16384 block.on.buffer.full = false bootstrap.servers = [41.204.196.251:9092] buffer.memory = 33554432 client.id = compression.type = none connections.max.idle.ms = 540000 interceptor.classes = null key.serializer = class org.apache.kafka.common.serialization.StringSerializer linger.ms = 0 max.block.ms = 60000 max.in.flight.requests.per.connection = 5 max.request.size = 1048576 metadata.fetch.timeout.ms = 60000 metadata.max.age.ms = 300000 metric.reporters = [] metrics.num.samples = 2 metrics.sample.window.ms = 30000 partitioner.class = class org.apache.kafka.clients.producer.internals.DefaultPartitioner receive.buffer.bytes = 32768 reconnect.backoff.ms = 50 request.timeout.ms = 30000 retries = 0 retry.backoff.ms = 100 sasl.jaas.config = null sasl.kerberos.kinit.cmd = /usr/bin/kinit sasl.kerberos.min.time.before.relogin = 60000 sasl.kerberos.service.name = null sasl.kerberos.ticket.renew.jitter = 0.05 sasl.kerberos.ticket.renew.window.factor = 0.8 sasl.mechanism = GSSAPI security.protocol = PLAINTEXT send.buffer.bytes = 131072 ssl.cipher.suites = null ssl.enabled.protocols = [TLSv1.2, TLSv1.1, TLSv1] ssl.endpoint.identification.algorithm = null ssl.key.password = null ssl.keymanager.algorithm = SunX509 ssl.keystore.location = null ssl.keystore.password = null ssl.keystore.type = JKS ssl.protocol = TLS ssl.provider = null ssl.secure.random.implementation = null ssl.trustmanager.algorithm = PKIX ssl.truststore.location = null ssl.truststore.password = null ssl.truststore.type = JKS timeout.ms = 30000 value.serializer = class org.apache.kafka.common.serialization.StringSerializer [http-nio-8080-exec-3] INFO org.apache.kafka.common.utils.AppInfoParser - Kafka version : 0.10.2.0 [http-nio-8080-exec-3] INFO org.apache.kafka.common.utils.AppInfoParser - Kafka commitId : 576d93a8ds0cf421
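For completeness, a sketch of the same send done asynchronously with a callback (for the Spring Kafka generation used with Kafka 0.10.x, where KafkaTemplate.send returns a ListenableFuture; kafkaTemplate is assumed to be autowired as in the question). This surfaces the TimeoutException with its full cause instead of blocking the request thread on get():

import org.springframework.kafka.core.KafkaTemplate;
import org.springframework.kafka.support.SendResult;
import org.springframework.util.concurrent.ListenableFuture;
import org.springframework.util.concurrent.ListenableFutureCallback;

public class AsyncSendExample {

    private final KafkaTemplate<String, String> kafkaTemplate;

    public AsyncSendExample(KafkaTemplate<String, String> kafkaTemplate) {
        this.kafkaTemplate = kafkaTemplate;
    }

    public void sendUserId(String userId) {
        ListenableFuture<SendResult<String, String>> future = kafkaTemplate.send("sampletopic", userId);
        future.addCallback(new ListenableFutureCallback<SendResult<String, String>>() {
            @Override
            public void onSuccess(SendResult<String, String> result) {
                System.out.println("sent to " + result.getRecordMetadata());
            }

            @Override
            public void onFailure(Throwable ex) {
                ex.printStackTrace(); // e.g. TimeoutException: Expiring 1 record(s) ...
            }
        });
    }
}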