• Correct SSL script for Kafka deployment

    Hi,

    Some time ago I wrote a post about certificate generation for securing a Kafka cluster.

    Long story short, it was wrong!

    Here is the correct version that returns 0 (the keystore is correctly generated and used):

    
    #!/bin/bash
    HOST=<%= @fqdn %>
    PASSWORD=<%= @pass %>
    KEYSTOREPASS=<%= @keystorepass %>
    VALIDITY=365
    
    keytool -keystore kafka.server.temp.keystore.jks -alias $HOST -validity $VALIDITY -genkey -dname "CN=${HOST}, OU=Myteam, O=MyCompany, L=Bucharest, ST=Romania, C=RO" -storepass $KEYSTOREPASS -keypass $KEYSTOREPASS
    openssl req -new -x509 -keyout ca-key -out ca-cert -days $VALIDITY -subj "/CN=${HOST}/OU=Myteam/O=MyCompany/L=Bucharest/ST=Romania/C=RO" -passout pass:$PASSWORD
    keytool -keystore kafka.server.temp.keystore.jks -alias $HOST -certreq -file cert-file-${HOST}.host -storepass $KEYSTOREPASS
    openssl x509 -req -CA ca-cert -CAkey ca-key -in cert-file-${HOST}.host -out cert-signed-${HOST}.host -days $VALIDITY -CAcreateserial -passin pass:$PASSWORD
    keytool -keystore kafka.server.keystore.jks -alias $HOST -import -file cert-signed-${HOST}.host -storepass $KEYSTOREPASS -noprompt
    keytool -keystore kafka.server.keystore.jks -alias CARoot -import -file ca-cert -storepass $KEYSTOREPASS -noprompt
    keytool -keystore kafka.server.truststore.jks -alias CARoot -import -file ca-cert -storepass $KEYSTOREPASS -noprompt
    
    
    <% @servers.each do |server| -%>
    # <%= server %>
    keytool -keystore kafka.server.temp.keystore.jks -alias <%= server %> -validity $VALIDITY -genkey -dname "CN=<%= server %>, OU=Myteam, O=MyCompany, L=Bucharest, ST=Romania, C=RO" -storepass $KEYSTOREPASS -keypass $KEYSTOREPASS
    keytool -keystore kafka.server.temp.keystore.jks -alias <%= server %> -certreq -file cert-file-<%= server %>.host -storepass $KEYSTOREPASS
    openssl x509 -req -CA ca-cert -CAkey ca-key -in cert-file-<%= server %>.host -out cert-signed-<%= server %>.host -days $VALIDITY -CAcreateserial -passin pass:$PASSWORD
    keytool -keystore kafka.server.keystore.jks -alias <%= server %> -import -file cert-signed-<%= server %>.host -storepass $KEYSTOREPASS -noprompt
    <% end -%>
    
    keytool -keystore kafka.client.temp.keystore.jks -alias 'client' -validity $VALIDITY -genkey -dname "CN=${HOST}, OU=Myteam, O=MyCompany, L=Bucharest, ST=Romania, C=RO" -storepass $KEYSTOREPASS -keypass $KEYSTOREPASS
    keytool -keystore kafka.client.temp.keystore.jks -alias 'client' -certreq -file cert-file-client.host -storepass $KEYSTOREPASS
    openssl x509 -req -CA ca-cert -CAkey ca-key -in cert-file-client.host -out cert-signed-client.host -days $VALIDITY -CAcreateserial -passin pass:$PASSWORD
    keytool -keystore kafka.client.keystore.jks -alias $HOST -import -file cert-signed-client.host -storepass $KEYSTOREPASS -noprompt
    keytool -keystore kafka.client.truststore.jks -alias CARoot -import -file ca-cert -storepass $KEYSTOREPASS -noprompt
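
    To quickly confirm that the keystores came out right, you can inspect the result after the script runs (a minimal sketch, assuming the same file names and the $KEYSTOREPASS/$HOST values from above):

    # the server keystore should contain two entries: the host certificate and CARoot
    keytool -list -v -keystore kafka.server.keystore.jks -storepass $KEYSTOREPASS | grep -E 'Alias name|Entry type'

    # the signed host certificate should verify against the generated CA
    openssl verify -CAfile ca-cert cert-signed-${HOST}.host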
    

    Here is also a link to the old article for comparison: wrong way to do it.

    PS: It seems that this is also wrong. Please check this article.

  • Cgroups management on Linux – first steps

    Hi,

    I didn't know much about control groups, but I see that they are a big thing in performance and process optimization.
    For the moment I would like to share two important pieces of info that I found.
    First, there are three options that you need to activate if you want to play with control group management:

    DefaultCPUAccounting=yes
    DefaultBlockIOAccounting=yes
    DefaultMemoryAccounting=yes
    

    that you can find under /etc/systemd/system.conf.
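
    A minimal sketch of turning this on (assuming the options are present but commented out in system.conf; otherwise just add them):

    # enable the accounting defaults in the systemd manager config
    sudo sed -i -e 's/^#\?DefaultCPUAccounting=.*/DefaultCPUAccounting=yes/' \
                -e 's/^#\?DefaultBlockIOAccounting=.*/DefaultBlockIOAccounting=yes/' \
                -e 's/^#\?DefaultMemoryAccounting=.*/DefaultMemoryAccounting=yes/' /etc/systemd/system.conf

    # the manager itself must be re-executed to pick up system.conf changes
    sudo systemctl daemon-reexec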

    And, there is also a command that shows CPU utilization along with other info related to the user/system slices – systemd-cgtop.
    If accounting is not enabled, no details are shown. Once you enable it, you will see info like this:

    Path                                          Tasks   %CPU   Memory  Input/s Output/s
    
    /                                                66    9.2        -        -        -
    /user.slice                                       -    5.0        -        -        -
    /user.slice/user-1000.slice                       -    5.0        -        -        -
    /user.slice/user-1000.slice/session-1.scope      47    5.0        -        -        -
    /system.slice                                     -    3.8        -        -        -
    /system.slice/lightdm.service                     2    3.5        -        -        -
    /system.slice/docker.service                      2    0.3        -        -        -
    /system.slice/vboxadd-service.service             1    0.0        -        -        -
    /system.slice/ModemManager.service                1      -        -        -        -
    /system.slice/NetworkManager.service              2      -        -        -        -
    /system.slice/accounts-daemon.service             1      -        -        -        -
    /system.slice/acpid.service                       1      -        -        -        -
    /system.slice/atd.service                         1      -        -        -        -
    /system.slice/avahi-daemon.service                2      -        -        -        -
    /system.slice/colord.service                      1      -        -        -        -
    /system.slice/cron.service                        1      -        -        -        -
    /system.slice/cups-browsed.service                1      -        -        -        -
    /system.slice/cups.service                        1      -        -        -        -
    /system.slice/dbus.service

    That is all so far. I will let you know once I discover new info.

    Cheers

  • Kernel not compatible with zookeeper version

    Morning,

    It's important to share this situation with you. This morning I came to the office to see that a cluster that was upgraded/restarted had an issue with its Zookeeper instances.

    The symptoms were clear: the instances wouldn't start completely. But why?

    After a little bit of investigation, I went to /var/log/syslog (/var/log/zookeeper did not contain any information at all) and saw that there was a bad page table in the JVM.

    Java version is:

    java version "1.8.0_111"
    Java(TM) SE Runtime Environment (build 1.8.0_111-b14)
    Java HotSpot(TM) 64-Bit Server VM (build 25.111-b14, mixed mode)
    

    So, the log showed the following lines:

    Aug 16 07:16:04 kafka0 kernel: [  742.349010] init: zookeeper main process ended, respawning
    Aug 16 07:16:04 kafka0 kernel: [  742.925427] java: Corrupted page table at address 7f6a81e5d100
    Aug 16 07:16:05 kafka0 kernel: [  742.926589] PGD 80000000373f4067 PUD b7852067 PMD b1c08067 PTE 80003ffffe17c225
    Aug 16 07:16:05 kafka0 kernel: [  742.928011] Bad pagetable: 000d [#1643] SMP 
    Aug 16 07:16:05 kafka0 kernel: [  742.928011] Modules linked in: dm_crypt serio_raw isofs crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel aes_x86_64 lrw gf128mul glue_helper ablk_helper cryptd psmouse floppy
    

    Why should the JVM throw a memory error? The main suspect is an incompatibility with the kernel version.

    Let’s take a look in the GRUB config file.

    Looks like we are booting with:

    menuentry 'Ubuntu' --class ubuntu --class gnu-linux --class gnu --class os $menuentry_id_option 'gnulinux-simple-baf292e5-0bb6-4e58-8a71-5b912e0f09b6' {
    	recordfail
    	load_video
    	gfxmode $linux_gfx_mode
    	insmod gzio
    	insmod part_msdos
    	insmod ext2
    	if [ x$feature_platform_search_hint = xy ]; then
    	  search --no-floppy --fs-uuid --set=root  baf292e5-0bb6-4e58-8a71-5b912e0f09b6
    	else
    	  search --no-floppy --fs-uuid --set=root baf292e5-0bb6-4e58-8a71-5b912e0f09b6
    	fi
    	linux	/boot/vmlinuz-3.13.0-155-generic root=UUID=baf292e5-0bb6-4e58-8a71-5b912e0f09b6 ro  console=tty1 console=ttyS0
    	initrd	/boot/initrd.img-3.13.0-155-generic
    

    There was also an older version of kernel image available 3.13.0-153.

    The short fix for this is to update the grub.cfg file to the old version and reboot the server.
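
    A minimal sketch of pinning the older kernel (assuming Ubuntu's grub2 layout; the exact menu entry title may differ, so check your grub.cfg first):

    # make sure the old kernel image is still around
    ls /boot/vmlinuz-3.13.0-153-generic

    # select it as default and regenerate grub.cfg
    sudo sed -i 's|^GRUB_DEFAULT=.*|GRUB_DEFAULT="Advanced options for Ubuntu>Ubuntu, with Linux 3.13.0-153-generic"|' /etc/default/grub
    sudo update-grub
    sudo reboot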

    The proper fix is still in progress. I will post it as soon as I have it.

    P.S: I forgot to mention the Zookeeper version:

    Zookeeper version: 3.4.5--1, built on 06/10/2013 17:26 GMT

    P.S. 2: It seems that the issue is related to Java processes in general, not only Zookeeper.

    Cheers

  • Puppet gems install workaround after TLS 1.0 switchoff

    Hi,

    It seems that since rubygems.org disabled the TLS 1.0 protocol, there is an issue with installing custom gems in the puppet server.

    If you run puppetserver gem environment you will probably see the following output:

    /opt/puppetlabs/bin/puppetserver gem environment
    RubyGems Environment:
      - RUBYGEMS VERSION: 2.4.8
      - RUBY VERSION: 1.9.3 (2015-06-10 patchlevel 551) [java]
      - INSTALLATION DIRECTORY: /opt/puppetlabs/server/data/puppetserver/jruby-gems
      - RUBY EXECUTABLE: java -jar /opt/puppetlabs/server/apps/puppetserver/puppet-server-release.jar
      - EXECUTABLE DIRECTORY: /opt/puppetlabs/server/data/puppetserver/jruby-gems/bin
      - SPEC CACHE DIRECTORY: /root/.gem/specs
      - SYSTEM CONFIGURATION DIRECTORY: file:/opt/puppetlabs/server/apps/puppetserver/puppet-server-release.jar!/META-INF/jruby.home/etc
      - RUBYGEMS PLATFORMS:
        - ruby
        - universal-java-1.7
      - GEM PATHS:
         - /opt/puppetlabs/server/data/puppetserver/jruby-gems
         - /root/.gem/jruby/1.9
         - file:/opt/puppetlabs/server/apps/puppetserver/puppet-server-release.jar!/META-INF/jruby.home/lib/ruby/gems/shared
      - GEM CONFIGURATION:
         - :update_sources => true
         - :verbose => true
         - :backtrace => false
         - :bulk_threshold => 1000
         - "install" => "--no-rdoc --no-ri --env-shebang"
         - "update" => "--no-rdoc --no-ri --env-shebang"
      - REMOTE SOURCES:
         - https://rubygems.org/
      - SHELL PATH:
         - /usr/local/sbin
         - /usr/local/bin
         - /usr/sbin
         - /usr/bin
         - /sbin
         - /bin
         - /usr/games
         - /usr/local/games
         - /opt/puppetlabs/bin
    

    Also, if you try to install a gem, you will receive:

    /opt/puppetlabs/bin/puppetserver gem install toml-rb
    ERROR:  Could not find a valid gem 'toml-rb' (>= 0), here is why:
              Unable to download data from https://rubygems.org/ - Received fatal alert: protocol_version (https://api.rubygems.org/specs.4.8.gz)
    

    A short but unsafe fix for this is:

    /opt/puppetlabs/bin/puppetserver gem install --source "http://rubygems.org/" toml-rb
    Fetching: toml-rb-1.1.1.gem (100%)
    Successfully installed toml-rb-1.1.1
    WARNING:  Unable to pull data from 'https://rubygems.org/': Received fatal alert: protocol_version (https://api.rubygems.org/specs.4.8.gz)
    1 gem installed
    

    It's not that elegant, but it does the trick. You can also include this in a puppet exec block.
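
    To double-check afterwards that the gem is actually visible to the puppetserver's JRuby:

    /opt/puppetlabs/bin/puppetserver gem list | grep toml-rb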

    Cheers

  • Convert mdx image (daemon tools) to iso

    Hi,

    Some time ago I created some images that were saved in mdx format using Daemon Tools. Those were Windows times, but I have migrated to Debian since then.

    I created a Windows VirtualBox machine in order to use them, but unfortunately it does not allow mounting them in this format.

    In order to convert them you will have to install a small package called iat using the command sudo apt-get install iat. It is found in the default repo.

    Once it's installed, just run iat old_image.mdx old_image.iso.
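
    Putting it together (the image names are just placeholders):

    sudo apt-get install iat
    iat old_image.mdx old_image.iso
    # sanity check the result
    file old_image.iso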

    Cheers

  • Kafka cluster nodes and controller using golang

    Hi,

    Using the golang library for zookeeper from here, you can very easily get the nodes that are registered in the cluster and the controller node.
    In order to install this module, besides needing to set up the GOPATH, you will also have to install some packages from the linux distro repo:
    bzr, gcc, libzookeeper-mt-dev
    Once all of this is installed, just go get launchpad.net/gozk 🙂
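
    Summing up the setup (assuming a Debian-based distro and an already exported GOPATH):

    sudo apt-get install bzr gcc libzookeeper-mt-dev
    go get launchpad.net/gozk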

    And here is the small example:

    
    package main
    
    import (
    	"fmt"
    	"strings"
    
    	"launchpad.net/gozk"
    )
    
    func main() {
    	GetResource()
    }
    
    // GetResource connects to the local zookeeper instance and prints the list
    // of registered brokers together with the cluster controller.
    func GetResource() {
    	zk, session, err := zookeeper.Dial("localhost:2181", 5e9)
    	if err != nil {
    		fmt.Println("Couldn't connect")
    		return
    	}
    	defer zk.Close()
    
    	// wait for the session event and make sure we are actually connected
    	event := <-session
    	if event.State != zookeeper.STATE_CONNECTED {
    		fmt.Println("Couldn't connect")
    		return
    	}
    
    	GetBrokers(zk)
    	GetController(zk)
    }
    
    // GetController reads the /controller znode and extracts the id of the
    // current cluster controller from its JSON content.
    func GetController(connection *zookeeper.Conn) {
    	rs, _, err := connection.Get("/controller")
    	if err != nil {
    		fmt.Println("Couldn't get the resource")
    		return
    	}
    	// the znode holds something like {"version":1,"brokerid":1011,...},
    	// so split on "," and then on ":" to isolate the broker id
    	controller := strings.Split(rs, ",")
    	id := strings.Split(controller[1], ":")
    	fmt.Printf("\nCluster controller is: %s\n", id[1])
    }
    
    // GetBrokers lists the children of /brokers/ids, one per registered broker.
    func GetBrokers(connection *zookeeper.Conn) {
    	trs, _, err := connection.Children("/brokers/ids")
    	if err != nil {
    		fmt.Println("Couldn't get the resource")
    		return
    	}
    	fmt.Printf("List of brokers: ")
    	for _, value := range trs {
    		fmt.Printf(" %s", value)
    	}
    }
    

    Yeah, I know it's not the most elegant, but it works:

    go run zootest.go
    List of brokers:  1011 1009 1001
    Cluster controller is: 1011
    

    That would be all.

    Tnx and cheers!

  • Error 127, not related to Puppet or Golang

    Hi,

    Something from my experience playing with Golang and Puppet code this morning.
    I wrote a very, very simple script to restart a service, which you can find here.
    Today I wanted to put it on the machine and run it with puppet, so I wrote a very small class that looked like this:

    class profiles_test::updatekafka {
    
      package { 'golang':
        ensure => installed,
        name   => 'golang',
      }
      file { '/root/servicerestart.go':
        source  => 'puppet:///modules/profiles_test/servicerestart.go',
        mode    => '0644',
        replace => true,
      }
      exec { 'go build servicerestart.go':
        cwd     => '/root',
        creates => '/root/servicerestart',
        path    => ['/usr/bin', '/usr/sbin'],
      } ->
      exec { '/root/servicerestart':
        cwd    => '/root',
        path   => ['/usr/bin', '/usr/sbin', '/root'],
        onlyif => 'ps -u kafka',
      }
    }
    

    On execution, surprise, it kept throwing this feedback:

    08:19:44 Notice: /Stage[main]/Profiles_test::Updatekafka/File[/root/check_cluster_state.go]/ensure: defined content as '{md5}0817dbf82b74072e125f8b71ee914150'
    08:19:44 Notice: /Stage[main]/Profiles_test::Updatekafka/Exec[go run servicerestart.go]/returns: panic: exit status 127
    08:19:44 Notice: /Stage[main]/Profiles_test::Updatekafka/Exec[go run servicerestart.go]/returns: 
    08:19:44 Notice: /Stage[main]/Profiles_test::Updatekafka/Exec[go run servicerestart.go]/returns: goroutine 1 [running]:
    08:19:44 Notice: /Stage[main]/Profiles_test::Updatekafka/Exec[go run servicerestart.go]/returns: runtime.panic(0x4c2100, 0xc2100000b8)
    08:19:44 Notice: /Stage[main]/Profiles_test::Updatekafka/Exec[go run servicerestart.go]/returns: 	/usr/lib/go/src/pkg/runtime/panic.c:266 +0xb6
    08:19:44 Notice: /Stage[main]/Profiles_test::Updatekafka/Exec[go run servicerestart.go]/returns: main.check(0x7f45b08ed150, 0xc2100000b8)
    08:19:44 Notice: /Stage[main]/Profiles_test::Updatekafka/Exec[go run servicerestart.go]/returns: 	/root/servicerestart.go:94 +0x4f
    08:19:44 Notice: /Stage[main]/Profiles_test::Updatekafka/Exec[go run servicerestart.go]/returns: main.RunCommand(0x4ea7f0, 0x2c, 0x7f4500000000, 0x409928, 0x5d9f38)
    08:19:44 Notice: /Stage[main]/Profiles_test::Updatekafka/Exec[go run servicerestart.go]/returns: 	/root/servicerestart.go:87 +0x112
    08:19:44 Notice: /Stage[main]/Profiles_test::Updatekafka/Exec[go run servicerestart.go]/returns: main.GetBrokerList(0x7f45b08ecf00, 0xc21003fa00, 0xe)
    08:19:44 Notice: /Stage[main]/Profiles_test::Updatekafka/Exec[go run servicerestart.go]/returns: 	/root/servicerestart.go:66 +0x3c
    08:19:44 Notice: /Stage[main]/Profiles_test::Updatekafka/Exec[go run servicerestart.go]/returns: main.GetStatus(0xc21000a170)
    08:19:44 Notice: /Stage[main]/Profiles_test::Updatekafka/Exec[go run servicerestart.go]/returns: 	/root/servicerestart.go:33 +0x1e
    08:19:44 Notice: /Stage[main]/Profiles_test::Updatekafka/Exec[go run servicerestart.go]/returns: main.StopBroker()
    08:19:44 Notice: /Stage[main]/Profiles_test::Updatekafka/Exec[go run servicerestart.go]/returns: 	/root/servicerestart.go:39 +0x21
    08:19:44 Notice: /Stage[main]/Profiles_test::Updatekafka/Exec[go run servicerestart.go]/returns: main.main()
    08:19:44 Notice: /Stage[main]/Profiles_test::Updatekafka/Exec[go run servicerestart.go]/returns: 	/root/servicerestart.go:15 +0x1e
    08:19:44 Notice: /Stage[main]/Profiles_test::Updatekafka/Exec[go run servicerestart.go]/returns: exit status 2
    08:19:44 Error: go run servicerestart.go returned 1 instead of one of [0]

    The secret is in panic: exit status 127 (the shell's "command not found" exit code), and it seems that it's related neither to golang nor to puppet, but to the shell.
    In my script, the way to get the broker list is:

    output1 := RunCommand("echo dump | nc localhost 2181 | grep brokers")

    and the error is caused by not having the required binaries in your PATH. For example, if you run:

    whereis echo
    echo: /bin/echo /usr/share/man/man1/echo.1.gz
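
    You can reproduce the failure outside puppet by restricting PATH the same way the exec block did (on this Ubuntu box nc and grep live in /bin, so a /usr-only PATH cannot find them):

    PATH=/usr/bin:/usr/sbin /bin/sh -c 'echo dump | nc localhost 2181 | grep brokers'
    echo $?   # 127 means the shell could not find a command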

    So the right form of the exec block is actually:

    exec { '/root/servicerestart':
      cwd     => '/root',
      path    => ['/usr/bin', '/usr/sbin','/root','/bin','/sbin'],
      onlyif => 'ps -u kafka',
    }

    And then it will work.

    Cheers!

  • Multiple classes block declaration in hiera will not work

    Morning,

    Do not add multiple classes blocks in hiera like this:

    ---
    classes:
      - profiles::datadogagent
      - profiles::updatekafka
    
    kafka::security: true
    kafka::security_default: true
    kafka::heap_size: 2048
    classes:
     - profiles::pybackuplogs
     - profiles::group_coordinator
    

    The second classes block overrides the first one, so class updatekafka will not be executed.

    The structure should look like:

    ---
    classes:
      - profiles::datadogagent
      - profiles::updatekafka
      - profiles::pybackuplogs
      - profiles::group_coordinator
    kafka::security: true
    kafka::security_default: true
    kafka::heap_size: 2048
    

    Cheers!

  • Delete corrupted Kafka topic version 2.0

    Hi,

    In the past we had the situation described in this link: use-case-deleting-corrupted-kafka-topic.
    The situation repeated a little bit differently this time. Taking a look at the list of topics, there were three topics marked for deletion.
    None of them had a leader or ISR, so after a little bit of investigation the conclusion was that they weren't available anymore on the filesystem.
    My assumption is that the cluster controller began the delete process but failed before sending a new metadata update to the zookeepers.
    A restart of the cluster controller was performed in order to provide a new epoch, and after that, a manual deletion of the "deletion request" and metadata from the zookeepers (you can find the commands in the above link).
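
    For reference, the cleanup amounts to something like this (a sketch; the topic name is a placeholder and the exact commands are in the linked post):

    # assuming the standard Kafka distribution layout
    /opt/kafka/bin/zookeeper-shell.sh localhost:2181 <<'EOF'
    ls /admin/delete_topics
    rmr /admin/delete_topics/corrupted-topic
    rmr /brokers/topics/corrupted-topic
    EOF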

    On another listing, everything looks good.

    Cheers

  • Golang example for kafka service restart script

    Hi,

    Not much to say, just a pretty decent script for Kafka service restart (I tried to write it for our rolling upgrade procedure) that is still work in progress. If there are any changes that need to be made to it, I will post them.

    Here is the script:

    package main
    
    import (
    	"fmt"
    	"io/ioutil"
    	"os/exec"
    	"strings"
    	"time"
    )
    
    const (
    	libpath = "/opt/kafka/libs"
    )
    
    func main() {
    	StopBroker()
    	StartBroker()
    	fmt.Printf("You are running version %s", GetVersion(libpath))
    }
    
    // GetVersion extracts the Kafka version from the name of the kafka_* jar
    // found in the libs directory.
    func GetVersion(libpath string) string {
    	var KafkaVersion []string
    	files, err := ioutil.ReadDir(libpath)
    	check(err)
    	for _, f := range files {
    		if strings.HasPrefix(f.Name(), "kafka_") {
    			KafkaVersion = strings.Split(f.Name(), "-")
    			break
    		}
    	}
    	return KafkaVersion[1]
    }
    
    // GetStatus reports whether this broker is still registered in zookeeper.
    func GetStatus() bool {
    	brokers := GetBrokerList()
    	id := GetBrokerId()
    	return Contain(id, brokers)
    }
    
    // StopBroker stops the local broker only if it is still registered and the
    // cluster keeps enough brokers, then retries until it has unregistered.
    func StopBroker() {
    	status := GetStatus()
    	brokers := GetBrokerList()
    	if status && len(brokers) > 2 {
    		stop := RunCommand("service kafka stop")
    		fmt.Println(string(stop))
    		time.Sleep(60000 * time.Millisecond)
    	}
    	if !status {
    		fmt.Println("Broker has been successfully stopped")
    	} else {
    		StopBroker()
    	}
    }
    
    // StartBroker starts the local broker if it is not registered yet.
    func StartBroker() {
    	status := GetStatus()
    	if !status {
    		start := RunCommand("service kafka start")
    		fmt.Println(string(start))
    		time.Sleep(60000 * time.Millisecond)
    	}
    }
    
    // GetBrokerId reads this host's broker id from meta.properties.
    func GetBrokerId() string {
    	output := RunCommand("cat /srv/kafka/meta.properties | grep broker.id | cut -d'=' -f2")
    	return strings.TrimRight(string(output), "\n")
    }
    
    // GetBrokerList asks the local zookeeper (the 'dump' four letter command)
    // for the ids of all registered brokers.
    func GetBrokerList() []string {
    	output1 := RunCommand("echo dump | nc localhost 2181 | grep brokers")
    	var brokers []string
    	lines := strings.Split(string(output1), "\n")
    	for _, line := range lines {
    		trimString := strings.TrimSpace(line)
    		finalString := strings.TrimPrefix(trimString, "/brokers/ids/")
    		brokers = append(brokers, finalString)
    	}
    	return brokers
    }
    
    // Contain checks whether val1 is present in the val2 slice.
    func Contain(val1 string, val2 []string) bool {
    	for _, value := range val2 {
    		if value == val1 {
    			return true
    		}
    	}
    	return false
    }
    
    // RunCommand executes a shell command and returns its stdout.
    func RunCommand(command string) []byte {
    	cmd := exec.Command("/bin/sh", "-c", command)
    	result, err := cmd.Output()
    	check(err)
    	return result
    }
    
    func check(e error) {
    	if e != nil {
    		panic(e)
    	}
    }
    

    Cheers