diff --git a/collector/systemd_linux.go b/collector/systemd_linux.go index 54aa9ade64d441c092279a0476d8d6414d3256e5..00f0568962c9a3d470f2c4ef239e5e5a7f650f8f 100644 --- a/collector/systemd_linux.go +++ b/collector/systemd_linux.go @@ -137,27 +137,35 @@ func (c *systemdCollector) Update(ch chan<- prometheus.Metric) error { log.Debugf("systemd filtering units took %f", time.Since(begin).Seconds()) begin = time.Now() - c.collectUnitStatusMetrics(ch, units) + err = c.collectUnitStatusMetrics(ch, units) + if err != nil { + return fmt.Errorf("couldn't get unit status metrics: %s", err) + } log.Debugf("systemd collectUnitStatusMetrics took %f", time.Since(begin).Seconds()) begin = time.Now() - c.collectUnitStartTimeMetrics(ch, units) + err = c.collectUnitStartTimeMetrics(ch, units) + if err != nil { + return fmt.Errorf("couldn't get unit start time metrics: %s", err) + } log.Debugf("systemd collectUnitStartTimeMetrics took %f", time.Since(begin).Seconds()) begin = time.Now() - c.collectUnitTasksCurrentMetrics(ch, units) - log.Debugf("systemd collectUnitTasksCurrentMetrics took %f", time.Since(begin).Seconds()) - - begin = time.Now() - c.collectUnitTasksMaxMetrics(ch, units) - log.Debugf("systemd collectUnitTasksMaxMetrics took %f", time.Since(begin).Seconds()) + err = c.collectUnitTasksMetrics(ch, units) + if err != nil { + return fmt.Errorf("couldn't get unit tasks metrics: %s", err) + } + log.Debugf("systemd collectUnitTasksMetrics took %f", time.Since(begin).Seconds()) begin = time.Now() c.collectTimers(ch, units) log.Debugf("systemd collectTimers took %f", time.Since(begin).Seconds()) begin = time.Now() - c.collectSockets(ch, units) + err = c.collectSockets(ch, units) + if err != nil { + return fmt.Errorf("couldn't get unit socket metrics: %s", err) + } log.Debugf("systemd collectSockets took %f", time.Since(begin).Seconds()) begin = time.Now() @@ -171,7 +179,13 @@ func (c *systemdCollector) Update(ch chan<- prometheus.Metric) error { return nil } -func (c 
*systemdCollector) collectUnitStatusMetrics(ch chan<- prometheus.Metric, units []unit) { +func (c *systemdCollector) collectUnitStatusMetrics(ch chan<- prometheus.Metric, units []unit) error { + conn, err := c.newDbus() + if err != nil { + return fmt.Errorf("couldn't get dbus connection: %s", err) + } + defer conn.Close() + for _, unit := range units { for _, stateName := range unitStatesName { isActive := 0.0 @@ -182,60 +196,141 @@ func (c *systemdCollector) collectUnitStatusMetrics(ch chan<- prometheus.Metric, c.unitDesc, prometheus.GaugeValue, isActive, unit.Name, stateName) } - if strings.HasSuffix(unit.Name, ".service") && unit.nRestarts != nil { - ch <- prometheus.MustNewConstMetric( - c.nRestartsDesc, prometheus.CounterValue, - float64(*unit.nRestarts), unit.Name) + if strings.HasSuffix(unit.Name, ".service") { + // NRestarts wasn't added until systemd 235. + restartsCount, err := conn.GetUnitTypeProperty(unit.Name, "Service", "NRestarts") + if err != nil { + log.Debugf("couldn't get unit '%s' NRestarts: %s", unit.Name, err) + } else { + ch <- prometheus.MustNewConstMetric( + c.nRestartsDesc, prometheus.CounterValue, + float64(restartsCount.Value.Value().(uint32)), unit.Name) + } } } + return nil } -func (c *systemdCollector) collectSockets(ch chan<- prometheus.Metric, units []unit) { +func (c *systemdCollector) collectSockets(ch chan<- prometheus.Metric, units []unit) error { + conn, err := c.newDbus() + if err != nil { + return fmt.Errorf("couldn't get dbus connection: %s", err) + } + defer conn.Close() + var ( + acceptedConnections uint32 + currentConnections uint32 + refusedConnections *uint32 + ) + for _, unit := range units { if !strings.HasSuffix(unit.Name, ".socket") { continue } + acceptedConnectionCount, err := conn.GetUnitTypeProperty(unit.Name, "Socket", "NAccepted") + if err != nil { + log.Debugf("couldn't get unit '%s' NAccepted: %s", unit.Name, err) + continue + } + + acceptedConnections = acceptedConnectionCount.Value.Value().(uint32) + + 
currentConnectionCount, err := conn.GetUnitTypeProperty(unit.Name, "Socket", "NConnections") + if err != nil { + log.Debugf("couldn't get unit '%s' NConnections: %s", unit.Name, err) + continue + } + currentConnections = currentConnectionCount.Value.Value().(uint32) + + // NRefused wasn't added until systemd 239. + refusedConnectionCount, err := conn.GetUnitTypeProperty(unit.Name, "Socket", "NRefused") + if err != nil { + log.Debugf("couldn't get unit '%s' NRefused: %s", unit.Name, err) + } else { + nRefused := refusedConnectionCount.Value.Value().(uint32) + refusedConnections = &nRefused + } + ch <- prometheus.MustNewConstMetric( c.socketAcceptedConnectionsDesc, prometheus.CounterValue, - float64(unit.acceptedConnections), unit.Name) + float64(acceptedConnections), unit.Name) ch <- prometheus.MustNewConstMetric( c.socketCurrentConnectionsDesc, prometheus.GaugeValue, - float64(unit.currentConnections), unit.Name) - if unit.refusedConnections != nil { + float64(currentConnections), unit.Name) + if refusedConnections != nil { ch <- prometheus.MustNewConstMetric( c.socketRefusedConnectionsDesc, prometheus.GaugeValue, - float64(*unit.refusedConnections), unit.Name) + float64(*refusedConnections), unit.Name) } } + return nil } -func (c *systemdCollector) collectUnitStartTimeMetrics(ch chan<- prometheus.Metric, units []unit) { +func (c *systemdCollector) collectUnitStartTimeMetrics(ch chan<- prometheus.Metric, units []unit) error { + conn, err := c.newDbus() + if err != nil { + return fmt.Errorf("couldn't get dbus connection: %s", err) + } + defer conn.Close() + var startTimeUsec uint64 + for _, unit := range units { + if unit.ActiveState != "active" { + startTimeUsec = 0 + } else { + timestampValue, err := conn.GetUnitProperty(unit.Name, "ActiveEnterTimestamp") + if err != nil { + log.Debugf("couldn't get unit '%s' StartTimeUsec: %s", unit.Name, err) + continue + } + startTimeUsec = timestampValue.Value.Value().(uint64) + } + ch <- prometheus.MustNewConstMetric( 
c.unitStartTimeDesc, prometheus.GaugeValue, - float64(unit.startTimeUsec)/1e6, unit.Name) + float64(startTimeUsec)/1e6, unit.Name) } + return nil } -func (c *systemdCollector) collectUnitTasksCurrentMetrics(ch chan<- prometheus.Metric, units []unit) { - for _, unit := range units { - if unit.tasksCurrent != nil { - ch <- prometheus.MustNewConstMetric( - c.unitTasksCurrentDesc, prometheus.GaugeValue, - float64(*unit.tasksCurrent), unit.Name) - } +func (c *systemdCollector) collectUnitTasksMetrics(ch chan<- prometheus.Metric, units []unit) error { + conn, err := c.newDbus() + if err != nil { + return fmt.Errorf("couldn't get dbus connection: %s", err) } -} + defer conn.Close() -func (c *systemdCollector) collectUnitTasksMaxMetrics(ch chan<- prometheus.Metric, units []unit) { + var val uint64 for _, unit := range units { - if unit.tasksMax != nil { - ch <- prometheus.MustNewConstMetric( - c.unitTasksMaxDesc, prometheus.GaugeValue, - float64(*unit.tasksMax), unit.Name) + if strings.HasSuffix(unit.Name, ".service") { + tasksCurrentCount, err := conn.GetUnitTypeProperty(unit.Name, "Service", "TasksCurrent") + if err != nil { + log.Debugf("couldn't get unit '%s' TasksCurrent: %s", unit.Name, err) + } else { + val = tasksCurrentCount.Value.Value().(uint64) + // Don't set tasksCurrent if dbus reports MaxUint64. + if val != math.MaxUint64 { + ch <- prometheus.MustNewConstMetric( + c.unitTasksCurrentDesc, prometheus.GaugeValue, + float64(val), unit.Name) + } + } + tasksMaxCount, err := conn.GetUnitTypeProperty(unit.Name, "Service", "TasksMax") + if err != nil { + log.Debugf("couldn't get unit '%s' TasksMax: %s", unit.Name, err) + } else { + val = tasksMaxCount.Value.Value().(uint64) + // Don't set tasksMax if dbus reports MaxUint64. 
+ if val != math.MaxUint64 { + ch <- prometheus.MustNewConstMetric( + c.unitTasksMaxDesc, prometheus.GaugeValue, + float64(val), unit.Name) + } + } } } + return nil } func (c *systemdCollector) collectTimers(ch chan<- prometheus.Metric, units []unit) { @@ -274,14 +369,7 @@ func (c *systemdCollector) newDbus() (*dbus.Conn, error) { type unit struct { dbus.UnitStatus - lastTriggerUsec uint64 - startTimeUsec uint64 - tasksCurrent *uint64 - tasksMax *uint64 - nRestarts *uint32 - acceptedConnections uint32 - currentConnections uint32 - refusedConnections *uint32 + lastTriggerUsec uint64 } func (c *systemdCollector) getAllUnits() ([]unit, error) { @@ -313,77 +401,6 @@ func (c *systemdCollector) getAllUnits() ([]unit, error) { unit.lastTriggerUsec = lastTriggerValue.Value.Value().(uint64) } - if strings.HasSuffix(unit.Name, ".service") { - // NRestarts wasn't added until systemd 235. - restartsCount, err := conn.GetUnitTypeProperty(unit.Name, "Service", "NRestarts") - if err != nil { - log.Debugf("couldn't get unit '%s' NRestarts: %s", unit.Name, err) - } else { - nRestarts := restartsCount.Value.Value().(uint32) - unit.nRestarts = &nRestarts - } - - tasksCurrentCount, err := conn.GetUnitTypeProperty(unit.Name, "Service", "TasksCurrent") - if err != nil { - log.Debugf("couldn't get unit '%s' TasksCurrent: %s", unit.Name, err) - } else { - val := tasksCurrentCount.Value.Value().(uint64) - // Don't set if tasksCurrent if dbus reports MaxUint64. - if val != math.MaxUint64 { - unit.tasksCurrent = &val - } - } - - tasksMaxCount, err := conn.GetUnitTypeProperty(unit.Name, "Service", "TasksMax") - if err != nil { - log.Debugf("couldn't get unit '%s' TasksMax: %s", unit.Name, err) - } else { - val := tasksMaxCount.Value.Value().(uint64) - // Don't set if tasksMax if dbus reports MaxUint64. 
- if val != math.MaxUint64 { - unit.tasksMax = &val - } - } - - } - - if strings.HasSuffix(unit.Name, ".socket") { - acceptedConnectionCount, err := conn.GetUnitTypeProperty(unit.Name, "Socket", "NAccepted") - if err != nil { - log.Debugf("couldn't get unit '%s' NAccepted: %s", unit.Name, err) - continue - } - - unit.acceptedConnections = acceptedConnectionCount.Value.Value().(uint32) - - currentConnectionCount, err := conn.GetUnitTypeProperty(unit.Name, "Socket", "NConnections") - if err != nil { - log.Debugf("couldn't get unit '%s' NConnections: %s", unit.Name, err) - continue - } - unit.currentConnections = currentConnectionCount.Value.Value().(uint32) - - // NRefused wasn't added until systemd 239. - refusedConnectionCount, err := conn.GetUnitTypeProperty(unit.Name, "Socket", "NRefused") - if err != nil { - log.Debugf("couldn't get unit '%s' NRefused: %s", unit.Name, err) - } else { - nRefused := refusedConnectionCount.Value.Value().(uint32) - unit.refusedConnections = &nRefused - } - } - - if unit.ActiveState != "active" { - unit.startTimeUsec = 0 - } else { - timestampValue, err := conn.GetUnitProperty(unit.Name, "ActiveEnterTimestamp") - if err != nil { - log.Debugf("couldn't get unit '%s' StartTimeUsec: %s", unit.Name, err) - continue - } - - unit.startTimeUsec = timestampValue.Value.Value().(uint64) - } result = append(result, unit) }