专栏首页 > Kubernetes > runC源码分析——cgroup

runC源码分析——cgroup

runC项目中,与cgroups相关的代码,都在目录 runc/libcontainer/cgroups/下,下面是其源码目录结构分析:

我们关注的主要内容在apply_raw.go和各个cgroups子系统的操作方法实现定义文件,如上图中红色字体部分显示的文件。 apply_raw.go主要是实现了cgroups.go中定义的一系列接口:

// Manager is the set of operations available on a container's cgroup set.
type Manager interface {
       // Apply applies cgroup configuration to the process with the specified pid.
       Apply(pid int) error
 
       // GetPids returns the PIDs inside the cgroup set.
       GetPids() ([]int, error)
 
       // GetAllPids returns the PIDs inside the cgroup set and all sub-cgroups.
       GetAllPids() ([]int, error)
 
       // GetStats returns statistics for the cgroup set.
       GetStats() (*Stats, error)
 
       // Freeze toggles the freezer cgroup according to the specified state.
       Freeze(state configs.FreezerState) error
 
       // Destroy destroys the cgroup set.
       Destroy() error
 
       // NewCgroupManager() and LoadCgroupManager() require the following attributes:
       //     Paths   map[string]string
       //     Cgroups *cgroups.Cgroup
       // Paths maps cgroup subsystem to path at which it is mounted.
       // Cgroups specifies specific cgroup settings for the various subsystems.
 
       // GetPaths returns cgroup paths to save in a state file and to be able to
       // restore the object later.
       GetPaths() map[string]string
 
       // Set sets the cgroup as configured.
       Set(container *configs.Config) error
}

apply_raw.go中对上面定义的Manager接口中的8个方法逐一实现:

// Manager holds a container's cgroup configuration together with the
// per-subsystem cgroup paths recorded for it.
type Manager struct {
       // mu guards Paths: Apply, Destroy, GetPaths and GetStats hold it while
       // reading or replacing the map.
       mu      sync.Mutex
       // Cgroups is the cgroup configuration; Apply is a no-op when it is nil.
       Cgroups *configs.Cgroup
       // Paths is keyed by subsystem name. Apply fills it in and Destroy
       // replaces it with an empty map.
       Paths   map[string]string
}
 
// Apply places pid into the container's cgroups and records the resulting
// per-subsystem paths in m.Paths.
//
// It does nothing when the manager has no cgroup configuration. When the
// configuration carries explicit Paths, pid is entered into those cgroups via
// cgroups.EnterPid, keeping only the subsystems whose path lookup succeeds.
// Otherwise every registered subsystem is applied in turn and its path is
// recorded; a missing subsystem is skipped unless it is "devices".
func (m *Manager) Apply(pid int) (err error) {
	if m.Cgroups == nil {
		return nil
	}
	m.mu.Lock()
	defer m.mu.Unlock()

	cfg := m.Cgroups

	data, err := getCgroupData(cfg, pid)
	if err != nil {
		return err
	}

	if cfg.Paths != nil {
		joined := make(map[string]string)
		for subsystem, cgPath := range cfg.Paths {
			if _, lookupErr := data.path(subsystem); lookupErr != nil {
				if !cgroups.IsNotFound(lookupErr) {
					return lookupErr
				}
				// Subsystem is not available on this host: leave it out.
				continue
			}
			joined[subsystem] = cgPath
		}
		m.Paths = joined
		return cgroups.EnterPid(joined, pid)
	}

	created := make(map[string]string)
	for _, sys := range subsystems {
		if applyErr := sys.Apply(data); applyErr != nil {
			return applyErr
		}
		// TODO: Apply should, ideally, be reentrant or be broken up into a separate
		// create and join phase so that the cgroup hierarchy for a container can be
		// created then join consists of writing the process pids to cgroup.procs
		name := sys.Name()
		cgPath, pathErr := data.path(name)
		switch {
		case pathErr == nil:
			created[name] = cgPath
		case cgroups.IsNotFound(pathErr) && name != "devices":
			// Missing optional subsystem: nothing to record.
		default:
			// The non-presence of the devices subsystem is
			// considered fatal for security reasons.
			return pathErr
		}
	}
	m.Paths = created
	return nil
}
 
// Destroy removes the cgroup directories recorded in m.Paths and then resets
// m.Paths to an empty map.
//
// It is a no-op when the manager has no cgroup configuration, or when the
// configuration supplies explicit Paths. It returns the error from
// cgroups.RemovePaths, in which case m.Paths is left untouched.
func (m *Manager) Destroy() error {
	// Apply accepts a nil Cgroups and returns early, so Destroy must tolerate
	// it as well instead of dereferencing a nil pointer.
	if m.Cgroups == nil || m.Cgroups.Paths != nil {
		return nil
	}
	m.mu.Lock()
	defer m.mu.Unlock()
	if err := cgroups.RemovePaths(m.Paths); err != nil {
		return err
	}
	m.Paths = make(map[string]string)
	return nil
}
 
// GetPaths returns the manager's current subsystem-to-path map. The map is
// read while holding m.mu; the returned value is the map itself, not a copy.
func (m *Manager) GetPaths() map[string]string {
	m.mu.Lock()
	defer m.mu.Unlock()
	return m.Paths
}
 
// GetStats collects statistics from every subsystem recorded in m.Paths into
// a fresh cgroups.Stats value.
//
// Entries whose subsystem lookup reports errSubsystemDoesNotExist, or whose
// path no longer exists, are skipped. The first subsystem error aborts the
// collection and is returned with a nil Stats.
func (m *Manager) GetStats() (*cgroups.Stats, error) {
	m.mu.Lock()
	defer m.mu.Unlock()

	result := cgroups.NewStats()
	for subsystem, cgPath := range m.Paths {
		sys, lookupErr := subsystems.Get(subsystem)
		if lookupErr == errSubsystemDoesNotExist {
			continue
		}
		if !cgroups.PathExists(cgPath) {
			continue
		}
		if statErr := sys.GetStats(cgPath, result); statErr != nil {
			return nil, statErr
		}
	}
	return result, nil
}
 
// Set writes the resource settings from container.Cgroups into every
// registered subsystem, using the paths recorded by Apply, and then checks the
// cpu shares value through CheckCpushares when a cpu path is recorded.
//
// It is a no-op when the manager has no cgroup configuration, or when the
// configuration supplies explicit Paths. The first error from a subsystem or
// from CheckCpushares is returned.
func (m *Manager) Set(container *configs.Config) error {
	// Apply treats a nil Cgroups as "nothing to do"; mirror that here rather
	// than dereferencing a nil pointer.
	if m.Cgroups == nil {
		return nil
	}
	// If Paths are set, then we are just joining cgroups paths
	// and there is no need to set any values.
	if m.Cgroups.Paths != nil {
		return nil
	}

	// Take one snapshot under the mutex and use it throughout, instead of
	// reading m.Paths again without holding the lock.
	paths := m.GetPaths()
	for _, sys := range subsystems {
		path := paths[sys.Name()]
		if err := sys.Set(path, container.Cgroups); err != nil {
			return err
		}
	}

	if cpuPath := paths["cpu"]; cpuPath != "" {
		if err := CheckCpushares(cpuPath, container.Cgroups.Resources.CpuShares); err != nil {
			return err
		}
	}
	return nil
}
 
// Freeze toggles the container's freezer cgroup depending on the state
// provided.
//
// The requested state is stored in m.Cgroups.Resources.Freezer and pushed to
// the freezer subsystem. If the subsystem rejects it, the previous state is
// restored and the error is returned. If the freezer subsystem cannot be
// looked up at all, the recorded state is left unchanged.
func (m *Manager) Freeze(state configs.FreezerState) error {
	// Resolve the subsystem before touching the recorded state, so that a
	// failed lookup cannot leave Freezer set to a value that was never applied.
	freezer, err := subsystems.Get("freezer")
	if err != nil {
		return err
	}

	paths := m.GetPaths()
	dir := paths["freezer"]
	prevState := m.Cgroups.Resources.Freezer
	m.Cgroups.Resources.Freezer = state
	if err := freezer.Set(dir, m.Cgroups); err != nil {
		m.Cgroups.Resources.Freezer = prevState
		return err
	}
	return nil
}
 
// GetPids returns what cgroups.GetPids reports for the path recorded under the
// "devices" subsystem.
func (m *Manager) GetPids() ([]int, error) {
	devicesPath := m.GetPaths()["devices"]
	return cgroups.GetPids(devicesPath)
}
 
// GetAllPids returns what cgroups.GetAllPids reports for the path recorded
// under the "devices" subsystem.
func (m *Manager) GetAllPids() ([]int, error) {
	devicesPath := m.GetPaths()["devices"]
	return cgroups.GetAllPids(devicesPath)
}

再以cpu subsystem为例,看看各subsystem具体的操作方法定义:

// CpuGroup carries the operations for the "cpu" cgroup subsystem. It has no
// fields; all state lives in the cgroup filesystem.
type CpuGroup struct{}

// Name reports the subsystem name handled by CpuGroup.
func (s *CpuGroup) Name() string {
	const subsystemName = "cpu"
	return subsystemName
}
 
// Apply pushes the cgroup configuration and the corresponding pid from d into
// the cpu subsystem by delegating to ApplyDir. A "not found" result from the
// path lookup is tolerated and passed on to ApplyDir; any other lookup error
// is returned.
func (s *CpuGroup) Apply(d *cgroupData) error {
	// We always want to join the cpu group, to allow fair cpu scheduling
	// on a container basis
	cpuPath, err := d.path("cpu")
	if err == nil || cgroups.IsNotFound(err) {
		return s.ApplyDir(cpuPath, d.config, d.pid)
	}
	return err
}
 
// ApplyDir creates the cpu cgroup directory at path, writes the real-time
// scheduling settings from cgroup, and then adds pid to the cgroup.
// An empty path is treated as "no cpu cgroup" and succeeds without doing
// anything.
func (s *CpuGroup) ApplyDir(path string, cgroup *configs.Cgroup, pid int) error {
	// This might happen if we have no cpu cgroup mounted.
	// Just do nothing and don't fail.
	if path == "" {
		return nil
	}

	err := os.MkdirAll(path, 0755)
	if err != nil {
		return err
	}

	// We should set the real-Time group scheduling settings before moving
	// in the process because if the process is already in SCHED_RR mode
	// and no RT bandwidth is set, adding it will fail.
	err = s.SetRtSched(path, cgroup)
	if err != nil {
		return err
	}

	// because we are not using d.join we need to place the pid into the procs file
	// unlike the other subsystems
	return cgroups.WriteCgroupProc(path, pid)
}
 
// SetRtSched writes the real-time scheduling settings of cgroup into the cpu
// cgroup at path: cpu.rt_period_us first, then cpu.rt_runtime_us. A setting
// whose value is zero is not written. The first write error is returned.
func (s *CpuGroup) SetRtSched(path string, cgroup *configs.Cgroup) error {
	if rtPeriod := cgroup.Resources.CpuRtPeriod; rtPeriod != 0 {
		err := writeFile(path, "cpu.rt_period_us", strconv.FormatInt(rtPeriod, 10))
		if err != nil {
			return err
		}
	}
	if rtRuntime := cgroup.Resources.CpuRtRuntime; rtRuntime != 0 {
		err := writeFile(path, "cpu.rt_runtime_us", strconv.FormatInt(rtRuntime, 10))
		if err != nil {
			return err
		}
	}
	return nil
}
 
// Set writes the cpu settings of cgroup into the cpu cgroup at path, in this
// order: cpu.shares, cpu.cfs_period_us, cpu.cfs_quota_us, followed by the
// real-time settings handled by SetRtSched. A setting whose value is zero is
// not written. The first write error is returned.
func (s *CpuGroup) Set(path string, cgroup *configs.Cgroup) error {
	res := cgroup.Resources
	settings := []struct {
		file  string
		value int64
	}{
		{"cpu.shares", res.CpuShares},
		{"cpu.cfs_period_us", res.CpuPeriod},
		{"cpu.cfs_quota_us", res.CpuQuota},
	}
	for _, setting := range settings {
		if setting.value == 0 {
			continue
		}
		if err := writeFile(path, setting.file, strconv.FormatInt(setting.value, 10)); err != nil {
			return err
		}
	}
	return s.SetRtSched(path, cgroup)
}
 
// Remove looks up the cpu cgroup path for d and hands both the path and the
// lookup error to removePath, returning its result.
func (s *CpuGroup) Remove(d *cgroupData) error {
	cpuPath, err := d.path("cpu")
	return removePath(cpuPath, err)
}
 
// GetStats reads the cpu.stat file under path and copies the throttling
// counters it contains (nr_periods, nr_throttled, throttled_time) into
// stats.CpuStats.ThrottlingData. Other keys are ignored.
//
// A missing cpu.stat file is not an error: stats is left unchanged and nil is
// returned. Any other open error, a line that getCgroupParamKeyValue cannot
// parse, or a read error reported by the scanner is returned.
func (s *CpuGroup) GetStats(path string, stats *cgroups.Stats) error {
	f, err := os.Open(filepath.Join(path, "cpu.stat"))
	if err != nil {
		if os.IsNotExist(err) {
			return nil
		}
		return err
	}
	defer f.Close()

	sc := bufio.NewScanner(f)
	for sc.Scan() {
		t, v, err := getCgroupParamKeyValue(sc.Text())
		if err != nil {
			return err
		}
		switch t {
		case "nr_periods":
			stats.CpuStats.ThrottlingData.Periods = v

		case "nr_throttled":
			stats.CpuStats.ThrottlingData.ThrottledPeriods = v

		case "throttled_time":
			stats.CpuStats.ThrottlingData.ThrottledTime = v
		}
	}
	// Scan returns false on both EOF and read failure; only Err tells them
	// apart, so surface a failure instead of reporting partial stats as success.
	return sc.Err()
}

查看某个runC启动的容器的state.json文件,能看到该容器对应的cgroup和namespace路径信息,命令如下: `$ cat /var/run/runc/$containerName/state.json | jq .`

"namespace_paths": {
    "NEWUTS": "/proc/30097/ns/uts",
    "NEWUSER": "/proc/30097/ns/user",
    "NEWPID": "/proc/30097/ns/pid",
    "NEWNS": "/proc/30097/ns/mnt",
    "NEWNET": "/proc/30097/ns/net",
    "NEWIPC": "/proc/30097/ns/ipc"
  },
  "cgroup_paths": {
    "perf_event": "/sys/fs/cgroup/perf_event/user.slice/container1",
    "net_cls": "/sys/fs/cgroup/net_cls/user.slice/container1",
    "name=systemd": "/sys/fs/cgroup/systemd/user.slice/container1",
    "blkio": "/sys/fs/cgroup/blkio/user.slice/container1",
    "cpu": "/sys/fs/cgroup/cpu,cpuacct/user.slice/container1",
    "cpuacct": "/sys/fs/cgroup/cpu,cpuacct/user.slice/container1",
    "cpuset": "/sys/fs/cgroup/cpuset/user.slice/container1",
    "devices": "/sys/fs/cgroup/devices/user.slice/container1",
    "freezer": "/sys/fs/cgroup/freezer/user.slice/container1",
    "hugetlb": "/sys/fs/cgroup/hugetlb/user.slice/container1",
    "memory": "/sys/fs/cgroup/memory/user.slice/container1"
  },

本文参与腾讯云自媒体分享计划,欢迎正在阅读的你也加入,一起分享。

我来说两句

0 条评论
登录 后参与评论

相关文章

  • runC源码分析——namespace

    runc/libcontainer/configs/config.go中定义了container对应的Namespaces。另外对于User Namespace...

    Walton
  • 深入分析Kubernetes Critical Pod(三)

    本文介绍了Kubelet在Predicate Admit准入检查时对CriticalPod的资源抢占的原理,以及Priority Admission Contr...

    Walton
  • SkyDNS2源码分析

    SkyDNS2是SkyDNS Version 2.x的统称,其官方文档只有README.md,网上能找到的资料也不多,因此需要我们自行对代码进行一定的分析,才能...

    Walton
  • Guava-1.10.1类Equivalence.Wrapper<T>

    com.google.common.base Class Equivalence.Wrapper<T>

    悠扬前奏
  • web前端开发在5G时代有哪些发展

    在享受互联网带来的便捷也给互联网产品提出了新的需求,这意味着前端开发人员也有了更多的机会和挑战。无论是大小公司,对前端开发工程师的需求都是在快速上涨,薪资待遇也...

    千锋哈尔滨IT培训
  • 十分钟接入iOS 12新特性——Siri Shortcuts

    Xcode 10已经正式发布,开发者可以接入Siri Shortcuts的iOS 12新特性。 WWDC2018的Introduction to Siri S...

    落影
  • 近几年前端技术盘点以及 2016 年技术发展方向

    Web 发展了几十个春秋,风起云涌,千变万化。我很庆幸自己没有完整地经历过这些年头,而是站在前人的肩膀上行走。Web 技术发展的速度让人感觉那几乎不是继承式的迭...

    用户1631416
  • ​画解算法:100. 相同的树

    https://leetcode-cn.com/problems/same-tree/

    灵魂画师牧码
  • Python finally的用法

    try语句有一个可选finally子句,用于定义在所有情况下都必须执行的finally操作

    于小勇
  • 如何挂起Promise请求,refresh_token后再用新的access_token重新发起请求?

    接手老项目,需要写一个access_token刷新的逻辑,具体流程我就不赘述了,网上关于JWT刷新流程的文章有很多。我遇到的主要问题是,项目没有使用axios,...

    用户2141756

扫码关注云+社区

领取腾讯云代金券