前往小程序,Get更优阅读体验!
立即前往
首页
学习
活动
专区
工具
TVP
发布
社区首页 >专栏 >runC源码分析——cgroup

runC源码分析——cgroup

作者头像
Walton
发布2018-04-13 16:35:19
1.5K0
发布2018-04-13 16:35:19
举报
文章被收录于专栏:KubernetesKubernetes

runC项目中,与cgroups相关的代码,都在目录 runc/libcontainer/cgroups/下,下面是其源码目录结构分析:

runC中Cgroup源码目录结构
runC中Cgroup源码目录结构

我们关注的主要内容在apply_raw.go和各个cgroups子系统的操作方法实现定义文件,如上图中红色字体部分显示的文件。 apply_raw.go主要是实现了cgroups.go中定义的一系列接口:

代码语言:go
复制
// Manager groups the operations that can be performed on a cgroup set.
type Manager interface {
       // Apply applies cgroup configuration to the process with the specified pid.
       Apply(pid int) error
 
       // GetPids returns the PIDs inside the cgroup set.
       GetPids() ([]int, error)
 
       // GetAllPids returns the PIDs inside the cgroup set and all sub-cgroups.
       GetAllPids() ([]int, error)
 
       // GetStats returns statistics for the cgroup set.
       GetStats() (*Stats, error)
 
       // Freeze toggles the freezer cgroup according to the specified state.
       Freeze(state configs.FreezerState) error
 
       // Destroy destroys the cgroup set.
       Destroy() error
 
       // NewCgroupManager() and LoadCgroupManager() require the following attributes:
       //     Paths   map[string]string
       //     Cgroups *cgroups.Cgroup
       // Paths maps a cgroup subsystem to the path at which it is mounted.
       // Cgroups specifies the cgroup settings for the various subsystems.
 
       // GetPaths returns the cgroup paths to save in a state file so that the
       // object can be restored later.
       GetPaths() map[string]string
 
       // Set sets the cgroup as configured.
       Set(container *configs.Config) error
}

apply_raw.go 中的 Manager 结构体逐一实现了上面 Manager 接口中定义的 8 个方法:

代码语言:go
复制
// Manager holds the cgroup configuration of a container together with the
// per-subsystem cgroup paths that were recorded for it.
type Manager struct {
       // mu is held by Apply, Destroy, GetPaths and GetStats while they read
       // or replace Paths.
       mu      sync.Mutex
       // Cgroups is the cgroup configuration; Apply is a no-op when it is nil.
       Cgroups *configs.Cgroup
       // Paths maps a subsystem name to its cgroup path. It is filled in by
       // Apply and reset to an empty map by Destroy.
       Paths   map[string]string
}
 
// Apply sets up the cgroups for the process identified by pid and records the
// resulting subsystem paths in m.Paths.
//
// When m.Cgroups is nil the call is a no-op. When m.Cgroups.Paths is set, the
// configured paths are recorded (skipping subsystems whose lookup reports
// "not found") and handed to cgroups.EnterPid together with pid. Otherwise
// every known subsystem is applied in turn and its path is recorded; a missing
// subsystem is tolerated unless it is "devices".
func (m *Manager) Apply(pid int) (err error) {
	if m.Cgroups == nil {
		return nil
	}

	m.mu.Lock()
	defer m.mu.Unlock()

	cfg := m.Cgroups
	data, err := getCgroupData(cfg, pid)
	if err != nil {
		return err
	}

	joined := make(map[string]string)

	if cfg.Paths != nil {
		for subsys, dir := range cfg.Paths {
			_, lookupErr := data.path(subsys)
			switch {
			case lookupErr == nil:
				joined[subsys] = dir
			case cgroups.IsNotFound(lookupErr):
				// Subsystem is not available; leave it out of the result.
			default:
				return lookupErr
			}
		}
		m.Paths = joined
		return cgroups.EnterPid(m.Paths, pid)
	}

	for _, sys := range subsystems {
		if applyErr := sys.Apply(data); applyErr != nil {
			return applyErr
		}
		// TODO: Apply should, ideally, be reentrant or be split into separate
		// create and join phases, so that the cgroup hierarchy for a container
		// can be created first and joining then consists of writing the
		// process pids to cgroup.procs.
		dir, pathErr := data.path(sys.Name())
		if pathErr == nil {
			joined[sys.Name()] = dir
			continue
		}
		// The non-presence of the devices subsystem is considered fatal for
		// security reasons; any other missing subsystem is simply skipped.
		if !cgroups.IsNotFound(pathErr) || sys.Name() == "devices" {
			return pathErr
		}
	}
	m.Paths = joined
	return nil
}
 
// Destroy removes the cgroup paths recorded in m.Paths and resets m.Paths to
// an empty map.
//
// Nothing is removed when m.Cgroups is nil (Apply is a no-op in that case, so
// the nil check here also avoids a nil-pointer dereference) or when
// m.Cgroups.Paths is set. It returns the error from cgroups.RemovePaths, if
// any, and leaves m.Paths untouched in that case.
func (m *Manager) Destroy() error {
	if m.Cgroups == nil || m.Cgroups.Paths != nil {
		return nil
	}
	m.mu.Lock()
	defer m.mu.Unlock()
	if err := cgroups.RemovePaths(m.Paths); err != nil {
		return err
	}
	m.Paths = make(map[string]string)
	return nil
}
 
// GetPaths returns the subsystem-to-path map currently stored in m.Paths.
// The field is read while holding m.mu; the returned map is the stored map
// itself, not a copy.
func (m *Manager) GetPaths() map[string]string {
	m.mu.Lock()
	defer m.mu.Unlock()
	return m.Paths
}
 
// GetStats collects statistics from every subsystem recorded in m.Paths into
// a fresh cgroups.Stats value.
//
// Entries whose subsystem lookup yields errSubsystemDoesNotExist, or whose
// path does not exist, are skipped. The first error reported by a subsystem's
// GetStats aborts the collection and is returned with a nil result.
func (m *Manager) GetStats() (*cgroups.Stats, error) {
	m.mu.Lock()
	defer m.mu.Unlock()

	result := cgroups.NewStats()
	for subsys, dir := range m.Paths {
		sys, err := subsystems.Get(subsys)
		if err == errSubsystemDoesNotExist {
			continue
		}
		if !cgroups.PathExists(dir) {
			continue
		}
		if statErr := sys.GetStats(dir, result); statErr != nil {
			return nil, statErr
		}
	}
	return result, nil
}
 
// Set writes the resource settings in container.Cgroups to every known
// subsystem, using the paths recorded for this manager, and then validates
// the cpu shares via CheckCpushares when a "cpu" path is recorded.
//
// It returns the first error reported by a subsystem's Set or by
// CheckCpushares, and nil otherwise.
func (m *Manager) Set(container *configs.Config) error {
	// If Paths are set, then we are just joining cgroups paths
	// and there is no need to set any values. m.Cgroups may be nil (Apply
	// accepts that), so guard the dereference.
	if m.Cgroups != nil && m.Cgroups.Paths != nil {
		return nil
	}

	// Take one snapshot under the lock and use it for everything below;
	// reading m.Paths directly here would race with Apply and Destroy.
	paths := m.GetPaths()
	for _, sys := range subsystems {
		path := paths[sys.Name()]
		if err := sys.Set(path, container.Cgroups); err != nil {
			return err
		}
	}

	if cpuPath := paths["cpu"]; cpuPath != "" {
		if err := CheckCpushares(cpuPath, container.Cgroups.Resources.CpuShares); err != nil {
			return err
		}
	}
	return nil
}
 
// Freeze toggles the container's freezer cgroup depending on the state
// provided.
//
// The requested state is stored in m.Cgroups.Resources.Freezer and written
// through the "freezer" subsystem using the recorded "freezer" path. If the
// subsystem lookup or the write fails, the error is returned and the stored
// state keeps its previous value.
func (m *Manager) Freeze(state configs.FreezerState) error {
	// Resolve the subsystem before touching the stored state, so that a
	// failed lookup cannot leave the configuration claiming a state that was
	// never applied.
	freezer, err := subsystems.Get("freezer")
	if err != nil {
		return err
	}

	dir := m.GetPaths()["freezer"]
	prevState := m.Cgroups.Resources.Freezer
	m.Cgroups.Resources.Freezer = state
	if err := freezer.Set(dir, m.Cgroups); err != nil {
		// Roll back so the stored state matches what is actually applied.
		m.Cgroups.Resources.Freezer = prevState
		return err
	}
	return nil
}
 
// GetPids passes the recorded "devices" cgroup path to cgroups.GetPids and
// returns its result.
func (m *Manager) GetPids() ([]int, error) {
	devicesDir := m.GetPaths()["devices"]
	return cgroups.GetPids(devicesDir)
}
 
// GetAllPids passes the recorded "devices" cgroup path to cgroups.GetAllPids
// and returns its result.
func (m *Manager) GetAllPids() ([]int, error) {
	devicesDir := m.GetPaths()["devices"]
	return cgroups.GetAllPids(devicesDir)
}

再以cpu subsystem为例,看看各subsystem具体的操作方法定义:

代码语言:go
复制
// CpuGroup implements the operations for the "cpu" cgroup subsystem. It
// carries no state of its own.
type CpuGroup struct{}

// Name returns the subsystem name, "cpu".
func (s *CpuGroup) Name() string {
	const subsystemName = "cpu"
	return subsystemName
}
 
// Apply updates the cpu subsystem with the cgroup configuration and the
// corresponding pid carried by d.
//
// A "not found" error from the path lookup is tolerated and the (possibly
// empty) path is still handed to ApplyDir; any other lookup error is returned.
func (s *CpuGroup) Apply(d *cgroupData) error {
	// We always want to join the cpu group, to allow fair cpu scheduling
	// on a container basis.
	dir, err := d.path("cpu")
	switch {
	case err == nil, cgroups.IsNotFound(err):
		return s.ApplyDir(dir, d.config, d.pid)
	default:
		return err
	}
}
 
// ApplyDir creates the cgroup directory at path, writes the real-time
// scheduling settings from cgroup, and then writes pid into the cgroup via
// cgroups.WriteCgroupProc. It stops at, and returns, the first error.
//
// An empty path is treated as "nothing to do" and returns nil.
func (s *CpuGroup) ApplyDir(path string, cgroup *configs.Cgroup, pid int) error {
	// This might happen if we have no cpu cgroup mounted.
	// Just do nothing and don't fail.
	if path == "" {
		return nil
	}

	err := os.MkdirAll(path, 0755)
	if err == nil {
		// The real-time group scheduling settings must be in place before
		// the process is moved in: if the process is already in SCHED_RR
		// mode and no RT bandwidth is set, adding it will fail.
		err = s.SetRtSched(path, cgroup)
	}
	if err == nil {
		// d.join is not used here, so unlike the other subsystems the pid
		// has to be placed into the procs file explicitly.
		err = cgroups.WriteCgroupProc(path, pid)
	}
	return err
}
 
// SetRtSched writes the real-time scheduling settings of cgroup into the
// cgroup directory at path: CpuRtPeriod to "cpu.rt_period_us" and
// CpuRtRuntime to "cpu.rt_runtime_us", in that order. A value of 0 is not
// written. The first write error is returned.
func (s *CpuGroup) SetRtSched(path string, cgroup *configs.Cgroup) error {
	// writeNonZero writes value in base 10 to file, doing nothing for 0.
	writeNonZero := func(file string, value int64) error {
		if value == 0 {
			return nil
		}
		return writeFile(path, file, strconv.FormatInt(value, 10))
	}

	if err := writeNonZero("cpu.rt_period_us", cgroup.Resources.CpuRtPeriod); err != nil {
		return err
	}
	return writeNonZero("cpu.rt_runtime_us", cgroup.Resources.CpuRtRuntime)
}
 
// Set writes the cpu settings of cgroup into the cgroup directory at path:
// CpuShares to "cpu.shares", CpuPeriod to "cpu.cfs_period_us" and CpuQuota to
// "cpu.cfs_quota_us", followed by the real-time settings via SetRtSched.
// Values equal to 0 are not written. The first error is returned.
func (s *CpuGroup) Set(path string, cgroup *configs.Cgroup) error {
	settings := [...]struct {
		file  string
		value int64
	}{
		{"cpu.shares", cgroup.Resources.CpuShares},
		{"cpu.cfs_period_us", cgroup.Resources.CpuPeriod},
		{"cpu.cfs_quota_us", cgroup.Resources.CpuQuota},
	}

	for _, setting := range settings {
		if setting.value == 0 {
			continue
		}
		if err := writeFile(path, setting.file, strconv.FormatInt(setting.value, 10)); err != nil {
			return err
		}
	}

	return s.SetRtSched(path, cgroup)
}
 
// Remove looks up the "cpu" path of d and hands both the path and the lookup
// error to removePath, returning its result.
func (s *CpuGroup) Remove(d *cgroupData) error {
	dir, err := d.path("cpu")
	return removePath(dir, err)
}
 
// GetStats reads "cpu.stat" from the cgroup directory at path and stores the
// throttling counters it finds in stats.CpuStats.ThrottlingData:
// "nr_periods" as Periods, "nr_throttled" as ThrottledPeriods and
// "throttled_time" as ThrottledTime. Other keys are ignored.
//
// A missing "cpu.stat" file is not an error: stats is left untouched and nil
// is returned. Errors from opening the file, from parsing a line, or from
// reading the file are returned.
func (s *CpuGroup) GetStats(path string, stats *cgroups.Stats) error {
	f, err := os.Open(filepath.Join(path, "cpu.stat"))
	if err != nil {
		if os.IsNotExist(err) {
			return nil
		}
		return err
	}
	defer f.Close()

	sc := bufio.NewScanner(f)
	for sc.Scan() {
		t, v, err := getCgroupParamKeyValue(sc.Text())
		if err != nil {
			return err
		}
		switch t {
		case "nr_periods":
			stats.CpuStats.ThrottlingData.Periods = v

		case "nr_throttled":
			stats.CpuStats.ThrottlingData.ThrottledPeriods = v

		case "throttled_time":
			stats.CpuStats.ThrottlingData.ThrottledTime = v
		}
	}
	// Scan also returns false when reading fails; report that instead of
	// silently returning partially filled stats. Err is nil at a clean EOF.
	return sc.Err()
}

查看某个runC启动的容器state.json文件,能看到该容器对应的cgroup和namespace 路径信息: $ cat /var/run/runc/$containerName/state.json | jq .

代码语言:json
复制
"namespace_paths": {
    "NEWUTS": "/proc/30097/ns/uts",
    "NEWUSER": "/proc/30097/ns/user",
    "NEWPID": "/proc/30097/ns/pid",
    "NEWNS": "/proc/30097/ns/mnt",
    "NEWNET": "/proc/30097/ns/net",
    "NEWIPC": "/proc/30097/ns/ipc"
  },
  "cgroup_paths": {
    "perf_event": "/sys/fs/cgroup/perf_event/user.slice/container1",
    "net_cls": "/sys/fs/cgroup/net_cls/user.slice/container1",
    "name=systemd": "/sys/fs/cgroup/systemd/user.slice/container1",
    "blkio": "/sys/fs/cgroup/blkio/user.slice/container1",
    "cpu": "/sys/fs/cgroup/cpu,cpuacct/user.slice/container1",
    "cpuacct": "/sys/fs/cgroup/cpu,cpuacct/user.slice/container1",
    "cpuset": "/sys/fs/cgroup/cpuset/user.slice/container1",
    "devices": "/sys/fs/cgroup/devices/user.slice/container1",
    "freezer": "/sys/fs/cgroup/freezer/user.slice/container1",
    "hugetlb": "/sys/fs/cgroup/hugetlb/user.slice/container1",
    "memory": "/sys/fs/cgroup/memory/user.slice/container1"
  },
本文参与 腾讯云自媒体分享计划,分享自作者个人站点/博客。
如有侵权请联系 cloudcommunity@tencent.com 删除

本文分享自 作者个人站点/博客 前往查看

如有侵权,请联系 cloudcommunity@tencent.com 删除。

本文参与 腾讯云自媒体分享计划  ,欢迎热爱写作的你一起参与!

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
相关产品与服务
容器服务
腾讯云容器服务(Tencent Kubernetes Engine, TKE)基于原生 kubernetes 提供以容器为核心的、高度可扩展的高性能容器管理服务,覆盖 Serverless、边缘计算、分布式云等多种业务部署场景,业内首创单个集群兼容多种计算节点的容器资源管理模式。同时产品作为云原生 Finops 领先布道者,主导开源项目Crane,全面助力客户实现资源优化、成本控制。
领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档