aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSerge Hallyn <serge.hallyn@canonical.com>2012-01-23 12:07:44 -0600
committerDaniel Lezcano <daniel.lezcano@free.fr>2012-02-26 10:44:40 +0100
commitd08ba6ec05510e95eca791b19731f4a241d7f675 (patch)
tree2be5dddc4f699f617f954d0e3c2ac9f4eae69e98
parentlxc-ubuntu: use release-updates and release-security (diff)
downloadlxc-d08ba6ec05510e95eca791b19731f4a241d7f675.tar.gz
lxc-d08ba6ec05510e95eca791b19731f4a241d7f675.tar.bz2
lxc-d08ba6ec05510e95eca791b19731f4a241d7f675.zip
Support nested cgroups
With this patch, I can start a container 'o1' inside another container 'o1'. (Of course, the containers must be on different subnets.) Detail: 1. Create cgroups for containers under /lxc. 2. Support nested lxc: respect init's cgroup: create cgroups under init's cgroup. So if we start a container 'c2' inside a container 'c1', we'll use /sys/fs/cgroup/freezer/lxc/c1/lxc/c2 instead of /sys/fs/cgroup/freezer/c2. This allows a container c1 to be created inside container c1. It also allows a container's limits to be enforced on all of that container's children (which a MAC policy could already enforce, in which case current lxc code would be unable to nest altogether). 3. Finally, if a container's cgroup already exists, rename it rather than failing to start the container. Try to WARN the user so they might go clean the old cgroup up. Whereas without this patch container o1's cgroup would be /sys/fs/cgroup/<subsys>/o1, it now becomes /sys/fs/cgroup/<subsys>/<initcgroup>/lxc/o1. So if init is in cgroup '/', then o1's freezer cgroup would be /sys/fs/cgroup/freezer/lxc/o1. Changelog: . make lxc-ps work with separate mtab. If cgroups were mounted with -n, and mtab is not linked to /proc/self/mounts, then 'mount -t cgroup' won't show these mounts. So make lxc-ps not use it, but rather use /proc/self/mounts directly. . lxc-ls in the past assumed that a container's cgroup was just '/<name>'. Now it is '/<host-init-cgroup>/lxc/<name>'. Handle that. . the first version of this patch was setting clone_children on <path-to-cpusets-cgroup>/<init-cgroup>/lxc, not the parent of that dir. That failed to initialize that cgroup, so tasks could not enter it. Signed-off-by: Serge Hallyn <serge.hallyn@canonical.com> Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
-rw-r--r--src/lxc/cgroup.c171
-rw-r--r--src/lxc/lxc-ls.in5
-rwxr-xr-xsrc/lxc/lxc-ps.in43
3 files changed, 184 insertions, 35 deletions
diff --git a/src/lxc/cgroup.c b/src/lxc/cgroup.c
index a8e6c27..8077a8d 100644
--- a/src/lxc/cgroup.c
+++ b/src/lxc/cgroup.c
@@ -81,9 +81,65 @@ static char *hasmntopt_multiple(struct mntent *mntent, const char *options)
return hasmntopt(mntent, ptr);
}
+/*
+ * get_init_cgroup: get the cgroup init is in.
+ * dsg: preallocated buffer to put the output in
+ * subsystem: the exact cgroup subsystem to look up
+ * mntent: a mntent (from getmntent) whose mntopts contains the
+ * subsystem to look up.
+ *
+ * subsystem and mntent can both be NULL, in which case we return
+ * the first entry in /proc/1/cgroup.
+ *
+ * Returns a pointer to the answer, which may be "".
+ */
+static char *get_init_cgroup(const char *subsystem, struct mntent *mntent,
+ char *dsg)
+{
+ FILE *f;
+ char *c, *c2;
+ char line[MAXPATHLEN];
+
+ *dsg = '\0';
+ f = fopen("/proc/1/cgroup", "r");
+ if (!f)
+ return dsg;
+
+ while (fgets(line, MAXPATHLEN, f)) {
+ c = index(line, ':');
+ if (!c)
+ continue;
+ c++;
+ c2 = index(c, ':');
+ if (!c2)
+ continue;
+ *c2 = '\0';
+ c2++;
+ if (!subsystem && !mntent)
+ goto good;
+ if (subsystem && strcmp(c, subsystem) != 0)
+ continue;
+ if (mntent && !hasmntopt(mntent, c))
+ continue;
+good:
+ DEBUG("get_init_cgroup: found init cgroup for subsys %s at %s\n",
+ subsystem, c2);
+ strncpy(dsg, c2, MAXPATHLEN);
+ c = &dsg[strlen(dsg)-1];
+ if (*c == '\n')
+ *c = '\0';
+ goto found;
+ }
+
+found:
+ fclose(f);
+ return dsg;
+}
+
static int get_cgroup_mount(const char *subsystem, char *mnt)
{
struct mntent *mntent;
+ char initcgroup[MAXPATHLEN];
FILE *file = NULL;
file = setmntent(MTAB, "r");
@@ -97,14 +153,22 @@ static int get_cgroup_mount(const char *subsystem, char *mnt)
if (strcmp(mntent->mnt_type, "cgroup"))
continue;
if (!subsystem || hasmntopt_multiple(mntent, subsystem)) {
- strcpy(mnt, mntent->mnt_dir);
+ int ret;
+ ret = snprintf(mnt, MAXPATHLEN, "%s%s/lxc",
+ mntent->mnt_dir,
+ get_init_cgroup(subsystem, NULL,
+ initcgroup));
+ if (ret < 0 || ret >= MAXPATHLEN)
+ goto fail;
fclose(file);
DEBUG("using cgroup mounted at '%s'", mnt);
return 0;
}
};
- DEBUG("Failed to find cgroup for %s\n", subsystem ? subsystem : "(NULL)");
+fail:
+ DEBUG("Failed to find cgroup for %s\n",
+ subsystem ? subsystem : "(NULL)");
fclose(file);
@@ -195,38 +259,76 @@ int lxc_cgroup_attach(const char *path, pid_t pid)
}
/*
+ * rename cgname, which is under cgparent, to a new name starting
+ * with 'cgparent/dead'. That way cgname can be reused. Return
+ * 0 on success, -1 on failure.
+ */
+int try_to_move_cgname(char *cgparent, char *cgname)
+{
+ char *newdir;
+
+ /* tempnam problems don't matter here - cgroupfs will prevent
+ * duplicates if we race, and we'll just fail at that (unlikely)
+ * point
+ */
+
+ newdir = tempnam(cgparent, "dead");
+ if (!newdir)
+ return -1;
+ if (rename(cgname, newdir))
+ return -1;
+ WARN("non-empty cgroup %s renamed to %s, please manually inspect it\n",
+ cgname, newdir);
+
+ return 0;
+}
+
+/*
* create a cgroup for the container in a particular subsystem.
- * XXX TODO we will of course want to use cgroup_path{subsystem}/lxc/name,
- * not just cgroup_path{subsystem}/name.
*/
static int lxc_one_cgroup_create(const char *name,
struct mntent *mntent, pid_t pid)
{
- char cgname[MAXPATHLEN];
+ char cginit[MAXPATHLEN], cgname[MAXPATHLEN], cgparent[MAXPATHLEN];
char clonechild[MAXPATHLEN];
- int flags;
-
- snprintf(cgname, MAXPATHLEN, "%s/%s", mntent->mnt_dir, name);
+ char initcgroup[MAXPATHLEN];
+ int flags, ret;
+
+ /* cgparent is the parent dir, /sys/fs/cgroup/<cgroup>/<init-cgroup>/lxc */
+ /* (remember get_init_cgroup() returns a path starting with '/') */
+ /* cgname is the full name, /sys/fs/cgroup/<cgroup>/<init-cgroup>/lxc/name */
+ ret = snprintf(cginit, MAXPATHLEN, "%s%s", mntent->mnt_dir,
+ get_init_cgroup(NULL, mntent, initcgroup));
+ if (ret < 0 || ret >= MAXPATHLEN) {
+ SYSERROR("Failed creating pathname for init's cgroup (%d)\n", ret);
+ return -1;
+ }
- /*
- * There is a previous cgroup, assume it is empty,
- * otherwise that fails
- */
- if (!access(cgname, F_OK) && rmdir(cgname)) {
- SYSERROR("failed to remove previous cgroup '%s'", cgname);
+ ret = snprintf(cgparent, MAXPATHLEN, "%s/lxc", cginit);
+ if (ret < 0 || ret >= MAXPATHLEN) {
+ SYSERROR("Failed creating pathname for cgroup parent (%d)\n", ret);
+ return -1;
+ }
+ ret = snprintf(cgname, MAXPATHLEN, "%s/%s", cgparent, name);
+ if (ret < 0 || ret >= MAXPATHLEN) {
+ SYSERROR("Failed creating pathname for cgroup (%d)\n", ret);
return -1;
}
flags = get_cgroup_flags(mntent);
- /* We have the deprecated ns_cgroup subsystem */
+ /* Do we have the deprecated ns_cgroup subsystem? */
if (flags & CGROUP_NS_CGROUP) {
WARN("using deprecated ns_cgroup");
- return cgroup_rename_nsgroup(mntent->mnt_dir, cgname, pid);
+ return cgroup_rename_nsgroup(cgparent, cgname, pid);
}
- snprintf(clonechild, MAXPATHLEN, "%s/cgroup.clone_children",
- mntent->mnt_dir);
+ ret = snprintf(clonechild, MAXPATHLEN, "%s/cgroup.clone_children",
+ cginit);
+ if (ret < 0 || ret >= MAXPATHLEN) {
+ SYSERROR("Failed creating pathname for clone_children (%d)\n", ret);
+ return -1;
+ }
/* we check if the kernel has clone_children, at this point if there
* no clone_children neither ns_cgroup, that means the cgroup is mounted
@@ -237,14 +339,31 @@ static int lxc_one_cgroup_create(const char *name,
return -1;
}
- /* we enable the clone_children flag of the cgroup */
+ /* enable the clone_children flag of the cgroup */
if (cgroup_enable_clone_children(clonechild)) {
SYSERROR("failed to enable 'clone_children flag");
return -1;
}
+ /* if /sys/fs/cgroup/<cgroup>/<init-cgroup>/lxc does not exist, create it */
+ if (access(cgparent, F_OK) && mkdir(cgparent, 0755)) {
+ SYSERROR("failed to create '%s' directory", cgparent);
+ return -1;
+ }
+
+ /*
+ * There is a previous cgroup. Try to delete it. If that fails
+ * (i.e. it is not empty) try to move it out of the way.
+ */
+ if (!access(cgname, F_OK) && rmdir(cgname)) {
+ if (try_to_move_cgname(cgparent, cgname)) {
+ SYSERROR("failed to remove previous cgroup '%s'", cgname);
+ return -1;
+ }
+ }
+
/* Let's create the cgroup */
- if (mkdir(cgname, 0700)) {
+ if (mkdir(cgname, 0755)) {
SYSERROR("failed to create '%s' directory", cgname);
return -1;
}
@@ -301,11 +420,14 @@ out:
}
-int lxc_one_cgroup_destroy(const char *cgmnt, const char *name)
+int lxc_one_cgroup_destroy(struct mntent *mntent, const char *name)
{
- char cgname[MAXPATHLEN];
+ char cgname[MAXPATHLEN], initcgroup[MAXPATHLEN];
+ char *cgmnt = mntent->mnt_dir;
- snprintf(cgname, MAXPATHLEN, "%s/%s", cgmnt, name);
+ snprintf(cgname, MAXPATHLEN, "%s%s/lxc/%s", cgmnt,
+ get_init_cgroup(NULL, mntent, initcgroup), name);
+ DEBUG("destroying %s\n", cgname);
if (rmdir(cgname)) {
SYSERROR("failed to remove cgroup '%s'", cgname);
return -1;
@@ -333,8 +455,7 @@ int lxc_cgroup_destroy(const char *name)
while ((mntent = getmntent(file))) {
if (!strcmp(mntent->mnt_type, "cgroup")) {
- DEBUG("destroying %s %s\n", mntent->mnt_dir, name);
- ret = lxc_one_cgroup_destroy(mntent->mnt_dir, name);
+ ret = lxc_one_cgroup_destroy(mntent, name);
if (ret) {
fclose(file);
return ret;
diff --git a/src/lxc/lxc-ls.in b/src/lxc/lxc-ls.in
index 2f9075c..792ea0e 100644
--- a/src/lxc/lxc-ls.in
+++ b/src/lxc/lxc-ls.in
@@ -31,6 +31,9 @@ active=$(netstat -xl | grep $lxcpath | \
if test -n "$active"; then
get_cgroup
if test -n "$mount_point"; then
- cd $mount_point; ls "$@" -d $active
+ # get cgroup for init
+ init_cgroup=`cat /proc/1/cgroup | awk -F: '{ print $3 }' | head -1`
+ cd $mount_point/$init_cgroup/lxc
+ ls "$@" -d $active
fi
fi
diff --git a/src/lxc/lxc-ps.in b/src/lxc/lxc-ps.in
index 2f1d537..2fa7b8b 100755
--- a/src/lxc/lxc-ps.in
+++ b/src/lxc/lxc-ps.in
@@ -53,25 +53,32 @@ sub get_cgroup {
my $mount_string;
$mount_string=`mount -t cgroup |grep -E -e '^lxc '`;
- unless ($mount_string) {
- $mount_string=`mount |grep -m1 'type cgroup'`;
- }
- chomp($mount_string);
if ($mount_string) {
+ # use the one 'lxc' cgroup mount if it exists
+ chomp($mount_string);
$$ref_cgroup=`echo "$mount_string" |cut -d' ' -f3`;
chomp($$ref_cgroup);
}
- die "unable to find mounted cgroup" unless $$ref_cgroup;
+ # Otherwise (i.e. cgroup-bin) use the first cgroup mount
+ $mount_string=`grep -m1 -E '^[^ \t]+[ \t]+[^ \t]+[ \t]+cgroup' /proc/self/mounts`;
+ unless ($mount_string) {
+ die "unable to find mounted cgroup" unless $$ref_cgroup;
+ }
+ chomp($mount_string);
+ $$ref_cgroup=`echo "$mount_string" |cut -d' ' -f2`;
+ chomp($$ref_cgroup);
+ return;
}
sub get_pids_in_containers {
my $ref_names = shift;
my $ref_cgroup = shift;
my $ref_pids = shift;
+ my $init_cgroup = shift;
my @pidlist;
for (@{$ref_names}) {
- my $task_file = "$$ref_cgroup/$_/tasks";
+ my $task_file = "$$ref_cgroup/$init_cgroup/lxc/$_/tasks";
$LXC_NAMES{$_} = 1;
open(tasks, "cat $task_file 2>/dev/null |") or next;
@@ -108,6 +115,20 @@ sub execute_ps {
close ps;
}
+sub get_init_cgroup {
+ my $filename = "/proc/1/cgroup";
+ open(LXC, "$filename");
+ my @cgroup = <LXC>;
+ close LXC;
+ my $container = '';
+ foreach ( @cgroup ) {
+ chomp;
+ # find the container name after :/
+ s/.*:\///o;
+ }
+ return $container;
+}
+
sub get_container {
my $pid = shift;
my $filename = "/proc/$pid/cgroup";
@@ -119,8 +140,10 @@ sub get_container {
foreach ( @cgroup ) {
chomp;
# find the container name after :/
- s/.*:\///o;
- $container = $_;
+ s/.*:\///o;
+ # chop off everything up to 'lxc/'
+ s/lxc\///o;
+ $container = $_;
}
return $container;
}
@@ -160,6 +183,7 @@ my $arg_help = '';
my $arg_usage = '';
my $arg_lxc = '';
my @arg_name;
+my $init_cgroup = '/';
GetOptions('help' => \$arg_help,
'usage' => \$arg_usage,
@@ -186,8 +210,9 @@ if (@arg_name > 0) {
my $pid_list;
$LXC_DISPLAY = 2;
+ $init_cgroup = get_init_cgroup();
get_cgroup \$cgroup;
- get_pids_in_containers(\@arg_name, \$cgroup, \$pid_list);
+ get_pids_in_containers(\@arg_name, \$cgroup, \$pid_list, $init_cgroup);
if ($pid_list) {
@ARGV = ("-p $pid_list",@ARGV);
}